1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53 /*
54 */
55 /*
56 * File: vm/vm_pageout.c
57 * Author: Avadis Tevanian, Jr., Michael Wayne Young
58 * Date: 1985
59 *
60 * The proverbial page-out daemon.
61 */
62
63 #include <mach_pagemap.h>
64 #include <mach_cluster_stats.h>
65 #include <mach_kdb.h>
66 #include <advisory_pageout.h>
67
68 #include <mach/mach_types.h>
69 #include <mach/memory_object.h>
70 #include <mach/memory_object_default.h>
71 #include <mach/memory_object_control_server.h>
72 #include <mach/mach_host_server.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_statistics.h>
75 #include <kern/host_statistics.h>
76 #include <kern/counters.h>
77 #include <kern/thread.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_fault.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <machine/vm_tuning.h>
86 #include <kern/misc_protos.h>
87
88
89 extern ipc_port_t memory_manager_default;
90
91 #ifndef VM_PAGE_LAUNDRY_MAX
92 #define VM_PAGE_LAUNDRY_MAX 16 /* outstanding DMM+EMM page cleans */
93 #endif /* VM_PAGE_LAUNDRY_MAX */
94
95 #ifndef VM_PAGEOUT_BURST_MAX
96 #define VM_PAGEOUT_BURST_MAX 6 /* simultaneous EMM page cleans */
97 #endif /* VM_PAGEOUT_BURST_MAX */
98
99 #ifndef VM_PAGEOUT_BURST_WAIT
100 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
101 #endif /* VM_PAGEOUT_BURST_WAIT */
102
103 #ifndef VM_PAGEOUT_EMPTY_WAIT
104 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
105 #endif /* VM_PAGEOUT_EMPTY_WAIT */
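/*
 * Illustration of how the two waits above combine in vm_pageout_scan():
 * the pause is vm_page_burst_count * vm_pageout_burst_wait msecs, e.g.
 * 6 outstanding burst pages * 30 ms = 180 ms, but when both inactive
 * queues are empty the pause is raised to at least
 * VM_PAGEOUT_EMPTY_WAIT (200 ms).
 */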
106
107 /*
108 * To obtain a reasonable LRU approximation, the inactive queue
109 * needs to be large enough to give pages on it a chance to be
110 * referenced a second time. This macro defines the fraction
111 * of active+inactive pages that should be inactive.
112 * The pageout daemon uses it to update vm_page_inactive_target.
113 *
114 * If vm_page_free_count falls below vm_page_free_target and
115 * vm_page_inactive_count is below vm_page_inactive_target,
116 * then the pageout daemon starts running.
117 */
118
119 #ifndef VM_PAGE_INACTIVE_TARGET
120 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
121 #endif /* VM_PAGE_INACTIVE_TARGET */
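/*
 * For example, with 90000 active+inactive pages the daemon aims to
 * keep 90000 * 1/3 = 30000 of them on the inactive queue.
 */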
122
123 /*
124 * Once the pageout daemon starts running, it keeps going
125 * until vm_page_free_count meets or exceeds vm_page_free_target.
126 */
127
128 #ifndef VM_PAGE_FREE_TARGET
129 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
130 #endif /* VM_PAGE_FREE_TARGET */
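/*
 * For example, VM_PAGE_FREE_TARGET(4000) = 15 + 4000/80 = 65 pages.
 */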
131
132 /*
133 * The pageout daemon always starts running once vm_page_free_count
134 * falls below vm_page_free_min.
135 */
136
137 #ifndef VM_PAGE_FREE_MIN
138 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
139 #endif /* VM_PAGE_FREE_MIN */
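/*
 * For example, VM_PAGE_FREE_MIN(4000) = 10 + 4000/100 = 50 pages.
 */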
140
141 /*
142 * When vm_page_free_count falls below vm_page_free_reserved,
143 * only vm-privileged threads can allocate pages. vm-privilege
144 * allows the pageout daemon and default pager (and any other
145 * associated threads needed for default pageout) to continue
146 * operation by dipping into the reserved pool of pages.
147 */
148
149 #ifndef VM_PAGE_FREE_RESERVED
150 #define VM_PAGE_FREE_RESERVED \
151 ((6 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
152 #endif /* VM_PAGE_FREE_RESERVED */
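/*
 * With the default VM_PAGE_LAUNDRY_MAX of 16 this reserve works out to
 * (6 * 16) + NCPUS pages, e.g. 98 pages on a 2-processor machine.
 */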
153
154 /*
155 * Exported variable used to broadcast the activation of the pageout scan.
156 * The Working Set code uses this to throttle its use of pmap removes; in this
157 * way, code which runs within memory in an uncontested context does
158 * not keep encountering soft faults.
159 */
160
161 unsigned int vm_pageout_scan_event_counter = 0;
162
163 /*
164 * Forward declarations for internal routines.
165 */
166 extern void vm_pageout_continue(void);
167 extern void vm_pageout_scan(void);
168 extern void vm_pageout_throttle(vm_page_t m);
169 extern vm_page_t vm_pageout_cluster_page(
170 vm_object_t object,
171 vm_object_offset_t offset,
172 boolean_t precious_clean);
173
174 unsigned int vm_pageout_reserved_internal = 0;
175 unsigned int vm_pageout_reserved_really = 0;
176
177 unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
178 unsigned int vm_page_laundry_min = 0;
179 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
180 unsigned int vm_pageout_burst_max = 0;
181 unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
182 unsigned int vm_pageout_burst_min = 0;
183 unsigned int vm_pageout_burst_loop_throttle = 4096;
184 unsigned int vm_pageout_pause_count = 0;
185 unsigned int vm_pageout_pause_max = 0;
186 unsigned int vm_free_page_pause = 100; /* milliseconds */
187
188 /*
189 * Protection against zero fill flushing live working sets derived
190 * from existing backing store and files
191 */
192 unsigned int vm_accellerate_zf_pageout_trigger = 400;
193 unsigned int vm_zf_iterator;
194 unsigned int vm_zf_iterator_count = 40;
195 unsigned int last_page_zf;
196 unsigned int vm_zf_count = 0;
197
198 /*
199 * These variables record the pageout daemon's actions:
200 * how many pages it looks at and what happens to those pages.
201 * No locking needed because only one thread modifies the variables.
202 */
203
204 unsigned int vm_pageout_active = 0; /* debugging */
205 unsigned int vm_pageout_inactive = 0; /* debugging */
206 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
207 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
208 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
209 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
210 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
211 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
212 unsigned int vm_pageout_inactive_used = 0; /* debugging */
213 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
214 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
215 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
216 unsigned int vm_stat_discard = 0; /* debugging */
217 unsigned int vm_stat_discard_sent = 0; /* debugging */
218 unsigned int vm_stat_discard_failure = 0; /* debugging */
219 unsigned int vm_stat_discard_throttle = 0; /* debugging */
220 unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
221 unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
222 unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
223 unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
224 unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
225 unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
226
227 /*
228 * Backing store throttle when BS is exhausted
229 */
230 unsigned int vm_backing_store_low = 0;
231
232 unsigned int vm_pageout_out_of_line = 0;
233 unsigned int vm_pageout_in_place = 0;
234
235
236 /*
237 * Routine: vm_backing_store_disable
238 * Purpose:
239 * Suspend non-privileged threads wishing to extend
240 * backing store when we are low on backing store
241 * (Synchronized by caller)
242 */
243 void
244 vm_backing_store_disable(
245 boolean_t disable)
246 {
247 if(disable) {
248 vm_backing_store_low = 1;
249 } else {
250 if(vm_backing_store_low) {
251 vm_backing_store_low = 0;
252 thread_wakeup((event_t) &vm_backing_store_low);
253 }
254 }
255 }
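/*
 * Sketch of the intended usage (callers provide their own
 * synchronization): a backing-store manager such as the default pager
 * calls vm_backing_store_disable(TRUE) when it runs out of space and
 * vm_backing_store_disable(FALSE) once space is available again; the
 * wakeup above unblocks any thread waiting on &vm_backing_store_low.
 */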
256
257
258 /*
259 * Routine: vm_pageout_object_allocate
260 * Purpose:
261 * Allocate an object for use as out-of-line memory in a
262 * data_return/data_initialize message.
263 * The page must be in an unlocked object.
264 *
265 * If the page belongs to a trusted pager, cleaning in place
266 * will be used, which utilizes a special "pageout object"
267 * containing private alias pages for the real page frames.
268 * Untrusted pagers use normal out-of-line memory.
269 */
270 vm_object_t
271 vm_pageout_object_allocate(
272 vm_page_t m,
273 vm_size_t size,
274 vm_object_offset_t offset)
275 {
276 vm_object_t object = m->object;
277 vm_object_t new_object;
278
279 assert(object->pager_ready);
280
281 new_object = vm_object_allocate(size);
282
283 if (object->pager_trusted) {
284 assert (offset < object->size);
285
286 vm_object_lock(new_object);
287 new_object->pageout = TRUE;
288 new_object->shadow = object;
289 new_object->can_persist = FALSE;
290 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
291 new_object->shadow_offset = offset;
292 vm_object_unlock(new_object);
293
294 /*
295 * Take a paging reference on the object. This will be dropped
296 * in vm_pageout_object_terminate()
297 */
298 vm_object_lock(object);
299 vm_object_paging_begin(object);
300 vm_page_lock_queues();
301 vm_pageout_throttle(m);
302 vm_page_unlock_queues();
303 vm_object_unlock(object);
304
305 vm_pageout_in_place++;
306 } else
307 vm_pageout_out_of_line++;
308 return(new_object);
309 }
310
311 #if MACH_CLUSTER_STATS
312 unsigned long vm_pageout_cluster_dirtied = 0;
313 unsigned long vm_pageout_cluster_cleaned = 0;
314 unsigned long vm_pageout_cluster_collisions = 0;
315 unsigned long vm_pageout_cluster_clusters = 0;
316 unsigned long vm_pageout_cluster_conversions = 0;
317 unsigned long vm_pageout_target_collisions = 0;
318 unsigned long vm_pageout_target_page_dirtied = 0;
319 unsigned long vm_pageout_target_page_freed = 0;
320 #define CLUSTER_STAT(clause) clause
321 #else /* MACH_CLUSTER_STATS */
322 #define CLUSTER_STAT(clause)
323 #endif /* MACH_CLUSTER_STATS */
324
325 /*
326 * Routine: vm_pageout_object_terminate
327 * Purpose:
328 * Destroy the pageout_object allocated by
329 * vm_pageout_object_allocate(), and perform all of the
330 * required cleanup actions.
331 *
332 * In/Out conditions:
333 * The object must be locked, and will be returned locked.
334 */
335 void
336 vm_pageout_object_terminate(
337 vm_object_t object)
338 {
339 vm_object_t shadow_object;
340 boolean_t shadow_internal;
341
342 /*
343 * Deal with the deallocation (last reference) of a pageout object
344 * (used for cleaning-in-place) by dropping the paging references/
345 * freeing pages in the original object.
346 */
347
348 assert(object->pageout);
349 shadow_object = object->shadow;
350 vm_object_lock(shadow_object);
351 shadow_internal = shadow_object->internal;
352
353 while (!queue_empty(&object->memq)) {
354 vm_page_t p, m;
355 vm_object_offset_t offset;
356
357 p = (vm_page_t) queue_first(&object->memq);
358
359 assert(p->private);
360 assert(p->pageout);
361 p->pageout = FALSE;
362 assert(!p->cleaning);
363
364 offset = p->offset;
365 VM_PAGE_FREE(p);
366 p = VM_PAGE_NULL;
367
368 m = vm_page_lookup(shadow_object,
369 offset + object->shadow_offset);
370
371 if(m == VM_PAGE_NULL)
372 continue;
373 assert(m->cleaning);
374 /* used as a trigger on upl_commit etc to recognize the */
375 /* pageout daemon's subsequent desire to pageout a cleaning */
376 /* page. When the bit is on, the upl commit code will */
377 /* respect the pageout bit in the target page over the */
378 /* caller's page list indication */
379 m->dump_cleaning = FALSE;
380
381 /*
382 * Account for the paging reference taken when
383 * m->cleaning was set on this page.
384 */
385 vm_object_paging_end(shadow_object);
386 assert((m->dirty) || (m->precious) ||
387 (m->busy && m->cleaning));
388
389 /*
390 * Handle the trusted pager throttle.
391 * Also decrement the burst throttle (if external).
392 */
393 vm_page_lock_queues();
394 if (m->laundry) {
395 if (!shadow_internal)
396 vm_page_burst_count--;
397 vm_page_laundry_count--;
398 m->laundry = FALSE;
399 if (vm_page_laundry_count < vm_page_laundry_min) {
400 vm_page_laundry_min = 0;
401 thread_wakeup((event_t) &vm_page_laundry_count);
402 }
403 }
404
405 /*
406 * Handle the "target" page(s). These pages are to be freed if
407 * successfully cleaned. Target pages are always busy, and are
408 * wired exactly once. The initial target pages are not mapped,
409 * (so cannot be referenced or modified) but converted target
410 * pages may have been modified between the selection as an
411 * adjacent page and conversion to a target.
412 */
413 if (m->pageout) {
414 assert(m->busy);
415 assert(m->wire_count == 1);
416 m->cleaning = FALSE;
417 m->pageout = FALSE;
418 #if MACH_CLUSTER_STATS
419 if (m->wanted) vm_pageout_target_collisions++;
420 #endif
421 /*
422 * Revoke all access to the page. Since the object is
423 * locked, and the page is busy, this prevents the page
424 * from being dirtied after the pmap_is_modified() call
425 * returns.
426 */
427 pmap_page_protect(m->phys_page, VM_PROT_NONE);
428
429 /*
430 * Since the page is left "dirty" but "not modified", we
431 * can detect whether the page was redirtied during
432 * pageout by checking the modify state.
433 */
434 m->dirty = pmap_is_modified(m->phys_page);
435
436 if (m->dirty) {
437 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
438 vm_page_unwire(m);/* reactivates */
439 VM_STAT(reactivations++);
440 PAGE_WAKEUP_DONE(m);
441 } else {
442 CLUSTER_STAT(vm_pageout_target_page_freed++;)
443 vm_page_free(m);/* clears busy, etc. */
444 }
445 vm_page_unlock_queues();
446 continue;
447 }
448 /*
449 * Handle the "adjacent" pages. These pages were cleaned in
450 * place, and should be left alone.
451 * If the page is not already on a queue and was referenced,
452 * make it active; otherwise deactivate it.
453 */
454 if (!m->active && !m->inactive && !m->private) {
455 if (m->reference)
456 vm_page_activate(m);
457 else
458 vm_page_deactivate(m);
459 }
460 if((m->busy) && (m->cleaning)) {
461
462 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
463 m->busy = FALSE;
464
465 /* We do not re-set m->dirty ! */
466 /* The page was busy so no extraneous activity */
467 /* could have occurred. COPY_INTO is a read into the */
468 /* new pages. CLEAN_IN_PLACE does actually write */
469 /* out the pages, but handling outside of this code */
470 /* will take care of resetting dirty. We do, however, */
471 /* clear the modify bit for the Programmed I/O case. */
472 pmap_clear_modify(m->phys_page);
473 if(m->absent) {
474 m->absent = FALSE;
475 if(shadow_object->absent_count == 1)
476 vm_object_absent_release(shadow_object);
477 else
478 shadow_object->absent_count--;
479 }
480 m->overwriting = FALSE;
481 } else if (m->overwriting) {
482 /* alternate request page list, write to page_list */
483 /* case. Occurs when the original page was wired */
484 /* at the time of the list request */
485 assert(m->wire_count != 0);
486 vm_page_unwire(m);/* reactivates */
487 m->overwriting = FALSE;
488 } else {
489 /*
490 * Set the dirty state according to whether or not the page was
491 * modified during the pageout. Note that we purposefully do
492 * NOT call pmap_clear_modify since the page is still mapped.
493 * If the page were to be dirtied between the 2 calls, this
494 * fact would be lost. This code is only necessary to
495 * maintain statistics, since the pmap module is always
496 * consulted if m->dirty is false.
497 */
498 #if MACH_CLUSTER_STATS
499 m->dirty = pmap_is_modified(m->phys_page);
500
501 if (m->dirty) vm_pageout_cluster_dirtied++;
502 else vm_pageout_cluster_cleaned++;
503 if (m->wanted) vm_pageout_cluster_collisions++;
504 #else
505 m->dirty = 0;
506 #endif
507 }
508 m->cleaning = FALSE;
509
510 /*
511 * Wakeup any thread waiting for the page to be un-cleaning.
512 */
513 PAGE_WAKEUP(m);
514 vm_page_unlock_queues();
515 }
516 /*
517 * Account for the paging reference taken in vm_pageout_object_allocate.
518 */
519 vm_object_paging_end(shadow_object);
520 vm_object_unlock(shadow_object);
521
522 assert(object->ref_count == 0);
523 assert(object->paging_in_progress == 0);
524 assert(object->resident_page_count == 0);
525 return;
526 }
527
528 /*
529 * Routine: vm_pageout_setup
530 * Purpose:
531 * Set up a page for pageout (clean & flush).
532 *
533 * Move the page to a new object, as part of which it will be
534 * sent to its memory manager in a memory_object_data_write or
535 * memory_object_initialize message.
536 *
537 * The "new_object" and "new_offset" arguments
538 * indicate where the page should be moved.
539 *
540 * In/Out conditions:
541 * The page in question must not be on any pageout queues,
542 * and must be busy. The object to which it belongs
543 * must be unlocked, and the caller must hold a paging
544 * reference to it. The new_object must not be locked.
545 *
546 * This routine returns a pointer to a place-holder page,
547 * inserted at the same offset, to block out-of-order
548 * requests for the page. The place-holder page must
549 * be freed after the data_write or initialize message
550 * has been sent.
551 *
552 * The original page is put on a paging queue and marked
553 * not busy on exit.
554 */
555 vm_page_t
556 vm_pageout_setup(
557 register vm_page_t m,
558 register vm_object_t new_object,
559 vm_object_offset_t new_offset)
560 {
561 register vm_object_t old_object = m->object;
562 vm_object_offset_t paging_offset;
563 vm_object_offset_t offset;
564 register vm_page_t holding_page;
565 register vm_page_t new_m;
566 register vm_page_t new_page;
567 boolean_t need_to_wire = FALSE;
568
569
570 XPR(XPR_VM_PAGEOUT,
571 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
572 (integer_t)m->object, (integer_t)m->offset,
573 (integer_t)m, (integer_t)new_object,
574 (integer_t)new_offset);
575 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
576 !m->restart);
577
578 assert(m->dirty || m->precious);
579
580 /*
581 * Create a place-holder page where the old one was, to prevent
582 * attempted pageins of this page while we're unlocked.
583 */
584 VM_PAGE_GRAB_FICTITIOUS(holding_page);
585
586 vm_object_lock(old_object);
587
588 offset = m->offset;
589 paging_offset = offset + old_object->paging_offset;
590
591 if (old_object->pager_trusted) {
592 /*
593 * This pager is trusted, so we can clean this page
594 * in place. Leave it in the old object, and mark it
595 * cleaning & pageout.
596 */
597 new_m = holding_page;
598 holding_page = VM_PAGE_NULL;
599
600 /*
601 * Set up new page to be private shadow of real page.
602 */
603 new_m->phys_page = m->phys_page;
604 new_m->fictitious = FALSE;
605 new_m->pageout = TRUE;
606
607 /*
608 * Mark real page as cleaning (indicating that we hold a
609 * paging reference to be released via m_o_d_r_c) and
610 * pageout (indicating that the page should be freed
611 * when the pageout completes).
612 */
613 pmap_clear_modify(m->phys_page);
614 vm_page_lock_queues();
615 new_m->private = TRUE;
616 vm_page_wire(new_m);
617 m->cleaning = TRUE;
618 m->pageout = TRUE;
619
620 vm_page_wire(m);
621 assert(m->wire_count == 1);
622 vm_page_unlock_queues();
623
624 m->dirty = TRUE;
625 m->precious = FALSE;
626 m->page_lock = VM_PROT_NONE;
627 m->unusual = FALSE;
628 m->unlock_request = VM_PROT_NONE;
629 } else {
630 /*
631 * Cannot clean in place, so rip the old page out of the
632 * object, and stick the holding page in. Set new_m to the
633 * page in the new object.
634 */
635 vm_page_lock_queues();
636 VM_PAGE_QUEUES_REMOVE(m);
637 vm_page_remove(m);
638
639 vm_page_insert(holding_page, old_object, offset);
640 vm_page_unlock_queues();
641
642 m->dirty = TRUE;
643 m->precious = FALSE;
644 new_m = m;
645 new_m->page_lock = VM_PROT_NONE;
646 new_m->unlock_request = VM_PROT_NONE;
647
648 if (old_object->internal)
649 need_to_wire = TRUE;
650 }
651 /*
652 * Record that this page has been written out
653 */
654 #if MACH_PAGEMAP
655 vm_external_state_set(old_object->existence_map, offset);
656 #endif /* MACH_PAGEMAP */
657
658 vm_object_unlock(old_object);
659
660 vm_object_lock(new_object);
661
662 /*
663 * Put the page into the new object. If it is not wired
664 * (if it's the real page) it will be activated.
665 */
666
667 vm_page_lock_queues();
668 vm_page_insert(new_m, new_object, new_offset);
669 if (need_to_wire)
670 vm_page_wire(new_m);
671 else
672 vm_page_activate(new_m);
673 PAGE_WAKEUP_DONE(new_m);
674 vm_page_unlock_queues();
675
676 vm_object_unlock(new_object);
677
678 /*
679 * Return the placeholder page to simplify cleanup.
680 */
681 return (holding_page);
682 }
683
684 /*
685 * Routine: vm_pageclean_setup
686 *
687 * Purpose: setup a page to be cleaned (made non-dirty), but not
688 * necessarily flushed from the VM page cache.
689 * This is accomplished by cleaning in place.
690 *
691 * The page must not be busy, and the object and page
692 * queues must be locked.
693 *
694 */
695 void
696 vm_pageclean_setup(
697 vm_page_t m,
698 vm_page_t new_m,
699 vm_object_t new_object,
700 vm_object_offset_t new_offset)
701 {
702 vm_object_t old_object = m->object;
703 assert(!m->busy);
704 assert(!m->cleaning);
705
706 XPR(XPR_VM_PAGEOUT,
707 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
708 (integer_t)old_object, m->offset, (integer_t)m,
709 (integer_t)new_m, new_offset);
710
711 pmap_clear_modify(m->phys_page);
712 vm_object_paging_begin(old_object);
713
714 /*
715 * Record that this page has been written out
716 */
717 #if MACH_PAGEMAP
718 vm_external_state_set(old_object->existence_map, m->offset);
719 #endif /*MACH_PAGEMAP*/
720
721 /*
722 * Mark original page as cleaning in place.
723 */
724 m->cleaning = TRUE;
725 m->dirty = TRUE;
726 m->precious = FALSE;
727
728 /*
729 * Convert the fictitious page to a private shadow of
730 * the real page.
731 */
732 assert(new_m->fictitious);
733 new_m->fictitious = FALSE;
734 new_m->private = TRUE;
735 new_m->pageout = TRUE;
736 new_m->phys_page = m->phys_page;
737 vm_page_wire(new_m);
738
739 vm_page_insert(new_m, new_object, new_offset);
740 assert(!new_m->wanted);
741 new_m->busy = FALSE;
742 }
743
744 void
745 vm_pageclean_copy(
746 vm_page_t m,
747 vm_page_t new_m,
748 vm_object_t new_object,
749 vm_object_offset_t new_offset)
750 {
751 XPR(XPR_VM_PAGEOUT,
752 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
753 m, new_m, new_object, new_offset, 0);
754
755 assert((!m->busy) && (!m->cleaning));
756
757 assert(!new_m->private && !new_m->fictitious);
758
759 pmap_clear_modify(m->phys_page);
760
761 m->busy = TRUE;
762 vm_object_paging_begin(m->object);
763 vm_page_unlock_queues();
764 vm_object_unlock(m->object);
765
766 /*
767 * Copy the original page to the new page.
768 */
769 vm_page_copy(m, new_m);
770
771 /*
772 * Mark the old page as clean. A request to pmap_is_modified
773 * will get the right answer.
774 */
775 vm_object_lock(m->object);
776 m->dirty = FALSE;
777
778 vm_object_paging_end(m->object);
779
780 vm_page_lock_queues();
781 if (!m->active && !m->inactive)
782 vm_page_activate(m);
783 PAGE_WAKEUP_DONE(m);
784
785 vm_page_insert(new_m, new_object, new_offset);
786 vm_page_activate(new_m);
787 new_m->busy = FALSE; /* No other thread can be waiting */
788 }
789
790
791 /*
792 * Routine: vm_pageout_initialize_page
793 * Purpose:
794 * Causes the specified page to be initialized in
795 * the appropriate memory object. This routine is used to push
796 * pages into a copy-object when they are modified in the
797 * permanent object.
798 *
799 * The page is moved to a temporary object and paged out.
800 *
801 * In/out conditions:
802 * The page in question must not be on any pageout queues.
803 * The object to which it belongs must be locked.
804 * The page must be busy, but not hold a paging reference.
805 *
806 * Implementation:
807 * Move this page to a completely new object.
808 */
809 void
810 vm_pageout_initialize_page(
811 vm_page_t m)
812 {
813 vm_map_copy_t copy;
814 vm_object_t new_object;
815 vm_object_t object;
816 vm_object_offset_t paging_offset;
817 vm_page_t holding_page;
818
819
820 XPR(XPR_VM_PAGEOUT,
821 "vm_pageout_initialize_page, page 0x%X\n",
822 (integer_t)m, 0, 0, 0, 0);
823 assert(m->busy);
824
825 /*
826 * Verify that we really want to clean this page
827 */
828 assert(!m->absent);
829 assert(!m->error);
830 assert(m->dirty);
831
832 /*
833 * Create a paging reference to let us play with the object.
834 */
835 object = m->object;
836 paging_offset = m->offset + object->paging_offset;
837 vm_object_paging_begin(object);
838 if (m->absent || m->error || m->restart ||
839 (!m->dirty && !m->precious)) {
840 VM_PAGE_FREE(m);
841 panic("reservation without pageout?"); /* alan */
842 vm_object_unlock(object);
843 return;
844 }
845
846 /* set the page for future call to vm_fault_list_request */
847 holding_page = NULL;
848 vm_page_lock_queues();
849 pmap_clear_modify(m->phys_page);
850 m->dirty = TRUE;
851 m->busy = TRUE;
852 m->list_req_pending = TRUE;
853 m->cleaning = TRUE;
854 m->pageout = TRUE;
855 vm_page_wire(m);
856 vm_pageout_throttle(m);
857 vm_page_unlock_queues();
858 vm_object_unlock(object);
859
860 /*
861 * Write the data to its pager.
862 * Note that the data is passed by naming the new object,
863 * not a virtual address; the pager interface has been
864 * manipulated to use the "internal memory" data type.
865 * [The object reference from its allocation is donated
866 * to the eventual recipient.]
867 */
868 memory_object_data_initialize(object->pager,
869 paging_offset,
870 PAGE_SIZE);
871
872 vm_object_lock(object);
873 }
874
875 #if MACH_CLUSTER_STATS
876 #define MAXCLUSTERPAGES 16
877 struct {
878 unsigned long pages_in_cluster;
879 unsigned long pages_at_higher_offsets;
880 unsigned long pages_at_lower_offsets;
881 } cluster_stats[MAXCLUSTERPAGES];
882 #endif /* MACH_CLUSTER_STATS */
883
884 boolean_t allow_clustered_pageouts = FALSE;
885
886 /*
887 * vm_pageout_cluster:
888 *
889 * Given a page, page it out, and attempt to clean adjacent pages
890 * in the same operation.
891 *
892 * The page must be busy, and the object locked. We will take a
893 * paging reference to prevent deallocation or collapse when we
894 * temporarily release the object lock.
895 *
896 * The page must not be on any pageout queue.
897 */
898 void
899 vm_pageout_cluster(
900 vm_page_t m)
901 {
902 vm_object_t object = m->object;
903 vm_object_offset_t offset = m->offset; /* from vm_object start */
904 vm_object_offset_t paging_offset;
905 vm_object_t new_object;
906 vm_object_offset_t new_offset;
907 vm_size_t cluster_size;
908 vm_object_offset_t cluster_offset; /* from memory_object start */
909 vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
910 vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
911 vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
912 vm_object_offset_t offset_within_cluster;
913 vm_size_t length_of_data;
914 vm_page_t friend, holding_page;
915 kern_return_t rc;
916 boolean_t precious_clean = TRUE;
917 int pages_in_cluster;
918
919 CLUSTER_STAT(int pages_at_higher_offsets = 0;)
920 CLUSTER_STAT(int pages_at_lower_offsets = 0;)
921
922 XPR(XPR_VM_PAGEOUT,
923 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
924 (integer_t)object, offset, (integer_t)m, 0, 0);
925
926 CLUSTER_STAT(vm_pageout_cluster_clusters++;)
927
928 /*
929 * protect the object from collapse -
930 * locking in the object's paging_offset.
931 */
932 vm_object_paging_begin(object);
933 paging_offset = m->offset + object->paging_offset;
934
935 /*
936 * Only a certain kind of page is appreciated here.
937 */
938 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
939 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
940
941 cluster_size = object->cluster_size;
942
943 assert(cluster_size >= PAGE_SIZE);
944 if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
945 assert(object->pager_created && object->pager_initialized);
946 assert(object->internal || object->pager_ready);
947
948 if (m->precious && !m->dirty)
949 precious_clean = TRUE;
950
951 if (!object->pager_trusted || !allow_clustered_pageouts)
952 cluster_size = PAGE_SIZE;
953
954 cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
955 /* bytes from beginning of cluster */
956 /*
957 * Due to unaligned mappings, we have to be careful
958 * of negative offsets into the VM object. Clip the cluster
959 * boundary to the VM object, not the memory object.
960 */
961 if (offset > cluster_offset) {
962 cluster_lower_bound = offset - cluster_offset;
963 /* from vm_object */
964 } else {
965 cluster_lower_bound = 0;
966 }
967 cluster_upper_bound = (offset - cluster_offset) +
968 (vm_object_offset_t)cluster_size;
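/*
 * Worked example, assuming a 64K cluster_size: with
 * paging_offset 0x13000, cluster_offset = 0x13000 & 0xffff = 0x3000,
 * so the cluster covers vm_object offsets
 * [offset - 0x3000, offset - 0x3000 + 0x10000), with the lower bound
 * clipped to 0 for unaligned mappings.
 */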
969
970 /* set the page for future call to vm_fault_list_request */
971 holding_page = NULL;
972 vm_page_lock_queues();
973 m->busy = TRUE;
974 m->list_req_pending = TRUE;
975 m->cleaning = TRUE;
976 m->pageout = TRUE;
977 vm_page_wire(m);
978 vm_pageout_throttle(m);
979 vm_page_unlock_queues();
980 vm_object_unlock(object);
981
982 /*
983 * Search backward for adjacent eligible pages to clean in
984 * this operation.
985 */
986
987 cluster_start = offset;
988 if (offset) { /* avoid wrap-around at zero */
989 for (cluster_start = offset - PAGE_SIZE_64;
990 cluster_start >= cluster_lower_bound;
991 cluster_start -= PAGE_SIZE_64) {
992 assert(cluster_size > PAGE_SIZE);
993
994 vm_object_lock(object);
995 vm_page_lock_queues();
996
997 if ((friend = vm_pageout_cluster_page(object, cluster_start,
998 precious_clean)) == VM_PAGE_NULL) {
999 vm_page_unlock_queues();
1000 vm_object_unlock(object);
1001 break;
1002 }
1003 new_offset = (cluster_start + object->paging_offset)
1004 & (cluster_size - 1);
1005
1006 assert(new_offset < cluster_offset);
1007 m->list_req_pending = TRUE;
1008 m->cleaning = TRUE;
1009 /* do nothing except advance the write request; all we really need to */
1010 /* do is push the target page and let the code at the other end decide */
1011 /* what is really the right size */
1012 if (vm_page_free_count <= vm_page_free_reserved) {
1013 m->busy = TRUE;
1014 m->pageout = TRUE;
1015 vm_page_wire(m);
1016 }
1017
1018 vm_page_unlock_queues();
1019 vm_object_unlock(object);
1020 if(m->dirty || m->object->internal) {
1021 CLUSTER_STAT(pages_at_lower_offsets++;)
1022 }
1023
1024 }
1025 cluster_start += PAGE_SIZE_64;
1026 }
1027 assert(cluster_start >= cluster_lower_bound);
1028 assert(cluster_start <= offset);
1029 /*
1030 * Search forward for adjacent eligible pages to clean in
1031 * this operation.
1032 */
1033 for (cluster_end = offset + PAGE_SIZE_64;
1034 cluster_end < cluster_upper_bound;
1035 cluster_end += PAGE_SIZE_64) {
1036 assert(cluster_size > PAGE_SIZE);
1037
1038 vm_object_lock(object);
1039 vm_page_lock_queues();
1040
1041 if ((friend = vm_pageout_cluster_page(object, cluster_end,
1042 precious_clean)) == VM_PAGE_NULL) {
1043 vm_page_unlock_queues();
1044 vm_object_unlock(object);
1045 break;
1046 }
1047 new_offset = (cluster_end + object->paging_offset)
1048 & (cluster_size - 1);
1049
1050 assert(new_offset < cluster_size);
1051 m->list_req_pending = TRUE;
1052 m->cleaning = TRUE;
1053 /* do nothing except advance the write request; all we really need to */
1054 /* do is push the target page and let the code at the other end decide */
1055 /* what is really the right size */
1056 if (vm_page_free_count <= vm_page_free_reserved) {
1057 m->busy = TRUE;
1058 m->pageout = TRUE;
1059 vm_page_wire(m);
1060 }
1061
1062 vm_page_unlock_queues();
1063 vm_object_unlock(object);
1064
1065 if(m->dirty || m->object->internal) {
1066 CLUSTER_STAT(pages_at_higher_offsets++;)
1067 }
1068 }
1069 assert(cluster_end <= cluster_upper_bound);
1070 assert(cluster_end >= offset + PAGE_SIZE);
1071
1072 /*
1073 * (offset - cluster_offset) is the beginning of the cluster
1074 * relative to vm_object start.
1075 */
1076 offset_within_cluster = cluster_start - (offset - cluster_offset);
1077 length_of_data = cluster_end - cluster_start;
1078
1079 assert(offset_within_cluster < cluster_size);
1080 assert((offset_within_cluster + length_of_data) <= cluster_size);
1081
1082 rc = KERN_SUCCESS;
1083 assert(rc == KERN_SUCCESS);
1084
1085 pages_in_cluster = length_of_data/PAGE_SIZE;
1086
1087 #if MACH_CLUSTER_STATS
1088 (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
1089 (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
1090 (cluster_stats[pages_in_cluster].pages_in_cluster)++;
1091 #endif /* MACH_CLUSTER_STATS */
1092
1093 /*
1094 * Send the data to the pager.
1095 */
1096 paging_offset = cluster_start + object->paging_offset;
1097
1098 rc = memory_object_data_return(object->pager,
1099 paging_offset,
1100 length_of_data,
1101 !precious_clean,
1102 FALSE);
1103
1104 vm_object_lock(object);
1105 vm_object_paging_end(object);
1106
1107 if (holding_page) {
1108 assert(!object->pager_trusted);
1109 VM_PAGE_FREE(holding_page);
1110 vm_object_paging_end(object);
1111 }
1112 }
1113
1114 /*
1115 * Trusted pager throttle.
1116 * Object and page queues must be locked.
1117 */
1118 void
1119 vm_pageout_throttle(
1120 register vm_page_t m)
1121 {
1122 register vm_object_t object;
1123
1124 /*
1125 * need to keep track of the object we
1126 * started with... if we drop the object lock
1127 * due to the throttle, it's possible that someone
1128 * else will gather this page into an I/O if this
1129 * is an external object... the page will then be
1130 * potentially freed before we unwedge from the
1131 * throttle... this is ok since no one plays with
1132 * the page directly after the throttle... the object
1133 * and offset are passed into the memory_object_data_return
1134 * function where eventually it's relooked up against the
1135 * object... if it's changed state or there is no longer
1136 * a page at that offset, the pageout just finishes without
1137 * issuing an I/O
1138 */
1139 object = m->object;
1140
1141 assert(!m->laundry);
1142 m->laundry = TRUE;
1143 if (!object->internal)
1144 vm_page_burst_count++;
1145 vm_page_laundry_count++;
1146
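/*
 * With the default vm_page_laundry_max of 16, the loop below blocks
 * once more than 16 laundry pages are outstanding; the laundry
 * completion path (e.g. vm_pageout_object_terminate()) issues the
 * wakeup when the count drops below vm_page_laundry_min
 * (laundry_max/2 = 8).
 */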
1147 while (vm_page_laundry_count > vm_page_laundry_max) {
1148 /*
1149 * Set the threshold for when vm_page_free()
1150 * should wake us up.
1151 */
1152 vm_page_laundry_min = vm_page_laundry_max/2;
1153
1154 assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
1155 vm_page_unlock_queues();
1156 vm_object_unlock(object);
1157 /*
1158 * Pause to let the default pager catch up.
1159 */
1160 thread_block((void (*)(void)) 0);
1161
1162 vm_object_lock(object);
1163 vm_page_lock_queues();
1164 }
1165 }
1166
1167 /*
1168 * The global variable vm_pageout_clean_active_pages controls whether
1169 * active pages are considered valid to be cleaned in place during a
1170 * clustered pageout. Performance measurements are necessary to determine
1171 * the best policy.
1172 */
1173 int vm_pageout_clean_active_pages = 1;
1174 /*
1175 * vm_pageout_cluster_page: [Internal]
1176 *
1177 * return a vm_page_t to the page at (object,offset) if it is appropriate
1178 * to clean in place. Pages that are non-existent, busy, absent, already
1179 * cleaning, or not dirty are not eligible to be cleaned as an adjacent
1180 * page in a cluster.
1181 *
1182 * The object must be locked on entry, and remains locked throughout
1183 * this call.
1184 */
1185
1186 vm_page_t
1187 vm_pageout_cluster_page(
1188 vm_object_t object,
1189 vm_object_offset_t offset,
1190 boolean_t precious_clean)
1191 {
1192 vm_page_t m;
1193
1194 XPR(XPR_VM_PAGEOUT,
1195 "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
1196 (integer_t)object, offset, 0, 0, 0);
1197
1198 if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
1199 return(VM_PAGE_NULL);
1200
1201 if (m->busy || m->absent || m->cleaning ||
1202 (m->wire_count != 0) || m->error)
1203 return(VM_PAGE_NULL);
1204
1205 if (vm_pageout_clean_active_pages) {
1206 if (!m->active && !m->inactive) return(VM_PAGE_NULL);
1207 } else {
1208 if (!m->inactive) return(VM_PAGE_NULL);
1209 }
1210
1211 assert(!m->private);
1212 assert(!m->fictitious);
1213
1214 if (!m->dirty) m->dirty = pmap_is_modified(m->phys_page);
1215
1216 if (precious_clean) {
1217 if (!m->precious || !m->dirty)
1218 return(VM_PAGE_NULL);
1219 } else {
1220 if (!m->dirty)
1221 return(VM_PAGE_NULL);
1222 }
1223 return(m);
1224 }
1225
1226 /*
1227 * vm_pageout_scan does the dirty work for the pageout daemon.
1228 * It returns with vm_page_queue_free_lock held and
1229 * vm_page_free_wanted == 0.
1230 */
1231 extern void vm_pageout_scan_continue(void); /* forward; */
1232
1233 #define DELAYED_UNLOCK_LIMIT 50
1234 #define LOCAL_FREED_LIMIT 50
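/*
 * The scan below holds the page queues lock across roughly
 * DELAYED_UNLOCK_LIMIT pages before dropping it, and batches up to
 * LOCAL_FREED_LIMIT reclaimed pages on local_freeq before handing
 * them to vm_page_free_list().
 */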
1235
1236 void
1237 vm_pageout_scan(void)
1238 {
1239 boolean_t now = FALSE;
1240 unsigned int laundry_pages;
1241 int loop_count = 0;
1242 int loop_bursted_count = 0;
1243 int active_loop_detect;
1244 vm_page_t local_freeq = 0;
1245 int local_freed = 0;
1246 int delayed_unlock = 0;
1247 int need_internal_inactive = 0;
1248 int need_pause;
1249
1250 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1251
1252 /*???*/ /*
1253 * We want to gradually dribble pages from the active queue
1254 * to the inactive queue. If we let the inactive queue get
1255 * very small, and then suddenly dump many pages into it,
1256 * those pages won't get a sufficient chance to be referenced
1257 * before we start taking them from the inactive queue.
1258 *
1259 * We must limit the rate at which we send pages to the pagers.
1260 * data_write messages consume memory, for message buffers and
1261 * for map-copy objects. If we get too far ahead of the pagers,
1262 * we can potentially run out of memory.
1263 *
1264 * We can use the laundry count to limit directly the number
1265 * of pages outstanding to the default pager. A similar
1266 * strategy for external pagers doesn't work, because
1267 * external pagers don't have to deallocate the pages sent them,
1268 * and because we might have to send pages to external pagers
1269 * even if they aren't processing writes. So we also
1270 * use a burst count to limit writes to external pagers.
1271 *
1272 * When memory is very tight, we can't rely on external pagers to
1273 * clean pages. They probably aren't running, because they
1274 * aren't vm-privileged. If we kept sending dirty pages to them,
1275 * we could exhaust the free list.
1276 *
1277 * consider_zone_gc should be last, because the other operations
1278 * might return memory to zones.
1279 */
1280 Restart:
1281
1282 stack_collect();
1283 consider_task_collect();
1284 consider_machine_collect();
1285 consider_zone_gc();
1286
1287 for (;;) {
1288 register vm_page_t m;
1289 register vm_object_t object;
1290
1291 /*
1292 * Recalculate vm_page_inactive_target.
1293 */
1294 if (delayed_unlock == 0)
1295 vm_page_lock_queues();
1296 vm_page_inactive_target =
1297 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1298 vm_page_inactive_count);
1299
1300 active_loop_detect = vm_page_active_count;
1301 /*
1302 * Move pages from active to inactive.
1303 */
1304 while ((need_internal_inactive ||
1305 vm_page_inactive_count < vm_page_inactive_target) &&
1306 !queue_empty(&vm_page_queue_active) &&
1307 ((active_loop_detect--) > 0)) {
1308
1309 need_pause = 1;
1310 vm_pageout_active++;
1311
1312 m = (vm_page_t) queue_first(&vm_page_queue_active);
1313 object = m->object;
1314
1315 /*
1316 * If we're getting really low on memory,
1317 * or we have already exceeded the burst
1318 * count for the external pagers,
1319 * try skipping to a page that will go
1320 * directly to the default_pager.
1321 */
1322 if (need_internal_inactive &&
1323 IP_VALID(memory_manager_default)) {
1324 vm_pageout_scan_active_emm_throttle++;
1325
1326 assert(m->active && !m->inactive);
1327
1328 if (vm_object_lock_try(object)) {
1329 if (object->internal)
1330 goto object_locked_active;
1331
1332 if (!m->dirty)
1333 m->dirty = pmap_is_modified(m->phys_page);
1334 if (!m->dirty && !m->precious)
1335 goto object_locked_active;
1336
1337 vm_object_unlock(object);
1338
1339 need_pause = 0;
1340 }
1341 goto object_lock_try_active_failed;
1342 }
1343 assert(m->active && !m->inactive);
1344
1345 if (!vm_object_lock_try(object)) {
1346 /*
1347 * Move page to end and continue.
1348 */
1349 object_lock_try_active_failed:
1350 queue_remove(&vm_page_queue_active, m,
1351 vm_page_t, pageq);
1352 queue_enter(&vm_page_queue_active, m,
1353 vm_page_t, pageq);
1354
1355 if (local_freeq) {
1356 vm_page_free_list(local_freeq);
1357
1358 local_freeq = 0;
1359 local_freed = 0;
1360 }
1361 if (need_pause) {
1362 delayed_unlock = 0;
1363
1364 vm_page_unlock_queues();
1365 mutex_pause();
1366 vm_page_lock_queues();
1367 }
1368 continue;
1369 }
1370
1371 object_locked_active:
1372 /*
1373 * If the page is busy, then we pull it
1374 * off the active queue and leave it alone.
1375 */
1376
1377 if (m->busy) {
1378 vm_object_unlock(object);
1379 queue_remove(&vm_page_queue_active, m,
1380 vm_page_t, pageq);
1381 m->active = FALSE;
1382 if (!m->fictitious)
1383 vm_page_active_count--;
1384 continue;
1385 }
1386
1387 /*
1388 * Deactivate the page while holding the object
1389 * locked, so we know the page is still not busy.
1390 * This should prevent races between pmap_enter
1391 * and pmap_clear_reference. The page might be
1392 * absent or fictitious, but vm_page_deactivate
1393 * can handle that.
1394 */
1395
1396 if (need_internal_inactive) {
1397 /* found one ! */
1398 vm_pageout_scan_active_emm_throttle_success++;
1399 need_internal_inactive--;
1400 }
1401 vm_page_deactivate(m);
1402 vm_object_unlock(object);
1403 }
1404 /*
1405 * We are done if we have met our target *and*
1406 * nobody is still waiting for a page.
1407 */
1408 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1409 if (local_freeq) {
1410 vm_page_free_list(local_freeq);
1411
1412 local_freeq = 0;
1413 local_freed = 0;
1414 }
1415
1416 consider_machine_adjust();
1417
1418 mutex_lock(&vm_page_queue_free_lock);
1419
1420 if ((vm_page_free_count >= vm_page_free_target) &&
1421 (vm_page_free_wanted == 0)) {
1422
1423 delayed_unlock = 0;
1424 vm_page_unlock_queues();
1425 break;
1426 }
1427 mutex_unlock(&vm_page_queue_free_lock);
1428 }
1429
1430 /*
1431 * Sometimes we have to pause:
1432 * 1) No inactive pages - nothing to do.
1433 * 2) Flow control - nothing but external pages and
1434 * we have to wait for untrusted pagers to catch up.
1435 */
1436
1437 loop_count++;
1438 if ((queue_empty(&vm_page_queue_inactive) &&
1439 queue_empty(&vm_page_queue_zf)) ||
1440 loop_bursted_count >= vm_pageout_burst_loop_throttle) {
1441
1442 unsigned int pages, msecs;
1443 int wait_result;
1444
1445 consider_machine_adjust();
1446 /*
1447 * vm_pageout_burst_wait is msecs/page.
1448 * If there is nothing for us to do, we wait
1449 * at least vm_pageout_empty_wait msecs.
1450 */
1451 pages = vm_page_burst_count;
1452
1453 if (pages) {
1454 msecs = pages * vm_pageout_burst_wait;
1455 } else {
1456 printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
1457 msecs = vm_free_page_pause;
1458 }
1459
1460 if (queue_empty(&vm_page_queue_inactive) &&
1461 queue_empty(&vm_page_queue_zf) &&
1462 (msecs < vm_pageout_empty_wait))
1463 msecs = vm_pageout_empty_wait;
1464
1465 if (local_freeq) {
1466 vm_page_free_list(local_freeq);
1467
1468 local_freeq = 0;
1469 local_freed = 0;
1470 }
1471 delayed_unlock = 0;
1472 vm_page_unlock_queues();
1473
1474 assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
1475 counter(c_vm_pageout_scan_block++);
1476
1477 /*
1478 * Unfortunately, we don't have call_continuation
1479 * so we can't rely on tail-recursion.
1480 */
1481 wait_result = thread_block((void (*)(void)) 0);
1482 if (wait_result != THREAD_TIMED_OUT)
1483 thread_cancel_timer();
1484 vm_pageout_scan_continue();
1485
1486 if (loop_count >= vm_page_inactive_count) {
1487 if (vm_page_burst_count >= vm_pageout_burst_max) {
1488 /*
1489 * Make sure we move enough "appropriate"
1490 * pages to the inactive queue before trying
1491 * again.
1492 */
1493 need_internal_inactive = vm_page_laundry_max;
1494 }
1495 loop_count = 0;
1496 }
1497 loop_bursted_count = 0;
1498 goto Restart;
1499 /*NOTREACHED*/
1500 }
1501
1502 vm_pageout_inactive++;
1503
1504 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1505 vm_zf_iterator = 0;
1506 } else {
1507 last_page_zf = 0;
1508 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1509 vm_zf_iterator = 0;
1510 }
1511 }
1512 if(queue_empty(&vm_page_queue_zf) ||
1513 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1514 !queue_empty(&vm_page_queue_inactive))) {
1515 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1516 last_page_zf = 0;
1517 } else {
1518 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1519 last_page_zf = 1;
1520 }
1521 object = m->object;
1522
1523 need_pause = 1;
1524
1525 if (vm_page_burst_count >= vm_pageout_burst_max &&
1526 IP_VALID(memory_manager_default)) {
1527 /*
1528 * We're throttling external pagers.
1529 * Try to select a page that would
1530 * go directly to the default_pager
1531 * or that is clean...
1532 */
1533 vm_pageout_scan_inactive_emm_throttle++;
1534
1535 assert(!m->active && m->inactive);
1536
1537 if (vm_object_lock_try(object)) {
1538 if (object->internal) {
1539 /* found one ! */
1540 vm_pageout_scan_inactive_emm_throttle_success++;
1541 goto object_locked_inactive;
1542 }
1543 if (!m->dirty)
1544 m->dirty = pmap_is_modified(m->phys_page);
1545 if (!m->dirty && !m->precious) {
1546 /* found one ! */
1547 vm_pageout_scan_inactive_emm_throttle_success++;
1548 goto object_locked_inactive;
1549 }
1550 vm_object_unlock(object);
1551
1552 need_pause = 0;
1553 }
1554 loop_bursted_count++;
1555 goto object_lock_try_inactive_failed;
1556 }
1557
1558 assert(!m->active && m->inactive);
1559
1560 /*
1561 * Try to lock object; since we've got the
1562 * page queues lock, we can only try for this one.
1563 */
1564
1565 if (!vm_object_lock_try(object)) {
1566 object_lock_try_inactive_failed:
1567 /*
1568 * Move page to end and continue.
1569 * Don't re-issue ticket
1570 */
1571 if (m->zero_fill) {
1572 queue_remove(&vm_page_queue_zf, m,
1573 vm_page_t, pageq);
1574 queue_enter(&vm_page_queue_zf, m,
1575 vm_page_t, pageq);
1576 } else {
1577 queue_remove(&vm_page_queue_inactive, m,
1578 vm_page_t, pageq);
1579 queue_enter(&vm_page_queue_inactive, m,
1580 vm_page_t, pageq);
1581 }
1582 if (local_freeq) {
1583 vm_page_free_list(local_freeq);
1584
1585 local_freeq = 0;
1586 local_freed = 0;
1587 }
1588 delayed_unlock = 0;
1589 vm_page_unlock_queues();
1590
1591 if (need_pause) {
1592 mutex_pause();
1593 vm_pageout_inactive_nolock++;
1594 }
1595 continue;
1596 }
1597
1598 object_locked_inactive:
1599 /*
1600 * Paging out pages of external objects which
1601 * are currently being created must be avoided.
1602 * The pager may claim memory, thus leading to a
1603 * possible deadlock between it and the pageout thread
1604 * if such pages are finally chosen. The remaining assumption
1605 * is that there will finally be enough available pages in the
1606 * inactive pool to page out in order to satisfy all memory
1607 * claimed by the thread which concurrently creates the pager.
1608 */
1609 if (!object->pager_initialized && object->pager_created) {
1610 /*
1611 * Move page to end and continue, hoping that
1612 * there will be enough other inactive pages to
1613 * page out so that the thread which currently
1614 * initializes the pager will succeed.
1615 * Don't re-grant the ticket; the page should be
1616 * pulled from the queue and paged out whenever
1617 * one of its logically adjacent fellows is
1618 * targeted.
1619 */
1620 if(m->zero_fill) {
1621 queue_remove(&vm_page_queue_zf, m,
1622 vm_page_t, pageq);
1623 queue_enter(&vm_page_queue_zf, m,
1624 vm_page_t, pageq);
1625 last_page_zf = 1;
1626 vm_zf_iterator = vm_zf_iterator_count - 1;
1627 } else {
1628 queue_remove(&vm_page_queue_inactive, m,
1629 vm_page_t, pageq);
1630 queue_enter(&vm_page_queue_inactive, m,
1631 vm_page_t, pageq);
1632 last_page_zf = 0;
1633 vm_zf_iterator = 1;
1634 }
1635 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1636 delayed_unlock = 0;
1637 vm_page_unlock_queues();
1638 }
1639 vm_object_unlock(object);
1640 vm_pageout_inactive_avoid++;
1641 continue;
1642 }
1643
1644 /*
1645 * Remove the page from the inactive list.
1646 */
1647
1648 if(m->zero_fill) {
1649 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1650 } else {
1651 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1652 }
1653 m->inactive = FALSE;
1654 if (!m->fictitious)
1655 vm_page_inactive_count--;
1656
1657 if (m->busy || !object->alive) {
1658 /*
1659 * Somebody is already playing with this page.
1660 * Leave it off the pageout queues.
1661 */
1662
1663 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1664 delayed_unlock = 0;
1665 vm_page_unlock_queues();
1666 }
1667 vm_object_unlock(object);
1668 vm_pageout_inactive_busy++;
1669 continue;
1670 }
1671
1672 /*
1673 * If it's absent or in error, we can reclaim the page.
1674 */
1675
1676 if (m->absent || m->error) {
1677 vm_pageout_inactive_absent++;
1678 reclaim_page:
1679
1680 if (m->tabled)
1681 vm_page_remove(m); /* clears tabled, object, offset */
1682 if (m->absent)
1683 vm_object_absent_release(object);
1684
1685 m->pageq.next = (queue_entry_t)local_freeq;
1686 local_freeq = m;
1687
1688 if (local_freed++ > LOCAL_FREED_LIMIT) {
1689 vm_page_free_list(local_freeq);
1690
1691 local_freeq = 0;
1692 local_freed = 0;
1693 }
1694 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1695 delayed_unlock = 0;
1696 vm_page_unlock_queues();
1697 }
1698 vm_object_unlock(object);
1699 loop_bursted_count = 0;
1700 continue;
1701 }
1702
1703 assert(!m->private);
1704 assert(!m->fictitious);
1705
1706 /*
1707 * If already cleaning this page in place, convert from
1708 * "adjacent" to "target". We can leave the page mapped,
1709 * and vm_pageout_object_terminate will determine whether
1710 * to free or reactivate.
1711 */
1712
1713 if (m->cleaning) {
1714 #if MACH_CLUSTER_STATS
1715 vm_pageout_cluster_conversions++;
1716 #endif
1717 m->busy = TRUE;
1718 m->pageout = TRUE;
1719 m->dump_cleaning = TRUE;
1720 vm_page_wire(m);
1721 vm_object_unlock(object);
1722
1723 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1724 delayed_unlock = 0;
1725 vm_page_unlock_queues();
1726 }
1727 loop_bursted_count = 0;
1728 continue;
1729 }
1730
1731 /*
1732 * If it's being used, reactivate.
1733 * (Fictitious pages are either busy or absent.)
1734 */
1735
1736 if (m->reference || pmap_is_referenced(m->phys_page)) {
1737 vm_pageout_inactive_used++;
1738 reactivate_page:
1739 #if ADVISORY_PAGEOUT
1740 if (m->discard_request) {
1741 m->discard_request = FALSE;
1742 }
1743 #endif /* ADVISORY_PAGEOUT */
1744 last_page_zf = 0;
1745 vm_object_unlock(object);
1746 vm_page_activate(m);
1747 VM_STAT(reactivations++);
1748
1749 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1750 delayed_unlock = 0;
1751 vm_page_unlock_queues();
1752 }
1753 continue;
1754 }
1755
1756 #if ADVISORY_PAGEOUT
1757 if (object->advisory_pageout) {
1758 boolean_t do_throttle;
1759 memory_object_t pager;
1760 vm_object_offset_t discard_offset;
1761
1762 if (m->discard_request) {
1763 vm_stat_discard_failure++;
1764 goto mandatory_pageout;
1765 }
1766
1767 assert(object->pager_initialized);
1768 m->discard_request = TRUE;
1769 pager = object->pager;
1770
1771 /* system-wide throttle */
1772 do_throttle = (vm_page_free_count <=
1773 vm_page_free_reserved);
1774
1775 #if 0
1776 /*
1777 * JMM - Do we need a replacement throttle
1778 * mechanism for pagers?
1779 */
1780 if (!do_throttle) {
1781 /* throttle on this pager */
1782 /* XXX lock ordering ? */
1783 ip_lock(port);
1784 do_throttle= imq_full(&port->ip_messages);
1785 ip_unlock(port);
1786 }
1787 #endif
1788
1789 if (do_throttle) {
1790 vm_stat_discard_throttle++;
1791 #if 0
1792 /* ignore this page and skip to next */
1793 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1794 delayed_unlock = 0;
1795 vm_page_unlock_queues();
1796 }
1797 vm_object_unlock(object);
1798 continue;
1799 #else
1800 /* force mandatory pageout */
1801 goto mandatory_pageout;
1802 #endif
1803 }
1804
1805 /* proceed with discard_request */
1806 vm_page_activate(m);
1807 vm_stat_discard++;
1808 VM_STAT(reactivations++);
1809 discard_offset = m->offset + object->paging_offset;
1810 vm_stat_discard_sent++;
1811
1812 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1813 delayed_unlock = 0;
1814 vm_page_unlock_queues();
1815 }
1816 vm_object_unlock(object);
1817
1818 /*
1819 memory_object_discard_request(object->pager,
1820 discard_offset,
1821 PAGE_SIZE);
1822 */
1823 continue;
1824 }
1825 mandatory_pageout:
1826 #endif /* ADVISORY_PAGEOUT */
1827
1828 XPR(XPR_VM_PAGEOUT,
1829 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1830 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1831
1832 /*
1833 * Eliminate all mappings.
1834 */
1835
1836 m->busy = TRUE;
1837
1838 if (m->no_isync == FALSE)
1839 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1840
1841 if (!m->dirty)
1842 m->dirty = pmap_is_modified(m->phys_page);
1843 /*
1844 * If it's clean and not precious, we can free the page.
1845 */
1846
1847 if (!m->dirty && !m->precious) {
1848 vm_pageout_inactive_clean++;
1849 goto reclaim_page;
1850 }
1851 if (local_freeq) {
1852 vm_page_free_list(local_freeq);
1853
1854 local_freeq = 0;
1855 local_freed = 0;
1856 }
1857 delayed_unlock = 0;
1858 vm_page_unlock_queues();
1859
1860 /*
1861 * If there is no memory object for the page, create
1862 * one and hand it to the default pager.
1863 */
1864
1865 if (!object->pager_initialized)
1866 vm_object_collapse(object, (vm_object_offset_t)0);
1867 if (!object->pager_initialized)
1868 vm_object_pager_create(object);
1869 if (!object->pager_initialized) {
1870 /*
1871 * Still no pager for the object.
1872 * Reactivate the page.
1873 *
1874 * Should only happen if there is no
1875 * default pager.
1876 */
1877 vm_page_lock_queues();
1878 vm_page_activate(m);
1879 vm_page_unlock_queues();
1880
1881 /*
1882 * And we are done with it.
1883 */
1884 PAGE_WAKEUP_DONE(m);
1885 vm_object_unlock(object);
1886
1887 /*
1888 * break here to get back to the preemption
1889 * point in the outer loop so that we don't
1890 * spin forever if there is no default pager.
1891 */
1892 vm_pageout_dirty_no_pager++;
1893 /*
1894 * Well there's no pager, but we can still reclaim
1895 * free pages out of the inactive list. Go back
1896 * to top of loop and look for suitable pages.
1897 */
1898 continue;
1899 } else if (object->pager == MEMORY_OBJECT_NULL) {
1900 /*
1901 * This pager has been destroyed by either
1902 * memory_object_destroy or vm_object_destroy, and
1903 * so there is nowhere for the page to go.
1904 * Just free the page.
1905 */
1906 VM_PAGE_FREE(m);
1907 vm_object_unlock(object);
1908 loop_bursted_count = 0;
1909 continue;
1910 }
1911
1912 vm_pageout_inactive_dirty++;
1913 vm_pageout_cluster(m); /* flush it */
1914 vm_object_unlock(object);
1915 loop_bursted_count = 0;
1916 }
1917 }
1918
1919 counter(unsigned int c_vm_pageout_scan_continue = 0;)
1920
1921 void
1922 vm_pageout_scan_continue(void)
1923 {
1924 /*
1925 * We just paused to let the pagers catch up.
1926 * If vm_page_laundry_count is still high,
1927 * then we aren't waiting long enough.
1928 * If we have paused some vm_pageout_pause_max times without
1929 * adjusting vm_pageout_burst_wait, it might be too big,
1930 * so we decrease it.
1931 */
1932
1933 vm_page_lock_queues();
1934 counter(++c_vm_pageout_scan_continue);
1935 if (vm_page_laundry_count > vm_pageout_burst_min) {
1936 vm_pageout_burst_wait++;
1937 vm_pageout_pause_count = 0;
1938 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1939 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1940 if (vm_pageout_burst_wait < 1)
1941 vm_pageout_burst_wait = 1;
1942 vm_pageout_pause_count = 0;
1943 }
1944 vm_page_unlock_queues();
1945 }
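/*
 * Illustrative sketch of the feedback above (hypothetical tunables,
 * e.g. vm_pageout_burst_min == 10 and vm_pageout_pause_max == 10):
 * while more than burst_min pages remain in the laundry after a
 * pause, every call bumps vm_pageout_burst_wait by one tick; once
 * the laundry drains for pause_max consecutive pauses, the wait is
 * decayed to 3/4 of its value (never below 1), so the daemon
 * converges on the smallest pause that keeps the pagers caught up.
 */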
1946
1947 void vm_page_free_reserve(int pages);
1948 int vm_page_free_count_init;
1949
1950 void
1951 vm_page_free_reserve(
1952 int pages)
1953 {
1954 int free_after_reserve;
1955
1956 vm_page_free_reserved += pages;
1957
1958 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1959
1960 vm_page_free_min = vm_page_free_reserved +
1961 VM_PAGE_FREE_MIN(free_after_reserve);
1962
1963 vm_page_free_target = vm_page_free_reserved +
1964 VM_PAGE_FREE_TARGET(free_after_reserve);
1965
1966 if (vm_page_free_target < vm_page_free_min + 5)
1967 vm_page_free_target = vm_page_free_min + 5;
1968 }
1969
1970 /*
1971 * vm_pageout is the high level pageout daemon.
1972 */
1973
1974 void
1975 vm_pageout_continue(void)
1976 {
1977 vm_pageout_scan_event_counter++;
1978 vm_pageout_scan();
1979 /* we hold vm_page_queue_free_lock now */
1980 assert(vm_page_free_wanted == 0);
1981 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1982 mutex_unlock(&vm_page_queue_free_lock);
1983
1984 counter(c_vm_pageout_block++);
1985 thread_block(vm_pageout_continue);
1986 /*NOTREACHED*/
1987 }
1988
1989 void
1990 vm_pageout(void)
1991 {
1992 thread_t self = current_thread();
1993 spl_t s;
1994
1995 /*
1996 * Set thread privileges.
1997 */
1998 self->vm_privilege = TRUE;
1999
2000 s = splsched();
2001 thread_lock(self);
2002 self->priority = BASEPRI_PREEMPT - 1;
2003 set_sched_pri(self, self->priority);
2004 thread_unlock(self);
2005 splx(s);
2006
2007 /*
2008 * Initialize some paging parameters.
2009 */
2010
2011 if (vm_page_laundry_max == 0)
2012 vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
2013
2014 if (vm_pageout_burst_max == 0)
2015 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
2016
2017 if (vm_pageout_burst_wait == 0)
2018 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2019
2020 if (vm_pageout_empty_wait == 0)
2021 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2022
2023 /*
2024 * Set kernel task to low backing store privileged
2025 * status
2026 */
2027 task_lock(kernel_task);
2028 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2029 task_unlock(kernel_task);
2030
2031 vm_page_free_count_init = vm_page_free_count;
2032 vm_zf_iterator = 0;
2033 /*
2034 * Even if we've already called vm_page_free_reserve,
2035 * call it again here to ensure that the targets are
2036 * accurately calculated (it uses vm_page_free_count_init).
2037 * Calling it with an arg of 0 will not change the reserve,
2038 * but will re-calculate free_min and free_target.
2039 */
2040 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED) {
2041 int scale;
2042
2043 /*
2044 * HFS journaling exists on the vm_pageout path...
2045 * it may need to allocate a lot more memory than a
2046 * typical driver/filesystem... if it can't allocate
2047 * the transaction buffer(s), we will deadlock...
2048 * the amount of reserve is scaled
2049 * based on the physical footprint of the system, so
2050 * let's double our reserve on systems with > 512 MB.
2051 */
2052 if (vm_page_free_count > (512 * 1024 * 1024) / PAGE_SIZE)
2053 scale = 2;
2054 else
2055 scale = 1;
2056 vm_page_free_reserve((VM_PAGE_FREE_RESERVED * scale) - vm_page_free_reserved);
2057 } else
2058 vm_page_free_reserve(0);
2059
2060 vm_pageout_continue();
2061 /*NOTREACHED*/
2062 }
2063
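/*
 * vm_pageout_emergency_availability_request:
 *
 * Last-ditch attempt to free a single page when the system is
 * critically short of memory.  Walk the inactive queue and then the
 * active queue looking for a page that is clean, unwired and not
 * involved in any paging operation; reclaim the first such page and
 * return KERN_SUCCESS, or KERN_FAILURE if none can be found.
 */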
2064 kern_return_t
2065 vm_pageout_emergency_availability_request(void)
2066 {
2067 vm_page_t m;
2068 vm_object_t object;
2069
2070 vm_page_lock_queues();
2071 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
2072
2073 while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) {
2074
2075 object = m->object;
2076
2077 if ( !vm_object_lock_try(object)) {
2078 m = (vm_page_t) queue_next(&m->pageq);
2079 continue;
2080 }
2081 if ((!object->alive) || (object->pageout)) {
2082 vm_object_unlock(object);
2083
2084 m = (vm_page_t) queue_next(&m->pageq);
2085 continue;
2086 }
2087 if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious
2088 || m->precious || m->cleaning
2089 || m->dump_cleaning || m->error
2090 || m->pageout || m->laundry
2091 || m->list_req_pending
2092 || m->overwriting) {
2093 vm_object_unlock(object);
2094
2095 m = (vm_page_t) queue_next(&m->pageq);
2096 continue;
2097 }
2098 m->busy = TRUE;
2099 pmap_page_protect(m->phys_page, VM_PROT_NONE);
2100 m->dirty = pmap_is_modified(m->phys_page);
2101
2102 if (m->dirty) {
2103 PAGE_WAKEUP_DONE(m);
2104 vm_object_unlock(object);
2105
2106 m = (vm_page_t) queue_next(&m->pageq);
2107 continue;
2108 }
2109 vm_page_free(m);
2110 vm_object_unlock(object);
2111 vm_page_unlock_queues();
2112
2113 return KERN_SUCCESS;
2114 }
2115 m = (vm_page_t) queue_first(&vm_page_queue_active);
2116
2117 while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)) {
2118
2119 object = m->object;
2120
2121 if ( !vm_object_lock_try(object)) {
2122 m = (vm_page_t) queue_next(&m->pageq);
2123 continue;
2124 }
2125 if ((!object->alive) || (object->pageout)) {
2126 vm_object_unlock(object);
2127
2128 m = (vm_page_t) queue_next(&m->pageq);
2129 continue;
2130 }
2131 if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious
2132 || m->precious || m->cleaning
2133 || m->dump_cleaning || m->error
2134 || m->pageout || m->laundry
2135 || m->list_req_pending
2136 || m->overwriting) {
2137 vm_object_unlock(object);
2138
2139 m = (vm_page_t) queue_next(&m->pageq);
2140 continue;
2141 }
2142 m->busy = TRUE;
2143 pmap_page_protect(m->phys_page, VM_PROT_NONE);
2144 m->dirty = pmap_is_modified(m->phys_page);
2145
2146 if (m->dirty) {
2147 PAGE_WAKEUP_DONE(m);
2148 vm_object_unlock(object);
2149
2150 m = (vm_page_t) queue_next(&m->pageq);
2151 continue;
2152 }
2153 vm_page_free(m);
2154 vm_object_unlock(object);
2155 vm_page_unlock_queues();
2156
2157 return KERN_SUCCESS;
2158 }
2159 vm_page_unlock_queues();
2160
2161 return KERN_FAILURE;
2162 }
2163
2164
2165 static upl_t
2166 upl_create(
2167 int flags,
2168 vm_size_t size)
2169 {
2170 upl_t upl;
2171 int page_field_size; /* bit field in word size buf */
2172
2173 page_field_size = 0;
2174 if (flags & UPL_CREATE_LITE) {
2175 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2176 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2177 }
2178 if(flags & UPL_CREATE_INTERNAL) {
2179 upl = (upl_t)kalloc(sizeof(struct upl)
2180 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2181 + page_field_size);
2182 } else {
2183 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
2184 }
2185 upl->flags = 0;
2186 upl->src_object = NULL;
2187 upl->kaddr = (vm_offset_t)0;
2188 upl->size = 0;
2189 upl->map_object = NULL;
2190 upl->ref_count = 1;
2191 upl_lock_init(upl);
2192 #ifdef UBC_DEBUG
2193 upl->ubc_alias1 = 0;
2194 upl->ubc_alias2 = 0;
2195 #endif /* UBC_DEBUG */
2196 return(upl);
2197 }
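/*
 * Layout note (follows from the allocation above and the pointer
 * arithmetic in vm_object_upl_request / vm_object_iopl_request):
 * a UPL_CREATE_INTERNAL | UPL_CREATE_LITE upl is one kalloc'd block:
 *
 *	struct upl
 *	upl_page_info_t[size/PAGE_SIZE]	  the "internal" page list
 *	wpl_array_t bitmap		  the "lite" list, one bit per
 *					  page, rounded up to 4 bytes
 *
 * A non-INTERNAL upl omits the page_info array and a non-LITE upl
 * omits the bitmap.
 */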
2198
2199 static void
2200 upl_destroy(
2201 upl_t upl)
2202 {
2203 int page_field_size; /* bit field in word size buf */
2204
2205 #ifdef UBC_DEBUG
2206 {
2207 upl_t upl_ele;
2208 vm_object_t object;
2209 if (upl->map_object->pageout) {
2210 object = upl->map_object->shadow;
2211 } else {
2212 object = upl->map_object;
2213 }
2214 vm_object_lock(object);
2215 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
2216 if(upl_ele == upl) {
2217 queue_remove(&object->uplq,
2218 upl_ele, upl_t, uplq);
2219 break;
2220 }
2221 }
2222 vm_object_unlock(object);
2223 }
2224 #endif /* UBC_DEBUG */
2225 /* drop a reference on the map_object whether or */
2226 /* not a pageout object is inserted */
2227 if(upl->map_object->pageout)
2228 vm_object_deallocate(upl->map_object);
2229
2230 page_field_size = 0;
2231 if (upl->flags & UPL_LITE) {
2232 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2233 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2234 }
2235 if(upl->flags & UPL_INTERNAL) {
2236 kfree((vm_offset_t)upl,
2237 sizeof(struct upl) +
2238 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2239 + page_field_size);
2240 } else {
2241 kfree((vm_offset_t)upl, sizeof(struct upl) + page_field_size);
2242 }
2243 }
2244
2245 __private_extern__ void
2246 uc_upl_dealloc(
2247 upl_t upl)
2248 {
2249 upl->ref_count -= 1;
2250 if(upl->ref_count == 0) {
2251 upl_destroy(upl);
2252 }
2253 }
2254
2255 void
2256 upl_deallocate(
2257 upl_t upl)
2258 {
2259
2260 upl->ref_count -= 1;
2261 if(upl->ref_count == 0) {
2262 upl_destroy(upl);
2263 }
2264 }
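/*
 * Note that uc_upl_dealloc and upl_deallocate are identical
 * reference-drop paths: a upl is created with ref_count == 1,
 * vm_upl_map takes an extra reference for the mapping, and the
 * structure is destroyed only when the last reference is released.
 */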
2265
2266 /*
2267 * Routine: vm_object_upl_request
2268 * Purpose:
2269 * Cause the population of a portion of a vm_object.
2270 * Depending on the nature of the request, the pages
2271 * returned may contain valid data or be uninitialized.
2272 * A page list structure, listing the physical pages,
2273 * will be returned upon request.
2274 * This function is called by the file system or any other
2275 * supplier of backing store to a pager.
2276 * IMPORTANT NOTE: The caller must still respect the relationship
2277 * between the vm_object and its backing memory object. The
2278 * caller MUST NOT substitute changes in the backing file
2279 * without first doing a memory_object_lock_request on the
2280 * target range unless it is known that the pages are not
2281 * shared with another entity at the pager level.
2282 * Copy_in_to:
2283 * if a page list structure is present
2284 * return the mapped physical pages, where a
2285 * page is not present, return a non-initialized
2286 * one. If the no_sync bit is turned on, don't
2287 * call the pager unlock to synchronize with other
2288 * possible copies of the page. Leave pages busy
2289 * in the original object, if a page list structure
2290 * was specified. When a commit of the page list
2291 * pages is done, the dirty bit will be set for each one.
2292 * Copy_out_from:
2293 * If a page list structure is present, return
2294 * all mapped pages. Where a page does not exist
2295 * map a zero filled one. Leave pages busy in
2296 * the original object. If a page list structure
2297 * is not specified, this call is a no-op.
2298 *
2299 * Note: access to default pager objects has a rather interesting
2300 * twist.  The caller of this routine, presumably the file system
2301 * page cache handling code, will never actually make a request
2302 * against a default pager backed object.  Only the default
2303 * pager will make requests on backing store related vm_objects.
2304 * In this way the default pager can maintain the relationship
2305 * between backing store files (abstract memory objects) and
2306 * the vm_objects (cache objects) they support.
2307 *
2308 */
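/*
 * A minimal caller sketch (hypothetical, for illustration only; it is
 * not part of the original interface documentation).  It assumes the
 * caller holds a reference on "object" and wants to push the dirty
 * pages in one MAX_UPL_TRANSFER-sized window out through the pager,
 * committing or aborting the upl as a whole when the I/O completes.
 */
#if 0	/* illustrative sketch, never compiled */
static void
example_flush_window(vm_object_t object, vm_object_offset_t base)
{
	upl_t		upl;
	upl_page_info_t	*pl;
	unsigned int	count = MAX_UPL_TRANSFER;
	boolean_t	empty;
	boolean_t	io_ok = TRUE;	/* stands in for the driver's status */
	kern_return_t	kr;

	kr = vm_object_upl_request(object, base,
			MAX_UPL_TRANSFER * PAGE_SIZE,
			&upl, NULL, &count,
			UPL_SET_INTERNAL | UPL_COPYOUT_FROM |
			UPL_RET_ONLY_DIRTY);
	if (kr != KERN_SUCCESS)
		return;
	/* an internal upl carries its page list right behind the header */
	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

	/* ... hand "pl" to the pager/driver to perform the actual I/O ... */

	if (io_ok)
		upl_commit_range(upl, 0, MAX_UPL_TRANSFER * PAGE_SIZE,
				UPL_COMMIT_CLEAR_DIRTY, NULL, 0, &empty);
	else
		upl_abort(upl, UPL_ABORT_ERROR);
	upl_deallocate(upl);	/* drop the creation reference */
}
#endif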
2309 __private_extern__ kern_return_t
2310 vm_object_upl_request(
2311 vm_object_t object,
2312 vm_object_offset_t offset,
2313 vm_size_t size,
2314 upl_t *upl_ptr,
2315 upl_page_info_array_t user_page_list,
2316 unsigned int *page_list_count,
2317 int cntrl_flags)
2318 {
2319 vm_page_t dst_page;
2320 vm_object_offset_t dst_offset = offset;
2321 vm_size_t xfer_size = size;
2322 boolean_t do_m_lock = FALSE;
2323 boolean_t dirty;
2324 boolean_t hw_dirty;
2325 upl_t upl = NULL;
2326 int entry;
2327 boolean_t encountered_lrp = FALSE;
2328
2329 vm_page_t alias_page = NULL;
2330 int page_ticket;
2331 wpl_array_t lite_list;
2332
2333 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2334 >> UPL_PAGE_TICKET_SHIFT;
2335
2336 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2337 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2338 }
2339
2340 if(cntrl_flags & UPL_SET_INTERNAL)
2341 if(page_list_count != NULL)
2342 *page_list_count = MAX_UPL_TRANSFER;
2343 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
2344 ((page_list_count != NULL) && (*page_list_count != 0)
2345 && *page_list_count < (size/page_size)))
2346 return KERN_INVALID_ARGUMENT;
2347
2348 if((!object->internal) && (object->paging_offset != 0))
2349 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2350
2351 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2352 return KERN_SUCCESS;
2353 }
2354
2355 if(upl_ptr) {
2356 if(cntrl_flags & UPL_SET_INTERNAL) {
2357 if(cntrl_flags & UPL_SET_LITE) {
2358 vm_offset_t page_field_size;
2359 upl = upl_create(
2360 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2361 size);
2362 user_page_list = (upl_page_info_t *)
2363 (((vm_offset_t)upl) + sizeof(struct upl));
2364 lite_list = (wpl_array_t)
2365 (((vm_offset_t)user_page_list) +
2366 ((size/PAGE_SIZE) *
2367 sizeof(upl_page_info_t)));
2368 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2369 page_field_size =
2370 (page_field_size + 3) & 0xFFFFFFFC;
2371 bzero((char *)lite_list, page_field_size);
2372 upl->flags =
2373 UPL_LITE | UPL_INTERNAL;
2374 } else {
2375 upl = upl_create(UPL_CREATE_INTERNAL, size);
2376 user_page_list = (upl_page_info_t *)
2377 (((vm_offset_t)upl)
2378 + sizeof(struct upl));
2379 upl->flags = UPL_INTERNAL;
2380 }
2381 } else {
2382 if(cntrl_flags & UPL_SET_LITE) {
2383 vm_offset_t page_field_size;
2384 upl = upl_create(UPL_CREATE_LITE, size);
2385 lite_list = (wpl_array_t)
2386 (((vm_offset_t)upl) + sizeof(struct upl));
2387 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2388 page_field_size =
2389 (page_field_size + 3) & 0xFFFFFFFC;
2390 bzero((char *)lite_list, page_field_size);
2391 upl->flags = UPL_LITE;
2392 } else {
2393 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2394 upl->flags = 0;
2395 }
2396 }
2397
2398 if(object->phys_contiguous) {
2399 upl->map_object = object;
2400 /* don't need any shadow mappings for this one */
2401 /* since it is already I/O memory */
2402 upl->flags |= UPL_DEVICE_MEMORY;
2403
2404 vm_object_lock(object);
2405 vm_object_paging_begin(object);
2406 vm_object_unlock(object);
2407
2408 /* paging_in_progress protects paging_offset */
2409 upl->offset = offset + object->paging_offset;
2410 upl->size = size;
2411 *upl_ptr = upl;
2412 if(user_page_list) {
2413 user_page_list[0].phys_addr =
2414 (offset + object->shadow_offset)>>12;
2415 user_page_list[0].device = TRUE;
2416 }
2417
2418 if(page_list_count != NULL) {
2419 if (upl->flags & UPL_INTERNAL) {
2420 *page_list_count = 0;
2421 } else {
2422 *page_list_count = 1;
2423 }
2424 }
2425 return KERN_SUCCESS;
2426 }
2427 if(user_page_list)
2428 user_page_list[0].device = FALSE;
2429
2430 if(cntrl_flags & UPL_SET_LITE) {
2431 upl->map_object = object;
2432 } else {
2433 upl->map_object = vm_object_allocate(size);
2434 vm_object_lock(upl->map_object);
2435 upl->map_object->shadow = object;
2436 upl->map_object->pageout = TRUE;
2437 upl->map_object->can_persist = FALSE;
2438 upl->map_object->copy_strategy =
2439 MEMORY_OBJECT_COPY_NONE;
2440 upl->map_object->shadow_offset = offset;
2441 upl->map_object->wimg_bits = object->wimg_bits;
2442 vm_object_unlock(upl->map_object);
2443 }
2444 }
2445 if (!(cntrl_flags & UPL_SET_LITE)) {
2446 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2447 }
2448 vm_object_lock(object);
2449 vm_object_paging_begin(object);
2450
2451 /* we can lock in the paging_offset once paging_in_progress is set */
2452 if(upl_ptr) {
2453 upl->size = size;
2454 upl->offset = offset + object->paging_offset;
2455 *upl_ptr = upl;
2456 #ifdef UBC_DEBUG
2457 queue_enter(&object->uplq, upl, upl_t, uplq);
2458 #endif /* UBC_DEBUG */
2459 }
2460
2461 entry = 0;
2462 if(cntrl_flags & UPL_COPYOUT_FROM) {
2463 upl->flags |= UPL_PAGE_SYNC_DONE;
2464
2465 while (xfer_size) {
2466 if((alias_page == NULL) &&
2467 !(cntrl_flags & UPL_SET_LITE)) {
2468 vm_object_unlock(object);
2469 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2470 vm_object_lock(object);
2471 }
2472 if(((dst_page = vm_page_lookup(object,
2473 dst_offset)) == VM_PAGE_NULL) ||
2474 dst_page->fictitious ||
2475 dst_page->absent ||
2476 dst_page->error ||
2477 (dst_page->wire_count != 0 &&
2478 !dst_page->pageout) ||
2479 ((!(dst_page->dirty || dst_page->precious ||
2480 pmap_is_modified(dst_page->phys_page)))
2481 && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
2482 ((!(dst_page->inactive))
2483 && (dst_page->page_ticket != page_ticket)
2484 && ((dst_page->page_ticket+1) != page_ticket)
2485 && (cntrl_flags & UPL_FOR_PAGEOUT)) ||
2486 ((!dst_page->list_req_pending) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2487 (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
2488 pmap_is_referenced(dst_page->phys_page))) {
2489 if(user_page_list) {
2490 user_page_list[entry].phys_addr = 0;
2491 }
2492 } else {
2493
2494 if(dst_page->busy &&
2495 (!(dst_page->list_req_pending &&
2496 dst_page->pageout))) {
2497 if(cntrl_flags & UPL_NOBLOCK) {
2498 if(user_page_list) {
2499 user_page_list[entry].phys_addr = 0;
2500 }
2501 entry++;
2502 dst_offset += PAGE_SIZE_64;
2503 xfer_size -= PAGE_SIZE;
2504 continue;
2505 }
2506 /*someone else is playing with the */
2507 /* page. We will have to wait. */
2508 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2509 continue;
2510 }
2511 /* Someone else already cleaning the page? */
2512 if((dst_page->cleaning || dst_page->absent ||
2513 dst_page->wire_count != 0) &&
2514 !dst_page->list_req_pending) {
2515 if(user_page_list) {
2516 user_page_list[entry].phys_addr = 0;
2517 }
2518 entry++;
2519 dst_offset += PAGE_SIZE_64;
2520 xfer_size -= PAGE_SIZE;
2521 continue;
2522 }
2523 /* eliminate all mappings from the */
2524 /* original object and its progeny */
2525
2526 vm_page_lock_queues();
2527
2528 /* pageout statistics gathering. count */
2529 /* all the pages we will page out that */
2530 /* were not counted in the initial */
2531 /* vm_pageout_scan work */
2532 if(dst_page->list_req_pending)
2533 encountered_lrp = TRUE;
2534 if((dst_page->dirty ||
2535 (dst_page->object->internal &&
2536 dst_page->precious)) &&
2537 (dst_page->list_req_pending
2538 == FALSE)) {
2539 if(encountered_lrp) {
2540 CLUSTER_STAT
2541 (pages_at_higher_offsets++;)
2542 } else {
2543 CLUSTER_STAT
2544 (pages_at_lower_offsets++;)
2545 }
2546 }
2547
2548 /* Turn off busy indication on pending */
2549 /* pageout. Note: we can only get here */
2550 /* in the request pending case. */
2551 dst_page->list_req_pending = FALSE;
2552 dst_page->busy = FALSE;
2553 dst_page->cleaning = FALSE;
2554
2555 hw_dirty = pmap_is_modified(dst_page->phys_page);
2556 dirty = hw_dirty ? TRUE : dst_page->dirty;
2557
2558 if(cntrl_flags & UPL_SET_LITE) {
2559 int pg_num;
2560 pg_num = (dst_offset-offset)/PAGE_SIZE;
2561 lite_list[pg_num>>5] |=
2562 1 << (pg_num & 31);
2563 if (hw_dirty)
2564 pmap_clear_modify(dst_page->phys_page);
2565 /*
2566 * Record that this page has been
2567 * written out
2568 */
2569 #if MACH_PAGEMAP
2570 vm_external_state_set(
2571 object->existence_map,
2572 dst_page->offset);
2573 #endif /*MACH_PAGEMAP*/
2574
2575 /*
2576 * Mark original page as cleaning
2577 * in place.
2578 */
2579 dst_page->cleaning = TRUE;
2580 dst_page->dirty = TRUE;
2581 dst_page->precious = FALSE;
2582 } else {
2583 /* use pageclean setup, it is more */
2584 /* convenient even for the pageout */
2585 /* cases here */
2586 vm_pageclean_setup(dst_page,
2587 alias_page, upl->map_object,
2588 size - xfer_size);
2589
2590 alias_page->absent = FALSE;
2591 alias_page = NULL;
2592 }
2593
2594 if(!dirty) {
2595 dst_page->dirty = FALSE;
2596 dst_page->precious = TRUE;
2597 }
2598
2599 if(dst_page->pageout)
2600 dst_page->busy = TRUE;
2601
2602 if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
2603 || (cntrl_flags & UPL_FOR_PAGEOUT)) {
2604 /* deny access to the target page */
2605 /* while it is being worked on */
2606 if((!dst_page->pageout) &&
2607 (dst_page->wire_count == 0)) {
2608 dst_page->busy = TRUE;
2609 dst_page->pageout = TRUE;
2610 vm_page_wire(dst_page);
2611 }
2612 }
2613 if(user_page_list) {
2614 user_page_list[entry].phys_addr
2615 = dst_page->phys_page;
2616 user_page_list[entry].dirty =
2617 dst_page->dirty;
2618 user_page_list[entry].pageout =
2619 dst_page->pageout;
2620 user_page_list[entry].absent =
2621 dst_page->absent;
2622 user_page_list[entry].precious =
2623 dst_page->precious;
2624 }
2625 vm_page_unlock_queues();
2626 }
2627 entry++;
2628 dst_offset += PAGE_SIZE_64;
2629 xfer_size -= PAGE_SIZE;
2630 }
2631 } else {
2632 while (xfer_size) {
2633 if((alias_page == NULL) &&
2634 !(cntrl_flags & UPL_SET_LITE)) {
2635 vm_object_unlock(object);
2636 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2637 vm_object_lock(object);
2638 }
2639 dst_page = vm_page_lookup(object, dst_offset);
2640
2641 if(dst_page != VM_PAGE_NULL) {
2642 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2643 !((dst_page->list_req_pending)
2644 && (dst_page->absent))) {
2645 /* we are doing extended range */
2646 /* requests.  We want to grab */
2647 /* pages around some which are */
2648 /* already present. */
2649 if(user_page_list) {
2650 user_page_list[entry].phys_addr = 0;
2651 }
2652 entry++;
2653 dst_offset += PAGE_SIZE_64;
2654 xfer_size -= PAGE_SIZE;
2655 continue;
2656 }
2657 if((dst_page->cleaning) &&
2658 !(dst_page->list_req_pending)) {
2659 /*someone else is writing to the */
2660 /* page. We will have to wait. */
2661 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
2662 continue;
2663 }
2664 if ((dst_page->fictitious &&
2665 dst_page->list_req_pending)) {
2666 /* dump the fictitious page */
2667 dst_page->list_req_pending = FALSE;
2668 dst_page->clustered = FALSE;
2669
2670 vm_page_lock_queues();
2671 vm_page_free(dst_page);
2672 vm_page_unlock_queues();
2673
2674 } else if ((dst_page->absent &&
2675 dst_page->list_req_pending)) {
2676 /* the default_pager case */
2677 dst_page->list_req_pending = FALSE;
2678 dst_page->busy = FALSE;
2679 dst_page->clustered = FALSE;
2680 }
2681 }
2682 if((dst_page = vm_page_lookup(object, dst_offset)) ==
2683 VM_PAGE_NULL) {
2684 if(object->private) {
2685 /*
2686 * This is a nasty wrinkle for users
2687 * of upl who encounter device or
2688 * private memory; however, it is
2689 * unavoidable, since only a fault can
2690 * resolve the actual backing
2691 * physical page by asking the
2692 * backing device.
2693 */
2694 if(user_page_list) {
2695 user_page_list[entry].phys_addr = 0;
2696 }
2697 entry++;
2698 dst_offset += PAGE_SIZE_64;
2699 xfer_size -= PAGE_SIZE;
2700 continue;
2701 }
2702 /* need to allocate a page */
2703 dst_page = vm_page_alloc(object, dst_offset);
2704 if (dst_page == VM_PAGE_NULL) {
2705 vm_object_unlock(object);
2706 VM_PAGE_WAIT();
2707 vm_object_lock(object);
2708 continue;
2709 }
2710 dst_page->busy = FALSE;
2711 #if 0
2712 if(cntrl_flags & UPL_NO_SYNC) {
2713 dst_page->page_lock = 0;
2714 dst_page->unlock_request = 0;
2715 }
2716 #endif
2717 dst_page->absent = TRUE;
2718 object->absent_count++;
2719 }
2720 #if 1
2721 if(cntrl_flags & UPL_NO_SYNC) {
2722 dst_page->page_lock = 0;
2723 dst_page->unlock_request = 0;
2724 }
2725 #endif /* 1 */
2726 dst_page->overwriting = TRUE;
2727 if(dst_page->fictitious) {
2728 panic("need corner case for fictitious page");
2729 }
2730 if(dst_page->page_lock) {
2731 do_m_lock = TRUE;
2732 }
2733 if(upl_ptr) {
2734
2735 /* eliminate all mappings from the */
2736 /* original object and its progeny */
2737
2738 if(dst_page->busy) {
2739 /*someone else is playing with the */
2740 /* page. We will have to wait. */
2741 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2742 continue;
2743 }
2744 vm_page_lock_queues();
2745
2746 if( !(cntrl_flags & UPL_FILE_IO)) {
2747 pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
2748 }
2749 hw_dirty = pmap_is_modified(dst_page->phys_page);
2750 dirty = hw_dirty ? TRUE : dst_page->dirty;
2751
2752 if(cntrl_flags & UPL_SET_LITE) {
2753 int pg_num;
2754 pg_num = (dst_offset-offset)/PAGE_SIZE;
2755 lite_list[pg_num>>5] |=
2756 1 << (pg_num & 31);
2757 if (hw_dirty)
2758 pmap_clear_modify(dst_page->phys_page);
2759 /*
2760 * Record that this page has been
2761 * written out
2762 */
2763 #if MACH_PAGEMAP
2764 vm_external_state_set(
2765 object->existence_map,
2766 dst_page->offset);
2767 #endif /*MACH_PAGEMAP*/
2768
2769 /*
2770 * Mark original page as cleaning
2771 * in place.
2772 */
2773 dst_page->cleaning = TRUE;
2774 dst_page->dirty = TRUE;
2775 dst_page->precious = FALSE;
2776 } else {
2777 /* use pageclean setup, it is more */
2778 /* convenient even for the pageout */
2779 /* cases here */
2780 vm_pageclean_setup(dst_page,
2781 alias_page, upl->map_object,
2782 size - xfer_size);
2783
2784 alias_page->absent = FALSE;
2785 alias_page = NULL;
2786 }
2787
2788 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
2789 /* clean in place for read implies */
2790 /* that a write will be done on all */
2791 /* the pages that are dirty before */
2792 /* a upl commit is done. The caller */
2793 /* is obligated to preserve the */
2794 /* contents of all pages marked */
2795 /* dirty. */
2796 upl->flags |= UPL_CLEAR_DIRTY;
2797 }
2798
2799 if(!dirty) {
2800 dst_page->dirty = FALSE;
2801 dst_page->precious = TRUE;
2802 }
2803
2804 if (dst_page->wire_count == 0) {
2805 /* deny access to the target page while */
2806 /* it is being worked on */
2807 dst_page->busy = TRUE;
2808 } else {
2809 vm_page_wire(dst_page);
2810 }
2811 /*
2812 * expect the page to be used
2813 */
2814 dst_page->reference = TRUE;
2815 dst_page->precious =
2816 (cntrl_flags & UPL_PRECIOUS)
2817 ? TRUE : FALSE;
2818 if(user_page_list) {
2819 user_page_list[entry].phys_addr
2820 = dst_page->phys_page;
2821 user_page_list[entry].dirty =
2822 dst_page->dirty;
2823 user_page_list[entry].pageout =
2824 dst_page->pageout;
2825 user_page_list[entry].absent =
2826 dst_page->absent;
2827 user_page_list[entry].precious =
2828 dst_page->precious;
2829 }
2830 vm_page_unlock_queues();
2831 }
2832 entry++;
2833 dst_offset += PAGE_SIZE_64;
2834 xfer_size -= PAGE_SIZE;
2835 }
2836 }
2837 if (upl->flags & UPL_INTERNAL) {
2838 if(page_list_count != NULL)
2839 *page_list_count = 0;
2840 } else if ((page_list_count != NULL) &&
2841 (*page_list_count > entry)) {
2842 *page_list_count = entry;
2843 }
2844
2845 if(alias_page != NULL) {
2846 vm_page_lock_queues();
2847 vm_page_free(alias_page);
2848 vm_page_unlock_queues();
2849 }
2850
2851 if(do_m_lock) {
2852 vm_prot_t access_required;
2853 /* call back all associated pages from other users of the pager */
2854 /* all future updates will be on data which is based on the */
2855 /* changes we are going to make here. Note: it is assumed that */
2856 /* we already hold copies of the data so we will not be seeing */
2857 /* an avalanche of incoming data from the pager */
2858 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
2859 ? VM_PROT_READ : VM_PROT_WRITE;
2860 while (TRUE) {
2861 kern_return_t rc;
2862
2863 if(!object->pager_ready) {
2864 wait_result_t wait_result;
2865
2866 wait_result = vm_object_sleep(object,
2867 VM_OBJECT_EVENT_PAGER_READY,
2868 THREAD_UNINT);
2869 if (wait_result != THREAD_AWAKENED) {
2870 vm_object_unlock(object);
2871 return(KERN_FAILURE);
2872 }
2873 continue;
2874 }
2875
2876 vm_object_unlock(object);
2877
2878 if ((rc = memory_object_data_unlock(
2879 object->pager,
2880 dst_offset + object->paging_offset,
2881 size,
2882 access_required))) {
2883 if (rc == MACH_SEND_INTERRUPTED)
2884 continue;
2885 else
2886 return KERN_FAILURE;
2887 }
2888 break;
2889
2890 }
2891 /* let's wait on the last page requested */
2892 /* NOTE: we will have to update lock completed routine to signal */
2893 if(dst_page != VM_PAGE_NULL &&
2894 (access_required & dst_page->page_lock) != access_required) {
2895 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
2896 thread_block((void (*)(void))0);
2897 vm_object_lock(object);
2898 }
2899 }
2900 vm_object_unlock(object);
2901 return KERN_SUCCESS;
2902 }
2903
2904 /* JMM - Backward compatibility for now */
2905 kern_return_t
2906 vm_fault_list_request(
2907 memory_object_control_t control,
2908 vm_object_offset_t offset,
2909 vm_size_t size,
2910 upl_t *upl_ptr,
2911 upl_page_info_t **user_page_list_ptr,
2912 int page_list_count,
2913 int cntrl_flags)
2914 {
2915 int local_list_count;
2916 upl_page_info_t *user_page_list;
2917 kern_return_t kr;
2918
2919 if (user_page_list_ptr != NULL) {
2920 local_list_count = page_list_count;
2921 user_page_list = *user_page_list_ptr;
2922 } else {
2923 local_list_count = 0;
2924 user_page_list = NULL;
2925 }
2926 kr = memory_object_upl_request(control,
2927 offset,
2928 size,
2929 upl_ptr,
2930 user_page_list,
2931 &local_list_count,
2932 cntrl_flags);
2933
2934 if(kr != KERN_SUCCESS)
2935 return kr;
2936
2937 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
2938 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
2939 }
2940
2941 return KERN_SUCCESS;
2942 }
2943
2944
2945
2946 /*
2947 * Routine: vm_object_super_upl_request
2948 * Purpose:
2949 * Cause the population of a portion of a vm_object
2950 * in much the same way as memory_object_upl_request.
2951 * Depending on the nature of the request, the pages
2952 * returned may contain valid data or be uninitialized.
2953 * However, the region may be expanded up to the super
2954 * cluster size provided.
2955 */
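/*
 * Worked example (hypothetical numbers, assuming a 64KB super
 * cluster, i.e. super_cluster == 0x10000): a request for
 * offset == 0x1A000, size == 0x3000 is rounded down to
 * base_offset == 0x10000; since offset + size (0x1D000) still fits
 * inside one cluster, the expanded window is [0x10000, 0x20000).
 * Had the request straddled the cluster boundary (say
 * offset == 0x1E000, size == 0x4000), super_size would be doubled
 * and the window would become [0x10000, 0x30000), clipped to the
 * object size.  The expansion is only attempted while free pages
 * exceed the reserved pool.
 */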
2956
2957 __private_extern__ kern_return_t
2958 vm_object_super_upl_request(
2959 vm_object_t object,
2960 vm_object_offset_t offset,
2961 vm_size_t size,
2962 vm_size_t super_cluster,
2963 upl_t *upl,
2964 upl_page_info_t *user_page_list,
2965 unsigned int *page_list_count,
2966 int cntrl_flags)
2967 {
2968 vm_page_t target_page;
2969 int ticket;
2970
2971 if(object->paging_offset > offset)
2972 return KERN_FAILURE;
2973
2974 assert(object->paging_in_progress);
2975 offset = offset - object->paging_offset;
2976 if(cntrl_flags & UPL_FOR_PAGEOUT) {
2977 if((target_page = vm_page_lookup(object, offset))
2978 != VM_PAGE_NULL) {
2979 ticket = target_page->page_ticket;
2980 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
2981 cntrl_flags = cntrl_flags |
2982 ((ticket << UPL_PAGE_TICKET_SHIFT)
2983 & UPL_PAGE_TICKET_MASK);
2984 }
2985 }
2986
2987
2988 /* enabling the line below turns off the super clustering exercised by the default_pager */
2989 /*
2990 super_cluster = size;
2991 */
2992 if ((super_cluster > size) &&
2993 (vm_page_free_count > vm_page_free_reserved)) {
2994
2995 vm_object_offset_t base_offset;
2996 vm_size_t super_size;
2997
2998 base_offset = (offset &
2999 ~((vm_object_offset_t) super_cluster - 1));
3000 super_size = (offset+size) > (base_offset + super_cluster) ?
3001 super_cluster<<1 : super_cluster;
3002 super_size = ((base_offset + super_size) > object->size) ?
3003 (object->size - base_offset) : super_size;
3004 if(offset > (base_offset + super_size))
3005 panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
3006 /* apparently there is a case where the vm requests a */
3007 /* page to be written out whose offset is beyond the */
3008 /* object size */
3009 if((offset + size) > (base_offset + super_size))
3010 super_size = (offset + size) - base_offset;
3011
3012 offset = base_offset;
3013 size = super_size;
3014 }
3015 return vm_object_upl_request(object, offset, size,
3016 upl, user_page_list, page_list_count,
3017 cntrl_flags);
3018 }
3019
3020
3021 kern_return_t
3022 vm_upl_map(
3023 vm_map_t map,
3024 upl_t upl,
3025 vm_offset_t *dst_addr)
3026 {
3027 vm_size_t size;
3028 vm_object_offset_t offset;
3029 vm_offset_t addr;
3030 vm_page_t m;
3031 kern_return_t kr;
3032
3033 if (upl == UPL_NULL)
3034 return KERN_INVALID_ARGUMENT;
3035
3036 upl_lock(upl);
3037
3038 /* check to see if already mapped */
3039 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3040 upl_unlock(upl);
3041 return KERN_FAILURE;
3042 }
3043
3044 if((!(upl->map_object->pageout)) &&
3045 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3046 (upl->map_object->phys_contiguous))) {
3047 vm_object_t object;
3048 vm_page_t alias_page;
3049 vm_object_offset_t new_offset;
3050 int pg_num;
3051 wpl_array_t lite_list;
3052
3053 if(upl->flags & UPL_INTERNAL) {
3054 lite_list = (wpl_array_t)
3055 ((((vm_offset_t)upl) + sizeof(struct upl))
3056 + ((upl->size/PAGE_SIZE)
3057 * sizeof(upl_page_info_t)));
3058 } else {
3059 lite_list = (wpl_array_t)
3060 (((vm_offset_t)upl) + sizeof(struct upl));
3061 }
3062 object = upl->map_object;
3063 upl->map_object = vm_object_allocate(upl->size);
3064 vm_object_lock(upl->map_object);
3065 upl->map_object->shadow = object;
3066 upl->map_object->pageout = TRUE;
3067 upl->map_object->can_persist = FALSE;
3068 upl->map_object->copy_strategy =
3069 MEMORY_OBJECT_COPY_NONE;
3070 upl->map_object->shadow_offset =
3071 upl->offset - object->paging_offset;
3072 upl->map_object->wimg_bits = object->wimg_bits;
3073 vm_object_unlock(upl->map_object);
3074 offset = upl->map_object->shadow_offset;
3075 new_offset = 0;
3076 size = upl->size;
3077 vm_object_lock(object);
3078 while(size) {
3079 pg_num = (new_offset)/PAGE_SIZE;
3080 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3081 vm_object_unlock(object);
3082 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3083 vm_object_lock(object);
3084 m = vm_page_lookup(object, offset);
3085 if (m == VM_PAGE_NULL) {
3086 panic("vm_upl_map: page missing\n");
3087 }
3088
3089 vm_object_paging_begin(object);
3090
3091 /*
3092 * Convert the fictitious page to a private
3093 * shadow of the real page.
3094 */
3095 assert(alias_page->fictitious);
3096 alias_page->fictitious = FALSE;
3097 alias_page->private = TRUE;
3098 alias_page->pageout = TRUE;
3099 alias_page->phys_page = m->phys_page;
3100 vm_page_wire(alias_page);
3101
3102 vm_page_insert(alias_page,
3103 upl->map_object, new_offset);
3104 assert(!alias_page->wanted);
3105 alias_page->busy = FALSE;
3106 alias_page->absent = FALSE;
3107 }
3108
3109 size -= PAGE_SIZE;
3110 offset += PAGE_SIZE_64;
3111 new_offset += PAGE_SIZE_64;
3112 }
3113 vm_object_unlock(object);
3114 }
3115 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3116 offset = upl->offset - upl->map_object->paging_offset;
3117 else
3118 offset = 0;
3119
3120 size = upl->size;
3121
3122 vm_object_lock(upl->map_object);
3123 upl->map_object->ref_count++;
3124 vm_object_res_reference(upl->map_object);
3125 vm_object_unlock(upl->map_object);
3126
3127 *dst_addr = 0;
3128
3129
3130 /* NEED A UPL_MAP ALIAS */
3131 kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
3132 upl->map_object, offset, FALSE,
3133 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3134
3135 if (kr != KERN_SUCCESS) {
3136 upl_unlock(upl);
3137 return(kr);
3138 }
3139
3140 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3141 m = vm_page_lookup(upl->map_object, offset);
3142 if(m) {
3143 unsigned int cache_attr;
3144 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3145
3146 PMAP_ENTER(map->pmap, addr,
3147 m, VM_PROT_ALL,
3148 cache_attr, TRUE);
3149 }
3150 offset+=PAGE_SIZE_64;
3151 }
3152 upl->ref_count++; /* hold a reference for the mapping */
3153 upl->flags |= UPL_PAGE_LIST_MAPPED;
3154 upl->kaddr = *dst_addr;
3155 upl_unlock(upl);
3156 return KERN_SUCCESS;
3157 }
3158
3159
3160 kern_return_t
3161 vm_upl_unmap(
3162 vm_map_t map,
3163 upl_t upl)
3164 {
3165 vm_address_t addr;
3166 vm_size_t size;
3167
3168 if (upl == UPL_NULL)
3169 return KERN_INVALID_ARGUMENT;
3170
3171 upl_lock(upl);
3172 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
3173 addr = upl->kaddr;
3174 size = upl->size;
3175 assert(upl->ref_count > 1);
3176 upl->ref_count--; /* removing mapping ref */
3177 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3178 upl->kaddr = (vm_offset_t) 0;
3179 upl_unlock(upl);
3180
3181 vm_deallocate(map, addr, size);
3182 return KERN_SUCCESS;
3183 }
3184 upl_unlock(upl);
3185 return KERN_FAILURE;
3186 }
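/*
 * A minimal sketch of the map/unmap pairing (hypothetical caller;
 * it assumes kernel_map is visible in this scope).  vm_upl_map
 * enters the upl's pages into the given map at an address of the
 * map's choosing and takes a reference for the mapping;
 * vm_upl_unmap removes the mapping and drops that reference.
 */
#if 0	/* illustrative sketch, never compiled */
static kern_return_t
example_touch_upl(upl_t upl)
{
	vm_offset_t	addr;
	kern_return_t	kr;

	kr = vm_upl_map(kernel_map, upl, &addr);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... operate on the pages now mapped at [addr, addr + upl->size) ... */

	return vm_upl_unmap(kernel_map, upl);
}
#endif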
3187
3188 kern_return_t
3189 upl_commit_range(
3190 upl_t upl,
3191 vm_offset_t offset,
3192 vm_size_t size,
3193 int flags,
3194 upl_page_info_t *page_list,
3195 mach_msg_type_number_t count,
3196 boolean_t *empty)
3197 {
3198 vm_size_t xfer_size = size;
3199 vm_object_t shadow_object;
3200 vm_object_t object = upl->map_object;
3201 vm_object_offset_t target_offset;
3202 int entry;
3203 wpl_array_t lite_list;
3204 int occupied;
3205 int delayed_unlock = 0;
3206 boolean_t shadow_internal;
3207
3208 *empty = FALSE;
3209
3210 if (upl == UPL_NULL)
3211 return KERN_INVALID_ARGUMENT;
3212
3213
3214 if (count == 0)
3215 page_list = NULL;
3216
3217 if(object->pageout) {
3218 shadow_object = object->shadow;
3219 } else {
3220 shadow_object = object;
3221 }
3222
3223 upl_lock(upl);
3224
3225 if (upl->flags & UPL_CLEAR_DIRTY)
3226 flags |= UPL_COMMIT_CLEAR_DIRTY;
3227
3228 if (upl->flags & UPL_DEVICE_MEMORY) {
3229 xfer_size = 0;
3230 } else if ((offset + size) > upl->size) {
3231 upl_unlock(upl);
3232 return KERN_FAILURE;
3233 }
3234
3235 if (upl->flags & UPL_INTERNAL) {
3236 lite_list = (wpl_array_t)
3237 ((((vm_offset_t)upl) + sizeof(struct upl))
3238 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3239 } else {
3240 lite_list = (wpl_array_t)
3241 (((vm_offset_t)upl) + sizeof(struct upl));
3242 }
3243
3244 vm_object_lock(shadow_object);
3245 shadow_internal = shadow_object->internal;
3246
3247 entry = offset/PAGE_SIZE;
3248 target_offset = (vm_object_offset_t)offset;
3249
3250 while(xfer_size) {
3251 vm_page_t t,m;
3252 upl_page_info_t *p;
3253
3254 m = VM_PAGE_NULL;
3255
3256 if (upl->flags & UPL_LITE) {
3257 int pg_num;
3258
3259 pg_num = target_offset/PAGE_SIZE;
3260
3261 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3262 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3263 m = vm_page_lookup(shadow_object,
3264 target_offset + (upl->offset -
3265 shadow_object->paging_offset));
3266 }
3267 }
3268 if (object->pageout) {
3269 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3270 t->pageout = FALSE;
3271
3272 if (delayed_unlock) {
3273 delayed_unlock = 0;
3274 vm_page_unlock_queues();
3275 }
3276 VM_PAGE_FREE(t);
3277
3278 if (m == NULL) {
3279 m = vm_page_lookup(
3280 shadow_object,
3281 target_offset +
3282 object->shadow_offset);
3283 }
3284 if (m != VM_PAGE_NULL)
3285 vm_object_paging_end(m->object);
3286 }
3287 }
3288 if (m != VM_PAGE_NULL) {
3289
3290 if (upl->flags & UPL_IO_WIRE) {
3291
3292 if (delayed_unlock == 0)
3293 vm_page_lock_queues();
3294
3295 vm_page_unwire(m);
3296
3297 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3298 delayed_unlock = 0;
3299 vm_page_unlock_queues();
3300 }
3301 if (page_list) {
3302 page_list[entry].phys_addr = 0;
3303 }
3304 if (flags & UPL_COMMIT_SET_DIRTY) {
3305 m->dirty = TRUE;
3306 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3307 m->dirty = FALSE;
3308 pmap_clear_modify(m->phys_page);
3309 }
3310 if (flags & UPL_COMMIT_INACTIVATE) {
3311 m->reference = FALSE;
3312 vm_page_deactivate(m);
3313 pmap_clear_reference(m->phys_page);
3314 }
3315 target_offset += PAGE_SIZE_64;
3316 xfer_size -= PAGE_SIZE;
3317 entry++;
3318 continue;
3319 }
3320 if (delayed_unlock == 0)
3321 vm_page_lock_queues();
3322 /*
3323 * make sure to clear the hardware
3324 * modify or reference bits before
3325 * releasing the BUSY bit on this page;
3326 * otherwise we risk losing a legitimate
3327 * change of state
3328 */
3329 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3330 m->dirty = FALSE;
3331 pmap_clear_modify(m->phys_page);
3332 }
3333 if (flags & UPL_COMMIT_INACTIVATE)
3334 pmap_clear_reference(m->phys_page);
3335
3336 if (page_list) {
3337 p = &(page_list[entry]);
3338 if(p->phys_addr && p->pageout && !m->pageout) {
3339 m->busy = TRUE;
3340 m->pageout = TRUE;
3341 vm_page_wire(m);
3342 } else if (page_list[entry].phys_addr &&
3343 !p->pageout && m->pageout &&
3344 !m->dump_cleaning) {
3345 m->pageout = FALSE;
3346 m->absent = FALSE;
3347 m->overwriting = FALSE;
3348 vm_page_unwire(m);
3349 PAGE_WAKEUP_DONE(m);
3350 }
3351 page_list[entry].phys_addr = 0;
3352 }
3353 m->dump_cleaning = FALSE;
3354 if(m->laundry) {
3355 if (!shadow_internal)
3356 vm_page_burst_count--;
3357 vm_page_laundry_count--;
3358 m->laundry = FALSE;
3359 if (vm_page_laundry_count < vm_page_laundry_min) {
3360 vm_page_laundry_min = 0;
3361 thread_wakeup((event_t)
3362 &vm_page_laundry_count);
3363 }
3364 }
3365 if(m->pageout) {
3366 m->cleaning = FALSE;
3367 m->pageout = FALSE;
3368 #if MACH_CLUSTER_STATS
3369 if (m->wanted) vm_pageout_target_collisions++;
3370 #endif
3371 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3372 m->dirty = pmap_is_modified(m->phys_page);
3373 if(m->dirty) {
3374 CLUSTER_STAT(
3375 vm_pageout_target_page_dirtied++;)
3376 vm_page_unwire(m);/* reactivates */
3377 VM_STAT(reactivations++);
3378 PAGE_WAKEUP_DONE(m);
3379 } else {
3380 CLUSTER_STAT(
3381 vm_pageout_target_page_freed++;)
3382 vm_page_free(m);/* clears busy, etc. */
3383
3384 if (page_list && page_list[entry].dirty)
3385 VM_STAT(pageouts++);
3386 }
3387 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3388 delayed_unlock = 0;
3389 vm_page_unlock_queues();
3390 }
3391 target_offset += PAGE_SIZE_64;
3392 xfer_size -= PAGE_SIZE;
3393 entry++;
3394 continue;
3395 }
3396 #if MACH_CLUSTER_STATS
3397 m->dirty = pmap_is_modified(m->phys_page);
3398
3399 if (m->dirty) vm_pageout_cluster_dirtied++;
3400 else vm_pageout_cluster_cleaned++;
3401 if (m->wanted) vm_pageout_cluster_collisions++;
3402 #else
3403 m->dirty = 0;
3404 #endif
3405
3406 if((m->busy) && (m->cleaning)) {
3407 /* the request_page_list case */
3408 if(m->absent) {
3409 m->absent = FALSE;
3410 if(shadow_object->absent_count == 1)
3411 vm_object_absent_release(shadow_object);
3412 else
3413 shadow_object->absent_count--;
3414 }
3415 m->overwriting = FALSE;
3416 m->busy = FALSE;
3417 m->dirty = FALSE;
3418 } else if (m->overwriting) {
3419 /* alternate request page list, write to */
3420 /* page_list case.  Occurs when the original */
3421 /* page was wired at the time of the list */
3422 /* request */
3423 assert(m->wire_count != 0);
3424 vm_page_unwire(m);/* reactivates */
3425 m->overwriting = FALSE;
3426 }
3427 m->cleaning = FALSE;
3428
3429 /* It is part of the semantics of COPYOUT_FROM */
3430 /* UPLs that a commit implies cache sync */
3431 /* between the vm page and the backing store; */
3432 /* this can be used to strip the precious bit */
3433 /* as well as clean. */
3434 if (upl->flags & UPL_PAGE_SYNC_DONE)
3435 m->precious = FALSE;
3436
3437 if (flags & UPL_COMMIT_SET_DIRTY)
3438 m->dirty = TRUE;
3439
3440 if (flags & UPL_COMMIT_INACTIVATE) {
3441 m->reference = FALSE;
3442 vm_page_deactivate(m);
3443 } else if (!m->active && !m->inactive) {
3444 if (m->reference)
3445 vm_page_activate(m);
3446 else
3447 vm_page_deactivate(m);
3448 }
3449 /*
3450 * Wake up any thread waiting for the page to finish cleaning.
3451 */
3452 PAGE_WAKEUP(m);
3453
3454 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3455 delayed_unlock = 0;
3456 vm_page_unlock_queues();
3457 }
3458 }
3459 target_offset += PAGE_SIZE_64;
3460 xfer_size -= PAGE_SIZE;
3461 entry++;
3462 }
3463 if (delayed_unlock)
3464 vm_page_unlock_queues();
3465
3466 occupied = 1;
3467
3468 if (upl->flags & UPL_DEVICE_MEMORY) {
3469 occupied = 0;
3470 } else if (upl->flags & UPL_LITE) {
3471 int pg_num;
3472 int i;
3473 pg_num = upl->size/PAGE_SIZE;
3474 pg_num = (pg_num + 31) >> 5;
3475 occupied = 0;
3476 for(i= 0; i<pg_num; i++) {
3477 if(lite_list[i] != 0) {
3478 occupied = 1;
3479 break;
3480 }
3481 }
3482 } else {
3483 if(queue_empty(&upl->map_object->memq)) {
3484 occupied = 0;
3485 }
3486 }
3487
3488 if(occupied == 0) {
3489 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
3490 *empty = TRUE;
3491 }
3492 if(object == shadow_object)
3493 vm_object_paging_end(shadow_object);
3494 }
3495 vm_object_unlock(shadow_object);
3496 upl_unlock(upl);
3497
3498 return KERN_SUCCESS;
3499 }
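/*
 * Usage note (informal): callers typically commit a upl in
 * I/O-sized chunks as each transfer completes.  *empty is only
 * reported when the upl's flags include UPL_COMMIT_NOTIFY_EMPTY and
 * no pages covered by the upl remain outstanding, at which point the
 * creator's reference can be dropped with upl_deallocate.
 */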
3500
3501 kern_return_t
3502 upl_abort_range(
3503 upl_t upl,
3504 vm_offset_t offset,
3505 vm_size_t size,
3506 int error,
3507 boolean_t *empty)
3508 {
3509 vm_size_t xfer_size = size;
3510 vm_object_t shadow_object;
3511 vm_object_t object = upl->map_object;
3512 vm_object_offset_t target_offset;
3513 vm_object_offset_t page_offset;
3514 int entry;
3515 wpl_array_t lite_list;
3516 int occupied;
3517 boolean_t shadow_internal;
3518
3519 *empty = FALSE;
3520
3521 if (upl == UPL_NULL)
3522 return KERN_INVALID_ARGUMENT;
3523
3524 if (upl->flags & UPL_IO_WIRE) {
3525 return upl_commit_range(upl,
3526 offset, size, 0,
3527 NULL, 0, empty);
3528 }
3529
3530 if(object->pageout) {
3531 shadow_object = object->shadow;
3532 } else {
3533 shadow_object = object;
3534 }
3535
3536 upl_lock(upl);
3537 if(upl->flags & UPL_DEVICE_MEMORY) {
3538 xfer_size = 0;
3539 } else if ((offset + size) > upl->size) {
3540 upl_unlock(upl);
3541 return KERN_FAILURE;
3542 }
3543
3544 vm_object_lock(shadow_object);
3545 shadow_internal = shadow_object->internal;
3546
3547 if(upl->flags & UPL_INTERNAL) {
3548 lite_list = (wpl_array_t)
3549 ((((vm_offset_t)upl) + sizeof(struct upl))
3550 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3551 } else {
3552 lite_list = (wpl_array_t)
3553 (((vm_offset_t)upl) + sizeof(struct upl));
3554 }
3555
3556 entry = offset/PAGE_SIZE;
3557 target_offset = (vm_object_offset_t)offset;
3558 while(xfer_size) {
3559 vm_page_t t,m;
3560 upl_page_info_t *p;
3561
3562 m = VM_PAGE_NULL;
3563 if(upl->flags & UPL_LITE) {
3564 int pg_num;
3565 pg_num = target_offset/PAGE_SIZE;
3566 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3567 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3568 m = vm_page_lookup(shadow_object,
3569 target_offset + (upl->offset -
3570 shadow_object->paging_offset));
3571 }
3572 }
3573 if(object->pageout) {
3574 if ((t = vm_page_lookup(object, target_offset))
3575 != NULL) {
3576 t->pageout = FALSE;
3577 VM_PAGE_FREE(t);
3578 if(m == NULL) {
3579 m = vm_page_lookup(
3580 shadow_object,
3581 target_offset +
3582 object->shadow_offset);
3583 }
3584 if(m != VM_PAGE_NULL)
3585 vm_object_paging_end(m->object);
3586 }
3587 }
3588 if(m != VM_PAGE_NULL) {
3589 vm_page_lock_queues();
3590 if(m->absent) {
3591 /* COPYOUT = FALSE case */
3592 /* check for error conditions which must */
3593 /* be passed back to the page's customer */
3594 if(error & UPL_ABORT_RESTART) {
3595 m->restart = TRUE;
3596 m->absent = FALSE;
3597 vm_object_absent_release(m->object);
3598 m->page_error = KERN_MEMORY_ERROR;
3599 m->error = TRUE;
3600 } else if(error & UPL_ABORT_UNAVAILABLE) {
3601 m->restart = FALSE;
3602 m->unusual = TRUE;
3603 m->clustered = FALSE;
3604 } else if(error & UPL_ABORT_ERROR) {
3605 m->restart = FALSE;
3606 m->absent = FALSE;
3607 vm_object_absent_release(m->object);
3608 m->page_error = KERN_MEMORY_ERROR;
3609 m->error = TRUE;
3610 } else if(error & UPL_ABORT_DUMP_PAGES) {
3611 m->clustered = TRUE;
3612 } else {
3613 m->clustered = TRUE;
3614 }
3615
3616
3617 m->cleaning = FALSE;
3618 m->overwriting = FALSE;
3619 PAGE_WAKEUP_DONE(m);
3620 if(m->clustered) {
3621 vm_page_free(m);
3622 } else {
3623 vm_page_activate(m);
3624 }
3625
3626 vm_page_unlock_queues();
3627 target_offset += PAGE_SIZE_64;
3628 xfer_size -= PAGE_SIZE;
3629 entry++;
3630 continue;
3631 }
3632 /*
3633 * Handle the trusted pager throttle.
3634 */
3635 if (m->laundry) {
3636 if (!shadow_internal)
3637 vm_page_burst_count--;
3638 vm_page_laundry_count--;
3639 m->laundry = FALSE;
3640 if (vm_page_laundry_count
3641 < vm_page_laundry_min) {
3642 vm_page_laundry_min = 0;
3643 thread_wakeup((event_t)
3644 &vm_page_laundry_count);
3645 }
3646 }
3647 if(m->pageout) {
3648 assert(m->busy);
3649 assert(m->wire_count == 1);
3650 m->pageout = FALSE;
3651 vm_page_unwire(m);
3652 }
3653 m->dump_cleaning = FALSE;
3654 m->cleaning = FALSE;
3655 m->busy = FALSE;
3656 m->overwriting = FALSE;
3657 #if MACH_PAGEMAP
3658 vm_external_state_clr(
3659 m->object->existence_map, m->offset);
3660 #endif /* MACH_PAGEMAP */
3661 if(error & UPL_ABORT_DUMP_PAGES) {
3662 vm_page_free(m);
3663 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3664 } else {
3665 PAGE_WAKEUP(m);
3666 }
3667 vm_page_unlock_queues();
3668 }
3669 target_offset += PAGE_SIZE_64;
3670 xfer_size -= PAGE_SIZE;
3671 entry++;
3672 }
3673 occupied = 1;
3674 if (upl->flags & UPL_DEVICE_MEMORY) {
3675 occupied = 0;
3676 } else if (upl->flags & UPL_LITE) {
3677 int pg_num;
3678 int i;
3679 pg_num = upl->size/PAGE_SIZE;
3680 pg_num = (pg_num + 31) >> 5;
3681 occupied = 0;
3682 for(i= 0; i<pg_num; i++) {
3683 if(lite_list[i] != 0) {
3684 occupied = 1;
3685 break;
3686 }
3687 }
3688 } else {
3689 if(queue_empty(&upl->map_object->memq)) {
3690 occupied = 0;
3691 }
3692 }
3693
3694 if(occupied == 0) {
3695 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
3696 *empty = TRUE;
3697 }
3698 if(object == shadow_object)
3699 vm_object_paging_end(shadow_object);
3700 }
3701 vm_object_unlock(shadow_object);
3702
3703 upl_unlock(upl);
3704
3705 return KERN_SUCCESS;
3706 }
3707
3708 kern_return_t
3709 upl_abort(
3710 upl_t upl,
3711 int error)
3712 {
3713 vm_object_t object = NULL;
3714 vm_object_t shadow_object = NULL;
3715 vm_object_offset_t offset;
3716 vm_object_offset_t shadow_offset;
3717 vm_object_offset_t target_offset;
3718 int i;
3719 wpl_array_t lite_list;
3720 vm_page_t t,m;
3721 int occupied;
3722 boolean_t shadow_internal;
3723
3724 if (upl == UPL_NULL)
3725 return KERN_INVALID_ARGUMENT;
3726
3727 if (upl->flags & UPL_IO_WIRE) {
3728 boolean_t empty;
3729 return upl_commit_range(upl,
3730 0, upl->size, 0,
3731 NULL, 0, &empty);
3732 }
3733
3734 upl_lock(upl);
3735 if(upl->flags & UPL_DEVICE_MEMORY) {
3736 upl_unlock(upl);
3737 return KERN_SUCCESS;
3738 }
3739
3740 object = upl->map_object;
3741
3742 if (object == NULL) {
3743 panic("upl_abort: upl object is not backed by an object");
3744 upl_unlock(upl);
3745 return KERN_INVALID_ARGUMENT;
3746 }
3747
3748 if(object->pageout) {
3749 shadow_object = object->shadow;
3750 shadow_offset = object->shadow_offset;
3751 } else {
3752 shadow_object = object;
3753 shadow_offset = upl->offset - object->paging_offset;
3754 }
3755
3756 if(upl->flags & UPL_INTERNAL) {
3757 lite_list = (wpl_array_t)
3758 ((((vm_offset_t)upl) + sizeof(struct upl))
3759 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3760 } else {
3761 lite_list = (wpl_array_t)
3762 (((vm_offset_t)upl) + sizeof(struct upl));
3763 }
3764 offset = 0;
3765 vm_object_lock(shadow_object);
3766 shadow_internal = shadow_object->internal;
3767
3768 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
3769 m = VM_PAGE_NULL;
3770 target_offset = offset + shadow_offset;
3771 if(upl->flags & UPL_LITE) {
3772 int pg_num;
3773 pg_num = offset/PAGE_SIZE;
3774 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3775 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3776 m = vm_page_lookup(
3777 shadow_object, target_offset);
3778 }
3779 }
3780 if(object->pageout) {
3781 if ((t = vm_page_lookup(object, offset)) != NULL) {
3782 t->pageout = FALSE;
3783 VM_PAGE_FREE(t);
3784 if(m == NULL) {
3785 m = vm_page_lookup(
3786 shadow_object, target_offset);
3787 }
3788 if(m != VM_PAGE_NULL)
3789 vm_object_paging_end(m->object);
3790 }
3791 }
3792 if(m != VM_PAGE_NULL) {
3793 vm_page_lock_queues();
3794 if(m->absent) {
3795 /* COPYOUT = FALSE case */
3796 /* check for error conditions which must */
3797 /* be passed back to the page's customer */
3798 if(error & UPL_ABORT_RESTART) {
3799 m->restart = TRUE;
3800 m->absent = FALSE;
3801 vm_object_absent_release(m->object);
3802 m->page_error = KERN_MEMORY_ERROR;
3803 m->error = TRUE;
3804 } else if(error & UPL_ABORT_UNAVAILABLE) {
3805 m->restart = FALSE;
3806 m->unusual = TRUE;
3807 m->clustered = FALSE;
3808 } else if(error & UPL_ABORT_ERROR) {
3809 m->restart = FALSE;
3810 m->absent = FALSE;
3811 vm_object_absent_release(m->object);
3812 m->page_error = KERN_MEMORY_ERROR;
3813 m->error = TRUE;
3814 } else if(error & UPL_ABORT_DUMP_PAGES) {
3815 m->clustered = TRUE;
3816 } else {
3817 m->clustered = TRUE;
3818 }
3819
3820 m->cleaning = FALSE;
3821 m->overwriting = FALSE;
3822 PAGE_WAKEUP_DONE(m);
3823 if(m->clustered) {
3824 vm_page_free(m);
3825 } else {
3826 vm_page_activate(m);
3827 }
3828 vm_page_unlock_queues();
3829 continue;
3830 }
3831 /*
3832 * Handle the trusted pager throttle.
3833 */
3834 if (m->laundry) {
3835 if (!shadow_internal)
3836 vm_page_burst_count--;
3837 vm_page_laundry_count--;
3838 m->laundry = FALSE;
3839 if (vm_page_laundry_count
3840 < vm_page_laundry_min) {
3841 vm_page_laundry_min = 0;
3842 thread_wakeup((event_t)
3843 &vm_page_laundry_count);
3844 }
3845 }
3846 if(m->pageout) {
3847 assert(m->busy);
3848 assert(m->wire_count == 1);
3849 m->pageout = FALSE;
3850 vm_page_unwire(m);
3851 }
3852 m->dump_cleaning = FALSE;
3853 m->cleaning = FALSE;
3854 m->busy = FALSE;
3855 m->overwriting = FALSE;
3856 #if MACH_PAGEMAP
3857 vm_external_state_clr(
3858 m->object->existence_map, m->offset);
3859 #endif /* MACH_PAGEMAP */
3860 if(error & UPL_ABORT_DUMP_PAGES) {
3861 vm_page_free(m);
3862 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3863 } else {
3864 PAGE_WAKEUP(m);
3865 }
3866 vm_page_unlock_queues();
3867 }
3868 }
3869 occupied = 1;
3870 if (upl->flags & UPL_DEVICE_MEMORY) {
3871 occupied = 0;
3872 } else if (upl->flags & UPL_LITE) {
3873 int pg_num;
3874 int i;
3875 pg_num = upl->size/PAGE_SIZE;
3876 pg_num = (pg_num + 31) >> 5;
3877 occupied = 0;
3878 for(i= 0; i<pg_num; i++) {
3879 if(lite_list[i] != 0) {
3880 occupied = 1;
3881 break;
3882 }
3883 }
3884 } else {
3885 if(queue_empty(&upl->map_object->memq)) {
3886 occupied = 0;
3887 }
3888 }
3889
3890 if(occupied == 0) {
3891 if(object == shadow_object)
3892 vm_object_paging_end(shadow_object);
3893 }
3894 vm_object_unlock(shadow_object);
3895
3896 upl_unlock(upl);
3897 return KERN_SUCCESS;
3898 }
3899
3900 /* an option on commit should be wire */
3901 kern_return_t
3902 upl_commit(
3903 upl_t upl,
3904 upl_page_info_t *page_list,
3905 mach_msg_type_number_t count)
3906 {
3907 if (upl == UPL_NULL)
3908 return KERN_INVALID_ARGUMENT;
3909
3910 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
3911 boolean_t empty;
3912 return upl_commit_range(upl, 0, upl->size, 0,
3913 page_list, count, &empty);
3914 }
3915
3916 if (count == 0)
3917 page_list = NULL;
3918
3919 upl_lock(upl);
3920 if (upl->flags & UPL_DEVICE_MEMORY)
3921 page_list = NULL;
3922
3923 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3924 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
3925 vm_object_t shadow_object = upl->map_object->shadow;
3926 vm_object_t object = upl->map_object;
3927 vm_object_offset_t target_offset;
3928 vm_size_t xfer_end;
3929 int entry;
3930
3931 vm_page_t t, m;
3932 upl_page_info_t *p;
3933
3934 vm_object_lock(shadow_object);
3935
3936 entry = 0;
3937 target_offset = object->shadow_offset;
3938 xfer_end = upl->size + object->shadow_offset;
3939
3940 while(target_offset < xfer_end) {
3941
3942 if ((t = vm_page_lookup(object,
3943 target_offset - object->shadow_offset))
3944 == NULL) {
3945 target_offset += PAGE_SIZE_64;
3946 entry++;
3947 continue;
3948 }
3949
3950 m = vm_page_lookup(shadow_object, target_offset);
3951 if(m != VM_PAGE_NULL) {
3952 if (upl->flags & UPL_CLEAR_DIRTY) {
3953 pmap_clear_modify(m->phys_page);
3954 m->dirty = FALSE;
3955 }
3956 /* It is part of the semantics of */
3957 /* COPYOUT_FROM UPLs that a commit */
3958 /* implies cache sync between the */
3959 /* vm page and the backing store; */
3960 /* this can be used to strip the */
3961 /* precious bit as well as clean. */
3962 if (upl->flags & UPL_PAGE_SYNC_DONE)
3963 m->precious = FALSE;
3964
3965 if(page_list) {
3966 p = &(page_list[entry]);
3967 if(page_list[entry].phys_addr &&
3968 p->pageout && !m->pageout) {
3969 vm_page_lock_queues();
3970 m->busy = TRUE;
3971 m->pageout = TRUE;
3972 vm_page_wire(m);
3973 vm_page_unlock_queues();
3974 } else if (page_list[entry].phys_addr &&
3975 !p->pageout && m->pageout &&
3976 !m->dump_cleaning) {
3977 vm_page_lock_queues();
3978 m->pageout = FALSE;
3979 m->absent = FALSE;
3980 m->overwriting = FALSE;
3981 vm_page_unwire(m);
3982 PAGE_WAKEUP_DONE(m);
3983 vm_page_unlock_queues();
3984 }
3985 page_list[entry].phys_addr = 0;
3986 }
3987 }
3988 target_offset += PAGE_SIZE_64;
3989 entry++;
3990 }
3991
3992 vm_object_unlock(shadow_object);
3993 }
3994 if (upl->flags & UPL_DEVICE_MEMORY) {
3995 vm_object_lock(upl->map_object->shadow);
3996 if(upl->map_object == upl->map_object->shadow)
3997 vm_object_paging_end(upl->map_object->shadow);
3998 vm_object_unlock(upl->map_object->shadow);
3999 }
4000 upl_unlock(upl);
4001 return KERN_SUCCESS;
4002 }
4003
4004
4005
4006 kern_return_t
4007 vm_object_iopl_request(
4008 vm_object_t object,
4009 vm_object_offset_t offset,
4010 vm_size_t size,
4011 upl_t *upl_ptr,
4012 upl_page_info_array_t user_page_list,
4013 unsigned int *page_list_count,
4014 int cntrl_flags)
4015 {
4016 vm_page_t dst_page;
4017 vm_object_offset_t dst_offset = offset;
4018 vm_size_t xfer_size = size;
4019 upl_t upl = NULL;
4020 int entry;
4021 wpl_array_t lite_list;
4022 int page_field_size;
4023 int delayed_unlock = 0;
4024
4025 vm_page_t alias_page = NULL;
4026 kern_return_t ret;
4027 vm_prot_t prot;
4028
4029
4030 if(cntrl_flags & UPL_COPYOUT_FROM) {
4031 prot = VM_PROT_READ;
4032 } else {
4033 prot = VM_PROT_READ | VM_PROT_WRITE;
4034 }
4035
4036 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4037 size = MAX_UPL_TRANSFER * page_size;
4038 }
4039
4040 if(cntrl_flags & UPL_SET_INTERNAL)
4041 if(page_list_count != NULL)
4042 *page_list_count = MAX_UPL_TRANSFER;
4043 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4044 ((page_list_count != NULL) && (*page_list_count != 0)
4045 && *page_list_count < (size/page_size)))
4046 return KERN_INVALID_ARGUMENT;
4047
4048 if((!object->internal) && (object->paging_offset != 0))
4049 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
4050
4051 if(object->phys_contiguous) {
4052 /* No paging operations are possible against this memory */
4053 /* and so no need for map object, ever */
4054 cntrl_flags |= UPL_SET_LITE;
4055 }
4056
4057 if(upl_ptr) {
4058 if(cntrl_flags & UPL_SET_INTERNAL) {
4059 if(cntrl_flags & UPL_SET_LITE) {
4060 upl = upl_create(
4061 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4062 size);
4063 user_page_list = (upl_page_info_t *)
4064 (((vm_offset_t)upl) + sizeof(struct upl));
4065 lite_list = (wpl_array_t)
4066 (((vm_offset_t)user_page_list) +
4067 ((size/PAGE_SIZE) *
4068 sizeof(upl_page_info_t)));
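/* size the lite-list bitmap: one bit per page, rounded up to
 * whole bytes and then to a 4-byte boundary */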
4069 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4070 page_field_size =
4071 (page_field_size + 3) & 0xFFFFFFFC;
4072 bzero((char *)lite_list, page_field_size);
4073 upl->flags =
4074 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4075 } else {
4076 upl = upl_create(UPL_CREATE_INTERNAL, size);
4077 user_page_list = (upl_page_info_t *)
4078 (((vm_offset_t)upl)
4079 + sizeof(struct upl));
4080 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4081 }
4082 } else {
4083 if(cntrl_flags & UPL_SET_LITE) {
4084 upl = upl_create(UPL_CREATE_LITE, size);
4085 lite_list = (wpl_array_t)
4086 (((vm_offset_t)upl) + sizeof(struct upl));
4087 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4088 page_field_size =
4089 (page_field_size + 3) & 0xFFFFFFFC;
4090 bzero((char *)lite_list, page_field_size);
4091 upl->flags = UPL_LITE | UPL_IO_WIRE;
4092 } else {
4093 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4094 upl->flags = UPL_IO_WIRE;
4095 }
4096 }
4097
4098 if(object->phys_contiguous) {
4099 upl->map_object = object;
4100 /* don't need any shadow mappings for this one */
4101 /* since it is already I/O memory */
4102 upl->flags |= UPL_DEVICE_MEMORY;
4103
4104 vm_object_lock(object);
4105 vm_object_paging_begin(object);
4106 vm_object_unlock(object);
4107
4108 /* paging in progress also protects the paging_offset */
4109 upl->offset = offset + object->paging_offset;
4110 upl->size = size;
4111 *upl_ptr = upl;
4112 if(user_page_list) {
4113 user_page_list[0].phys_addr =
4114 (offset + object->shadow_offset)>>12;
4115 user_page_list[0].device = TRUE;
4116 }
4117
4118 if(page_list_count != NULL) {
4119 if (upl->flags & UPL_INTERNAL) {
4120 *page_list_count = 0;
4121 } else {
4122 *page_list_count = 1;
4123 }
4124 }
4125 return KERN_SUCCESS;
4126 }
4127 if(user_page_list)
4128 user_page_list[0].device = FALSE;
4129
4130 if(cntrl_flags & UPL_SET_LITE) {
4131 upl->map_object = object;
4132 } else {
4133 upl->map_object = vm_object_allocate(size);
4134 vm_object_lock(upl->map_object);
4135 upl->map_object->shadow = object;
4136 upl->map_object->pageout = TRUE;
4137 upl->map_object->can_persist = FALSE;
4138 upl->map_object->copy_strategy =
4139 MEMORY_OBJECT_COPY_NONE;
4140 upl->map_object->shadow_offset = offset;
4141 upl->map_object->wimg_bits = object->wimg_bits;
4142 vm_object_unlock(upl->map_object);
4143 }
4144 }
4145 vm_object_lock(object);
4146 vm_object_paging_begin(object);
4147
4148 if (!object->phys_contiguous) {
4149 /* Protect user space from future COW operations */
4150 object->true_share = TRUE;
4151 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4152 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4153 }
4154
4155 /* we can lock the upl offset now that paging_in_progress is set */
4156 if(upl_ptr) {
4157 upl->size = size;
4158 upl->offset = offset + object->paging_offset;
4159 *upl_ptr = upl;
4160 #ifdef UBC_DEBUG
4161 queue_enter(&object->uplq, upl, upl_t, uplq);
4162 #endif /* UBC_DEBUG */
4163 }
4164
4165 entry = 0;
4166 while (xfer_size) {
4167 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4168 if (delayed_unlock) {
4169 delayed_unlock = 0;
4170 vm_page_unlock_queues();
4171 }
4172 vm_object_unlock(object);
4173 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4174 vm_object_lock(object);
4175 }
4176 dst_page = vm_page_lookup(object, dst_offset);
4177
4178 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4179 (dst_page->unusual && (dst_page->error ||
4180 dst_page->restart || dst_page->absent ||
4181 dst_page->fictitious ||
4182 prot & dst_page->page_lock))) {
4183 vm_fault_return_t result;
4184 do {
4185 vm_page_t top_page;
4186 kern_return_t error_code;
4187 int interruptible;
4188
4189 vm_object_offset_t lo_offset = offset;
4190 vm_object_offset_t hi_offset = offset + size;
4191
4192
4193 if (delayed_unlock) {
4194 delayed_unlock = 0;
4195 vm_page_unlock_queues();
4196 }
4197
4198 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4199 interruptible = THREAD_ABORTSAFE;
4200 } else {
4201 interruptible = THREAD_UNINT;
4202 }
4203
4204 result = vm_fault_page(object, dst_offset,
4205 prot | VM_PROT_WRITE, FALSE,
4206 interruptible,
4207 lo_offset, hi_offset,
4208 VM_BEHAVIOR_SEQUENTIAL,
4209 &prot, &dst_page, &top_page,
4210 (int *)0,
4211 &error_code, FALSE, FALSE, NULL, 0);
4212
4213 switch(result) {
4214 case VM_FAULT_SUCCESS:
4215
4216 PAGE_WAKEUP_DONE(dst_page);
4217
4218 /*
4219 * Release paging references and
4220 * top-level placeholder page, if any.
4221 */
4222
4223 if(top_page != VM_PAGE_NULL) {
4224 vm_object_t local_object;
4225 local_object =
4226 top_page->object;
4227 if(top_page->object
4228 != dst_page->object) {
4229 vm_object_lock(
4230 local_object);
4231 VM_PAGE_FREE(top_page);
4232 vm_object_paging_end(
4233 local_object);
4234 vm_object_unlock(
4235 local_object);
4236 } else {
4237 VM_PAGE_FREE(top_page);
4238 vm_object_paging_end(
4239 local_object);
4240 }
4241 }
4242
4243 break;
4244
4245
4246 case VM_FAULT_RETRY:
4247 vm_object_lock(object);
4248 vm_object_paging_begin(object);
4249 break;
4250
4251 case VM_FAULT_FICTITIOUS_SHORTAGE:
4252 vm_page_more_fictitious();
4253 vm_object_lock(object);
4254 vm_object_paging_begin(object);
4255 break;
4256
4257 case VM_FAULT_MEMORY_SHORTAGE:
4258 if (vm_page_wait(interruptible)) {
4259 vm_object_lock(object);
4260 vm_object_paging_begin(object);
4261 break;
4262 }
4263 /* fall thru */
4264
4265 case VM_FAULT_INTERRUPTED:
4266 error_code = MACH_SEND_INTERRUPTED;
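/* fall thru */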
4267 case VM_FAULT_MEMORY_ERROR:
4268 ret = (error_code ? error_code:
4269 KERN_MEMORY_ERROR);
4270 vm_object_lock(object);
4271 for(; offset < dst_offset;
4272 offset += PAGE_SIZE) {
4273 dst_page = vm_page_lookup(
4274 object, offset);
4275 if(dst_page == VM_PAGE_NULL)
4276 panic("vm_object_iopl_request: Wired pages missing. \n");
4277 vm_page_lock_queues();
4278 vm_page_unwire(dst_page);
4279 vm_page_unlock_queues();
4280 VM_STAT(reactivations++);
4281 }
4282 vm_object_unlock(object);
4283 upl_destroy(upl);
4284 return ret;
4285 }
4286 } while ((result != VM_FAULT_SUCCESS)
4287 || (result == VM_FAULT_INTERRUPTED));
4288 }
4289 if (delayed_unlock == 0)
4290 vm_page_lock_queues();
4291 vm_page_wire(dst_page);
4292
4293 if (upl_ptr) {
4294 if (cntrl_flags & UPL_SET_LITE) {
4295 int pg_num;
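/* mark this page in the UPL's lite list: one bit per page,
 * 32 bits per bitmap word */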
4296 pg_num = (dst_offset-offset)/PAGE_SIZE;
4297 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4298 } else {
4299 /*
4300 * Convert the fictitious page to a
4301 * private shadow of the real page.
4302 */
4303 assert(alias_page->fictitious);
4304 alias_page->fictitious = FALSE;
4305 alias_page->private = TRUE;
4306 alias_page->pageout = TRUE;
4307 alias_page->phys_page = dst_page->phys_page;
4308 vm_page_wire(alias_page);
4309
4310 vm_page_insert(alias_page,
4311 upl->map_object, size - xfer_size);
4312 assert(!alias_page->wanted);
4313 alias_page->busy = FALSE;
4314 alias_page->absent = FALSE;
4315 }
4316
4317 /* expect the page to be used */
4318 dst_page->reference = TRUE;
4319
4320 if (!(cntrl_flags & UPL_COPYOUT_FROM))
4321 dst_page->dirty = TRUE;
4322 alias_page = NULL;
4323
4324 if (user_page_list) {
4325 user_page_list[entry].phys_addr
4326 = dst_page->phys_page;
4327 user_page_list[entry].dirty =
4328 dst_page->dirty;
4329 user_page_list[entry].pageout =
4330 dst_page->pageout;
4331 user_page_list[entry].absent =
4332 dst_page->absent;
4333 user_page_list[entry].precious =
4334 dst_page->precious;
4335 }
4336 }
4337 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4338 delayed_unlock = 0;
4339 vm_page_unlock_queues();
4340 }
4341 entry++;
4342 dst_offset += PAGE_SIZE_64;
4343 xfer_size -= PAGE_SIZE;
4344 }
4345 if (delayed_unlock)
4346 vm_page_unlock_queues();
4347
4348 if (upl->flags & UPL_INTERNAL) {
4349 if(page_list_count != NULL)
4350 *page_list_count = 0;
4351 } else if (page_list_count != NULL) {
4352 if (*page_list_count > entry)
4353 *page_list_count = entry;
4354 }
4355
4356 if (alias_page != NULL) {
4357 vm_page_lock_queues();
4358 vm_page_free(alias_page);
4359 vm_page_unlock_queues();
4360 }
4361
4362 vm_object_unlock(object);
4363 return KERN_SUCCESS;
4364 }
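
/*
 * Editorial sketch (not part of the original source): one way a kernel
 * client might use vm_object_iopl_request() to wire a range of an object
 * for device I/O.  The helper name is hypothetical and error handling is
 * abbreviated; upl_commit() and upl_deallocate() are assumed to be
 * available as above.
 */
#if 0	/* illustration only, never compiled */
static kern_return_t
example_wire_for_io(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_size_t		size)
{
	upl_t		upl = NULL;
	upl_page_info_t	*pl;
	unsigned int	count = MAX_UPL_TRANSFER;
	kern_return_t	kr;

	/*
	 * UPL_SET_INTERNAL appends the page list to the upl itself,
	 * UPL_SET_LITE avoids building a shadow map object, and leaving
	 * UPL_COPYOUT_FROM clear asks for read/write access, so the
	 * wired pages come back marked dirty.
	 */
	kr = vm_object_iopl_request(object, offset, size, &upl,
				    NULL, &count,
				    UPL_SET_INTERNAL | UPL_SET_LITE);
	if (kr != KERN_SUCCESS)
		return kr;

	pl = (upl_page_info_t *)((vm_offset_t)upl +
	    upl_get_internal_pagelist_offset());

	/* ... program the device with pl[i].phys_addr here ... */

	kr = upl_commit(upl, pl, count);
	upl_deallocate(upl);
	return kr;
}
#endif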
4365
4366 vm_size_t
4367 upl_get_internal_pagelist_offset()
4368 {
4369 return sizeof(struct upl);
4370 }
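
/*
 * Editorial note (not part of the original source): for UPLs created with
 * UPL_CREATE_INTERNAL the upl_page_info_t array is placed directly behind
 * struct upl (see the upl_create() callers above), so outside code can
 * recover it the way this hypothetical helper does.
 */
#if 0	/* illustration only, never compiled */
static upl_page_info_t *
example_internal_page_list(upl_t upl)
{
	return (upl_page_info_t *)((vm_offset_t)upl +
	    upl_get_internal_pagelist_offset());
}
#endif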
4371
4372 void
4373 upl_set_dirty(
4374 upl_t upl)
4375 {
4376 upl->flags |= UPL_CLEAR_DIRTY;
4377 }
4378
4379 void
4380 upl_clear_dirty(
4381 upl_t upl)
4382 {
4383 upl->flags &= ~UPL_CLEAR_DIRTY;
4384 }
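
/*
 * Editorial note (not part of the original source): despite their names,
 * upl_set_dirty() and upl_clear_dirty() do not touch page state directly.
 * They arm or disarm the UPL_CLEAR_DIRTY flag, which upl_commit() honors
 * by calling pmap_clear_modify() and clearing m->dirty on each committed
 * page (see the UPL_CLEAR_DIRTY check in upl_commit() above).
 */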
4385
4386
4387 #ifdef MACH_BSD
4388
4389 boolean_t upl_page_present(upl_page_info_t *upl, int index)
4390 {
4391 return(UPL_PAGE_PRESENT(upl, index));
4392 }
4393 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
4394 {
4395 return(UPL_DIRTY_PAGE(upl, index));
4396 }
4397 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
4398 {
4399 return(UPL_VALID_PAGE(upl, index));
4400 }
4401 vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
4402 {
4403 return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
4404 }
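
/*
 * Editorial sketch (not part of the original source): walking a UPL page
 * list with the accessors above.  The helper name is hypothetical; npages
 * would normally be upl->size / PAGE_SIZE.
 */
#if 0	/* illustration only, never compiled */
static void
example_dump_page_list(upl_page_info_t *pl, int npages)
{
	int i;

	for (i = 0; i < npages; i++) {
		if (!upl_page_present(pl, i))
			continue;
		printf("page %d: phys 0x%x valid %d dirty %d\n",
		    i, upl_phys_page(pl, i),
		    upl_valid_page(pl, i), upl_dirty_page(pl, i));
	}
}
#endif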
4405
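/*
 * Editorial note (not part of the original source): vm_countdirtypages()
 * below is a debugging aid.  It walks the inactive, zero-fill and active
 * page queues and prints, for the inactive+zero-fill set ("IN Q") and the
 * active set ("AC Q"), the counts of dirty, pageout and precious pages,
 * in that order.
 */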
4406 void
4407 vm_countdirtypages(void)
4408 {
4409 vm_page_t m;
4410 int dpages;
4411 int pgopages;
4412 int precpages;
4413
4414
4415 dpages=0;
4416 pgopages=0;
4417 precpages=0;
4418
4419 vm_page_lock_queues();
4420 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4421 do {
4422 if (m ==(vm_page_t )0) break;
4423
4424 if(m->dirty) dpages++;
4425 if(m->pageout) pgopages++;
4426 if(m->precious) precpages++;
4427
4428 m = (vm_page_t) queue_next(&m->pageq);
4429 if (m ==(vm_page_t )0) break;
4430
4431 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
4432 vm_page_unlock_queues();
4433
4434 vm_page_lock_queues();
4435 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4436 do {
4437 if (m ==(vm_page_t )0) break;
4438
4439 if(m->dirty) dpages++;
4440 if(m->pageout) pgopages++;
4441 if(m->precious) precpages++;
4442
4443 m = (vm_page_t) queue_next(&m->pageq);
4444 if (m ==(vm_page_t )0) break;
4445
4446 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
4447 vm_page_unlock_queues();
4448
4449 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
4450
4451 dpages=0;
4452 pgopages=0;
4453 precpages=0;
4454
4455 vm_page_lock_queues();
4456 m = (vm_page_t) queue_first(&vm_page_queue_active);
4457
4458 do {
4459 if(m == (vm_page_t )0) break;
4460 if(m->dirty) dpages++;
4461 if(m->pageout) pgopages++;
4462 if(m->precious) precpages++;
4463
4464 m = (vm_page_t) queue_next(&m->pageq);
4465 if(m == (vm_page_t )0) break;
4466
4467 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
4468 vm_page_unlock_queues();
4469
4470 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
4471
4472 }
4473 #endif /* MACH_BSD */
4474
4475 #ifdef UBC_DEBUG
4476 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
4477 {
4478 upl->ubc_alias1 = alias1;
4479 upl->ubc_alias2 = alias2;
4480 return KERN_SUCCESS;
4481 }
4482 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
4483 {
4484 if(al)
4485 *al = upl->ubc_alias1;
4486 if(al2)
4487 *al2 = upl->ubc_alias2;
4488 return KERN_SUCCESS;
4489 }
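
/*
 * Editorial sketch (not part of the original source): under UBC_DEBUG a
 * creator can stamp each UPL with two alias words, typically an owner
 * pointer and a call-site cookie, so the UPL can be identified later from
 * the debugger.  The helper and its arguments are hypothetical.
 */
#if 0	/* illustration only, never compiled */
static void
example_tag_upl(upl_t upl, void *owner, unsigned int call_site)
{
	unsigned int a1, a2;

	upl_ubc_alias_set(upl, (unsigned int) owner, call_site);

	/* read the tags back, e.g. while debugging a leaked UPL */
	upl_ubc_alias_get(upl, &a1, &a2);
}
#endif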
4490 #endif /* UBC_DEBUG */
4491
4492
4493
4494 #if MACH_KDB
4495 #include <ddb/db_output.h>
4496 #include <ddb/db_print.h>
4497 #include <vm/vm_print.h>
4498
4499 #define printf kdbprintf
4500 extern int db_indent;
4501 void db_pageout(void);
4502
4503 void
4504 db_vm(void)
4505 {
4506 extern int vm_page_gobble_count;
4507
4508 iprintf("VM Statistics:\n");
4509 db_indent += 2;
4510 iprintf("pages:\n");
4511 db_indent += 2;
4512 iprintf("activ %5d inact %5d free %5d",
4513 vm_page_active_count, vm_page_inactive_count,
4514 vm_page_free_count);
4515 printf(" wire %5d gobbl %5d\n",
4516 vm_page_wire_count, vm_page_gobble_count);
4517 iprintf("laund %5d\n",
4518 vm_page_laundry_count);
4519 db_indent -= 2;
4520 iprintf("target:\n");
4521 db_indent += 2;
4522 iprintf("min %5d inact %5d free %5d",
4523 vm_page_free_min, vm_page_inactive_target,
4524 vm_page_free_target);
4525 printf(" resrv %5d\n", vm_page_free_reserved);
4526 db_indent -= 2;
4527
4528 iprintf("burst:\n");
4529 db_indent += 2;
4530 iprintf("max %5d min %5d wait %5d empty %5d\n",
4531 vm_pageout_burst_max, vm_pageout_burst_min,
4532 vm_pageout_burst_wait, vm_pageout_empty_wait);
4533 db_indent -= 2;
4534 iprintf("pause:\n");
4535 db_indent += 2;
4536 iprintf("count %5d max %5d\n",
4537 vm_pageout_pause_count, vm_pageout_pause_max);
4538 #if MACH_COUNTERS
4539 iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
4540 #endif /* MACH_COUNTERS */
4541 db_indent -= 2;
4542 db_pageout();
4543 db_indent -= 2;
4544 }
4545
4546 void
4547 db_pageout(void)
4548 {
4549 #if MACH_COUNTERS
4550 extern int c_laundry_pages_freed;
4551 #endif /* MACH_COUNTERS */
4552
4553 iprintf("Pageout Statistics:\n");
4554 db_indent += 2;
4555 iprintf("active %5d inactv %5d\n",
4556 vm_pageout_active, vm_pageout_inactive);
4557 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
4558 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
4559 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
4560 iprintf("used %5d clean %5d dirty %5d\n",
4561 vm_pageout_inactive_used, vm_pageout_inactive_clean,
4562 vm_pageout_inactive_dirty);
4563 #if MACH_COUNTERS
4564 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
4565 #endif /* MACH_COUNTERS */
4566 #if MACH_CLUSTER_STATS
4567 iprintf("Cluster Statistics:\n");
4568 db_indent += 2;
4569 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
4570 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
4571 vm_pageout_cluster_collisions);
4572 iprintf("clusters %5d conversions %5d\n",
4573 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
4574 db_indent -= 2;
4575 iprintf("Target Statistics:\n");
4576 db_indent += 2;
4577 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
4578 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
4579 vm_pageout_target_page_freed);
4580 db_indent -= 2;
4581 #endif /* MACH_CLUSTER_STATS */
4582 db_indent -= 2;
4583 }
4584
4585 #if MACH_CLUSTER_STATS
4586 unsigned long vm_pageout_cluster_dirtied = 0;
4587 unsigned long vm_pageout_cluster_cleaned = 0;
4588 unsigned long vm_pageout_cluster_collisions = 0;
4589 unsigned long vm_pageout_cluster_clusters = 0;
4590 unsigned long vm_pageout_cluster_conversions = 0;
4591 unsigned long vm_pageout_target_collisions = 0;
4592 unsigned long vm_pageout_target_page_dirtied = 0;
4593 unsigned long vm_pageout_target_page_freed = 0;
4594 #define CLUSTER_STAT(clause) clause
4595 #else /* MACH_CLUSTER_STATS */
4596 #define CLUSTER_STAT(clause)
4597 #endif /* MACH_CLUSTER_STATS */
4598
4599 #endif /* MACH_KDB */