1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52 /*
53 * File: vm/vm_pageout.c
54 * Author: Avadis Tevanian, Jr., Michael Wayne Young
55 * Date: 1985
56 *
57 * The proverbial page-out daemon.
58 */
59
60 #include <mach_pagemap.h>
61 #include <mach_cluster_stats.h>
62 #include <mach_kdb.h>
63 #include <advisory_pageout.h>
64
65 #include <mach/mach_types.h>
66 #include <mach/memory_object.h>
67 #include <mach/memory_object_default.h>
68 #include <mach/memory_object_control_server.h>
69 #include <mach/mach_host_server.h>
70 #include <mach/vm_param.h>
71 #include <mach/vm_statistics.h>
72 #include <kern/host_statistics.h>
73 #include <kern/counters.h>
74 #include <kern/thread.h>
75 #include <kern/xpr.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_fault.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_pageout.h>
82 #include <machine/vm_tuning.h>
83 #include <kern/misc_protos.h>
84
85
86 extern ipc_port_t memory_manager_default;
87
88 #ifndef VM_PAGE_LAUNDRY_MAX
89 #define VM_PAGE_LAUNDRY_MAX 16 /* outstanding DMM+EMM page cleans */
90 #endif  /* VM_PAGE_LAUNDRY_MAX */
91
92 #ifndef VM_PAGEOUT_BURST_MAX
93 #define VM_PAGEOUT_BURST_MAX 6 /* simultaneous EMM page cleans */
94 #endif /* VM_PAGEOUT_BURST_MAX */
95
96 #ifndef VM_PAGEOUT_BURST_WAIT
97 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
98 #endif /* VM_PAGEOUT_BURST_WAIT */
99
100 #ifndef VM_PAGEOUT_EMPTY_WAIT
101 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
102 #endif /* VM_PAGEOUT_EMPTY_WAIT */
103
104 /*
105 * To obtain a reasonable LRU approximation, the inactive queue
106 * needs to be large enough to give pages on it a chance to be
107 * referenced a second time. This macro defines the fraction
108 * of active+inactive pages that should be inactive.
109 * The pageout daemon uses it to update vm_page_inactive_target.
110 *
111 * If vm_page_free_count falls below vm_page_free_target and
112 * vm_page_inactive_count is below vm_page_inactive_target,
113 * then the pageout daemon starts running.
114 */
115
116 #ifndef VM_PAGE_INACTIVE_TARGET
117 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
118 #endif /* VM_PAGE_INACTIVE_TARGET */
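/*
 * Worked example (illustrative numbers only): assuming a machine with
 * 24000 pages on the active and inactive queues combined,
 * VM_PAGE_INACTIVE_TARGET(24000) = 24000 * 1 / 3 = 8000 pages, i.e.
 * roughly a third of the pageable pages should sit on the inactive
 * queue so they get a chance to be re-referenced before reclaim.
 */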
119
120 /*
121 * Once the pageout daemon starts running, it keeps going
122 * until vm_page_free_count meets or exceeds vm_page_free_target.
123 */
124
125 #ifndef VM_PAGE_FREE_TARGET
126 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
127 #endif /* VM_PAGE_FREE_TARGET */
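/*
 * Worked example (same illustrative machine): with 8000 pages available
 * after the reserve, VM_PAGE_FREE_TARGET(8000) = 15 + 8000 / 80 = 115
 * pages, so the daemon keeps scanning until at least 115 pages are free.
 */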
128
129 /*
130 * The pageout daemon always starts running once vm_page_free_count
131 * falls below vm_page_free_min.
132 */
133
134 #ifndef VM_PAGE_FREE_MIN
135 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
136 #endif /* VM_PAGE_FREE_MIN */
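/*
 * Worked example (same illustrative machine):
 * VM_PAGE_FREE_MIN(8000) = 10 + 8000 / 100 = 90 pages, a slightly lower
 * threshold than the free target computed above.
 */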
137
138 /*
139 * When vm_page_free_count falls below vm_page_free_reserved,
140 * only vm-privileged threads can allocate pages. vm-privilege
141 * allows the pageout daemon and default pager (and any other
142 * associated threads needed for default pageout) to continue
143 * operation by dipping into the reserved pool of pages.
144 */
145
146 #ifndef VM_PAGE_FREE_RESERVED
147 #define VM_PAGE_FREE_RESERVED \
148 ((6 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
149 #endif /* VM_PAGE_FREE_RESERVED */
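/*
 * Worked example (assuming the default VM_PAGE_LAUNDRY_MAX of 16 and,
 * say, NCPUS == 2): VM_PAGE_FREE_RESERVED = (6 * 16) + 2 = 98 pages.
 * vm_page_free_reserve() below adds this reserve on top of the free_min
 * and free_target values derived from the macros above.
 */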
150
151 /*
152 * Exported variable used to broadcast the activation of the pageout scan.
153 * The working set code uses this to throttle its use of pmap removes. In
154 * this way, code which runs within memory in an uncontested context does
155 * not keep encountering soft faults.
156 */
157
158 unsigned int vm_pageout_scan_event_counter = 0;
159
160 /*
161 * Forward declarations for internal routines.
162 */
163 extern void vm_pageout_continue(void);
164 extern void vm_pageout_scan(void);
165 extern void vm_pageout_throttle(vm_page_t m);
166 extern vm_page_t vm_pageout_cluster_page(
167 vm_object_t object,
168 vm_object_offset_t offset,
169 boolean_t precious_clean);
170
171 unsigned int vm_pageout_reserved_internal = 0;
172 unsigned int vm_pageout_reserved_really = 0;
173
174 unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
175 unsigned int vm_page_laundry_min = 0;
176 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
177 unsigned int vm_pageout_burst_max = 0;
178 unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
179 unsigned int vm_pageout_burst_min = 0;
180 unsigned int vm_pageout_burst_loop_throttle = 4096;
181 unsigned int vm_pageout_pause_count = 0;
182 unsigned int vm_pageout_pause_max = 0;
183 unsigned int vm_free_page_pause = 100; /* milliseconds */
184
185 /*
186 * Protection against zero fill flushing live working sets derived
187 * from existing backing store and files
188 */
189 unsigned int vm_accellerate_zf_pageout_trigger = 400;
190 unsigned int vm_zf_iterator;
191 unsigned int vm_zf_iterator_count = 40;
192 unsigned int last_page_zf;
193 unsigned int vm_zf_count = 0;
194
195 /*
196 * These variables record the pageout daemon's actions:
197 * how many pages it looks at and what happens to those pages.
198 * No locking needed because only one thread modifies the variables.
199 */
200
201 unsigned int vm_pageout_active = 0; /* debugging */
202 unsigned int vm_pageout_inactive = 0; /* debugging */
203 unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
204 unsigned int vm_pageout_inactive_forced = 0; /* debugging */
205 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
206 unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
207 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
208 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
209 unsigned int vm_pageout_inactive_used = 0; /* debugging */
210 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
211 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
212 unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
213 unsigned int vm_stat_discard = 0; /* debugging */
214 unsigned int vm_stat_discard_sent = 0; /* debugging */
215 unsigned int vm_stat_discard_failure = 0; /* debugging */
216 unsigned int vm_stat_discard_throttle = 0; /* debugging */
217 unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
218 unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
219 unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
220 unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
221 unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
222 unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
223
224 /*
225 * Backing store throttle when BS is exhausted
226 */
227 unsigned int vm_backing_store_low = 0;
228
229 unsigned int vm_pageout_out_of_line = 0;
230 unsigned int vm_pageout_in_place = 0;
231
232
233 /*
234 * Routine: vm_backing_store_disable
235 * Purpose:
236 * Suspend non-privileged threads wishing to extend
237 * backing store when we are low on backing store
238 * (Synchronized by caller)
239 */
240 void
241 vm_backing_store_disable(
242 boolean_t disable)
243 {
244 if(disable) {
245 vm_backing_store_low = 1;
246 } else {
247 if(vm_backing_store_low) {
248 vm_backing_store_low = 0;
249 thread_wakeup((event_t) &vm_backing_store_low);
250 }
251 }
252 }
253
254
255 /*
256 * Routine: vm_pageout_object_allocate
257 * Purpose:
258 * Allocate an object for use as out-of-line memory in a
259 * data_return/data_initialize message.
260 * The page must be in an unlocked object.
261 *
262 * If the page belongs to a trusted pager, cleaning in place
263 * will be used, which utilizes a special "pageout object"
264 * containing private alias pages for the real page frames.
265 * Untrusted pagers use normal out-of-line memory.
266 */
267 vm_object_t
268 vm_pageout_object_allocate(
269 vm_page_t m,
270 vm_size_t size,
271 vm_object_offset_t offset)
272 {
273 vm_object_t object = m->object;
274 vm_object_t new_object;
275
276 assert(object->pager_ready);
277
278 new_object = vm_object_allocate(size);
279
280 if (object->pager_trusted) {
281 assert (offset < object->size);
282
283 vm_object_lock(new_object);
284 new_object->pageout = TRUE;
285 new_object->shadow = object;
286 new_object->can_persist = FALSE;
287 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
288 new_object->shadow_offset = offset;
289 vm_object_unlock(new_object);
290
291 /*
292 * Take a paging reference on the object. This will be dropped
293 * in vm_pageout_object_terminate()
294 */
295 vm_object_lock(object);
296 vm_object_paging_begin(object);
297 vm_page_lock_queues();
298 vm_pageout_throttle(m);
299 vm_page_unlock_queues();
300 vm_object_unlock(object);
301
302 vm_pageout_in_place++;
303 } else
304 vm_pageout_out_of_line++;
305 return(new_object);
306 }
307
308 #if MACH_CLUSTER_STATS
309 unsigned long vm_pageout_cluster_dirtied = 0;
310 unsigned long vm_pageout_cluster_cleaned = 0;
311 unsigned long vm_pageout_cluster_collisions = 0;
312 unsigned long vm_pageout_cluster_clusters = 0;
313 unsigned long vm_pageout_cluster_conversions = 0;
314 unsigned long vm_pageout_target_collisions = 0;
315 unsigned long vm_pageout_target_page_dirtied = 0;
316 unsigned long vm_pageout_target_page_freed = 0;
317 #define CLUSTER_STAT(clause) clause
318 #else /* MACH_CLUSTER_STATS */
319 #define CLUSTER_STAT(clause)
320 #endif /* MACH_CLUSTER_STATS */
321
322 /*
323 * Routine: vm_pageout_object_terminate
324 * Purpose:
325 * Destroy the pageout_object allocated by
326 * vm_pageout_object_allocate(), and perform all of the
327 * required cleanup actions.
328 *
329 * In/Out conditions:
330 * The object must be locked, and will be returned locked.
331 */
332 void
333 vm_pageout_object_terminate(
334 vm_object_t object)
335 {
336 vm_object_t shadow_object;
337 boolean_t shadow_internal;
338
339 /*
340 * Deal with the deallocation (last reference) of a pageout object
341 * (used for cleaning-in-place) by dropping the paging references/
342 * freeing pages in the original object.
343 */
344
345 assert(object->pageout);
346 shadow_object = object->shadow;
347 vm_object_lock(shadow_object);
348 shadow_internal = shadow_object->internal;
349
350 while (!queue_empty(&object->memq)) {
351 vm_page_t p, m;
352 vm_object_offset_t offset;
353
354 p = (vm_page_t) queue_first(&object->memq);
355
356 assert(p->private);
357 assert(p->pageout);
358 p->pageout = FALSE;
359 assert(!p->cleaning);
360
361 offset = p->offset;
362 VM_PAGE_FREE(p);
363 p = VM_PAGE_NULL;
364
365 m = vm_page_lookup(shadow_object,
366 offset + object->shadow_offset);
367
368 if(m == VM_PAGE_NULL)
369 continue;
370 assert(m->cleaning);
371 /* used as a trigger on upl_commit etc to recognize the */
372 /* pageout daemon's subsequent desire to pageout a cleaning */
373 /* page. When the bit is on, the upl commit code will */
374 /* respect the pageout bit in the target page over the */
375 /* caller's page list indication */
376 m->dump_cleaning = FALSE;
377
378 /*
379 * Account for the paging reference taken when
380 * m->cleaning was set on this page.
381 */
382 vm_object_paging_end(shadow_object);
383 assert((m->dirty) || (m->precious) ||
384 (m->busy && m->cleaning));
385
386 /*
387 * Handle the trusted pager throttle.
388 * Also decrement the burst throttle (if external).
389 */
390 vm_page_lock_queues();
391 if (m->laundry) {
392 if (!shadow_internal)
393 vm_page_burst_count--;
394 vm_page_laundry_count--;
395 m->laundry = FALSE;
396 if (vm_page_laundry_count < vm_page_laundry_min) {
397 vm_page_laundry_min = 0;
398 thread_wakeup((event_t) &vm_page_laundry_count);
399 }
400 }
401
402 /*
403 * Handle the "target" page(s). These pages are to be freed if
404 * successfully cleaned. Target pages are always busy, and are
405 * wired exactly once. The initial target pages are not mapped
406 * (so they cannot be referenced or modified), but converted target
407 * pages may have been modified between the selection as an
408 * adjacent page and conversion to a target.
409 */
410 if (m->pageout) {
411 assert(m->busy);
412 assert(m->wire_count == 1);
413 m->cleaning = FALSE;
414 m->pageout = FALSE;
415 #if MACH_CLUSTER_STATS
416 if (m->wanted) vm_pageout_target_collisions++;
417 #endif
418 /*
419 * Revoke all access to the page. Since the object is
420 * locked, and the page is busy, this prevents the page
421 * from being dirtied after the pmap_is_modified() call
422 * returns.
423 */
424 pmap_page_protect(m->phys_page, VM_PROT_NONE);
425
426 /*
427 * Since the page is left "dirty" but "not modified", we
428 * can detect whether the page was redirtied during
429 * pageout by checking the modify state.
430 */
431 m->dirty = pmap_is_modified(m->phys_page);
432
433 if (m->dirty) {
434 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
435 vm_page_unwire(m);/* reactivates */
436 VM_STAT(reactivations++);
437 PAGE_WAKEUP_DONE(m);
438 } else {
439 CLUSTER_STAT(vm_pageout_target_page_freed++;)
440 vm_page_free(m);/* clears busy, etc. */
441 }
442 vm_page_unlock_queues();
443 continue;
444 }
445 /*
446 * Handle the "adjacent" pages. These pages were cleaned in
447 * place, and should be left alone.
448 * If the page is not already on a paging queue, re-queue it:
449 * activate it if it was referenced, otherwise deactivate it.
450 */
451 if (!m->active && !m->inactive && !m->private) {
452 if (m->reference)
453 vm_page_activate(m);
454 else
455 vm_page_deactivate(m);
456 }
457 if((m->busy) && (m->cleaning)) {
458
459 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
460 m->busy = FALSE;
461
462 /* We do not re-set m->dirty ! */
463 /* The page was busy so no extraneous activity */
464 /* could have occurred. COPY_INTO is a read into the */
465 /* new pages. CLEAN_IN_PLACE does actually write */
466 /* out the pages but handling outside of this code */
467 /* will take care of resetting dirty. We clear the */
468 /* modify bit, however, for the Programmed I/O case. */
469 pmap_clear_modify(m->phys_page);
470 if(m->absent) {
471 m->absent = FALSE;
472 if(shadow_object->absent_count == 1)
473 vm_object_absent_release(shadow_object);
474 else
475 shadow_object->absent_count--;
476 }
477 m->overwriting = FALSE;
478 } else if (m->overwriting) {
479 /* alternate request page list, write to page_list */
480 /* case. Occurs when the original page was wired */
481 /* at the time of the list request */
482 assert(m->wire_count != 0);
483 vm_page_unwire(m);/* reactivates */
484 m->overwriting = FALSE;
485 } else {
486 /*
487 * Set the dirty state according to whether or not the page was
488 * modified during the pageout. Note that we purposefully do
489 * NOT call pmap_clear_modify since the page is still mapped.
490 * If the page were to be dirtied between the 2 calls,
491 * this fact would be lost. This code is only necessary to
492 * maintain statistics, since the pmap module is always
493 * consulted if m->dirty is false.
494 */
495 #if MACH_CLUSTER_STATS
496 m->dirty = pmap_is_modified(m->phys_page);
497
498 if (m->dirty) vm_pageout_cluster_dirtied++;
499 else vm_pageout_cluster_cleaned++;
500 if (m->wanted) vm_pageout_cluster_collisions++;
501 #else
502 m->dirty = 0;
503 #endif
504 }
505 m->cleaning = FALSE;
506
507 /*
508 * Wakeup any thread waiting for the page to be un-cleaning.
509 */
510 PAGE_WAKEUP(m);
511 vm_page_unlock_queues();
512 }
513 /*
514 * Account for the paging reference taken in vm_pageout_object_allocate.
515 */
516 vm_object_paging_end(shadow_object);
517 vm_object_unlock(shadow_object);
518
519 assert(object->ref_count == 0);
520 assert(object->paging_in_progress == 0);
521 assert(object->resident_page_count == 0);
522 return;
523 }
524
525 /*
526 * Routine: vm_pageout_setup
527 * Purpose:
528 * Set up a page for pageout (clean & flush).
529 *
530 * Move the page to a new object, as part of which it will be
531 * sent to its memory manager in a memory_object_data_write or
532 * memory_object_initialize message.
533 *
534 * The "new_object" and "new_offset" arguments
535 * indicate where the page should be moved.
536 *
537 * In/Out conditions:
538 * The page in question must not be on any pageout queues,
539 * and must be busy. The object to which it belongs
540 * must be unlocked, and the caller must hold a paging
541 * reference to it. The new_object must not be locked.
542 *
543 * This routine returns a pointer to a place-holder page,
544 * inserted at the same offset, to block out-of-order
545 * requests for the page. The place-holder page must
546 * be freed after the data_write or initialize message
547 * has been sent.
548 *
549 * The original page is put on a paging queue and marked
550 * not busy on exit.
551 */
552 vm_page_t
553 vm_pageout_setup(
554 register vm_page_t m,
555 register vm_object_t new_object,
556 vm_object_offset_t new_offset)
557 {
558 register vm_object_t old_object = m->object;
559 vm_object_offset_t paging_offset;
560 vm_object_offset_t offset;
561 register vm_page_t holding_page;
562 register vm_page_t new_m;
563 register vm_page_t new_page;
564 boolean_t need_to_wire = FALSE;
565
566
567 XPR(XPR_VM_PAGEOUT,
568 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
569 (integer_t)m->object, (integer_t)m->offset,
570 (integer_t)m, (integer_t)new_object,
571 (integer_t)new_offset);
572 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
573 !m->restart);
574
575 assert(m->dirty || m->precious);
576
577 /*
578 * Create a place-holder page where the old one was, to prevent
579 * attempted pageins of this page while we're unlocked.
580 */
581 VM_PAGE_GRAB_FICTITIOUS(holding_page);
582
583 vm_object_lock(old_object);
584
585 offset = m->offset;
586 paging_offset = offset + old_object->paging_offset;
587
588 if (old_object->pager_trusted) {
589 /*
590 * This pager is trusted, so we can clean this page
591 * in place. Leave it in the old object, and mark it
592 * cleaning & pageout.
593 */
594 new_m = holding_page;
595 holding_page = VM_PAGE_NULL;
596
597 /*
598 * Set up new page to be private shadow of real page.
599 */
600 new_m->phys_page = m->phys_page;
601 new_m->fictitious = FALSE;
602 new_m->pageout = TRUE;
603
604 /*
605 * Mark real page as cleaning (indicating that we hold a
606 * paging reference to be released via m_o_d_r_c) and
607 * pageout (indicating that the page should be freed
608 * when the pageout completes).
609 */
610 pmap_clear_modify(m->phys_page);
611 vm_page_lock_queues();
612 new_m->private = TRUE;
613 vm_page_wire(new_m);
614 m->cleaning = TRUE;
615 m->pageout = TRUE;
616
617 vm_page_wire(m);
618 assert(m->wire_count == 1);
619 vm_page_unlock_queues();
620
621 m->dirty = TRUE;
622 m->precious = FALSE;
623 m->page_lock = VM_PROT_NONE;
624 m->unusual = FALSE;
625 m->unlock_request = VM_PROT_NONE;
626 } else {
627 /*
628 * Cannot clean in place, so rip the old page out of the
629 * object, and stick the holding page in. Set new_m to the
630 * page in the new object.
631 */
632 vm_page_lock_queues();
633 VM_PAGE_QUEUES_REMOVE(m);
634 vm_page_remove(m);
635
636 vm_page_insert(holding_page, old_object, offset);
637 vm_page_unlock_queues();
638
639 m->dirty = TRUE;
640 m->precious = FALSE;
641 new_m = m;
642 new_m->page_lock = VM_PROT_NONE;
643 new_m->unlock_request = VM_PROT_NONE;
644
645 if (old_object->internal)
646 need_to_wire = TRUE;
647 }
648 /*
649 * Record that this page has been written out
650 */
651 #if MACH_PAGEMAP
652 vm_external_state_set(old_object->existence_map, offset);
653 #endif /* MACH_PAGEMAP */
654
655 vm_object_unlock(old_object);
656
657 vm_object_lock(new_object);
658
659 /*
660 * Put the page into the new object. If it is not wired
661 * (i.e., if it's the real page), it will be activated.
662 */
663
664 vm_page_lock_queues();
665 vm_page_insert(new_m, new_object, new_offset);
666 if (need_to_wire)
667 vm_page_wire(new_m);
668 else
669 vm_page_activate(new_m);
670 PAGE_WAKEUP_DONE(new_m);
671 vm_page_unlock_queues();
672
673 vm_object_unlock(new_object);
674
675 /*
676 * Return the placeholder page to simplify cleanup.
677 */
678 return (holding_page);
679 }
680
681 /*
682 * Routine: vm_pageclean_setup
683 *
684 * Purpose: setup a page to be cleaned (made non-dirty), but not
685 * necessarily flushed from the VM page cache.
686 * This is accomplished by cleaning in place.
687 *
688 * The page must not be busy, and the object and page
689 * queues must be locked.
690 *
691 */
692 void
693 vm_pageclean_setup(
694 vm_page_t m,
695 vm_page_t new_m,
696 vm_object_t new_object,
697 vm_object_offset_t new_offset)
698 {
699 vm_object_t old_object = m->object;
700 assert(!m->busy);
701 assert(!m->cleaning);
702
703 XPR(XPR_VM_PAGEOUT,
704 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
705 (integer_t)old_object, m->offset, (integer_t)m,
706 (integer_t)new_m, new_offset);
707
708 pmap_clear_modify(m->phys_page);
709 vm_object_paging_begin(old_object);
710
711 /*
712 * Record that this page has been written out
713 */
714 #if MACH_PAGEMAP
715 vm_external_state_set(old_object->existence_map, m->offset);
716 #endif /*MACH_PAGEMAP*/
717
718 /*
719 * Mark original page as cleaning in place.
720 */
721 m->cleaning = TRUE;
722 m->dirty = TRUE;
723 m->precious = FALSE;
724
725 /*
726 * Convert the fictitious page to a private shadow of
727 * the real page.
728 */
729 assert(new_m->fictitious);
730 new_m->fictitious = FALSE;
731 new_m->private = TRUE;
732 new_m->pageout = TRUE;
733 new_m->phys_page = m->phys_page;
734 vm_page_wire(new_m);
735
736 vm_page_insert(new_m, new_object, new_offset);
737 assert(!new_m->wanted);
738 new_m->busy = FALSE;
739 }
740
741 void
742 vm_pageclean_copy(
743 vm_page_t m,
744 vm_page_t new_m,
745 vm_object_t new_object,
746 vm_object_offset_t new_offset)
747 {
748 XPR(XPR_VM_PAGEOUT,
749 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
750 m, new_m, new_object, new_offset, 0);
751
752 assert((!m->busy) && (!m->cleaning));
753
754 assert(!new_m->private && !new_m->fictitious);
755
756 pmap_clear_modify(m->phys_page);
757
758 m->busy = TRUE;
759 vm_object_paging_begin(m->object);
760 vm_page_unlock_queues();
761 vm_object_unlock(m->object);
762
763 /*
764 * Copy the original page to the new page.
765 */
766 vm_page_copy(m, new_m);
767
768 /*
769 * Mark the old page as clean. A request to pmap_is_modified
770 * will get the right answer.
771 */
772 vm_object_lock(m->object);
773 m->dirty = FALSE;
774
775 vm_object_paging_end(m->object);
776
777 vm_page_lock_queues();
778 if (!m->active && !m->inactive)
779 vm_page_activate(m);
780 PAGE_WAKEUP_DONE(m);
781
782 vm_page_insert(new_m, new_object, new_offset);
783 vm_page_activate(new_m);
784 new_m->busy = FALSE; /* No other thread can be waiting */
785 }
786
787
788 /*
789 * Routine: vm_pageout_initialize_page
790 * Purpose:
791 * Causes the specified page to be initialized in
792 * the appropriate memory object. This routine is used to push
793 * pages into a copy-object when they are modified in the
794 * permanent object.
795 *
796 * The page is moved to a temporary object and paged out.
797 *
798 * In/out conditions:
799 * The page in question must not be on any pageout queues.
800 * The object to which it belongs must be locked.
801 * The page must be busy, but not hold a paging reference.
802 *
803 * Implementation:
804 * Move this page to a completely new object.
805 */
806 void
807 vm_pageout_initialize_page(
808 vm_page_t m)
809 {
810 vm_map_copy_t copy;
811 vm_object_t new_object;
812 vm_object_t object;
813 vm_object_offset_t paging_offset;
814 vm_page_t holding_page;
815
816
817 XPR(XPR_VM_PAGEOUT,
818 "vm_pageout_initialize_page, page 0x%X\n",
819 (integer_t)m, 0, 0, 0, 0);
820 assert(m->busy);
821
822 /*
823 * Verify that we really want to clean this page
824 */
825 assert(!m->absent);
826 assert(!m->error);
827 assert(m->dirty);
828
829 /*
830 * Create a paging reference to let us play with the object.
831 */
832 object = m->object;
833 paging_offset = m->offset + object->paging_offset;
834 vm_object_paging_begin(object);
835 if (m->absent || m->error || m->restart ||
836 (!m->dirty && !m->precious)) {
837 VM_PAGE_FREE(m);
838 panic("reservation without pageout?"); /* alan */
839 vm_object_unlock(object);
840 return;
841 }
842
843 /* set the page for future call to vm_fault_list_request */
844 holding_page = NULL;
845 vm_page_lock_queues();
846 pmap_clear_modify(m->phys_page);
847 m->dirty = TRUE;
848 m->busy = TRUE;
849 m->list_req_pending = TRUE;
850 m->cleaning = TRUE;
851 m->pageout = TRUE;
852 vm_page_wire(m);
853 vm_pageout_throttle(m);
854 vm_page_unlock_queues();
855 vm_object_unlock(object);
856
857 /*
858 * Write the data to its pager.
859 * Note that the data is passed by naming the new object,
860 * not a virtual address; the pager interface has been
861 * manipulated to use the "internal memory" data type.
862 * [The object reference from its allocation is donated
863 * to the eventual recipient.]
864 */
865 memory_object_data_initialize(object->pager,
866 paging_offset,
867 PAGE_SIZE);
868
869 vm_object_lock(object);
870 }
871
872 #if MACH_CLUSTER_STATS
873 #define MAXCLUSTERPAGES 16
874 struct {
875 unsigned long pages_in_cluster;
876 unsigned long pages_at_higher_offsets;
877 unsigned long pages_at_lower_offsets;
878 } cluster_stats[MAXCLUSTERPAGES];
879 #endif /* MACH_CLUSTER_STATS */
880
881 boolean_t allow_clustered_pageouts = FALSE;
882
883 /*
884 * vm_pageout_cluster:
885 *
886 * Given a page, page it out, and attempt to clean adjacent pages
887 * in the same operation.
888 *
889 * The page must be busy, and the object locked. We will take a
890 * paging reference to prevent deallocation or collapse when we
891 * temporarily release the object lock.
892 *
893 * The page must not be on any pageout queue.
894 */
895 void
896 vm_pageout_cluster(
897 vm_page_t m)
898 {
899 vm_object_t object = m->object;
900 vm_object_offset_t offset = m->offset; /* from vm_object start */
901 vm_object_offset_t paging_offset;
902 vm_object_t new_object;
903 vm_object_offset_t new_offset;
904 vm_size_t cluster_size;
905 vm_object_offset_t cluster_offset; /* from memory_object start */
906 vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
907 vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
908 vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
909 vm_object_offset_t offset_within_cluster;
910 vm_size_t length_of_data;
911 vm_page_t friend, holding_page;
912 kern_return_t rc;
913 boolean_t precious_clean = TRUE;
914 int pages_in_cluster;
915
916 CLUSTER_STAT(int pages_at_higher_offsets = 0;)
917 CLUSTER_STAT(int pages_at_lower_offsets = 0;)
918
919 XPR(XPR_VM_PAGEOUT,
920 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
921 (integer_t)object, offset, (integer_t)m, 0, 0);
922
923 CLUSTER_STAT(vm_pageout_cluster_clusters++;)
924
925 /*
926 * protect the object from collapse -
927 * locking in the object's paging_offset.
928 */
929 vm_object_paging_begin(object);
930 paging_offset = m->offset + object->paging_offset;
931
932 /*
933 * Only a certain kind of page is appreciated here.
934 */
935 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
936 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
937
938 cluster_size = object->cluster_size;
939
940 assert(cluster_size >= PAGE_SIZE);
941 if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
942 assert(object->pager_created && object->pager_initialized);
943 assert(object->internal || object->pager_ready);
944
945 if (m->precious && !m->dirty)
946 precious_clean = TRUE;
947
948 if (!object->pager_trusted || !allow_clustered_pageouts)
949 cluster_size = PAGE_SIZE;
950
951 cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
952 /* bytes from beginning of cluster */
953 /*
954 * Due to unaligned mappings, we have to be careful
955 * of negative offsets into the VM object. Clip the cluster
956 * boundary to the VM object, not the memory object.
957 */
958 if (offset > cluster_offset) {
959 cluster_lower_bound = offset - cluster_offset;
960 /* from vm_object */
961 } else {
962 cluster_lower_bound = 0;
963 }
964 cluster_upper_bound = (offset - cluster_offset) +
965 (vm_object_offset_t)cluster_size;
966
967 /* set the page for future call to vm_fault_list_request */
968 holding_page = NULL;
969 vm_page_lock_queues();
970 m->busy = TRUE;
971 m->list_req_pending = TRUE;
972 m->cleaning = TRUE;
973 m->pageout = TRUE;
974 vm_page_wire(m);
975 vm_pageout_throttle(m);
976 vm_page_unlock_queues();
977 vm_object_unlock(object);
978
979 /*
980 * Search backward for adjacent eligible pages to clean in
981 * this operation.
982 */
983
984 cluster_start = offset;
985 if (offset) { /* avoid wrap-around at zero */
986 for (cluster_start = offset - PAGE_SIZE_64;
987 cluster_start >= cluster_lower_bound;
988 cluster_start -= PAGE_SIZE_64) {
989 assert(cluster_size > PAGE_SIZE);
990
991 vm_object_lock(object);
992 vm_page_lock_queues();
993
994 if ((friend = vm_pageout_cluster_page(object, cluster_start,
995 precious_clean)) == VM_PAGE_NULL) {
996 vm_page_unlock_queues();
997 vm_object_unlock(object);
998 break;
999 }
1000 new_offset = (cluster_start + object->paging_offset)
1001 & (cluster_size - 1);
1002
1003 assert(new_offset < cluster_offset);
1004 m->list_req_pending = TRUE;
1005 m->cleaning = TRUE;
1006 /* do nothing except advance the write request, all we really need to */
1007 /* do is push the target page and let the code at the other end decide */
1008 /* what is really the right size */
1009 if (vm_page_free_count <= vm_page_free_reserved) {
1010 m->busy = TRUE;
1011 m->pageout = TRUE;
1012 vm_page_wire(m);
1013 }
1014
1015 vm_page_unlock_queues();
1016 vm_object_unlock(object);
1017 if(m->dirty || m->object->internal) {
1018 CLUSTER_STAT(pages_at_lower_offsets++;)
1019 }
1020
1021 }
1022 cluster_start += PAGE_SIZE_64;
1023 }
1024 assert(cluster_start >= cluster_lower_bound);
1025 assert(cluster_start <= offset);
1026 /*
1027 * Search forward for adjacent eligible pages to clean in
1028 * this operation.
1029 */
1030 for (cluster_end = offset + PAGE_SIZE_64;
1031 cluster_end < cluster_upper_bound;
1032 cluster_end += PAGE_SIZE_64) {
1033 assert(cluster_size > PAGE_SIZE);
1034
1035 vm_object_lock(object);
1036 vm_page_lock_queues();
1037
1038 if ((friend = vm_pageout_cluster_page(object, cluster_end,
1039 precious_clean)) == VM_PAGE_NULL) {
1040 vm_page_unlock_queues();
1041 vm_object_unlock(object);
1042 break;
1043 }
1044 new_offset = (cluster_end + object->paging_offset)
1045 & (cluster_size - 1);
1046
1047 assert(new_offset < cluster_size);
1048 m->list_req_pending = TRUE;
1049 m->cleaning = TRUE;
1050 /* do nothing except advance the write request, all we really need to */
1051 /* do is push the target page and let the code at the other end decide */
1052 /* what is really the right size */
1053 if (vm_page_free_count <= vm_page_free_reserved) {
1054 m->busy = TRUE;
1055 m->pageout = TRUE;
1056 vm_page_wire(m);
1057 }
1058
1059 vm_page_unlock_queues();
1060 vm_object_unlock(object);
1061
1062 if(m->dirty || m->object->internal) {
1063 CLUSTER_STAT(pages_at_higher_offsets++;)
1064 }
1065 }
1066 assert(cluster_end <= cluster_upper_bound);
1067 assert(cluster_end >= offset + PAGE_SIZE);
1068
1069 /*
1070 * (offset - cluster_offset) is beginning of cluster_object
1071 * relative to vm_object start.
1072 */
1073 offset_within_cluster = cluster_start - (offset - cluster_offset);
1074 length_of_data = cluster_end - cluster_start;
1075
1076 assert(offset_within_cluster < cluster_size);
1077 assert((offset_within_cluster + length_of_data) <= cluster_size);
1078
1079 rc = KERN_SUCCESS;
1080 assert(rc == KERN_SUCCESS);
1081
1082 pages_in_cluster = length_of_data/PAGE_SIZE;
1083
1084 #if MACH_CLUSTER_STATS
1085 (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
1086 (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
1087 (cluster_stats[pages_in_cluster].pages_in_cluster)++;
1088 #endif /* MACH_CLUSTER_STATS */
1089
1090 /*
1091 * Send the data to the pager.
1092 */
1093 paging_offset = cluster_start + object->paging_offset;
1094
1095 rc = memory_object_data_return(object->pager,
1096 paging_offset,
1097 length_of_data,
1098 !precious_clean,
1099 FALSE);
1100
1101 vm_object_lock(object);
1102 vm_object_paging_end(object);
1103
1104 if (holding_page) {
1105 assert(!object->pager_trusted);
1106 VM_PAGE_FREE(holding_page);
1107 vm_object_paging_end(object);
1108 }
1109 }
1110
1111 /*
1112 * Trusted pager throttle.
1113 * Object and page queues must be locked.
1114 */
1115 void
1116 vm_pageout_throttle(
1117 register vm_page_t m)
1118 {
1119 register vm_object_t object;
1120
1121 /*
1122 * need to keep track of the object we
1123 * started with... if we drop the object lock
1124 * due to the throttle, it's possible that someone
1125 * else will gather this page into an I/O if this
1126 * is an external object... the page will then be
1127 * potentially freed before we unwedge from the
1128 * throttle... this is ok since no one plays with
1129 * the page directly after the throttle... the object
1130 * and offset are passed into the memory_object_data_return
1131 * function where eventually it's relooked up against the
1132 * object... if it's changed state or there is no longer
1133 * a page at that offset, the pageout just finishes without
1134 * issuing an I/O
1135 */
1136 object = m->object;
1137
1138 assert(!m->laundry);
1139 m->laundry = TRUE;
1140 if (!object->internal)
1141 vm_page_burst_count++;
1142 vm_page_laundry_count++;
1143
1144 while (vm_page_laundry_count > vm_page_laundry_max) {
1145 /*
1146 * Set the threshold for when vm_page_free()
1147 * should wake us up.
1148 */
1149 vm_page_laundry_min = vm_page_laundry_max/2;
1150
1151 assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
1152 vm_page_unlock_queues();
1153 vm_object_unlock(object);
1154 /*
1155 * Pause to let the default pager catch up.
1156 */
1157 thread_block((void (*)(void)) 0);
1158
1159 vm_object_lock(object);
1160 vm_page_lock_queues();
1161 }
1162 }
1163
1164 /*
1165 * The global variable vm_pageout_clean_active_pages controls whether
1166 * active pages are considered valid to be cleaned in place during a
1167 * clustered pageout. Performance measurements are necessary to determine
1168 * the best policy.
1169 */
1170 int vm_pageout_clean_active_pages = 1;
1171 /*
1172 * vm_pageout_cluster_page: [Internal]
1173 *
1174 * return a vm_page_t to the page at (object,offset) if it is appropriate
1175 * to clean in place. Pages that are non-existent, busy, absent, already
1176 * cleaning, or not dirty are not eligible to be cleaned as an adjacent
1177 * page in a cluster.
1178 *
1179 * The object must be locked on entry, and remains locked throughout
1180 * this call.
1181 */
1182
1183 vm_page_t
1184 vm_pageout_cluster_page(
1185 vm_object_t object,
1186 vm_object_offset_t offset,
1187 boolean_t precious_clean)
1188 {
1189 vm_page_t m;
1190
1191 XPR(XPR_VM_PAGEOUT,
1192 "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
1193 (integer_t)object, offset, 0, 0, 0);
1194
1195 if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
1196 return(VM_PAGE_NULL);
1197
1198 if (m->busy || m->absent || m->cleaning ||
1199 (m->wire_count != 0) || m->error)
1200 return(VM_PAGE_NULL);
1201
1202 if (vm_pageout_clean_active_pages) {
1203 if (!m->active && !m->inactive) return(VM_PAGE_NULL);
1204 } else {
1205 if (!m->inactive) return(VM_PAGE_NULL);
1206 }
1207
1208 assert(!m->private);
1209 assert(!m->fictitious);
1210
1211 if (!m->dirty) m->dirty = pmap_is_modified(m->phys_page);
1212
1213 if (precious_clean) {
1214 if (!m->precious || !m->dirty)
1215 return(VM_PAGE_NULL);
1216 } else {
1217 if (!m->dirty)
1218 return(VM_PAGE_NULL);
1219 }
1220 return(m);
1221 }
1222
1223 /*
1224 * vm_pageout_scan does the dirty work for the pageout daemon.
1225 * It returns with vm_page_queue_free_lock held and
1226 * vm_page_free_wanted == 0.
1227 */
1228 extern void vm_pageout_scan_continue(void); /* forward; */
1229
1230 #define DELAYED_UNLOCK_LIMIT 50
1231 #define LOCAL_FREED_LIMIT 50
1232
1233 void
1234 vm_pageout_scan(void)
1235 {
1236 boolean_t now = FALSE;
1237 unsigned int laundry_pages;
1238 int loop_count = 0;
1239 int loop_bursted_count = 0;
1240 int active_loop_detect;
1241 vm_page_t local_freeq = 0;
1242 int local_freed = 0;
1243 int delayed_unlock = 0;
1244 int need_internal_inactive = 0;
1245 int need_pause;
1246
1247 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1248
1249 /*???*/ /*
1250 * We want to gradually dribble pages from the active queue
1251 * to the inactive queue. If we let the inactive queue get
1252 * very small, and then suddenly dump many pages into it,
1253 * those pages won't get a sufficient chance to be referenced
1254 * before we start taking them from the inactive queue.
1255 *
1256 * We must limit the rate at which we send pages to the pagers.
1257 * data_write messages consume memory, for message buffers and
1258 * for map-copy objects. If we get too far ahead of the pagers,
1259 * we can potentially run out of memory.
1260 *
1261 * We can use the laundry count to limit directly the number
1262 * of pages outstanding to the default pager. A similar
1263 * strategy for external pagers doesn't work, because
1264 * external pagers don't have to deallocate the pages sent them,
1265 * and because we might have to send pages to external pagers
1266 * even if they aren't processing writes. So we also
1267 * use a burst count to limit writes to external pagers.
1268 *
1269 * When memory is very tight, we can't rely on external pagers to
1270 * clean pages. They probably aren't running, because they
1271 * aren't vm-privileged. If we kept sending dirty pages to them,
1272 * we could exhaust the free list.
1273 *
1274 * consider_zone_gc should be last, because the other operations
1275 * might return memory to zones.
1276 */
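/*
 * Worked example of the pause path below (using the default tuning
 * values defined at the top of this file): with, say, 6 pages currently
 * in the burst, the scan sleeps for 6 * VM_PAGEOUT_BURST_WAIT = 180 ms;
 * if both inactive queues are empty, it sleeps for at least
 * VM_PAGEOUT_EMPTY_WAIT (200 ms) instead.
 */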
1277 Restart:
1278
1279 stack_collect();
1280 consider_task_collect();
1281 consider_machine_collect();
1282 consider_zone_gc();
1283
1284 for (;;) {
1285 register vm_page_t m;
1286 register vm_object_t object;
1287
1288 /*
1289 * Recalculate vm_page_inactive_target.
1290 */
1291 if (delayed_unlock == 0)
1292 vm_page_lock_queues();
1293 vm_page_inactive_target =
1294 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1295 vm_page_inactive_count);
1296
1297 active_loop_detect = vm_page_active_count;
1298 /*
1299 * Move pages from active to inactive.
1300 */
1301 while ((need_internal_inactive ||
1302 vm_page_inactive_count < vm_page_inactive_target) &&
1303 !queue_empty(&vm_page_queue_active) &&
1304 ((active_loop_detect--) > 0)) {
1305
1306 need_pause = 1;
1307 vm_pageout_active++;
1308
1309 m = (vm_page_t) queue_first(&vm_page_queue_active);
1310 object = m->object;
1311
1312 /*
1313 * If we're getting really low on memory,
1314 * or we have already exceeded the burst
1315 * count for the external pagers,
1316 * try skipping to a page that will go
1317 * directly to the default_pager.
1318 */
1319 if (need_internal_inactive &&
1320 IP_VALID(memory_manager_default)) {
1321 vm_pageout_scan_active_emm_throttle++;
1322
1323 assert(m->active && !m->inactive);
1324
1325 if (vm_object_lock_try(object)) {
1326 if (object->internal)
1327 goto object_locked_active;
1328
1329 if (!m->dirty)
1330 m->dirty = pmap_is_modified(m->phys_page);
1331 if (!m->dirty && !m->precious)
1332 goto object_locked_active;
1333
1334 vm_object_unlock(object);
1335
1336 need_pause = 0;
1337 }
1338 goto object_lock_try_active_failed;
1339 }
1340 assert(m->active && !m->inactive);
1341
1342 if (!vm_object_lock_try(object)) {
1343 /*
1344 * Move page to end and continue.
1345 */
1346 object_lock_try_active_failed:
1347 queue_remove(&vm_page_queue_active, m,
1348 vm_page_t, pageq);
1349 queue_enter(&vm_page_queue_active, m,
1350 vm_page_t, pageq);
1351
1352 if (local_freeq) {
1353 vm_page_free_list(local_freeq);
1354
1355 local_freeq = 0;
1356 local_freed = 0;
1357 }
1358 if (need_pause) {
1359 delayed_unlock = 0;
1360
1361 vm_page_unlock_queues();
1362 mutex_pause();
1363 vm_page_lock_queues();
1364 }
1365 continue;
1366 }
1367
1368 object_locked_active:
1369 /*
1370 * If the page is busy, then we pull it
1371 * off the active queue and leave it alone.
1372 */
1373
1374 if (m->busy) {
1375 vm_object_unlock(object);
1376 queue_remove(&vm_page_queue_active, m,
1377 vm_page_t, pageq);
1378 m->active = FALSE;
1379 if (!m->fictitious)
1380 vm_page_active_count--;
1381 continue;
1382 }
1383
1384 /*
1385 * Deactivate the page while holding the object
1386 * locked, so we know the page is still not busy.
1387 * This should prevent races between pmap_enter
1388 * and pmap_clear_reference. The page might be
1389 * absent or fictitious, but vm_page_deactivate
1390 * can handle that.
1391 */
1392
1393 if (need_internal_inactive) {
1394 /* found one ! */
1395 vm_pageout_scan_active_emm_throttle_success++;
1396 need_internal_inactive--;
1397 }
1398 vm_page_deactivate(m);
1399 vm_object_unlock(object);
1400 }
1401 /*
1402 * We are done if we have met our target *and*
1403 * nobody is still waiting for a page.
1404 */
1405 if (vm_page_free_count + local_freed >= vm_page_free_target) {
1406 if (local_freeq) {
1407 vm_page_free_list(local_freeq);
1408
1409 local_freeq = 0;
1410 local_freed = 0;
1411 }
1412
1413 consider_machine_adjust();
1414
1415 mutex_lock(&vm_page_queue_free_lock);
1416
1417 if ((vm_page_free_count >= vm_page_free_target) &&
1418 (vm_page_free_wanted == 0)) {
1419
1420 delayed_unlock = 0;
1421 vm_page_unlock_queues();
1422 break;
1423 }
1424 mutex_unlock(&vm_page_queue_free_lock);
1425 }
1426
1427 /*
1428 * Sometimes we have to pause:
1429 * 1) No inactive pages - nothing to do.
1430 * 2) Flow control - nothing but external pages and
1431 * we have to wait for untrusted pagers to catch up.
1432 */
1433
1434 loop_count++;
1435 if ((queue_empty(&vm_page_queue_inactive) &&
1436 queue_empty(&vm_page_queue_zf)) ||
1437 loop_bursted_count >= vm_pageout_burst_loop_throttle) {
1438
1439 unsigned int pages, msecs;
1440 int wait_result;
1441
1442 consider_machine_adjust();
1443 /*
1444 * vm_pageout_burst_wait is msecs/page.
1445 * If there is nothing for us to do, we wait
1446 * at least vm_pageout_empty_wait msecs.
1447 */
1448 pages = vm_page_burst_count;
1449
1450 if (pages) {
1451 msecs = pages * vm_pageout_burst_wait;
1452 } else {
1453 printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
1454 msecs = vm_free_page_pause;
1455 }
1456
1457 if (queue_empty(&vm_page_queue_inactive) &&
1458 queue_empty(&vm_page_queue_zf) &&
1459 (msecs < vm_pageout_empty_wait))
1460 msecs = vm_pageout_empty_wait;
1461
1462 if (local_freeq) {
1463 vm_page_free_list(local_freeq);
1464
1465 local_freeq = 0;
1466 local_freed = 0;
1467 }
1468 delayed_unlock = 0;
1469 vm_page_unlock_queues();
1470
1471 assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
1472 counter(c_vm_pageout_scan_block++);
1473
1474 /*
1475 * Unfortunately, we don't have call_continuation
1476 * so we can't rely on tail-recursion.
1477 */
1478 wait_result = thread_block((void (*)(void)) 0);
1479 if (wait_result != THREAD_TIMED_OUT)
1480 thread_cancel_timer();
1481 vm_pageout_scan_continue();
1482
1483 if (loop_count >= vm_page_inactive_count) {
1484 if (vm_page_burst_count >= vm_pageout_burst_max) {
1485 /*
1486 * Make sure we move enough "appropriate"
1487 * pages to the inactive queue before trying
1488 * again.
1489 */
1490 need_internal_inactive = vm_page_laundry_max;
1491 }
1492 loop_count = 0;
1493 }
1494 loop_bursted_count = 0;
1495 goto Restart;
1496 /*NOTREACHED*/
1497 }
1498
1499 vm_pageout_inactive++;
1500
1501 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1502 vm_zf_iterator = 0;
1503 } else {
1504 last_page_zf = 0;
1505 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1506 vm_zf_iterator = 0;
1507 }
1508 }
1509 if(queue_empty(&vm_page_queue_zf) ||
1510 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1511 !queue_empty(&vm_page_queue_inactive))) {
1512 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1513 last_page_zf = 0;
1514 } else {
1515 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1516 last_page_zf = 1;
1517 }
1518 object = m->object;
1519
1520 need_pause = 1;
1521
1522 if (vm_page_burst_count >= vm_pageout_burst_max &&
1523 IP_VALID(memory_manager_default)) {
1524 /*
1525 * We're throttling external pagers.
1526 * Try to select a page that would
1527 * go directly to the default_pager
1528 * or that is clean...
1529 */
1530 vm_pageout_scan_inactive_emm_throttle++;
1531
1532 assert(!m->active && m->inactive);
1533
1534 if (vm_object_lock_try(object)) {
1535 if (object->internal) {
1536 /* found one ! */
1537 vm_pageout_scan_inactive_emm_throttle_success++;
1538 goto object_locked_inactive;
1539 }
1540 if (!m->dirty)
1541 m->dirty = pmap_is_modified(m->phys_page);
1542 if (!m->dirty && !m->precious) {
1543 /* found one ! */
1544 vm_pageout_scan_inactive_emm_throttle_success++;
1545 goto object_locked_inactive;
1546 }
1547 vm_object_unlock(object);
1548
1549 need_pause = 0;
1550 }
1551 loop_bursted_count++;
1552 goto object_lock_try_inactive_failed;
1553 }
1554
1555 assert(!m->active && m->inactive);
1556
1557 /*
1558 * Try to lock object; since we've got the
1559 * page queues lock, we can only try for this one.
1560 */
1561
1562 if (!vm_object_lock_try(object)) {
1563 object_lock_try_inactive_failed:
1564 /*
1565 * Move page to end and continue.
1566 * Don't re-issue ticket
1567 */
1568 if (m->zero_fill) {
1569 queue_remove(&vm_page_queue_zf, m,
1570 vm_page_t, pageq);
1571 queue_enter(&vm_page_queue_zf, m,
1572 vm_page_t, pageq);
1573 } else {
1574 queue_remove(&vm_page_queue_inactive, m,
1575 vm_page_t, pageq);
1576 queue_enter(&vm_page_queue_inactive, m,
1577 vm_page_t, pageq);
1578 }
1579 if (local_freeq) {
1580 vm_page_free_list(local_freeq);
1581
1582 local_freeq = 0;
1583 local_freed = 0;
1584 }
1585 delayed_unlock = 0;
1586 vm_page_unlock_queues();
1587
1588 if (need_pause) {
1589 mutex_pause();
1590 vm_pageout_inactive_nolock++;
1591 }
1592 continue;
1593 }
1594
1595 object_locked_inactive:
1596 /*
1597 * Paging out pages of external objects which
1598 * are currently being created must be avoided.
1599 * The pager may claim for memory, thus leading to a
1600 * possible dead lock between it and the pageout thread,
1601 * if such pages are finally chosen. The remaining assumption
1602 * is that there will finally be enough available pages in the
1603 * inactive pool to page out in order to satisfy all memory
1604 * claimed by the thread which concurrently creates the pager.
1605 */
1606 if (!object->pager_initialized && object->pager_created) {
1607 /*
1608 * Move page to end and continue, hoping that
1609 * there will be enough other inactive pages to
1610 * page out so that the thread which currently
1611 * initializes the pager will succeed.
1612 * Don't re-grant the ticket; the page should be
1613 * pulled from the queue and paged out whenever
1614 * one of its logically adjacent fellows is
1615 * targeted.
1616 */
1617 if(m->zero_fill) {
1618 queue_remove(&vm_page_queue_zf, m,
1619 vm_page_t, pageq);
1620 queue_enter(&vm_page_queue_zf, m,
1621 vm_page_t, pageq);
1622 last_page_zf = 1;
1623 vm_zf_iterator = vm_zf_iterator_count - 1;
1624 } else {
1625 queue_remove(&vm_page_queue_inactive, m,
1626 vm_page_t, pageq);
1627 queue_enter(&vm_page_queue_inactive, m,
1628 vm_page_t, pageq);
1629 last_page_zf = 0;
1630 vm_zf_iterator = 1;
1631 }
1632 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1633 delayed_unlock = 0;
1634 vm_page_unlock_queues();
1635 }
1636 vm_object_unlock(object);
1637 vm_pageout_inactive_avoid++;
1638 continue;
1639 }
1640
1641 /*
1642 * Remove the page from the inactive list.
1643 */
1644
1645 if(m->zero_fill) {
1646 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1647 } else {
1648 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1649 }
1650 m->inactive = FALSE;
1651 if (!m->fictitious)
1652 vm_page_inactive_count--;
1653
1654 if (m->busy || !object->alive) {
1655 /*
1656 * Somebody is already playing with this page.
1657 * Leave it off the pageout queues.
1658 */
1659
1660 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1661 delayed_unlock = 0;
1662 vm_page_unlock_queues();
1663 }
1664 vm_object_unlock(object);
1665 vm_pageout_inactive_busy++;
1666 continue;
1667 }
1668
1669 /*
1670 * If it's absent or in error, we can reclaim the page.
1671 */
1672
1673 if (m->absent || m->error) {
1674 vm_pageout_inactive_absent++;
1675 reclaim_page:
1676
1677 if (m->tabled)
1678 vm_page_remove(m); /* clears tabled, object, offset */
1679 if (m->absent)
1680 vm_object_absent_release(object);
1681
1682 m->pageq.next = (queue_entry_t)local_freeq;
1683 local_freeq = m;
1684
1685 if (local_freed++ > LOCAL_FREED_LIMIT) {
1686 vm_page_free_list(local_freeq);
1687
1688 local_freeq = 0;
1689 local_freed = 0;
1690 }
1691 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1692 delayed_unlock = 0;
1693 vm_page_unlock_queues();
1694 }
1695 vm_object_unlock(object);
1696 loop_bursted_count = 0;
1697 continue;
1698 }
1699
1700 assert(!m->private);
1701 assert(!m->fictitious);
1702
1703 /*
1704 * If already cleaning this page in place, convert from
1705 * "adjacent" to "target". We can leave the page mapped,
1706 * and vm_pageout_object_terminate will determine whether
1707 * to free or reactivate.
1708 */
1709
1710 if (m->cleaning) {
1711 #if MACH_CLUSTER_STATS
1712 vm_pageout_cluster_conversions++;
1713 #endif
1714 m->busy = TRUE;
1715 m->pageout = TRUE;
1716 m->dump_cleaning = TRUE;
1717 vm_page_wire(m);
1718 vm_object_unlock(object);
1719
1720 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1721 delayed_unlock = 0;
1722 vm_page_unlock_queues();
1723 }
1724 loop_bursted_count = 0;
1725 continue;
1726 }
1727
1728 /*
1729 * If it's being used, reactivate.
1730 * (Fictitious pages are either busy or absent.)
1731 */
1732
1733 if (m->reference || pmap_is_referenced(m->phys_page)) {
1734 vm_pageout_inactive_used++;
1735 reactivate_page:
1736 #if ADVISORY_PAGEOUT
1737 if (m->discard_request) {
1738 m->discard_request = FALSE;
1739 }
1740 #endif /* ADVISORY_PAGEOUT */
1741 last_page_zf = 0;
1742 vm_object_unlock(object);
1743 vm_page_activate(m);
1744 VM_STAT(reactivations++);
1745
1746 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1747 delayed_unlock = 0;
1748 vm_page_unlock_queues();
1749 }
1750 continue;
1751 }
1752
1753 #if ADVISORY_PAGEOUT
1754 if (object->advisory_pageout) {
1755 boolean_t do_throttle;
1756 memory_object_t pager;
1757 vm_object_offset_t discard_offset;
1758
1759 if (m->discard_request) {
1760 vm_stat_discard_failure++;
1761 goto mandatory_pageout;
1762 }
1763
1764 assert(object->pager_initialized);
1765 m->discard_request = TRUE;
1766 pager = object->pager;
1767
1768 /* system-wide throttle */
1769 do_throttle = (vm_page_free_count <=
1770 vm_page_free_reserved);
1771
1772 #if 0
1773 /*
1774 * JMM - Do we need a replacement throttle
1775 * mechanism for pagers?
1776 */
1777 if (!do_throttle) {
1778 /* throttle on this pager */
1779 /* XXX lock ordering ? */
1780 ip_lock(port);
1781 do_throttle= imq_full(&port->ip_messages);
1782 ip_unlock(port);
1783 }
1784 #endif
1785
1786 if (do_throttle) {
1787 vm_stat_discard_throttle++;
1788 #if 0
1789 /* ignore this page and skip to next */
1790 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1791 delayed_unlock = 0;
1792 vm_page_unlock_queues();
1793 }
1794 vm_object_unlock(object);
1795 continue;
1796 #else
1797 /* force mandatory pageout */
1798 goto mandatory_pageout;
1799 #endif
1800 }
1801
1802 /* proceed with discard_request */
1803 vm_page_activate(m);
1804 vm_stat_discard++;
1805 VM_STAT(reactivations++);
1806 discard_offset = m->offset + object->paging_offset;
1807 vm_stat_discard_sent++;
1808
1809 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1810 delayed_unlock = 0;
1811 vm_page_unlock_queues();
1812 }
1813 vm_object_unlock(object);
1814
1815 /*
1816 memory_object_discard_request(object->pager,
1817 discard_offset,
1818 PAGE_SIZE);
1819 */
1820 continue;
1821 }
1822 mandatory_pageout:
1823 #endif /* ADVISORY_PAGEOUT */
1824
1825 XPR(XPR_VM_PAGEOUT,
1826 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1827 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1828
1829 /*
1830 * Eliminate all mappings.
1831 */
1832
1833 m->busy = TRUE;
1834
1835 if (m->no_isync == FALSE)
1836 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1837
1838 if (!m->dirty)
1839 m->dirty = pmap_is_modified(m->phys_page);
1840 /*
1841 * If it's clean and not precious, we can free the page.
1842 */
1843
1844 if (!m->dirty && !m->precious) {
1845 vm_pageout_inactive_clean++;
1846 goto reclaim_page;
1847 }
1848 if (local_freeq) {
1849 vm_page_free_list(local_freeq);
1850
1851 local_freeq = 0;
1852 local_freed = 0;
1853 }
1854 delayed_unlock = 0;
1855 vm_page_unlock_queues();
1856
1857 /*
1858 * If there is no memory object for the page, create
1859 * one and hand it to the default pager.
1860 */
1861
1862 if (!object->pager_initialized)
1863 vm_object_collapse(object, (vm_object_offset_t)0);
1864 if (!object->pager_initialized)
1865 vm_object_pager_create(object);
1866 if (!object->pager_initialized) {
1867 /*
1868 * Still no pager for the object.
1869 * Reactivate the page.
1870 *
1871 * Should only happen if there is no
1872 * default pager.
1873 */
1874 vm_page_lock_queues();
1875 vm_page_activate(m);
1876 vm_page_unlock_queues();
1877
1878 /*
1879 * And we are done with it.
1880 */
1881 PAGE_WAKEUP_DONE(m);
1882 vm_object_unlock(object);
1883
1884 /*
1885 * break here to get back to the preemption
1886 * point in the outer loop so that we don't
1887 * spin forever if there is no default pager.
1888 */
1889 vm_pageout_dirty_no_pager++;
1890 /*
1891 * Well there's no pager, but we can still reclaim
1892 * free pages out of the inactive list. Go back
1893 * to top of loop and look for suitable pages.
1894 */
1895 continue;
1896 } else if (object->pager == MEMORY_OBJECT_NULL) {
1897 /*
1898 * This pager has been destroyed by either
1899 * memory_object_destroy or vm_object_destroy, and
1900 * so there is nowhere for the page to go.
1901 * Just free the page.
1902 */
1903 VM_PAGE_FREE(m);
1904 vm_object_unlock(object);
1905 loop_bursted_count = 0;
1906 continue;
1907 }
1908
1909 vm_pageout_inactive_dirty++;
1910 vm_pageout_cluster(m); /* flush it */
1911 vm_object_unlock(object);
1912 loop_bursted_count = 0;
1913 }
1914 }
1915
1916 counter(unsigned int c_vm_pageout_scan_continue = 0;)
1917
1918 void
1919 vm_pageout_scan_continue(void)
1920 {
1921 /*
1922 * We just paused to let the pagers catch up.
1923 * If vm_page_laundry_count is still high,
1924 * then we aren't waiting long enough.
1925 * If we have paused some vm_pageout_pause_max times without
1926 * adjusting vm_pageout_burst_wait, it might be too big,
1927 * so we decrease it.
1928 */
1929
1930 vm_page_lock_queues();
1931 counter(++c_vm_pageout_scan_continue);
1932 if (vm_page_laundry_count > vm_pageout_burst_min) {
1933 vm_pageout_burst_wait++;
1934 vm_pageout_pause_count = 0;
1935 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1936 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1937 if (vm_pageout_burst_wait < 1)
1938 vm_pageout_burst_wait = 1;
1939 vm_pageout_pause_count = 0;
1940 }
1941 vm_page_unlock_queues();
1942 }
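/*
* Illustrative numbers (editorial, not from the original source): if
* the laundry queue stays at or below vm_pageout_burst_min for more
* than vm_pageout_pause_max consecutive pauses, a burst wait of, say,
* 40 decays to (40 * 3) / 4 = 30 on the next pass, and is never
* allowed to fall below 1.
*/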
1943
1944 void vm_page_free_reserve(int pages);
1945 int vm_page_free_count_init;
1946
1947 void
1948 vm_page_free_reserve(
1949 int pages)
1950 {
1951 int free_after_reserve;
1952
1953 vm_page_free_reserved += pages;
1954
1955 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1956
1957 vm_page_free_min = vm_page_free_reserved +
1958 VM_PAGE_FREE_MIN(free_after_reserve);
1959
1960 vm_page_free_target = vm_page_free_reserved +
1961 VM_PAGE_FREE_TARGET(free_after_reserve);
1962
1963 if (vm_page_free_target < vm_page_free_min + 5)
1964 vm_page_free_target = vm_page_free_min + 5;
1965 }
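/*
* Editorial note (assuming the VM_PAGE_FREE_MIN/VM_PAGE_FREE_TARGET
* macros are non-negative): after this routine runs,
*
*	vm_page_free_reserved <= vm_page_free_min
*	vm_page_free_target   >= vm_page_free_min + 5
*
* i.e. the computed target always sits at least 5 pages above the
* computed minimum.
*/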
1966
1967 /*
1968 * vm_pageout is the high level pageout daemon.
1969 */
1970
1971 void
1972 vm_pageout_continue(void)
1973 {
1974 vm_pageout_scan_event_counter++;
1975 vm_pageout_scan();
1976 /* we hold vm_page_queue_free_lock now */
1977 assert(vm_page_free_wanted == 0);
1978 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1979 mutex_unlock(&vm_page_queue_free_lock);
1980
1981 counter(c_vm_pageout_block++);
1982 thread_block(vm_pageout_continue);
1983 /*NOTREACHED*/
1984 }
1985
1986 void
1987 vm_pageout(void)
1988 {
1989 thread_t self = current_thread();
1990 spl_t s;
1991
1992 /*
1993 * Set thread privileges.
1994 */
1995 self->vm_privilege = TRUE;
1996
1997 s = splsched();
1998 thread_lock(self);
1999 self->priority = BASEPRI_PREEMPT - 1;
2000 set_sched_pri(self, self->priority);
2001 thread_unlock(self);
2002 splx(s);
2003
2004 /*
2005 * Initialize some paging parameters.
2006 */
2007
2008 if (vm_page_laundry_max == 0)
2009 vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
2010
2011 if (vm_pageout_burst_max == 0)
2012 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
2013
2014 if (vm_pageout_burst_wait == 0)
2015 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2016
2017 if (vm_pageout_empty_wait == 0)
2018 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2019
2020 /*
2021 * Set kernel task to low backing store privileged
2022 * status
2023 */
2024 task_lock(kernel_task);
2025 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2026 task_unlock(kernel_task);
2027
2028 vm_page_free_count_init = vm_page_free_count;
2029 vm_zf_iterator = 0;
2030 /*
2031 * even if we've already called vm_page_free_reserve,
2032 * call it again here to ensure that the targets are
2033 * accurately calculated (it uses vm_page_free_count_init);
2034 * calling it with an arg of 0 will not change the reserve
2035 * but will re-calculate free_min and free_target
2036 */
2037 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED) {
2038 int scale;
2039
2040 /*
2041 * HFS Journaling exists on the vm_pageout path...
2042 * it may need to allocate a lot more memory than a
2043 * typical driver/filesystem... if it can't allocate
2044 * the transaction buffer(s), we will deadlock...
2045 * the amount is scaled
2046 * based on the physical footprint of the system, so
2047 * let's double our reserve on systems with > 512Mbytes
2048 */
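/*
* Illustrative arithmetic (assuming the common 4K page size): the
* test below compares vm_page_free_count against
* (512 * 1024 * 1024) / 4096 = 131072 pages, so the reserve is
* doubled only when more than 512MB worth of pages are free at this
* point in startup.
*/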
2049 if (vm_page_free_count > (512 * 1024 * 1024) / PAGE_SIZE)
2050 scale = 2;
2051 else
2052 scale = 1;
2053 vm_page_free_reserve((VM_PAGE_FREE_RESERVED * scale) - vm_page_free_reserved);
2054 } else
2055 vm_page_free_reserve(0);
2056
2057 vm_pageout_continue();
2058 /*NOTREACHED*/
2059 }
2060
2061 kern_return_t
2062 vm_pageout_emergency_availability_request()
2063 {
2064 vm_page_t m;
2065 vm_object_t object;
2066
2067 vm_page_lock_queues();
2068 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
2069
2070 while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) {
2071
2072 object = m->object;
2073
2074 if ( !vm_object_lock_try(object)) {
2075 m = (vm_page_t) queue_next(&m->pageq);
2076 continue;
2077 }
2078 if ((!object->alive) || (object->pageout)) {
2079 vm_object_unlock(object);
2080
2081 m = (vm_page_t) queue_next(&m->pageq);
2082 continue;
2083 }
2084 if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious
2085 || m->precious || m->cleaning
2086 || m->dump_cleaning || m->error
2087 || m->pageout || m->laundry
2088 || m->list_req_pending
2089 || m->overwriting) {
2090 vm_object_unlock(object);
2091
2092 m = (vm_page_t) queue_next(&m->pageq);
2093 continue;
2094 }
2095 m->busy = TRUE;
2096 pmap_page_protect(m->phys_page, VM_PROT_NONE);
2097 m->dirty = pmap_is_modified(m->phys_page);
2098
2099 if (m->dirty) {
2100 PAGE_WAKEUP_DONE(m);
2101 vm_object_unlock(object);
2102
2103 m = (vm_page_t) queue_next(&m->pageq);
2104 continue;
2105 }
2106 vm_page_free(m);
2107 vm_object_unlock(object);
2108 vm_page_unlock_queues();
2109
2110 return KERN_SUCCESS;
2111 }
2112 m = (vm_page_t) queue_first(&vm_page_queue_active);
2113
2114 while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)) {
2115
2116 object = m->object;
2117
2118 if ( !vm_object_lock_try(object)) {
2119 m = (vm_page_t) queue_next(&m->pageq);
2120 continue;
2121 }
2122 if ((!object->alive) || (object->pageout)) {
2123 vm_object_unlock(object);
2124
2125 m = (vm_page_t) queue_next(&m->pageq);
2126 continue;
2127 }
2128 if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious
2129 || m->precious || m->cleaning
2130 || m->dump_cleaning || m->error
2131 || m->pageout || m->laundry
2132 || m->list_req_pending
2133 || m->overwriting) {
2134 vm_object_unlock(object);
2135
2136 m = (vm_page_t) queue_next(&m->pageq);
2137 continue;
2138 }
2139 m->busy = TRUE;
2140 pmap_page_protect(m->phys_page, VM_PROT_NONE);
2141 m->dirty = pmap_is_modified(m->phys_page);
2142
2143 if (m->dirty) {
2144 PAGE_WAKEUP_DONE(m);
2145 vm_object_unlock(object);
2146
2147 m = (vm_page_t) queue_next(&m->pageq);
2148 continue;
2149 }
2150 vm_page_free(m);
2151 vm_object_unlock(object);
2152 vm_page_unlock_queues();
2153
2154 return KERN_SUCCESS;
2155 }
2156 vm_page_unlock_queues();
2157
2158 return KERN_FAILURE;
2159 }
2160
2161
2162 static upl_t
2163 upl_create(
2164 int flags,
2165 vm_size_t size)
2166 {
2167 upl_t upl;
2168 int page_field_size; /* bit field in word size buf */
2169
2170 page_field_size = 0;
2171 if (flags & UPL_CREATE_LITE) {
2172 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2173 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2174 }
2175 if(flags & UPL_CREATE_INTERNAL) {
2176 upl = (upl_t)kalloc(sizeof(struct upl)
2177 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2178 + page_field_size);
2179 } else {
2180 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
2181 }
2182 upl->flags = 0;
2183 upl->src_object = NULL;
2184 upl->kaddr = (vm_offset_t)0;
2185 upl->size = 0;
2186 upl->map_object = NULL;
2187 upl->ref_count = 1;
2188 upl_lock_init(upl);
2189 #ifdef UBC_DEBUG
2190 upl->ubc_alias1 = 0;
2191 upl->ubc_alias2 = 0;
2192 #endif /* UBC_DEBUG */
2193 return(upl);
2194 }
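/*
* Sizing sketch for the UPL_CREATE_LITE bit field above (editorial,
* assuming 4K pages): one bit is kept per page, rounded up to whole
* bytes and then to a 4-byte boundary.  A 1MB UPL covers 256 pages,
* so
*
*	page_field_size = ((256 + 7) >> 3)        = 32 bytes
*	page_field_size = (32 + 3) & 0xFFFFFFFC   = 32 bytes
*
* The same bits are later addressed with the
* lite_list[pg_num>>5] & (1 << (pg_num & 31)) idiom used throughout
* this file.
*/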
2195
2196 static void
2197 upl_destroy(
2198 upl_t upl)
2199 {
2200 int page_field_size; /* bit field in word size buf */
2201
2202 #ifdef UBC_DEBUG
2203 {
2204 upl_t upl_ele;
2205 vm_object_t object;
2206 if (upl->map_object->pageout) {
2207 object = upl->map_object->shadow;
2208 } else {
2209 object = upl->map_object;
2210 }
2211 vm_object_lock(object);
2212 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
2213 if(upl_ele == upl) {
2214 queue_remove(&object->uplq,
2215 upl_ele, upl_t, uplq);
2216 break;
2217 }
2218 }
2219 vm_object_unlock(object);
2220 }
2221 #endif /* UBC_DEBUG */
2222 /* drop a reference on the map_object whether or */
2223 /* not a pageout object is inserted */
2224 if(upl->map_object->pageout)
2225 vm_object_deallocate(upl->map_object);
2226
2227 page_field_size = 0;
2228 if (upl->flags & UPL_LITE) {
2229 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2230 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2231 }
2232 if(upl->flags & UPL_INTERNAL) {
2233 kfree((vm_offset_t)upl,
2234 sizeof(struct upl) +
2235 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2236 + page_field_size);
2237 } else {
2238 kfree((vm_offset_t)upl, sizeof(struct upl) + page_field_size);
2239 }
2240 }
2241
2242 __private_extern__ void
2243 uc_upl_dealloc(
2244 upl_t upl)
2245 {
2246 upl->ref_count -= 1;
2247 if(upl->ref_count == 0) {
2248 upl_destroy(upl);
2249 }
2250 }
2251
2252 void
2253 upl_deallocate(
2254 upl_t upl)
2255 {
2256
2257 upl->ref_count -= 1;
2258 if(upl->ref_count == 0) {
2259 upl_destroy(upl);
2260 }
2261 }
2262
2263 /*
2264 * Routine: vm_object_upl_request
2265 * Purpose:
2266 * Cause the population of a portion of a vm_object.
2267 * Depending on the nature of the request, the pages
2268 * returned may contain valid data or be uninitialized.
2269 * A page list structure, listing the physical pages,
2270 * will be returned upon request.
2271 * This function is called by the file system or any other
2272 * supplier of backing store to a pager.
2273 * IMPORTANT NOTE: The caller must still respect the relationship
2274 * between the vm_object and its backing memory object. The
2275 * caller MUST NOT substitute changes in the backing file
2276 * without first doing a memory_object_lock_request on the
2277 * target range unless it is known that the pages are not
2278 * shared with another entity at the pager level.
2279 * Copy_in_to:
2280 * if a page list structure is present
2281 * return the mapped physical pages, where a
2282 * page is not present, return a non-initialized
2283 * one. If the no_sync bit is turned on, don't
2284 * call the pager unlock to synchronize with other
2285 * possible copies of the page. Leave pages busy
2286 * in the original object, if a page list structure
2287 * was specified. When a commit of the page list
2288 * pages is done, the dirty bit will be set for each one.
2289 * Copy_out_from:
2290 * If a page list structure is present, return
2291 * all mapped pages. Where a page does not exist
2292 * map a zero filled one. Leave pages busy in
2293 * the original object. If a page list structure
2294 * is not specified, this call is a no-op.
2295 *
2296 * Note: access of default pager objects has a rather interesting
2297 * twist. The caller of this routine, presumably the file system
2298 * page cache handling code, will never actually make a request
2299 * against a default pager backed object. Only the default
2300 * pager will make requests on backing-store-related vm_objects.
2301 * In this way the default pager can maintain the relationship
2302 * between backing store files (abstract memory objects) and
2303 * the vm_objects (cache objects) they support.
2304 *
2305 */
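/*
* Illustrative usage sketch (an editorial assumption, not part of the
* original documentation): a pageout-side caller typically asks for
* an internal UPL over a run of pages and later commits or aborts it,
* roughly as follows; locking, error handling and the exact flag
* combination are simplified.
*
*	upl_t		upl;
*	unsigned int	count = MAX_UPL_TRANSFER;
*	boolean_t	empty;
*	kern_return_t	kr;
*
*	kr = vm_object_upl_request(object, offset,
*			MAX_UPL_TRANSFER * PAGE_SIZE,
*			&upl, NULL, &count,
*			UPL_SET_INTERNAL | UPL_COPYOUT_FROM |
*			UPL_RET_ONLY_DIRTY | UPL_FOR_PAGEOUT);
*	if (kr == KERN_SUCCESS) {
*		... hand the pages to the pager ...
*		upl_commit_range(upl, 0, upl->size, 0, NULL, 0, &empty);
*		upl_deallocate(upl);
*	}
*/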
2306 __private_extern__ kern_return_t
2307 vm_object_upl_request(
2308 vm_object_t object,
2309 vm_object_offset_t offset,
2310 vm_size_t size,
2311 upl_t *upl_ptr,
2312 upl_page_info_array_t user_page_list,
2313 unsigned int *page_list_count,
2314 int cntrl_flags)
2315 {
2316 vm_page_t dst_page;
2317 vm_object_offset_t dst_offset = offset;
2318 vm_size_t xfer_size = size;
2319 boolean_t do_m_lock = FALSE;
2320 boolean_t dirty;
2321 boolean_t hw_dirty;
2322 upl_t upl = NULL;
2323 int entry;
2324 boolean_t encountered_lrp = FALSE;
2325
2326 vm_page_t alias_page = NULL;
2327 int page_ticket;
2328 wpl_array_t lite_list;
2329
2330 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2331 >> UPL_PAGE_TICKET_SHIFT;
2332
2333 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2334 size = MAX_UPL_TRANSFER * PAGE_SIZE;
2335 }
2336
2337 if(cntrl_flags & UPL_SET_INTERNAL)
2338 if(page_list_count != NULL)
2339 *page_list_count = MAX_UPL_TRANSFER;
2340 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
2341 ((page_list_count != NULL) && (*page_list_count != 0)
2342 && *page_list_count < (size/page_size)))
2343 return KERN_INVALID_ARGUMENT;
2344
2345 if((!object->internal) && (object->paging_offset != 0))
2346 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
2347
2348 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2349 return KERN_SUCCESS;
2350 }
2351
2352 if(upl_ptr) {
2353 if(cntrl_flags & UPL_SET_INTERNAL) {
2354 if(cntrl_flags & UPL_SET_LITE) {
2355 vm_offset_t page_field_size;
2356 upl = upl_create(
2357 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2358 size);
2359 user_page_list = (upl_page_info_t *)
2360 (((vm_offset_t)upl) + sizeof(struct upl));
2361 lite_list = (wpl_array_t)
2362 (((vm_offset_t)user_page_list) +
2363 ((size/PAGE_SIZE) *
2364 sizeof(upl_page_info_t)));
2365 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2366 page_field_size =
2367 (page_field_size + 3) & 0xFFFFFFFC;
2368 bzero((char *)lite_list, page_field_size);
2369 upl->flags =
2370 UPL_LITE | UPL_INTERNAL;
2371 } else {
2372 upl = upl_create(UPL_CREATE_INTERNAL, size);
2373 user_page_list = (upl_page_info_t *)
2374 (((vm_offset_t)upl)
2375 + sizeof(struct upl));
2376 upl->flags = UPL_INTERNAL;
2377 }
2378 } else {
2379 if(cntrl_flags & UPL_SET_LITE) {
2380 vm_offset_t page_field_size;
2381 upl = upl_create(UPL_CREATE_LITE, size);
2382 lite_list = (wpl_array_t)
2383 (((vm_offset_t)upl) + sizeof(struct upl));
2384 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2385 page_field_size =
2386 (page_field_size + 3) & 0xFFFFFFFC;
2387 bzero((char *)lite_list, page_field_size);
2388 upl->flags = UPL_LITE;
2389 } else {
2390 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2391 upl->flags = 0;
2392 }
2393 }
2394
2395 if(object->phys_contiguous) {
2396 upl->map_object = object;
2397 /* don't need any shadow mappings for this one */
2398 /* since it is already I/O memory */
2399 upl->flags |= UPL_DEVICE_MEMORY;
2400
2401 vm_object_lock(object);
2402 vm_object_paging_begin(object);
2403 vm_object_unlock(object);
2404
2405 /* paging_in_progress protects paging_offset */
2406 upl->offset = offset + object->paging_offset;
2407 upl->size = size;
2408 *upl_ptr = upl;
2409 if(user_page_list) {
2410 user_page_list[0].phys_addr =
2411 (offset + object->shadow_offset)>>12;
2412 user_page_list[0].device = TRUE;
2413 }
2414
2415 if(page_list_count != NULL) {
2416 if (upl->flags & UPL_INTERNAL) {
2417 *page_list_count = 0;
2418 } else {
2419 *page_list_count = 1;
2420 }
2421 }
2422 return KERN_SUCCESS;
2423 }
2424 if(user_page_list)
2425 user_page_list[0].device = FALSE;
2426
2427 if(cntrl_flags & UPL_SET_LITE) {
2428 upl->map_object = object;
2429 } else {
2430 upl->map_object = vm_object_allocate(size);
2431 vm_object_lock(upl->map_object);
2432 upl->map_object->shadow = object;
2433 upl->map_object->pageout = TRUE;
2434 upl->map_object->can_persist = FALSE;
2435 upl->map_object->copy_strategy =
2436 MEMORY_OBJECT_COPY_NONE;
2437 upl->map_object->shadow_offset = offset;
2438 upl->map_object->wimg_bits = object->wimg_bits;
2439 vm_object_unlock(upl->map_object);
2440 }
2441 }
2442 if (!(cntrl_flags & UPL_SET_LITE)) {
2443 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2444 }
2445 vm_object_lock(object);
2446 vm_object_paging_begin(object);
2447
2448 /* we can lock in the paging_offset once paging_in_progress is set */
2449 if(upl_ptr) {
2450 upl->size = size;
2451 upl->offset = offset + object->paging_offset;
2452 *upl_ptr = upl;
2453 #ifdef UBC_DEBUG
2454 queue_enter(&object->uplq, upl, upl_t, uplq);
2455 #endif /* UBC_DEBUG */
2456 }
2457
2458 entry = 0;
2459 if(cntrl_flags & UPL_COPYOUT_FROM) {
2460 upl->flags |= UPL_PAGE_SYNC_DONE;
2461
2462 while (xfer_size) {
2463 if((alias_page == NULL) &&
2464 !(cntrl_flags & UPL_SET_LITE)) {
2465 vm_object_unlock(object);
2466 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2467 vm_object_lock(object);
2468 }
2469 if(((dst_page = vm_page_lookup(object,
2470 dst_offset)) == VM_PAGE_NULL) ||
2471 dst_page->fictitious ||
2472 dst_page->absent ||
2473 dst_page->error ||
2474 (dst_page->wire_count != 0 &&
2475 !dst_page->pageout) ||
2476 ((!(dst_page->dirty || dst_page->precious ||
2477 pmap_is_modified(dst_page->phys_page)))
2478 && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
2479 ((!(dst_page->inactive))
2480 && (dst_page->page_ticket != page_ticket)
2481 && ((dst_page->page_ticket+1) != page_ticket)
2482 && (cntrl_flags & UPL_FOR_PAGEOUT)) ||
2483 ((!dst_page->list_req_pending) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2484 (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
2485 pmap_is_referenced(dst_page->phys_page))) {
2486 if(user_page_list) {
2487 user_page_list[entry].phys_addr = 0;
2488 }
2489 } else {
2490
2491 if(dst_page->busy &&
2492 (!(dst_page->list_req_pending &&
2493 dst_page->pageout))) {
2494 if(cntrl_flags & UPL_NOBLOCK) {
2495 if(user_page_list) {
2496 user_page_list[entry].phys_addr = 0;
2497 }
2498 entry++;
2499 dst_offset += PAGE_SIZE_64;
2500 xfer_size -= PAGE_SIZE;
2501 continue;
2502 }
2503 /*someone else is playing with the */
2504 /* page. We will have to wait. */
2505 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2506 continue;
2507 }
2508 /* Someone else already cleaning the page? */
2509 if((dst_page->cleaning || dst_page->absent ||
2510 dst_page->wire_count != 0) &&
2511 !dst_page->list_req_pending) {
2512 if(user_page_list) {
2513 user_page_list[entry].phys_addr = 0;
2514 }
2515 entry++;
2516 dst_offset += PAGE_SIZE_64;
2517 xfer_size -= PAGE_SIZE;
2518 continue;
2519 }
2520 /* eliminate all mappings from the */
2521 /* original object and its progeny */
2522
2523 vm_page_lock_queues();
2524
2525 /* pageout statistics gathering. count */
2526 /* all the pages we will page out that */
2527 /* were not counted in the initial */
2528 /* vm_pageout_scan work */
2529 if(dst_page->list_req_pending)
2530 encountered_lrp = TRUE;
2531 if((dst_page->dirty ||
2532 (dst_page->object->internal &&
2533 dst_page->precious)) &&
2534 (dst_page->list_req_pending
2535 == FALSE)) {
2536 if(encountered_lrp) {
2537 CLUSTER_STAT
2538 (pages_at_higher_offsets++;)
2539 } else {
2540 CLUSTER_STAT
2541 (pages_at_lower_offsets++;)
2542 }
2543 }
2544
2545 /* Turn off busy indication on pending */
2546 /* pageout. Note: we can only get here */
2547 /* in the request pending case. */
2548 dst_page->list_req_pending = FALSE;
2549 dst_page->busy = FALSE;
2550 dst_page->cleaning = FALSE;
2551
2552 hw_dirty = pmap_is_modified(dst_page->phys_page);
2553 dirty = hw_dirty ? TRUE : dst_page->dirty;
2554
2555 if(cntrl_flags & UPL_SET_LITE) {
2556 int pg_num;
2557 pg_num = (dst_offset-offset)/PAGE_SIZE;
2558 lite_list[pg_num>>5] |=
2559 1 << (pg_num & 31);
2560 if (hw_dirty)
2561 pmap_clear_modify(dst_page->phys_page);
2562 /*
2563 * Record that this page has been
2564 * written out
2565 */
2566 #if MACH_PAGEMAP
2567 vm_external_state_set(
2568 object->existence_map,
2569 dst_page->offset);
2570 #endif /*MACH_PAGEMAP*/
2571
2572 /*
2573 * Mark original page as cleaning
2574 * in place.
2575 */
2576 dst_page->cleaning = TRUE;
2577 dst_page->dirty = TRUE;
2578 dst_page->precious = FALSE;
2579 } else {
2580 /* use pageclean setup, it is more */
2581 /* convenient even for the pageout */
2582 /* cases here */
2583 vm_pageclean_setup(dst_page,
2584 alias_page, upl->map_object,
2585 size - xfer_size);
2586
2587 alias_page->absent = FALSE;
2588 alias_page = NULL;
2589 }
2590
2591 if(!dirty) {
2592 dst_page->dirty = FALSE;
2593 dst_page->precious = TRUE;
2594 }
2595
2596 if(dst_page->pageout)
2597 dst_page->busy = TRUE;
2598
2599 if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
2600 || (cntrl_flags & UPL_FOR_PAGEOUT)) {
2601 /* deny access to the target page */
2602 /* while it is being worked on */
2603 if((!dst_page->pageout) &&
2604 (dst_page->wire_count == 0)) {
2605 dst_page->busy = TRUE;
2606 dst_page->pageout = TRUE;
2607 vm_page_wire(dst_page);
2608 }
2609 }
2610 if(user_page_list) {
2611 user_page_list[entry].phys_addr
2612 = dst_page->phys_page;
2613 user_page_list[entry].dirty =
2614 dst_page->dirty;
2615 user_page_list[entry].pageout =
2616 dst_page->pageout;
2617 user_page_list[entry].absent =
2618 dst_page->absent;
2619 user_page_list[entry].precious =
2620 dst_page->precious;
2621 }
2622 vm_page_unlock_queues();
2623 }
2624 entry++;
2625 dst_offset += PAGE_SIZE_64;
2626 xfer_size -= PAGE_SIZE;
2627 }
2628 } else {
2629 while (xfer_size) {
2630 if((alias_page == NULL) &&
2631 !(cntrl_flags & UPL_SET_LITE)) {
2632 vm_object_unlock(object);
2633 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2634 vm_object_lock(object);
2635 }
2636 dst_page = vm_page_lookup(object, dst_offset);
2637
2638 if(dst_page != VM_PAGE_NULL) {
2639 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2640 !((dst_page->list_req_pending)
2641 && (dst_page->absent))) {
2642 /* we are doing extended range */
2643 /* requests. we want to grab */
2644 /* pages around some which are */
2645 /* already present. */
2646 if(user_page_list) {
2647 user_page_list[entry].phys_addr = 0;
2648 }
2649 entry++;
2650 dst_offset += PAGE_SIZE_64;
2651 xfer_size -= PAGE_SIZE;
2652 continue;
2653 }
2654 if((dst_page->cleaning) &&
2655 !(dst_page->list_req_pending)) {
2656 /*someone else is writing to the */
2657 /* page. We will have to wait. */
2658 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
2659 continue;
2660 }
2661 if ((dst_page->fictitious &&
2662 dst_page->list_req_pending)) {
2663 /* dump the fictitious page */
2664 dst_page->list_req_pending = FALSE;
2665 dst_page->clustered = FALSE;
2666
2667 vm_page_lock_queues();
2668 vm_page_free(dst_page);
2669 vm_page_unlock_queues();
2670
2671 } else if ((dst_page->absent &&
2672 dst_page->list_req_pending)) {
2673 /* the default_pager case */
2674 dst_page->list_req_pending = FALSE;
2675 dst_page->busy = FALSE;
2676 dst_page->clustered = FALSE;
2677 }
2678 }
2679 if((dst_page = vm_page_lookup(object, dst_offset)) ==
2680 VM_PAGE_NULL) {
2681 if(object->private) {
2682 /*
2683 * This is a nasty wrinkle for users
2684 * of UPLs who encounter device or
2685 * private memory; however, it is
2686 * unavoidable: only a fault can
2687 * resolve the actual backing
2688 * physical page by asking the
2689 * backing device.
2690 */
2691 if(user_page_list) {
2692 user_page_list[entry].phys_addr = 0;
2693 }
2694 entry++;
2695 dst_offset += PAGE_SIZE_64;
2696 xfer_size -= PAGE_SIZE;
2697 continue;
2698 }
2699 /* need to allocate a page */
2700 dst_page = vm_page_alloc(object, dst_offset);
2701 if (dst_page == VM_PAGE_NULL) {
2702 vm_object_unlock(object);
2703 VM_PAGE_WAIT();
2704 vm_object_lock(object);
2705 continue;
2706 }
2707 dst_page->busy = FALSE;
2708 #if 0
2709 if(cntrl_flags & UPL_NO_SYNC) {
2710 dst_page->page_lock = 0;
2711 dst_page->unlock_request = 0;
2712 }
2713 #endif
2714 dst_page->absent = TRUE;
2715 object->absent_count++;
2716 }
2717 #if 1
2718 if(cntrl_flags & UPL_NO_SYNC) {
2719 dst_page->page_lock = 0;
2720 dst_page->unlock_request = 0;
2721 }
2722 #endif /* 1 */
2723 dst_page->overwriting = TRUE;
2724 if(dst_page->fictitious) {
2725 panic("need corner case for fictitious page");
2726 }
2727 if(dst_page->page_lock) {
2728 do_m_lock = TRUE;
2729 }
2730 if(upl_ptr) {
2731
2732 /* eliminate all mappings from the */
2733 /* original object and its progeny */
2734
2735 if(dst_page->busy) {
2736 /*someone else is playing with the */
2737 /* page. We will have to wait. */
2738 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
2739 continue;
2740 }
2741 vm_page_lock_queues();
2742
2743 if( !(cntrl_flags & UPL_FILE_IO)) {
2744 pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
2745 }
2746 hw_dirty = pmap_is_modified(dst_page->phys_page);
2747 dirty = hw_dirty ? TRUE : dst_page->dirty;
2748
2749 if(cntrl_flags & UPL_SET_LITE) {
2750 int pg_num;
2751 pg_num = (dst_offset-offset)/PAGE_SIZE;
2752 lite_list[pg_num>>5] |=
2753 1 << (pg_num & 31);
2754 if (hw_dirty)
2755 pmap_clear_modify(dst_page->phys_page);
2756 /*
2757 * Record that this page has been
2758 * written out
2759 */
2760 #if MACH_PAGEMAP
2761 vm_external_state_set(
2762 object->existence_map,
2763 dst_page->offset);
2764 #endif /*MACH_PAGEMAP*/
2765
2766 /*
2767 * Mark original page as cleaning
2768 * in place.
2769 */
2770 dst_page->cleaning = TRUE;
2771 dst_page->dirty = TRUE;
2772 dst_page->precious = FALSE;
2773 } else {
2774 /* use pageclean setup, it is more */
2775 /* convenient even for the pageout */
2776 /* cases here */
2777 vm_pageclean_setup(dst_page,
2778 alias_page, upl->map_object,
2779 size - xfer_size);
2780
2781 alias_page->absent = FALSE;
2782 alias_page = NULL;
2783 }
2784
2785 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
2786 /* clean in place for read implies */
2787 /* that a write will be done on all */
2788 /* the pages that are dirty before */
2789 /* a upl commit is done. The caller */
2790 /* is obligated to preserve the */
2791 /* contents of all pages marked */
2792 /* dirty. */
2793 upl->flags |= UPL_CLEAR_DIRTY;
2794 }
2795
2796 if(!dirty) {
2797 dst_page->dirty = FALSE;
2798 dst_page->precious = TRUE;
2799 }
2800
2801 if (dst_page->wire_count == 0) {
2802 /* deny access to the target page while */
2803 /* it is being worked on */
2804 dst_page->busy = TRUE;
2805 } else {
2806 vm_page_wire(dst_page);
2807 }
2808 /*
2809 * expect the page to be used
2810 */
2811 dst_page->reference = TRUE;
2812 dst_page->precious =
2813 (cntrl_flags & UPL_PRECIOUS)
2814 ? TRUE : FALSE;
2815 if(user_page_list) {
2816 user_page_list[entry].phys_addr
2817 = dst_page->phys_page;
2818 user_page_list[entry].dirty =
2819 dst_page->dirty;
2820 user_page_list[entry].pageout =
2821 dst_page->pageout;
2822 user_page_list[entry].absent =
2823 dst_page->absent;
2824 user_page_list[entry].precious =
2825 dst_page->precious;
2826 }
2827 vm_page_unlock_queues();
2828 }
2829 entry++;
2830 dst_offset += PAGE_SIZE_64;
2831 xfer_size -= PAGE_SIZE;
2832 }
2833 }
2834 if (upl->flags & UPL_INTERNAL) {
2835 if(page_list_count != NULL)
2836 *page_list_count = 0;
2837 } else if ((page_list_count != NULL) &&
2838 (*page_list_count > entry)) {
2839 *page_list_count = entry;
2840 }
2841
2842 if(alias_page != NULL) {
2843 vm_page_lock_queues();
2844 vm_page_free(alias_page);
2845 vm_page_unlock_queues();
2846 }
2847
2848 if(do_m_lock) {
2849 vm_prot_t access_required;
2850 /* call back all associated pages from other users of the pager */
2851 /* all future updates will be on data which is based on the */
2852 /* changes we are going to make here. Note: it is assumed that */
2853 /* we already hold copies of the data so we will not be seeing */
2854 /* an avalanche of incoming data from the pager */
2855 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
2856 ? VM_PROT_READ : VM_PROT_WRITE;
2857 while (TRUE) {
2858 kern_return_t rc;
2859
2860 if(!object->pager_ready) {
2861 wait_result_t wait_result;
2862
2863 wait_result = vm_object_sleep(object,
2864 VM_OBJECT_EVENT_PAGER_READY,
2865 THREAD_UNINT);
2866 if (wait_result != THREAD_AWAKENED) {
2867 vm_object_unlock(object);
2868 return(KERN_FAILURE);
2869 }
2870 continue;
2871 }
2872
2873 vm_object_unlock(object);
2874
2875 if ((rc = memory_object_data_unlock(
2876 object->pager,
2877 dst_offset + object->paging_offset,
2878 size,
2879 access_required))) {
2880 if (rc == MACH_SEND_INTERRUPTED)
2881 continue;
2882 else
2883 return KERN_FAILURE;
2884 }
2885 break;
2886
2887 }
2888 /* let's wait on the last page requested */
2889 /* NOTE: we will have to update lock completed routine to signal */
2890 if(dst_page != VM_PAGE_NULL &&
2891 (access_required & dst_page->page_lock) != access_required) {
2892 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
2893 thread_block((void (*)(void))0);
2894 vm_object_lock(object);
2895 }
2896 }
2897 vm_object_unlock(object);
2898 return KERN_SUCCESS;
2899 }
2900
2901 /* JMM - Backward compatibility for now */
2902 kern_return_t
2903 vm_fault_list_request(
2904 memory_object_control_t control,
2905 vm_object_offset_t offset,
2906 vm_size_t size,
2907 upl_t *upl_ptr,
2908 upl_page_info_t **user_page_list_ptr,
2909 int page_list_count,
2910 int cntrl_flags)
2911 {
2912 int local_list_count;
2913 upl_page_info_t *user_page_list;
2914 kern_return_t kr;
2915
2916 if (user_page_list_ptr != NULL) {
2917 local_list_count = page_list_count;
2918 user_page_list = *user_page_list_ptr;
2919 } else {
2920 local_list_count = 0;
2921 user_page_list = NULL;
2922 }
2923 kr = memory_object_upl_request(control,
2924 offset,
2925 size,
2926 upl_ptr,
2927 user_page_list,
2928 &local_list_count,
2929 cntrl_flags);
2930
2931 if(kr != KERN_SUCCESS)
2932 return kr;
2933
2934 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
2935 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
2936 }
2937
2938 return KERN_SUCCESS;
2939 }
2940
2941
2942
2943 /*
2944 * Routine: vm_object_super_upl_request
2945 * Purpose:
2946 * Cause the population of a portion of a vm_object
2947 * in much the same way as memory_object_upl_request.
2948 * Depending on the nature of the request, the pages
2949 * returned may contain valid data or be uninitialized.
2950 * However, the region may be expanded up to the super
2951 * cluster size provided.
2952 */
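/*
* Worked example (editorial, assuming a 64K super_cluster and 4K
* pages): a request at offset 0x11000 of size 0x2000 is aligned down
* to base_offset = 0x11000 & ~0xFFFF = 0x10000.  Since the request
* ends at 0x13000, inside that super cluster, super_size stays at
* 0x10000 and the request is widened to the whole 64K run (free-page
* levels and the object size permitting); a request straddling the
* 0x20000 boundary would instead be widened to two super clusters.
*/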
2953
2954 __private_extern__ kern_return_t
2955 vm_object_super_upl_request(
2956 vm_object_t object,
2957 vm_object_offset_t offset,
2958 vm_size_t size,
2959 vm_size_t super_cluster,
2960 upl_t *upl,
2961 upl_page_info_t *user_page_list,
2962 unsigned int *page_list_count,
2963 int cntrl_flags)
2964 {
2965 vm_page_t target_page;
2966 int ticket;
2967
2968 if(object->paging_offset > offset)
2969 return KERN_FAILURE;
2970
2971 assert(object->paging_in_progress);
2972 offset = offset - object->paging_offset;
2973 if(cntrl_flags & UPL_FOR_PAGEOUT) {
2974 if((target_page = vm_page_lookup(object, offset))
2975 != VM_PAGE_NULL) {
2976 ticket = target_page->page_ticket;
2977 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
2978 cntrl_flags = cntrl_flags |
2979 ((ticket << UPL_PAGE_TICKET_SHIFT)
2980 & UPL_PAGE_TICKET_MASK);
2981 }
2982 }
2983
2984
2985 /* turns off super cluster exercised by the default_pager */
2986 /*
2987 super_cluster = size;
2988 */
2989 if ((super_cluster > size) &&
2990 (vm_page_free_count > vm_page_free_reserved)) {
2991
2992 vm_object_offset_t base_offset;
2993 vm_size_t super_size;
2994
2995 base_offset = (offset &
2996 ~((vm_object_offset_t) super_cluster - 1));
2997 super_size = (offset+size) > (base_offset + super_cluster) ?
2998 super_cluster<<1 : super_cluster;
2999 super_size = ((base_offset + super_size) > object->size) ?
3000 (object->size - base_offset) : super_size;
3001 if(offset > (base_offset + super_size))
3002 panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
3003 /* apparently there is a case where the vm requests a */
3004 /* page to be written out whose offset is beyond the */
3005 /* object size */
3006 if((offset + size) > (base_offset + super_size))
3007 super_size = (offset + size) - base_offset;
3008
3009 offset = base_offset;
3010 size = super_size;
3011 }
3012 return vm_object_upl_request(object, offset, size,
3013 upl, user_page_list, page_list_count,
3014 cntrl_flags);
3015 }
3016
3017
3018 kern_return_t
3019 vm_upl_map(
3020 vm_map_t map,
3021 upl_t upl,
3022 vm_offset_t *dst_addr)
3023 {
3024 vm_size_t size;
3025 vm_object_offset_t offset;
3026 vm_offset_t addr;
3027 vm_page_t m;
3028 kern_return_t kr;
3029
3030 if (upl == UPL_NULL)
3031 return KERN_INVALID_ARGUMENT;
3032
3033 upl_lock(upl);
3034
3035 /* check to see if already mapped */
3036 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3037 upl_unlock(upl);
3038 return KERN_FAILURE;
3039 }
3040
3041 if((!(upl->map_object->pageout)) &&
3042 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3043 (upl->map_object->phys_contiguous))) {
3044 vm_object_t object;
3045 vm_page_t alias_page;
3046 vm_object_offset_t new_offset;
3047 int pg_num;
3048 wpl_array_t lite_list;
3049
3050 if(upl->flags & UPL_INTERNAL) {
3051 lite_list = (wpl_array_t)
3052 ((((vm_offset_t)upl) + sizeof(struct upl))
3053 + ((upl->size/PAGE_SIZE)
3054 * sizeof(upl_page_info_t)));
3055 } else {
3056 lite_list = (wpl_array_t)
3057 (((vm_offset_t)upl) + sizeof(struct upl));
3058 }
3059 object = upl->map_object;
3060 upl->map_object = vm_object_allocate(upl->size);
3061 vm_object_lock(upl->map_object);
3062 upl->map_object->shadow = object;
3063 upl->map_object->pageout = TRUE;
3064 upl->map_object->can_persist = FALSE;
3065 upl->map_object->copy_strategy =
3066 MEMORY_OBJECT_COPY_NONE;
3067 upl->map_object->shadow_offset =
3068 upl->offset - object->paging_offset;
3069 upl->map_object->wimg_bits = object->wimg_bits;
3070 vm_object_unlock(upl->map_object);
3071 offset = upl->map_object->shadow_offset;
3072 new_offset = 0;
3073 size = upl->size;
3074 vm_object_lock(object);
3075 while(size) {
3076 pg_num = (new_offset)/PAGE_SIZE;
3077 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3078 vm_object_unlock(object);
3079 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3080 vm_object_lock(object);
3081 m = vm_page_lookup(object, offset);
3082 if (m == VM_PAGE_NULL) {
3083 panic("vm_upl_map: page missing\n");
3084 }
3085
3086 vm_object_paging_begin(object);
3087
3088 /*
3089 * Convert the fictitious page to a private
3090 * shadow of the real page.
3091 */
3092 assert(alias_page->fictitious);
3093 alias_page->fictitious = FALSE;
3094 alias_page->private = TRUE;
3095 alias_page->pageout = TRUE;
3096 alias_page->phys_page = m->phys_page;
3097 vm_page_wire(alias_page);
3098
3099 vm_page_insert(alias_page,
3100 upl->map_object, new_offset);
3101 assert(!alias_page->wanted);
3102 alias_page->busy = FALSE;
3103 alias_page->absent = FALSE;
3104 }
3105
3106 size -= PAGE_SIZE;
3107 offset += PAGE_SIZE_64;
3108 new_offset += PAGE_SIZE_64;
3109 }
3110 vm_object_unlock(object);
3111 }
3112 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3113 offset = upl->offset - upl->map_object->paging_offset;
3114 else
3115 offset = 0;
3116
3117 size = upl->size;
3118
3119 vm_object_lock(upl->map_object);
3120 upl->map_object->ref_count++;
3121 vm_object_res_reference(upl->map_object);
3122 vm_object_unlock(upl->map_object);
3123
3124 *dst_addr = 0;
3125
3126
3127 /* NEED A UPL_MAP ALIAS */
3128 kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
3129 upl->map_object, offset, FALSE,
3130 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3131
3132 if (kr != KERN_SUCCESS) {
3133 upl_unlock(upl);
3134 return(kr);
3135 }
3136
3137 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3138 m = vm_page_lookup(upl->map_object, offset);
3139 if(m) {
3140 unsigned int cache_attr;
3141 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3142
3143 PMAP_ENTER(map->pmap, addr,
3144 m, VM_PROT_ALL,
3145 cache_attr, TRUE);
3146 }
3147 offset+=PAGE_SIZE_64;
3148 }
3149 upl->ref_count++; /* hold a reference for the mapping */
3150 upl->flags |= UPL_PAGE_LIST_MAPPED;
3151 upl->kaddr = *dst_addr;
3152 upl_unlock(upl);
3153 return KERN_SUCCESS;
3154 }
3155
3156
3157 kern_return_t
3158 vm_upl_unmap(
3159 vm_map_t map,
3160 upl_t upl)
3161 {
3162 vm_address_t addr;
3163 vm_size_t size;
3164
3165 if (upl == UPL_NULL)
3166 return KERN_INVALID_ARGUMENT;
3167
3168 upl_lock(upl);
3169 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
3170 addr = upl->kaddr;
3171 size = upl->size;
3172 assert(upl->ref_count > 1);
3173 upl->ref_count--; /* removing mapping ref */
3174 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3175 upl->kaddr = (vm_offset_t) 0;
3176 upl_unlock(upl);
3177
3178 vm_deallocate(map, addr, size);
3179 return KERN_SUCCESS;
3180 }
3181 upl_unlock(upl);
3182 return KERN_FAILURE;
3183 }
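/*
* Editorial pairing sketch (not taken from an actual caller): a
* kernel client that needs a virtual window onto a UPL's pages maps
* it, uses it, then unmaps it, e.g.
*
*	vm_offset_t kaddr;
*
*	if (vm_upl_map(kernel_map, upl, &kaddr) == KERN_SUCCESS) {
*		... access the pages through kaddr ...
*		vm_upl_unmap(kernel_map, upl);
*	}
*
* vm_upl_map takes an extra reference on the upl for the mapping and
* vm_upl_unmap drops it; the UPL must still be committed or aborted
* independently of the mapping.
*/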
3184
3185 kern_return_t
3186 upl_commit_range(
3187 upl_t upl,
3188 vm_offset_t offset,
3189 vm_size_t size,
3190 int flags,
3191 upl_page_info_t *page_list,
3192 mach_msg_type_number_t count,
3193 boolean_t *empty)
3194 {
3195 vm_size_t xfer_size = size;
3196 vm_object_t shadow_object;
3197 vm_object_t object = upl->map_object;
3198 vm_object_offset_t target_offset;
3199 int entry;
3200 wpl_array_t lite_list;
3201 int occupied;
3202 int delayed_unlock = 0;
3203 boolean_t shadow_internal;
3204
3205 *empty = FALSE;
3206
3207 if (upl == UPL_NULL)
3208 return KERN_INVALID_ARGUMENT;
3209
3210
3211 if (count == 0)
3212 page_list = NULL;
3213
3214 if(object->pageout) {
3215 shadow_object = object->shadow;
3216 } else {
3217 shadow_object = object;
3218 }
3219
3220 upl_lock(upl);
3221
3222 if (upl->flags & UPL_CLEAR_DIRTY)
3223 flags |= UPL_COMMIT_CLEAR_DIRTY;
3224
3225 if (upl->flags & UPL_DEVICE_MEMORY) {
3226 xfer_size = 0;
3227 } else if ((offset + size) > upl->size) {
3228 upl_unlock(upl);
3229 return KERN_FAILURE;
3230 }
3231
3232 if (upl->flags & UPL_INTERNAL) {
3233 lite_list = (wpl_array_t)
3234 ((((vm_offset_t)upl) + sizeof(struct upl))
3235 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3236 } else {
3237 lite_list = (wpl_array_t)
3238 (((vm_offset_t)upl) + sizeof(struct upl));
3239 }
3240
3241 vm_object_lock(shadow_object);
3242 shadow_internal = shadow_object->internal;
3243
3244 entry = offset/PAGE_SIZE;
3245 target_offset = (vm_object_offset_t)offset;
3246
3247 while(xfer_size) {
3248 vm_page_t t,m;
3249 upl_page_info_t *p;
3250
3251 m = VM_PAGE_NULL;
3252
3253 if (upl->flags & UPL_LITE) {
3254 int pg_num;
3255
3256 pg_num = target_offset/PAGE_SIZE;
3257
3258 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3259 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3260 m = vm_page_lookup(shadow_object,
3261 target_offset + (upl->offset -
3262 shadow_object->paging_offset));
3263 }
3264 }
3265 if (object->pageout) {
3266 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3267 t->pageout = FALSE;
3268
3269 if (delayed_unlock) {
3270 delayed_unlock = 0;
3271 vm_page_unlock_queues();
3272 }
3273 VM_PAGE_FREE(t);
3274
3275 if (m == NULL) {
3276 m = vm_page_lookup(
3277 shadow_object,
3278 target_offset +
3279 object->shadow_offset);
3280 }
3281 if (m != VM_PAGE_NULL)
3282 vm_object_paging_end(m->object);
3283 }
3284 }
3285 if (m != VM_PAGE_NULL) {
3286
3287 if (upl->flags & UPL_IO_WIRE) {
3288
3289 if (delayed_unlock == 0)
3290 vm_page_lock_queues();
3291
3292 vm_page_unwire(m);
3293
3294 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3295 delayed_unlock = 0;
3296 vm_page_unlock_queues();
3297 }
3298 if (page_list) {
3299 page_list[entry].phys_addr = 0;
3300 }
3301 if (flags & UPL_COMMIT_SET_DIRTY) {
3302 m->dirty = TRUE;
3303 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3304 m->dirty = FALSE;
3305 pmap_clear_modify(m->phys_page);
3306 }
3307 if (flags & UPL_COMMIT_INACTIVATE) {
3308 m->reference = FALSE;
3309 vm_page_deactivate(m);
3310 pmap_clear_reference(m->phys_page);
3311 }
3312 target_offset += PAGE_SIZE_64;
3313 xfer_size -= PAGE_SIZE;
3314 entry++;
3315 continue;
3316 }
3317 if (delayed_unlock == 0)
3318 vm_page_lock_queues();
3319 /*
3320 * make sure to clear the hardware
3321 * modify or reference bits before
3322 * releasing the BUSY bit on this page
3323 * otherwise we risk losing a legitimate
3324 * change of state
3325 */
3326 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3327 m->dirty = FALSE;
3328 pmap_clear_modify(m->phys_page);
3329 }
3330 if (flags & UPL_COMMIT_INACTIVATE)
3331 pmap_clear_reference(m->phys_page);
3332
3333 if (page_list) {
3334 p = &(page_list[entry]);
3335 if(p->phys_addr && p->pageout && !m->pageout) {
3336 m->busy = TRUE;
3337 m->pageout = TRUE;
3338 vm_page_wire(m);
3339 } else if (page_list[entry].phys_addr &&
3340 !p->pageout && m->pageout &&
3341 !m->dump_cleaning) {
3342 m->pageout = FALSE;
3343 m->absent = FALSE;
3344 m->overwriting = FALSE;
3345 vm_page_unwire(m);
3346 PAGE_WAKEUP_DONE(m);
3347 }
3348 page_list[entry].phys_addr = 0;
3349 }
3350 m->dump_cleaning = FALSE;
3351 if(m->laundry) {
3352 if (!shadow_internal)
3353 vm_page_burst_count--;
3354 vm_page_laundry_count--;
3355 m->laundry = FALSE;
3356 if (vm_page_laundry_count < vm_page_laundry_min) {
3357 vm_page_laundry_min = 0;
3358 thread_wakeup((event_t)
3359 &vm_page_laundry_count);
3360 }
3361 }
3362 if(m->pageout) {
3363 m->cleaning = FALSE;
3364 m->pageout = FALSE;
3365 #if MACH_CLUSTER_STATS
3366 if (m->wanted) vm_pageout_target_collisions++;
3367 #endif
3368 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3369 m->dirty = pmap_is_modified(m->phys_page);
3370 if(m->dirty) {
3371 CLUSTER_STAT(
3372 vm_pageout_target_page_dirtied++;)
3373 vm_page_unwire(m);/* reactivates */
3374 VM_STAT(reactivations++);
3375 PAGE_WAKEUP_DONE(m);
3376 } else {
3377 CLUSTER_STAT(
3378 vm_pageout_target_page_freed++;)
3379 vm_page_free(m);/* clears busy, etc. */
3380
3381 if (page_list[entry].dirty)
3382 VM_STAT(pageouts++);
3383 }
3384 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3385 delayed_unlock = 0;
3386 vm_page_unlock_queues();
3387 }
3388 target_offset += PAGE_SIZE_64;
3389 xfer_size -= PAGE_SIZE;
3390 entry++;
3391 continue;
3392 }
3393 #if MACH_CLUSTER_STATS
3394 m->dirty = pmap_is_modified(m->phys_page);
3395
3396 if (m->dirty) vm_pageout_cluster_dirtied++;
3397 else vm_pageout_cluster_cleaned++;
3398 if (m->wanted) vm_pageout_cluster_collisions++;
3399 #else
3400 m->dirty = 0;
3401 #endif
3402
3403 if((m->busy) && (m->cleaning)) {
3404 /* the request_page_list case */
3405 if(m->absent) {
3406 m->absent = FALSE;
3407 if(shadow_object->absent_count == 1)
3408 vm_object_absent_release(shadow_object);
3409 else
3410 shadow_object->absent_count--;
3411 }
3412 m->overwriting = FALSE;
3413 m->busy = FALSE;
3414 m->dirty = FALSE;
3415 } else if (m->overwriting) {
3416 /* alternate request page list, write to */
3417 /* page_list case. Occurs when the original */
3418 /* page was wired at the time of the list */
3419 /* request */
3420 assert(m->wire_count != 0);
3421 vm_page_unwire(m);/* reactivates */
3422 m->overwriting = FALSE;
3423 }
3424 m->cleaning = FALSE;
3425
3426 /* It is a part of the semantic of COPYOUT_FROM */
3427 /* UPLs that a commit implies cache sync */
3428 /* between the vm page and the backing store */
3429 /* this can be used to strip the precious bit */
3430 /* as well as clean */
3431 if (upl->flags & UPL_PAGE_SYNC_DONE)
3432 m->precious = FALSE;
3433
3434 if (flags & UPL_COMMIT_SET_DIRTY)
3435 m->dirty = TRUE;
3436
3437 if (flags & UPL_COMMIT_INACTIVATE) {
3438 m->reference = FALSE;
3439 vm_page_deactivate(m);
3440 } else if (!m->active && !m->inactive) {
3441 if (m->reference)
3442 vm_page_activate(m);
3443 else
3444 vm_page_deactivate(m);
3445 }
3446 /*
3447 * Wakeup any thread waiting for the page to be un-cleaning.
3448 */
3449 PAGE_WAKEUP(m);
3450
3451 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3452 delayed_unlock = 0;
3453 vm_page_unlock_queues();
3454 }
3455 }
3456 target_offset += PAGE_SIZE_64;
3457 xfer_size -= PAGE_SIZE;
3458 entry++;
3459 }
3460 if (delayed_unlock)
3461 vm_page_unlock_queues();
3462
3463 occupied = 1;
3464
3465 if (upl->flags & UPL_DEVICE_MEMORY) {
3466 occupied = 0;
3467 } else if (upl->flags & UPL_LITE) {
3468 int pg_num;
3469 int i;
3470 pg_num = upl->size/PAGE_SIZE;
3471 pg_num = (pg_num + 31) >> 5;
3472 occupied = 0;
3473 for(i= 0; i<pg_num; i++) {
3474 if(lite_list[i] != 0) {
3475 occupied = 1;
3476 break;
3477 }
3478 }
3479 } else {
3480 if(queue_empty(&upl->map_object->memq)) {
3481 occupied = 0;
3482 }
3483 }
3484
3485 if(occupied == 0) {
3486 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
3487 *empty = TRUE;
3488 }
3489 if(object == shadow_object)
3490 vm_object_paging_end(shadow_object);
3491 }
3492 vm_object_unlock(shadow_object);
3493 upl_unlock(upl);
3494
3495 return KERN_SUCCESS;
3496 }
3497
3498 kern_return_t
3499 upl_abort_range(
3500 upl_t upl,
3501 vm_offset_t offset,
3502 vm_size_t size,
3503 int error,
3504 boolean_t *empty)
3505 {
3506 vm_size_t xfer_size = size;
3507 vm_object_t shadow_object;
3508 vm_object_t object = upl->map_object;
3509 vm_object_offset_t target_offset;
3510 vm_object_offset_t page_offset;
3511 int entry;
3512 wpl_array_t lite_list;
3513 int occupied;
3514 boolean_t shadow_internal;
3515
3516 *empty = FALSE;
3517
3518 if (upl == UPL_NULL)
3519 return KERN_INVALID_ARGUMENT;
3520
3521 if (upl->flags & UPL_IO_WIRE) {
3522 return upl_commit_range(upl,
3523 offset, size, 0,
3524 NULL, 0, empty);
3525 }
3526
3527 if(object->pageout) {
3528 shadow_object = object->shadow;
3529 } else {
3530 shadow_object = object;
3531 }
3532
3533 upl_lock(upl);
3534 if(upl->flags & UPL_DEVICE_MEMORY) {
3535 xfer_size = 0;
3536 } else if ((offset + size) > upl->size) {
3537 upl_unlock(upl);
3538 return KERN_FAILURE;
3539 }
3540
3541 vm_object_lock(shadow_object);
3542 shadow_internal = shadow_object->internal;
3543
3544 if(upl->flags & UPL_INTERNAL) {
3545 lite_list = (wpl_array_t)
3546 ((((vm_offset_t)upl) + sizeof(struct upl))
3547 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3548 } else {
3549 lite_list = (wpl_array_t)
3550 (((vm_offset_t)upl) + sizeof(struct upl));
3551 }
3552
3553 entry = offset/PAGE_SIZE;
3554 target_offset = (vm_object_offset_t)offset;
3555 while(xfer_size) {
3556 vm_page_t t,m;
3557 upl_page_info_t *p;
3558
3559 m = VM_PAGE_NULL;
3560 if(upl->flags & UPL_LITE) {
3561 int pg_num;
3562 pg_num = target_offset/PAGE_SIZE;
3563 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3564 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3565 m = vm_page_lookup(shadow_object,
3566 target_offset + (upl->offset -
3567 shadow_object->paging_offset));
3568 }
3569 }
3570 if(object->pageout) {
3571 if ((t = vm_page_lookup(object, target_offset))
3572 != NULL) {
3573 t->pageout = FALSE;
3574 VM_PAGE_FREE(t);
3575 if(m == NULL) {
3576 m = vm_page_lookup(
3577 shadow_object,
3578 target_offset +
3579 object->shadow_offset);
3580 }
3581 if(m != VM_PAGE_NULL)
3582 vm_object_paging_end(m->object);
3583 }
3584 }
3585 if(m != VM_PAGE_NULL) {
3586 vm_page_lock_queues();
3587 if(m->absent) {
3588 /* COPYOUT = FALSE case */
3589 /* check for error conditions which must */
3590 /* be passed back to the page's customer */
3591 if(error & UPL_ABORT_RESTART) {
3592 m->restart = TRUE;
3593 m->absent = FALSE;
3594 vm_object_absent_release(m->object);
3595 m->page_error = KERN_MEMORY_ERROR;
3596 m->error = TRUE;
3597 } else if(error & UPL_ABORT_UNAVAILABLE) {
3598 m->restart = FALSE;
3599 m->unusual = TRUE;
3600 m->clustered = FALSE;
3601 } else if(error & UPL_ABORT_ERROR) {
3602 m->restart = FALSE;
3603 m->absent = FALSE;
3604 vm_object_absent_release(m->object);
3605 m->page_error = KERN_MEMORY_ERROR;
3606 m->error = TRUE;
3607 } else if(error & UPL_ABORT_DUMP_PAGES) {
3608 m->clustered = TRUE;
3609 } else {
3610 m->clustered = TRUE;
3611 }
3612
3613
3614 m->cleaning = FALSE;
3615 m->overwriting = FALSE;
3616 PAGE_WAKEUP_DONE(m);
3617 if(m->clustered) {
3618 vm_page_free(m);
3619 } else {
3620 vm_page_activate(m);
3621 }
3622
3623 vm_page_unlock_queues();
3624 target_offset += PAGE_SIZE_64;
3625 xfer_size -= PAGE_SIZE;
3626 entry++;
3627 continue;
3628 }
3629 /*
3630 * Handle the trusted pager throttle.
3631 */
3632 if (m->laundry) {
3633 if (!shadow_internal)
3634 vm_page_burst_count--;
3635 vm_page_laundry_count--;
3636 m->laundry = FALSE;
3637 if (vm_page_laundry_count
3638 < vm_page_laundry_min) {
3639 vm_page_laundry_min = 0;
3640 thread_wakeup((event_t)
3641 &vm_page_laundry_count);
3642 }
3643 }
3644 if(m->pageout) {
3645 assert(m->busy);
3646 assert(m->wire_count == 1);
3647 m->pageout = FALSE;
3648 vm_page_unwire(m);
3649 }
3650 m->dump_cleaning = FALSE;
3651 m->cleaning = FALSE;
3652 m->busy = FALSE;
3653 m->overwriting = FALSE;
3654 #if MACH_PAGEMAP
3655 vm_external_state_clr(
3656 m->object->existence_map, m->offset);
3657 #endif /* MACH_PAGEMAP */
3658 if(error & UPL_ABORT_DUMP_PAGES) {
3659 vm_page_free(m);
3660 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3661 } else {
3662 PAGE_WAKEUP(m);
3663 }
3664 vm_page_unlock_queues();
3665 }
3666 target_offset += PAGE_SIZE_64;
3667 xfer_size -= PAGE_SIZE;
3668 entry++;
3669 }
3670 occupied = 1;
3671 if (upl->flags & UPL_DEVICE_MEMORY) {
3672 occupied = 0;
3673 } else if (upl->flags & UPL_LITE) {
3674 int pg_num;
3675 int i;
3676 pg_num = upl->size/PAGE_SIZE;
3677 pg_num = (pg_num + 31) >> 5;
3678 occupied = 0;
3679 for(i= 0; i<pg_num; i++) {
3680 if(lite_list[i] != 0) {
3681 occupied = 1;
3682 break;
3683 }
3684 }
3685 } else {
3686 if(queue_empty(&upl->map_object->memq)) {
3687 occupied = 0;
3688 }
3689 }
3690
3691 if(occupied == 0) {
3692 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
3693 *empty = TRUE;
3694 }
3695 if(object == shadow_object)
3696 vm_object_paging_end(shadow_object);
3697 }
3698 vm_object_unlock(shadow_object);
3699
3700 upl_unlock(upl);
3701
3702 return KERN_SUCCESS;
3703 }
3704
3705 kern_return_t
3706 upl_abort(
3707 upl_t upl,
3708 int error)
3709 {
3710 vm_object_t object = NULL;
3711 vm_object_t shadow_object = NULL;
3712 vm_object_offset_t offset;
3713 vm_object_offset_t shadow_offset;
3714 vm_object_offset_t target_offset;
3715 int i;
3716 wpl_array_t lite_list;
3717 vm_page_t t,m;
3718 int occupied;
3719 boolean_t shadow_internal;
3720
3721 if (upl == UPL_NULL)
3722 return KERN_INVALID_ARGUMENT;
3723
3724 if (upl->flags & UPL_IO_WIRE) {
3725 boolean_t empty;
3726 return upl_commit_range(upl,
3727 0, upl->size, 0,
3728 NULL, 0, &empty);
3729 }
3730
3731 upl_lock(upl);
3732 if(upl->flags & UPL_DEVICE_MEMORY) {
3733 upl_unlock(upl);
3734 return KERN_SUCCESS;
3735 }
3736
3737 object = upl->map_object;
3738
3739 if (object == NULL) {
3740 panic("upl_abort: upl object is not backed by an object");
3741 upl_unlock(upl);
3742 return KERN_INVALID_ARGUMENT;
3743 }
3744
3745 if(object->pageout) {
3746 shadow_object = object->shadow;
3747 shadow_offset = object->shadow_offset;
3748 } else {
3749 shadow_object = object;
3750 shadow_offset = upl->offset - object->paging_offset;
3751 }
3752
3753 if(upl->flags & UPL_INTERNAL) {
3754 lite_list = (wpl_array_t)
3755 ((((vm_offset_t)upl) + sizeof(struct upl))
3756 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3757 } else {
3758 lite_list = (wpl_array_t)
3759 (((vm_offset_t)upl) + sizeof(struct upl));
3760 }
3761 offset = 0;
3762 vm_object_lock(shadow_object);
3763 shadow_internal = shadow_object->internal;
3764
3765 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
3766 m = VM_PAGE_NULL;
3767 target_offset = offset + shadow_offset;
3768 if(upl->flags & UPL_LITE) {
3769 int pg_num;
3770 pg_num = offset/PAGE_SIZE;
3771 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3772 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3773 m = vm_page_lookup(
3774 shadow_object, target_offset);
3775 }
3776 }
3777 if(object->pageout) {
3778 if ((t = vm_page_lookup(object, offset)) != NULL) {
3779 t->pageout = FALSE;
3780 VM_PAGE_FREE(t);
3781 if(m == NULL) {
3782 m = vm_page_lookup(
3783 shadow_object, target_offset);
3784 }
3785 if(m != VM_PAGE_NULL)
3786 vm_object_paging_end(m->object);
3787 }
3788 }
3789 if(m != VM_PAGE_NULL) {
3790 vm_page_lock_queues();
3791 if(m->absent) {
3792 /* COPYOUT = FALSE case */
3793 /* check for error conditions which must */
3794 /* be passed back to the page's customer */
3795 if(error & UPL_ABORT_RESTART) {
3796 m->restart = TRUE;
3797 m->absent = FALSE;
3798 vm_object_absent_release(m->object);
3799 m->page_error = KERN_MEMORY_ERROR;
3800 m->error = TRUE;
3801 } else if(error & UPL_ABORT_UNAVAILABLE) {
3802 m->restart = FALSE;
3803 m->unusual = TRUE;
3804 m->clustered = FALSE;
3805 } else if(error & UPL_ABORT_ERROR) {
3806 m->restart = FALSE;
3807 m->absent = FALSE;
3808 vm_object_absent_release(m->object);
3809 m->page_error = KERN_MEMORY_ERROR;
3810 m->error = TRUE;
3811 } else if(error & UPL_ABORT_DUMP_PAGES) {
3812 m->clustered = TRUE;
3813 } else {
3814 m->clustered = TRUE;
3815 }
3816
3817 m->cleaning = FALSE;
3818 m->overwriting = FALSE;
3819 PAGE_WAKEUP_DONE(m);
3820 if(m->clustered) {
3821 vm_page_free(m);
3822 } else {
3823 vm_page_activate(m);
3824 }
3825 vm_page_unlock_queues();
3826 continue;
3827 }
3828 /*
3829 * Handle the trusted pager throttle.
3830 */
3831 if (m->laundry) {
3832 if (!shadow_internal)
3833 vm_page_burst_count--;
3834 vm_page_laundry_count--;
3835 m->laundry = FALSE;
3836 if (vm_page_laundry_count
3837 < vm_page_laundry_min) {
3838 vm_page_laundry_min = 0;
3839 thread_wakeup((event_t)
3840 &vm_page_laundry_count);
3841 }
3842 }
3843 if(m->pageout) {
3844 assert(m->busy);
3845 assert(m->wire_count == 1);
3846 m->pageout = FALSE;
3847 vm_page_unwire(m);
3848 }
3849 m->dump_cleaning = FALSE;
3850 m->cleaning = FALSE;
3851 m->busy = FALSE;
3852 m->overwriting = FALSE;
3853 #if MACH_PAGEMAP
3854 vm_external_state_clr(
3855 m->object->existence_map, m->offset);
3856 #endif /* MACH_PAGEMAP */
3857 if(error & UPL_ABORT_DUMP_PAGES) {
3858 vm_page_free(m);
3859 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3860 } else {
3861 PAGE_WAKEUP(m);
3862 }
3863 vm_page_unlock_queues();
3864 }
3865 }
3866 occupied = 1;
3867 if (upl->flags & UPL_DEVICE_MEMORY) {
3868 occupied = 0;
3869 } else if (upl->flags & UPL_LITE) {
3870 int pg_num;
3871 int i;
3872 pg_num = upl->size/PAGE_SIZE;
3873 pg_num = (pg_num + 31) >> 5;
3874 occupied = 0;
3875 for(i= 0; i<pg_num; i++) {
3876 if(lite_list[i] != 0) {
3877 occupied = 1;
3878 break;
3879 }
3880 }
3881 } else {
3882 if(queue_empty(&upl->map_object->memq)) {
3883 occupied = 0;
3884 }
3885 }
3886
3887 if(occupied == 0) {
3888 if(object == shadow_object)
3889 vm_object_paging_end(shadow_object);
3890 }
3891 vm_object_unlock(shadow_object);
3892
3893 upl_unlock(upl);
3894 return KERN_SUCCESS;
3895 }
3896
3897 /* an option on commit should be wire */
3898 kern_return_t
3899 upl_commit(
3900 upl_t upl,
3901 upl_page_info_t *page_list,
3902 mach_msg_type_number_t count)
3903 {
3904 if (upl == UPL_NULL)
3905 return KERN_INVALID_ARGUMENT;
3906
3907 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
3908 boolean_t empty;
3909 return upl_commit_range(upl, 0, upl->size, 0,
3910 page_list, count, &empty);
3911 }
3912
3913 if (count == 0)
3914 page_list = NULL;
3915
3916 upl_lock(upl);
3917 if (upl->flags & UPL_DEVICE_MEMORY)
3918 page_list = NULL;
3919
3920 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3921 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
3922 vm_object_t shadow_object = upl->map_object->shadow;
3923 vm_object_t object = upl->map_object;
3924 vm_object_offset_t target_offset;
3925 vm_size_t xfer_end;
3926 int entry;
3927
3928 vm_page_t t, m;
3929 upl_page_info_t *p;
3930
3931 vm_object_lock(shadow_object);
3932
3933 entry = 0;
3934 target_offset = object->shadow_offset;
3935 xfer_end = upl->size + object->shadow_offset;
3936
3937 while(target_offset < xfer_end) {
3938
3939 if ((t = vm_page_lookup(object,
3940 target_offset - object->shadow_offset))
3941 == NULL) {
3942 target_offset += PAGE_SIZE_64;
3943 entry++;
3944 continue;
3945 }
3946
3947 m = vm_page_lookup(shadow_object, target_offset);
3948 if(m != VM_PAGE_NULL) {
3949 if (upl->flags & UPL_CLEAR_DIRTY) {
3950 pmap_clear_modify(m->phys_page);
3951 m->dirty = FALSE;
3952 }
3953 /* It is a part of the semantic of */
3954 /* COPYOUT_FROM UPLs that a commit */
3955 /* implies cache sync between the */
3956 /* vm page and the backing store */
3957 /* this can be used to strip the */
3958 /* precious bit as well as clean */
3959 if (upl->flags & UPL_PAGE_SYNC_DONE)
3960 m->precious = FALSE;
3961
3962 if(page_list) {
3963 p = &(page_list[entry]);
3964 if(page_list[entry].phys_addr &&
3965 p->pageout && !m->pageout) {
3966 vm_page_lock_queues();
3967 m->busy = TRUE;
3968 m->pageout = TRUE;
3969 vm_page_wire(m);
3970 vm_page_unlock_queues();
3971 } else if (page_list[entry].phys_addr &&
3972 !p->pageout && m->pageout &&
3973 !m->dump_cleaning) {
3974 vm_page_lock_queues();
3975 m->pageout = FALSE;
3976 m->absent = FALSE;
3977 m->overwriting = FALSE;
3978 vm_page_unwire(m);
3979 PAGE_WAKEUP_DONE(m);
3980 vm_page_unlock_queues();
3981 }
3982 page_list[entry].phys_addr = 0;
3983 }
3984 }
3985 target_offset += PAGE_SIZE_64;
3986 entry++;
3987 }
3988
3989 vm_object_unlock(shadow_object);
3990 }
3991 if (upl->flags & UPL_DEVICE_MEMORY) {
3992 vm_object_lock(upl->map_object->shadow);
3993 if(upl->map_object == upl->map_object->shadow)
3994 vm_object_paging_end(upl->map_object->shadow);
3995 vm_object_unlock(upl->map_object->shadow);
3996 }
3997 upl_unlock(upl);
3998 return KERN_SUCCESS;
3999 }
4000
4001
4002
4003 kern_return_t
4004 vm_object_iopl_request(
4005 vm_object_t object,
4006 vm_object_offset_t offset,
4007 vm_size_t size,
4008 upl_t *upl_ptr,
4009 upl_page_info_array_t user_page_list,
4010 unsigned int *page_list_count,
4011 int cntrl_flags)
4012 {
4013 vm_page_t dst_page;
4014 vm_object_offset_t dst_offset = offset;
4015 vm_size_t xfer_size = size;
4016 upl_t upl = NULL;
4017 int entry;
4018 wpl_array_t lite_list;
4019 int page_field_size;
4020 int delayed_unlock = 0;
4021
4022 vm_page_t alias_page = NULL;
4023 kern_return_t ret;
4024 vm_prot_t prot;
4025
4026
4027 if(cntrl_flags & UPL_COPYOUT_FROM) {
4028 prot = VM_PROT_READ;
4029 } else {
4030 prot = VM_PROT_READ | VM_PROT_WRITE;
4031 }
4032
4033 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4034 size = MAX_UPL_TRANSFER * page_size;
4035 }
4036
4037 if(cntrl_flags & UPL_SET_INTERNAL)
4038 if(page_list_count != NULL)
4039 *page_list_count = MAX_UPL_TRANSFER;
4040 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4041 ((page_list_count != NULL) && (*page_list_count != 0)
4042 && *page_list_count < (size/page_size)))
4043 return KERN_INVALID_ARGUMENT;
4044
4045 if((!object->internal) && (object->paging_offset != 0))
4046 panic("vm_object_iopl_request: vnode object with non-zero paging offset\n");
4047
4048 if(object->phys_contiguous) {
4049 /* No paging operations are possible against this memory */
4050 /* and so no need for map object, ever */
4051 cntrl_flags |= UPL_SET_LITE;
4052 }
4053
4054 if(upl_ptr) {
4055 if(cntrl_flags & UPL_SET_INTERNAL) {
4056 if(cntrl_flags & UPL_SET_LITE) {
4057 upl = upl_create(
4058 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4059 size);
4060 user_page_list = (upl_page_info_t *)
4061 (((vm_offset_t)upl) + sizeof(struct upl));
4062 lite_list = (wpl_array_t)
4063 (((vm_offset_t)user_page_list) +
4064 ((size/PAGE_SIZE) *
4065 sizeof(upl_page_info_t)));
4066 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4067 page_field_size =
4068 (page_field_size + 3) & 0xFFFFFFFC;
4069 bzero((char *)lite_list, page_field_size);
4070 upl->flags =
4071 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4072 } else {
4073 upl = upl_create(UPL_CREATE_INTERNAL, size);
4074 user_page_list = (upl_page_info_t *)
4075 (((vm_offset_t)upl)
4076 + sizeof(struct upl));
4077 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4078 }
4079 } else {
4080 if(cntrl_flags & UPL_SET_LITE) {
4081 upl = upl_create(UPL_CREATE_LITE, size);
4082 lite_list = (wpl_array_t)
4083 (((vm_offset_t)upl) + sizeof(struct upl));
4084 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4085 page_field_size =
4086 (page_field_size + 3) & 0xFFFFFFFC;
4087 bzero((char *)lite_list, page_field_size);
4088 upl->flags = UPL_LITE | UPL_IO_WIRE;
4089 } else {
4090 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4091 upl->flags = UPL_IO_WIRE;
4092 }
4093 }
4094
4095 if(object->phys_contiguous) {
4096 upl->map_object = object;
4097 /* don't need any shadow mappings for this one */
4098 /* since it is already I/O memory */
4099 upl->flags |= UPL_DEVICE_MEMORY;
4100
4101 vm_object_lock(object);
4102 vm_object_paging_begin(object);
4103 vm_object_unlock(object);
4104
4105 /* paging in progress also protects the paging_offset */
4106 upl->offset = offset + object->paging_offset;
4107 upl->size = size;
4108 *upl_ptr = upl;
4109 if(user_page_list) {
4110 user_page_list[0].phys_addr =
4111 (offset + object->shadow_offset)>>12;
4112 user_page_list[0].device = TRUE;
4113 }
4114
4115 if(page_list_count != NULL) {
4116 if (upl->flags & UPL_INTERNAL) {
4117 *page_list_count = 0;
4118 } else {
4119 *page_list_count = 1;
4120 }
4121 }
4122 return KERN_SUCCESS;
4123 }
4124 if(user_page_list)
4125 user_page_list[0].device = FALSE;
4126
4127 if(cntrl_flags & UPL_SET_LITE) {
4128 upl->map_object = object;
4129 } else {
4130 upl->map_object = vm_object_allocate(size);
4131 vm_object_lock(upl->map_object);
4132 upl->map_object->shadow = object;
4133 upl->map_object->pageout = TRUE;
4134 upl->map_object->can_persist = FALSE;
4135 upl->map_object->copy_strategy =
4136 MEMORY_OBJECT_COPY_NONE;
4137 upl->map_object->shadow_offset = offset;
4138 upl->map_object->wimg_bits = object->wimg_bits;
4139 vm_object_unlock(upl->map_object);
4140 }
4141 }
4142 vm_object_lock(object);
4143 vm_object_paging_begin(object);
4144
4145 if (!object->phys_contiguous) {
4146 /* Protect user space from future COW operations */
4147 object->true_share = TRUE;
4148 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4149 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4150 }
4151
4152 /* we can lock the upl offset now that paging_in_progress is set */
4153 if(upl_ptr) {
4154 upl->size = size;
4155 upl->offset = offset + object->paging_offset;
4156 *upl_ptr = upl;
4157 #ifdef UBC_DEBUG
4158 queue_enter(&object->uplq, upl, upl_t, uplq);
4159 #endif /* UBC_DEBUG */
4160 }
4161
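/*
 * Walk the range a page at a time: fault in anything that isn't
 * immediately usable, wire it, record it in the lite bitmap or as an
 * alias page in the map object, and report it in user_page_list.
 * The page queues lock is held across batches and dropped every
 * DELAYED_UNLOCK_LIMIT pages.
 */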
4162 entry = 0;
4163 while (xfer_size) {
4164 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4165 if (delayed_unlock) {
4166 delayed_unlock = 0;
4167 vm_page_unlock_queues();
4168 }
4169 vm_object_unlock(object);
4170 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4171 vm_object_lock(object);
4172 }
4173 dst_page = vm_page_lookup(object, dst_offset);
4174
4175 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
4176 (dst_page->unusual && (dst_page->error ||
4177 dst_page->restart || dst_page->absent ||
4178 dst_page->fictitious ||
4179 prot & dst_page->page_lock))) {
4180 vm_fault_return_t result;
4181 do {
4182 vm_page_t top_page;
4183 kern_return_t error_code;
4184 int interruptible;
4185
4186 vm_object_offset_t lo_offset = offset;
4187 vm_object_offset_t hi_offset = offset + size;
4188
4189
4190 if (delayed_unlock) {
4191 delayed_unlock = 0;
4192 vm_page_unlock_queues();
4193 }
4194
4195 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4196 interruptible = THREAD_ABORTSAFE;
4197 } else {
4198 interruptible = THREAD_UNINT;
4199 }
4200
4201 result = vm_fault_page(object, dst_offset,
4202 prot | VM_PROT_WRITE, FALSE,
4203 interruptible,
4204 lo_offset, hi_offset,
4205 VM_BEHAVIOR_SEQUENTIAL,
4206 &prot, &dst_page, &top_page,
4207 (int *)0,
4208 &error_code, FALSE, FALSE, NULL, 0);
4209
4210 switch(result) {
4211 case VM_FAULT_SUCCESS:
4212
4213 PAGE_WAKEUP_DONE(dst_page);
4214
4215 /*
4216 * Release paging references and
4217 * top-level placeholder page, if any.
4218 */
4219
4220 if(top_page != VM_PAGE_NULL) {
4221 vm_object_t local_object;
4222 local_object =
4223 top_page->object;
4224 if(top_page->object
4225 != dst_page->object) {
4226 vm_object_lock(
4227 local_object);
4228 VM_PAGE_FREE(top_page);
4229 vm_object_paging_end(
4230 local_object);
4231 vm_object_unlock(
4232 local_object);
4233 } else {
4234 VM_PAGE_FREE(top_page);
4235 vm_object_paging_end(
4236 local_object);
4237 }
4238 }
4239
4240 break;
4241
4242
4243 case VM_FAULT_RETRY:
4244 vm_object_lock(object);
4245 vm_object_paging_begin(object);
4246 break;
4247
4248 case VM_FAULT_FICTITIOUS_SHORTAGE:
4249 vm_page_more_fictitious();
4250 vm_object_lock(object);
4251 vm_object_paging_begin(object);
4252 break;
4253
4254 case VM_FAULT_MEMORY_SHORTAGE:
4255 if (vm_page_wait(interruptible)) {
4256 vm_object_lock(object);
4257 vm_object_paging_begin(object);
4258 break;
4259 }
4260 /* fall thru */
4261
4262 case VM_FAULT_INTERRUPTED:
4263 error_code = MACH_SEND_INTERRUPTED;
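/* fall through: treat an interrupt as a memory error and unwind */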
4264 case VM_FAULT_MEMORY_ERROR:
4265 ret = (error_code ? error_code:
4266 KERN_MEMORY_ERROR);
4267 vm_object_lock(object);
4268 for(; offset < dst_offset;
4269 offset += PAGE_SIZE) {
4270 dst_page = vm_page_lookup(
4271 object, offset);
4272 if(dst_page == VM_PAGE_NULL)
4273 panic("vm_object_iopl_request: Wired pages missing. \n");
4274 vm_page_lock_queues();
4275 vm_page_unwire(dst_page);
4276 vm_page_unlock_queues();
4277 VM_STAT(reactivations++);
4278 }
4279 vm_object_unlock(object);
4280 upl_destroy(upl);
4281 return ret;
4282 }
4283 } while (result != VM_FAULT_SUCCESS);
4284 /* (VM_FAULT_INTERRUPTED and VM_FAULT_MEMORY_ERROR return above) */
4285 }
4286 if (delayed_unlock == 0)
4287 vm_page_lock_queues();
4288 vm_page_wire(dst_page);
4289
4290 if (upl_ptr) {
4291 if (cntrl_flags & UPL_SET_LITE) {
4292 int pg_num;
4293 pg_num = (dst_offset-offset)/PAGE_SIZE;
4294 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4295 } else {
4296 /*
4297 * Convert the fictitious page to a
4298 * private shadow of the real page.
4299 */
4300 assert(alias_page->fictitious);
4301 alias_page->fictitious = FALSE;
4302 alias_page->private = TRUE;
4303 alias_page->pageout = TRUE;
4304 alias_page->phys_page = dst_page->phys_page;
4305 vm_page_wire(alias_page);
4306
4307 vm_page_insert(alias_page,
4308 upl->map_object, size - xfer_size);
4309 assert(!alias_page->wanted);
4310 alias_page->busy = FALSE;
4311 alias_page->absent = FALSE;
4312 }
4313
4314 /* expect the page to be used */
4315 dst_page->reference = TRUE;
4316
4317 if (!(cntrl_flags & UPL_COPYOUT_FROM))
4318 dst_page->dirty = TRUE;
4319 alias_page = NULL;
4320
4321 if (user_page_list) {
4322 user_page_list[entry].phys_addr
4323 = dst_page->phys_page;
4324 user_page_list[entry].dirty =
4325 dst_page->dirty;
4326 user_page_list[entry].pageout =
4327 dst_page->pageout;
4328 user_page_list[entry].absent =
4329 dst_page->absent;
4330 user_page_list[entry].precious =
4331 dst_page->precious;
4332 }
4333 }
4334 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4335 delayed_unlock = 0;
4336 vm_page_unlock_queues();
4337 }
4338 entry++;
4339 dst_offset += PAGE_SIZE_64;
4340 xfer_size -= PAGE_SIZE;
4341 }
4342 if (delayed_unlock)
4343 vm_page_unlock_queues();
4344
4345 if (upl->flags & UPL_INTERNAL) {
4346 if(page_list_count != NULL)
4347 *page_list_count = 0;
4348 } else if ((page_list_count != NULL) &&
4349 (*page_list_count > entry)) {
4350 *page_list_count = entry;
4351 }
4352
4353 if (alias_page != NULL) {
4354 vm_page_lock_queues();
4355 vm_page_free(alias_page);
4356 vm_page_unlock_queues();
4357 }
4358
4359 vm_object_unlock(object);
4360 return KERN_SUCCESS;
4361 }
4362
4363 vm_size_t
4364 upl_get_internal_pagelist_offset()
4365 {
4366 return sizeof(struct upl);
4367 }
4368
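/*
 * upl_set_dirty / upl_clear_dirty:
 *
 * Set or clear UPL_CLEAR_DIRTY on the UPL.  When the flag is set,
 * commit clears the pmap modify state and the dirty bit on each page
 * it touches (see upl_commit above).
 */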
4369 void
4370 upl_set_dirty(
4371 upl_t upl)
4372 {
4373 upl->flags |= UPL_CLEAR_DIRTY;
4374 }
4375
4376 void
4377 upl_clear_dirty(
4378 upl_t upl)
4379 {
4380 upl->flags &= ~UPL_CLEAR_DIRTY;
4381 }
4382
4383
4384 #ifdef MACH_BSD
4385
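/*
 * Thin wrappers over the UPL_* page-info macros, exported for callers
 * outside this file.
 */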
4386 boolean_t upl_page_present(upl_page_info_t *upl, int index)
4387 {
4388 return(UPL_PAGE_PRESENT(upl, index));
4389 }
4390 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
4391 {
4392 return(UPL_DIRTY_PAGE(upl, index));
4393 }
4394 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
4395 {
4396 return(UPL_VALID_PAGE(upl, index));
4397 }
4398 vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
4399 {
4400 return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
4401 }
4402
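/*
 * vm_countdirtypages:
 *
 * Debugging aid: walk the inactive, zero-fill, and active page queues
 * and print how many pages on each are dirty, being paged out, or
 * precious ("IN Q" covers the inactive and zero-fill queues combined).
 */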
4403 void
4404 vm_countdirtypages(void)
4405 {
4406 vm_page_t m;
4407 int dpages;
4408 int pgopages;
4409 int precpages;
4410
4411
4412 dpages=0;
4413 pgopages=0;
4414 precpages=0;
4415
4416 vm_page_lock_queues();
4417 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4418 do {
4419 if (m ==(vm_page_t )0) break;
4420
4421 if(m->dirty) dpages++;
4422 if(m->pageout) pgopages++;
4423 if(m->precious) precpages++;
4424
4425 m = (vm_page_t) queue_next(&m->pageq);
4426 if (m ==(vm_page_t )0) break;
4427
4428 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
4429 vm_page_unlock_queues();
4430
4431 vm_page_lock_queues();
4432 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4433 do {
4434 if (m ==(vm_page_t )0) break;
4435
4436 if(m->dirty) dpages++;
4437 if(m->pageout) pgopages++;
4438 if(m->precious) precpages++;
4439
4440 m = (vm_page_t) queue_next(&m->pageq);
4441 if (m ==(vm_page_t )0) break;
4442
4443 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
4444 vm_page_unlock_queues();
4445
4446 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
4447
4448 dpages=0;
4449 pgopages=0;
4450 precpages=0;
4451
4452 vm_page_lock_queues();
4453 m = (vm_page_t) queue_first(&vm_page_queue_active);
4454
4455 do {
4456 if(m == (vm_page_t )0) break;
4457 if(m->dirty) dpages++;
4458 if(m->pageout) pgopages++;
4459 if(m->precious) precpages++;
4460
4461 m = (vm_page_t) queue_next(&m->pageq);
4462 if(m == (vm_page_t )0) break;
4463
4464 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
4465 vm_page_unlock_queues();
4466
4467 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
4468
4469 }
4470 #endif /* MACH_BSD */
4471
4472 #ifdef UBC_DEBUG
4473 kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
4474 {
4475 upl->ubc_alias1 = alias1;
4476 upl->ubc_alias2 = alias2;
4477 return KERN_SUCCESS;
4478 }
4479 int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
4480 {
4481 if(al)
4482 *al = upl->ubc_alias1;
4483 if(al2)
4484 *al2 = upl->ubc_alias2;
4485 return KERN_SUCCESS;
4486 }
4487 #endif /* UBC_DEBUG */
4488
4489
4490
4491 #if MACH_KDB
4492 #include <ddb/db_output.h>
4493 #include <ddb/db_print.h>
4494 #include <vm/vm_print.h>
4495
4496 #define printf kdbprintf
4497 extern int db_indent;
4498 void db_pageout(void);
4499
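/*
 * db_vm:
 *
 * Kernel-debugger helper: print a summary of page counts, the free and
 * inactive targets, and the pageout burst/pause tuning, then the
 * detailed pageout statistics via db_pageout().
 */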
4500 void
4501 db_vm(void)
4502 {
4503 extern int vm_page_gobble_count;
4504
4505 iprintf("VM Statistics:\n");
4506 db_indent += 2;
4507 iprintf("pages:\n");
4508 db_indent += 2;
4509 iprintf("activ %5d inact %5d free %5d",
4510 vm_page_active_count, vm_page_inactive_count,
4511 vm_page_free_count);
4512 printf(" wire %5d gobbl %5d\n",
4513 vm_page_wire_count, vm_page_gobble_count);
4514 iprintf("laund %5d\n",
4515 vm_page_laundry_count);
4516 db_indent -= 2;
4517 iprintf("target:\n");
4518 db_indent += 2;
4519 iprintf("min %5d inact %5d free %5d",
4520 vm_page_free_min, vm_page_inactive_target,
4521 vm_page_free_target);
4522 printf(" resrv %5d\n", vm_page_free_reserved);
4523 db_indent -= 2;
4524
4525 iprintf("burst:\n");
4526 db_indent += 2;
4527 iprintf("max %5d min %5d wait %5d empty %5d\n",
4528 vm_pageout_burst_max, vm_pageout_burst_min,
4529 vm_pageout_burst_wait, vm_pageout_empty_wait);
4530 db_indent -= 2;
4531 iprintf("pause:\n");
4532 db_indent += 2;
4533 iprintf("count %5d max %5d\n",
4534 vm_pageout_pause_count, vm_pageout_pause_max);
4535 #if MACH_COUNTERS
4536 iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
4537 #endif /* MACH_COUNTERS */
4538 db_indent -= 2;
4539 db_pageout();
4540 db_indent -= 2;
4541 }
4542
4543 void
4544 db_pageout(void)
4545 {
4546 #if MACH_COUNTERS
4547 extern int c_laundry_pages_freed;
4548 #endif /* MACH_COUNTERS */
4549
4550 iprintf("Pageout Statistics:\n");
4551 db_indent += 2;
4552 iprintf("active %5d inactv %5d\n",
4553 vm_pageout_active, vm_pageout_inactive);
4554 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
4555 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
4556 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
4557 iprintf("used %5d clean %5d dirty %5d\n",
4558 vm_pageout_inactive_used, vm_pageout_inactive_clean,
4559 vm_pageout_inactive_dirty);
4560 #if MACH_COUNTERS
4561 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
4562 #endif /* MACH_COUNTERS */
4563 #if MACH_CLUSTER_STATS
4564 iprintf("Cluster Statistics:\n");
4565 db_indent += 2;
4566 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
4567 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
4568 vm_pageout_cluster_collisions);
4569 iprintf("clusters %5d conversions %5d\n",
4570 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
4571 db_indent -= 2;
4572 iprintf("Target Statistics:\n");
4573 db_indent += 2;
4574 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
4575 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
4576 vm_pageout_target_page_freed);
4577 db_indent -= 2;
4578 #endif /* MACH_CLUSTER_STATS */
4579 db_indent -= 2;
4580 }
4581
4582 #if MACH_CLUSTER_STATS
4583 unsigned long vm_pageout_cluster_dirtied = 0;
4584 unsigned long vm_pageout_cluster_cleaned = 0;
4585 unsigned long vm_pageout_cluster_collisions = 0;
4586 unsigned long vm_pageout_cluster_clusters = 0;
4587 unsigned long vm_pageout_cluster_conversions = 0;
4588 unsigned long vm_pageout_target_collisions = 0;
4589 unsigned long vm_pageout_target_page_dirtied = 0;
4590 unsigned long vm_pageout_target_page_freed = 0;
4591 #define CLUSTER_STAT(clause) clause
4592 #else /* MACH_CLUSTER_STATS */
4593 #define CLUSTER_STAT(clause)
4594 #endif /* MACH_CLUSTER_STATS */
4595
4596 #endif /* MACH_KDB */