[apple/xnu.git] blame - osfmk/vm/vm_pageout.c (xnu-344.21.73)
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
d7e50217 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/*
26 * @OSF_COPYRIGHT@
27 */
28/*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53/*
54 */
55/*
56 * File: vm/vm_pageout.c
57 * Author: Avadis Tevanian, Jr., Michael Wayne Young
58 * Date: 1985
59 *
60 * The proverbial page-out daemon.
61 */
62
63#include <mach_pagemap.h>
64#include <mach_cluster_stats.h>
65#include <mach_kdb.h>
66#include <advisory_pageout.h>
67
68#include <mach/mach_types.h>
69#include <mach/memory_object.h>
70#include <mach/memory_object_default.h>
0b4e3aa0 71#include <mach/memory_object_control_server.h>
72#include <mach/mach_host_server.h>
73#include <mach/vm_param.h>
74#include <mach/vm_statistics.h>
75#include <kern/host_statistics.h>
76#include <kern/counters.h>
77#include <kern/thread.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
d7e50217 80#include <vm/vm_fault.h>
81#include <vm/vm_map.h>
82#include <vm/vm_object.h>
83#include <vm/vm_page.h>
84#include <vm/vm_pageout.h>
85#include <machine/vm_tuning.h>
86#include <kern/misc_protos.h>
87
88extern ipc_port_t memory_manager_default;
89
90#ifndef VM_PAGE_LAUNDRY_MAX
0b4e3aa0 91#define VM_PAGE_LAUNDRY_MAX 6 /* outstanding DMM page cleans */
 92#endif /* VM_PAGE_LAUNDRY_MAX */
93
94#ifndef VM_PAGEOUT_BURST_MAX
95#define VM_PAGEOUT_BURST_MAX 32 /* simultaneous EMM page cleans */
96#endif /* VM_PAGEOUT_BURST_MAX */
97
98#ifndef VM_PAGEOUT_DISCARD_MAX
99#define VM_PAGEOUT_DISCARD_MAX 68 /* simultaneous EMM page cleans */
100#endif /* VM_PAGEOUT_DISCARD_MAX */
101
102#ifndef VM_PAGEOUT_BURST_WAIT
103#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
104#endif /* VM_PAGEOUT_BURST_WAIT */
105
106#ifndef VM_PAGEOUT_EMPTY_WAIT
107#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
108#endif /* VM_PAGEOUT_EMPTY_WAIT */
109
110/*
111 * To obtain a reasonable LRU approximation, the inactive queue
112 * needs to be large enough to give pages on it a chance to be
113 * referenced a second time. This macro defines the fraction
114 * of active+inactive pages that should be inactive.
115 * The pageout daemon uses it to update vm_page_inactive_target.
116 *
117 * If vm_page_free_count falls below vm_page_free_target and
118 * vm_page_inactive_count is below vm_page_inactive_target,
119 * then the pageout daemon starts running.
120 */
121
122#ifndef VM_PAGE_INACTIVE_TARGET
123#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3)
124#endif /* VM_PAGE_INACTIVE_TARGET */
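/*
 * Worked example (hypothetical numbers): with 24000 active+inactive
 * pages, VM_PAGE_INACTIVE_TARGET yields an inactive target of 8000
 * pages, i.e. roughly a third of the pageable pool, which is what the
 * LRU approximation described above relies on.
 */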
125
126/*
127 * Once the pageout daemon starts running, it keeps going
128 * until vm_page_free_count meets or exceeds vm_page_free_target.
129 */
130
131#ifndef VM_PAGE_FREE_TARGET
132#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
133#endif /* VM_PAGE_FREE_TARGET */
134
135/*
136 * The pageout daemon always starts running once vm_page_free_count
137 * falls below vm_page_free_min.
138 */
139
140#ifndef VM_PAGE_FREE_MIN
141#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
142#endif /* VM_PAGE_FREE_MIN */
143
144/*
145 * When vm_page_free_count falls below vm_page_free_reserved,
146 * only vm-privileged threads can allocate pages. vm-privilege
147 * allows the pageout daemon and default pager (and any other
148 * associated threads needed for default pageout) to continue
149 * operation by dipping into the reserved pool of pages.
150 */
151
152#ifndef VM_PAGE_FREE_RESERVED
153#define VM_PAGE_FREE_RESERVED \
0b4e3aa0 154 ((16 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
155#endif /* VM_PAGE_FREE_RESERVED */
156
157/*
 158 * Exported variable used to broadcast the activation of the pageout scan.
159 * Working Set uses this to throttle its use of pmap removes. In this
160 * way, code which runs within memory in an uncontested context does
161 * not keep encountering soft faults.
162 */
163
164unsigned int vm_pageout_scan_event_counter = 0;
165
166/*
167 * Forward declarations for internal routines.
168 */
169extern void vm_pageout_continue(void);
170extern void vm_pageout_scan(void);
171extern void vm_pageout_throttle(vm_page_t m);
172extern vm_page_t vm_pageout_cluster_page(
173 vm_object_t object,
174 vm_object_offset_t offset,
175 boolean_t precious_clean);
176
177unsigned int vm_pageout_reserved_internal = 0;
178unsigned int vm_pageout_reserved_really = 0;
179
180unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
181unsigned int vm_page_laundry_min = 0;
182unsigned int vm_pageout_burst_max = 0;
183unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
184unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
185unsigned int vm_pageout_burst_min = 0;
186unsigned int vm_pageout_pause_count = 0;
187unsigned int vm_pageout_pause_max = 0;
188unsigned int vm_free_page_pause = 100; /* milliseconds */
189
190/*
191 * Protection against zero fill flushing live working sets derived
192 * from existing backing store and files
193 */
194unsigned int vm_accellerate_zf_pageout_trigger = 400;
195unsigned int vm_zf_iterator;
196unsigned int vm_zf_iterator_count = 40;
197unsigned int last_page_zf;
198unsigned int vm_zf_count = 0;
199
200/*
201 * These variables record the pageout daemon's actions:
202 * how many pages it looks at and what happens to those pages.
203 * No locking needed because only one thread modifies the variables.
204 */
205
206unsigned int vm_pageout_active = 0; /* debugging */
207unsigned int vm_pageout_inactive = 0; /* debugging */
208unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
209unsigned int vm_pageout_inactive_forced = 0; /* debugging */
210unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
211unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
212unsigned int vm_pageout_inactive_busy = 0; /* debugging */
213unsigned int vm_pageout_inactive_absent = 0; /* debugging */
214unsigned int vm_pageout_inactive_used = 0; /* debugging */
215unsigned int vm_pageout_inactive_clean = 0; /* debugging */
216unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
217unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
218unsigned int vm_stat_discard = 0; /* debugging */
219unsigned int vm_stat_discard_sent = 0; /* debugging */
220unsigned int vm_stat_discard_failure = 0; /* debugging */
221unsigned int vm_stat_discard_throttle = 0; /* debugging */
222unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
223unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
224unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
225unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
226unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
227unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
228
229
230unsigned int vm_pageout_out_of_line = 0;
231unsigned int vm_pageout_in_place = 0;
232/*
233 * Routine: vm_pageout_object_allocate
234 * Purpose:
235 * Allocate an object for use as out-of-line memory in a
236 * data_return/data_initialize message.
237 * The page must be in an unlocked object.
238 *
239 * If the page belongs to a trusted pager, cleaning in place
240 * will be used, which utilizes a special "pageout object"
241 * containing private alias pages for the real page frames.
242 * Untrusted pagers use normal out-of-line memory.
243 */
244vm_object_t
245vm_pageout_object_allocate(
246 vm_page_t m,
247 vm_size_t size,
248 vm_object_offset_t offset)
249{
250 vm_object_t object = m->object;
251 vm_object_t new_object;
252
253 assert(object->pager_ready);
254
255 if (object->pager_trusted || object->internal)
256 vm_pageout_throttle(m);
257
258 new_object = vm_object_allocate(size);
259
260 if (object->pager_trusted) {
261 assert (offset < object->size);
262
263 vm_object_lock(new_object);
264 new_object->pageout = TRUE;
265 new_object->shadow = object;
266 new_object->can_persist = FALSE;
267 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
268 new_object->shadow_offset = offset;
269 vm_object_unlock(new_object);
270
271 /*
272 * Take a paging reference on the object. This will be dropped
273 * in vm_pageout_object_terminate()
274 */
275 vm_object_lock(object);
276 vm_object_paging_begin(object);
277 vm_object_unlock(object);
278
279 vm_pageout_in_place++;
280 } else
281 vm_pageout_out_of_line++;
282 return(new_object);
283}
284
285#if MACH_CLUSTER_STATS
286unsigned long vm_pageout_cluster_dirtied = 0;
287unsigned long vm_pageout_cluster_cleaned = 0;
288unsigned long vm_pageout_cluster_collisions = 0;
289unsigned long vm_pageout_cluster_clusters = 0;
290unsigned long vm_pageout_cluster_conversions = 0;
291unsigned long vm_pageout_target_collisions = 0;
292unsigned long vm_pageout_target_page_dirtied = 0;
293unsigned long vm_pageout_target_page_freed = 0;
294#define CLUSTER_STAT(clause) clause
295#else /* MACH_CLUSTER_STATS */
296#define CLUSTER_STAT(clause)
297#endif /* MACH_CLUSTER_STATS */
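/*
 * Note on the pattern above: CLUSTER_STAT(clause) expands to the clause
 * only when MACH_CLUSTER_STATS is configured, so the statistics updates
 * sprinkled through the clustering code below compile away entirely in
 * normal builds.
 */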
298
299/*
300 * Routine: vm_pageout_object_terminate
301 * Purpose:
302 * Destroy the pageout_object allocated by
303 * vm_pageout_object_allocate(), and perform all of the
304 * required cleanup actions.
305 *
306 * In/Out conditions:
307 * The object must be locked, and will be returned locked.
308 */
309void
310vm_pageout_object_terminate(
311 vm_object_t object)
312{
313 vm_object_t shadow_object;
314
315 /*
316 * Deal with the deallocation (last reference) of a pageout object
317 * (used for cleaning-in-place) by dropping the paging references/
318 * freeing pages in the original object.
319 */
320
321 assert(object->pageout);
322 shadow_object = object->shadow;
323 vm_object_lock(shadow_object);
324
325 while (!queue_empty(&object->memq)) {
326 vm_page_t p, m;
327 vm_object_offset_t offset;
328
329 p = (vm_page_t) queue_first(&object->memq);
330
331 assert(p->private);
332 assert(p->pageout);
333 p->pageout = FALSE;
334 assert(!p->cleaning);
335
336 offset = p->offset;
337 VM_PAGE_FREE(p);
338 p = VM_PAGE_NULL;
339
340 m = vm_page_lookup(shadow_object,
341 offset + object->shadow_offset);
342
343 if(m == VM_PAGE_NULL)
344 continue;
345 assert(m->cleaning);
346 /* used as a trigger on upl_commit etc to recognize the */
 347 /* pageout daemon's subsequent desire to pageout a cleaning */
348 /* page. When the bit is on the upl commit code will */
349 /* respect the pageout bit in the target page over the */
350 /* caller's page list indication */
351 m->dump_cleaning = FALSE;
352
353 /*
354 * Account for the paging reference taken when
355 * m->cleaning was set on this page.
356 */
357 vm_object_paging_end(shadow_object);
358 assert((m->dirty) || (m->precious) ||
359 (m->busy && m->cleaning));
360
361 /*
362 * Handle the trusted pager throttle.
363 */
364 vm_page_lock_queues();
365 if (m->laundry) {
366 vm_page_laundry_count--;
367 m->laundry = FALSE;
368 if (vm_page_laundry_count < vm_page_laundry_min) {
369 vm_page_laundry_min = 0;
370 thread_wakeup((event_t) &vm_page_laundry_count);
371 }
372 }
373
374 /*
375 * Handle the "target" page(s). These pages are to be freed if
376 * successfully cleaned. Target pages are always busy, and are
377 * wired exactly once. The initial target pages are not mapped,
378 * (so cannot be referenced or modified) but converted target
379 * pages may have been modified between the selection as an
380 * adjacent page and conversion to a target.
381 */
382 if (m->pageout) {
383 assert(m->busy);
384 assert(m->wire_count == 1);
385 m->cleaning = FALSE;
386 m->pageout = FALSE;
387#if MACH_CLUSTER_STATS
388 if (m->wanted) vm_pageout_target_collisions++;
389#endif
390 /*
391 * Revoke all access to the page. Since the object is
392 * locked, and the page is busy, this prevents the page
393 * from being dirtied after the pmap_is_modified() call
394 * returns.
395 */
d7e50217 396 pmap_page_protect(m->phys_page, VM_PROT_NONE);
397
398 /*
 399 * Since the page is left "dirty" but "not modified", we
400 * can detect whether the page was redirtied during
401 * pageout by checking the modify state.
402 */
d7e50217 403 m->dirty = pmap_is_modified(m->phys_page);
404
405 if (m->dirty) {
406 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
407 vm_page_unwire(m);/* reactivates */
408 VM_STAT(reactivations++);
409 PAGE_WAKEUP_DONE(m);
410 } else {
411 CLUSTER_STAT(vm_pageout_target_page_freed++;)
412 vm_page_free(m);/* clears busy, etc. */
413 }
414 vm_page_unlock_queues();
415 continue;
416 }
417 /*
418 * Handle the "adjacent" pages. These pages were cleaned in
419 * place, and should be left alone.
420 * If prep_pin_count is nonzero, then someone is using the
421 * page, so make it active.
422 */
423 if (!m->active && !m->inactive && !m->private) {
424 if (m->reference)
425 vm_page_activate(m);
426 else
427 vm_page_deactivate(m);
428 }
429 if((m->busy) && (m->cleaning)) {
430
431 /* the request_page_list case, (COPY_OUT_FROM FALSE) */
432 m->busy = FALSE;
433
434 /* We do not re-set m->dirty ! */
435 /* The page was busy so no extraneous activity */
 436 /* could have occurred. COPY_INTO is a read into the */
437 /* new pages. CLEAN_IN_PLACE does actually write */
438 /* out the pages but handling outside of this code */
439 /* will take care of resetting dirty. We clear the */
440 /* modify however for the Programmed I/O case. */
d7e50217 441 pmap_clear_modify(m->phys_page);
442 if(m->absent) {
443 m->absent = FALSE;
444 if(shadow_object->absent_count == 1)
445 vm_object_absent_release(shadow_object);
446 else
447 shadow_object->absent_count--;
448 }
449 m->overwriting = FALSE;
450 } else if (m->overwriting) {
451 /* alternate request page list, write to page_list */
452 /* case. Occurs when the original page was wired */
453 /* at the time of the list request */
454 assert(m->wire_count != 0);
455 vm_page_unwire(m);/* reactivates */
456 m->overwriting = FALSE;
457 } else {
458 /*
459 * Set the dirty state according to whether or not the page was
460 * modified during the pageout. Note that we purposefully do
461 * NOT call pmap_clear_modify since the page is still mapped.
 462 * If the page were to be dirtied between the 2 calls,
 463 * this fact would be lost. This code is only necessary to
464 * maintain statistics, since the pmap module is always
465 * consulted if m->dirty is false.
466 */
467#if MACH_CLUSTER_STATS
d7e50217 468 m->dirty = pmap_is_modified(m->phys_page);
469
470 if (m->dirty) vm_pageout_cluster_dirtied++;
471 else vm_pageout_cluster_cleaned++;
472 if (m->wanted) vm_pageout_cluster_collisions++;
473#else
474 m->dirty = 0;
475#endif
476 }
477 m->cleaning = FALSE;
478
479 /*
480 * Wakeup any thread waiting for the page to be un-cleaning.
481 */
482 PAGE_WAKEUP(m);
483 vm_page_unlock_queues();
484 }
485 /*
486 * Account for the paging reference taken in vm_paging_object_allocate.
487 */
488 vm_object_paging_end(shadow_object);
489 vm_object_unlock(shadow_object);
490
491 assert(object->ref_count == 0);
492 assert(object->paging_in_progress == 0);
493 assert(object->resident_page_count == 0);
494 return;
495}
496
497/*
498 * Routine: vm_pageout_setup
499 * Purpose:
500 * Set up a page for pageout (clean & flush).
501 *
502 * Move the page to a new object, as part of which it will be
503 * sent to its memory manager in a memory_object_data_write or
504 * memory_object_initialize message.
505 *
506 * The "new_object" and "new_offset" arguments
507 * indicate where the page should be moved.
508 *
509 * In/Out conditions:
510 * The page in question must not be on any pageout queues,
511 * and must be busy. The object to which it belongs
512 * must be unlocked, and the caller must hold a paging
513 * reference to it. The new_object must not be locked.
514 *
515 * This routine returns a pointer to a place-holder page,
516 * inserted at the same offset, to block out-of-order
517 * requests for the page. The place-holder page must
518 * be freed after the data_write or initialize message
519 * has been sent.
520 *
521 * The original page is put on a paging queue and marked
522 * not busy on exit.
523 */
524vm_page_t
525vm_pageout_setup(
526 register vm_page_t m,
527 register vm_object_t new_object,
528 vm_object_offset_t new_offset)
529{
530 register vm_object_t old_object = m->object;
531 vm_object_offset_t paging_offset;
532 vm_object_offset_t offset;
533 register vm_page_t holding_page;
534 register vm_page_t new_m;
535 register vm_page_t new_page;
536 boolean_t need_to_wire = FALSE;
537
538
539 XPR(XPR_VM_PAGEOUT,
540 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
541 (integer_t)m->object, (integer_t)m->offset,
542 (integer_t)m, (integer_t)new_object,
543 (integer_t)new_offset);
544 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
545 !m->restart);
546
547 assert(m->dirty || m->precious);
548
549 /*
550 * Create a place-holder page where the old one was, to prevent
551 * attempted pageins of this page while we're unlocked.
552 */
553 VM_PAGE_GRAB_FICTITIOUS(holding_page);
554
555 vm_object_lock(old_object);
556
557 offset = m->offset;
558 paging_offset = offset + old_object->paging_offset;
559
560 if (old_object->pager_trusted) {
561 /*
562 * This pager is trusted, so we can clean this page
563 * in place. Leave it in the old object, and mark it
564 * cleaning & pageout.
565 */
566 new_m = holding_page;
567 holding_page = VM_PAGE_NULL;
568
569 /*
570 * Set up new page to be private shadow of real page.
571 */
d7e50217 572 new_m->phys_page = m->phys_page;
1c79356b 573 new_m->fictitious = FALSE;
574 new_m->pageout = TRUE;
575
576 /*
577 * Mark real page as cleaning (indicating that we hold a
578 * paging reference to be released via m_o_d_r_c) and
579 * pageout (indicating that the page should be freed
580 * when the pageout completes).
581 */
d7e50217 582 pmap_clear_modify(m->phys_page);
1c79356b 583 vm_page_lock_queues();
0b4e3aa0 584 new_m->private = TRUE;
585 vm_page_wire(new_m);
586 m->cleaning = TRUE;
587 m->pageout = TRUE;
588
589 vm_page_wire(m);
590 assert(m->wire_count == 1);
591 vm_page_unlock_queues();
592
593 m->dirty = TRUE;
594 m->precious = FALSE;
595 m->page_lock = VM_PROT_NONE;
596 m->unusual = FALSE;
597 m->unlock_request = VM_PROT_NONE;
598 } else {
599 /*
600 * Cannot clean in place, so rip the old page out of the
601 * object, and stick the holding page in. Set new_m to the
602 * page in the new object.
603 */
604 vm_page_lock_queues();
605 VM_PAGE_QUEUES_REMOVE(m);
606 vm_page_remove(m);
607
608 vm_page_insert(holding_page, old_object, offset);
609 vm_page_unlock_queues();
610
611 m->dirty = TRUE;
612 m->precious = FALSE;
613 new_m = m;
614 new_m->page_lock = VM_PROT_NONE;
615 new_m->unlock_request = VM_PROT_NONE;
616
617 if (old_object->internal)
618 need_to_wire = TRUE;
619 }
620 /*
621 * Record that this page has been written out
622 */
623#if MACH_PAGEMAP
624 vm_external_state_set(old_object->existence_map, offset);
625#endif /* MACH_PAGEMAP */
626
627 vm_object_unlock(old_object);
628
629 vm_object_lock(new_object);
630
631 /*
 632 * Put the page into the new object. If it is not wired
 633 * (if it's the real page) it will be activated.
634 */
635
636 vm_page_lock_queues();
637 vm_page_insert(new_m, new_object, new_offset);
638 if (need_to_wire)
639 vm_page_wire(new_m);
640 else
641 vm_page_activate(new_m);
642 PAGE_WAKEUP_DONE(new_m);
643 vm_page_unlock_queues();
644
645 vm_object_unlock(new_object);
646
647 /*
648 * Return the placeholder page to simplify cleanup.
649 */
650 return (holding_page);
651}
652
653/*
654 * Routine: vm_pageclean_setup
655 *
656 * Purpose: setup a page to be cleaned (made non-dirty), but not
657 * necessarily flushed from the VM page cache.
658 * This is accomplished by cleaning in place.
659 *
660 * The page must not be busy, and the object and page
661 * queues must be locked.
662 *
663 */
664void
665vm_pageclean_setup(
666 vm_page_t m,
667 vm_page_t new_m,
668 vm_object_t new_object,
669 vm_object_offset_t new_offset)
670{
671 vm_object_t old_object = m->object;
672 assert(!m->busy);
673 assert(!m->cleaning);
674
675 XPR(XPR_VM_PAGEOUT,
676 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
677 (integer_t)old_object, m->offset, (integer_t)m,
678 (integer_t)new_m, new_offset);
679
d7e50217 680 pmap_clear_modify(m->phys_page);
681 vm_object_paging_begin(old_object);
682
683 /*
684 * Record that this page has been written out
685 */
686#if MACH_PAGEMAP
687 vm_external_state_set(old_object->existence_map, m->offset);
688#endif /*MACH_PAGEMAP*/
689
690 /*
691 * Mark original page as cleaning in place.
692 */
693 m->cleaning = TRUE;
694 m->dirty = TRUE;
695 m->precious = FALSE;
696
697 /*
698 * Convert the fictitious page to a private shadow of
699 * the real page.
700 */
701 assert(new_m->fictitious);
702 new_m->fictitious = FALSE;
703 new_m->private = TRUE;
704 new_m->pageout = TRUE;
d7e50217 705 new_m->phys_page = m->phys_page;
706 vm_page_wire(new_m);
707
708 vm_page_insert(new_m, new_object, new_offset);
709 assert(!new_m->wanted);
710 new_m->busy = FALSE;
711}
712
713void
714vm_pageclean_copy(
715 vm_page_t m,
716 vm_page_t new_m,
717 vm_object_t new_object,
718 vm_object_offset_t new_offset)
719{
720 XPR(XPR_VM_PAGEOUT,
721 "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
722 m, new_m, new_object, new_offset, 0);
723
724 assert((!m->busy) && (!m->cleaning));
725
726 assert(!new_m->private && !new_m->fictitious);
727
d7e50217 728 pmap_clear_modify(m->phys_page);
729
730 m->busy = TRUE;
731 vm_object_paging_begin(m->object);
732 vm_page_unlock_queues();
733 vm_object_unlock(m->object);
734
735 /*
736 * Copy the original page to the new page.
737 */
738 vm_page_copy(m, new_m);
739
740 /*
741 * Mark the old page as clean. A request to pmap_is_modified
742 * will get the right answer.
743 */
744 vm_object_lock(m->object);
745 m->dirty = FALSE;
746
747 vm_object_paging_end(m->object);
748
749 vm_page_lock_queues();
750 if (!m->active && !m->inactive)
751 vm_page_activate(m);
752 PAGE_WAKEUP_DONE(m);
753
754 vm_page_insert(new_m, new_object, new_offset);
755 vm_page_activate(new_m);
756 new_m->busy = FALSE; /* No other thread can be waiting */
757}
758
759
760/*
761 * Routine: vm_pageout_initialize_page
762 * Purpose:
763 * Causes the specified page to be initialized in
764 * the appropriate memory object. This routine is used to push
765 * pages into a copy-object when they are modified in the
766 * permanent object.
767 *
768 * The page is moved to a temporary object and paged out.
769 *
770 * In/out conditions:
771 * The page in question must not be on any pageout queues.
772 * The object to which it belongs must be locked.
773 * The page must be busy, but not hold a paging reference.
774 *
775 * Implementation:
776 * Move this page to a completely new object.
777 */
778void
779vm_pageout_initialize_page(
780 vm_page_t m)
781{
782 vm_map_copy_t copy;
783 vm_object_t new_object;
784 vm_object_t object;
785 vm_object_offset_t paging_offset;
786 vm_page_t holding_page;
787
788
789 XPR(XPR_VM_PAGEOUT,
790 "vm_pageout_initialize_page, page 0x%X\n",
791 (integer_t)m, 0, 0, 0, 0);
792 assert(m->busy);
793
794 /*
795 * Verify that we really want to clean this page
796 */
797 assert(!m->absent);
798 assert(!m->error);
799 assert(m->dirty);
800
801 /*
802 * Create a paging reference to let us play with the object.
803 */
804 object = m->object;
805 paging_offset = m->offset + object->paging_offset;
806 vm_object_paging_begin(object);
807 vm_object_unlock(object);
808 if (m->absent || m->error || m->restart ||
809 (!m->dirty && !m->precious)) {
810 VM_PAGE_FREE(m);
811 panic("reservation without pageout?"); /* alan */
812 return;
813 }
814
815 /* set the page for future call to vm_fault_list_request */
816 holding_page = NULL;
817 vm_object_lock(m->object);
818 vm_page_lock_queues();
d7e50217 819 pmap_clear_modify(m->phys_page);
820 m->dirty = TRUE;
821 m->busy = TRUE;
822 m->list_req_pending = TRUE;
823 m->cleaning = TRUE;
824 m->pageout = TRUE;
825 vm_page_wire(m);
826 vm_page_unlock_queues();
827 vm_object_unlock(m->object);
828 vm_pageout_throttle(m);
829
830 /*
831 * Write the data to its pager.
832 * Note that the data is passed by naming the new object,
833 * not a virtual address; the pager interface has been
834 * manipulated to use the "internal memory" data type.
835 * [The object reference from its allocation is donated
836 * to the eventual recipient.]
837 */
838 memory_object_data_initialize(object->pager,
1c79356b 839 paging_offset,
840 PAGE_SIZE);
841
842 vm_object_lock(object);
843}
844
845#if MACH_CLUSTER_STATS
846#define MAXCLUSTERPAGES 16
847struct {
848 unsigned long pages_in_cluster;
849 unsigned long pages_at_higher_offsets;
850 unsigned long pages_at_lower_offsets;
851} cluster_stats[MAXCLUSTERPAGES];
852#endif /* MACH_CLUSTER_STATS */
853
854boolean_t allow_clustered_pageouts = FALSE;
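/*
 * Note: while allow_clustered_pageouts stays FALSE (the default here),
 * vm_pageout_cluster() clamps cluster_size to PAGE_SIZE, so each call
 * effectively pushes only the single target page to the pager.
 */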
855
856/*
857 * vm_pageout_cluster:
858 *
859 * Given a page, page it out, and attempt to clean adjacent pages
860 * in the same operation.
861 *
862 * The page must be busy, and the object unlocked w/ paging reference
863 * to prevent deallocation or collapse. The page must not be on any
864 * pageout queue.
865 */
866void
867vm_pageout_cluster(
868 vm_page_t m)
869{
870 vm_object_t object = m->object;
871 vm_object_offset_t offset = m->offset; /* from vm_object start */
872 vm_object_offset_t paging_offset = m->offset + object->paging_offset;
873 vm_object_t new_object;
874 vm_object_offset_t new_offset;
875 vm_size_t cluster_size;
876 vm_object_offset_t cluster_offset; /* from memory_object start */
877 vm_object_offset_t cluster_lower_bound; /* from vm_object_start */
878 vm_object_offset_t cluster_upper_bound; /* from vm_object_start */
879 vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */
880 vm_object_offset_t offset_within_cluster;
881 vm_size_t length_of_data;
882 vm_page_t friend, holding_page;
883 kern_return_t rc;
884 boolean_t precious_clean = TRUE;
885 int pages_in_cluster;
886
887 CLUSTER_STAT(int pages_at_higher_offsets = 0;)
888 CLUSTER_STAT(int pages_at_lower_offsets = 0;)
889
890 XPR(XPR_VM_PAGEOUT,
891 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
892 (integer_t)object, offset, (integer_t)m, 0, 0);
893
894 CLUSTER_STAT(vm_pageout_cluster_clusters++;)
895 /*
896 * Only a certain kind of page is appreciated here.
897 */
898 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
899 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
900
901 vm_object_lock(object);
902 cluster_size = object->cluster_size;
903
904 assert(cluster_size >= PAGE_SIZE);
905 if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
906 assert(object->pager_created && object->pager_initialized);
907 assert(object->internal || object->pager_ready);
908
909 if (m->precious && !m->dirty)
910 precious_clean = TRUE;
911
912 if (!object->pager_trusted || !allow_clustered_pageouts)
913 cluster_size = PAGE_SIZE;
914 vm_object_unlock(object);
915
916 cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1);
917 /* bytes from beginning of cluster */
918 /*
919 * Due to unaligned mappings, we have to be careful
920 * of negative offsets into the VM object. Clip the cluster
921 * boundary to the VM object, not the memory object.
922 */
923 if (offset > cluster_offset) {
924 cluster_lower_bound = offset - cluster_offset;
925 /* from vm_object */
926 } else {
927 cluster_lower_bound = 0;
928 }
929 cluster_upper_bound = (offset - cluster_offset) +
930 (vm_object_offset_t)cluster_size;
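	/*
	 * Illustrative example (hypothetical numbers): with a 32K
	 * cluster_size (0x8000), offset 0x3000 and paging_offset 0x5000,
	 * cluster_offset = 0x5000 & 0x7fff = 0x5000.  Since offset is
	 * smaller than cluster_offset, cluster_lower_bound is clipped to
	 * 0 rather than going negative, and cluster_upper_bound becomes
	 * (0x3000 - 0x5000) + 0x8000 = 0x6000.  The mask arithmetic
	 * assumes cluster_size is a power of two.
	 */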
931
932 /* set the page for future call to vm_fault_list_request */
933 holding_page = NULL;
934 vm_object_lock(m->object);
935 vm_page_lock_queues();
936 m->busy = TRUE;
937 m->list_req_pending = TRUE;
938 m->cleaning = TRUE;
939 m->pageout = TRUE;
940 vm_page_wire(m);
941 vm_page_unlock_queues();
942 vm_object_unlock(m->object);
943 vm_pageout_throttle(m);
944
945 /*
946 * Search backward for adjacent eligible pages to clean in
947 * this operation.
948 */
949
950 cluster_start = offset;
951 if (offset) { /* avoid wrap-around at zero */
952 for (cluster_start = offset - PAGE_SIZE_64;
953 cluster_start >= cluster_lower_bound;
954 cluster_start -= PAGE_SIZE_64) {
955 assert(cluster_size > PAGE_SIZE);
956
957 vm_object_lock(object);
958 vm_page_lock_queues();
959
960 if ((friend = vm_pageout_cluster_page(object, cluster_start,
961 precious_clean)) == VM_PAGE_NULL) {
962 vm_page_unlock_queues();
963 vm_object_unlock(object);
964 break;
965 }
966 new_offset = (cluster_start + object->paging_offset)
967 & (cluster_size - 1);
968
969 assert(new_offset < cluster_offset);
970 m->list_req_pending = TRUE;
971 m->cleaning = TRUE;
972/* do nothing except advance the write request, all we really need to */
973/* do is push the target page and let the code at the other end decide */
974/* what is really the right size */
975 if (vm_page_free_count <= vm_page_free_reserved) {
976 m->busy = TRUE;
977 m->pageout = TRUE;
978 vm_page_wire(m);
979 }
980
981 vm_page_unlock_queues();
982 vm_object_unlock(object);
983 if(m->dirty || m->object->internal) {
984 CLUSTER_STAT(pages_at_lower_offsets++;)
985 }
986
987 }
988 cluster_start += PAGE_SIZE_64;
989 }
990 assert(cluster_start >= cluster_lower_bound);
991 assert(cluster_start <= offset);
992 /*
993 * Search forward for adjacent eligible pages to clean in
994 * this operation.
995 */
996 for (cluster_end = offset + PAGE_SIZE_64;
997 cluster_end < cluster_upper_bound;
998 cluster_end += PAGE_SIZE_64) {
999 assert(cluster_size > PAGE_SIZE);
1000
1001 vm_object_lock(object);
1002 vm_page_lock_queues();
1003
1004 if ((friend = vm_pageout_cluster_page(object, cluster_end,
1005 precious_clean)) == VM_PAGE_NULL) {
1006 vm_page_unlock_queues();
1007 vm_object_unlock(object);
1008 break;
1009 }
1010 new_offset = (cluster_end + object->paging_offset)
1011 & (cluster_size - 1);
1012
1013 assert(new_offset < cluster_size);
1014 m->list_req_pending = TRUE;
1015 m->cleaning = TRUE;
1016/* do nothing except advance the write request, all we really need to */
1017/* do is push the target page and let the code at the other end decide */
1018/* what is really the right size */
1019 if (vm_page_free_count <= vm_page_free_reserved) {
1020 m->busy = TRUE;
1021 m->pageout = TRUE;
1022 vm_page_wire(m);
1023 }
1024
1025 vm_page_unlock_queues();
1026 vm_object_unlock(object);
1027
1028 if(m->dirty || m->object->internal) {
1029 CLUSTER_STAT(pages_at_higher_offsets++;)
1030 }
1031 }
1032 assert(cluster_end <= cluster_upper_bound);
1033 assert(cluster_end >= offset + PAGE_SIZE);
1034
1035 /*
1036 * (offset - cluster_offset) is beginning of cluster_object
1037 * relative to vm_object start.
1038 */
1039 offset_within_cluster = cluster_start - (offset - cluster_offset);
1040 length_of_data = cluster_end - cluster_start;
1041
1042 assert(offset_within_cluster < cluster_size);
1043 assert((offset_within_cluster + length_of_data) <= cluster_size);
1044
1045 rc = KERN_SUCCESS;
1046 assert(rc == KERN_SUCCESS);
1047
1048 pages_in_cluster = length_of_data/PAGE_SIZE;
1049
1050#if MACH_CLUSTER_STATS
1051 (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
1052 (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
1053 (cluster_stats[pages_in_cluster].pages_in_cluster)++;
1054#endif /* MACH_CLUSTER_STATS */
1055
1056 /*
1057 * Send the data to the pager.
1058 */
1059 paging_offset = cluster_start + object->paging_offset;
0b4e3aa0 1060
1c79356b 1061 rc = memory_object_data_return(object->pager,
1c79356b 1062 paging_offset,
1063 length_of_data,
1064 !precious_clean,
1065 FALSE);
0b4e3aa0 1066
1067 vm_object_lock(object);
1068 vm_object_paging_end(object);
1069
1070 if (holding_page) {
1071 assert(!object->pager_trusted);
1072 VM_PAGE_FREE(holding_page);
1073 vm_object_paging_end(object);
1074 }
1075
1076 vm_object_unlock(object);
1077}
1078
1079/*
1080 * Trusted pager throttle.
1081 * Object must be unlocked, page queues must be unlocked.
1082 */
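/*
 * The matching accounting is in vm_pageout_object_terminate(), which
 * decrements vm_page_laundry_count as cleaned pages come back and wakes
 * any thread blocked here once the count drops below
 * vm_page_laundry_min.
 */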
1083void
1084vm_pageout_throttle(
1085 register vm_page_t m)
1086{
1087 vm_page_lock_queues();
1088 assert(!m->laundry);
1089 m->laundry = TRUE;
1090 while (vm_page_laundry_count >= vm_page_laundry_max) {
1091 /*
1092 * Set the threshold for when vm_page_free()
1093 * should wake us up.
1094 */
1095 vm_page_laundry_min = vm_page_laundry_max/2;
0b4e3aa0 1096
1097 assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
1098 vm_page_unlock_queues();
1099
1100 /*
1101 * Pause to let the default pager catch up.
1102 */
1103 thread_block((void (*)(void)) 0);
1104 vm_page_lock_queues();
1105 }
1106 vm_page_laundry_count++;
1107 vm_page_unlock_queues();
1108}
1109
1110/*
1111 * The global variable vm_pageout_clean_active_pages controls whether
1112 * active pages are considered valid to be cleaned in place during a
1113 * clustered pageout. Performance measurements are necessary to determine
1114 * the best policy.
1115 */
1116int vm_pageout_clean_active_pages = 1;
1117/*
1118 * vm_pageout_cluster_page: [Internal]
1119 *
1120 * return a vm_page_t to the page at (object,offset) if it is appropriate
1121 * to clean in place. Pages that are non-existent, busy, absent, already
1122 * cleaning, or not dirty are not eligible to be cleaned as an adjacent
1123 * page in a cluster.
1124 *
1125 * The object must be locked on entry, and remains locked throughout
1126 * this call.
1127 */
1128
1129vm_page_t
1130vm_pageout_cluster_page(
1131 vm_object_t object,
1132 vm_object_offset_t offset,
1133 boolean_t precious_clean)
1134{
1135 vm_page_t m;
1136
1137 XPR(XPR_VM_PAGEOUT,
1138 "vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
1139 (integer_t)object, offset, 0, 0, 0);
1140
1141 if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
1142 return(VM_PAGE_NULL);
1143
1144 if (m->busy || m->absent || m->cleaning ||
1145 (m->wire_count != 0) || m->error)
1146 return(VM_PAGE_NULL);
1147
1148 if (vm_pageout_clean_active_pages) {
1149 if (!m->active && !m->inactive) return(VM_PAGE_NULL);
1150 } else {
1151 if (!m->inactive) return(VM_PAGE_NULL);
1152 }
1153
1154 assert(!m->private);
1155 assert(!m->fictitious);
1156
d7e50217 1157 if (!m->dirty) m->dirty = pmap_is_modified(m->phys_page);
1158
1159 if (precious_clean) {
1160 if (!m->precious || !m->dirty)
1161 return(VM_PAGE_NULL);
1162 } else {
1163 if (!m->dirty)
1164 return(VM_PAGE_NULL);
1165 }
1166 return(m);
1167}
1168
1169/*
1170 * vm_pageout_scan does the dirty work for the pageout daemon.
1171 * It returns with vm_page_queue_free_lock held and
1172 * vm_page_free_wanted == 0.
1173 */
1174extern void vm_pageout_scan_continue(void); /* forward; */
1175
1176void
1177vm_pageout_scan(void)
1178{
1179 unsigned int burst_count;
1180 boolean_t now = FALSE;
1181 unsigned int laundry_pages;
1182 boolean_t need_more_inactive_pages;
1183 unsigned int loop_detect;
1184
1185 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1186
1187/*???*/ /*
1188 * We want to gradually dribble pages from the active queue
1189 * to the inactive queue. If we let the inactive queue get
1190 * very small, and then suddenly dump many pages into it,
1191 * those pages won't get a sufficient chance to be referenced
1192 * before we start taking them from the inactive queue.
1193 *
1194 * We must limit the rate at which we send pages to the pagers.
1195 * data_write messages consume memory, for message buffers and
1196 * for map-copy objects. If we get too far ahead of the pagers,
1197 * we can potentially run out of memory.
1198 *
1199 * We can use the laundry count to limit directly the number
1200 * of pages outstanding to the default pager. A similar
1201 * strategy for external pagers doesn't work, because
1202 * external pagers don't have to deallocate the pages sent them,
1203 * and because we might have to send pages to external pagers
1204 * even if they aren't processing writes. So we also
1205 * use a burst count to limit writes to external pagers.
1206 *
1207 * When memory is very tight, we can't rely on external pagers to
1208 * clean pages. They probably aren't running, because they
1209 * aren't vm-privileged. If we kept sending dirty pages to them,
1210 * we could exhaust the free list. However, we can't just ignore
1211 * pages belonging to external objects, because there might be no
1212 * pages belonging to internal objects. Hence, we get the page
1213 * into an internal object and then immediately double-page it,
1214 * sending it to the default pager.
1215 *
1216 * consider_zone_gc should be last, because the other operations
1217 * might return memory to zones.
1218 */
1219
0b4e3aa0 1220
1221 Restart:
1222
0b4e3aa0 1223#if THREAD_SWAPPER
1224 mutex_lock(&vm_page_queue_free_lock);
1225 now = (vm_page_free_count < vm_page_free_min);
1226 mutex_unlock(&vm_page_queue_free_lock);
0b4e3aa0 1227
1228 swapout_threads(now);
1229#endif /* THREAD_SWAPPER */
1230
1231 stack_collect();
1232 consider_task_collect();
1233 consider_thread_collect();
1234 consider_zone_gc();
1235 consider_machine_collect();
1236
1237 loop_detect = vm_page_active_count + vm_page_inactive_count;
1238#if 0
1239 if (vm_page_free_count <= vm_page_free_reserved) {
1240 need_more_inactive_pages = TRUE;
1241 } else {
1242 need_more_inactive_pages = FALSE;
1243 }
1244#else
1245 need_more_inactive_pages = FALSE;
1246#endif
1247
1248 for (burst_count = 0;;) {
1249 register vm_page_t m;
1250 register vm_object_t object;
1251
1252 /*
 1253 * Recalculate vm_page_inactive_target.
1254 */
1255
1256 vm_page_lock_queues();
1257 vm_page_inactive_target =
1258 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1259 vm_page_inactive_count);
1260
1261 /*
1262 * Move pages from active to inactive.
1263 */
1264
1265 while ((vm_page_inactive_count < vm_page_inactive_target ||
1266 need_more_inactive_pages) &&
1267 !queue_empty(&vm_page_queue_active)) {
1268 register vm_object_t object;
1269
1270 vm_pageout_active++;
1271 m = (vm_page_t) queue_first(&vm_page_queue_active);
1272
1273 /*
1274 * If we're getting really low on memory,
1275 * try selecting a page that will go
1276 * directly to the default_pager.
1277 * If there are no such pages, we have to
1278 * page out a page backed by an EMM,
1279 * so that the default_pager can recover
1280 * it eventually.
1281 */
1282 if (need_more_inactive_pages &&
1283 (IP_VALID(memory_manager_default))) {
1284 vm_pageout_scan_active_emm_throttle++;
1285 do {
1286 assert(m->active && !m->inactive);
1287 object = m->object;
1288
1289 if (vm_object_lock_try(object)) {
1290#if 0
1291 if (object->pager_trusted ||
1292 object->internal) {
1293 /* found one ! */
1294 vm_pageout_scan_active_emm_throttle_success++;
1295 goto object_locked_active;
1296 }
1297#else
1298 vm_pageout_scan_active_emm_throttle_success++;
1299 goto object_locked_active;
1300#endif
1301 vm_object_unlock(object);
1302 }
1303 m = (vm_page_t) queue_next(&m->pageq);
1304 } while (!queue_end(&vm_page_queue_active,
1305 (queue_entry_t) m));
1306 if (queue_end(&vm_page_queue_active,
1307 (queue_entry_t) m)) {
1308 vm_pageout_scan_active_emm_throttle_failure++;
1309 m = (vm_page_t)
1310 queue_first(&vm_page_queue_active);
1311 }
1312 }
1313
1314 assert(m->active && !m->inactive);
1315
1316 object = m->object;
1317 if (!vm_object_lock_try(object)) {
1318 /*
1319 * Move page to end and continue.
1320 */
1321
1322 queue_remove(&vm_page_queue_active, m,
1323 vm_page_t, pageq);
1324 queue_enter(&vm_page_queue_active, m,
1325 vm_page_t, pageq);
1326 vm_page_unlock_queues();
0b4e3aa0 1327
1328 mutex_pause();
1329 vm_page_lock_queues();
1330 continue;
1331 }
1332
1333 object_locked_active:
1334 /*
1335 * If the page is busy, then we pull it
1336 * off the active queue and leave it alone.
1337 */
1338
1339 if (m->busy) {
1340 vm_object_unlock(object);
1341 queue_remove(&vm_page_queue_active, m,
1342 vm_page_t, pageq);
1343 m->active = FALSE;
1344 if (!m->fictitious)
1345 vm_page_active_count--;
1346 continue;
1347 }
1348
1349 /*
1350 * Deactivate the page while holding the object
1351 * locked, so we know the page is still not busy.
1352 * This should prevent races between pmap_enter
1353 * and pmap_clear_reference. The page might be
1354 * absent or fictitious, but vm_page_deactivate
1355 * can handle that.
1356 */
1357
1358 vm_page_deactivate(m);
1359 vm_object_unlock(object);
1360 }
1361
1362 /*
1363 * We are done if we have met our target *and*
1364 * nobody is still waiting for a page.
1365 */
1366 if (vm_page_free_count >= vm_page_free_target) {
1367 mutex_lock(&vm_page_queue_free_lock);
1368 if ((vm_page_free_count >= vm_page_free_target) &&
1369 (vm_page_free_wanted == 0)) {
1370 vm_page_unlock_queues();
1371 break;
1372 }
1373 mutex_unlock(&vm_page_queue_free_lock);
1c79356b 1374 }
1375 /*
1376 * Sometimes we have to pause:
1377 * 1) No inactive pages - nothing to do.
1378 * 2) Flow control - wait for untrusted pagers to catch up.
1379 */
1380
1381 if ((queue_empty(&vm_page_queue_inactive) &&
1382 (queue_empty(&vm_page_queue_zf))) ||
1383 ((--loop_detect) == 0) ||
1384 (burst_count >= vm_pageout_burst_max)) {
1385 unsigned int pages, msecs;
1386 int wait_result;
1387
1388 consider_machine_adjust();
1389 /*
1390 * vm_pageout_burst_wait is msecs/page.
1391 * If there is nothing for us to do, we wait
1392 * at least vm_pageout_empty_wait msecs.
1393 */
1394 pages = burst_count;
1395
1396 if (loop_detect == 0) {
1397 printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
1398 msecs = vm_free_page_pause;
1399 }
1400 else {
1401 msecs = burst_count * vm_pageout_burst_wait;
1402 }
1403
1404 if (queue_empty(&vm_page_queue_inactive) &&
9bccf70c 1405 queue_empty(&vm_page_queue_zf) &&
1406 (msecs < vm_pageout_empty_wait))
1407 msecs = vm_pageout_empty_wait;
1408 vm_page_unlock_queues();
0b4e3aa0 1409
1410 assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
1411 counter(c_vm_pageout_scan_block++);
1412
1413 /*
1414 * Unfortunately, we don't have call_continuation
1415 * so we can't rely on tail-recursion.
1416 */
1417 wait_result = thread_block((void (*)(void)) 0);
1418 if (wait_result != THREAD_TIMED_OUT)
1419 thread_cancel_timer();
1420 vm_pageout_scan_continue();
0b4e3aa0 1421
1422 goto Restart;
1423 /*NOTREACHED*/
1424 }
1425
1426 vm_pageout_inactive++;
1427
1428 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1429 vm_zf_iterator = 0;
1430 } else {
1431 last_page_zf = 0;
1432 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1433 vm_zf_iterator = 0;
1434 }
1435 }
1436 if(queue_empty(&vm_page_queue_zf) ||
1437 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1438 !queue_empty(&vm_page_queue_inactive))) {
1439 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1440 last_page_zf = 0;
1441 } else {
1442 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1443 last_page_zf = 1;
1444 }
1445
1446 if ((vm_page_free_count <= vm_page_free_reserved) &&
1447 (IP_VALID(memory_manager_default))) {
1448 /*
1449 * We're really low on memory. Try to select a page that
1450 * would go directly to the default_pager.
1451 * If there are no such pages, we have to page out a
1452 * page backed by an EMM, so that the default_pager
1453 * can recover it eventually.
1454 */
1455 vm_pageout_scan_inactive_emm_throttle++;
1456 do {
1457 assert(!m->active && m->inactive);
1458 object = m->object;
1459
1460 if (vm_object_lock_try(object)) {
1461#if 0
1462 if (object->pager_trusted ||
1463 object->internal) {
1464 /* found one ! */
1465 vm_pageout_scan_inactive_emm_throttle_success++;
1466 goto object_locked_inactive;
1467 }
1468#else
1469 vm_pageout_scan_inactive_emm_throttle_success++;
1470 goto object_locked_inactive;
1471#endif /* 0 */
1472 vm_object_unlock(object);
1473 }
1474 m = (vm_page_t) queue_next(&m->pageq);
1475 } while ((!queue_end(&vm_page_queue_zf,
1476 (queue_entry_t) m))
1477 && (!queue_end(&vm_page_queue_inactive,
1478 (queue_entry_t) m)));
1479
1480 if ((queue_end(&vm_page_queue_zf,
1481 (queue_entry_t) m))
1482 || (queue_end(&vm_page_queue_inactive,
1483 (queue_entry_t) m))) {
1484 vm_pageout_scan_inactive_emm_throttle_failure++;
1485 /*
1486 * We should check the "active" queue
1487 * for good candidates to page out.
1488 */
1489 need_more_inactive_pages = TRUE;
1490
1491 if(last_page_zf == 0) {
1492 last_page_zf = 1;
1493 vm_zf_iterator = vm_zf_iterator_count - 1;
1494 } else {
1495 last_page_zf = 0;
1496 vm_zf_iterator = vm_zf_iterator_count - 2;
1497 }
1498 vm_page_unlock_queues();
1499 goto Restart;
1500 }
1501 }
1502
1503 assert(!m->active && m->inactive);
1504 object = m->object;
1505
1506 /*
1507 * Try to lock object; since we've got the
1508 * page queues lock, we can only try for this one.
1509 */
1510
1511 if (!vm_object_lock_try(object)) {
1512 /*
1513 * Move page to end and continue.
0b4e3aa0 1514 * Don't re-issue ticket
1c79356b 1515 */
1516 if(m->zero_fill) {
1517 queue_remove(&vm_page_queue_zf, m,
1518 vm_page_t, pageq);
1519 queue_enter(&vm_page_queue_zf, m,
1520 vm_page_t, pageq);
1521 } else {
1522 queue_remove(&vm_page_queue_inactive, m,
1c79356b 1523 vm_page_t, pageq);
9bccf70c 1524 queue_enter(&vm_page_queue_inactive, m,
1c79356b 1525 vm_page_t, pageq);
9bccf70c 1526 }
1c79356b 1527 vm_page_unlock_queues();
0b4e3aa0 1528
1529 mutex_pause();
1530 vm_pageout_inactive_nolock++;
1531 continue;
1532 }
1533
1534 object_locked_inactive:
1535 /*
 1536 * Paging out pages of objects whose pager is being
 1537 * created by another thread must be avoided, because
 1538 * that thread may claim memory, thus leading to a
 1539 * possible deadlock between it and the pageout thread
 1540 * which will wait for pager creation, if such pages are
 1541 * finally chosen. The remaining assumption is that there
 1542 * will finally be enough available pages in the inactive
 1543 * pool to page out in order to satisfy all memory claimed
 1544 * by the thread which concurrently creates the pager.
1545 */
1546
1547 if (!object->pager_initialized && object->pager_created) {
1548 /*
1549 * Move page to end and continue, hoping that
1550 * there will be enough other inactive pages to
1551 * page out so that the thread which currently
1552 * initializes the pager will succeed.
1553 * Don't re-grant the ticket, the page should
 1554 * be pulled from the queue and paged out whenever
1555 * one of its logically adjacent fellows is
1556 * targeted.
1c79356b 1557 */
1558 if(m->zero_fill) {
1559 queue_remove(&vm_page_queue_zf, m,
1560 vm_page_t, pageq);
1561 queue_enter(&vm_page_queue_zf, m,
1562 vm_page_t, pageq);
1563 last_page_zf = 1;
1564 vm_zf_iterator = vm_zf_iterator_count - 1;
1565 } else {
1566 queue_remove(&vm_page_queue_inactive, m,
1567 vm_page_t, pageq);
1568 queue_enter(&vm_page_queue_inactive, m,
1569 vm_page_t, pageq);
1570 last_page_zf = 0;
1571 vm_zf_iterator = 1;
1572 }
1573 vm_page_unlock_queues();
1574 vm_object_unlock(object);
1575 vm_pageout_inactive_avoid++;
1576 continue;
1577 }
1578
1579 /*
1580 * Remove the page from the inactive list.
1581 */
1582
1583 if(m->zero_fill) {
1584 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1585 } else {
1586 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1587 }
1588 m->inactive = FALSE;
1589 if (!m->fictitious)
1590 vm_page_inactive_count--;
1591
1592 if (m->busy || !object->alive) {
1593 /*
1594 * Somebody is already playing with this page.
1595 * Leave it off the pageout queues.
1596 */
1597
1598 vm_page_unlock_queues();
1599 vm_object_unlock(object);
1600 vm_pageout_inactive_busy++;
1601 continue;
1602 }
1603
1604 /*
1605 * If it's absent or in error, we can reclaim the page.
1606 */
1607
1608 if (m->absent || m->error) {
1609 vm_pageout_inactive_absent++;
1610 reclaim_page:
1611 vm_page_free(m);
1612 vm_page_unlock_queues();
1613 vm_object_unlock(object);
1614 continue;
1615 }
1616
1617 assert(!m->private);
1618 assert(!m->fictitious);
1619
1620 /*
1621 * If already cleaning this page in place, convert from
1622 * "adjacent" to "target". We can leave the page mapped,
1623 * and vm_pageout_object_terminate will determine whether
1624 * to free or reactivate.
1625 */
1626
1627 if (m->cleaning) {
1628#if MACH_CLUSTER_STATS
1629 vm_pageout_cluster_conversions++;
1630#endif
1631 m->busy = TRUE;
1632 m->pageout = TRUE;
1633 m->dump_cleaning = TRUE;
1634 vm_page_wire(m);
1635 vm_object_unlock(object);
1636 vm_page_unlock_queues();
1637 continue;
1638 }
1639
1640 /*
1641 * If it's being used, reactivate.
1642 * (Fictitious pages are either busy or absent.)
1643 */
1644
d7e50217 1645 if (m->reference || pmap_is_referenced(m->phys_page)) {
1646 vm_pageout_inactive_used++;
1647 reactivate_page:
1648#if ADVISORY_PAGEOUT
1649 if (m->discard_request) {
1650 m->discard_request = FALSE;
1651 }
1652#endif /* ADVISORY_PAGEOUT */
9bccf70c 1653 last_page_zf = 0;
1654 vm_object_unlock(object);
1655 vm_page_activate(m);
1656 VM_STAT(reactivations++);
1657 vm_page_unlock_queues();
1658 continue;
1659 }
1660
1661#if ADVISORY_PAGEOUT
1662 if (object->advisory_pageout) {
1663 boolean_t do_throttle;
0b4e3aa0 1664 memory_object_t pager;
1665 vm_object_offset_t discard_offset;
1666
1667 if (m->discard_request) {
1668 vm_stat_discard_failure++;
1669 goto mandatory_pageout;
1670 }
1671
1672 assert(object->pager_initialized);
1673 m->discard_request = TRUE;
0b4e3aa0 1674 pager = object->pager;
1675
1676 /* system-wide throttle */
1677 do_throttle = (vm_page_free_count <=
1678 vm_page_free_reserved);
1679
1680#if 0
1681 /*
1682 * JMM - Do we need a replacement throttle
1683 * mechanism for pagers?
1684 */
1685 if (!do_throttle) {
1686 /* throttle on this pager */
1687 /* XXX lock ordering ? */
1688 ip_lock(port);
1689 do_throttle= imq_full(&port->ip_messages);
1690 ip_unlock(port);
1691 }
1692#endif
1693
1694 if (do_throttle) {
1695 vm_stat_discard_throttle++;
1696#if 0
1697 /* ignore this page and skip to next */
1698 vm_page_unlock_queues();
1699 vm_object_unlock(object);
1700 continue;
1701#else
1702 /* force mandatory pageout */
1703 goto mandatory_pageout;
1704#endif
1705 }
1706
1707 /* proceed with discard_request */
1708 vm_page_activate(m);
1709 vm_stat_discard++;
1710 VM_STAT(reactivations++);
1711 discard_offset = m->offset + object->paging_offset;
1712 vm_stat_discard_sent++;
1713 vm_page_unlock_queues();
1714 vm_object_unlock(object);
0b4e3aa0 1715
1716/*
1717 memory_object_discard_request(object->pager,
1718 discard_offset,
1719 PAGE_SIZE);
1720*/
1721 continue;
1722 }
1723 mandatory_pageout:
1724#endif /* ADVISORY_PAGEOUT */
1725
1726 XPR(XPR_VM_PAGEOUT,
1727 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1728 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
1729
1730 /*
1731 * Eliminate all mappings.
1732 */
1733
1734 m->busy = TRUE;
d7e50217 1735 pmap_page_protect(m->phys_page, VM_PROT_NONE);
0b4e3aa0 1736
1c79356b 1737 if (!m->dirty)
d7e50217 1738 m->dirty = pmap_is_modified(m->phys_page);
1739 /*
1740 * If it's clean and not precious, we can free the page.
1741 */
1742
1743 if (!m->dirty && !m->precious) {
1744 vm_pageout_inactive_clean++;
1745 goto reclaim_page;
1746 }
1747 vm_page_unlock_queues();
1748
1749 /*
1750 * If there is no memory object for the page, create
1751 * one and hand it to the default pager.
1752 */
1753
1754 if (!object->pager_initialized)
1755 vm_object_collapse(object);
1756 if (!object->pager_initialized)
1757 vm_object_pager_create(object);
1758 if (!object->pager_initialized) {
1759 /*
1760 * Still no pager for the object.
1761 * Reactivate the page.
1762 *
1763 * Should only happen if there is no
1764 * default pager.
1765 */
1766 vm_page_lock_queues();
1767 vm_page_activate(m);
1768 vm_page_unlock_queues();
1769
1770 /*
1771 * And we are done with it.
1772 */
1773 PAGE_WAKEUP_DONE(m);
1774 vm_object_unlock(object);
1775
1776 /*
1777 * break here to get back to the preemption
1778 * point in the outer loop so that we don't
1779 * spin forever if there is no default pager.
1780 */
1781 vm_pageout_dirty_no_pager++;
1782 /*
1783 * Well there's no pager, but we can still reclaim
1784 * free pages out of the inactive list. Go back
1785 * to top of loop and look for suitable pages.
1786 */
1787 continue;
1788 }
1789
1790 if ((object->pager_initialized) &&
1791 (object->pager == MEMORY_OBJECT_NULL)) {
1792 /*
1793 * This pager has been destroyed by either
1794 * memory_object_destroy or vm_object_destroy, and
1795 * so there is nowhere for the page to go.
1796 * Just free the page.
1797 */
1798 VM_PAGE_FREE(m);
1799 vm_object_unlock(object);
1800 continue;
1801 }
1802
1803 vm_pageout_inactive_dirty++;
1804/*
1805 if (!object->internal)
1806 burst_count++;
1807*/
1808 vm_object_paging_begin(object);
1809 vm_object_unlock(object);
1810 vm_pageout_cluster(m); /* flush it */
1811 }
1812 consider_machine_adjust();
1813}
1814
1815counter(unsigned int c_vm_pageout_scan_continue = 0;)
1816
1817void
1818vm_pageout_scan_continue(void)
1819{
1820 /*
1821 * We just paused to let the pagers catch up.
1822 * If vm_page_laundry_count is still high,
1823 * then we aren't waiting long enough.
1824 * If we have paused some vm_pageout_pause_max times without
1825 * adjusting vm_pageout_burst_wait, it might be too big,
1826 * so we decrease it.
1827 */
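	/*
	 * With the 3/4 decay below, a burst wait of, say, 30 msecs/page
	 * (hypothetical) steps down to 22, 16, 12, ... msecs on successive
	 * adjustments and never drops below 1.
	 */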
1828
1829 vm_page_lock_queues();
1830 counter(++c_vm_pageout_scan_continue);
1831 if (vm_page_laundry_count > vm_pageout_burst_min) {
1832 vm_pageout_burst_wait++;
1833 vm_pageout_pause_count = 0;
1834 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1835 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1836 if (vm_pageout_burst_wait < 1)
1837 vm_pageout_burst_wait = 1;
1838 vm_pageout_pause_count = 0;
1839 }
1840 vm_page_unlock_queues();
1841}
1842
1843void vm_page_free_reserve(int pages);
1844int vm_page_free_count_init;
1845
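/*
 * vm_page_free_reserve: bump vm_page_free_reserved by "pages" and
 * re-derive vm_page_free_min and vm_page_free_target from the new
 * reserve.
 *
 * Worked example (hypothetical numbers): if the boot-time free list
 * held 10000 pages and the reserve is 100 pages, free_after_reserve is
 * 9900, giving vm_page_free_min = 100 + (10 + 9900/100) = 209 and
 * vm_page_free_target = 100 + (15 + 9900/80) = 238 (integer division),
 * which already satisfies the "target >= min + 5" clamp below.
 */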
1846void
1847vm_page_free_reserve(
1848 int pages)
1849{
1850 int free_after_reserve;
1851
1852 vm_page_free_reserved += pages;
1853
1854 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1855
1856 vm_page_free_min = vm_page_free_reserved +
1857 VM_PAGE_FREE_MIN(free_after_reserve);
1858
1859 vm_page_free_target = vm_page_free_reserved +
1860 VM_PAGE_FREE_TARGET(free_after_reserve);
1861
1862 if (vm_page_free_target < vm_page_free_min + 5)
1863 vm_page_free_target = vm_page_free_min + 5;
1864}
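/*
 * Illustrative arithmetic only (the VM_PAGE_FREE_MIN() and
 * VM_PAGE_FREE_TARGET() policies are defined earlier in this file;
 * the numbers below are hypothetical): with
 * vm_page_free_count_init = 10000 pages and a total reserve of 100,
 * free_after_reserve is 9900 and both watermarks end up sitting the
 * reserve (100 pages) above whatever the two macros compute for 9900.
 * The final clamp guarantees free_target >= free_min + 5, so the
 * pageout daemon always has some slack between waking up and
 * reaching its target.
 */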
1865
1866/*
1867 * vm_pageout is the high level pageout daemon.
1868 */
1869
1870
1871void
1872vm_pageout(void)
1873{
1874 thread_t self = current_thread();
0b4e3aa0 1875 spl_t s;
1c79356b
A
1876
1877 /*
1878 * Set thread privileges.
1879 */
1880 self->vm_privilege = TRUE;
1881 stack_privilege(self);
0b4e3aa0
A
1882
1883 s = splsched();
1884 thread_lock(self);
0b4e3aa0 1885 self->priority = BASEPRI_PREEMPT - 1;
9bccf70c 1886 set_sched_pri(self, self->priority);
0b4e3aa0
A
1887 thread_unlock(self);
1888 splx(s);
1c79356b
A
1889
1890 /*
1891 * Initialize some paging parameters.
1892 */
1893
1894 if (vm_page_laundry_max == 0)
1895 vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;
1896
1897 if (vm_pageout_burst_max == 0)
1898 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
1899
1900 if (vm_pageout_burst_wait == 0)
1901 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
1902
1903 if (vm_pageout_empty_wait == 0)
1904 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
1905
1906 vm_page_free_count_init = vm_page_free_count;
9bccf70c 1907 vm_zf_iterator = 0;
1c79356b
A
1908 /*
1909 * even if we've already called vm_page_free_reserve
 1910	 * call it again here to ensure that the targets are
1911 * accurately calculated (it uses vm_page_free_count_init)
1912 * calling it with an arg of 0 will not change the reserve
1913 * but will re-calculate free_min and free_target
1914 */
1915 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED)
1916 vm_page_free_reserve(VM_PAGE_FREE_RESERVED - vm_page_free_reserved);
1917 else
1918 vm_page_free_reserve(0);
1919
1920 /*
1921 * vm_pageout_scan will set vm_page_inactive_target.
1922 *
1923 * The pageout daemon is never done, so loop forever.
1924 * We should call vm_pageout_scan at least once each
1925 * time we are woken, even if vm_page_free_wanted is
1926 * zero, to check vm_page_free_target and
1927 * vm_page_inactive_target.
1928 */
1929 for (;;) {
0b4e3aa0 1930 vm_pageout_scan_event_counter++;
1c79356b
A
1931 vm_pageout_scan();
1932 /* we hold vm_page_queue_free_lock now */
1933 assert(vm_page_free_wanted == 0);
1934 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1935 mutex_unlock(&vm_page_queue_free_lock);
1936 counter(c_vm_pageout_block++);
1937 thread_block((void (*)(void)) 0);
1938 }
1939 /*NOTREACHED*/
1940}
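/*
 * Note on the loop above: vm_pageout_scan() returns holding
 * vm_page_queue_free_lock, and the wait on &vm_page_free_wanted is
 * asserted before that lock is dropped.  Because the wait is already
 * queued when the lock is released, a wakeup posted on that event
 * right after the unlock still takes effect and thread_block()
 * returns promptly instead of missing it.
 */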
1941
9bccf70c
A
1942kern_return_t
1943vm_pageout_emergency_availability_request()
1944{
1945 vm_page_t m;
1946 vm_object_t object;
1947
1948 vm_page_lock_queues();
1949 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1950
1951 while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) {
1952 if(m->fictitious) {
1953 m = (vm_page_t) queue_next(&m->pageq);
1954 continue;
1955 }
1956 if (!m->dirty)
d7e50217 1957 m->dirty = pmap_is_modified(m->phys_page);
9bccf70c
A
1958 if(m->dirty || m->busy || m->wire_count || m->absent
1959 || m->precious || m->cleaning
1960 || m->dump_cleaning || m->error
1961 || m->pageout || m->laundry
1962 || m->list_req_pending
1963 || m->overwriting) {
1964 m = (vm_page_t) queue_next(&m->pageq);
1965 continue;
1966 }
1967 object = m->object;
1968
1969 if (vm_object_lock_try(object)) {
1970 if((!object->alive) ||
1971 (object->pageout)) {
1972 vm_object_unlock(object);
1973 m = (vm_page_t) queue_next(&m->pageq);
1974 continue;
1975 }
1976 m->busy = TRUE;
d7e50217 1977 pmap_page_protect(m->phys_page, VM_PROT_NONE);
9bccf70c
A
1978 vm_page_free(m);
1979 vm_object_unlock(object);
1980 vm_page_unlock_queues();
1981 return KERN_SUCCESS;
1982 }
1983 m = (vm_page_t) queue_next(&m->pageq);
1984 }
1985
1986 m = (vm_page_t) queue_first(&vm_page_queue_active);
1987
1988 while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)) {
1989 if(m->fictitious) {
1990 m = (vm_page_t) queue_next(&m->pageq);
1991 continue;
1992 }
1993 if (!m->dirty)
d7e50217 1994 m->dirty = pmap_is_modified(m->phys_page);
9bccf70c
A
1995 if(m->dirty || m->busy || m->wire_count || m->absent
1996 || m->precious || m->cleaning
1997 || m->dump_cleaning || m->error
1998 || m->pageout || m->laundry
1999 || m->list_req_pending
2000 || m->overwriting) {
2001 m = (vm_page_t) queue_next(&m->pageq);
2002 continue;
2003 }
2004 object = m->object;
2005
2006 if (vm_object_lock_try(object)) {
2007 if((!object->alive) ||
2008 (object->pageout)) {
2009 vm_object_unlock(object);
2010 m = (vm_page_t) queue_next(&m->pageq);
2011 continue;
2012 }
2013 m->busy = TRUE;
d7e50217 2014 pmap_page_protect(m->phys_page, VM_PROT_NONE);
9bccf70c
A
2015 vm_page_free(m);
2016 vm_object_unlock(object);
2017 vm_page_unlock_queues();
2018 return KERN_SUCCESS;
2019 }
2020 m = (vm_page_t) queue_next(&m->pageq);
2021 }
2022 vm_page_unlock_queues();
2023 return KERN_FAILURE;
2024}
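/*
 * Summary of the scan above: the inactive queue and then the active
 * queue are walked looking for a single page that can be given back
 * immediately -- not fictitious, provably clean (pmap_is_modified()
 * is consulted when the dirty bit is clear), and not busy, wired,
 * absent, precious, cleaning, in error, being paged out, in the
 * laundry, or otherwise in transit -- whose object lock can be taken
 * without blocking and whose object is alive and not a pageout
 * object.  The first such page is freed and KERN_SUCCESS returned;
 * if neither queue yields one, the caller gets KERN_FAILURE.
 */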
2025
1c79356b 2026
0b4e3aa0
A
2027static upl_t
2028upl_create(
d7e50217 2029 int flags,
9bccf70c 2030 vm_size_t size)
0b4e3aa0
A
2031{
2032 upl_t upl;
d7e50217 2033 int page_field_size; /* bit field in word size buf */
0b4e3aa0 2034
d7e50217
A
2035 page_field_size = 0;
2036 if (flags & UPL_CREATE_LITE) {
2037 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2038 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2039 }
2040 if(flags & UPL_CREATE_INTERNAL) {
0b4e3aa0 2041 upl = (upl_t)kalloc(sizeof(struct upl)
d7e50217
A
2042 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2043 + page_field_size);
0b4e3aa0 2044 } else {
d7e50217 2045 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
0b4e3aa0
A
2046 }
2047 upl->flags = 0;
2048 upl->src_object = NULL;
2049 upl->kaddr = (vm_offset_t)0;
2050 upl->size = 0;
2051 upl->map_object = NULL;
2052 upl->ref_count = 1;
2053 upl_lock_init(upl);
2054#ifdef UBC_DEBUG
2055 upl->ubc_alias1 = 0;
2056 upl->ubc_alias2 = 0;
2057#endif /* UBC_DEBUG */
2058 return(upl);
2059}
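/*
 * Layout of the buffer returned by upl_create(), derived from the
 * kalloc() sizes above; these are the offsets the consumers below
 * use to locate the embedded arrays:
 *
 *	UPL_CREATE_INTERNAL | UPL_CREATE_LITE:
 *		[ struct upl ]
 *		[ upl_page_info array, size/PAGE_SIZE entries ]
 *		[ lite bitmap, one bit per page ]
 *	UPL_CREATE_INTERNAL:
 *		[ struct upl ][ upl_page_info array ]
 *	UPL_CREATE_LITE:
 *		[ struct upl ][ lite bitmap ]
 *	otherwise:
 *		[ struct upl ]
 *
 * page_field_size is the bitmap size: ((size/PAGE_SIZE) + 7) >> 3
 * bytes, rounded up to a multiple of 4 bytes (whole 32-bit words).
 */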
2060
2061static void
2062upl_destroy(
2063 upl_t upl)
2064{
d7e50217 2065 int page_field_size; /* bit field in word size buf */
0b4e3aa0
A
2066
2067#ifdef UBC_DEBUG
2068 {
2069 upl_t upl_ele;
d7e50217
A
2070 vm_object_t object;
2071 if (upl->map_object->pageout) {
2072 object = upl->map_object->shadow;
2073 } else {
2074 object = upl->map_object;
2075 }
2076 vm_object_lock(object);
2077 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
0b4e3aa0 2078 if(upl_ele == upl) {
d7e50217
A
2079 queue_remove(&object->uplq,
2080 upl_ele, upl_t, uplq);
0b4e3aa0
A
2081 break;
2082 }
2083 }
d7e50217 2084 vm_object_unlock(object);
0b4e3aa0
A
2085 }
2086#endif /* UBC_DEBUG */
d7e50217
A
2087 /* drop a reference on the map_object whether or */
2088 /* not a pageout object is inserted */
2089 if(upl->map_object->pageout)
0b4e3aa0 2090 vm_object_deallocate(upl->map_object);
d7e50217
A
2091
2092 page_field_size = 0;
2093 if (upl->flags & UPL_LITE) {
2094 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2095 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2096 }
0b4e3aa0
A
2097 if(upl->flags & UPL_INTERNAL) {
2098 kfree((vm_offset_t)upl,
2099 sizeof(struct upl) +
d7e50217
A
2100 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2101 + page_field_size);
0b4e3aa0 2102 } else {
d7e50217 2103 kfree((vm_offset_t)upl, sizeof(struct upl) + page_field_size);
0b4e3aa0
A
2104 }
2105}
2106
2107__private_extern__ void
2108uc_upl_dealloc(
1c79356b
A
2109 upl_t upl)
2110{
2111 upl->ref_count -= 1;
2112 if(upl->ref_count == 0) {
2113 upl_destroy(upl);
2114 }
2115}
2116
0b4e3aa0
A
2117void
2118upl_deallocate(
2119 upl_t upl)
2120{
2121
2122 upl->ref_count -= 1;
2123 if(upl->ref_count == 0) {
2124 upl_destroy(upl);
2125 }
2126}
1c79356b
A
2127
2128/*
0b4e3aa0 2129 * Routine: vm_object_upl_request
1c79356b
A
2130 * Purpose:
2131 * Cause the population of a portion of a vm_object.
2132 * Depending on the nature of the request, the pages
 2133	 *		returned may contain valid data or be uninitialized.
 2134	 *		A page list structure, listing the physical pages,
2135 * will be returned upon request.
2136 * This function is called by the file system or any other
2137 * supplier of backing store to a pager.
2138 * IMPORTANT NOTE: The caller must still respect the relationship
2139 * between the vm_object and its backing memory object. The
2140 * caller MUST NOT substitute changes in the backing file
2141 * without first doing a memory_object_lock_request on the
 2142	 *		target range unless it is known that the pages are not
2143 * shared with another entity at the pager level.
2144 * Copy_in_to:
2145 * if a page list structure is present
2146 * return the mapped physical pages, where a
2147 * page is not present, return a non-initialized
2148 * one. If the no_sync bit is turned on, don't
2149 * call the pager unlock to synchronize with other
2150 * possible copies of the page. Leave pages busy
2151 * in the original object, if a page list structure
2152 * was specified. When a commit of the page list
2153 * pages is done, the dirty bit will be set for each one.
2154 * Copy_out_from:
2155 * If a page list structure is present, return
2156 * all mapped pages. Where a page does not exist
2157 * map a zero filled one. Leave pages busy in
2158 * the original object. If a page list structure
2159 * is not specified, this call is a no-op.
2160 *
2161 * Note: access of default pager objects has a rather interesting
2162 * twist. The caller of this routine, presumably the file system
2163 * page cache handling code, will never actually make a request
2164 * against a default pager backed object. Only the default
 2165	 *	pager will make requests on backing store related vm_objects.
 2166	 *	In this way the default pager can maintain the relationship
 2167	 *	between backing store files (abstract memory objects) and
 2168	 *	the vm_objects (cache objects) they support.
2169 *
2170 */
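/*
 * A rough calling sketch (hypothetical caller and variable names,
 * for illustration only -- not part of the original interface
 * documentation):
 *
 *	upl_t		upl;
 *	unsigned int	count = MAX_UPL_TRANSFER;
 *	kern_return_t	kr;
 *
 *	kr = vm_object_upl_request(object, offset, size, &upl,
 *			NULL, &count,
 *			UPL_SET_INTERNAL | UPL_SET_LITE | UPL_COPYOUT_FROM);
 *
 * With UPL_SET_INTERNAL the page list lives inside the upl itself and
 * can be recovered with UPL_GET_INTERNAL_PAGE_LIST(upl) once the call
 * returns; the upl is later paired with upl_commit_range() or
 * upl_abort_range() and finally released with upl_deallocate().
 */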
0b4e3aa0
A
2171__private_extern__ kern_return_t
2172vm_object_upl_request(
1c79356b 2173 vm_object_t object,
0b4e3aa0
A
2174 vm_object_offset_t offset,
2175 vm_size_t size,
1c79356b 2176 upl_t *upl_ptr,
0b4e3aa0
A
2177 upl_page_info_array_t user_page_list,
2178 unsigned int *page_list_count,
2179 int cntrl_flags)
1c79356b
A
2180{
2181 vm_page_t dst_page;
2182 vm_object_offset_t dst_offset = offset;
1c79356b
A
2183 vm_size_t xfer_size = size;
2184 boolean_t do_m_lock = FALSE;
2185 boolean_t dirty;
2186 upl_t upl = NULL;
2187 int entry;
2188 boolean_t encountered_lrp = FALSE;
2189
2190 vm_page_t alias_page = NULL;
0b4e3aa0 2191 int page_ticket;
d7e50217 2192 wpl_array_t lite_list;
0b4e3aa0
A
2193
2194 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2195 >> UPL_PAGE_TICKET_SHIFT;
2196
d7e50217
A
2197 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2198 size = MAX_UPL_TRANSFER * PAGE_SIZE;
0b4e3aa0 2199 }
1c79356b
A
2200
2201 if(cntrl_flags & UPL_SET_INTERNAL)
0b4e3aa0
A
2202 if(page_list_count != NULL)
2203 *page_list_count = MAX_UPL_TRANSFER;
2204 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
2205 ((page_list_count != NULL) && (*page_list_count != 0)
2206 && *page_list_count < (size/page_size)))
1c79356b
A
2207 return KERN_INVALID_ARGUMENT;
2208
2209 if((!object->internal) && (object->paging_offset != 0))
0b4e3aa0 2210 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
1c79356b
A
2211
2212 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2213 return KERN_SUCCESS;
2214 }
d7e50217 2215
1c79356b 2216 if(upl_ptr) {
0b4e3aa0 2217 if(cntrl_flags & UPL_SET_INTERNAL) {
d7e50217
A
2218 if(cntrl_flags & UPL_SET_LITE) {
2219 vm_offset_t page_field_size;
2220 upl = upl_create(
2221 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2222 size);
2223 user_page_list = (upl_page_info_t *)
2224 (((vm_offset_t)upl) + sizeof(struct upl));
2225 lite_list = (wpl_array_t)
2226 (((vm_offset_t)user_page_list) +
2227 ((size/PAGE_SIZE) *
2228 sizeof(upl_page_info_t)));
2229 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2230 page_field_size =
2231 (page_field_size + 3) & 0xFFFFFFFC;
2232 bzero((char *)lite_list, page_field_size);
2233 upl->flags =
2234 UPL_LITE | UPL_INTERNAL;
2235 } else {
2236 upl = upl_create(UPL_CREATE_INTERNAL, size);
2237 user_page_list = (upl_page_info_t *)
2238 (((vm_offset_t)upl)
2239 + sizeof(struct upl));
2240 upl->flags = UPL_INTERNAL;
2241 }
1c79356b 2242 } else {
d7e50217
A
2243 if(cntrl_flags & UPL_SET_LITE) {
2244 vm_offset_t page_field_size;
2245 upl = upl_create(UPL_CREATE_LITE, size);
2246 lite_list = (wpl_array_t)
2247 (((vm_offset_t)upl) + sizeof(struct upl));
2248 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2249 page_field_size =
2250 (page_field_size + 3) & 0xFFFFFFFC;
2251 bzero((char *)lite_list, page_field_size);
2252 upl->flags = UPL_LITE;
2253 } else {
2254 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2255 upl->flags = 0;
2256 }
0b4e3aa0 2257 }
d7e50217 2258
0b4e3aa0
A
2259 if(object->phys_contiguous) {
2260 upl->size = size;
2261 upl->offset = offset + object->paging_offset;
2262 *upl_ptr = upl;
2263 if(user_page_list) {
2264 user_page_list[0].phys_addr =
d7e50217 2265 (offset + object->shadow_offset)>>12;
0b4e3aa0 2266 user_page_list[0].device = TRUE;
1c79356b 2267 }
d7e50217
A
2268 upl->map_object = object;
2269 /* don't need any shadow mappings for this one */
2270 /* since it is already I/O memory */
2271 upl->flags |= UPL_DEVICE_MEMORY;
2272
2273 vm_object_lock(object);
2274 vm_object_paging_begin(object);
2275 vm_object_unlock(object);
2276
2277 if(page_list_count != NULL) {
2278 if (upl->flags & UPL_INTERNAL) {
2279 *page_list_count = 0;
2280 } else {
2281 *page_list_count = 1;
2282 }
2283 }
2284 return KERN_SUCCESS;
2285 }
2286
2287 if(cntrl_flags & UPL_SET_LITE) {
2288 upl->map_object = object;
2289 } else {
0b4e3aa0
A
2290 upl->map_object = vm_object_allocate(size);
2291 vm_object_lock(upl->map_object);
2292 upl->map_object->shadow = object;
0b4e3aa0
A
2293 upl->map_object->pageout = TRUE;
2294 upl->map_object->can_persist = FALSE;
d7e50217
A
2295 upl->map_object->copy_strategy =
2296 MEMORY_OBJECT_COPY_NONE;
0b4e3aa0 2297 upl->map_object->shadow_offset = offset;
d7e50217 2298 upl->map_object->wimg_bits = object->wimg_bits;
0b4e3aa0 2299 vm_object_unlock(upl->map_object);
0b4e3aa0 2300 }
1c79356b
A
2301 upl->size = size;
2302 upl->offset = offset + object->paging_offset;
1c79356b
A
2303 *upl_ptr = upl;
2304 }
d7e50217
A
2305 if (!(cntrl_flags & UPL_SET_LITE)) {
2306 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2307 }
1c79356b
A
2308 vm_object_lock(object);
2309#ifdef UBC_DEBUG
2310 if(upl_ptr)
2311 queue_enter(&object->uplq, upl, upl_t, uplq);
2312#endif /* UBC_DEBUG */
2313 vm_object_paging_begin(object);
2314 entry = 0;
2315 if(cntrl_flags & UPL_COPYOUT_FROM) {
2316 upl->flags |= UPL_PAGE_SYNC_DONE;
2317 while (xfer_size) {
d7e50217
A
2318 if((alias_page == NULL) &&
2319 !(cntrl_flags & UPL_SET_LITE)) {
1c79356b
A
2320 vm_object_unlock(object);
2321 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2322 vm_object_lock(object);
2323 }
2324 if(((dst_page = vm_page_lookup(object,
2325 dst_offset)) == VM_PAGE_NULL) ||
2326 dst_page->fictitious ||
2327 dst_page->absent ||
2328 dst_page->error ||
2329 (dst_page->wire_count != 0 &&
2330 !dst_page->pageout) ||
2331 ((!(dst_page->dirty || dst_page->precious ||
d7e50217 2332 pmap_is_modified(dst_page->phys_page)))
0b4e3aa0
A
2333 && (cntrl_flags & UPL_RET_ONLY_DIRTY)) ||
2334 ((!(dst_page->inactive))
2335 && (dst_page->page_ticket != page_ticket)
2336 && ((dst_page->page_ticket+1) != page_ticket)
d7e50217 2337 && (cntrl_flags & UPL_FOR_PAGEOUT)) ||
0b4e3aa0
A
2338 ((!dst_page->list_req_pending) &&
2339 (cntrl_flags & UPL_RET_ONLY_DIRTY) &&
d7e50217
A
2340 pmap_is_referenced(dst_page->phys_page))) {
2341 if(user_page_list) {
1c79356b 2342 user_page_list[entry].phys_addr = 0;
d7e50217
A
2343 user_page_list[entry].device = FALSE;
2344 }
1c79356b
A
2345 } else {
2346
2347 if(dst_page->busy &&
2348 (!(dst_page->list_req_pending &&
2349 dst_page->pageout))) {
2350 if(cntrl_flags & UPL_NOBLOCK) {
d7e50217
A
2351 if(user_page_list) {
2352 user_page_list[entry].phys_addr = 0;
2353 user_page_list[entry].device = FALSE;
2354 }
1c79356b
A
2355 entry++;
2356 dst_offset += PAGE_SIZE_64;
2357 xfer_size -= PAGE_SIZE;
2358 continue;
2359 }
2360 /*someone else is playing with the */
2361 /* page. We will have to wait. */
9bccf70c 2362 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1c79356b
A
2363 continue;
2364 }
2365 /* Someone else already cleaning the page? */
2366 if((dst_page->cleaning || dst_page->absent ||
1c79356b
A
2367 dst_page->wire_count != 0) &&
2368 !dst_page->list_req_pending) {
d7e50217 2369 if(user_page_list) {
1c79356b 2370 user_page_list[entry].phys_addr = 0;
d7e50217
A
2371 user_page_list[entry].device = FALSE;
2372 }
1c79356b
A
2373 entry++;
2374 dst_offset += PAGE_SIZE_64;
2375 xfer_size -= PAGE_SIZE;
2376 continue;
2377 }
2378 /* eliminate all mappings from the */
 2379			/* original object and its progeny */
2380
2381 vm_page_lock_queues();
9bccf70c 2382 if( !(cntrl_flags & UPL_FILE_IO)) {
d7e50217 2383 pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
9bccf70c 2384 }
1c79356b
A
2385 /* pageout statistics gathering. count */
2386 /* all the pages we will page out that */
2387 /* were not counted in the initial */
2388 /* vm_pageout_scan work */
2389 if(dst_page->list_req_pending)
2390 encountered_lrp = TRUE;
2391 if((dst_page->dirty ||
2392 (dst_page->object->internal &&
2393 dst_page->precious)) &&
2394 (dst_page->list_req_pending
2395 == FALSE)) {
2396 if(encountered_lrp) {
2397 CLUSTER_STAT
2398 (pages_at_higher_offsets++;)
2399 } else {
2400 CLUSTER_STAT
2401 (pages_at_lower_offsets++;)
2402 }
2403 }
2404
2405 /* Turn off busy indication on pending */
2406 /* pageout. Note: we can only get here */
2407 /* in the request pending case. */
2408 dst_page->list_req_pending = FALSE;
2409 dst_page->busy = FALSE;
2410 dst_page->cleaning = FALSE;
2411
d7e50217 2412 dirty = pmap_is_modified(dst_page->phys_page);
1c79356b
A
2413 dirty = dirty ? TRUE : dst_page->dirty;
2414
d7e50217
A
2415 if(cntrl_flags & UPL_SET_LITE) {
2416 int pg_num;
2417 pg_num = (dst_offset-offset)/PAGE_SIZE;
2418 lite_list[pg_num>>5] |=
2419 1 << (pg_num & 31);
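				/*
				 * lite_list is a bitmap with one bit per
				 * page of the UPL: word pg_num >> 5,
				 * bit pg_num & 31, i.e. 32 pages per
				 * 32-bit word of the wpl_array_t.
				 */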
2420 pmap_clear_modify(dst_page->phys_page);
2421 /*
2422 * Record that this page has been
2423 * written out
2424 */
2425#if MACH_PAGEMAP
2426 vm_external_state_set(
2427 object->existence_map,
2428 dst_page->offset);
2429#endif /*MACH_PAGEMAP*/
2430
2431 /*
2432 * Mark original page as cleaning
2433 * in place.
2434 */
2435 dst_page->cleaning = TRUE;
2436 dst_page->dirty = TRUE;
2437 dst_page->precious = FALSE;
2438 } else {
2439 /* use pageclean setup, it is more */
2440 /* convenient even for the pageout */
2441 /* cases here */
2442 vm_pageclean_setup(dst_page,
2443 alias_page, upl->map_object,
2444 size - xfer_size);
2445
2446 alias_page->absent = FALSE;
2447 alias_page = NULL;
2448 }
1c79356b
A
2449
2450 if(!dirty) {
2451 dst_page->dirty = FALSE;
2452 dst_page->precious = TRUE;
2453 }
2454
2455 if(dst_page->pageout)
2456 dst_page->busy = TRUE;
2457
0b4e3aa0 2458 if((!(cntrl_flags & UPL_CLEAN_IN_PLACE))
d7e50217 2459 || (cntrl_flags & UPL_FOR_PAGEOUT)) {
1c79356b
A
2460 /* deny access to the target page */
2461 /* while it is being worked on */
2462 if((!dst_page->pageout) &&
2463 (dst_page->wire_count == 0)) {
2464 dst_page->busy = TRUE;
2465 dst_page->pageout = TRUE;
2466 vm_page_wire(dst_page);
2467 }
2468 }
2469 if(user_page_list) {
2470 user_page_list[entry].phys_addr
d7e50217 2471 = dst_page->phys_page;
1c79356b
A
2472 user_page_list[entry].dirty =
2473 dst_page->dirty;
2474 user_page_list[entry].pageout =
2475 dst_page->pageout;
2476 user_page_list[entry].absent =
2477 dst_page->absent;
2478 user_page_list[entry].precious =
2479 dst_page->precious;
d7e50217
A
2480 user_page_list[entry].device =
2481 FALSE;
1c79356b
A
2482 }
2483
2484 vm_page_unlock_queues();
2485 }
2486 entry++;
2487 dst_offset += PAGE_SIZE_64;
2488 xfer_size -= PAGE_SIZE;
2489 }
2490 } else {
2491 while (xfer_size) {
d7e50217
A
2492 if((alias_page == NULL) &&
2493 !(cntrl_flags & UPL_SET_LITE)) {
1c79356b
A
2494 vm_object_unlock(object);
2495 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2496 vm_object_lock(object);
2497 }
2498 dst_page = vm_page_lookup(object, dst_offset);
2499 if(dst_page != VM_PAGE_NULL) {
9bccf70c
A
2500 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2501 !((dst_page->list_req_pending)
2502 && (dst_page->absent))) {
2503 /* we are doing extended range */
2504 /* requests. we want to grab */
2505 /* pages around some which are */
2506 /* already present. */
d7e50217 2507 if(user_page_list) {
9bccf70c 2508 user_page_list[entry].phys_addr = 0;
d7e50217
A
2509 user_page_list[entry].device = FALSE;
2510 }
9bccf70c
A
2511 entry++;
2512 dst_offset += PAGE_SIZE_64;
2513 xfer_size -= PAGE_SIZE;
2514 continue;
2515 }
0b4e3aa0
A
2516 if((dst_page->cleaning) &&
2517 !(dst_page->list_req_pending)) {
2518 /*someone else is writing to the */
2519 /* page. We will have to wait. */
9bccf70c 2520 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
0b4e3aa0
A
2521 continue;
2522 }
2523 if ((dst_page->fictitious &&
2524 dst_page->list_req_pending)) {
2525 /* dump the fictitious page */
2526 dst_page->list_req_pending = FALSE;
2527 dst_page->clustered = FALSE;
2528 vm_page_lock_queues();
2529 vm_page_free(dst_page);
2530 vm_page_unlock_queues();
2531 } else if ((dst_page->absent &&
2532 dst_page->list_req_pending)) {
2533 /* the default_pager case */
2534 dst_page->list_req_pending = FALSE;
2535 dst_page->busy = FALSE;
2536 dst_page->clustered = FALSE;
2537 }
1c79356b 2538 }
0b4e3aa0
A
2539 if((dst_page = vm_page_lookup(object, dst_offset)) ==
2540 VM_PAGE_NULL) {
2541 if(object->private) {
2542 /*
2543 * This is a nasty wrinkle for users
2544 * of upl who encounter device or
 2545				 * private memory; however, it is
 2546				 * unavoidable: only a fault can
 2547				 * resolve the actual backing
2548 * physical page by asking the
2549 * backing device.
2550 */
d7e50217
A
2551 if(user_page_list) {
2552 user_page_list[entry].phys_addr = 0;
2553 user_page_list[entry].device = FALSE;
2554 }
0b4e3aa0
A
2555 entry++;
2556 dst_offset += PAGE_SIZE_64;
2557 xfer_size -= PAGE_SIZE;
2558 continue;
2559 }
1c79356b
A
2560 /* need to allocate a page */
2561 dst_page = vm_page_alloc(object, dst_offset);
2562 if (dst_page == VM_PAGE_NULL) {
0b4e3aa0
A
2563 vm_object_unlock(object);
2564 VM_PAGE_WAIT();
2565 vm_object_lock(object);
2566 continue;
1c79356b
A
2567 }
2568 dst_page->busy = FALSE;
2569#if 0
2570 if(cntrl_flags & UPL_NO_SYNC) {
2571 dst_page->page_lock = 0;
2572 dst_page->unlock_request = 0;
2573 }
2574#endif
2575 dst_page->absent = TRUE;
2576 object->absent_count++;
2577 }
2578#if 1
2579 if(cntrl_flags & UPL_NO_SYNC) {
2580 dst_page->page_lock = 0;
2581 dst_page->unlock_request = 0;
2582 }
2583#endif /* 1 */
2584 dst_page->overwriting = TRUE;
2585 if(dst_page->fictitious) {
2586 panic("need corner case for fictitious page");
2587 }
2588 if(dst_page->page_lock) {
2589 do_m_lock = TRUE;
2590 }
2591 if(upl_ptr) {
2592
2593 /* eliminate all mappings from the */
 2594			/* original object and its progeny */
2595
2596 if(dst_page->busy) {
2597 /*someone else is playing with the */
2598 /* page. We will have to wait. */
9bccf70c 2599 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1c79356b
A
2600 continue;
2601 }
2602
2603 vm_page_lock_queues();
9bccf70c 2604 if( !(cntrl_flags & UPL_FILE_IO)) {
d7e50217 2605 pmap_page_protect(dst_page->phys_page, VM_PROT_NONE);
9bccf70c 2606 }
d7e50217 2607 dirty = pmap_is_modified(dst_page->phys_page);
1c79356b
A
2608 dirty = dirty ? TRUE : dst_page->dirty;
2609
d7e50217
A
2610 if(cntrl_flags & UPL_SET_LITE) {
2611 int pg_num;
2612 pg_num = (dst_offset-offset)/PAGE_SIZE;
2613 lite_list[pg_num>>5] |=
2614 1 << (pg_num & 31);
2615 pmap_clear_modify(dst_page->phys_page);
2616 /*
2617 * Record that this page has been
2618 * written out
2619 */
2620#if MACH_PAGEMAP
2621 vm_external_state_set(
2622 object->existence_map,
2623 dst_page->offset);
2624#endif /*MACH_PAGEMAP*/
2625
2626 /*
2627 * Mark original page as cleaning
2628 * in place.
2629 */
2630 dst_page->cleaning = TRUE;
2631 dst_page->dirty = TRUE;
2632 dst_page->precious = FALSE;
2633 } else {
2634 /* use pageclean setup, it is more */
2635 /* convenient even for the pageout */
2636 /* cases here */
2637 vm_pageclean_setup(dst_page,
2638 alias_page, upl->map_object,
2639 size - xfer_size);
2640
2641 alias_page->absent = FALSE;
2642 alias_page = NULL;
2643 }
1c79356b
A
2644
2645 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
2646 /* clean in place for read implies */
2647 /* that a write will be done on all */
2648 /* the pages that are dirty before */
2649 /* a upl commit is done. The caller */
2650 /* is obligated to preserve the */
2651 /* contents of all pages marked */
2652 /* dirty. */
2653 upl->flags |= UPL_CLEAR_DIRTY;
2654 }
2655
2656 if(!dirty) {
2657 dst_page->dirty = FALSE;
2658 dst_page->precious = TRUE;
2659 }
2660
2661 if (dst_page->wire_count == 0) {
2662 /* deny access to the target page while */
2663 /* it is being worked on */
2664 dst_page->busy = TRUE;
2665 } else {
2666 vm_page_wire(dst_page);
2667 }
2668 /* expect the page to be used */
2669 dst_page->reference = TRUE;
2670 dst_page->precious =
2671 (cntrl_flags & UPL_PRECIOUS)
2672 ? TRUE : FALSE;
1c79356b
A
2673 if(user_page_list) {
2674 user_page_list[entry].phys_addr
d7e50217 2675 = dst_page->phys_page;
1c79356b 2676 user_page_list[entry].dirty =
0b4e3aa0 2677 dst_page->dirty;
1c79356b
A
2678 user_page_list[entry].pageout =
2679 dst_page->pageout;
2680 user_page_list[entry].absent =
2681 dst_page->absent;
2682 user_page_list[entry].precious =
2683 dst_page->precious;
d7e50217
A
2684 user_page_list[entry].device =
2685 FALSE;
1c79356b
A
2686 }
2687 vm_page_unlock_queues();
2688 }
d7e50217 2689
1c79356b
A
2690 entry++;
2691 dst_offset += PAGE_SIZE_64;
2692 xfer_size -= PAGE_SIZE;
2693 }
2694 }
0b4e3aa0
A
2695
2696 if (upl->flags & UPL_INTERNAL) {
2697 if(page_list_count != NULL)
2698 *page_list_count = 0;
 2699	} else if (page_list_count != NULL && *page_list_count > entry) {
2700 if(page_list_count != NULL)
2701 *page_list_count = entry;
2702 }
2703
1c79356b
A
2704 if(alias_page != NULL) {
2705 vm_page_lock_queues();
2706 vm_page_free(alias_page);
2707 vm_page_unlock_queues();
2708 }
0b4e3aa0 2709
1c79356b
A
2710 if(do_m_lock) {
2711 vm_prot_t access_required;
2712 /* call back all associated pages from other users of the pager */
2713 /* all future updates will be on data which is based on the */
2714 /* changes we are going to make here. Note: it is assumed that */
2715 /* we already hold copies of the data so we will not be seeing */
2716 /* an avalanche of incoming data from the pager */
2717 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
2718 ? VM_PROT_READ : VM_PROT_WRITE;
2719 while (TRUE) {
2720 kern_return_t rc;
1c79356b
A
2721
2722 if(!object->pager_ready) {
9bccf70c
A
2723 wait_result_t wait_result;
2724
2725 wait_result = vm_object_sleep(object,
2726 VM_OBJECT_EVENT_PAGER_READY,
2727 THREAD_UNINT);
2728 if (wait_result != THREAD_AWAKENED) {
2729 vm_object_unlock(object);
2730 return(KERN_FAILURE);
1c79356b 2731 }
1c79356b
A
2732 continue;
2733 }
2734
2735 vm_object_unlock(object);
2736
2737 if (rc = memory_object_data_unlock(
2738 object->pager,
1c79356b
A
2739 dst_offset + object->paging_offset,
2740 size,
2741 access_required)) {
2742 if (rc == MACH_SEND_INTERRUPTED)
2743 continue;
2744 else
2745 return KERN_FAILURE;
2746 }
2747 break;
2748
2749 }
 2750		/* let's wait on the last page requested */
2751 /* NOTE: we will have to update lock completed routine to signal */
2752 if(dst_page != VM_PAGE_NULL &&
2753 (access_required & dst_page->page_lock) != access_required) {
2754 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
2755 thread_block((void (*)(void))0);
2756 vm_object_lock(object);
2757 }
2758 }
2759 vm_object_unlock(object);
2760 return KERN_SUCCESS;
2761}
2762
0b4e3aa0 2763/* JMM - Backward compatibility for now */
1c79356b 2764kern_return_t
0b4e3aa0
A
2765vm_fault_list_request(
2766 memory_object_control_t control,
1c79356b
A
2767 vm_object_offset_t offset,
2768 vm_size_t size,
0b4e3aa0 2769 upl_t *upl_ptr,
1c79356b
A
2770 upl_page_info_t **user_page_list_ptr,
2771 int page_list_count,
2772 int cntrl_flags)
2773{
0b4e3aa0
A
2774 int local_list_count;
2775 upl_page_info_t *user_page_list;
2776 kern_return_t kr;
2777
2778 if (user_page_list_ptr != NULL) {
2779 local_list_count = page_list_count;
2780 user_page_list = *user_page_list_ptr;
2781 } else {
2782 local_list_count = 0;
2783 user_page_list = NULL;
2784 }
2785 kr = memory_object_upl_request(control,
2786 offset,
2787 size,
2788 upl_ptr,
2789 user_page_list,
2790 &local_list_count,
2791 cntrl_flags);
2792
2793 if(kr != KERN_SUCCESS)
2794 return kr;
2795
2796 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
2797 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
2798 }
2799
2800 return KERN_SUCCESS;
2801}
2802
2803
2804
2805/*
2806 * Routine: vm_object_super_upl_request
2807 * Purpose:
2808 * Cause the population of a portion of a vm_object
2809 * in much the same way as memory_object_upl_request.
2810 * Depending on the nature of the request, the pages
 2811 *		returned may contain valid data or be uninitialized.
2812 * However, the region may be expanded up to the super
2813 * cluster size provided.
2814 */
2815
2816__private_extern__ kern_return_t
2817vm_object_super_upl_request(
2818 vm_object_t object,
2819 vm_object_offset_t offset,
2820 vm_size_t size,
2821 vm_size_t super_cluster,
2822 upl_t *upl,
2823 upl_page_info_t *user_page_list,
2824 unsigned int *page_list_count,
2825 int cntrl_flags)
2826{
2827 vm_page_t target_page;
2828 int ticket;
2829
1c79356b
A
2830 if(object->paging_offset > offset)
2831 return KERN_FAILURE;
0b4e3aa0 2832
1c79356b 2833 offset = offset - object->paging_offset;
d7e50217 2834 if(cntrl_flags & UPL_FOR_PAGEOUT) {
0b4e3aa0
A
2835 if((target_page = vm_page_lookup(object, offset))
2836 != VM_PAGE_NULL) {
2837 ticket = target_page->page_ticket;
2838 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
2839 cntrl_flags = cntrl_flags |
2840 ((ticket << UPL_PAGE_TICKET_SHIFT)
2841 & UPL_PAGE_TICKET_MASK);
2842 }
2843 }
2844
1c79356b
A
2845
2846/* turns off super cluster exercised by the default_pager */
2847/*
2848super_cluster = size;
2849*/
2850 if ((super_cluster > size) &&
2851 (vm_page_free_count > vm_page_free_reserved)) {
2852
2853 vm_object_offset_t base_offset;
2854 vm_size_t super_size;
2855
2856 base_offset = (offset &
2857 ~((vm_object_offset_t) super_cluster - 1));
2858 super_size = (offset+size) > (base_offset + super_cluster) ?
2859 super_cluster<<1 : super_cluster;
2860 super_size = ((base_offset + super_size) > object->size) ?
2861 (object->size - base_offset) : super_size;
2862 if(offset > (base_offset + super_size))
0b4e3aa0 2863 panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
1c79356b
A
2864 /* apparently there is a case where the vm requests a */
 2865		/* page to be written out whose offset is beyond the */
2866 /* object size */
2867 if((offset + size) > (base_offset + super_size))
2868 super_size = (offset + size) - base_offset;
2869
2870 offset = base_offset;
2871 size = super_size;
2872 }
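	/*
	 * Illustrative numbers for the expansion above (hypothetical
	 * sizes, not taken from the source): with super_cluster =
	 * 0x10000 and a request at offset 0x13000, base_offset rounds
	 * down to 0x10000.  If the original run ends at or below
	 * 0x20000 the cluster stays one super_cluster wide; if it
	 * crosses that boundary, super_size is doubled to 0x20000.
	 * The result is clipped to the object size and, if the
	 * clipping would leave part of the original request outside
	 * the cluster, grown again just far enough to cover it;
	 * offset and size are then rewritten to describe the whole
	 * cluster.
	 */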
0b4e3aa0
A
 2873	return vm_object_upl_request(object, offset, size,
2874 upl, user_page_list, page_list_count,
2875 cntrl_flags);
1c79356b
A
2876}
2877
2878
2879kern_return_t
0b4e3aa0 2880vm_upl_map(
1c79356b
A
2881 vm_map_t map,
2882 upl_t upl,
2883 vm_offset_t *dst_addr)
2884{
2885 vm_size_t size;
2886 vm_object_offset_t offset;
2887 vm_offset_t addr;
2888 vm_page_t m;
2889 kern_return_t kr;
2890
0b4e3aa0
A
2891 if (upl == UPL_NULL)
2892 return KERN_INVALID_ARGUMENT;
2893
2894 upl_lock(upl);
2895
1c79356b 2896 /* check to see if already mapped */
0b4e3aa0
A
2897 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
2898 upl_unlock(upl);
1c79356b 2899 return KERN_FAILURE;
0b4e3aa0 2900 }
1c79356b 2901
d7e50217
A
2902 if((!(upl->map_object->pageout)) &&
2903 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
2904 (upl->map_object->phys_contiguous))) {
2905 vm_object_t object;
2906 vm_page_t alias_page;
2907 vm_object_offset_t new_offset;
2908 int pg_num;
2909 wpl_array_t lite_list;
2910
2911 if(upl->flags & UPL_INTERNAL) {
2912 lite_list = (wpl_array_t)
2913 ((((vm_offset_t)upl) + sizeof(struct upl))
2914 + ((upl->size/PAGE_SIZE)
2915 * sizeof(upl_page_info_t)));
2916 } else {
2917 lite_list = (wpl_array_t)
2918 (((vm_offset_t)upl) + sizeof(struct upl));
2919 }
2920 object = upl->map_object;
2921 upl->map_object = vm_object_allocate(upl->size);
2922 vm_object_lock(upl->map_object);
2923 upl->map_object->shadow = object;
2924 upl->map_object->pageout = TRUE;
2925 upl->map_object->can_persist = FALSE;
2926 upl->map_object->copy_strategy =
2927 MEMORY_OBJECT_COPY_NONE;
2928 upl->map_object->shadow_offset =
2929 upl->offset - object->paging_offset;
2930 upl->map_object->wimg_bits = object->wimg_bits;
2931 vm_object_unlock(upl->map_object);
2932 offset = upl->map_object->shadow_offset;
2933 new_offset = 0;
2934 size = upl->size;
2935 vm_object_lock(object);
2936 while(size) {
2937 pg_num = (new_offset)/PAGE_SIZE;
2938 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
2939 vm_object_unlock(object);
2940 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2941 vm_object_lock(object);
2942 m = vm_page_lookup(object, offset);
2943 if (m == VM_PAGE_NULL) {
2944 panic("vm_upl_map: page missing\n");
2945 }
2946
2947 vm_object_paging_begin(object);
2948
2949 /*
2950 * Convert the fictitious page to a private
2951 * shadow of the real page.
2952 */
2953 assert(alias_page->fictitious);
2954 alias_page->fictitious = FALSE;
2955 alias_page->private = TRUE;
2956 alias_page->pageout = TRUE;
2957 alias_page->phys_page = m->phys_page;
2958 vm_page_wire(alias_page);
2959
2960 vm_page_insert(alias_page,
2961 upl->map_object, new_offset);
2962 assert(!alias_page->wanted);
2963 alias_page->busy = FALSE;
2964 alias_page->absent = FALSE;
2965 }
2966
2967 size -= PAGE_SIZE;
2968 offset += PAGE_SIZE_64;
2969 new_offset += PAGE_SIZE_64;
2970 }
2971 vm_object_unlock(object);
2972 }
2973
1c79356b
A
2974 offset = 0; /* Always map the entire object */
2975 size = upl->size;
2976
2977 vm_object_lock(upl->map_object);
2978 upl->map_object->ref_count++;
2979 vm_object_res_reference(upl->map_object);
2980 vm_object_unlock(upl->map_object);
2981
2982 *dst_addr = 0;
2983
2984
2985 /* NEED A UPL_MAP ALIAS */
2986 kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE,
2987 upl->map_object, offset, FALSE,
2988 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
2989
0b4e3aa0
A
2990 if (kr != KERN_SUCCESS) {
2991 upl_unlock(upl);
1c79356b 2992 return(kr);
0b4e3aa0 2993 }
1c79356b
A
2994
2995 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
2996 m = vm_page_lookup(upl->map_object, offset);
2997 if(m) {
9bccf70c
A
2998 unsigned int cache_attr;
2999 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3000
3001 PMAP_ENTER(map->pmap, addr,
3002 m, VM_PROT_ALL,
3003 cache_attr, TRUE);
1c79356b
A
3004 }
3005 offset+=PAGE_SIZE_64;
3006 }
0b4e3aa0 3007 upl->ref_count++; /* hold a reference for the mapping */
1c79356b
A
3008 upl->flags |= UPL_PAGE_LIST_MAPPED;
3009 upl->kaddr = *dst_addr;
0b4e3aa0 3010 upl_unlock(upl);
1c79356b
A
3011 return KERN_SUCCESS;
3012}
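/*
 * Summary of vm_upl_map(): for a UPL that is not device memory and
 * whose map_object is not already a pageout shadow, a fresh shadow
 * map_object is built and populated with private alias pages for
 * every page marked in the lite list; the whole map_object is then
 * entered into the target map with vm_map_enter() and each resident
 * page is entered into the pmap.  The extra upl->ref_count taken here
 * represents the mapping itself and is dropped by vm_upl_unmap(),
 * which also vm_deallocate()s the address range.
 */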
3013
3014
3015kern_return_t
0b4e3aa0 3016vm_upl_unmap(
1c79356b
A
3017 vm_map_t map,
3018 upl_t upl)
3019{
0b4e3aa0 3020 vm_address_t addr;
1c79356b
A
3021 vm_size_t size;
3022
0b4e3aa0
A
3023 if (upl == UPL_NULL)
3024 return KERN_INVALID_ARGUMENT;
3025
3026 upl_lock(upl);
1c79356b 3027 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
0b4e3aa0 3028 addr = upl->kaddr;
1c79356b 3029 size = upl->size;
0b4e3aa0
A
3030 assert(upl->ref_count > 1);
3031 upl->ref_count--; /* removing mapping ref */
1c79356b
A
3032 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3033 upl->kaddr = (vm_offset_t) 0;
0b4e3aa0
A
3034 upl_unlock(upl);
3035
3036 vm_deallocate(map, addr, size);
1c79356b 3037 return KERN_SUCCESS;
1c79356b 3038 }
0b4e3aa0
A
3039 upl_unlock(upl);
3040 return KERN_FAILURE;
1c79356b
A
3041}
3042
3043kern_return_t
0b4e3aa0 3044upl_commit_range(
1c79356b
A
3045 upl_t upl,
3046 vm_offset_t offset,
3047 vm_size_t size,
3048 int flags,
0b4e3aa0
A
3049 upl_page_info_t *page_list,
3050 mach_msg_type_number_t count,
3051 boolean_t *empty)
1c79356b
A
3052{
3053 vm_size_t xfer_size = size;
d7e50217 3054 vm_object_t shadow_object;
1c79356b
A
3055 vm_object_t object = upl->map_object;
3056 vm_object_offset_t target_offset;
1c79356b 3057 int entry;
d7e50217
A
3058 wpl_array_t lite_list;
3059 int occupied;
1c79356b 3060
0b4e3aa0
A
3061 *empty = FALSE;
3062
3063 if (upl == UPL_NULL)
3064 return KERN_INVALID_ARGUMENT;
3065
3066 if (count == 0)
3067 page_list = NULL;
3068
d7e50217
A
3069 if(object->pageout) {
3070 shadow_object = object->shadow;
3071 } else {
3072 shadow_object = object;
3073 }
3074
0b4e3aa0 3075 upl_lock(upl);
d7e50217
A
3076
3077
1c79356b
A
3078 if(upl->flags & UPL_DEVICE_MEMORY) {
3079 xfer_size = 0;
3080 } else if ((offset + size) > upl->size) {
0b4e3aa0 3081 upl_unlock(upl);
1c79356b
A
3082 return KERN_FAILURE;
3083 }
3084
d7e50217
A
3085 if(upl->flags & UPL_INTERNAL) {
3086 lite_list = (wpl_array_t)
3087 ((((vm_offset_t)upl) + sizeof(struct upl))
3088 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3089 } else {
3090 lite_list = (wpl_array_t)
3091 (((vm_offset_t)upl) + sizeof(struct upl));
3092 }
3093
1c79356b
A
3094 vm_object_lock(shadow_object);
3095
3096 entry = offset/PAGE_SIZE;
3097 target_offset = (vm_object_offset_t)offset;
3098 while(xfer_size) {
3099 vm_page_t t,m;
3100 upl_page_info_t *p;
3101
d7e50217
A
3102 m = VM_PAGE_NULL;
3103 if(upl->flags & UPL_LITE) {
3104 int pg_num;
3105 pg_num = target_offset/PAGE_SIZE;
3106 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3107 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3108 m = vm_page_lookup(shadow_object,
3109 target_offset + (upl->offset -
3110 shadow_object->paging_offset));
3111 }
3112 }
3113 if(object->pageout) {
3114 if ((t = vm_page_lookup(object, target_offset))
3115 != NULL) {
3116 t->pageout = FALSE;
3117 VM_PAGE_FREE(t);
3118 if(m == NULL) {
3119 m = vm_page_lookup(
3120 shadow_object,
3121 target_offset +
3122 object->shadow_offset);
1c79356b 3123 }
d7e50217
A
3124 if(m != VM_PAGE_NULL)
3125 vm_object_paging_end(m->object);
3126 }
3127 }
3128
3129 if(m != VM_PAGE_NULL) {
3130 if(upl->flags & UPL_IO_WIRE) {
3131 vm_page_lock_queues();
3132 vm_page_unwire(m);
3133 vm_page_unlock_queues();
3134 if(page_list) {
1c79356b 3135 page_list[entry].phys_addr = 0;
d7e50217
A
3136 }
3137 if (flags & UPL_COMMIT_SET_DIRTY) {
3138 m->dirty = TRUE;
3139 } else if ((upl->flags & UPL_CLEAR_DIRTY) ||
3140 (flags & UPL_COMMIT_CLEAR_DIRTY)) {
3141 pmap_clear_modify(m->phys_page);
3142 m->dirty = FALSE;
3143 }
3144 if (flags & UPL_COMMIT_INACTIVATE) {
3145 vm_page_deactivate(m);
3146 m->reference = FALSE;
3147 pmap_clear_reference(m->phys_page);
3148 }
3149 target_offset += PAGE_SIZE_64;
3150 xfer_size -= PAGE_SIZE;
3151 entry++;
3152 continue;
3153 }
3154 vm_page_lock_queues();
3155 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3156 (flags & UPL_COMMIT_CLEAR_DIRTY)) {
3157 pmap_clear_modify(m->phys_page);
3158 m->dirty = FALSE;
3159 }
3160 if(page_list) {
3161 p = &(page_list[entry]);
3162 if(p->phys_addr && p->pageout && !m->pageout) {
3163 m->busy = TRUE;
3164 m->pageout = TRUE;
3165 vm_page_wire(m);
3166 } else if (page_list[entry].phys_addr &&
3167 !p->pageout && m->pageout &&
3168 !m->dump_cleaning) {
3169 m->pageout = FALSE;
3170 m->absent = FALSE;
3171 m->overwriting = FALSE;
3172 vm_page_unwire(m);
3173 PAGE_WAKEUP_DONE(m);
3174 }
3175 page_list[entry].phys_addr = 0;
3176 }
3177 m->dump_cleaning = FALSE;
3178 if(m->laundry) {
3179 vm_page_laundry_count--;
3180 m->laundry = FALSE;
3181 if (vm_page_laundry_count < vm_page_laundry_min) {
3182 vm_page_laundry_min = 0;
3183 thread_wakeup((event_t)
3184 &vm_page_laundry_count);
3185 }
3186 }
3187 if(m->pageout) {
3188 m->cleaning = FALSE;
3189 m->pageout = FALSE;
1c79356b 3190#if MACH_CLUSTER_STATS
d7e50217 3191 if (m->wanted) vm_pageout_target_collisions++;
1c79356b 3192#endif
d7e50217
A
3193 pmap_page_protect(m->phys_page, VM_PROT_NONE);
3194 m->dirty = pmap_is_modified(m->phys_page);
3195 if(m->dirty) {
3196 CLUSTER_STAT(
3197 vm_pageout_target_page_dirtied++;)
3198 vm_page_unwire(m);/* reactivates */
3199 VM_STAT(reactivations++);
3200 PAGE_WAKEUP_DONE(m);
3201 } else {
3202 CLUSTER_STAT(
3203 vm_pageout_target_page_freed++;)
3204 vm_page_free(m);/* clears busy, etc. */
3205 VM_STAT(pageouts++);
3206 }
3207 vm_page_unlock_queues();
3208 target_offset += PAGE_SIZE_64;
3209 xfer_size -= PAGE_SIZE;
3210 entry++;
3211 continue;
3212 }
3213 if (flags & UPL_COMMIT_INACTIVATE) {
3214 vm_page_deactivate(m);
3215 m->reference = FALSE;
3216 pmap_clear_reference(m->phys_page);
3217 } else if (!m->active && !m->inactive) {
3218 if (m->reference)
3219 vm_page_activate(m);
3220 else
3221 vm_page_deactivate(m);
3222 }
1c79356b 3223#if MACH_CLUSTER_STATS
d7e50217 3224 m->dirty = pmap_is_modified(m->phys_page);
1c79356b 3225
d7e50217
A
3226 if (m->dirty) vm_pageout_cluster_dirtied++;
3227 else vm_pageout_cluster_cleaned++;
3228 if (m->wanted) vm_pageout_cluster_collisions++;
1c79356b 3229#else
d7e50217 3230 m->dirty = 0;
1c79356b
A
3231#endif
3232
d7e50217
A
3233 if((m->busy) && (m->cleaning)) {
3234 /* the request_page_list case */
3235 if(m->absent) {
3236 m->absent = FALSE;
3237 if(shadow_object->absent_count == 1)
1c79356b 3238 vm_object_absent_release(shadow_object);
d7e50217 3239 else
1c79356b 3240 shadow_object->absent_count--;
1c79356b 3241 }
d7e50217
A
3242 m->overwriting = FALSE;
3243 m->busy = FALSE;
3244 m->dirty = FALSE;
3245 } else if (m->overwriting) {
 3246				/* alternate request page list, write to */
 3247				/* page_list case.  Occurs when the original */
 3248				/* page was wired at the time of the list */
 3249				/* request */
3250 assert(m->wire_count != 0);
3251 vm_page_unwire(m);/* reactivates */
3252 m->overwriting = FALSE;
3253 }
3254 m->cleaning = FALSE;
3255 /* It is a part of the semantic of COPYOUT_FROM */
3256 /* UPLs that a commit implies cache sync */
3257 /* between the vm page and the backing store */
3258 /* this can be used to strip the precious bit */
3259 /* as well as clean */
3260 if (upl->flags & UPL_PAGE_SYNC_DONE)
3261 m->precious = FALSE;
3262
3263 if (flags & UPL_COMMIT_SET_DIRTY) {
3264 m->dirty = TRUE;
3265 }
3266 /*
3267 * Wakeup any thread waiting for the page to be un-cleaning.
3268 */
3269 PAGE_WAKEUP(m);
3270 vm_page_unlock_queues();
3271
1c79356b
A
3272 }
3273 target_offset += PAGE_SIZE_64;
3274 xfer_size -= PAGE_SIZE;
3275 entry++;
3276 }
3277
d7e50217
A
3278 occupied = 1;
3279 if (upl->flags & UPL_DEVICE_MEMORY) {
3280 occupied = 0;
3281 } else if (upl->flags & UPL_LITE) {
3282 int pg_num;
3283 int i;
3284 pg_num = upl->size/PAGE_SIZE;
3285 pg_num = (pg_num + 31) >> 5;
3286 occupied = 0;
3287 for(i= 0; i<pg_num; i++) {
3288 if(lite_list[i] != 0) {
3289 occupied = 1;
3290 break;
3291 }
3292 }
3293 } else {
3294 if(queue_empty(&upl->map_object->memq)) {
3295 occupied = 0;
3296 }
3297 }
3298
3299 if(occupied == 0) {
3300 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
0b4e3aa0 3301 *empty = TRUE;
d7e50217
A
3302 }
3303 if(object == shadow_object)
3304 vm_object_paging_end(shadow_object);
1c79356b 3305 }
d7e50217 3306 vm_object_unlock(shadow_object);
0b4e3aa0
A
3307 upl_unlock(upl);
3308
1c79356b
A
3309 return KERN_SUCCESS;
3310}
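/*
 * Emptiness accounting for the commit above: a device-memory UPL is
 * always considered drained; a lite UPL is drained once no bits
 * remain set in its bitmap; otherwise the map_object's memq must be
 * empty.  Only a drained UPL with UPL_COMMIT_NOTIFY_EMPTY set reports
 * *empty = TRUE, and vm_object_paging_end() is issued only when the
 * commit operated directly on the shadow object (i.e. the UPL had no
 * separate pageout map_object).
 */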
3311
0b4e3aa0
A
3312kern_return_t
3313upl_abort_range(
1c79356b
A
3314 upl_t upl,
3315 vm_offset_t offset,
3316 vm_size_t size,
0b4e3aa0
A
3317 int error,
3318 boolean_t *empty)
1c79356b
A
3319{
3320 vm_size_t xfer_size = size;
d7e50217 3321 vm_object_t shadow_object;
1c79356b
A
3322 vm_object_t object = upl->map_object;
3323 vm_object_offset_t target_offset;
3324 vm_object_offset_t page_offset;
3325 int entry;
d7e50217
A
3326 wpl_array_t lite_list;
3327 int occupied;
1c79356b 3328
0b4e3aa0
A
3329 *empty = FALSE;
3330
3331 if (upl == UPL_NULL)
3332 return KERN_INVALID_ARGUMENT;
3333
d7e50217
A
3334 if (upl->flags & UPL_IO_WIRE) {
3335 return upl_commit_range(upl,
3336 offset, size, 0,
3337 NULL, 0, empty);
3338 }
3339
3340 if(object->pageout) {
3341 shadow_object = object->shadow;
3342 } else {
3343 shadow_object = object;
3344 }
3345
0b4e3aa0 3346 upl_lock(upl);
1c79356b
A
3347 if(upl->flags & UPL_DEVICE_MEMORY) {
3348 xfer_size = 0;
3349 } else if ((offset + size) > upl->size) {
0b4e3aa0 3350 upl_unlock(upl);
1c79356b
A
3351 return KERN_FAILURE;
3352 }
3353
1c79356b
A
3354 vm_object_lock(shadow_object);
3355
d7e50217
A
3356 if(upl->flags & UPL_INTERNAL) {
3357 lite_list = (wpl_array_t)
3358 ((((vm_offset_t)upl) + sizeof(struct upl))
3359 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3360 } else {
3361 lite_list = (wpl_array_t)
3362 (((vm_offset_t)upl) + sizeof(struct upl));
3363 }
3364
1c79356b
A
3365 entry = offset/PAGE_SIZE;
3366 target_offset = (vm_object_offset_t)offset;
3367 while(xfer_size) {
3368 vm_page_t t,m;
3369 upl_page_info_t *p;
3370
d7e50217
A
3371 m = VM_PAGE_NULL;
3372 if(upl->flags & UPL_LITE) {
3373 int pg_num;
3374 pg_num = target_offset/PAGE_SIZE;
3375 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3376 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3377 m = vm_page_lookup(shadow_object,
3378 target_offset + (upl->offset -
3379 shadow_object->paging_offset));
3380 }
3381 }
3382 if(object->pageout) {
3383 if ((t = vm_page_lookup(object, target_offset))
3384 != NULL) {
3385 t->pageout = FALSE;
3386 VM_PAGE_FREE(t);
3387 if(m == NULL) {
3388 m = vm_page_lookup(
3389 shadow_object,
3390 target_offset +
3391 object->shadow_offset);
3392 }
3393 if(m != VM_PAGE_NULL)
3394 vm_object_paging_end(m->object);
3395 }
3396 }
3397 if(m != VM_PAGE_NULL) {
1c79356b
A
3398 vm_page_lock_queues();
3399 if(m->absent) {
3400 /* COPYOUT = FALSE case */
3401 /* check for error conditions which must */
3402 /* be passed back to the pages customer */
3403 if(error & UPL_ABORT_RESTART) {
3404 m->restart = TRUE;
3405 m->absent = FALSE;
3406 vm_object_absent_release(m->object);
3407 m->page_error = KERN_MEMORY_ERROR;
3408 m->error = TRUE;
3409 } else if(error & UPL_ABORT_UNAVAILABLE) {
3410 m->restart = FALSE;
3411 m->unusual = TRUE;
3412 m->clustered = FALSE;
3413 } else if(error & UPL_ABORT_ERROR) {
3414 m->restart = FALSE;
3415 m->absent = FALSE;
3416 vm_object_absent_release(m->object);
3417 m->page_error = KERN_MEMORY_ERROR;
3418 m->error = TRUE;
3419 } else if(error & UPL_ABORT_DUMP_PAGES) {
3420 m->clustered = TRUE;
3421 } else {
3422 m->clustered = TRUE;
3423 }
3424
3425
3426 m->cleaning = FALSE;
3427 m->overwriting = FALSE;
3428 PAGE_WAKEUP_DONE(m);
3429 if(m->clustered) {
3430 vm_page_free(m);
3431 } else {
3432 vm_page_activate(m);
3433 }
3434
3435 vm_page_unlock_queues();
3436 target_offset += PAGE_SIZE_64;
3437 xfer_size -= PAGE_SIZE;
3438 entry++;
3439 continue;
3440 }
3441 /*
d7e50217
A
3442 * Handle the trusted pager throttle.
3443 */
3444 if (m->laundry) {
1c79356b
A
3445 vm_page_laundry_count--;
3446 m->laundry = FALSE;
3447 if (vm_page_laundry_count
d7e50217 3448 < vm_page_laundry_min) {
1c79356b
A
3449 vm_page_laundry_min = 0;
3450 thread_wakeup((event_t)
3451 &vm_page_laundry_count);
3452 }
3453 }
3454 if(m->pageout) {
3455 assert(m->busy);
3456 assert(m->wire_count == 1);
3457 m->pageout = FALSE;
3458 vm_page_unwire(m);
3459 }
0b4e3aa0 3460 m->dump_cleaning = FALSE;
1c79356b
A
3461 m->cleaning = FALSE;
3462 m->busy = FALSE;
3463 m->overwriting = FALSE;
3464#if MACH_PAGEMAP
3465 vm_external_state_clr(
3466 m->object->existence_map, m->offset);
3467#endif /* MACH_PAGEMAP */
3468 if(error & UPL_ABORT_DUMP_PAGES) {
3469 vm_page_free(m);
d7e50217 3470 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b
A
3471 } else {
3472 PAGE_WAKEUP(m);
3473 }
3474 vm_page_unlock_queues();
3475 }
d7e50217
A
3476 target_offset += PAGE_SIZE_64;
3477 xfer_size -= PAGE_SIZE;
3478 entry++;
1c79356b 3479 }
d7e50217
A
3480 occupied = 1;
3481 if (upl->flags & UPL_DEVICE_MEMORY) {
3482 occupied = 0;
3483 } else if (upl->flags & UPL_LITE) {
3484 int pg_num;
3485 int i;
3486 pg_num = upl->size/PAGE_SIZE;
3487 pg_num = (pg_num + 31) >> 5;
3488 occupied = 0;
3489 for(i= 0; i<pg_num; i++) {
3490 if(lite_list[i] != 0) {
3491 occupied = 1;
3492 break;
3493 }
3494 }
3495 } else {
3496 if(queue_empty(&upl->map_object->memq)) {
3497 occupied = 0;
3498 }
3499 }
3500
3501 if(occupied == 0) {
3502 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
0b4e3aa0 3503 *empty = TRUE;
d7e50217
A
3504 }
3505 if(object == shadow_object)
3506 vm_object_paging_end(shadow_object);
1c79356b 3507 }
d7e50217 3508 vm_object_unlock(shadow_object);
0b4e3aa0 3509 upl_unlock(upl);
1c79356b
A
3510 return KERN_SUCCESS;
3511}
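/*
 * Note: an abort of a wired-I/O UPL (UPL_IO_WIRE) is forwarded to
 * upl_commit_range() with no commit flags (see the top of this
 * routine), so the affected pages are simply unwired and released
 * from the UPL rather than being marked in error or dumped.
 */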
3512
3513kern_return_t
0b4e3aa0 3514upl_abort(
1c79356b
A
3515 upl_t upl,
3516 int error)
3517{
3518 vm_object_t object = NULL;
3519 vm_object_t shadow_object = NULL;
3520 vm_object_offset_t offset;
3521 vm_object_offset_t shadow_offset;
3522 vm_object_offset_t target_offset;
3523 int i;
d7e50217 3524 wpl_array_t lite_list;
1c79356b 3525 vm_page_t t,m;
d7e50217 3526 int occupied;
1c79356b 3527
0b4e3aa0
A
3528 if (upl == UPL_NULL)
3529 return KERN_INVALID_ARGUMENT;
3530
d7e50217
A
3531 if (upl->flags & UPL_IO_WIRE) {
3532 boolean_t empty;
3533 return upl_commit_range(upl,
3534 0, upl->size, 0,
3535 NULL, 0, &empty);
3536 }
3537
0b4e3aa0 3538 upl_lock(upl);
1c79356b 3539 if(upl->flags & UPL_DEVICE_MEMORY) {
0b4e3aa0 3540 upl_unlock(upl);
1c79356b
A
3541 return KERN_SUCCESS;
3542 }
0b4e3aa0 3543
1c79356b
A
3544 object = upl->map_object;
3545
0b4e3aa0 3546 if (object == NULL) {
1c79356b 3547 panic("upl_abort: upl object is not backed by an object");
0b4e3aa0 3548 upl_unlock(upl);
1c79356b
A
3549 return KERN_INVALID_ARGUMENT;
3550 }
3551
d7e50217
A
3552 if(object->pageout) {
3553 shadow_object = object->shadow;
3554 shadow_offset = object->shadow_offset;
3555 } else {
3556 shadow_object = object;
3557 shadow_offset = upl->offset - object->paging_offset;
3558 }
3559
3560 if(upl->flags & UPL_INTERNAL) {
3561 lite_list = (wpl_array_t)
3562 ((((vm_offset_t)upl) + sizeof(struct upl))
3563 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3564 } else {
3565 lite_list = (wpl_array_t)
3566 (((vm_offset_t)upl) + sizeof(struct upl));
3567 }
1c79356b
A
3568 offset = 0;
3569 vm_object_lock(shadow_object);
3570 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
d7e50217
A
3571 m = VM_PAGE_NULL;
3572 target_offset = offset + shadow_offset;
3573 if(upl->flags & UPL_LITE) {
3574 int pg_num;
3575 pg_num = offset/PAGE_SIZE;
3576 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3577 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3578 m = vm_page_lookup(
3579 shadow_object, target_offset);
3580 }
3581 }
3582 if(object->pageout) {
3583 if ((t = vm_page_lookup(object, offset)) != NULL) {
3584 t->pageout = FALSE;
3585 VM_PAGE_FREE(t);
3586 if(m == NULL) {
3587 m = vm_page_lookup(
3588 shadow_object, target_offset);
3589 }
3590 if(m != VM_PAGE_NULL)
3591 vm_object_paging_end(m->object);
3592 }
3593 }
3594 if(m != VM_PAGE_NULL) {
1c79356b
A
3595 vm_page_lock_queues();
3596 if(m->absent) {
3597 /* COPYOUT = FALSE case */
3598 /* check for error conditions which must */
3599 /* be passed back to the pages customer */
3600 if(error & UPL_ABORT_RESTART) {
3601 m->restart = TRUE;
3602 m->absent = FALSE;
3603 vm_object_absent_release(m->object);
3604 m->page_error = KERN_MEMORY_ERROR;
3605 m->error = TRUE;
3606 } else if(error & UPL_ABORT_UNAVAILABLE) {
3607 m->restart = FALSE;
3608 m->unusual = TRUE;
3609 m->clustered = FALSE;
3610 } else if(error & UPL_ABORT_ERROR) {
3611 m->restart = FALSE;
3612 m->absent = FALSE;
3613 vm_object_absent_release(m->object);
3614 m->page_error = KERN_MEMORY_ERROR;
3615 m->error = TRUE;
3616 } else if(error & UPL_ABORT_DUMP_PAGES) {
3617 m->clustered = TRUE;
3618 } else {
3619 m->clustered = TRUE;
3620 }
3621
3622 m->cleaning = FALSE;
3623 m->overwriting = FALSE;
3624 PAGE_WAKEUP_DONE(m);
3625 if(m->clustered) {
3626 vm_page_free(m);
3627 } else {
3628 vm_page_activate(m);
3629 }
3630 vm_page_unlock_queues();
3631 continue;
3632 }
3633 /*
3634 * Handle the trusted pager throttle.
3635 */
3636 if (m->laundry) {
3637 vm_page_laundry_count--;
3638 m->laundry = FALSE;
3639 if (vm_page_laundry_count
3640 < vm_page_laundry_min) {
3641 vm_page_laundry_min = 0;
3642 thread_wakeup((event_t)
3643 &vm_page_laundry_count);
3644 }
3645 }
3646 if(m->pageout) {
3647 assert(m->busy);
3648 assert(m->wire_count == 1);
3649 m->pageout = FALSE;
3650 vm_page_unwire(m);
3651 }
0b4e3aa0 3652 m->dump_cleaning = FALSE;
1c79356b
A
3653 m->cleaning = FALSE;
3654 m->busy = FALSE;
3655 m->overwriting = FALSE;
3656#if MACH_PAGEMAP
3657 vm_external_state_clr(
3658 m->object->existence_map, m->offset);
3659#endif /* MACH_PAGEMAP */
3660 if(error & UPL_ABORT_DUMP_PAGES) {
3661 vm_page_free(m);
d7e50217 3662 pmap_page_protect(m->phys_page, VM_PROT_NONE);
1c79356b
A
3663 } else {
3664 PAGE_WAKEUP(m);
3665 }
3666 vm_page_unlock_queues();
3667 }
1c79356b 3668 }
d7e50217
A
3669 occupied = 1;
3670 if (upl->flags & UPL_DEVICE_MEMORY) {
3671 occupied = 0;
3672 } else if (upl->flags & UPL_LITE) {
3673 int pg_num;
3674 int i;
3675 pg_num = upl->size/PAGE_SIZE;
3676 pg_num = (pg_num + 31) >> 5;
3677 occupied = 0;
3678 for(i= 0; i<pg_num; i++) {
3679 if(lite_list[i] != 0) {
3680 occupied = 1;
3681 break;
3682 }
3683 }
3684 } else {
3685 if(queue_empty(&upl->map_object->memq)) {
3686 occupied = 0;
3687 }
3688 }
1c79356b 3689
d7e50217
A
3690 if(occupied == 0) {
3691 if(object == shadow_object)
3692 vm_object_paging_end(shadow_object);
1c79356b 3693 }
d7e50217 3694 vm_object_unlock(shadow_object);
0b4e3aa0 3695 upl_unlock(upl);
1c79356b
A
3696 return KERN_SUCCESS;
3697}
3698
3699/* an option on commit should be wire */
3700kern_return_t
0b4e3aa0
A
3701upl_commit(
3702 upl_t upl,
3703 upl_page_info_t *page_list,
3704 mach_msg_type_number_t count)
1c79356b 3705{
0b4e3aa0
A
3706 if (upl == UPL_NULL)
3707 return KERN_INVALID_ARGUMENT;
3708
d7e50217
A
3709 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
3710 boolean_t empty;
3711 return upl_commit_range(upl, 0, upl->size, 0,
3712 page_list, count, &empty);
3713 }
3714
0b4e3aa0
A
3715 if (count == 0)
3716 page_list = NULL;
3717
3718 upl_lock(upl);
1c79356b
A
3719 if (upl->flags & UPL_DEVICE_MEMORY)
3720 page_list = NULL;
1c79356b 3721
d7e50217
A
3722 if ((upl->flags & UPL_CLEAR_DIRTY) ||
3723 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
1c79356b
A
3724 vm_object_t shadow_object = upl->map_object->shadow;
3725 vm_object_t object = upl->map_object;
3726 vm_object_offset_t target_offset;
3727 vm_size_t xfer_end;
3728 int entry;
3729
3730 vm_page_t t, m;
3731 upl_page_info_t *p;
3732
3733 vm_object_lock(shadow_object);
3734
3735 entry = 0;
3736 target_offset = object->shadow_offset;
3737 xfer_end = upl->size + object->shadow_offset;
3738
3739 while(target_offset < xfer_end) {
3740
3741 if ((t = vm_page_lookup(object,
3742 target_offset - object->shadow_offset))
3743 == NULL) {
3744 target_offset += PAGE_SIZE_64;
3745 entry++;
3746 continue;
3747 }
3748
3749 m = vm_page_lookup(shadow_object, target_offset);
3750 if(m != VM_PAGE_NULL) {
d7e50217
A
3751 if (upl->flags & UPL_CLEAR_DIRTY) {
3752 pmap_clear_modify(m->phys_page);
3753 m->dirty = FALSE;
3754 }
3755 /* It is a part of the semantic of */
3756 /* COPYOUT_FROM UPLs that a commit */
3757 /* implies cache sync between the */
3758 /* vm page and the backing store */
3759 /* this can be used to strip the */
3760 /* precious bit as well as clean */
3761 if (upl->flags & UPL_PAGE_SYNC_DONE)
3762 m->precious = FALSE;
3763
3764 if(page_list) {
3765 p = &(page_list[entry]);
3766 if(page_list[entry].phys_addr &&
1c79356b
A
3767 p->pageout && !m->pageout) {
3768 vm_page_lock_queues();
3769 m->busy = TRUE;
3770 m->pageout = TRUE;
3771 vm_page_wire(m);
3772 vm_page_unlock_queues();
d7e50217 3773 } else if (page_list[entry].phys_addr &&
0b4e3aa0
A
3774 !p->pageout && m->pageout &&
3775 !m->dump_cleaning) {
1c79356b
A
3776 vm_page_lock_queues();
3777 m->pageout = FALSE;
3778 m->absent = FALSE;
3779 m->overwriting = FALSE;
3780 vm_page_unwire(m);
3781 PAGE_WAKEUP_DONE(m);
3782 vm_page_unlock_queues();
d7e50217
A
3783 }
3784 page_list[entry].phys_addr = 0;
1c79356b 3785 }
1c79356b
A
3786 }
3787 target_offset += PAGE_SIZE_64;
3788 entry++;
3789 }
3790
3791 vm_object_unlock(shadow_object);
3792 }
d7e50217
A
3793 if (upl->flags & UPL_DEVICE_MEMORY) {
3794 vm_object_lock(upl->map_object->shadow);
3795 if(upl->map_object == upl->map_object->shadow)
3796 vm_object_paging_end(upl->map_object->shadow);
3797 vm_object_unlock(upl->map_object->shadow);
3798 }
3799	upl_unlock(upl);
3800 return KERN_SUCCESS;
3801}
3802
3803
3804
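/*
 * vm_object_iopl_request:
 *	Build a UPL directly against "object" for an I/O transfer.  Each page
 *	in the range is looked up (and faulted in if it is missing or busy),
 *	wired, and then recorded either in the lite bitmap or, for map-object
 *	UPLs, via a private alias page inserted into the map object.
 *	Physically contiguous (device) objects short-circuit to a
 *	UPL_DEVICE_MEMORY upl with no per-page work.
 */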
3805kern_return_t
3806vm_object_iopl_request(
3807 vm_object_t object,
3808 vm_object_offset_t offset,
3809 vm_size_t size,
3810 upl_t *upl_ptr,
3811 upl_page_info_array_t user_page_list,
3812 unsigned int *page_list_count,
3813 int cntrl_flags)
3814{
3815 vm_page_t dst_page;
3816 vm_object_offset_t dst_offset = offset;
3817 vm_size_t xfer_size = size;
3818 upl_t upl = NULL;
3819 int entry;
3820 wpl_array_t lite_list;
3821 int page_field_size;
3822
3823 vm_page_t alias_page = NULL;
3824 kern_return_t ret;
3825 vm_prot_t prot;
3826
3827
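	/*
	 * A COPYOUT_FROM request only reads the pages; anything else may
	 * modify them, so ask for write access as well.
	 */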
3828 if(cntrl_flags & UPL_COPYOUT_FROM) {
3829 prot = VM_PROT_READ;
3830 } else {
3831 prot = VM_PROT_READ | VM_PROT_WRITE;
3832 }
3833
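	/*
	 * Clamp the request to MAX_UPL_TRANSFER pages unless the object
	 * is physically contiguous.
	 */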
3834 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
3835 size = MAX_UPL_TRANSFER * page_size;
3836 }
3837
3838 if(cntrl_flags & UPL_SET_INTERNAL)
3839 if(page_list_count != NULL)
3840 *page_list_count = MAX_UPL_TRANSFER;
3841 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
3842 ((page_list_count != NULL) && (*page_list_count != 0)
3843 && *page_list_count < (size/page_size)))
3844 return KERN_INVALID_ARGUMENT;
3845
3846 if((!object->internal) && (object->paging_offset != 0))
3847		panic("vm_object_iopl_request: vnode object with non-zero paging offset\n");
3848
3849 if(object->phys_contiguous) {
3850 /* No paging operations are possible against this memory */
3851 /* and so no need for map object, ever */
3852 cntrl_flags |= UPL_SET_LITE;
3853 }
3854
3855 if(upl_ptr) {
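		/*
		 * Allocate the upl.  Internal UPLs embed the page list
		 * immediately after the upl structure; lite UPLs also carry
		 * a bitmap (lite_list) with one bit per page in the range.
		 */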
3856 if(cntrl_flags & UPL_SET_INTERNAL) {
3857 if(cntrl_flags & UPL_SET_LITE) {
3858 upl = upl_create(
3859 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
3860 size);
3861 user_page_list = (upl_page_info_t *)
3862 (((vm_offset_t)upl) + sizeof(struct upl));
3863 lite_list = (wpl_array_t)
3864 (((vm_offset_t)user_page_list) +
3865 ((size/PAGE_SIZE) *
3866 sizeof(upl_page_info_t)));
3867 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
3868 page_field_size =
3869 (page_field_size + 3) & 0xFFFFFFFC;
3870 bzero((char *)lite_list, page_field_size);
3871 upl->flags =
3872 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
3873 } else {
3874 upl = upl_create(UPL_CREATE_INTERNAL, size);
3875 user_page_list = (upl_page_info_t *)
3876 (((vm_offset_t)upl)
3877 + sizeof(struct upl));
3878 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
3879 }
3880 } else {
3881 if(cntrl_flags & UPL_SET_LITE) {
3882 upl = upl_create(UPL_CREATE_LITE, size);
3883 lite_list = (wpl_array_t)
3884 (((vm_offset_t)upl) + sizeof(struct upl));
3885 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
3886 page_field_size =
3887 (page_field_size + 3) & 0xFFFFFFFC;
3888 bzero((char *)lite_list, page_field_size);
3889 upl->flags = UPL_LITE | UPL_IO_WIRE;
3890 } else {
3891 upl = upl_create(UPL_CREATE_EXTERNAL, size);
3892 upl->flags = UPL_IO_WIRE;
3893 }
3894 }
3895
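		/*
		 * Physically contiguous objects are treated as device
		 * memory: record the base physical page, take a paging
		 * reference on the object, and return without any
		 * per-page work.
		 */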
3896 if(object->phys_contiguous) {
3897 upl->size = size;
3898 upl->offset = offset + object->paging_offset;
3899 *upl_ptr = upl;
3900 if(user_page_list) {
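				/*
				 * Report the base physical page number
				 * (the physical address shifted down by
				 * the 4K page size).
				 */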
3901 user_page_list[0].phys_addr =
3902 (offset + object->shadow_offset)>>12;
3903 user_page_list[0].device = TRUE;
3904 }
3905 upl->map_object = object;
3906 /* don't need any shadow mappings for this one */
3907 /* since it is already I/O memory */
3908 upl->flags |= UPL_DEVICE_MEMORY;
3909
3910 vm_object_lock(object);
3911 vm_object_paging_begin(object);
3912 vm_object_unlock(object);
3913
3914 if(page_list_count != NULL) {
3915 if (upl->flags & UPL_INTERNAL) {
3916 *page_list_count = 0;
3917 } else {
3918 *page_list_count = 1;
3919 }
3920 }
3921 return KERN_SUCCESS;
3922 }
3923
3924
3925 if(cntrl_flags & UPL_SET_LITE) {
3926 upl->map_object = object;
3927 } else {
3928 upl->map_object = vm_object_allocate(size);
3929 vm_object_lock(upl->map_object);
3930 upl->map_object->shadow = object;
3931 upl->map_object->pageout = TRUE;
3932 upl->map_object->can_persist = FALSE;
3933 upl->map_object->copy_strategy =
3934 MEMORY_OBJECT_COPY_NONE;
3935 upl->map_object->shadow_offset = offset;
3936 upl->map_object->wimg_bits = object->wimg_bits;
3937 vm_object_unlock(upl->map_object);
3938 }
3939 upl->size = size;
3940 upl->offset = offset + object->paging_offset;
3941 *upl_ptr = upl;
3942 }
3943 vm_object_lock(object);
3944
3945 if (!object->phys_contiguous) {
3946 /* Protect user space from future COW operations */
3947 object->true_share = TRUE;
3948 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3949 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3950 }
3951
3952#ifdef UBC_DEBUG
3953 if(upl_ptr)
3954 queue_enter(&object->uplq, upl, upl_t, uplq);
3955#endif /* UBC_DEBUG */
3956 vm_object_paging_begin(object);
3957 entry = 0;
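	/*
	 * Walk the request a page at a time: fault in anything that is
	 * missing or busy, wire it, then record it in the lite bitmap or
	 * insert an alias page into the map object, and report its state
	 * in the caller's page list.
	 */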
3958 while (xfer_size) {
3959 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
3960 vm_object_unlock(object);
3961 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3962 vm_object_lock(object);
3963 }
3964 dst_page = vm_page_lookup(object, dst_offset);
3965 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
3966 (dst_page->unusual && (dst_page->error ||
3967 dst_page->restart || dst_page->absent ||
3968 dst_page->fictitious ||
3969 prot & dst_page->page_lock))) {
3970 vm_fault_return_t result;
3971 do {
3972 vm_page_t top_page;
3973 kern_return_t error_code;
3974 int interruptible;
3975
3976 vm_object_offset_t lo_offset = offset;
3977 vm_object_offset_t hi_offset = offset + size;
3978
3979
3980
3981 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
3982 interruptible = THREAD_ABORTSAFE;
3983 } else {
3984 interruptible = THREAD_UNINT;
3985 }
3986
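				/* fault the page in (write access is requested along with prot) */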
3987 result = vm_fault_page(object, dst_offset,
3988 prot | VM_PROT_WRITE, FALSE,
3989 interruptible,
3990 lo_offset, hi_offset,
3991 VM_BEHAVIOR_SEQUENTIAL,
3992 &prot, &dst_page, &top_page,
3993 (int *)0,
3994 &error_code, FALSE, FALSE, NULL, 0);
3995
3996 switch(result) {
3997 case VM_FAULT_SUCCESS:
3998
3999 PAGE_WAKEUP_DONE(dst_page);
4000
4001 /*
4002 * Release paging references and
4003 * top-level placeholder page, if any.
4004 */
4005
4006 if(top_page != VM_PAGE_NULL) {
4007 vm_object_t local_object;
4008 local_object =
4009 top_page->object;
4010 if(top_page->object
4011 != dst_page->object) {
4012 vm_object_lock(
4013 local_object);
4014 VM_PAGE_FREE(top_page);
4015 vm_object_paging_end(
4016 local_object);
4017 vm_object_unlock(
4018 local_object);
4019 } else {
4020 VM_PAGE_FREE(top_page);
4021 vm_object_paging_end(
4022 local_object);
4023 }
4024 }
4025
4026 break;
4027
4028
4029 case VM_FAULT_RETRY:
4030 vm_object_lock(object);
4031 vm_object_paging_begin(object);
4032 break;
4033
4034 case VM_FAULT_FICTITIOUS_SHORTAGE:
4035 vm_page_more_fictitious();
4036 vm_object_lock(object);
4037 vm_object_paging_begin(object);
4038 break;
4039
4040 case VM_FAULT_MEMORY_SHORTAGE:
4041 if (vm_page_wait(interruptible)) {
4042 vm_object_lock(object);
4043 vm_object_paging_begin(object);
4044 break;
4045 }
4046 /* fall thru */
4047
4048 case VM_FAULT_INTERRUPTED:
4049 error_code = MACH_SEND_INTERRUPTED;
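					/* fall through to the memory-error path */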
4050 case VM_FAULT_MEMORY_ERROR:
4051 ret = (error_code ? error_code:
4052 KERN_MEMORY_ERROR);
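					/*
					 * Undo the wiring done on earlier
					 * iterations before destroying the upl.
					 */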
4053 vm_object_lock(object);
4054 for(; offset < dst_offset;
4055 offset += PAGE_SIZE) {
4056 dst_page = vm_page_lookup(
4057 object, offset);
4058 if(dst_page == VM_PAGE_NULL)
4059 panic("vm_object_iopl_request: Wired pages missing. \n");
4060 vm_page_lock_queues();
4061 vm_page_unwire(dst_page);
4062 vm_page_unlock_queues();
4063 VM_STAT(reactivations++);
4064 }
4065 vm_object_unlock(object);
4066 upl_destroy(upl);
4067 return ret;
4068 }
4069			} while (result != VM_FAULT_SUCCESS);
4071 }
4072
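		/* wire the page so it cannot be reclaimed while the UPL is outstanding */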
4073 vm_page_lock_queues();
4074 vm_page_wire(dst_page);
4075 vm_page_unlock_queues();
4076
4077 if(upl_ptr) {
4078
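			/*
			 * Record the page in the UPL: set its bit in the
			 * lite bitmap, or give the map object a private
			 * alias page that shadows the real physical page.
			 */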
4079 vm_page_lock_queues();
4080 if(cntrl_flags & UPL_SET_LITE) {
4081 int pg_num;
4082 pg_num = (dst_offset-offset)/PAGE_SIZE;
4083 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4084 } else {
4085 /*
4086 * Convert the fictitious page to a
4087 * private shadow of the real page.
4088 */
4089 assert(alias_page->fictitious);
4090 alias_page->fictitious = FALSE;
4091 alias_page->private = TRUE;
4092 alias_page->pageout = TRUE;
4093 alias_page->phys_page = dst_page->phys_page;
4094 vm_page_wire(alias_page);
4095
4096 vm_page_insert(alias_page,
4097 upl->map_object, size - xfer_size);
4098 assert(!alias_page->wanted);
4099 alias_page->busy = FALSE;
4100 alias_page->absent = FALSE;
4101 }
4102
4103 /* expect the page to be used */
4104 dst_page->reference = TRUE;
4105 if (!(cntrl_flags & UPL_COPYOUT_FROM))
4106 dst_page->dirty = TRUE;
4107 alias_page = NULL;
4108
4109 if(user_page_list) {
4110 user_page_list[entry].phys_addr
4111 = dst_page->phys_page;
4112 user_page_list[entry].dirty =
4113 dst_page->dirty;
4114 user_page_list[entry].pageout =
4115 dst_page->pageout;
4116 user_page_list[entry].absent =
4117 dst_page->absent;
4118 user_page_list[entry].precious =
4119 dst_page->precious;
4120 }
4121 vm_page_unlock_queues();
4122 }
4123 entry++;
4124 dst_offset += PAGE_SIZE_64;
4125 xfer_size -= PAGE_SIZE;
4126 }
4127
4128	if (page_list_count != NULL) {
4129		if (upl->flags & UPL_INTERNAL)
4130			*page_list_count = 0;
4131		else if (*page_list_count > entry)
4132			*page_list_count = entry;
4133	}
4135
4136 if(alias_page != NULL) {
4137 vm_page_lock_queues();
4138 vm_page_free(alias_page);
4139 vm_page_unlock_queues();
4140 }
4141
4142 vm_object_unlock(object);
4143 return KERN_SUCCESS;
4144}
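/*
 * For internal UPLs the page list is laid out immediately after the
 * upl structure itself; return that offset.
 */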
4145vm_size_t
4146upl_get_internal_pagelist_offset()
4147{
4148 return sizeof(struct upl);
4149}
4150
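/*
 * upl_set_dirty / upl_clear_dirty set and clear UPL_CLEAR_DIRTY, which
 * asks upl_commit to clear the pages' modify bits when the UPL is
 * committed.
 */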
4151void
4152upl_set_dirty(
4153 upl_t upl)
4154{
4155 upl->flags |= UPL_CLEAR_DIRTY;
4156}
4157
4158void
4159upl_clear_dirty(
4160 upl_t upl)
4161{
4162 upl->flags &= ~UPL_CLEAR_DIRTY;
4163}
4164
4165
4166#ifdef MACH_BSD
4167
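/*
 * Thin wrappers around the UPL_*_PAGE macros.
 */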
4168boolean_t upl_page_present(upl_page_info_t *upl, int index)
4169{
4170 return(UPL_PAGE_PRESENT(upl, index));
4171}
4172boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
4173{
4174 return(UPL_DIRTY_PAGE(upl, index));
4175}
4176boolean_t upl_valid_page(upl_page_info_t *upl, int index)
4177{
4178 return(UPL_VALID_PAGE(upl, index));
4179}
4180vm_offset_t upl_phys_page(upl_page_info_t *upl, int index)
4181{
4182 return((vm_offset_t)UPL_PHYS_PAGE(upl, index));
4183}
4184
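/*
 * Debugging aid: count dirty, pageout and precious pages on the
 * inactive and zero-fill queues (printed together) and on the
 * active queue.
 */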
4185void
4186vm_countdirtypages(void)
4187{
4188 vm_page_t m;
4189 int dpages;
4190 int pgopages;
4191 int precpages;
4192
4193
4194 dpages=0;
4195 pgopages=0;
4196 precpages=0;
4197
4198 vm_page_lock_queues();
4199 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4200 do {
4201 if (m ==(vm_page_t )0) break;
4202
4203 if(m->dirty) dpages++;
4204 if(m->pageout) pgopages++;
4205 if(m->precious) precpages++;
4206
4207 m = (vm_page_t) queue_next(&m->pageq);
4208 if (m ==(vm_page_t )0) break;
4209
4210 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
4211 vm_page_unlock_queues();
4212
4213 vm_page_lock_queues();
4214 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4215 do {
4216 if (m ==(vm_page_t )0) break;
4217
4218 if(m->dirty) dpages++;
4219 if(m->pageout) pgopages++;
4220 if(m->precious) precpages++;
4221
4222 m = (vm_page_t) queue_next(&m->pageq);
4223 if (m ==(vm_page_t )0) break;
4224
4225 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
4226 vm_page_unlock_queues();
4227
4228 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
4229
4230 dpages=0;
4231 pgopages=0;
4232 precpages=0;
4233
4234 vm_page_lock_queues();
4235 m = (vm_page_t) queue_first(&vm_page_queue_active);
4236
4237 do {
4238 if(m == (vm_page_t )0) break;
4239 if(m->dirty) dpages++;
4240 if(m->pageout) pgopages++;
4241 if(m->precious) precpages++;
4242
4243 m = (vm_page_t) queue_next(&m->pageq);
4244 if(m == (vm_page_t )0) break;
4245
4246 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
4247 vm_page_unlock_queues();
4248
4249 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
4250
4251}
4252#endif /* MACH_BSD */
4253
4254#ifdef UBC_DEBUG
4255kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
4256{
4257 upl->ubc_alias1 = alias1;
4258 upl->ubc_alias2 = alias2;
4259 return KERN_SUCCESS;
4260}
4261int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
4262{
4263 if(al)
4264 *al = upl->ubc_alias1;
4265 if(al2)
4266 *al2 = upl->ubc_alias2;
4267 return KERN_SUCCESS;
4268}
4269#endif /* UBC_DEBUG */
4270
4271
4272
4273#if MACH_KDB
4274#include <ddb/db_output.h>
4275#include <ddb/db_print.h>
4276#include <vm/vm_print.h>
4277
4278#define printf kdbprintf
4279extern int db_indent;
4280void db_pageout(void);
4281
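/*
 * kdb command: print current page counts, pageout targets and the
 * burst/pause tuning values, then the pageout statistics.
 */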
4282void
4283db_vm(void)
4284{
4285 extern int vm_page_gobble_count;
4286
4287 iprintf("VM Statistics:\n");
4288 db_indent += 2;
4289 iprintf("pages:\n");
4290 db_indent += 2;
4291 iprintf("activ %5d inact %5d free %5d",
4292 vm_page_active_count, vm_page_inactive_count,
4293 vm_page_free_count);
4294 printf(" wire %5d gobbl %5d\n",
4295 vm_page_wire_count, vm_page_gobble_count);
4296 iprintf("laund %5d\n",
4297 vm_page_laundry_count);
4298 db_indent -= 2;
4299 iprintf("target:\n");
4300 db_indent += 2;
4301 iprintf("min %5d inact %5d free %5d",
4302 vm_page_free_min, vm_page_inactive_target,
4303 vm_page_free_target);
4304 printf(" resrv %5d\n", vm_page_free_reserved);
4305 db_indent -= 2;
4306
4307 iprintf("burst:\n");
4308 db_indent += 2;
4309 iprintf("max %5d min %5d wait %5d empty %5d\n",
4310 vm_pageout_burst_max, vm_pageout_burst_min,
4311 vm_pageout_burst_wait, vm_pageout_empty_wait);
4312 db_indent -= 2;
4313 iprintf("pause:\n");
4314 db_indent += 2;
4315 iprintf("count %5d max %5d\n",
4316 vm_pageout_pause_count, vm_pageout_pause_max);
4317#if MACH_COUNTERS
4318 iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
4319#endif /* MACH_COUNTERS */
4320 db_indent -= 2;
4321 db_pageout();
4322 db_indent -= 2;
4323}
4324
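/*
 * kdb command: print the pageout-scan counters (and, when configured,
 * the cluster and target statistics).
 */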
4325void
4326db_pageout(void)
4327{
4328#if MACH_COUNTERS
4329 extern int c_laundry_pages_freed;
4330#endif /* MACH_COUNTERS */
4331
4332 iprintf("Pageout Statistics:\n");
4333 db_indent += 2;
4334 iprintf("active %5d inactv %5d\n",
4335 vm_pageout_active, vm_pageout_inactive);
4336 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
4337 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
4338 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
4339 iprintf("used %5d clean %5d dirty %5d\n",
4340 vm_pageout_inactive_used, vm_pageout_inactive_clean,
4341 vm_pageout_inactive_dirty);
4342#if MACH_COUNTERS
4343 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
4344#endif /* MACH_COUNTERS */
4345#if MACH_CLUSTER_STATS
4346 iprintf("Cluster Statistics:\n");
4347 db_indent += 2;
4348 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
4349 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
4350 vm_pageout_cluster_collisions);
4351 iprintf("clusters %5d conversions %5d\n",
4352 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
4353 db_indent -= 2;
4354 iprintf("Target Statistics:\n");
4355 db_indent += 2;
4356 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
4357 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
4358 vm_pageout_target_page_freed);
4359 db_indent -= 2;
4360#endif /* MACH_CLUSTER_STATS */
4361 db_indent -= 2;
4362}
4363
4364#if MACH_CLUSTER_STATS
4365unsigned long vm_pageout_cluster_dirtied = 0;
4366unsigned long vm_pageout_cluster_cleaned = 0;
4367unsigned long vm_pageout_cluster_collisions = 0;
4368unsigned long vm_pageout_cluster_clusters = 0;
4369unsigned long vm_pageout_cluster_conversions = 0;
4370unsigned long vm_pageout_target_collisions = 0;
4371unsigned long vm_pageout_target_page_dirtied = 0;
4372unsigned long vm_pageout_target_page_freed = 0;
4373#define CLUSTER_STAT(clause) clause
4374#else /* MACH_CLUSTER_STATS */
4375#define CLUSTER_STAT(clause)
4376#endif /* MACH_CLUSTER_STATS */
4377
4378#endif /* MACH_KDB */