/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	The proverbial page-out daemon.
 */

#include <stdint.h>

#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <mach_kdb.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>

#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>	/* must be last */

/*
 * ENCRYPTED SWAP:
 */
#ifdef __ppc__
#include <ppc/mappings.h>
#endif /* __ppc__ */
#include <../bsd/crypto/aes/aes.h>

extern ipc_port_t	memory_manager_default;


#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  10000	/* maximum iterations of the active queue to move pages to inactive */
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096	/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100	/* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50	/* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	16UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef	VM_PAGEOUT_BURST_WAIT
#define	VM_PAGEOUT_BURST_WAIT	30	/* milliseconds per page */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef	VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef	VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef	VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */


/*
 *	To obtain a reasonable LRU approximation, the inactive queue
 *	needs to be large enough to give pages on it a chance to be
 *	referenced a second time.  This macro defines the fraction
 *	of active+inactive pages that should be inactive.
 *	The pageout daemon uses it to update vm_page_inactive_target.
 *
 *	If vm_page_free_count falls below vm_page_free_target and
 *	vm_page_inactive_count is below vm_page_inactive_target,
 *	then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 3)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 *	Once the pageout daemon starts running, it keeps going
 *	until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */

/*
 *	The pageout daemon always starts running once vm_page_free_count
 *	falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

/*
 *	When vm_page_free_count falls below vm_page_free_reserved,
 *	only vm-privileged threads can allocate pages.  vm-privilege
 *	allows the pageout daemon and default pager (and any other
 *	associated threads needed for default pageout) to continue
 *	operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */

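/*
 * Worked example of how these thresholds combine (hypothetical numbers,
 * for illustration only): assume vm_page_free_reserved works out to
 * roughly 100 pages and vm_page_free_count_init is 100000 pages, so
 * free_after_reserve is about 99900.  Then, as computed in
 * vm_page_free_reserve() further down in this file:
 *
 *	vm_page_free_min    ~= 100 + (10 + 99900 / 100) = 1109
 *	vm_page_free_target ~= 100 + (15 + 99900 /  80) = 1363
 *
 * i.e. the daemon always starts once free memory drops below ~1109 pages
 * and keeps scanning until it climbs back above ~1363.  Independently,
 * VM_PAGE_INACTIVE_TARGET keeps about one third of the active+inactive
 * pages on the inactive queue so they get a second chance at being
 * referenced before they are stolen.
 */
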

/*
 * must hold the page queues lock to
 * manipulate this structure
 */
struct vm_pageout_queue {
	queue_head_t	pgo_pending;	/* laundry pages to be processed by pager's iothread */
	unsigned int	pgo_laundry;	/* current count of laundry pages on queue or in flight */
	unsigned int	pgo_maxlaundry;

	unsigned int	pgo_idle:1,	/* iothread is blocked waiting for work to do */
			pgo_busy:1,	/* iothread is currently processing request from pgo_pending */
			pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */
			:0;
};

#define VM_PAGE_Q_THROTTLED(q)		\
	((q)->pgo_laundry >= (q)->pgo_maxlaundry)

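/*
 * Illustrative sketch only: a hypothetical producer-side helper (it is
 * not called anywhere in this file) showing how the bits above are meant
 * to be used -- back off while the queue is throttled, and wake the
 * iothread if it has gone idle.  vm_pageout_cluster() and
 * vm_pageout_throttle_up() below implement the real protocol.
 * Assumes the page queues lock is held by the caller.
 */
static __inline__ void
vm_pageout_queue_kick_example(struct vm_pageout_queue *q)
{
	if (VM_PAGE_Q_THROTTLED(q)) {
		/* too much laundry already queued or in flight */
		return;
	}
	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}
}
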

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */

unsigned int	vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */

static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(void);
static void vm_pageout_queue_steal(vm_page_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

/*
 *	Protection against zero fill flushing live working sets derived
 *	from existing backing store and files
 */
unsigned int vm_accellerate_zf_pageout_trigger = 400;
unsigned int vm_zf_iterator;
unsigned int vm_zf_iterator_count = 40;
unsigned int last_page_zf;
unsigned int vm_zf_count = 0;

/*
 *	These variables record the pageout daemon's actions:
 *	how many pages it looks at and what happens to those pages.
 *	No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty = 0;	/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;	/* debugging */
unsigned int vm_pageout_purged_objects = 0;	/* debugging */
unsigned int vm_stat_discard = 0;		/* debugging */
unsigned int vm_stat_discard_sent = 0;		/* debugging */
unsigned int vm_stat_discard_failure = 0;	/* debugging */
unsigned int vm_stat_discard_throttle = 0;	/* debugging */

unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line  = 0;
unsigned int vm_pageout_in_place  = 0;

/*
 * ENCRYPTED SWAP:
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */


struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;


/*
 *	Routine:	vm_backing_store_disable
 *	Purpose:
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
void
vm_backing_store_disable(
	boolean_t	disable)
{
	if(disable) {
		vm_backing_store_low = 1;
	} else {
		if(vm_backing_store_low) {
			vm_backing_store_low = 0;
			thread_wakeup((event_t) &vm_backing_store_low);
		}
	}
}
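
/*
 * Illustrative waiter-side sketch only (hypothetical helper, not one of
 * the real callers, which live elsewhere in the VM code): a
 * non-privileged thread that wants to extend backing store blocks on
 * &vm_backing_store_low until vm_backing_store_disable(FALSE) issues the
 * thread_wakeup() above.  As noted above, the check and the wait are
 * synchronized by the caller.
 */
static __inline__ void
vm_backing_store_wait_example(void)
{
	if (vm_backing_store_low) {
		assert_wait((event_t) &vm_backing_store_low, THREAD_UNINT);
		thread_block(THREAD_CONTINUE_NULL);
	}
}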


/*
 *	Routine:	vm_pageout_object_allocate
 *	Purpose:
 *		Allocate an object for use as out-of-line memory in a
 *		data_return/data_initialize message.
 *		The page must be in an unlocked object.
 *
 *		If the page belongs to a trusted pager, cleaning in place
 *		will be used, which utilizes a special "pageout object"
 *		containing private alias pages for the real page frames.
 *		Untrusted pagers use normal out-of-line memory.
 */
vm_object_t
vm_pageout_object_allocate(
	vm_page_t		m,
	vm_size_t		size,
	vm_object_offset_t	offset)
{
	vm_object_t	object = m->object;
	vm_object_t	new_object;

	assert(object->pager_ready);

	new_object = vm_object_allocate(size);

	if (object->pager_trusted) {
		assert (offset < object->size);

		vm_object_lock(new_object);
		new_object->pageout = TRUE;
		new_object->shadow = object;
		new_object->can_persist = FALSE;
		new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
		new_object->shadow_offset = offset;
		vm_object_unlock(new_object);

		/*
		 * Take a paging reference on the object. This will be dropped
		 * in vm_pageout_object_terminate()
		 */
		vm_object_lock(object);
		vm_object_paging_begin(object);
		vm_page_lock_queues();
		vm_page_unlock_queues();
		vm_object_unlock(object);

		vm_pageout_in_place++;
	} else
	        vm_pageout_out_of_line++;
	return(new_object);
}

#if	MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */

/* 
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object allocated by
 *		vm_pageout_object_allocate(), and perform all of the
 *		required cleanup actions.
 * 
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;
	boolean_t	shadow_internal;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);
	shadow_internal = shadow_object->internal;

	while (!queue_empty(&object->memq)) {
		vm_page_t 		p, m;
		vm_object_offset_t	offset;

		p = (vm_page_t) queue_first(&object->memq);

		assert(p->private);
		assert(p->pageout);
		p->pageout = FALSE;
		assert(!p->cleaning);

		offset = p->offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		m = vm_page_lookup(shadow_object,
			offset + object->shadow_offset);

		if(m == VM_PAGE_NULL)
			continue;
		assert(m->cleaning);
		/* used as a trigger on upl_commit etc to recognize the */
		/* pageout daemon's subsequent desire to pageout a cleaning */
		/* page.  When the bit is on the upl commit code will */
		/* respect the pageout bit in the target page over the */
		/* caller's page list indication */
		m->dump_cleaning = FALSE;

		/*
		 * Account for the paging reference taken when
		 * m->cleaning was set on this page.
		 */
		vm_object_paging_end(shadow_object);
		assert((m->dirty) || (m->precious) ||
				(m->busy && m->cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->laundry) {
			vm_pageout_throttle_up(m);
		}

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->pageout) {
			assert(m->busy);
			assert(m->wire_count == 1);
			m->cleaning = FALSE;
			m->pageout = FALSE;
#if MACH_CLUSTER_STATS
			if (m->wanted) vm_pageout_target_collisions++;
#endif
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
				m->dirty = TRUE;
			else
				m->dirty = FALSE;

			if (m->dirty) {
				CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
				vm_page_unwire(m);/* reactivates */
				VM_STAT(reactivations++);
				PAGE_WAKEUP_DONE(m);
			} else {
				CLUSTER_STAT(vm_pageout_target_page_freed++;)
				vm_page_free(m);/* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if (!m->active && !m->inactive && !m->private) {
			if (m->reference)
				vm_page_activate(m);
			else
				vm_page_deactivate(m);
		}
		if((m->busy) && (m->cleaning)) {

			/* the request_page_list case, (COPY_OUT_FROM FALSE) */
			m->busy = FALSE;

			/* We do not re-set m->dirty ! */
			/* The page was busy so no extraneous activity */
			/* could have occurred.  COPY_INTO is a read into the */
			/* new pages. CLEAN_IN_PLACE does actually write */
			/* out the pages but handling outside of this code */
			/* will take care of resetting dirty. We clear the */
			/* modify however for the Programmed I/O case. */
			pmap_clear_modify(m->phys_page);
			if(m->absent) {
				m->absent = FALSE;
				if(shadow_object->absent_count == 1)
					vm_object_absent_release(shadow_object);
				else
					shadow_object->absent_count--;
			}
			m->overwriting = FALSE;
		} else if (m->overwriting) {
			/* alternate request page list, write to page_list */
			/* case.  Occurs when the original page was wired */
			/* at the time of the list request */
			assert(m->wire_count != 0);
			vm_page_unwire(m);/* reactivates */
			m->overwriting = FALSE;
		} else {
			/*
			 * Set the dirty state according to whether or not the page was
			 * modified during the pageout. Note that we purposefully do
			 * NOT call pmap_clear_modify since the page is still mapped.
			 * If the page were to be dirtied between the 2 calls, this
			 * fact would be lost. This code is only necessary to
			 * maintain statistics, since the pmap module is always
			 * consulted if m->dirty is false.
			 */
#if MACH_CLUSTER_STATS
			m->dirty = pmap_is_modified(m->phys_page);

			if (m->dirty)	vm_pageout_cluster_dirtied++;
			else		vm_pageout_cluster_cleaned++;
			if (m->wanted)	vm_pageout_cluster_collisions++;
#else
			m->dirty = 0;
#endif
		}
		m->cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_pageout_object_allocate.
	 */
	vm_object_paging_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}
595
596/*
597 * Routine: vm_pageout_setup
598 * Purpose:
599 * Set up a page for pageout (clean & flush).
600 *
601 * Move the page to a new object, as part of which it will be
602 * sent to its memory manager in a memory_object_data_write or
603 * memory_object_initialize message.
604 *
605 * The "new_object" and "new_offset" arguments
606 * indicate where the page should be moved.
607 *
608 * In/Out conditions:
609 * The page in question must not be on any pageout queues,
610 * and must be busy. The object to which it belongs
611 * must be unlocked, and the caller must hold a paging
612 * reference to it. The new_object must not be locked.
613 *
614 * This routine returns a pointer to a place-holder page,
615 * inserted at the same offset, to block out-of-order
616 * requests for the page. The place-holder page must
617 * be freed after the data_write or initialize message
618 * has been sent.
619 *
620 * The original page is put on a paging queue and marked
621 * not busy on exit.
622 */
623vm_page_t
624vm_pageout_setup(
625 register vm_page_t m,
626 register vm_object_t new_object,
627 vm_object_offset_t new_offset)
628{
629 register vm_object_t old_object = m->object;
630 vm_object_offset_t paging_offset;
631 vm_object_offset_t offset;
632 register vm_page_t holding_page;
633 register vm_page_t new_m;
1c79356b
A
634 boolean_t need_to_wire = FALSE;
635
636
637 XPR(XPR_VM_PAGEOUT,
638 "vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
639 (integer_t)m->object, (integer_t)m->offset,
640 (integer_t)m, (integer_t)new_object,
641 (integer_t)new_offset);
642 assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
643 !m->restart);
644
645 assert(m->dirty || m->precious);
646
647 /*
648 * Create a place-holder page where the old one was, to prevent
649 * attempted pageins of this page while we're unlocked.
1c79356b
A
650 */
651 VM_PAGE_GRAB_FICTITIOUS(holding_page);
652
1c79356b
A
653 vm_object_lock(old_object);
654
655 offset = m->offset;
656 paging_offset = offset + old_object->paging_offset;
657
658 if (old_object->pager_trusted) {
659 /*
660 * This pager is trusted, so we can clean this page
661 * in place. Leave it in the old object, and mark it
662 * cleaning & pageout.
663 */
664 new_m = holding_page;
665 holding_page = VM_PAGE_NULL;
666
1c79356b
A
667 /*
668 * Set up new page to be private shadow of real page.
669 */
55e303ae 670 new_m->phys_page = m->phys_page;
1c79356b 671 new_m->fictitious = FALSE;
1c79356b
A
672 new_m->pageout = TRUE;
673
674 /*
675 * Mark real page as cleaning (indicating that we hold a
676 * paging reference to be released via m_o_d_r_c) and
677 * pageout (indicating that the page should be freed
678 * when the pageout completes).
679 */
55e303ae 680 pmap_clear_modify(m->phys_page);
1c79356b 681 vm_page_lock_queues();
0b4e3aa0 682 new_m->private = TRUE;
1c79356b
A
683 vm_page_wire(new_m);
684 m->cleaning = TRUE;
685 m->pageout = TRUE;
686
687 vm_page_wire(m);
688 assert(m->wire_count == 1);
689 vm_page_unlock_queues();
690
691 m->dirty = TRUE;
692 m->precious = FALSE;
693 m->page_lock = VM_PROT_NONE;
694 m->unusual = FALSE;
695 m->unlock_request = VM_PROT_NONE;
696 } else {
697 /*
698 * Cannot clean in place, so rip the old page out of the
699 * object, and stick the holding page in. Set new_m to the
700 * page in the new object.
701 */
702 vm_page_lock_queues();
703 VM_PAGE_QUEUES_REMOVE(m);
704 vm_page_remove(m);
705
1c79356b
A
706 vm_page_insert(holding_page, old_object, offset);
707 vm_page_unlock_queues();
708
709 m->dirty = TRUE;
710 m->precious = FALSE;
711 new_m = m;
712 new_m->page_lock = VM_PROT_NONE;
713 new_m->unlock_request = VM_PROT_NONE;
714
715 if (old_object->internal)
716 need_to_wire = TRUE;
717 }
718 /*
719 * Record that this page has been written out
720 */
721#if MACH_PAGEMAP
722 vm_external_state_set(old_object->existence_map, offset);
723#endif /* MACH_PAGEMAP */
724
725 vm_object_unlock(old_object);
726
727 vm_object_lock(new_object);
728
729 /*
730 * Put the page into the new object. If it is a not wired
731 * (if it's the real page) it will be activated.
732 */
733
734 vm_page_lock_queues();
735 vm_page_insert(new_m, new_object, new_offset);
736 if (need_to_wire)
737 vm_page_wire(new_m);
738 else
739 vm_page_activate(new_m);
740 PAGE_WAKEUP_DONE(new_m);
741 vm_page_unlock_queues();
742
743 vm_object_unlock(new_object);
744
745 /*
746 * Return the placeholder page to simplify cleanup.
747 */
748 return (holding_page);
749}

/*
 * Routine:	vm_pageclean_setup
 *
 * Purpose:	setup a page to be cleaned (made non-dirty), but not
 *		necessarily flushed from the VM page cache.
 *		This is accomplished by cleaning in place.
 *
 *		The page must not be busy, and the object and page
 *		queues must be locked.
 *
 */
void
vm_pageclean_setup(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	vm_object_t old_object = m->object;
	assert(!m->busy);
	assert(!m->cleaning);

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
	    (integer_t)old_object, m->offset, (integer_t)m,
	    (integer_t)new_m, new_offset);

	pmap_clear_modify(m->phys_page);
	vm_object_paging_begin(old_object);

	/*
	 * Record that this page has been written out
	 */
#if	MACH_PAGEMAP
	vm_external_state_set(old_object->existence_map, m->offset);
#endif	/*MACH_PAGEMAP*/

	/*
	 * Mark original page as cleaning in place.
	 */
	m->cleaning = TRUE;
	m->dirty = TRUE;
	m->precious = FALSE;

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->fictitious);
	new_m->fictitious = FALSE;
	new_m->private = TRUE;
	new_m->pageout = TRUE;
	new_m->phys_page = m->phys_page;
	vm_page_wire(new_m);

	vm_page_insert(new_m, new_object, new_offset);
	assert(!new_m->wanted);
	new_m->busy = FALSE;
}

void
vm_pageclean_copy(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
	    m, new_m, new_object, new_offset, 0);

	assert((!m->busy) && (!m->cleaning));

	assert(!new_m->private && !new_m->fictitious);

	pmap_clear_modify(m->phys_page);

	m->busy = TRUE;
	vm_object_paging_begin(m->object);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/*
	 * Copy the original page to the new page.
	 */
	vm_page_copy(m, new_m);

	/*
	 * Mark the old page as clean. A request to pmap_is_modified
	 * will get the right answer.
	 */
	vm_object_lock(m->object);
	m->dirty = FALSE;

	vm_object_paging_end(m->object);

	vm_page_lock_queues();
	if (!m->active && !m->inactive)
		vm_page_activate(m);
	PAGE_WAKEUP_DONE(m);

	vm_page_insert(new_m, new_object, new_offset);
	vm_page_activate(new_m);
	new_m->busy = FALSE;	/* No other thread can be waiting */
}


/*
 * Routine:	vm_pageout_initialize_page
 * Purpose:
 *	Causes the specified page to be initialized in
 *	the appropriate memory object. This routine is used to push
 *	pages into a copy-object when they are modified in the
 *	permanent object.
 *
 *	The page is moved to a temporary object and paged out.
 *
 * In/out conditions:
 *	The page in question must not be on any pageout queues.
 *	The object to which it belongs must be locked.
 *	The page must be busy, but not hold a paging reference.
 *
 * Implementation:
 *	Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_object_t		object;
	vm_object_offset_t	paging_offset;
	vm_page_t		holding_page;


	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_initialize_page, page 0x%X\n",
		(integer_t)m, 0, 0, 0, 0);
	assert(m->busy);

	/*
	 * Verify that we really want to clean this page
	 */
	assert(!m->absent);
	assert(!m->error);
	assert(m->dirty);

	/*
	 * Create a paging reference to let us play with the object.
	 */
	object = m->object;
	paging_offset = m->offset + object->paging_offset;
	vm_object_paging_begin(object);
	if (m->absent || m->error || m->restart ||
	    (!m->dirty && !m->precious)) {
		VM_PAGE_FREE(m);
		panic("reservation without pageout?"); /* alan */
		vm_object_unlock(object);
		return;
	}

	/* set the page for future call to vm_fault_list_request */
	holding_page = NULL;
	vm_page_lock_queues();
	pmap_clear_modify(m->phys_page);
	m->dirty = TRUE;
	m->busy = TRUE;
	m->list_req_pending = TRUE;
	m->cleaning = TRUE;
	m->pageout = TRUE;
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(object->pager,
				      paging_offset,
				      PAGE_SIZE);

	vm_object_lock(object);
}

#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif	/* MACH_CLUSTER_STATS */

boolean_t allow_clustered_pageouts = FALSE;

/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object and queues locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference
 *
 * The page must not be on any pageout queue.
 */

void
vm_pageout_cluster(vm_page_t m)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;


	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
		(integer_t)object, m->offset, (integer_t)m, 0, 0);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
	assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);

	/*
	 * protect the object from collapse -
	 * locking in the object's paging_offset.
	 */
	vm_object_paging_begin(object);

	/*
	 * set the page for future call to vm_fault_list_request
	 * page should already be marked busy
	 */
	vm_page_wire(m);
	m->list_req_pending = TRUE;
	m->cleaning = TRUE;
	m->pageout = TRUE;
	m->laundry = TRUE;

	if (object->internal == TRUE)
	        q = &vm_pageout_queue_internal;
	else
	        q = &vm_pageout_queue_external;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
	        q->pgo_idle = FALSE;
	        thread_wakeup((event_t) &q->pgo_pending);
	}
}
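
/*
 * Illustrative caller-side sketch only (hypothetical helper, not used in
 * this file): the locking and page-state protocol that
 * vm_pageout_cluster() expects, in the conventional object-then-page-
 * queues lock order.  vm_pageout_scan() below is the real caller and
 * additionally takes the page off the inactive queue before getting here.
 */
static __inline__ void
vm_pageout_cluster_usage_example(vm_page_t m)
{
	vm_object_lock(m->object);
	vm_page_lock_queues();

	/* page must be dirty or precious, unwired, and not already queued */
	m->busy = TRUE;
	vm_pageout_cluster(m);		/* takes a paging reference and queues the page */

	vm_page_unlock_queues();
	vm_object_unlock(m->object);	/* the I/O thread consumes the paging reference */
}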


unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	vm_pageout_throttle_up_count++;

	assert(m->laundry);
	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

	if (m->object->internal == TRUE)
	        q = &vm_pageout_queue_internal;
	else
	        q = &vm_pageout_queue_external;

	m->laundry = FALSE;
	q->pgo_laundry--;

	if (q->pgo_throttled == TRUE) {
	        q->pgo_throttled = FALSE;
	        thread_wakeup((event_t) &q->pgo_laundry);
	}
}


/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with vm_page_queue_free_lock held and
 *	vm_page_free_wanted == 0.
 */

#define DELAYED_UNLOCK_LIMIT  (3 * MAX_UPL_TRANSFER)

#define	FCS_IDLE		0
#define FCS_DELAYED		1
#define FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};

extern kern_return_t sysclk_gettime(mach_timespec_t *);

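/*
 * Reading aid, illustrative only: a hypothetical helper (never called in
 * this file) summarizing the flow_control transitions that
 * vm_pageout_scan() implements inline below when the default-pager queue
 * stays throttled.  The state drops back to FCS_IDLE once the queue is no
 * longer throttled.
 */
static __inline__ int
vm_pageout_flow_control_next_state_example(
	int		state,
	boolean_t	deadline_passed,
	int		deadlock_target)
{
	switch (state) {
	case FCS_IDLE:
		/* queue just became throttled: arm the deadlock timer */
		return FCS_DELAYED;
	case FCS_DELAYED:
		/* still throttled past the deadline? assume a deadlock */
		return deadline_passed ? FCS_DEADLOCK_DETECTED : FCS_DELAYED;
	case FCS_DEADLOCK_DETECTED:
		/* keep pushing relief pages, then re-arm the timer */
		return deadlock_target ? FCS_DEADLOCK_DETECTED : FCS_DELAYED;
	}
	return FCS_IDLE;
}
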
void
vm_pageout_scan(void)
{
	unsigned int loop_count = 0;
	unsigned int inactive_burst_count = 0;
	unsigned int active_burst_count = 0;
	vm_page_t   local_freeq = 0;
	int         local_freed = 0;
	int         delayed_unlock = 0;
	int         need_internal_inactive = 0;
	int         refmod_state = 0;
	int	vm_pageout_deadlock_target = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct  flow_control	flow_control;
	boolean_t active_throttled = FALSE;
	boolean_t inactive_throttled = FALSE;
	mach_timespec_t		ts;
	unsigned int msecs = 0;
	vm_object_t	object;


	flow_control.state = FCS_IDLE;
	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

        XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);

/*???*/	/*
	 * We want to gradually dribble pages from the active queue
	 * to the inactive queue.  If we let the inactive queue get
	 * very small, and then suddenly dump many pages into it,
	 * those pages won't get a sufficient chance to be referenced
	 * before we start taking them from the inactive queue.
	 *
	 * We must limit the rate at which we send pages to the pagers.
	 * data_write messages consume memory, for message buffers and
	 * for map-copy objects.  If we get too far ahead of the pagers,
	 * we can potentially run out of memory.
	 *
	 * We can use the laundry count to limit directly the number
	 * of pages outstanding to the default pager.  A similar
	 * strategy for external pagers doesn't work, because
	 * external pagers don't have to deallocate the pages sent them,
	 * and because we might have to send pages to external pagers
	 * even if they aren't processing writes.  So we also
	 * use a burst count to limit writes to external pagers.
	 *
	 * When memory is very tight, we can't rely on external pagers to
	 * clean pages.  They probably aren't running, because they
	 * aren't vm-privileged.  If we kept sending dirty pages to them,
	 * we could exhaust the free list.
	 */
	vm_page_lock_queues();
	delayed_unlock = 1;


Restart:
	/*
	 * Recalculate vm_page_inactive_target.
	 */
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
							  vm_page_inactive_count);
	object = NULL;

	for (;;) {
		vm_page_t m;

		if (delayed_unlock == 0)
		        vm_page_lock_queues();

		active_burst_count = vm_page_active_count;

		if (active_burst_count > vm_pageout_burst_active_throttle)
		        active_burst_count = vm_pageout_burst_active_throttle;

		/*
		 *	Move pages from active to inactive.
		 */
		while ((need_internal_inactive ||
			vm_page_inactive_count < vm_page_inactive_target) &&
		       !queue_empty(&vm_page_queue_active) &&
		       ((active_burst_count--) > 0)) {

			vm_pageout_active++;

			m = (vm_page_t) queue_first(&vm_page_queue_active);

			assert(m->active && !m->inactive);
			assert(!m->laundry);
			assert(m->object != kernel_object);

			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run... otherwise, we're likely to trip over this
			 * object in the same state as we work our way through
			 * the queue... clumps of pages associated with the same
			 * object are fairly typical on the inactive and active queues
			 */
			if (m->object != object) {
			        if (object != NULL) {
				        vm_object_unlock(object);
					object = NULL;
				}
			        if (!vm_object_lock_try(m->object)) {
				        /*
					 * move page to end of active queue and continue
					 */
				        queue_remove(&vm_page_queue_active, m,
						     vm_page_t, pageq);
					queue_enter(&vm_page_queue_active, m,
						    vm_page_t, pageq);

					goto done_with_activepage;
				}
				object = m->object;
			}
			/*
			 * if the page is BUSY, then we pull it
			 * off the active queue and leave it alone.
			 * when BUSY is cleared, it will get stuck
			 * back on the appropriate queue
			 */
			if (m->busy) {
				queue_remove(&vm_page_queue_active, m,
					     vm_page_t, pageq);
				m->pageq.next = NULL;
				m->pageq.prev = NULL;

				if (!m->fictitious)
					vm_page_active_count--;
				m->active = FALSE;

				goto done_with_activepage;
			}
			if (need_internal_inactive) {
				/*
				 * If we're unable to make forward progress
				 * with the current set of pages on the
				 * inactive queue due to busy objects or
				 * throttled pageout queues, then
				 * move a page that is already clean
				 * or belongs to a pageout queue that
				 * isn't currently throttled
				 */
				active_throttled = FALSE;

				if (object->internal) {
					if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
					        active_throttled = TRUE;
				} else if (VM_PAGE_Q_THROTTLED(eq)) {
					active_throttled = TRUE;
				}
				if (active_throttled == TRUE) {
					if (!m->dirty) {
					        refmod_state = pmap_get_refmod(m->phys_page);

						if (refmod_state & VM_MEM_REFERENCED)
						        m->reference = TRUE;
						if (refmod_state & VM_MEM_MODIFIED)
						        m->dirty = TRUE;
					}
					if (m->dirty || m->precious) {
						/*
						 * page is dirty and targets a THROTTLED queue
						 * so all we can do is move it back to the
						 * end of the active queue to get it out
						 * of the way
						 */
						queue_remove(&vm_page_queue_active, m,
							     vm_page_t, pageq);
						queue_enter(&vm_page_queue_active, m,
							    vm_page_t, pageq);

						vm_pageout_scan_active_throttled++;

						goto done_with_activepage;
					}
				}
				vm_pageout_scan_active_throttle_success++;
				need_internal_inactive--;
			}
			/*
			 * Deactivate the page while holding the object
			 * locked, so we know the page is still not busy.
			 * This should prevent races between pmap_enter
			 * and pmap_clear_reference.  The page might be
			 * absent or fictitious, but vm_page_deactivate
			 * can handle that.
			 */
			vm_page_deactivate(m);
done_with_activepage:
			if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {

			        if (object != NULL) {
				        vm_object_unlock(object);
					object = NULL;
				}
			        if (local_freeq) {
				        vm_page_free_list(local_freeq);

					local_freeq = 0;
					local_freed = 0;
				}
				delayed_unlock = 0;
			        vm_page_unlock_queues();

				mutex_pause();
				vm_page_lock_queues();
				/*
				 * continue the while loop processing
				 * the active queue... need to hold
				 * the page queues lock
				 */
				continue;
			}
		}



		/**********************************************************************
		 * above this point we're playing with the active queue
		 * below this point we're playing with the throttling mechanisms
		 * and the inactive queue
		 **********************************************************************/



		/*
		 *	We are done if we have met our target *and*
		 *	nobody is still waiting for a page.
		 */
		if (vm_page_free_count + local_freed >= vm_page_free_target) {
		        if (object != NULL) {
			        vm_object_unlock(object);
				object = NULL;
			}
		        if (local_freeq) {
			        vm_page_free_list(local_freeq);

				local_freeq = 0;
				local_freed = 0;
			}
			mutex_lock(&vm_page_queue_free_lock);

			if ((vm_page_free_count >= vm_page_free_target) &&
			    (vm_page_free_wanted == 0)) {

			        vm_page_unlock_queues();

				thread_wakeup((event_t) &vm_pageout_garbage_collect);
				return;
			}
			mutex_unlock(&vm_page_queue_free_lock);
		}


		/*
		 * Sometimes we have to pause:
		 *	1) No inactive pages - nothing to do.
		 *	2) Flow control - default pageout queue is full
		 *	3) Loop control - no acceptable pages found on the inactive queue
		 *	   within the last vm_pageout_burst_inactive_throttle iterations
		 */
		if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) {
		        vm_pageout_scan_empty_throttle++;
			msecs = vm_pageout_empty_wait;
			goto vm_pageout_scan_delay;

		} else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
		        vm_pageout_scan_burst_throttle++;
			msecs = vm_pageout_burst_wait;
			goto vm_pageout_scan_delay;

		} else if (VM_PAGE_Q_THROTTLED(iq)) {

		        switch (flow_control.state) {

			case FCS_IDLE:
reset_deadlock_timer:
			        ts.tv_sec = vm_pageout_deadlock_wait / 1000;
				ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
				sysclk_gettime(&flow_control.ts);
				ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

				flow_control.state = FCS_DELAYED;
				msecs = vm_pageout_deadlock_wait;

				break;

			case FCS_DELAYED:
			        sysclk_gettime(&ts);

				if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
					/*
					 * the pageout thread for the default pager is potentially
					 * deadlocked since the
					 * default pager queue has been throttled for more than the
					 * allowable time... we need to move some clean pages or dirty
					 * pages belonging to the external pagers if they aren't throttled
					 * vm_page_free_wanted represents the number of threads currently
					 * blocked waiting for pages... we'll move one page for each of
					 * these plus a fixed amount to break the logjam... once we're done
					 * moving this number of pages, we'll re-enter the FCS_DELAYED state
					 * with a new timeout target since we have no way of knowing
					 * whether we've broken the deadlock except through observation
					 * of the queue associated with the default pager... we need to
					 * stop moving pages and allow the system to run to see what
					 * state it settles into.
					 */
				        vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted;
					vm_pageout_scan_deadlock_detected++;
					flow_control.state = FCS_DEADLOCK_DETECTED;

					thread_wakeup((event_t) &vm_pageout_garbage_collect);
					goto consider_inactive;
				}
				/*
				 * just resniff instead of trying
				 * to compute a new delay time... we're going to be
				 * awakened immediately upon a laundry completion,
				 * so we won't wait any longer than necessary
				 */
				msecs = vm_pageout_idle_wait;
				break;

			case FCS_DEADLOCK_DETECTED:
			        if (vm_pageout_deadlock_target)
				        goto consider_inactive;
				goto reset_deadlock_timer;

			}
			vm_pageout_scan_throttle++;
			iq->pgo_throttled = TRUE;
vm_pageout_scan_delay:
			if (object != NULL) {
			        vm_object_unlock(object);
				object = NULL;
			}
		        if (local_freeq) {
			        vm_page_free_list(local_freeq);

				local_freeq = 0;
				local_freed = 0;
			}
			assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);

			counter(c_vm_pageout_scan_block++);

			vm_page_unlock_queues();

			thread_block(THREAD_CONTINUE_NULL);

			vm_page_lock_queues();
			delayed_unlock = 1;

			iq->pgo_throttled = FALSE;

			if (loop_count >= vm_page_inactive_count) {
				if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) {
					/*
					 * Make sure we move enough "appropriate"
					 * pages to the inactive queue before trying
					 * again.
					 */
					need_internal_inactive = vm_pageout_inactive_relief;
				}
				loop_count = 0;
			}
			inactive_burst_count = 0;

			goto Restart;
			/*NOTREACHED*/
		}

91447636
A
1447
1448 flow_control.state = FCS_IDLE;
1449consider_inactive:
1450 loop_count++;
1451 inactive_burst_count++;
1c79356b 1452 vm_pageout_inactive++;
9bccf70c 1453
91447636
A
1454 if (!queue_empty(&vm_page_queue_inactive)) {
1455 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1456
1457 if (m->clustered && (m->no_isync == TRUE)) {
1458 goto use_this_page;
1459 }
1460 }
9bccf70c
A
1461 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) {
1462 vm_zf_iterator = 0;
1463 } else {
1464 last_page_zf = 0;
1465 if((vm_zf_iterator+=1) >= vm_zf_iterator_count) {
1466 vm_zf_iterator = 0;
1467 }
1468 }
91447636 1469 if (queue_empty(&vm_page_queue_zf) ||
9bccf70c
A
1470 (((last_page_zf) || (vm_zf_iterator == 0)) &&
1471 !queue_empty(&vm_page_queue_inactive))) {
1472 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
1473 last_page_zf = 0;
1474 } else {
1475 m = (vm_page_t) queue_first(&vm_page_queue_zf);
1476 last_page_zf = 1;
1477 }
91447636
A
1478use_this_page:
1479 assert(!m->active && m->inactive);
1480 assert(!m->laundry);
1481 assert(m->object != kernel_object);
1c79356b 1482
91447636
A
1483 /*
1484 * Try to lock object; since we've alread got the
1485 * page queues lock, we can only 'try' for this one.
1486 * if the 'try' fails, we need to do a mutex_pause
1487 * to allow the owner of the object lock a chance to
1488 * run... otherwise, we're likely to trip over this
1489 * object in the same state as we work our way through
1490 * the queue... clumps of pages associated with the same
1491 * object are fairly typical on the inactive and active queues
1492 */
1493 if (m->object != object) {
1494 if (object != NULL) {
1495 vm_object_unlock(object);
1496 object = NULL;
1497 }
1498 if (!vm_object_lock_try(m->object)) {
1499 /*
1500 * Move page to end and continue.
1501 * Don't re-issue ticket
1502 */
1503 if (m->zero_fill) {
1504 queue_remove(&vm_page_queue_zf, m,
1505 vm_page_t, pageq);
1506 queue_enter(&vm_page_queue_zf, m,
1507 vm_page_t, pageq);
1508 } else {
1509 queue_remove(&vm_page_queue_inactive, m,
1510 vm_page_t, pageq);
1511 queue_enter(&vm_page_queue_inactive, m,
1512 vm_page_t, pageq);
55e303ae 1513 }
91447636
A
1514 vm_pageout_inactive_nolock++;
1515
1516 /*
1517 * force us to dump any collected free pages
1518 * and to pause before moving on
1519 */
1520 delayed_unlock = DELAYED_UNLOCK_LIMIT + 1;
55e303ae 1521
91447636 1522 goto done_with_inactivepage;
1c79356b 1523 }
91447636 1524 object = m->object;
1c79356b 1525 }
1c79356b 1526 /*
91447636
A
1527 * If the page belongs to a purgable object with no pending copies
1528 * against it, then we reap all of the pages in the object
1529 * and note that the object has been "emptied". It'll be up to the
1530 * application the discover this and recreate its contents if desired.
1c79356b 1531 */
91447636
A
1532 if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
1533 object->purgable == VM_OBJECT_PURGABLE_EMPTY) &&
1534 object->copy == VM_OBJECT_NULL) {
1c79356b 1535
91447636
A
1536 (void) vm_object_purge(object);
1537 vm_pageout_purged_objects++;
1c79356b 1538 /*
91447636
A
1539 * we've just taken all of the pages from this object,
1540 * so drop the lock now since we're not going to find
1541 * any more pages belonging to it anytime soon
1c79356b 1542 */
91447636
A
1543 vm_object_unlock(object);
1544 object = NULL;
0b4e3aa0 1545
91447636
A
1546 inactive_burst_count = 0;
1547
1548 goto done_with_inactivepage;
1c79356b
A
1549 }
1550
1c79356b 1551 /*
55e303ae
A
1552 * Paging out pages of external objects which
1553 * are currently being created must be avoided.
1554 * The pager may claim for memory, thus leading to a
1555 * possible dead lock between it and the pageout thread,
1556 * if such pages are finally chosen. The remaining assumption
1557 * is that there will finally be enough available pages in the
1558 * inactive pool to page out in order to satisfy all memory
1559 * claimed by the thread which concurrently creates the pager.
1c79356b 1560 */
1c79356b
A
1561 if (!object->pager_initialized && object->pager_created) {
1562 /*
1563 * Move page to end and continue, hoping that
1564 * there will be enough other inactive pages to
1565 * page out so that the thread which currently
1566 * initializes the pager will succeed.
0b4e3aa0
A
1567 * Don't re-grant the ticket, the page should
1568 * pulled from the queue and paged out whenever
1569 * one of its logically adjacent fellows is
1570 * targeted.
1c79356b 1571 */
91447636 1572 if (m->zero_fill) {
9bccf70c
A
1573 queue_remove(&vm_page_queue_zf, m,
1574 vm_page_t, pageq);
1575 queue_enter(&vm_page_queue_zf, m,
1576 vm_page_t, pageq);
1577 last_page_zf = 1;
1578 vm_zf_iterator = vm_zf_iterator_count - 1;
1579 } else {
1580 queue_remove(&vm_page_queue_inactive, m,
1581 vm_page_t, pageq);
1582 queue_enter(&vm_page_queue_inactive, m,
1583 vm_page_t, pageq);
1584 last_page_zf = 0;
1585 vm_zf_iterator = 1;
1586 }
1c79356b 1587 vm_pageout_inactive_avoid++;
1c79356b 1588
91447636
A
1589 goto done_with_inactivepage;
1590 }
1c79356b
A
1591 /*
1592 * Remove the page from the inactive list.
1593 */
91447636 1594 if (m->zero_fill) {
9bccf70c
A
1595 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1596 } else {
1597 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
1598 }
91447636
A
1599 m->pageq.next = NULL;
1600 m->pageq.prev = NULL;
1c79356b
A
1601 m->inactive = FALSE;
1602 if (!m->fictitious)
1603 vm_page_inactive_count--;
1604
1605 if (m->busy || !object->alive) {
1606 /*
1607 * Somebody is already playing with this page.
1608 * Leave it off the pageout queues.
1609 */
1c79356b 1610 vm_pageout_inactive_busy++;
91447636
A
1611
1612 goto done_with_inactivepage;
1c79356b
A
1613 }
1614
1615 /*
1616 * If it's absent or in error, we can reclaim the page.
1617 */
1618
1619 if (m->absent || m->error) {
1620 vm_pageout_inactive_absent++;
91447636
A
1621reclaim_page:
1622 if (vm_pageout_deadlock_target) {
1623 vm_pageout_scan_inactive_throttle_success++;
1624 vm_pageout_deadlock_target--;
1625 }
55e303ae
A
1626 if (m->tabled)
1627 vm_page_remove(m); /* clears tabled, object, offset */
1628 if (m->absent)
1629 vm_object_absent_release(object);
1630
91447636
A
1631 assert(m->pageq.next == NULL &&
1632 m->pageq.prev == NULL);
55e303ae
A
1633 m->pageq.next = (queue_entry_t)local_freeq;
1634 local_freeq = m;
91447636 1635 local_freed++;
55e303ae 1636
91447636
A
1637 inactive_burst_count = 0;
1638
1639 goto done_with_inactivepage;
1c79356b
A
1640 }
1641
1642 assert(!m->private);
1643 assert(!m->fictitious);
1644
1645 /*
1646 * If already cleaning this page in place, convert from
1647 * "adjacent" to "target". We can leave the page mapped,
1648 * and vm_pageout_object_terminate will determine whether
1649 * to free or reactivate.
1650 */
1651
1652 if (m->cleaning) {
0b4e3aa0
A
1653 m->busy = TRUE;
1654 m->pageout = TRUE;
1655 m->dump_cleaning = TRUE;
1656 vm_page_wire(m);
55e303ae 1657
91447636
A
1658 CLUSTER_STAT(vm_pageout_cluster_conversions++);
1659
1660 inactive_burst_count = 0;
1661
1662 goto done_with_inactivepage;
1c79356b
A
1663 }
1664
1665 /*
1666 * If it's being used, reactivate.
1667 * (Fictitious pages are either busy or absent.)
1668 */
91447636
A
1669 if ( (!m->reference) ) {
1670 refmod_state = pmap_get_refmod(m->phys_page);
1671
1672 if (refmod_state & VM_MEM_REFERENCED)
1673 m->reference = TRUE;
1674 if (refmod_state & VM_MEM_MODIFIED)
1675 m->dirty = TRUE;
1676 }
1677 if (m->reference) {
1678was_referenced:
1679 vm_page_activate(m);
1680 VM_STAT(reactivations++);
1c79356b 1681
1c79356b 1682 vm_pageout_inactive_used++;
9bccf70c 1683 last_page_zf = 0;
91447636 1684 inactive_burst_count = 0;
55e303ae 1685
91447636 1686 goto done_with_inactivepage;
1c79356b
A
1687 }
1688
91447636
A
1689 XPR(XPR_VM_PAGEOUT,
1690 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
1691 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
0b4e3aa0 1692
91447636
A
1693 /*
1694 * we've got a candidate page to steal...
1695 *
1696 * m->dirty is up to date courtesy of the
1697 * preceding check for m->reference... if
1698 * we get here, then m->reference had to be
1699 * FALSE which means we did a pmap_get_refmod
1700 * and updated both m->reference and m->dirty
1701 *
1702 * if it's dirty or precious we need to
1703 * see if the target queue is throtttled
1704 * it if is, we need to skip over it by moving it back
1705 * to the end of the inactive queue
1706 */
1707 inactive_throttled = FALSE;
1708
1709 if (m->dirty || m->precious) {
1710 if (object->internal) {
1711 if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default)))
1712 inactive_throttled = TRUE;
1713 } else if (VM_PAGE_Q_THROTTLED(eq)) {
1714 inactive_throttled = TRUE;
1c79356b 1715 }
91447636
A
1716 }
1717 if (inactive_throttled == TRUE) {
1718 if (m->zero_fill) {
1719 queue_enter(&vm_page_queue_zf, m,
1720 vm_page_t, pageq);
1721 } else {
1722 queue_enter(&vm_page_queue_inactive, m,
1723 vm_page_t, pageq);
1c79356b 1724 }
91447636
A
1725 if (!m->fictitious)
1726 vm_page_inactive_count++;
1727 m->inactive = TRUE;
1c79356b 1728
91447636 1729 vm_pageout_scan_inactive_throttled++;
0b4e3aa0 1730
91447636 1731 goto done_with_inactivepage;
1c79356b 1732 }
1c79356b 1733 /*
91447636
A
1734 * we've got a page that we can steal...
1735 * eliminate all mappings and make sure
1736 * we have the up-to-date modified state
1737 * first take the page BUSY, so that no new
1738 * mappings can be made
1c79356b 1739 */
1c79356b 1740 m->busy = TRUE;
55e303ae 1741
91447636
A
1742 /*
1743 * if we need to do a pmap_disconnect then we
1744 * need to re-evaluate m->dirty since the pmap_disconnect
1745 * provides the true state atomically... the
1746 * page was still mapped up to the pmap_disconnect
1747 * and may have been dirtied at the last microsecond
1748 *
1749 * we also check for the page being referenced 'late'
1750 * if it was, we first need to do a WAKEUP_DONE on it
1751 * since we already set m->busy = TRUE, before
1752 * going off to reactivate it
1753 *
1754 * if we don't need the pmap_disconnect, then
1755 * m->dirty is up to date courtesy of the
1756 * earlier check for m->reference... if
1757 * we get here, then m->reference had to be
1758 * FALSE which means we did a pmap_get_refmod
1759 * and updated both m->reference and m->dirty...
1760 */
1761 if (m->no_isync == FALSE) {
1762 refmod_state = pmap_disconnect(m->phys_page);
0b4e3aa0 1763
91447636
A
1764 if (refmod_state & VM_MEM_MODIFIED)
1765 m->dirty = TRUE;
1766 if (refmod_state & VM_MEM_REFERENCED) {
1767 m->reference = TRUE;
1768
1769 PAGE_WAKEUP_DONE(m);
1770 goto was_referenced;
1771 }
1772 }
1c79356b
A
1773 /*
1774 * If it's clean and not precious, we can free the page.
1775 */
1c79356b
A
1776 if (!m->dirty && !m->precious) {
1777 vm_pageout_inactive_clean++;
1778 goto reclaim_page;
1779 }
91447636 1780 vm_pageout_cluster(m);
1c79356b 1781
91447636 1782 vm_pageout_inactive_dirty++;
1c79356b 1783
91447636 1784 inactive_burst_count = 0;
1c79356b 1785
91447636
A
1786done_with_inactivepage:
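			/*
			 * batch the lock traffic here... rather than dropping the
			 * page queues lock after every page, we only unlock (and
			 * flush the locally collected free list) once delayed_unlock
			 * climbs past DELAYED_UNLOCK_LIMIT, and then mutex_pause()
			 * briefly so anyone spinning on the queues lock gets a
			 * chance to run
			 */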
1787 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
1c79356b 1788
91447636
A
1789 if (object != NULL) {
1790 vm_object_unlock(object);
1791 object = NULL;
1792 }
1793 if (local_freeq) {
1794 vm_page_free_list(local_freeq);
1795
1796 local_freeq = 0;
1797 local_freed = 0;
1798 }
1799 delayed_unlock = 0;
1800 vm_page_unlock_queues();
1801 mutex_pause();
1c79356b 1802 }
91447636
A
1803 /*
1804 * back to top of pageout scan loop
1805 */
1c79356b 1806 }
1c79356b
A
1807}
1808
1c79356b 1809
1c79356b
A
1810int vm_page_free_count_init;
1811
1812void
1813vm_page_free_reserve(
1814 int pages)
1815{
1816 int free_after_reserve;
1817
1818 vm_page_free_reserved += pages;
1819
1820 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
1821
1822 vm_page_free_min = vm_page_free_reserved +
1823 VM_PAGE_FREE_MIN(free_after_reserve);
1824
1825 vm_page_free_target = vm_page_free_reserved +
1826 VM_PAGE_FREE_TARGET(free_after_reserve);
1827
1828 if (vm_page_free_target < vm_page_free_min + 5)
1829 vm_page_free_target = vm_page_free_min + 5;
1830}
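/*
 * the net effect: vm_page_free_min and vm_page_free_target both sit as
 * offsets above vm_page_free_reserved, computed from whatever was left of
 * the boot-time free count after the reserve was carved out, and
 * vm_page_free_target is forced to be at least 5 pages above
 * vm_page_free_min (VM_PAGE_FREE_MIN and VM_PAGE_FREE_TARGET are tuning
 * macros, so the absolute values vary by platform)
 */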
1831
1832/*
1833 * vm_pageout is the high level pageout daemon.
1834 */
1835
55e303ae
A
1836void
1837vm_pageout_continue(void)
1838{
1839 vm_pageout_scan_event_counter++;
1840 vm_pageout_scan();
1841 /* we hold vm_page_queue_free_lock now */
1842 assert(vm_page_free_wanted == 0);
1843 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
1844 mutex_unlock(&vm_page_queue_free_lock);
1845
1846 counter(c_vm_pageout_block++);
91447636 1847 thread_block((thread_continue_t)vm_pageout_continue);
55e303ae
A
1848 /*NOTREACHED*/
1849}
1c79356b 1850
91447636
A
1851
1852/*
1853 * must be called with the
1854 * queues and object locks held
1855 */
1856static void
1857vm_pageout_queue_steal(vm_page_t m)
1c79356b 1858{
91447636 1859 struct vm_pageout_queue *q;
1c79356b 1860
91447636
A
1861 if (m->object->internal == TRUE)
1862 q = &vm_pageout_queue_internal;
1863 else
1864 q = &vm_pageout_queue_external;
0b4e3aa0 1865
91447636
A
1866 m->laundry = FALSE;
1867 m->pageout_queue = FALSE;
1868 queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
1c79356b 1869
91447636
A
1870 m->pageq.next = NULL;
1871 m->pageq.prev = NULL;
1c79356b 1872
91447636 1873 vm_object_paging_end(m->object);
1c79356b 1874
91447636
A
1875 q->pgo_laundry--;
1876}
1c79356b 1877
1c79356b 1878
91447636 1879#ifdef FAKE_DEADLOCK
1c79356b 1880
91447636
A
1881#define FAKE_COUNT 5000
1882
1883int internal_count = 0;
1884int fake_deadlock = 0;
1885
1886#endif
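/*
 * FAKE_DEADLOCK is a debug aid for the internal pageout queue: every
 * FAKE_COUNT pages the iothread below attempts a kmem_alloc() of roughly
 * the entire free + reserved pool (and immediately frees it) to provoke
 * the kind of memory pressure a real pageout deadlock would produce
 */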
1887
1888static void
1889vm_pageout_iothread_continue(struct vm_pageout_queue *q)
1890{
1891 vm_page_t m = NULL;
1892 vm_object_t object;
1893 boolean_t need_wakeup;
1894
1895 vm_page_lock_queues();
1896
1897 while ( !queue_empty(&q->pgo_pending) ) {
1898
1899 q->pgo_busy = TRUE;
1900 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
1901 m->pageout_queue = FALSE;
1902 vm_page_unlock_queues();
1903
1904 m->pageq.next = NULL;
1905 m->pageq.prev = NULL;
1906#ifdef FAKE_DEADLOCK
1907 if (q == &vm_pageout_queue_internal) {
1908 vm_offset_t addr;
1909 int pg_count;
1910
1911 internal_count++;
1912
1913 if ((internal_count == FAKE_COUNT)) {
1914
1915 pg_count = vm_page_free_count + vm_page_free_reserved;
1916
1917 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) {
1918 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count);
1919 }
1920 internal_count = 0;
1921 fake_deadlock++;
1922 }
1923 }
1924#endif
1925 object = m->object;
1926
1927 if (!object->pager_initialized) {
1928 vm_object_lock(object);
1929
1930 /*
1931 * If there is no memory object for the page, create
1932 * one and hand it to the default pager.
1933 */
1934
1935 if (!object->pager_initialized)
1936 vm_object_collapse(object, (vm_object_offset_t)0);
1937 if (!object->pager_initialized)
1938 vm_object_pager_create(object);
1939 if (!object->pager_initialized) {
1940 /*
1941 * Still no pager for the object.
1942 * Reactivate the page.
1943 *
1944 * Should only happen if there is no
1945 * default pager.
1946 */
1947 m->list_req_pending = FALSE;
1948 m->cleaning = FALSE;
1949 m->pageout = FALSE;
1950 vm_page_unwire(m);
1951
1952 vm_pageout_throttle_up(m);
1953
1954 vm_page_lock_queues();
1955 vm_pageout_dirty_no_pager++;
1956 vm_page_activate(m);
1957 vm_page_unlock_queues();
1958
1959 /*
1960 * And we are done with it.
1961 */
1962 PAGE_WAKEUP_DONE(m);
1963
1964 vm_object_paging_end(object);
1965 vm_object_unlock(object);
1966
1967 vm_page_lock_queues();
1968 continue;
1969 } else if (object->pager == MEMORY_OBJECT_NULL) {
1970 /*
1971 * This pager has been destroyed by either
1972 * memory_object_destroy or vm_object_destroy, and
1973 * so there is nowhere for the page to go.
1974 * Just free the page... VM_PAGE_FREE takes
1975 * care of cleaning up all the state...
1976 * including doing the vm_pageout_throttle_up
1977 */
1978 VM_PAGE_FREE(m);
1979
1980 vm_object_paging_end(object);
1981 vm_object_unlock(object);
1982
1983 vm_page_lock_queues();
1984 continue;
1985 }
1986 vm_object_unlock(object);
1987 }
1988 /*
1989 * we expect the paging_in_progress reference to have
1990 * already been taken on the object before it was added
1991 * to the appropriate pageout I/O queue... this will
1992 * keep the object from being terminated and/or the
1993 * paging_offset from changing until the I/O has
1994 * completed... therefore no need to lock the object to
1995 * pull the paging_offset from it.
1996 *
1997 * Send the data to the pager.
1998 * any pageout clustering happens there
1999 */
2000 memory_object_data_return(object->pager,
2001 m->offset + object->paging_offset,
2002 PAGE_SIZE,
2003 NULL,
2004 NULL,
2005 FALSE,
2006 FALSE,
2007 0);
2008
2009 vm_object_lock(object);
2010 vm_object_paging_end(object);
2011 vm_object_unlock(object);
2012
2013 vm_page_lock_queues();
2014 }
2015 assert_wait((event_t) q, THREAD_UNINT);
2016
2017
2018 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) {
2019 q->pgo_throttled = FALSE;
2020 need_wakeup = TRUE;
2021 } else
2022 need_wakeup = FALSE;
2023
2024 q->pgo_busy = FALSE;
2025 q->pgo_idle = TRUE;
2026 vm_page_unlock_queues();
2027
2028 if (need_wakeup == TRUE)
2029 thread_wakeup((event_t) &q->pgo_laundry);
2030
2031 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending);
2032 /*NOTREACHED*/
2033}
2034
2035
2036static void
2037vm_pageout_iothread_external(void)
2038{
2039
2040 vm_pageout_iothread_continue(&vm_pageout_queue_external);
2041 /*NOTREACHED*/
2042}
2043
2044
2045static void
2046vm_pageout_iothread_internal(void)
2047{
2048 thread_t self = current_thread();
2049
2050 self->options |= TH_OPT_VMPRIV;
2051
2052 vm_pageout_iothread_continue(&vm_pageout_queue_internal);
2053 /*NOTREACHED*/
2054}
2055
2056static void
2057vm_pageout_garbage_collect(int collect)
2058{
2059 if (collect) {
2060 stack_collect();
2061
2062 /*
2063 * consider_zone_gc should be last, because the other operations
2064 * might return memory to zones.
2065 */
2066 consider_machine_collect();
2067 consider_zone_gc();
2068
2069 consider_machine_adjust();
2070 }
2071
2072 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
2073
2074 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
2075 /*NOTREACHED*/
2076}
2077
2078
2079
2080void
2081vm_pageout(void)
2082{
2083 thread_t self = current_thread();
2084 thread_t thread;
2085 kern_return_t result;
2086 spl_t s;
2087
2088 /*
2089 * Set thread privileges.
2090 */
2091 s = splsched();
2092 thread_lock(self);
2093 self->priority = BASEPRI_PREEMPT - 1;
2094 set_sched_pri(self, self->priority);
2095 thread_unlock(self);
2096 splx(s);
2097
2098 /*
2099 * Initialize some paging parameters.
2100 */
2101
2102 if (vm_pageout_idle_wait == 0)
2103 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
2104
2105 if (vm_pageout_burst_wait == 0)
2106 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
2107
2108 if (vm_pageout_empty_wait == 0)
2109 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
2110
2111 if (vm_pageout_deadlock_wait == 0)
2112 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
2113
2114 if (vm_pageout_deadlock_relief == 0)
2115 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
2116
2117 if (vm_pageout_inactive_relief == 0)
2118 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF;
2119
2120 if (vm_pageout_burst_active_throttle == 0)
2121 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE;
2122
2123 if (vm_pageout_burst_inactive_throttle == 0)
2124 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
2125
2126 /*
2127 * Set kernel task to low backing store privileged
55e303ae
A
2128 * status
2129 */
2130 task_lock(kernel_task);
2131 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV;
2132 task_unlock(kernel_task);
2133
1c79356b 2134 vm_page_free_count_init = vm_page_free_count;
9bccf70c 2135 vm_zf_iterator = 0;
1c79356b
A
2136 /*
2137 * even if we've already called vm_page_free_reserve
2138 * call it again here to ensure that the targets are
2139 * accurately calculated (it uses vm_page_free_count_init);
2140 * calling it with an arg of 0 will not change the reserve
2141 * but will re-calculate free_min and free_target
2142 */
91447636
A
2143 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
2144 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
55e303ae 2145 } else
1c79356b
A
2146 vm_page_free_reserve(0);
2147
55e303ae 2148
91447636
A
2149 queue_init(&vm_pageout_queue_external.pgo_pending);
2150 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2151 vm_pageout_queue_external.pgo_laundry = 0;
2152 vm_pageout_queue_external.pgo_idle = FALSE;
2153 vm_pageout_queue_external.pgo_busy = FALSE;
2154 vm_pageout_queue_external.pgo_throttled = FALSE;
55e303ae 2155
91447636
A
2156 queue_init(&vm_pageout_queue_internal.pgo_pending);
2157 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
2158 vm_pageout_queue_internal.pgo_laundry = 0;
2159 vm_pageout_queue_internal.pgo_idle = FALSE;
2160 vm_pageout_queue_internal.pgo_busy = FALSE;
2161 vm_pageout_queue_internal.pgo_throttled = FALSE;
9bccf70c 2162
55e303ae 2163
91447636
A
2164 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread);
2165 if (result != KERN_SUCCESS)
2166 panic("vm_pageout_iothread_internal: create failed");
9bccf70c 2167
91447636 2168 thread_deallocate(thread);
9bccf70c 2169
55e303ae 2170
91447636
A
2171 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread);
2172 if (result != KERN_SUCCESS)
2173 panic("vm_pageout_iothread_external: create failed");
55e303ae 2174
91447636 2175 thread_deallocate(thread);
55e303ae 2176
9bccf70c 2177
91447636
A
2178 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread);
2179 if (result != KERN_SUCCESS)
2180 panic("vm_pageout_garbage_collect: create failed");
55e303ae 2181
91447636 2182 thread_deallocate(thread);
55e303ae 2183
55e303ae 2184
91447636
A
2185 vm_pageout_continue();
2186 /*NOTREACHED*/
9bccf70c
A
2187}
2188
1c79356b 2189
0b4e3aa0
A
2190static upl_t
2191upl_create(
91447636
A
2192 int flags,
2193 upl_size_t size)
0b4e3aa0
A
2194{
2195 upl_t upl;
55e303ae 2196 int page_field_size; /* bit field in word size buf */
0b4e3aa0 2197
55e303ae
A
2198 page_field_size = 0;
2199 if (flags & UPL_CREATE_LITE) {
2200 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2201 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2202 }
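	/*
	 * page_field_size is the "lite list": one bit per page of the
	 * request, rounded up to whole bytes and then to a 4 byte multiple...
	 * for an internal UPL the allocation below is laid out as
	 *
	 *	struct upl | upl_page_info[size/PAGE_SIZE] | lite bitmap
	 *
	 * which is the layout vm_object_upl_request depends on when it
	 * derives user_page_list and lite_list by pointer arithmetic
	 */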
2203 if(flags & UPL_CREATE_INTERNAL) {
0b4e3aa0 2204 upl = (upl_t)kalloc(sizeof(struct upl)
55e303ae
A
2205 + (sizeof(struct upl_page_info)*(size/PAGE_SIZE))
2206 + page_field_size);
0b4e3aa0 2207 } else {
55e303ae 2208 upl = (upl_t)kalloc(sizeof(struct upl) + page_field_size);
0b4e3aa0
A
2209 }
2210 upl->flags = 0;
2211 upl->src_object = NULL;
2212 upl->kaddr = (vm_offset_t)0;
2213 upl->size = 0;
2214 upl->map_object = NULL;
2215 upl->ref_count = 1;
2216 upl_lock_init(upl);
91447636 2217#ifdef UPL_DEBUG
0b4e3aa0
A
2218 upl->ubc_alias1 = 0;
2219 upl->ubc_alias2 = 0;
91447636 2220#endif /* UPL_DEBUG */
0b4e3aa0
A
2221 return(upl);
2222}
2223
2224static void
2225upl_destroy(
2226 upl_t upl)
2227{
55e303ae 2228 int page_field_size; /* bit field in word size buf */
0b4e3aa0 2229
91447636 2230#ifdef UPL_DEBUG
0b4e3aa0
A
2231 {
2232 upl_t upl_ele;
55e303ae
A
2233 vm_object_t object;
2234 if (upl->map_object->pageout) {
2235 object = upl->map_object->shadow;
2236 } else {
2237 object = upl->map_object;
2238 }
2239 vm_object_lock(object);
2240 queue_iterate(&object->uplq, upl_ele, upl_t, uplq) {
0b4e3aa0 2241 if(upl_ele == upl) {
55e303ae
A
2242 queue_remove(&object->uplq,
2243 upl_ele, upl_t, uplq);
0b4e3aa0
A
2244 break;
2245 }
2246 }
55e303ae 2247 vm_object_unlock(object);
0b4e3aa0 2248 }
91447636 2249#endif /* UPL_DEBUG */
55e303ae
A
2250 /* drop a reference on the map_object whether or */
2251 /* not a pageout object is inserted */
2252 if(upl->map_object->pageout)
0b4e3aa0 2253 vm_object_deallocate(upl->map_object);
55e303ae
A
2254
2255 page_field_size = 0;
2256 if (upl->flags & UPL_LITE) {
2257 page_field_size = ((upl->size/PAGE_SIZE) + 7) >> 3;
2258 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
2259 }
0b4e3aa0 2260 if(upl->flags & UPL_INTERNAL) {
91447636
A
2261 kfree(upl,
2262 sizeof(struct upl) +
2263 (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE))
2264 + page_field_size);
0b4e3aa0 2265 } else {
91447636 2266 kfree(upl, sizeof(struct upl) + page_field_size);
0b4e3aa0
A
2267 }
2268}
2269
91447636 2270void uc_upl_dealloc(upl_t upl);
0b4e3aa0
A
2271__private_extern__ void
2272uc_upl_dealloc(
1c79356b
A
2273 upl_t upl)
2274{
2275 upl->ref_count -= 1;
2276 if(upl->ref_count == 0) {
2277 upl_destroy(upl);
2278 }
2279}
2280
0b4e3aa0
A
2281void
2282upl_deallocate(
2283 upl_t upl)
2284{
2285
2286 upl->ref_count -= 1;
2287 if(upl->ref_count == 0) {
2288 upl_destroy(upl);
2289 }
2290}
1c79356b 2291
91447636
A
2292/*
2293 * Statistics about UPL enforcement of copy-on-write obligations.
2294 */
2295unsigned long upl_cow = 0;
2296unsigned long upl_cow_again = 0;
2297unsigned long upl_cow_contiguous = 0;
2298unsigned long upl_cow_pages = 0;
2299unsigned long upl_cow_again_pages = 0;
2300unsigned long upl_cow_contiguous_pages = 0;
2301
1c79356b 2302/*
0b4e3aa0 2303 * Routine: vm_object_upl_request
1c79356b
A
2304 * Purpose:
2305 * Cause the population of a portion of a vm_object.
2306 * Depending on the nature of the request, the pages
2307 * returned may contain valid data or be uninitialized.
2308 * A page list structure, listing the physical pages,
2309 * will be returned upon request.
2310 * This function is called by the file system or any other
2311 * supplier of backing store to a pager.
2312 * IMPORTANT NOTE: The caller must still respect the relationship
2313 * between the vm_object and its backing memory object. The
2314 * caller MUST NOT substitute changes in the backing file
2315 * without first doing a memory_object_lock_request on the
2316 * target range unless it is know that the pages are not
2317 * shared with another entity at the pager level.
2318 * Copy_in_to:
2319 * if a page list structure is present
2320 * return the mapped physical pages, where a
2321 * page is not present, return a non-initialized
2322 * one. If the no_sync bit is turned on, don't
2323 * call the pager unlock to synchronize with other
2324 * possible copies of the page. Leave pages busy
2325 * in the original object, if a page list structure
2326 * was specified. When a commit of the page list
2327 * pages is done, the dirty bit will be set for each one.
2328 * Copy_out_from:
2329 * If a page list structure is present, return
2330 * all mapped pages. Where a page does not exist
2331 * map a zero filled one. Leave pages busy in
2332 * the original object. If a page list structure
2333 * is not specified, this call is a no-op.
2334 *
2335 * Note: access of default pager objects has a rather interesting
2336 * twist. The caller of this routine, presumably the file system
2337 * page cache handling code, will never actually make a request
2338 * against a default pager backed object. Only the default
2339 * pager will make requests on backing store related vm_objects.
2340 * In this way the default pager can maintain the relationship
2341 * between backing store files (abstract memory objects) and
2342 * the vm_objects (cache objects) they support.
2343 *
2344 */
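/*
 * Illustrative call shape only (hypothetical caller, not code from this
 * file); the flags shown are ones handled below:
 *
 *	upl_t		upl;
 *	upl_page_info_t	*pl;
 *	unsigned int	count = MAX_UPL_TRANSFER;
 *
 *	kr = vm_object_upl_request(object, offset, size, &upl, NULL, &count,
 *		UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE);
 *	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
 *
 * with UPL_SET_INTERNAL the page list is allocated inside the upl itself,
 * which is why passing a NULL user_page_list is acceptable in that case.
 */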
91447636 2345
0b4e3aa0
A
2346__private_extern__ kern_return_t
2347vm_object_upl_request(
1c79356b 2348 vm_object_t object,
91447636
A
2349 vm_object_offset_t offset,
2350 upl_size_t size,
1c79356b 2351 upl_t *upl_ptr,
0b4e3aa0
A
2352 upl_page_info_array_t user_page_list,
2353 unsigned int *page_list_count,
91447636 2354 int cntrl_flags)
1c79356b 2355{
91447636 2356 vm_page_t dst_page = VM_PAGE_NULL;
1c79356b 2357 vm_object_offset_t dst_offset = offset;
91447636 2358 upl_size_t xfer_size = size;
1c79356b
A
2359 boolean_t do_m_lock = FALSE;
2360 boolean_t dirty;
55e303ae 2361 boolean_t hw_dirty;
1c79356b 2362 upl_t upl = NULL;
91447636
A
2363 unsigned int entry;
2364#if MACH_CLUSTER_STATS
1c79356b 2365 boolean_t encountered_lrp = FALSE;
91447636 2366#endif
1c79356b 2367 vm_page_t alias_page = NULL;
0b4e3aa0 2368 int page_ticket;
91447636
A
2369 int refmod_state;
2370 wpl_array_t lite_list = NULL;
2371 vm_object_t last_copy_object;
2372
2373
2374 if (cntrl_flags & ~UPL_VALID_FLAGS) {
2375 /*
2376 * For forward compatibility's sake,
2377 * reject any unknown flag.
2378 */
2379 return KERN_INVALID_VALUE;
2380 }
0b4e3aa0
A
2381
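	/*
	 * the page ticket is a small generation count that
	 * vm_object_super_upl_request packs into the control flags; it is
	 * consulted below so that a buddy page only joins a clustered
	 * pageout if its ticket matches the requested ticket or trails it
	 * by exactly one
	 */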
2382 page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK)
2383 >> UPL_PAGE_TICKET_SHIFT;
2384
55e303ae
A
2385 if(((size/PAGE_SIZE) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
2386 size = MAX_UPL_TRANSFER * PAGE_SIZE;
0b4e3aa0 2387 }
1c79356b
A
2388
2389 if(cntrl_flags & UPL_SET_INTERNAL)
0b4e3aa0
A
2390 if(page_list_count != NULL)
2391 *page_list_count = MAX_UPL_TRANSFER;
1c79356b
A
2392
2393 if((!object->internal) && (object->paging_offset != 0))
0b4e3aa0 2394 panic("vm_object_upl_request: vnode object with non-zero paging offset\n");
1c79356b
A
2395
2396 if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
2397 return KERN_SUCCESS;
2398 }
55e303ae 2399
91447636
A
2400 vm_object_lock(object);
2401 vm_object_paging_begin(object);
2402 vm_object_unlock(object);
2403
1c79356b 2404 if(upl_ptr) {
0b4e3aa0 2405 if(cntrl_flags & UPL_SET_INTERNAL) {
55e303ae 2406 if(cntrl_flags & UPL_SET_LITE) {
91447636 2407 uintptr_t page_field_size;
55e303ae
A
2408 upl = upl_create(
2409 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
2410 size);
2411 user_page_list = (upl_page_info_t *)
91447636 2412 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 2413 lite_list = (wpl_array_t)
91447636 2414 (((uintptr_t)user_page_list) +
55e303ae
A
2415 ((size/PAGE_SIZE) *
2416 sizeof(upl_page_info_t)));
2417 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2418 page_field_size =
2419 (page_field_size + 3) & 0xFFFFFFFC;
2420 bzero((char *)lite_list, page_field_size);
2421 upl->flags =
2422 UPL_LITE | UPL_INTERNAL;
2423 } else {
2424 upl = upl_create(UPL_CREATE_INTERNAL, size);
2425 user_page_list = (upl_page_info_t *)
91447636 2426 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
2427 upl->flags = UPL_INTERNAL;
2428 }
1c79356b 2429 } else {
55e303ae 2430 if(cntrl_flags & UPL_SET_LITE) {
91447636 2431 uintptr_t page_field_size;
55e303ae
A
2432 upl = upl_create(UPL_CREATE_LITE, size);
2433 lite_list = (wpl_array_t)
91447636 2434 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
2435 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
2436 page_field_size =
2437 (page_field_size + 3) & 0xFFFFFFFC;
2438 bzero((char *)lite_list, page_field_size);
2439 upl->flags = UPL_LITE;
2440 } else {
2441 upl = upl_create(UPL_CREATE_EXTERNAL, size);
2442 upl->flags = 0;
2443 }
0b4e3aa0 2444 }
55e303ae 2445
91447636
A
2446 if (object->phys_contiguous) {
2447 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2448 object->copy != VM_OBJECT_NULL) {
2449 /* Honor copy-on-write obligations */
2450
2451 /*
2452 * XXX FBDP
2453 * We could still have a race...
2454 * A is here building the UPL for a write().
2455 * A pushes the pages to the current copy
2456 * object.
2457 * A returns the UPL to the caller.
2458 * B comes along and establishes another
2459 * private mapping on this object, inserting
2460 * a new copy object between the original
2461 * object and the old copy object.
2462 * B reads a page and gets the original contents
2463 * from the original object.
2464 * A modifies the page in the original object.
2465 * B reads the page again and sees A's changes,
2466 * which is wrong...
2467 *
2468 * The problem is that the pages are not
2469 * marked "busy" in the original object, so
2470 * nothing prevents B from reading it before
2471 * A's changes are completed.
2472 *
2473 * The "paging_in_progress" might protect us
2474 * from the insertion of a new copy object
2475 * though... To be verified.
2476 */
2477 vm_object_lock_request(object,
2478 offset,
2479 size,
2480 FALSE,
2481 MEMORY_OBJECT_COPY_SYNC,
2482 VM_PROT_NO_CHANGE);
2483 upl_cow_contiguous++;
2484 upl_cow_contiguous_pages += size >> PAGE_SHIFT;
2485 }
2486
55e303ae
A
2487 upl->map_object = object;
2488 /* don't need any shadow mappings for this one */
2489 /* since it is already I/O memory */
2490 upl->flags |= UPL_DEVICE_MEMORY;
2491
55e303ae
A
2492
2493 /* paging_in_progress protects paging_offset */
0b4e3aa0 2494 upl->offset = offset + object->paging_offset;
55e303ae 2495 upl->size = size;
0b4e3aa0
A
2496 *upl_ptr = upl;
2497 if(user_page_list) {
2498 user_page_list[0].phys_addr =
91447636 2499 (offset + object->shadow_offset)>>PAGE_SHIFT;
0b4e3aa0 2500 user_page_list[0].device = TRUE;
1c79356b 2501 }
55e303ae
A
2502
2503 if(page_list_count != NULL) {
2504 if (upl->flags & UPL_INTERNAL) {
2505 *page_list_count = 0;
2506 } else {
2507 *page_list_count = 1;
2508 }
2509 }
91447636 2510
55e303ae
A
2511 return KERN_SUCCESS;
2512 }
91447636 2513
55e303ae
A
2514 if(user_page_list)
2515 user_page_list[0].device = FALSE;
2516
2517 if(cntrl_flags & UPL_SET_LITE) {
2518 upl->map_object = object;
2519 } else {
0b4e3aa0 2520 upl->map_object = vm_object_allocate(size);
91447636
A
2521 /*
2522 * No need to lock the new object: nobody else knows
2523 * about it yet, so it's all ours so far.
2524 */
0b4e3aa0 2525 upl->map_object->shadow = object;
0b4e3aa0
A
2526 upl->map_object->pageout = TRUE;
2527 upl->map_object->can_persist = FALSE;
55e303ae
A
2528 upl->map_object->copy_strategy =
2529 MEMORY_OBJECT_COPY_NONE;
0b4e3aa0 2530 upl->map_object->shadow_offset = offset;
55e303ae 2531 upl->map_object->wimg_bits = object->wimg_bits;
0b4e3aa0 2532 }
91447636 2533
55e303ae
A
2534 }
2535 if (!(cntrl_flags & UPL_SET_LITE)) {
2536 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2537 }
91447636
A
2538
2539 /*
2540 * ENCRYPTED SWAP:
2541 * Just mark the UPL as "encrypted" here.
2542 * We'll actually encrypt the pages later,
2543 * in upl_encrypt(), when the caller has
2544 * selected which pages need to go to swap.
2545 */
2546 if (cntrl_flags & UPL_ENCRYPT) {
2547 upl->flags |= UPL_ENCRYPTED;
2548 }
2549 if (cntrl_flags & UPL_FOR_PAGEOUT) {
2550 upl->flags |= UPL_PAGEOUT;
2551 }
55e303ae 2552 vm_object_lock(object);
55e303ae
A
2553
2554 /* we can lock in the paging_offset once paging_in_progress is set */
2555 if(upl_ptr) {
1c79356b
A
2556 upl->size = size;
2557 upl->offset = offset + object->paging_offset;
1c79356b 2558 *upl_ptr = upl;
91447636 2559#ifdef UPL_DEBUG
1c79356b 2560 queue_enter(&object->uplq, upl, upl_t, uplq);
91447636
A
2561#endif /* UPL_DEBUG */
2562 }
2563
2564 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2565 object->copy != VM_OBJECT_NULL) {
2566 /* Honor copy-on-write obligations */
2567
2568 /*
2569 * The caller is gathering these pages and
2570 * might modify their contents. We need to
2571 * make sure that the copy object has its own
2572 * private copies of these pages before we let
2573 * the caller modify them.
2574 */
2575 vm_object_update(object,
2576 offset,
2577 size,
2578 NULL,
2579 NULL,
2580 FALSE, /* should_return */
2581 MEMORY_OBJECT_COPY_SYNC,
2582 VM_PROT_NO_CHANGE);
2583 upl_cow++;
2584 upl_cow_pages += size >> PAGE_SHIFT;
2585
55e303ae 2586 }
91447636
A
2587 /* remember which copy object we synchronized with */
2588 last_copy_object = object->copy;
55e303ae 2589
1c79356b
A
2590 entry = 0;
2591 if(cntrl_flags & UPL_COPYOUT_FROM) {
2592 upl->flags |= UPL_PAGE_SYNC_DONE;
55e303ae 2593
1c79356b 2594 while (xfer_size) {
55e303ae
A
2595 if((alias_page == NULL) &&
2596 !(cntrl_flags & UPL_SET_LITE)) {
1c79356b
A
2597 vm_object_unlock(object);
2598 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2599 vm_object_lock(object);
2600 }
91447636 2601 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
1c79356b
A
2602 dst_page->fictitious ||
2603 dst_page->absent ||
2604 dst_page->error ||
91447636
A
2605 (dst_page->wire_count && !dst_page->pageout) ||
2606
2607 ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) &&
2608 (dst_page->page_ticket != page_ticket) &&
2609 ((dst_page->page_ticket+1) != page_ticket)) ) {
2610
2611 if (user_page_list)
1c79356b 2612 user_page_list[entry].phys_addr = 0;
91447636
A
2613 } else {
2614 /*
2615 * grab this up front...
2616 * a high percentage of the time we're going to
2617 * need the hardware modification state a bit later
2618 * anyway... so we can eliminate an extra call into
2619 * the pmap layer by grabbing it here and recording it
2620 */
2621 refmod_state = pmap_get_refmod(dst_page->phys_page);
2622
2623 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
2624 /*
2625 * we're only asking for DIRTY pages to be returned
2626 */
2627
2628 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
2629 /*
2630 * if this is the page stolen by vm_pageout_scan to be
2631 * cleaned (as opposed to a buddy being clustered in),
2632 * or this request is not being driven by a PAGEOUT cluster,
2633 * then we only need to check for the page being dirty or
2634 * precious to decide whether to return it
2635 */
2636 if (dst_page->dirty || dst_page->precious ||
2637 (refmod_state & VM_MEM_MODIFIED)) {
2638 goto check_busy;
2639 }
2640 }
2641 /*
2642 * this is a request for a PAGEOUT cluster and this page
2643 * is merely along for the ride as a 'buddy'... not only
2644 * does it have to be dirty to be returned, but it also
2645 * can't have been referenced recently... note that we've
2646 * already filtered above based on whether this page is
2647 * currently on the inactive queue or it meets the page
2648 * ticket (generation count) check
2649 */
2650 if ( !(refmod_state & VM_MEM_REFERENCED) &&
2651 ((refmod_state & VM_MEM_MODIFIED) ||
2652 dst_page->dirty || dst_page->precious) ) {
2653 goto check_busy;
2654 }
2655 /*
2656 * if we reach here, we're not to return
2657 * the page... go on to the next one
2658 */
2659 if (user_page_list)
2660 user_page_list[entry].phys_addr = 0;
2661 entry++;
2662 dst_offset += PAGE_SIZE_64;
2663 xfer_size -= PAGE_SIZE;
2664 continue;
55e303ae 2665 }
91447636 2666check_busy:
1c79356b
A
2667 if(dst_page->busy &&
2668 (!(dst_page->list_req_pending &&
2669 dst_page->pageout))) {
2670 if(cntrl_flags & UPL_NOBLOCK) {
55e303ae
A
2671 if(user_page_list) {
2672 user_page_list[entry].phys_addr = 0;
2673 }
1c79356b
A
2674 entry++;
2675 dst_offset += PAGE_SIZE_64;
2676 xfer_size -= PAGE_SIZE;
2677 continue;
2678 }
91447636
A
2679 /*
2680 * someone else is playing with the
2681 * page. We will have to wait.
2682 */
9bccf70c 2683 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1c79356b
A
2684 continue;
2685 }
2686 /* Someone else already cleaning the page? */
2687 if((dst_page->cleaning || dst_page->absent ||
1c79356b
A
2688 dst_page->wire_count != 0) &&
2689 !dst_page->list_req_pending) {
55e303ae 2690 if(user_page_list) {
1c79356b 2691 user_page_list[entry].phys_addr = 0;
55e303ae 2692 }
1c79356b
A
2693 entry++;
2694 dst_offset += PAGE_SIZE_64;
2695 xfer_size -= PAGE_SIZE;
2696 continue;
2697 }
2698 /* eliminate all mappings from the */
2699 /* original object and its progeny */
2700
2701 vm_page_lock_queues();
55e303ae 2702
91447636
A
2703 if (dst_page->pageout_queue == TRUE)
2704 /*
2705 * we've buddied up a page for a clustered pageout
2706 * that has already been moved to the pageout
2707 * queue by pageout_scan... we need to remove
2708 * it from the queue and drop the laundry count
2709 * on that queue
2710 */
2711 vm_pageout_queue_steal(dst_page);
2712#if MACH_CLUSTER_STATS
1c79356b
A
2713 /* pageout statistics gathering. count */
2714 /* all the pages we will page out that */
2715 /* were not counted in the initial */
2716 /* vm_pageout_scan work */
2717 if(dst_page->list_req_pending)
2718 encountered_lrp = TRUE;
2719 if((dst_page->dirty ||
2720 (dst_page->object->internal &&
2721 dst_page->precious)) &&
2722 (dst_page->list_req_pending
2723 == FALSE)) {
2724 if(encountered_lrp) {
2725 CLUSTER_STAT
2726 (pages_at_higher_offsets++;)
2727 } else {
2728 CLUSTER_STAT
2729 (pages_at_lower_offsets++;)
2730 }
2731 }
91447636 2732#endif
1c79356b
A
2733 /* Turn off busy indication on pending */
2734 /* pageout. Note: we can only get here */
2735 /* in the request pending case. */
2736 dst_page->list_req_pending = FALSE;
2737 dst_page->busy = FALSE;
2738 dst_page->cleaning = FALSE;
2739
91447636 2740 hw_dirty = refmod_state & VM_MEM_MODIFIED;
55e303ae
A
2741 dirty = hw_dirty ? TRUE : dst_page->dirty;
2742
2743 if(cntrl_flags & UPL_SET_LITE) {
2744 int pg_num;
2745 pg_num = (dst_offset-offset)/PAGE_SIZE;
2746 lite_list[pg_num>>5] |=
2747 1 << (pg_num & 31);
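				/*
				 * pg_num>>5 selects the 32-bit word of the
				 * lite list and (pg_num & 31) the bit within
				 * it, recording that page pg_num is covered
				 * by this lite UPL
				 */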
2748 if (hw_dirty)
2749 pmap_clear_modify(dst_page->phys_page);
2750 /*
2751 * Record that this page has been
2752 * written out
2753 */
2754#if MACH_PAGEMAP
2755 vm_external_state_set(
2756 object->existence_map,
2757 dst_page->offset);
2758#endif /*MACH_PAGEMAP*/
2759
2760 /*
2761 * Mark original page as cleaning
2762 * in place.
2763 */
2764 dst_page->cleaning = TRUE;
2765 dst_page->dirty = TRUE;
2766 dst_page->precious = FALSE;
2767 } else {
2768 /* use pageclean setup, it is more */
2769 /* convenient even for the pageout */
2770 /* cases here */
91447636
A
2771
2772 vm_object_lock(upl->map_object);
55e303ae
A
2773 vm_pageclean_setup(dst_page,
2774 alias_page, upl->map_object,
2775 size - xfer_size);
91447636 2776 vm_object_unlock(upl->map_object);
55e303ae
A
2777
2778 alias_page->absent = FALSE;
2779 alias_page = NULL;
2780 }
1c79356b
A
2781
2782 if(!dirty) {
2783 dst_page->dirty = FALSE;
2784 dst_page->precious = TRUE;
2785 }
2786
2787 if(dst_page->pageout)
2788 dst_page->busy = TRUE;
2789
91447636
A
2790 if ( (cntrl_flags & UPL_ENCRYPT) ) {
2791 /*
2792 * ENCRYPTED SWAP:
2793 * We want to deny access to the target page
2794 * because its contents are about to be
2795 * encrypted and the user would be very
2796 * confused to see encrypted data instead
2797 * of their data.
2798 */
2799 dst_page->busy = TRUE;
2800 }
2801 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
2802 /*
2803 * deny access to the target page
2804 * while it is being worked on
2805 */
2806 if ((!dst_page->pageout) &&
2807 (dst_page->wire_count == 0)) {
1c79356b
A
2808 dst_page->busy = TRUE;
2809 dst_page->pageout = TRUE;
2810 vm_page_wire(dst_page);
2811 }
2812 }
91447636 2813
1c79356b
A
2814 if(user_page_list) {
2815 user_page_list[entry].phys_addr
55e303ae 2816 = dst_page->phys_page;
1c79356b
A
2817 user_page_list[entry].dirty =
2818 dst_page->dirty;
2819 user_page_list[entry].pageout =
2820 dst_page->pageout;
2821 user_page_list[entry].absent =
2822 dst_page->absent;
2823 user_page_list[entry].precious =
2824 dst_page->precious;
2825 }
1c79356b 2826 vm_page_unlock_queues();
91447636
A
2827
2828 /*
2829 * ENCRYPTED SWAP:
2830 * The caller is gathering this page and might
2831 * access its contents later on. Decrypt the
2832 * page before adding it to the UPL, so that
2833 * the caller never sees encrypted data.
2834 */
2835 if (! (cntrl_flags & UPL_ENCRYPT) &&
2836 dst_page->encrypted) {
2837 assert(dst_page->busy);
2838
2839 vm_page_decrypt(dst_page, 0);
2840 vm_page_decrypt_for_upl_counter++;
2841
2842 /*
2843 * Retry this page, since anything
2844 * could have changed while we were
2845 * decrypting.
2846 */
2847 continue;
2848 }
2849 }
2850 entry++;
2851 dst_offset += PAGE_SIZE_64;
1c79356b
A
2852 xfer_size -= PAGE_SIZE;
2853 }
2854 } else {
2855 while (xfer_size) {
55e303ae
A
2856 if((alias_page == NULL) &&
2857 !(cntrl_flags & UPL_SET_LITE)) {
1c79356b
A
2858 vm_object_unlock(object);
2859 VM_PAGE_GRAB_FICTITIOUS(alias_page);
2860 vm_object_lock(object);
2861 }
55e303ae 2862
91447636
A
2863 if ((cntrl_flags & UPL_WILL_MODIFY) &&
2864 object->copy != last_copy_object) {
2865 /* Honor copy-on-write obligations */
2866
2867 /*
2868 * The copy object has changed since we
2869 * last synchronized for copy-on-write.
2870 * Another copy object might have been
2871 * inserted while we released the object's
2872 * lock. Since someone could have seen the
2873 * original contents of the remaining pages
2874 * through that new object, we have to
2875 * synchronize with it again for the remaining
2876 * pages only. The previous pages are "busy"
2877 * so they can not be seen through the new
2878 * mapping. The new mapping will see our
2879 * upcoming changes for those previous pages,
2880 * but that's OK since they couldn't see what
2881 * was there before. It's just a race anyway
2882 * and there's no guarantee of consistency or
2883 * atomicity. We just don't want new mappings
2884 * to see both the *before* and *after* pages.
2885 */
2886 if (object->copy != VM_OBJECT_NULL) {
2887 vm_object_update(
2888 object,
2889 dst_offset,/* current offset */
2890 xfer_size, /* remaining size */
2891 NULL,
2892 NULL,
2893 FALSE, /* should_return */
2894 MEMORY_OBJECT_COPY_SYNC,
2895 VM_PROT_NO_CHANGE);
2896 upl_cow_again++;
2897 upl_cow_again_pages +=
2898 xfer_size >> PAGE_SHIFT;
2899 }
2900 /* remember the copy object we synced with */
2901 last_copy_object = object->copy;
2902 }
2903
2904 dst_page = vm_page_lookup(object, dst_offset);
2905
1c79356b 2906 if(dst_page != VM_PAGE_NULL) {
9bccf70c
A
2907 if((cntrl_flags & UPL_RET_ONLY_ABSENT) &&
2908 !((dst_page->list_req_pending)
2909 && (dst_page->absent))) {
2910 /* we are doing extended range */
2911 /* requests. we want to grab */
2912 /* pages around some which are */
2913 /* already present. */
55e303ae 2914 if(user_page_list) {
9bccf70c 2915 user_page_list[entry].phys_addr = 0;
55e303ae 2916 }
9bccf70c
A
2917 entry++;
2918 dst_offset += PAGE_SIZE_64;
2919 xfer_size -= PAGE_SIZE;
2920 continue;
2921 }
0b4e3aa0
A
2922 if((dst_page->cleaning) &&
2923 !(dst_page->list_req_pending)) {
2924 /*someone else is writing to the */
2925 /* page. We will have to wait. */
9bccf70c 2926 PAGE_SLEEP(object,dst_page,THREAD_UNINT);
0b4e3aa0
A
2927 continue;
2928 }
2929 if ((dst_page->fictitious &&
2930 dst_page->list_req_pending)) {
2931 /* dump the fictitious page */
2932 dst_page->list_req_pending = FALSE;
2933 dst_page->clustered = FALSE;
55e303ae 2934
0b4e3aa0
A
2935 vm_page_lock_queues();
2936 vm_page_free(dst_page);
2937 vm_page_unlock_queues();
55e303ae 2938
91447636 2939 dst_page = NULL;
0b4e3aa0
A
2940 } else if ((dst_page->absent &&
2941 dst_page->list_req_pending)) {
2942 /* the default_pager case */
2943 dst_page->list_req_pending = FALSE;
2944 dst_page->busy = FALSE;
0b4e3aa0 2945 }
1c79356b 2946 }
91447636 2947 if(dst_page == VM_PAGE_NULL) {
0b4e3aa0
A
2948 if(object->private) {
2949 /*
2950 * This is a nasty wrinkle for users
2951 * of upl who encounter device or
2952 * private memory; however, it is
2953 * unavoidable, since only a fault can
2954 * resolve the actual backing
2955 * physical page by asking the
2956 * backing device.
2957 */
55e303ae
A
2958 if(user_page_list) {
2959 user_page_list[entry].phys_addr = 0;
2960 }
0b4e3aa0
A
2961 entry++;
2962 dst_offset += PAGE_SIZE_64;
2963 xfer_size -= PAGE_SIZE;
2964 continue;
2965 }
1c79356b
A
2966 /* need to allocate a page */
2967 dst_page = vm_page_alloc(object, dst_offset);
2968 if (dst_page == VM_PAGE_NULL) {
0b4e3aa0
A
2969 vm_object_unlock(object);
2970 VM_PAGE_WAIT();
2971 vm_object_lock(object);
2972 continue;
1c79356b
A
2973 }
2974 dst_page->busy = FALSE;
2975#if 0
2976 if(cntrl_flags & UPL_NO_SYNC) {
2977 dst_page->page_lock = 0;
2978 dst_page->unlock_request = 0;
2979 }
2980#endif
91447636
A
2981 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
2982 /*
2983 * if UPL_RET_ONLY_ABSENT was specified,
2984 * then we're definitely setting up a
2985 * UPL for a clustered read/pagein
2986 * operation... mark the pages as clustered
2987 * so vm_fault can correctly attribute them
2988 * to the 'pagein' bucket the first time
2989 * a fault happens on them
2990 */
2991 dst_page->clustered = TRUE;
2992 }
1c79356b
A
2993 dst_page->absent = TRUE;
2994 object->absent_count++;
2995 }
2996#if 1
2997 if(cntrl_flags & UPL_NO_SYNC) {
2998 dst_page->page_lock = 0;
2999 dst_page->unlock_request = 0;
3000 }
3001#endif /* 1 */
91447636
A
3002
3003 /*
3004 * ENCRYPTED SWAP:
3005 */
3006 if (cntrl_flags & UPL_ENCRYPT) {
3007 /*
3008 * The page is going to be encrypted when we
3009 * get it from the pager, so mark it so.
3010 */
3011 dst_page->encrypted = TRUE;
3012 } else {
3013 /*
3014 * Otherwise, the page will not contain
3015 * encrypted data.
3016 */
3017 dst_page->encrypted = FALSE;
3018 }
3019
1c79356b
A
3020 dst_page->overwriting = TRUE;
3021 if(dst_page->fictitious) {
3022 panic("need corner case for fictitious page");
3023 }
3024 if(dst_page->page_lock) {
3025 do_m_lock = TRUE;
3026 }
3027 if(upl_ptr) {
3028
3029 /* eliminate all mappings from the */
3030 /* original object and its progeny */
3031
3032 if(dst_page->busy) {
3033 /*someone else is playing with the */
3034 /* page. We will have to wait. */
9bccf70c 3035 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1c79356b
A
3036 continue;
3037 }
1c79356b 3038 vm_page_lock_queues();
55e303ae 3039
91447636
A
3040 if( !(cntrl_flags & UPL_FILE_IO))
3041 hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED;
3042 else
3043 hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED;
55e303ae
A
3044 dirty = hw_dirty ? TRUE : dst_page->dirty;
3045
3046 if(cntrl_flags & UPL_SET_LITE) {
3047 int pg_num;
3048 pg_num = (dst_offset-offset)/PAGE_SIZE;
3049 lite_list[pg_num>>5] |=
3050 1 << (pg_num & 31);
3051 if (hw_dirty)
3052 pmap_clear_modify(dst_page->phys_page);
3053 /*
3054 * Record that this page has been
3055 * written out
3056 */
3057#if MACH_PAGEMAP
3058 vm_external_state_set(
3059 object->existence_map,
3060 dst_page->offset);
3061#endif /*MACH_PAGEMAP*/
3062
3063 /*
3064 * Mark original page as cleaning
3065 * in place.
3066 */
3067 dst_page->cleaning = TRUE;
3068 dst_page->dirty = TRUE;
3069 dst_page->precious = FALSE;
3070 } else {
3071 /* use pageclean setup, it is more */
3072 /* convenient even for the pageout */
3073 /* cases here */
91447636 3074 vm_object_lock(upl->map_object);
55e303ae
A
3075 vm_pageclean_setup(dst_page,
3076 alias_page, upl->map_object,
3077 size - xfer_size);
91447636 3078 vm_object_unlock(upl->map_object);
55e303ae
A
3079
3080 alias_page->absent = FALSE;
3081 alias_page = NULL;
9bccf70c 3082 }
1c79356b
A
3083
3084 if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
3085 /* clean in place for read implies */
3086 /* that a write will be done on all */
3087 /* the pages that are dirty before */
3088 /* a upl commit is done. The caller */
3089 /* is obligated to preserve the */
3090 /* contents of all pages marked */
3091 /* dirty. */
3092 upl->flags |= UPL_CLEAR_DIRTY;
3093 }
3094
3095 if(!dirty) {
3096 dst_page->dirty = FALSE;
3097 dst_page->precious = TRUE;
3098 }
3099
3100 if (dst_page->wire_count == 0) {
3101 /* deny access to the target page while */
3102 /* it is being worked on */
3103 dst_page->busy = TRUE;
3104 } else {
3105 vm_page_wire(dst_page);
3106 }
91447636
A
3107 if(cntrl_flags & UPL_RET_ONLY_ABSENT) {
3108 /*
3109 * expect the page not to be used
3110 * since it's coming in as part
3111 * of a cluster and could be
3112 * speculative... pages that
3113 * are 'consumed' will get a
3114 * hardware reference
3115 */
3116 dst_page->reference = FALSE;
3117 } else {
3118 /*
3119 * expect the page to be used
3120 */
3121 dst_page->reference = TRUE;
3122 }
1c79356b
A
3123 dst_page->precious =
3124 (cntrl_flags & UPL_PRECIOUS)
3125 ? TRUE : FALSE;
1c79356b
A
3126 if(user_page_list) {
3127 user_page_list[entry].phys_addr
55e303ae 3128 = dst_page->phys_page;
1c79356b 3129 user_page_list[entry].dirty =
0b4e3aa0 3130 dst_page->dirty;
1c79356b
A
3131 user_page_list[entry].pageout =
3132 dst_page->pageout;
3133 user_page_list[entry].absent =
3134 dst_page->absent;
3135 user_page_list[entry].precious =
3136 dst_page->precious;
3137 }
3138 vm_page_unlock_queues();
3139 }
3140 entry++;
3141 dst_offset += PAGE_SIZE_64;
3142 xfer_size -= PAGE_SIZE;
3143 }
3144 }
91447636 3145
0b4e3aa0
A
3146 if (upl->flags & UPL_INTERNAL) {
3147 if(page_list_count != NULL)
3148 *page_list_count = 0;
3149 } else if (page_list_count != NULL &&
3150 *page_list_count > entry) {
3151 *page_list_count = entry;
3152 }
3153
1c79356b
A
3154 if(alias_page != NULL) {
3155 vm_page_lock_queues();
3156 vm_page_free(alias_page);
3157 vm_page_unlock_queues();
3158 }
0b4e3aa0 3159
1c79356b
A
3160 if(do_m_lock) {
3161 vm_prot_t access_required;
3162 /* call back all associated pages from other users of the pager */
3163 /* all future updates will be on data which is based on the */
3164 /* changes we are going to make here. Note: it is assumed that */
3165 /* we already hold copies of the data so we will not be seeing */
3166 /* an avalanche of incoming data from the pager */
3167 access_required = (cntrl_flags & UPL_COPYOUT_FROM)
3168 ? VM_PROT_READ : VM_PROT_WRITE;
3169 while (TRUE) {
3170 kern_return_t rc;
1c79356b
A
3171
3172 if(!object->pager_ready) {
9bccf70c
A
3173 wait_result_t wait_result;
3174
3175 wait_result = vm_object_sleep(object,
3176 VM_OBJECT_EVENT_PAGER_READY,
3177 THREAD_UNINT);
3178 if (wait_result != THREAD_AWAKENED) {
3179 vm_object_unlock(object);
91447636 3180 return KERN_FAILURE;
1c79356b 3181 }
1c79356b
A
3182 continue;
3183 }
3184
3185 vm_object_unlock(object);
91447636 3186 rc = memory_object_data_unlock(
1c79356b 3187 object->pager,
1c79356b
A
3188 dst_offset + object->paging_offset,
3189 size,
91447636
A
3190 access_required);
3191 if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED)
3192 return KERN_FAILURE;
3193 vm_object_lock(object);
3194
3195 if (rc == KERN_SUCCESS)
3196 break;
1c79356b 3197 }
91447636 3198
1c79356b
A
3199 /* let's wait on the last page requested */
3200 /* NOTE: we will have to update lock completed routine to signal */
3201 if(dst_page != VM_PAGE_NULL &&
3202 (access_required & dst_page->page_lock) != access_required) {
3203 PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
91447636
A
3204 vm_object_unlock(object);
3205 thread_block(THREAD_CONTINUE_NULL);
3206 return KERN_SUCCESS;
1c79356b
A
3207 }
3208 }
91447636 3209
1c79356b
A
3210 vm_object_unlock(object);
3211 return KERN_SUCCESS;
3212}
3213
0b4e3aa0 3214/* JMM - Backward compatibility for now */
1c79356b 3215kern_return_t
91447636
A
3216vm_fault_list_request( /* forward */
3217 memory_object_control_t control,
3218 vm_object_offset_t offset,
3219 upl_size_t size,
3220 upl_t *upl_ptr,
3221 upl_page_info_t **user_page_list_ptr,
3222 int page_list_count,
3223 int cntrl_flags);
3224kern_return_t
0b4e3aa0
A
3225vm_fault_list_request(
3226 memory_object_control_t control,
1c79356b 3227 vm_object_offset_t offset,
91447636 3228 upl_size_t size,
0b4e3aa0 3229 upl_t *upl_ptr,
1c79356b
A
3230 upl_page_info_t **user_page_list_ptr,
3231 int page_list_count,
3232 int cntrl_flags)
3233{
0b4e3aa0
A
3234 int local_list_count;
3235 upl_page_info_t *user_page_list;
3236 kern_return_t kr;
3237
3238 if (user_page_list_ptr != NULL) {
3239 local_list_count = page_list_count;
3240 user_page_list = *user_page_list_ptr;
3241 } else {
3242 local_list_count = 0;
3243 user_page_list = NULL;
3244 }
3245 kr = memory_object_upl_request(control,
3246 offset,
3247 size,
3248 upl_ptr,
3249 user_page_list,
3250 &local_list_count,
3251 cntrl_flags);
3252
3253 if(kr != KERN_SUCCESS)
3254 return kr;
3255
3256 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) {
3257 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr);
3258 }
3259
3260 return KERN_SUCCESS;
3261}
3262
3263
3264
3265/*
3266 * Routine: vm_object_super_upl_request
3267 * Purpose:
3268 * Cause the population of a portion of a vm_object
3269 * in much the same way as memory_object_upl_request.
3270 * Depending on the nature of the request, the pages
3271 * returned may contain valid data or be uninitialized.
3272 * However, the region may be expanded up to the super
3273 * cluster size provided.
3274 */
3275
3276__private_extern__ kern_return_t
3277vm_object_super_upl_request(
3278 vm_object_t object,
3279 vm_object_offset_t offset,
91447636
A
3280 upl_size_t size,
3281 upl_size_t super_cluster,
0b4e3aa0
A
3282 upl_t *upl,
3283 upl_page_info_t *user_page_list,
3284 unsigned int *page_list_count,
3285 int cntrl_flags)
3286{
3287 vm_page_t target_page;
3288 int ticket;
3289
91447636 3290
1c79356b
A
3291 if(object->paging_offset > offset)
3292 return KERN_FAILURE;
0b4e3aa0 3293
55e303ae 3294 assert(object->paging_in_progress);
1c79356b 3295 offset = offset - object->paging_offset;
91447636 3296
55e303ae 3297 if(cntrl_flags & UPL_FOR_PAGEOUT) {
91447636
A
3298
3299 vm_object_lock(object);
3300
0b4e3aa0
A
3301 if((target_page = vm_page_lookup(object, offset))
3302 != VM_PAGE_NULL) {
3303 ticket = target_page->page_ticket;
3304 cntrl_flags = cntrl_flags & ~(int)UPL_PAGE_TICKET_MASK;
3305 cntrl_flags = cntrl_flags |
3306 ((ticket << UPL_PAGE_TICKET_SHIFT)
3307 & UPL_PAGE_TICKET_MASK);
3308 }
91447636 3309 vm_object_unlock(object);
0b4e3aa0
A
3310 }
3311
91447636 3312 if (super_cluster > size) {
1c79356b
A
3313
3314 vm_object_offset_t base_offset;
91447636 3315 upl_size_t super_size;
1c79356b
A
3316
3317 base_offset = (offset &
3318 ~((vm_object_offset_t) super_cluster - 1));
3319 super_size = (offset+size) > (base_offset + super_cluster) ?
3320 super_cluster<<1 : super_cluster;
3321 super_size = ((base_offset + super_size) > object->size) ?
3322 (object->size - base_offset) : super_size;
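		/*
		 * base_offset rounds the request down to a super_cluster
		 * boundary (super_cluster is assumed to be a power of two
		 * here); if the original request straddles that boundary the
		 * cluster size is doubled, and the result is then clipped so
		 * it never runs past the end of the object
		 */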
3323 if(offset > (base_offset + super_size))
91447636
A
3324 panic("vm_object_super_upl_request: Missed target pageout"
3325 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
3326 offset, base_offset, super_size, super_cluster,
3327 size, object->paging_offset);
3328 /*
3329 * apparently there is a case where the vm requests a
3330 * page to be written out whose offset is beyond the
3331 * object size
3332 */
1c79356b
A
3333 if((offset + size) > (base_offset + super_size))
3334 super_size = (offset + size) - base_offset;
3335
3336 offset = base_offset;
3337 size = super_size;
3338 }
91447636
A
3339 return vm_object_upl_request(object, offset, size,
3340 upl, user_page_list, page_list_count,
3341 cntrl_flags);
1c79356b
A
3342}
3343
91447636
A
3344
3345kern_return_t
3346vm_map_create_upl(
3347 vm_map_t map,
3348 vm_map_address_t offset,
3349 upl_size_t *upl_size,
3350 upl_t *upl,
3351 upl_page_info_array_t page_list,
3352 unsigned int *count,
3353 int *flags)
3354{
3355 vm_map_entry_t entry;
3356 int caller_flags;
3357 int force_data_sync;
3358 int sync_cow_data;
3359 vm_object_t local_object;
3360 vm_map_offset_t local_offset;
3361 vm_map_offset_t local_start;
3362 kern_return_t ret;
3363
3364 caller_flags = *flags;
3365
3366 if (caller_flags & ~UPL_VALID_FLAGS) {
3367 /*
3368 * For forward compatibility's sake,
3369 * reject any unknown flag.
3370 */
3371 return KERN_INVALID_VALUE;
3372 }
3373
3374 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
3375 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
3376
3377 if(upl == NULL)
3378 return KERN_INVALID_ARGUMENT;
3379
3380
3381REDISCOVER_ENTRY:
3382 vm_map_lock(map);
3383 if (vm_map_lookup_entry(map, offset, &entry)) {
3384 if (entry->object.vm_object == VM_OBJECT_NULL ||
3385 !entry->object.vm_object->phys_contiguous) {
3386 if((*upl_size/page_size) > MAX_UPL_TRANSFER) {
3387 *upl_size = MAX_UPL_TRANSFER * page_size;
3388 }
3389 }
3390 if((entry->vme_end - offset) < *upl_size) {
3391 *upl_size = entry->vme_end - offset;
3392 }
3393 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
3394 if (entry->object.vm_object == VM_OBJECT_NULL) {
3395 *flags = 0;
3396 } else if (entry->object.vm_object->private) {
3397 *flags = UPL_DEV_MEMORY;
3398 if (entry->object.vm_object->phys_contiguous) {
3399 *flags |= UPL_PHYS_CONTIG;
3400 }
3401 } else {
3402 *flags = 0;
3403 }
3404 vm_map_unlock(map);
3405 return KERN_SUCCESS;
3406 }
3407 /*
3408 * Create an object if necessary.
3409 */
3410 if (entry->object.vm_object == VM_OBJECT_NULL) {
3411 entry->object.vm_object = vm_object_allocate(
3412 (vm_size_t)(entry->vme_end - entry->vme_start));
3413 entry->offset = 0;
3414 }
3415 if (!(caller_flags & UPL_COPYOUT_FROM)) {
3416 if (!(entry->protection & VM_PROT_WRITE)) {
3417 vm_map_unlock(map);
3418 return KERN_PROTECTION_FAILURE;
3419 }
3420 if (entry->needs_copy) {
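			/*
			 * the write-access lookup below appears to be done
			 * purely for its side effect: vm_map_lookup_locked
			 * resolves the copy-on-write state of this entry, the
			 * object/offset results are simply dropped, and we
			 * retry from REDISCOVER_ENTRY with an entry that no
			 * longer needs_copy
			 */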
3421 vm_map_t local_map;
3422 vm_object_t object;
3423 vm_map_offset_t offset_hi;
3424 vm_map_offset_t offset_lo;
3425 vm_object_offset_t new_offset;
3426 vm_prot_t prot;
3427 boolean_t wired;
3428 vm_behavior_t behavior;
3429 vm_map_version_t version;
3430 vm_map_t real_map;
3431
3432 local_map = map;
3433 vm_map_lock_write_to_read(map);
3434 if(vm_map_lookup_locked(&local_map,
3435 offset, VM_PROT_WRITE,
3436 &version, &object,
3437 &new_offset, &prot, &wired,
3438 &behavior, &offset_lo,
3439 &offset_hi, &real_map)) {
3440 vm_map_unlock(local_map);
3441 return KERN_FAILURE;
3442 }
3443 if (real_map != map) {
3444 vm_map_unlock(real_map);
3445 }
3446 vm_object_unlock(object);
3447 vm_map_unlock(local_map);
3448
3449 goto REDISCOVER_ENTRY;
3450 }
3451 }
3452 if (entry->is_sub_map) {
3453 vm_map_t submap;
3454
3455 submap = entry->object.sub_map;
3456 local_start = entry->vme_start;
3457 local_offset = entry->offset;
3458 vm_map_reference(submap);
3459 vm_map_unlock(map);
3460
3461 ret = (vm_map_create_upl(submap,
3462 local_offset + (offset - local_start),
3463 upl_size, upl, page_list, count,
3464 flags));
3465
3466 vm_map_deallocate(submap);
3467 return ret;
3468 }
3469
3470 if (sync_cow_data) {
3471 if (entry->object.vm_object->shadow
3472 || entry->object.vm_object->copy) {
3473
3474 local_object = entry->object.vm_object;
3475 local_start = entry->vme_start;
3476 local_offset = entry->offset;
3477 vm_object_reference(local_object);
3478 vm_map_unlock(map);
3479
3480 if (entry->object.vm_object->shadow &&
3481 entry->object.vm_object->copy) {
3482 vm_object_lock_request(
3483 local_object->shadow,
3484 (vm_object_offset_t)
3485 ((offset - local_start) +
3486 local_offset) +
3487 local_object->shadow_offset,
3488 *upl_size, FALSE,
3489 MEMORY_OBJECT_DATA_SYNC,
3490 VM_PROT_NO_CHANGE);
3491 }
3492 sync_cow_data = FALSE;
3493 vm_object_deallocate(local_object);
3494 goto REDISCOVER_ENTRY;
3495 }
3496 }
3497
3498 if (force_data_sync) {
3499
3500 local_object = entry->object.vm_object;
3501 local_start = entry->vme_start;
3502 local_offset = entry->offset;
3503 vm_object_reference(local_object);
3504 vm_map_unlock(map);
3505
3506 vm_object_lock_request(
3507 local_object,
3508 (vm_object_offset_t)
3509 ((offset - local_start) + local_offset),
3510 (vm_object_size_t)*upl_size, FALSE,
3511 MEMORY_OBJECT_DATA_SYNC,
3512 VM_PROT_NO_CHANGE);
3513 force_data_sync = FALSE;
3514 vm_object_deallocate(local_object);
3515 goto REDISCOVER_ENTRY;
3516 }
3517
3518 if(!(entry->object.vm_object->private)) {
3519 if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3520 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3521 if(entry->object.vm_object->phys_contiguous) {
3522 *flags = UPL_PHYS_CONTIG;
3523 } else {
3524 *flags = 0;
3525 }
3526 } else {
3527 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3528 }
3529 local_object = entry->object.vm_object;
3530 local_offset = entry->offset;
3531 local_start = entry->vme_start;
3532 vm_object_reference(local_object);
3533 vm_map_unlock(map);
3534 if(caller_flags & UPL_SET_IO_WIRE) {
3535 ret = (vm_object_iopl_request(local_object,
3536 (vm_object_offset_t)
3537 ((offset - local_start)
3538 + local_offset),
3539 *upl_size,
3540 upl,
3541 page_list,
3542 count,
3543 caller_flags));
3544 } else {
3545 ret = (vm_object_upl_request(local_object,
3546 (vm_object_offset_t)
3547 ((offset - local_start)
3548 + local_offset),
3549 *upl_size,
3550 upl,
3551 page_list,
3552 count,
3553 caller_flags));
3554 }
3555 vm_object_deallocate(local_object);
3556 return(ret);
3557 }
3558
3559 vm_map_unlock(map);
3560 return(KERN_FAILURE);
1c79356b 3561
91447636
A
3562}
3563
3564/*
3565 * Internal routine to enter a UPL into a VM map.
3566 *
3567 * JMM - This should just be doable through the standard
3568 * vm_map_enter() API.
3569 */
1c79356b 3570kern_return_t
91447636
A
3571vm_map_enter_upl(
3572 vm_map_t map,
3573 upl_t upl,
3574 vm_map_offset_t *dst_addr)
1c79356b 3575{
91447636 3576 vm_map_size_t size;
1c79356b 3577 vm_object_offset_t offset;
91447636 3578 vm_map_offset_t addr;
1c79356b
A
3579 vm_page_t m;
3580 kern_return_t kr;
3581
0b4e3aa0
A
3582 if (upl == UPL_NULL)
3583 return KERN_INVALID_ARGUMENT;
3584
3585 upl_lock(upl);
3586
1c79356b 3587 /* check to see if already mapped */
0b4e3aa0
A
3588 if(UPL_PAGE_LIST_MAPPED & upl->flags) {
3589 upl_unlock(upl);
1c79356b 3590 return KERN_FAILURE;
0b4e3aa0 3591 }
1c79356b 3592
55e303ae
A
3593 if((!(upl->map_object->pageout)) &&
3594 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) ||
3595 (upl->map_object->phys_contiguous))) {
3596 vm_object_t object;
3597 vm_page_t alias_page;
3598 vm_object_offset_t new_offset;
3599 int pg_num;
3600 wpl_array_t lite_list;
3601
3602 if(upl->flags & UPL_INTERNAL) {
3603 lite_list = (wpl_array_t)
91447636 3604 ((((uintptr_t)upl) + sizeof(struct upl))
55e303ae
A
3605 + ((upl->size/PAGE_SIZE)
3606 * sizeof(upl_page_info_t)));
3607 } else {
3608 lite_list = (wpl_array_t)
91447636 3609 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
3610 }
3611 object = upl->map_object;
3612 upl->map_object = vm_object_allocate(upl->size);
3613 vm_object_lock(upl->map_object);
3614 upl->map_object->shadow = object;
3615 upl->map_object->pageout = TRUE;
3616 upl->map_object->can_persist = FALSE;
3617 upl->map_object->copy_strategy =
3618 MEMORY_OBJECT_COPY_NONE;
3619 upl->map_object->shadow_offset =
3620 upl->offset - object->paging_offset;
3621 upl->map_object->wimg_bits = object->wimg_bits;
55e303ae
A
3622 offset = upl->map_object->shadow_offset;
3623 new_offset = 0;
3624 size = upl->size;
91447636 3625
55e303ae 3626 vm_object_lock(object);
91447636 3627
55e303ae
A
3628 while(size) {
3629 pg_num = (new_offset)/PAGE_SIZE;
3630 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3631 vm_object_unlock(object);
3632 VM_PAGE_GRAB_FICTITIOUS(alias_page);
3633 vm_object_lock(object);
3634 m = vm_page_lookup(object, offset);
3635 if (m == VM_PAGE_NULL) {
3636 panic("vm_upl_map: page missing\n");
3637 }
3638
3639 vm_object_paging_begin(object);
3640
3641 /*
3642 * Convert the fictitious page to a private
3643 * shadow of the real page.
3644 */
3645 assert(alias_page->fictitious);
3646 alias_page->fictitious = FALSE;
3647 alias_page->private = TRUE;
3648 alias_page->pageout = TRUE;
3649 alias_page->phys_page = m->phys_page;
91447636
A
3650
3651 vm_page_lock_queues();
55e303ae 3652 vm_page_wire(alias_page);
91447636
A
3653 vm_page_unlock_queues();
3654
3655 /*
3656 * ENCRYPTED SWAP:
3657 * The virtual page ("m") has to be wired in some way
3658 * here or its physical page ("m->phys_page") could
3659 * be recycled at any time.
3660 * Assuming this is enforced by the caller, we can't
3661 * get an encrypted page here. Since the encryption
3662 * key depends on the VM page's "pager" object and
3663 * the "paging_offset", we couldn't handle 2 pageable
3664 * VM pages (with different pagers and paging_offsets)
3665 * sharing the same physical page: we could end up
3666 * encrypting with one key (via one VM page) and
3667 * decrypting with another key (via the alias VM page).
3668 */
3669 ASSERT_PAGE_DECRYPTED(m);
55e303ae
A
3670
3671 vm_page_insert(alias_page,
3672 upl->map_object, new_offset);
3673 assert(!alias_page->wanted);
3674 alias_page->busy = FALSE;
3675 alias_page->absent = FALSE;
3676 }
3677
3678 size -= PAGE_SIZE;
3679 offset += PAGE_SIZE_64;
3680 new_offset += PAGE_SIZE_64;
3681 }
3682 vm_object_unlock(object);
91447636 3683 vm_object_unlock(upl->map_object);
55e303ae
A
3684 }
3685 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous)
3686 offset = upl->offset - upl->map_object->paging_offset;
3687 else
3688 offset = 0;
3689
1c79356b
A
3690 size = upl->size;
3691
3692 vm_object_lock(upl->map_object);
3693 upl->map_object->ref_count++;
3694 vm_object_res_reference(upl->map_object);
3695 vm_object_unlock(upl->map_object);
3696
3697 *dst_addr = 0;
3698
3699
3700 /* NEED A UPL_MAP ALIAS */
91447636
A
3701 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
3702 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE,
1c79356b
A
3703 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
3704
0b4e3aa0
A
3705 if (kr != KERN_SUCCESS) {
3706 upl_unlock(upl);
1c79356b 3707 return(kr);
0b4e3aa0 3708 }
1c79356b 3709
91447636
A
3710 vm_object_lock(upl->map_object);
3711
1c79356b
A
3712 for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) {
3713 m = vm_page_lookup(upl->map_object, offset);
3714 if(m) {
9bccf70c
A
3715 unsigned int cache_attr;
3716 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
3717
3718 PMAP_ENTER(map->pmap, addr,
3719 m, VM_PROT_ALL,
3720 cache_attr, TRUE);
1c79356b
A
3721 }
3722 offset+=PAGE_SIZE_64;
3723 }
91447636
A
3724 vm_object_unlock(upl->map_object);
3725
0b4e3aa0 3726 upl->ref_count++; /* hold a reference for the mapping */
1c79356b
A
3727 upl->flags |= UPL_PAGE_LIST_MAPPED;
3728 upl->kaddr = *dst_addr;
0b4e3aa0 3729 upl_unlock(upl);
1c79356b
A
3730 return KERN_SUCCESS;
3731}
3732
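/*
 * Illustrative sketch, not part of vm_pageout.c: the pointer arithmetic used
 * above (and again in the commit/abort paths below) implies this layout for
 * an internal "lite" UPL:
 *
 *	[ struct upl ][ upl_page_info_t[size/PAGE_SIZE] ][ wpl bitmap words ]
 *
 * and, for a non-internal lite UPL, just [ struct upl ][ wpl bitmap words ].
 * The helpers below (hypothetical names) locate the lite bitmap and test one
 * page's bit exactly the way the code does: 32-bit bitmap words, word index
 * pg_num >> 5, bit index pg_num & 31.
 */
static inline wpl_array_t
upl_lite_bitmap_sketch(upl_t upl)
{
	if (upl->flags & UPL_INTERNAL)
		return (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl) +
		    ((upl->size / PAGE_SIZE) * sizeof(upl_page_info_t)));
	return (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
}

static inline int
upl_lite_bit_is_set_sketch(wpl_array_t lite_list, int pg_num)
{
	return (lite_list[pg_num >> 5] & (1 << (pg_num & 31))) != 0;
}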
91447636
A
3733/*
3734 * Internal routine to remove a UPL mapping from a VM map.
3735 *
3736 * XXX - This should just be doable through a standard
3737 * vm_map_remove() operation. Otherwise, implicit clean-up
3738 * of the target map won't be able to correctly remove
3739 * these (and release the reference on the UPL). Having
3740 * to do this means we can't map these into user-space
3741 * maps yet.
3742 */
1c79356b 3743kern_return_t
91447636 3744vm_map_remove_upl(
1c79356b
A
3745 vm_map_t map,
3746 upl_t upl)
3747{
0b4e3aa0 3748 vm_address_t addr;
91447636 3749 upl_size_t size;
1c79356b 3750
0b4e3aa0
A
3751 if (upl == UPL_NULL)
3752 return KERN_INVALID_ARGUMENT;
3753
3754 upl_lock(upl);
1c79356b 3755 if(upl->flags & UPL_PAGE_LIST_MAPPED) {
0b4e3aa0 3756 addr = upl->kaddr;
1c79356b 3757 size = upl->size;
0b4e3aa0
A
3758 assert(upl->ref_count > 1);
3759 upl->ref_count--; /* removing mapping ref */
1c79356b
A
3760 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
3761 upl->kaddr = (vm_offset_t) 0;
0b4e3aa0
A
3762 upl_unlock(upl);
3763
91447636
A
3764 vm_map_remove( map,
3765 vm_map_trunc_page(addr),
3766 vm_map_round_page(addr + size),
3767 VM_MAP_NO_FLAGS);
1c79356b 3768 return KERN_SUCCESS;
1c79356b 3769 }
0b4e3aa0
A
3770 upl_unlock(upl);
3771 return KERN_FAILURE;
1c79356b
A
3772}
3773
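/*
 * Minimal usage sketch (hypothetical caller, not from this file): pairing
 * vm_map_enter_upl() with vm_map_remove_upl(), using only the signatures
 * defined above. vm_map_enter_upl() picks the address itself and fails with
 * KERN_FAILURE if the UPL is already mapped; vm_map_remove_upl() drops the
 * mapping and the reference it held.
 */
static kern_return_t
upl_map_unmap_sketch(vm_map_t map, upl_t upl)
{
	vm_map_offset_t addr;
	kern_return_t kr;

	kr = vm_map_enter_upl(map, upl, &addr);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... access the UPL's pages through "addr" here ... */

	return vm_map_remove_upl(map, upl);
}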
3774kern_return_t
0b4e3aa0 3775upl_commit_range(
1c79356b 3776 upl_t upl,
91447636
A
3777 upl_offset_t offset,
3778 upl_size_t size,
1c79356b 3779 int flags,
0b4e3aa0
A
3780 upl_page_info_t *page_list,
3781 mach_msg_type_number_t count,
3782 boolean_t *empty)
1c79356b 3783{
91447636 3784 upl_size_t xfer_size = size;
55e303ae 3785 vm_object_t shadow_object;
1c79356b
A
3786 vm_object_t object = upl->map_object;
3787 vm_object_offset_t target_offset;
1c79356b 3788 int entry;
55e303ae
A
3789 wpl_array_t lite_list;
3790 int occupied;
3791 int delayed_unlock = 0;
91447636 3792 int clear_refmod = 0;
55e303ae 3793 boolean_t shadow_internal;
1c79356b 3794
0b4e3aa0
A
3795 *empty = FALSE;
3796
3797 if (upl == UPL_NULL)
3798 return KERN_INVALID_ARGUMENT;
3799
55e303ae 3800
0b4e3aa0
A
3801 if (count == 0)
3802 page_list = NULL;
3803
91447636 3804 if (object->pageout) {
55e303ae
A
3805 shadow_object = object->shadow;
3806 } else {
3807 shadow_object = object;
3808 }
3809
0b4e3aa0 3810 upl_lock(upl);
55e303ae 3811
91447636
A
3812 if (upl->flags & UPL_ACCESS_BLOCKED) {
3813 /*
3814 * We used this UPL to block access to the pages by marking
3815 * them "busy". Now we need to clear the "busy" bit to allow
3816 * access to these pages again.
3817 */
3818 flags |= UPL_COMMIT_ALLOW_ACCESS;
3819 }
3820
55e303ae
A
3821 if (upl->flags & UPL_CLEAR_DIRTY)
3822 flags |= UPL_COMMIT_CLEAR_DIRTY;
3823
3824 if (upl->flags & UPL_DEVICE_MEMORY) {
1c79356b
A
3825 xfer_size = 0;
3826 } else if ((offset + size) > upl->size) {
0b4e3aa0 3827 upl_unlock(upl);
1c79356b
A
3828 return KERN_FAILURE;
3829 }
3830
55e303ae
A
3831 if (upl->flags & UPL_INTERNAL) {
3832 lite_list = (wpl_array_t)
91447636 3833 ((((uintptr_t)upl) + sizeof(struct upl))
55e303ae
A
3834 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
3835 } else {
3836 lite_list = (wpl_array_t)
91447636 3837 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 3838 }
91447636
A
3839 if (object != shadow_object)
3840 vm_object_lock(object);
1c79356b 3841 vm_object_lock(shadow_object);
91447636 3842
55e303ae 3843 shadow_internal = shadow_object->internal;
1c79356b
A
3844
3845 entry = offset/PAGE_SIZE;
3846 target_offset = (vm_object_offset_t)offset;
55e303ae 3847
91447636 3848 while (xfer_size) {
1c79356b
A
3849 vm_page_t t,m;
3850 upl_page_info_t *p;
3851
55e303ae 3852 m = VM_PAGE_NULL;
d7e50217 3853
55e303ae
A
3854 if (upl->flags & UPL_LITE) {
3855 int pg_num;
3856
3857 pg_num = target_offset/PAGE_SIZE;
3858
3859 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
3860 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
3861 m = vm_page_lookup(shadow_object,
3862 target_offset + (upl->offset -
3863 shadow_object->paging_offset));
3864 }
3865 }
3866 if (object->pageout) {
3867 if ((t = vm_page_lookup(object, target_offset)) != NULL) {
3868 t->pageout = FALSE;
3869
3870 if (delayed_unlock) {
3871 delayed_unlock = 0;
3872 vm_page_unlock_queues();
3873 }
3874 VM_PAGE_FREE(t);
3875
3876 if (m == NULL) {
3877 m = vm_page_lookup(
3878 shadow_object,
3879 target_offset +
3880 object->shadow_offset);
de355530 3881 }
55e303ae
A
3882 if (m != VM_PAGE_NULL)
3883 vm_object_paging_end(m->object);
3884 }
3885 }
3886 if (m != VM_PAGE_NULL) {
3887
91447636
A
3888 clear_refmod = 0;
3889
55e303ae
A
3890 if (upl->flags & UPL_IO_WIRE) {
3891
3892 if (delayed_unlock == 0)
3893 vm_page_lock_queues();
3894
3895 vm_page_unwire(m);
3896
3897 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
3898 delayed_unlock = 0;
3899 vm_page_unlock_queues();
3900 }
3901 if (page_list) {
de355530 3902 page_list[entry].phys_addr = 0;
55e303ae
A
3903 }
3904 if (flags & UPL_COMMIT_SET_DIRTY) {
3905 m->dirty = TRUE;
3906 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3907 m->dirty = FALSE;
91447636 3908 clear_refmod |= VM_MEM_MODIFIED;
55e303ae
A
3909 }
3910 if (flags & UPL_COMMIT_INACTIVATE) {
3911 m->reference = FALSE;
91447636 3912 clear_refmod |= VM_MEM_REFERENCED;
55e303ae 3913 vm_page_deactivate(m);
55e303ae 3914 }
91447636
A
3915 if (clear_refmod)
3916 pmap_clear_refmod(m->phys_page, clear_refmod);
3917
3918 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
3919 /*
3920 * We blocked access to the pages in this UPL.
3921 * Clear the "busy" bit and wake up any waiter
3922 * for this page.
3923 */
3924 PAGE_WAKEUP_DONE(m);
3925 }
3926
55e303ae
A
3927 target_offset += PAGE_SIZE_64;
3928 xfer_size -= PAGE_SIZE;
3929 entry++;
3930 continue;
3931 }
3932 if (delayed_unlock == 0)
3933 vm_page_lock_queues();
3934 /*
3935 * make sure to clear the hardware
3936 * modify or reference bits before
3937 * releasing the BUSY bit on this page
3938 * otherwise we risk losing a legitimate
3939 * change of state
3940 */
3941 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
3942 m->dirty = FALSE;
91447636 3943 clear_refmod |= VM_MEM_MODIFIED;
55e303ae
A
3944 }
3945 if (flags & UPL_COMMIT_INACTIVATE)
91447636
A
3946 clear_refmod |= VM_MEM_REFERENCED;
3947
3948 if (clear_refmod)
3949 pmap_clear_refmod(m->phys_page, clear_refmod);
55e303ae
A
3950
3951 if (page_list) {
3952 p = &(page_list[entry]);
3953 if(p->phys_addr && p->pageout && !m->pageout) {
3954 m->busy = TRUE;
3955 m->pageout = TRUE;
3956 vm_page_wire(m);
3957 } else if (page_list[entry].phys_addr &&
3958 !p->pageout && m->pageout &&
3959 !m->dump_cleaning) {
3960 m->pageout = FALSE;
3961 m->absent = FALSE;
3962 m->overwriting = FALSE;
3963 vm_page_unwire(m);
3964 PAGE_WAKEUP_DONE(m);
3965 }
3966 page_list[entry].phys_addr = 0;
3967 }
3968 m->dump_cleaning = FALSE;
3969 if(m->laundry) {
91447636 3970 vm_pageout_throttle_up(m);
55e303ae
A
3971 }
3972 if(m->pageout) {
3973 m->cleaning = FALSE;
3974 m->pageout = FALSE;
1c79356b 3975#if MACH_CLUSTER_STATS
55e303ae 3976 if (m->wanted) vm_pageout_target_collisions++;
1c79356b 3977#endif
91447636
A
3978 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
3979 m->dirty = TRUE;
3980 else
3981 m->dirty = FALSE;
3982
55e303ae 3983 if(m->dirty) {
55e303ae 3984 vm_page_unwire(m);/* reactivates */
91447636
A
3985
3986 if (upl->flags & UPL_PAGEOUT) {
3987 CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
3988 VM_STAT(reactivations++);
3989 }
55e303ae
A
3990 PAGE_WAKEUP_DONE(m);
3991 } else {
55e303ae
A
3992 vm_page_free(m);/* clears busy, etc. */
3993
91447636
A
3994 if (upl->flags & UPL_PAGEOUT) {
3995 CLUSTER_STAT(vm_pageout_target_page_freed++;)
3996
3997 if (page_list[entry].dirty)
3998 VM_STAT(pageouts++);
3999 }
55e303ae
A
4000 }
4001 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4002 delayed_unlock = 0;
4003 vm_page_unlock_queues();
4004 }
4005 target_offset += PAGE_SIZE_64;
4006 xfer_size -= PAGE_SIZE;
4007 entry++;
4008 continue;
4009 }
1c79356b 4010#if MACH_CLUSTER_STATS
55e303ae 4011 m->dirty = pmap_is_modified(m->phys_page);
1c79356b 4012
55e303ae
A
4013 if (m->dirty) vm_pageout_cluster_dirtied++;
4014 else vm_pageout_cluster_cleaned++;
4015 if (m->wanted) vm_pageout_cluster_collisions++;
1c79356b 4016#else
55e303ae 4017 m->dirty = 0;
1c79356b
A
4018#endif
4019
55e303ae
A
4020 if((m->busy) && (m->cleaning)) {
4021 /* the request_page_list case */
4022 if(m->absent) {
4023 m->absent = FALSE;
4024 if(shadow_object->absent_count == 1)
1c79356b 4025 vm_object_absent_release(shadow_object);
55e303ae 4026 else
1c79356b 4027 shadow_object->absent_count--;
de355530 4028 }
55e303ae
A
4029 m->overwriting = FALSE;
4030 m->busy = FALSE;
4031 m->dirty = FALSE;
4032 } else if (m->overwriting) {
4033 /* alternate request page list, write to
91447636
A
4034 * page_list case. Occurs when the original
4035 * page was wired at the time of the list
4036 * request */
55e303ae
A
4037 assert(m->wire_count != 0);
4038 vm_page_unwire(m);/* reactivates */
4039 m->overwriting = FALSE;
4040 }
4041 m->cleaning = FALSE;
4042
4043 /* It is a part of the semantic of COPYOUT_FROM */
4044 /* UPLs that a commit implies cache sync */
4045 /* between the vm page and the backing store */
4046 /* this can be used to strip the precious bit */
4047 /* as well as clean */
4048 if (upl->flags & UPL_PAGE_SYNC_DONE)
4049 m->precious = FALSE;
4050
4051 if (flags & UPL_COMMIT_SET_DIRTY)
4052 m->dirty = TRUE;
4053
4054 if (flags & UPL_COMMIT_INACTIVATE) {
4055 m->reference = FALSE;
4056 vm_page_deactivate(m);
4057 } else if (!m->active && !m->inactive) {
4058 if (m->reference)
4059 vm_page_activate(m);
4060 else
4061 vm_page_deactivate(m);
4062 }
91447636
A
4063
4064 if (flags & UPL_COMMIT_ALLOW_ACCESS) {
4065 /*
 4066 * We blocked access to the pages in this UPL.
4067 * Clear the "busy" bit on this page before we
4068 * wake up any waiter.
4069 */
4070 m->busy = FALSE;
4071 }
4072
55e303ae
A
4073 /*
4074 * Wakeup any thread waiting for the page to be un-cleaning.
4075 */
4076 PAGE_WAKEUP(m);
4077
4078 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
4079 delayed_unlock = 0;
4080 vm_page_unlock_queues();
4081 }
1c79356b
A
4082 }
4083 target_offset += PAGE_SIZE_64;
4084 xfer_size -= PAGE_SIZE;
4085 entry++;
4086 }
55e303ae
A
4087 if (delayed_unlock)
4088 vm_page_unlock_queues();
4089
4090 occupied = 1;
4091
4092 if (upl->flags & UPL_DEVICE_MEMORY) {
4093 occupied = 0;
4094 } else if (upl->flags & UPL_LITE) {
4095 int pg_num;
4096 int i;
4097 pg_num = upl->size/PAGE_SIZE;
4098 pg_num = (pg_num + 31) >> 5;
4099 occupied = 0;
4100 for(i= 0; i<pg_num; i++) {
4101 if(lite_list[i] != 0) {
4102 occupied = 1;
4103 break;
4104 }
4105 }
4106 } else {
4107 if(queue_empty(&upl->map_object->memq)) {
4108 occupied = 0;
4109 }
4110 }
1c79356b 4111
55e303ae
A
4112 if(occupied == 0) {
4113 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
0b4e3aa0 4114 *empty = TRUE;
55e303ae
A
4115 }
4116 if(object == shadow_object)
4117 vm_object_paging_end(shadow_object);
1c79356b 4118 }
55e303ae 4119 vm_object_unlock(shadow_object);
91447636
A
4120 if (object != shadow_object)
4121 vm_object_unlock(object);
0b4e3aa0
A
4122 upl_unlock(upl);
4123
1c79356b
A
4124 return KERN_SUCCESS;
4125}
4126
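/*
 * Minimal usage sketch (hypothetical caller, not from this file): committing
 * an entire UPL after a successful pageout. The flag combination shown
 * (UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_INACTIVATE) is one plausible choice
 * among the flags handled above, not a requirement. Note that "empty" is
 * only raised when the UPL carries UPL_COMMIT_NOTIFY_EMPTY.
 */
static void
upl_commit_sketch(upl_t upl)
{
	boolean_t empty;

	(void) upl_commit_range(upl,
	    0, upl->size,
	    UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_INACTIVATE,
	    NULL, 0,			/* no caller-supplied page list */
	    &empty);

	if (empty) {
		/* no pages left in the UPL; the owner can release it now */
	}
}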
0b4e3aa0
A
4127kern_return_t
4128upl_abort_range(
1c79356b 4129 upl_t upl,
91447636
A
4130 upl_offset_t offset,
4131 upl_size_t size,
0b4e3aa0
A
4132 int error,
4133 boolean_t *empty)
1c79356b 4134{
91447636 4135 upl_size_t xfer_size = size;
55e303ae 4136 vm_object_t shadow_object;
1c79356b
A
4137 vm_object_t object = upl->map_object;
4138 vm_object_offset_t target_offset;
1c79356b 4139 int entry;
55e303ae
A
4140 wpl_array_t lite_list;
4141 int occupied;
4142 boolean_t shadow_internal;
1c79356b 4143
0b4e3aa0
A
4144 *empty = FALSE;
4145
4146 if (upl == UPL_NULL)
4147 return KERN_INVALID_ARGUMENT;
4148
55e303ae
A
4149 if (upl->flags & UPL_IO_WIRE) {
4150 return upl_commit_range(upl,
4151 offset, size, 0,
4152 NULL, 0, empty);
4153 }
4154
4155 if(object->pageout) {
4156 shadow_object = object->shadow;
4157 } else {
4158 shadow_object = object;
4159 }
4160
0b4e3aa0 4161 upl_lock(upl);
1c79356b
A
4162 if(upl->flags & UPL_DEVICE_MEMORY) {
4163 xfer_size = 0;
4164 } else if ((offset + size) > upl->size) {
0b4e3aa0 4165 upl_unlock(upl);
1c79356b
A
4166 return KERN_FAILURE;
4167 }
91447636
A
4168 if (object != shadow_object)
4169 vm_object_lock(object);
1c79356b 4170 vm_object_lock(shadow_object);
91447636 4171
55e303ae
A
4172 shadow_internal = shadow_object->internal;
4173
4174 if(upl->flags & UPL_INTERNAL) {
4175 lite_list = (wpl_array_t)
91447636 4176 ((((uintptr_t)upl) + sizeof(struct upl))
55e303ae
A
4177 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4178 } else {
4179 lite_list = (wpl_array_t)
91447636 4180 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 4181 }
1c79356b
A
4182
4183 entry = offset/PAGE_SIZE;
4184 target_offset = (vm_object_offset_t)offset;
4185 while(xfer_size) {
4186 vm_page_t t,m;
1c79356b 4187
55e303ae
A
4188 m = VM_PAGE_NULL;
4189 if(upl->flags & UPL_LITE) {
4190 int pg_num;
4191 pg_num = target_offset/PAGE_SIZE;
4192 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4193 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4194 m = vm_page_lookup(shadow_object,
4195 target_offset + (upl->offset -
4196 shadow_object->paging_offset));
4197 }
4198 }
4199 if(object->pageout) {
4200 if ((t = vm_page_lookup(object, target_offset))
4201 != NULL) {
4202 t->pageout = FALSE;
4203 VM_PAGE_FREE(t);
4204 if(m == NULL) {
4205 m = vm_page_lookup(
4206 shadow_object,
4207 target_offset +
4208 object->shadow_offset);
4209 }
4210 if(m != VM_PAGE_NULL)
4211 vm_object_paging_end(m->object);
4212 }
4213 }
4214 if(m != VM_PAGE_NULL) {
1c79356b
A
4215 vm_page_lock_queues();
4216 if(m->absent) {
91447636
A
4217 boolean_t must_free = TRUE;
4218
1c79356b
A
4219 /* COPYOUT = FALSE case */
4220 /* check for error conditions which must */
 4221 /* be passed back to the page's customer */
4222 if(error & UPL_ABORT_RESTART) {
4223 m->restart = TRUE;
4224 m->absent = FALSE;
4225 vm_object_absent_release(m->object);
4226 m->page_error = KERN_MEMORY_ERROR;
4227 m->error = TRUE;
91447636 4228 must_free = FALSE;
1c79356b
A
4229 } else if(error & UPL_ABORT_UNAVAILABLE) {
4230 m->restart = FALSE;
4231 m->unusual = TRUE;
91447636 4232 must_free = FALSE;
1c79356b
A
4233 } else if(error & UPL_ABORT_ERROR) {
4234 m->restart = FALSE;
4235 m->absent = FALSE;
4236 vm_object_absent_release(m->object);
4237 m->page_error = KERN_MEMORY_ERROR;
4238 m->error = TRUE;
91447636 4239 must_free = FALSE;
1c79356b 4240 }
91447636
A
4241
4242 /*
4243 * ENCRYPTED SWAP:
4244 * If the page was already encrypted,
4245 * we don't really need to decrypt it
4246 * now. It will get decrypted later,
4247 * on demand, as soon as someone needs
4248 * to access its contents.
4249 */
1c79356b
A
4250
4251 m->cleaning = FALSE;
4252 m->overwriting = FALSE;
4253 PAGE_WAKEUP_DONE(m);
91447636
A
4254
4255 if (must_free == TRUE) {
1c79356b
A
4256 vm_page_free(m);
4257 } else {
4258 vm_page_activate(m);
4259 }
1c79356b 4260 vm_page_unlock_queues();
91447636 4261
1c79356b
A
4262 target_offset += PAGE_SIZE_64;
4263 xfer_size -= PAGE_SIZE;
4264 entry++;
4265 continue;
4266 }
4267 /*
55e303ae
A
4268 * Handle the trusted pager throttle.
4269 */
4270 if (m->laundry) {
91447636 4271 vm_pageout_throttle_up(m);
1c79356b
A
4272 }
4273 if(m->pageout) {
4274 assert(m->busy);
4275 assert(m->wire_count == 1);
4276 m->pageout = FALSE;
4277 vm_page_unwire(m);
4278 }
0b4e3aa0 4279 m->dump_cleaning = FALSE;
1c79356b 4280 m->cleaning = FALSE;
1c79356b
A
4281 m->overwriting = FALSE;
4282#if MACH_PAGEMAP
4283 vm_external_state_clr(
4284 m->object->existence_map, m->offset);
4285#endif /* MACH_PAGEMAP */
4286 if(error & UPL_ABORT_DUMP_PAGES) {
4287 vm_page_free(m);
91447636 4288 pmap_disconnect(m->phys_page);
1c79356b 4289 } else {
91447636 4290 PAGE_WAKEUP_DONE(m);
1c79356b
A
4291 }
4292 vm_page_unlock_queues();
4293 }
55e303ae
A
4294 target_offset += PAGE_SIZE_64;
4295 xfer_size -= PAGE_SIZE;
4296 entry++;
d7e50217 4297 }
55e303ae
A
4298 occupied = 1;
4299 if (upl->flags & UPL_DEVICE_MEMORY) {
4300 occupied = 0;
4301 } else if (upl->flags & UPL_LITE) {
4302 int pg_num;
4303 int i;
4304 pg_num = upl->size/PAGE_SIZE;
4305 pg_num = (pg_num + 31) >> 5;
4306 occupied = 0;
4307 for(i= 0; i<pg_num; i++) {
4308 if(lite_list[i] != 0) {
4309 occupied = 1;
4310 break;
4311 }
4312 }
4313 } else {
4314 if(queue_empty(&upl->map_object->memq)) {
4315 occupied = 0;
4316 }
4317 }
4318
4319 if(occupied == 0) {
4320 if(upl->flags & UPL_COMMIT_NOTIFY_EMPTY) {
0b4e3aa0 4321 *empty = TRUE;
55e303ae
A
4322 }
4323 if(object == shadow_object)
4324 vm_object_paging_end(shadow_object);
1c79356b 4325 }
55e303ae 4326 vm_object_unlock(shadow_object);
91447636
A
4327 if (object != shadow_object)
4328 vm_object_unlock(object);
55e303ae 4329
0b4e3aa0 4330 upl_unlock(upl);
55e303ae 4331
1c79356b
A
4332 return KERN_SUCCESS;
4333}
4334
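/*
 * Minimal usage sketch (hypothetical caller, not from this file): aborting a
 * UPL range after a failed pageout. As handled above, UPL_ABORT_ERROR makes
 * pages that were still absent report KERN_MEMORY_ERROR to their customer,
 * while UPL_ABORT_DUMP_PAGES would free the resident pages outright; the
 * flag choice here is purely illustrative.
 */
static void
upl_abort_sketch(upl_t upl, upl_offset_t offset, upl_size_t size)
{
	boolean_t empty;

	(void) upl_abort_range(upl, offset, size, UPL_ABORT_ERROR, &empty);
}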
4335kern_return_t
0b4e3aa0 4336upl_abort(
1c79356b
A
4337 upl_t upl,
4338 int error)
4339{
4340 vm_object_t object = NULL;
4341 vm_object_t shadow_object = NULL;
4342 vm_object_offset_t offset;
4343 vm_object_offset_t shadow_offset;
4344 vm_object_offset_t target_offset;
91447636 4345 upl_size_t i;
55e303ae 4346 wpl_array_t lite_list;
1c79356b 4347 vm_page_t t,m;
55e303ae
A
4348 int occupied;
4349 boolean_t shadow_internal;
1c79356b 4350
0b4e3aa0
A
4351 if (upl == UPL_NULL)
4352 return KERN_INVALID_ARGUMENT;
4353
55e303ae
A
4354 if (upl->flags & UPL_IO_WIRE) {
4355 boolean_t empty;
4356 return upl_commit_range(upl,
4357 0, upl->size, 0,
4358 NULL, 0, &empty);
4359 }
4360
0b4e3aa0 4361 upl_lock(upl);
1c79356b 4362 if(upl->flags & UPL_DEVICE_MEMORY) {
0b4e3aa0 4363 upl_unlock(upl);
1c79356b
A
4364 return KERN_SUCCESS;
4365 }
0b4e3aa0 4366
1c79356b
A
4367 object = upl->map_object;
4368
0b4e3aa0 4369 if (object == NULL) {
1c79356b 4370 panic("upl_abort: upl object is not backed by an object");
0b4e3aa0 4371 upl_unlock(upl);
1c79356b
A
4372 return KERN_INVALID_ARGUMENT;
4373 }
4374
55e303ae
A
4375 if(object->pageout) {
4376 shadow_object = object->shadow;
4377 shadow_offset = object->shadow_offset;
4378 } else {
4379 shadow_object = object;
4380 shadow_offset = upl->offset - object->paging_offset;
4381 }
4382
4383 if(upl->flags & UPL_INTERNAL) {
4384 lite_list = (wpl_array_t)
91447636 4385 ((((uintptr_t)upl) + sizeof(struct upl))
55e303ae
A
4386 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4387 } else {
4388 lite_list = (wpl_array_t)
91447636 4389 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 4390 }
1c79356b 4391 offset = 0;
91447636
A
4392
4393 if (object != shadow_object)
4394 vm_object_lock(object);
1c79356b 4395 vm_object_lock(shadow_object);
91447636 4396
55e303ae
A
4397 shadow_internal = shadow_object->internal;
4398
1c79356b 4399 for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) {
55e303ae
A
4400 m = VM_PAGE_NULL;
4401 target_offset = offset + shadow_offset;
4402 if(upl->flags & UPL_LITE) {
4403 int pg_num;
4404 pg_num = offset/PAGE_SIZE;
4405 if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
4406 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
4407 m = vm_page_lookup(
4408 shadow_object, target_offset);
4409 }
4410 }
4411 if(object->pageout) {
4412 if ((t = vm_page_lookup(object, offset)) != NULL) {
4413 t->pageout = FALSE;
4414 VM_PAGE_FREE(t);
4415 if(m == NULL) {
4416 m = vm_page_lookup(
4417 shadow_object, target_offset);
4418 }
4419 if(m != VM_PAGE_NULL)
4420 vm_object_paging_end(m->object);
4421 }
4422 }
4423 if(m != VM_PAGE_NULL) {
1c79356b
A
4424 vm_page_lock_queues();
4425 if(m->absent) {
91447636
A
4426 boolean_t must_free = TRUE;
4427
1c79356b
A
4428 /* COPYOUT = FALSE case */
4429 /* check for error conditions which must */
 4430 /* be passed back to the page's customer */
4431 if(error & UPL_ABORT_RESTART) {
4432 m->restart = TRUE;
4433 m->absent = FALSE;
4434 vm_object_absent_release(m->object);
4435 m->page_error = KERN_MEMORY_ERROR;
4436 m->error = TRUE;
91447636 4437 must_free = FALSE;
1c79356b
A
4438 } else if(error & UPL_ABORT_UNAVAILABLE) {
4439 m->restart = FALSE;
4440 m->unusual = TRUE;
91447636 4441 must_free = FALSE;
1c79356b
A
4442 } else if(error & UPL_ABORT_ERROR) {
4443 m->restart = FALSE;
4444 m->absent = FALSE;
4445 vm_object_absent_release(m->object);
4446 m->page_error = KERN_MEMORY_ERROR;
4447 m->error = TRUE;
91447636 4448 must_free = FALSE;
1c79356b 4449 }
91447636
A
4450
4451 /*
4452 * ENCRYPTED SWAP:
4453 * If the page was already encrypted,
4454 * we don't really need to decrypt it
4455 * now. It will get decrypted later,
4456 * on demand, as soon as someone needs
4457 * to access its contents.
4458 */
4459
1c79356b
A
4460 m->cleaning = FALSE;
4461 m->overwriting = FALSE;
4462 PAGE_WAKEUP_DONE(m);
91447636
A
4463
4464 if (must_free == TRUE) {
1c79356b
A
4465 vm_page_free(m);
4466 } else {
4467 vm_page_activate(m);
4468 }
4469 vm_page_unlock_queues();
4470 continue;
4471 }
4472 /*
4473 * Handle the trusted pager throttle.
4474 */
4475 if (m->laundry) {
91447636 4476 vm_pageout_throttle_up(m);
1c79356b
A
4477 }
4478 if(m->pageout) {
4479 assert(m->busy);
4480 assert(m->wire_count == 1);
4481 m->pageout = FALSE;
4482 vm_page_unwire(m);
4483 }
0b4e3aa0 4484 m->dump_cleaning = FALSE;
1c79356b 4485 m->cleaning = FALSE;
1c79356b
A
4486 m->overwriting = FALSE;
4487#if MACH_PAGEMAP
4488 vm_external_state_clr(
4489 m->object->existence_map, m->offset);
4490#endif /* MACH_PAGEMAP */
4491 if(error & UPL_ABORT_DUMP_PAGES) {
4492 vm_page_free(m);
91447636 4493 pmap_disconnect(m->phys_page);
1c79356b 4494 } else {
91447636 4495 PAGE_WAKEUP_DONE(m);
1c79356b
A
4496 }
4497 vm_page_unlock_queues();
4498 }
1c79356b 4499 }
55e303ae
A
4500 occupied = 1;
4501 if (upl->flags & UPL_DEVICE_MEMORY) {
4502 occupied = 0;
4503 } else if (upl->flags & UPL_LITE) {
4504 int pg_num;
91447636 4505 int j;
55e303ae
A
4506 pg_num = upl->size/PAGE_SIZE;
4507 pg_num = (pg_num + 31) >> 5;
4508 occupied = 0;
91447636
A
4509 for(j= 0; j<pg_num; j++) {
4510 if(lite_list[j] != 0) {
55e303ae
A
4511 occupied = 1;
4512 break;
4513 }
4514 }
4515 } else {
4516 if(queue_empty(&upl->map_object->memq)) {
4517 occupied = 0;
4518 }
4519 }
1c79356b 4520
55e303ae
A
4521 if(occupied == 0) {
4522 if(object == shadow_object)
4523 vm_object_paging_end(shadow_object);
1c79356b 4524 }
55e303ae 4525 vm_object_unlock(shadow_object);
91447636
A
4526 if (object != shadow_object)
4527 vm_object_unlock(object);
55e303ae 4528
0b4e3aa0 4529 upl_unlock(upl);
1c79356b
A
4530 return KERN_SUCCESS;
4531}
4532
4533/* an option on commit should be wire */
4534kern_return_t
0b4e3aa0
A
4535upl_commit(
4536 upl_t upl,
4537 upl_page_info_t *page_list,
4538 mach_msg_type_number_t count)
1c79356b 4539{
0b4e3aa0
A
4540 if (upl == UPL_NULL)
4541 return KERN_INVALID_ARGUMENT;
4542
55e303ae
A
4543 if(upl->flags & (UPL_LITE | UPL_IO_WIRE)) {
4544 boolean_t empty;
4545 return upl_commit_range(upl, 0, upl->size, 0,
4546 page_list, count, &empty);
4547 }
4548
0b4e3aa0
A
4549 if (count == 0)
4550 page_list = NULL;
4551
4552 upl_lock(upl);
1c79356b
A
4553 if (upl->flags & UPL_DEVICE_MEMORY)
4554 page_list = NULL;
de355530 4555
91447636
A
4556 if (upl->flags & UPL_ENCRYPTED) {
4557 /*
4558 * ENCRYPTED SWAP:
4559 * This UPL was encrypted, but we don't need
4560 * to decrypt here. We'll decrypt each page
4561 * later, on demand, as soon as someone needs
4562 * to access the page's contents.
4563 */
4564 }
4565
55e303ae
A
4566 if ((upl->flags & UPL_CLEAR_DIRTY) ||
4567 (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) {
1c79356b
A
4568 vm_object_t shadow_object = upl->map_object->shadow;
4569 vm_object_t object = upl->map_object;
4570 vm_object_offset_t target_offset;
91447636 4571 upl_size_t xfer_end;
1c79356b
A
4572 int entry;
4573
4574 vm_page_t t, m;
4575 upl_page_info_t *p;
4576
91447636
A
4577 if (object != shadow_object)
4578 vm_object_lock(object);
1c79356b
A
4579 vm_object_lock(shadow_object);
4580
4581 entry = 0;
4582 target_offset = object->shadow_offset;
4583 xfer_end = upl->size + object->shadow_offset;
4584
4585 while(target_offset < xfer_end) {
4586
4587 if ((t = vm_page_lookup(object,
4588 target_offset - object->shadow_offset))
4589 == NULL) {
4590 target_offset += PAGE_SIZE_64;
4591 entry++;
4592 continue;
4593 }
4594
4595 m = vm_page_lookup(shadow_object, target_offset);
4596 if(m != VM_PAGE_NULL) {
91447636
A
4597 /*
4598 * ENCRYPTED SWAP:
4599 * If this page was encrypted, we
4600 * don't need to decrypt it here.
4601 * We'll decrypt it later, on demand,
4602 * as soon as someone needs to access
4603 * its contents.
4604 */
4605
55e303ae
A
4606 if (upl->flags & UPL_CLEAR_DIRTY) {
4607 pmap_clear_modify(m->phys_page);
4608 m->dirty = FALSE;
4609 }
4610 /* It is a part of the semantic of */
4611 /* COPYOUT_FROM UPLs that a commit */
4612 /* implies cache sync between the */
4613 /* vm page and the backing store */
4614 /* this can be used to strip the */
4615 /* precious bit as well as clean */
4616 if (upl->flags & UPL_PAGE_SYNC_DONE)
4617 m->precious = FALSE;
4618
4619 if(page_list) {
4620 p = &(page_list[entry]);
4621 if(page_list[entry].phys_addr &&
1c79356b
A
4622 p->pageout && !m->pageout) {
4623 vm_page_lock_queues();
4624 m->busy = TRUE;
4625 m->pageout = TRUE;
4626 vm_page_wire(m);
4627 vm_page_unlock_queues();
55e303ae 4628 } else if (page_list[entry].phys_addr &&
0b4e3aa0
A
4629 !p->pageout && m->pageout &&
4630 !m->dump_cleaning) {
1c79356b
A
4631 vm_page_lock_queues();
4632 m->pageout = FALSE;
4633 m->absent = FALSE;
4634 m->overwriting = FALSE;
4635 vm_page_unwire(m);
4636 PAGE_WAKEUP_DONE(m);
4637 vm_page_unlock_queues();
55e303ae
A
4638 }
4639 page_list[entry].phys_addr = 0;
1c79356b 4640 }
1c79356b
A
4641 }
4642 target_offset += PAGE_SIZE_64;
4643 entry++;
4644 }
1c79356b 4645 vm_object_unlock(shadow_object);
91447636
A
4646 if (object != shadow_object)
4647 vm_object_unlock(object);
4648
1c79356b 4649 }
55e303ae
A
4650 if (upl->flags & UPL_DEVICE_MEMORY) {
4651 vm_object_lock(upl->map_object->shadow);
4652 if(upl->map_object == upl->map_object->shadow)
4653 vm_object_paging_end(upl->map_object->shadow);
4654 vm_object_unlock(upl->map_object->shadow);
4655 }
0b4e3aa0 4656 upl_unlock(upl);
1c79356b
A
4657 return KERN_SUCCESS;
4658}
4659
55e303ae
A
4660
4661
4662kern_return_t
4663vm_object_iopl_request(
4664 vm_object_t object,
4665 vm_object_offset_t offset,
91447636 4666 upl_size_t size,
55e303ae
A
4667 upl_t *upl_ptr,
4668 upl_page_info_array_t user_page_list,
4669 unsigned int *page_list_count,
4670 int cntrl_flags)
4671{
4672 vm_page_t dst_page;
4673 vm_object_offset_t dst_offset = offset;
91447636 4674 upl_size_t xfer_size = size;
55e303ae 4675 upl_t upl = NULL;
91447636
A
4676 unsigned int entry;
4677 wpl_array_t lite_list = NULL;
55e303ae
A
4678 int page_field_size;
4679 int delayed_unlock = 0;
91447636 4680 int no_zero_fill = FALSE;
55e303ae
A
4681 vm_page_t alias_page = NULL;
4682 kern_return_t ret;
4683 vm_prot_t prot;
4684
4685
91447636
A
4686 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4687 /*
4688 * For forward compatibility's sake,
4689 * reject any unknown flag.
4690 */
4691 return KERN_INVALID_VALUE;
4692 }
4693
4694 if (cntrl_flags & UPL_ENCRYPT) {
4695 /*
4696 * ENCRYPTED SWAP:
4697 * The paging path doesn't use this interface,
4698 * so we don't support the UPL_ENCRYPT flag
4699 * here. We won't encrypt the pages.
4700 */
4701 assert(! (cntrl_flags & UPL_ENCRYPT));
4702 }
4703
4704 if (cntrl_flags & UPL_NOZEROFILL)
4705 no_zero_fill = TRUE;
4706
4707 if (cntrl_flags & UPL_COPYOUT_FROM)
55e303ae 4708 prot = VM_PROT_READ;
91447636 4709 else
55e303ae 4710 prot = VM_PROT_READ | VM_PROT_WRITE;
55e303ae
A
4711
4712 if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) {
4713 size = MAX_UPL_TRANSFER * page_size;
4714 }
4715
4716 if(cntrl_flags & UPL_SET_INTERNAL)
4717 if(page_list_count != NULL)
4718 *page_list_count = MAX_UPL_TRANSFER;
4719 if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) &&
4720 ((page_list_count != NULL) && (*page_list_count != 0)
4721 && *page_list_count < (size/page_size)))
4722 return KERN_INVALID_ARGUMENT;
4723
4724 if((!object->internal) && (object->paging_offset != 0))
 4725 panic("vm_object_iopl_request: vnode object with non-zero paging offset\n");
4726
4727 if(object->phys_contiguous) {
4728 /* No paging operations are possible against this memory */
4729 /* and so no need for map object, ever */
4730 cntrl_flags |= UPL_SET_LITE;
4731 }
4732
4733 if(upl_ptr) {
4734 if(cntrl_flags & UPL_SET_INTERNAL) {
4735 if(cntrl_flags & UPL_SET_LITE) {
4736 upl = upl_create(
4737 UPL_CREATE_INTERNAL | UPL_CREATE_LITE,
4738 size);
4739 user_page_list = (upl_page_info_t *)
91447636 4740 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae 4741 lite_list = (wpl_array_t)
91447636 4742 (((uintptr_t)user_page_list) +
55e303ae
A
4743 ((size/PAGE_SIZE) *
4744 sizeof(upl_page_info_t)));
4745 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4746 page_field_size =
4747 (page_field_size + 3) & 0xFFFFFFFC;
4748 bzero((char *)lite_list, page_field_size);
4749 upl->flags =
4750 UPL_LITE | UPL_INTERNAL | UPL_IO_WIRE;
4751 } else {
4752 upl = upl_create(UPL_CREATE_INTERNAL, size);
4753 user_page_list = (upl_page_info_t *)
91447636 4754 (((uintptr_t)upl)
55e303ae
A
4755 + sizeof(struct upl));
4756 upl->flags = UPL_INTERNAL | UPL_IO_WIRE;
4757 }
4758 } else {
4759 if(cntrl_flags & UPL_SET_LITE) {
4760 upl = upl_create(UPL_CREATE_LITE, size);
4761 lite_list = (wpl_array_t)
91447636 4762 (((uintptr_t)upl) + sizeof(struct upl));
55e303ae
A
4763 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4764 page_field_size =
4765 (page_field_size + 3) & 0xFFFFFFFC;
4766 bzero((char *)lite_list, page_field_size);
4767 upl->flags = UPL_LITE | UPL_IO_WIRE;
4768 } else {
4769 upl = upl_create(UPL_CREATE_EXTERNAL, size);
4770 upl->flags = UPL_IO_WIRE;
4771 }
4772 }
4773
4774 if(object->phys_contiguous) {
4775 upl->map_object = object;
4776 /* don't need any shadow mappings for this one */
4777 /* since it is already I/O memory */
4778 upl->flags |= UPL_DEVICE_MEMORY;
4779
4780 vm_object_lock(object);
4781 vm_object_paging_begin(object);
4782 vm_object_unlock(object);
4783
4784 /* paging in progress also protects the paging_offset */
4785 upl->offset = offset + object->paging_offset;
4786 upl->size = size;
4787 *upl_ptr = upl;
4788 if(user_page_list) {
4789 user_page_list[0].phys_addr =
91447636 4790 (offset + object->shadow_offset)>>PAGE_SHIFT;
55e303ae
A
4791 user_page_list[0].device = TRUE;
4792 }
4793
4794 if(page_list_count != NULL) {
4795 if (upl->flags & UPL_INTERNAL) {
4796 *page_list_count = 0;
4797 } else {
4798 *page_list_count = 1;
4799 }
4800 }
4801 return KERN_SUCCESS;
4802 }
4803 if(user_page_list)
4804 user_page_list[0].device = FALSE;
4805
4806 if(cntrl_flags & UPL_SET_LITE) {
4807 upl->map_object = object;
4808 } else {
4809 upl->map_object = vm_object_allocate(size);
4810 vm_object_lock(upl->map_object);
4811 upl->map_object->shadow = object;
4812 upl->map_object->pageout = TRUE;
4813 upl->map_object->can_persist = FALSE;
4814 upl->map_object->copy_strategy =
4815 MEMORY_OBJECT_COPY_NONE;
4816 upl->map_object->shadow_offset = offset;
4817 upl->map_object->wimg_bits = object->wimg_bits;
4818 vm_object_unlock(upl->map_object);
4819 }
4820 }
4821 vm_object_lock(object);
4822 vm_object_paging_begin(object);
4823
4824 if (!object->phys_contiguous) {
4825 /* Protect user space from future COW operations */
4826 object->true_share = TRUE;
4827 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
4828 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4829 }
4830
4831 /* we can lock the upl offset now that paging_in_progress is set */
4832 if(upl_ptr) {
4833 upl->size = size;
4834 upl->offset = offset + object->paging_offset;
4835 *upl_ptr = upl;
91447636 4836#ifdef UPL_DEBUG
55e303ae 4837 queue_enter(&object->uplq, upl, upl_t, uplq);
91447636
A
4838#endif /* UPL_DEBUG */
4839 }
4840
4841 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4842 /*
 4843 * The user requested that access to the pages in this UPL
 4844 * be blocked until the UPL is committed or aborted.
4845 */
4846 upl->flags |= UPL_ACCESS_BLOCKED;
55e303ae
A
4847 }
4848
4849 entry = 0;
4850 while (xfer_size) {
4851 if((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
4852 if (delayed_unlock) {
4853 delayed_unlock = 0;
4854 vm_page_unlock_queues();
4855 }
4856 vm_object_unlock(object);
4857 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4858 vm_object_lock(object);
4859 }
4860 dst_page = vm_page_lookup(object, dst_offset);
4861
91447636
A
4862 /*
4863 * ENCRYPTED SWAP:
4864 * If the page is encrypted, we need to decrypt it,
4865 * so force a soft page fault.
4866 */
55e303ae 4867 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) ||
91447636
A
4868 (dst_page->encrypted) ||
4869 (dst_page->unusual && (dst_page->error ||
4870 dst_page->restart ||
4871 dst_page->absent ||
4872 dst_page->fictitious ||
4873 (prot & dst_page->page_lock)))) {
55e303ae
A
4874 vm_fault_return_t result;
4875 do {
4876 vm_page_t top_page;
4877 kern_return_t error_code;
4878 int interruptible;
4879
4880 vm_object_offset_t lo_offset = offset;
4881 vm_object_offset_t hi_offset = offset + size;
4882
4883
4884 if (delayed_unlock) {
4885 delayed_unlock = 0;
4886 vm_page_unlock_queues();
4887 }
4888
4889 if(cntrl_flags & UPL_SET_INTERRUPTIBLE) {
4890 interruptible = THREAD_ABORTSAFE;
4891 } else {
4892 interruptible = THREAD_UNINT;
4893 }
4894
4895 result = vm_fault_page(object, dst_offset,
4896 prot | VM_PROT_WRITE, FALSE,
4897 interruptible,
4898 lo_offset, hi_offset,
4899 VM_BEHAVIOR_SEQUENTIAL,
4900 &prot, &dst_page, &top_page,
4901 (int *)0,
91447636 4902 &error_code, no_zero_fill, FALSE, NULL, 0);
55e303ae
A
4903
4904 switch(result) {
4905 case VM_FAULT_SUCCESS:
4906
4907 PAGE_WAKEUP_DONE(dst_page);
4908
4909 /*
4910 * Release paging references and
4911 * top-level placeholder page, if any.
4912 */
4913
4914 if(top_page != VM_PAGE_NULL) {
4915 vm_object_t local_object;
4916 local_object =
4917 top_page->object;
4918 if(top_page->object
4919 != dst_page->object) {
4920 vm_object_lock(
4921 local_object);
4922 VM_PAGE_FREE(top_page);
4923 vm_object_paging_end(
4924 local_object);
4925 vm_object_unlock(
4926 local_object);
4927 } else {
4928 VM_PAGE_FREE(top_page);
4929 vm_object_paging_end(
4930 local_object);
4931 }
4932 }
4933
4934 break;
4935
4936
4937 case VM_FAULT_RETRY:
4938 vm_object_lock(object);
4939 vm_object_paging_begin(object);
4940 break;
4941
4942 case VM_FAULT_FICTITIOUS_SHORTAGE:
4943 vm_page_more_fictitious();
4944 vm_object_lock(object);
4945 vm_object_paging_begin(object);
4946 break;
4947
4948 case VM_FAULT_MEMORY_SHORTAGE:
4949 if (vm_page_wait(interruptible)) {
4950 vm_object_lock(object);
4951 vm_object_paging_begin(object);
4952 break;
4953 }
4954 /* fall thru */
4955
4956 case VM_FAULT_INTERRUPTED:
4957 error_code = MACH_SEND_INTERRUPTED;
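 /* fall thru */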
4958 case VM_FAULT_MEMORY_ERROR:
4959 ret = (error_code ? error_code:
4960 KERN_MEMORY_ERROR);
4961 vm_object_lock(object);
4962 for(; offset < dst_offset;
4963 offset += PAGE_SIZE) {
4964 dst_page = vm_page_lookup(
4965 object, offset);
4966 if(dst_page == VM_PAGE_NULL)
4967 panic("vm_object_iopl_request: Wired pages missing. \n");
4968 vm_page_lock_queues();
4969 vm_page_unwire(dst_page);
4970 vm_page_unlock_queues();
4971 VM_STAT(reactivations++);
4972 }
4973 vm_object_unlock(object);
4974 upl_destroy(upl);
4975 return ret;
4976 }
4977 } while ((result != VM_FAULT_SUCCESS)
4978 || (result == VM_FAULT_INTERRUPTED));
4979 }
4980 if (delayed_unlock == 0)
4981 vm_page_lock_queues();
4982 vm_page_wire(dst_page);
4983
91447636
A
4984 if (cntrl_flags & UPL_BLOCK_ACCESS) {
4985 /*
4986 * Mark the page "busy" to block any future page fault
4987 * on this page. We'll also remove the mapping
4988 * of all these pages before leaving this routine.
4989 */
4990 assert(!dst_page->fictitious);
4991 dst_page->busy = TRUE;
4992 }
4993
55e303ae
A
4994 if (upl_ptr) {
4995 if (cntrl_flags & UPL_SET_LITE) {
4996 int pg_num;
4997 pg_num = (dst_offset-offset)/PAGE_SIZE;
4998 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
4999 } else {
5000 /*
5001 * Convert the fictitious page to a
5002 * private shadow of the real page.
5003 */
5004 assert(alias_page->fictitious);
5005 alias_page->fictitious = FALSE;
5006 alias_page->private = TRUE;
5007 alias_page->pageout = TRUE;
5008 alias_page->phys_page = dst_page->phys_page;
5009 vm_page_wire(alias_page);
5010
5011 vm_page_insert(alias_page,
5012 upl->map_object, size - xfer_size);
5013 assert(!alias_page->wanted);
5014 alias_page->busy = FALSE;
5015 alias_page->absent = FALSE;
5016 }
5017
5018 /* expect the page to be used */
5019 dst_page->reference = TRUE;
5020
5021 if (!(cntrl_flags & UPL_COPYOUT_FROM))
5022 dst_page->dirty = TRUE;
5023 alias_page = NULL;
5024
5025 if (user_page_list) {
5026 user_page_list[entry].phys_addr
5027 = dst_page->phys_page;
5028 user_page_list[entry].dirty =
5029 dst_page->dirty;
5030 user_page_list[entry].pageout =
5031 dst_page->pageout;
5032 user_page_list[entry].absent =
5033 dst_page->absent;
5034 user_page_list[entry].precious =
5035 dst_page->precious;
5036 }
5037 }
5038 if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) {
5039 delayed_unlock = 0;
5040 vm_page_unlock_queues();
5041 }
5042 entry++;
5043 dst_offset += PAGE_SIZE_64;
5044 xfer_size -= PAGE_SIZE;
5045 }
5046 if (delayed_unlock)
5047 vm_page_unlock_queues();
5048
5049 if (upl->flags & UPL_INTERNAL) {
5050 if(page_list_count != NULL)
5051 *page_list_count = 0;
 5052 } else if (page_list_count != NULL &&
 5053 *page_list_count > entry) {
 5054 *page_list_count = entry;
5055 }
5056
5057 if (alias_page != NULL) {
5058 vm_page_lock_queues();
5059 vm_page_free(alias_page);
5060 vm_page_unlock_queues();
5061 }
5062
5063 vm_object_unlock(object);
55e303ae 5064
91447636
A
5065 if (cntrl_flags & UPL_BLOCK_ACCESS) {
5066 /*
5067 * We've marked all the pages "busy" so that future
5068 * page faults will block.
5069 * Now remove the mapping for these pages, so that they
5070 * can't be accessed without causing a page fault.
5071 */
5072 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
5073 PMAP_NULL, 0, VM_PROT_NONE);
5074 }
1c79356b 5075
91447636 5076 return KERN_SUCCESS;
1c79356b
A
5077}
5078
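/*
 * Minimal usage sketch (hypothetical caller, not from this file): wiring part
 * of an object for I/O through vm_object_iopl_request() and unwiring it again
 * with upl_commit_range(). The flag combination and the assumption that
 * "size" does not exceed MAX_UPL_TRANSFER pages are illustrative; the UPL
 * itself still has to be released by its owner afterwards.
 */
static kern_return_t
iopl_wire_sketch(vm_object_t object, vm_object_offset_t offset, upl_size_t size)
{
	upl_t upl;
	unsigned int page_count = 0;
	boolean_t empty;
	kern_return_t kr;

	kr = vm_object_iopl_request(object, offset, size, &upl,
	    NULL,			/* internal UPLs carry their own page list */
	    &page_count,
	    UPL_SET_INTERNAL | UPL_SET_LITE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the pages are wired and safe to use for I/O here ... */

	(void) upl_commit_range(upl, 0, size, 0, NULL, 0, &empty);
	return KERN_SUCCESS;
}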
91447636
A
5079kern_return_t
5080upl_transpose(
5081 upl_t upl1,
5082 upl_t upl2)
1c79356b 5083{
91447636
A
5084 kern_return_t retval;
5085 boolean_t upls_locked;
5086 vm_object_t object1, object2;
1c79356b 5087
91447636
A
5088 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) {
5089 return KERN_INVALID_ARGUMENT;
5090 }
5091
5092 upls_locked = FALSE;
1c79356b 5093
91447636
A
5094 /*
5095 * Since we need to lock both UPLs at the same time,
5096 * avoid deadlocks by always taking locks in the same order.
5097 */
5098 if (upl1 < upl2) {
5099 upl_lock(upl1);
5100 upl_lock(upl2);
5101 } else {
5102 upl_lock(upl2);
5103 upl_lock(upl1);
5104 }
5105 upls_locked = TRUE; /* the UPLs will need to be unlocked */
5106
5107 object1 = upl1->map_object;
5108 object2 = upl2->map_object;
5109
5110 if (upl1->offset != 0 || upl2->offset != 0 ||
5111 upl1->size != upl2->size) {
5112 /*
5113 * We deal only with full objects, not subsets.
5114 * That's because we exchange the entire backing store info
5115 * for the objects: pager, resident pages, etc... We can't do
5116 * only part of it.
5117 */
5118 retval = KERN_INVALID_VALUE;
5119 goto done;
5120 }
5121
5122 /*
 5123 * Transpose the VM objects' backing store.
5124 */
5125 retval = vm_object_transpose(object1, object2,
5126 (vm_object_size_t) upl1->size);
5127
5128 if (retval == KERN_SUCCESS) {
5129 /*
5130 * Make each UPL point to the correct VM object, i.e. the
5131 * object holding the pages that the UPL refers to...
5132 */
5133 upl1->map_object = object2;
5134 upl2->map_object = object1;
5135 }
5136
5137done:
5138 /*
5139 * Cleanup.
5140 */
5141 if (upls_locked) {
5142 upl_unlock(upl1);
5143 upl_unlock(upl2);
5144 upls_locked = FALSE;
5145 }
5146
5147 return retval;
5148}
5149
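/*
 * Illustrative note, not from this file, on the lock-ordering idiom used by
 * upl_transpose() above: if one thread called upl_transpose(u1, u2) while
 * another called upl_transpose(u2, u1) and each simply locked its first
 * argument first, each could end up holding one UPL lock while waiting for
 * the other -- a deadlock. Comparing the UPL addresses gives every thread the
 * same global acquisition order. The same idiom, isolated in a helper with a
 * hypothetical name:
 */
static void
upl_lock_pair_sketch(upl_t a, upl_t b)
{
	if (a < b) {
		upl_lock(a);
		upl_lock(b);
	} else {
		upl_lock(b);
		upl_lock(a);
	}
}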
5150/*
5151 * ENCRYPTED SWAP:
5152 *
5153 * Rationale: the user might have some encrypted data on disk (via
5154 * FileVault or any other mechanism). That data is then decrypted in
5155 * memory, which is safe as long as the machine is secure. But that
5156 * decrypted data in memory could be paged out to disk by the default
5157 * pager. The data would then be stored on disk in clear (not encrypted)
5158 * and it could be accessed by anyone who gets physical access to the
5159 * disk (if the laptop or the disk gets stolen for example). This weakens
5160 * the security offered by FileVault.
5161 *
5162 * Solution: the default pager will optionally request that all the
5163 * pages it gathers for pageout be encrypted, via the UPL interfaces,
5164 * before it sends this UPL to disk via the vnode_pageout() path.
5165 *
5166 * Notes:
5167 *
5168 * To avoid disrupting the VM LRU algorithms, we want to keep the
5169 * clean-in-place mechanisms, which allow us to send some extra pages to
5170 * swap (clustering) without actually removing them from the user's
5171 * address space. We don't want the user to unknowingly access encrypted
5172 * data, so we have to actually remove the encrypted pages from the page
5173 * table. When the user accesses the data, the hardware will fail to
5174 * locate the virtual page in its page table and will trigger a page
5175 * fault. We can then decrypt the page and enter it in the page table
5176 * again. Whenever we allow the user to access the contents of a page,
5177 * we have to make sure it's not encrypted.
5178 *
5179 *
5180 */
5181/*
5182 * ENCRYPTED SWAP:
5183 * Reserve of virtual addresses in the kernel address space.
5184 * We need to map the physical pages in the kernel, so that we
5185 * can call the encryption/decryption routines with a kernel
5186 * virtual address. We keep this pool of pre-allocated kernel
5187 * virtual addresses so that we don't have to scan the kernel's
 5188 * virtual address space each time we need to encrypt or decrypt
5189 * a physical page.
5190 * It would be nice to be able to encrypt and decrypt in physical
5191 * mode but that might not always be more efficient...
5192 */
5193decl_simple_lock_data(,vm_paging_lock)
5194#define VM_PAGING_NUM_PAGES 64
5195vm_map_offset_t vm_paging_base_address = 0;
5196boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
5197int vm_paging_max_index = 0;
5198unsigned long vm_paging_no_kernel_page = 0;
5199unsigned long vm_paging_objects_mapped = 0;
5200unsigned long vm_paging_pages_mapped = 0;
5201unsigned long vm_paging_objects_mapped_slow = 0;
5202unsigned long vm_paging_pages_mapped_slow = 0;
5203
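/*
 * Illustrative sketch, not from this file, of the reservation scheme the
 * comment above describes and vm_paging_map_object() implements below: a
 * small fixed pool of pre-allocated kernel-VA slots, each flagged in-use,
 * scanned linearly under vm_paging_lock. The helper name is hypothetical;
 * a return of -1 corresponds to falling back to the slow vm_map_enter() path.
 */
static int
vm_paging_slot_grab_sketch(void)
{
	int i;

	simple_lock(&vm_paging_lock);
	for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
		if (vm_paging_page_inuse[i] == FALSE) {
			vm_paging_page_inuse[i] = TRUE;
			simple_unlock(&vm_paging_lock);
			/* slot i covers vm_paging_base_address + i * PAGE_SIZE */
			return i;
		}
	}
	simple_unlock(&vm_paging_lock);
	return -1;
}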
5204/*
5205 * ENCRYPTED SWAP:
5206 * vm_paging_map_object:
5207 * Maps part of a VM object's pages in the kernel
5208 * virtual address space, using the pre-allocated
5209 * kernel virtual addresses, if possible.
5210 * Context:
5211 * The VM object is locked. This lock will get
5212 * dropped and re-acquired though.
5213 */
5214kern_return_t
5215vm_paging_map_object(
5216 vm_map_offset_t *address,
5217 vm_page_t page,
5218 vm_object_t object,
5219 vm_object_offset_t offset,
5220 vm_map_size_t *size)
5221{
5222 kern_return_t kr;
5223 vm_map_offset_t page_map_offset;
5224 vm_map_size_t map_size;
5225 vm_object_offset_t object_offset;
5226#ifdef __ppc__
5227 int i;
5228 vm_map_entry_t map_entry;
5229#endif /* __ppc__ */
5230
5231
5232#ifdef __ppc__
5233 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
5234 /*
5235 * Optimization for the PowerPC.
5236 * Use one of the pre-allocated kernel virtual addresses
5237 * and just enter the VM page in the kernel address space
5238 * at that virtual address.
5239 */
5240 vm_object_unlock(object);
5241 simple_lock(&vm_paging_lock);
5242
5243 if (vm_paging_base_address == 0) {
5244 /*
5245 * Initialize our pool of pre-allocated kernel
5246 * virtual addresses.
5247 */
5248 simple_unlock(&vm_paging_lock);
5249 page_map_offset = 0;
5250 kr = vm_map_find_space(kernel_map,
5251 &page_map_offset,
5252 VM_PAGING_NUM_PAGES * PAGE_SIZE,
5253 0,
5254 &map_entry);
5255 if (kr != KERN_SUCCESS) {
5256 panic("vm_paging_map_object: "
5257 "kernel_map full\n");
5258 }
5259 map_entry->object.vm_object = kernel_object;
5260 map_entry->offset =
5261 page_map_offset - VM_MIN_KERNEL_ADDRESS;
5262 vm_object_reference(kernel_object);
5263 vm_map_unlock(kernel_map);
5264
5265 simple_lock(&vm_paging_lock);
5266 if (vm_paging_base_address != 0) {
5267 /* someone raced us and won: undo */
5268 simple_unlock(&vm_paging_lock);
5269 kr = vm_map_remove(kernel_map,
5270 page_map_offset,
5271 page_map_offset +
5272 (VM_PAGING_NUM_PAGES
5273 * PAGE_SIZE),
5274 VM_MAP_NO_FLAGS);
5275 assert(kr == KERN_SUCCESS);
5276 simple_lock(&vm_paging_lock);
5277 } else {
5278 vm_paging_base_address = page_map_offset;
5279 }
5280 }
5281
5282 /*
5283 * Try and find an available kernel virtual address
5284 * from our pre-allocated pool.
5285 */
5286 page_map_offset = 0;
5287 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
5288 if (vm_paging_page_inuse[i] == FALSE) {
5289 page_map_offset = vm_paging_base_address +
5290 (i * PAGE_SIZE);
5291 break;
5292 }
5293 }
5294
5295 if (page_map_offset != 0) {
5296 /*
5297 * We found a kernel virtual address;
5298 * map the physical page to that virtual address.
5299 */
5300 if (i > vm_paging_max_index) {
5301 vm_paging_max_index = i;
5302 }
5303 vm_paging_page_inuse[i] = TRUE;
5304 simple_unlock(&vm_paging_lock);
5305 pmap_map_block(kernel_pmap,
5306 page_map_offset,
5307 page->phys_page,
3a60a9f5 5308 1, /* Size is number of 4k pages */
91447636
A
5309 VM_PROT_DEFAULT,
5310 ((int) page->object->wimg_bits &
5311 VM_WIMG_MASK),
5312 0);
5313 vm_paging_objects_mapped++;
5314 vm_paging_pages_mapped++;
5315 *address = page_map_offset;
5316 vm_object_lock(object);
5317
5318 /* all done and mapped, ready to use ! */
5319 return KERN_SUCCESS;
5320 }
5321
5322 /*
5323 * We ran out of pre-allocated kernel virtual
5324 * addresses. Just map the page in the kernel
5325 * the slow and regular way.
5326 */
5327 vm_paging_no_kernel_page++;
5328 simple_unlock(&vm_paging_lock);
5329 vm_object_lock(object);
5330 }
5331#endif /* __ppc__ */
5332
5333 object_offset = vm_object_trunc_page(offset);
5334 map_size = vm_map_round_page(*size);
5335
5336 /*
5337 * Try and map the required range of the object
5338 * in the kernel_map
5339 */
5340
5341 /* don't go beyond the object's end... */
5342 if (object_offset >= object->size) {
5343 map_size = 0;
5344 } else if (map_size > object->size - offset) {
5345 map_size = object->size - offset;
5346 }
5347
5348 vm_object_reference_locked(object); /* for the map entry */
5349 vm_object_unlock(object);
5350
5351 kr = vm_map_enter(kernel_map,
5352 address,
5353 map_size,
5354 0,
5355 VM_FLAGS_ANYWHERE,
5356 object,
5357 object_offset,
5358 FALSE,
5359 VM_PROT_DEFAULT,
5360 VM_PROT_ALL,
5361 VM_INHERIT_NONE);
5362 if (kr != KERN_SUCCESS) {
5363 *address = 0;
5364 *size = 0;
5365 vm_object_deallocate(object); /* for the map entry */
5366 return kr;
5367 }
5368
5369 *size = map_size;
5370
5371 /*
5372 * Enter the mapped pages in the page table now.
5373 */
5374 vm_object_lock(object);
5375 for (page_map_offset = 0;
5376 map_size != 0;
5377 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
5378 unsigned int cache_attr;
5379
5380 page = vm_page_lookup(object, offset + page_map_offset);
5381 if (page == VM_PAGE_NULL) {
5382 panic("vm_paging_map_object: no page !?");
5383 }
5384 if (page->no_isync == TRUE) {
5385 pmap_sync_page_data_phys(page->phys_page);
5386 }
5387 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
5388
5389 PMAP_ENTER(kernel_pmap,
5390 *address + page_map_offset,
5391 page,
5392 VM_PROT_DEFAULT,
5393 cache_attr,
5394 FALSE);
5395 }
5396
5397 vm_paging_objects_mapped_slow++;
5398 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64;
5399
5400 return KERN_SUCCESS;
5401}
5402
5403/*
5404 * ENCRYPTED SWAP:
5405 * vm_paging_unmap_object:
5406 * Unmaps part of a VM object's pages from the kernel
5407 * virtual address space.
5408 * Context:
5409 * The VM object is locked. This lock will get
5410 * dropped and re-acquired though.
5411 */
5412void
5413vm_paging_unmap_object(
5414 vm_object_t object,
5415 vm_map_offset_t start,
5416 vm_map_offset_t end)
5417{
5418 kern_return_t kr;
5419#ifdef __ppc__
5420 int i;
5421#endif /* __ppc__ */
5422
5423 if ((vm_paging_base_address != 0) &&
5424 ((start < vm_paging_base_address) ||
5425 (end > (vm_paging_base_address
5426 + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) {
5427 /*
5428 * We didn't use our pre-allocated pool of
5429 * kernel virtual address. Deallocate the
5430 * virtual memory.
5431 */
5432 if (object != VM_OBJECT_NULL) {
5433 vm_object_unlock(object);
5434 }
5435 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS);
5436 if (object != VM_OBJECT_NULL) {
5437 vm_object_lock(object);
5438 }
5439 assert(kr == KERN_SUCCESS);
5440 } else {
5441 /*
5442 * We used a kernel virtual address from our
5443 * pre-allocated pool. Put it back in the pool
5444 * for next time.
5445 */
5446#ifdef __ppc__
5447 assert(end - start == PAGE_SIZE);
5448 i = (start - vm_paging_base_address) >> PAGE_SHIFT;
5449
5450 /* undo the pmap mapping */
5451 mapping_remove(kernel_pmap, start);
5452
5453 simple_lock(&vm_paging_lock);
5454 vm_paging_page_inuse[i] = FALSE;
5455 simple_unlock(&vm_paging_lock);
5456#endif /* __ppc__ */
5457 }
5458}
5459
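/*
 * Minimal usage sketch (hypothetical caller, not from this file): mapping one
 * busy page into the kernel with vm_paging_map_object(), using it, and then
 * unmapping it with vm_paging_unmap_object(), the same pattern that
 * vm_page_encrypt() and vm_page_decrypt() follow below. The page's object
 * must be locked on entry; both calls may drop and retake that lock.
 */
static kern_return_t
paging_map_sketch(vm_page_t page)
{
	vm_map_offset_t kaddr = 0;
	vm_map_size_t map_size = PAGE_SIZE;
	kern_return_t kr;

	kr = vm_paging_map_object(&kaddr, page, page->object,
	    page->offset, &map_size);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... read or write the page through "kaddr" here ... */

	vm_paging_unmap_object(page->object, kaddr, kaddr + map_size);
	return KERN_SUCCESS;
}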
5460/*
5461 * Encryption data.
5462 * "iv" is the "initial vector". Ideally, we want to
5463 * have a different one for each page we encrypt, so that
5464 * crackers can't find encryption patterns too easily.
5465 */
5466#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */
5467boolean_t swap_crypt_ctx_initialized = FALSE;
5468aes_32t swap_crypt_key[8]; /* big enough for a 256 key */
5469aes_ctx swap_crypt_ctx;
5470const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
5471
5472#if DEBUG
5473boolean_t swap_crypt_ctx_tested = FALSE;
5474unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
5475unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
5476unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
5477#endif /* DEBUG */
5478
5479extern u_long random(void);
5480
5481/*
5482 * Initialize the encryption context: key and key size.
5483 */
5484void swap_crypt_ctx_initialize(void); /* forward */
5485void
5486swap_crypt_ctx_initialize(void)
5487{
5488 unsigned int i;
5489
5490 /*
5491 * No need for locking to protect swap_crypt_ctx_initialized
5492 * because the first use of encryption will come from the
5493 * pageout thread (we won't pagein before there's been a pageout)
5494 * and there's only one pageout thread.
5495 */
5496 if (swap_crypt_ctx_initialized == FALSE) {
5497 for (i = 0;
5498 i < (sizeof (swap_crypt_key) /
5499 sizeof (swap_crypt_key[0]));
5500 i++) {
5501 swap_crypt_key[i] = random();
5502 }
5503 aes_encrypt_key((const unsigned char *) swap_crypt_key,
5504 SWAP_CRYPT_AES_KEY_SIZE,
5505 &swap_crypt_ctx.encrypt);
5506 aes_decrypt_key((const unsigned char *) swap_crypt_key,
5507 SWAP_CRYPT_AES_KEY_SIZE,
5508 &swap_crypt_ctx.decrypt);
5509 swap_crypt_ctx_initialized = TRUE;
5510 }
5511
5512#if DEBUG
5513 /*
5514 * Validate the encryption algorithms.
5515 */
5516 if (swap_crypt_ctx_tested == FALSE) {
5517 /* initialize */
5518 for (i = 0; i < 4096; i++) {
5519 swap_crypt_test_page_ref[i] = (char) i;
5520 }
5521 /* encrypt */
5522 aes_encrypt_cbc(swap_crypt_test_page_ref,
5523 swap_crypt_null_iv,
5524 PAGE_SIZE / AES_BLOCK_SIZE,
5525 swap_crypt_test_page_encrypt,
5526 &swap_crypt_ctx.encrypt);
5527 /* decrypt */
5528 aes_decrypt_cbc(swap_crypt_test_page_encrypt,
5529 swap_crypt_null_iv,
5530 PAGE_SIZE / AES_BLOCK_SIZE,
5531 swap_crypt_test_page_decrypt,
5532 &swap_crypt_ctx.decrypt);
5533 /* compare result with original */
5534 for (i = 0; i < 4096; i ++) {
5535 if (swap_crypt_test_page_decrypt[i] !=
5536 swap_crypt_test_page_ref[i]) {
5537 panic("encryption test failed");
5538 }
5539 }
5540
5541 /* encrypt again */
5542 aes_encrypt_cbc(swap_crypt_test_page_decrypt,
5543 swap_crypt_null_iv,
5544 PAGE_SIZE / AES_BLOCK_SIZE,
5545 swap_crypt_test_page_decrypt,
5546 &swap_crypt_ctx.encrypt);
5547 /* decrypt in place */
5548 aes_decrypt_cbc(swap_crypt_test_page_decrypt,
5549 swap_crypt_null_iv,
5550 PAGE_SIZE / AES_BLOCK_SIZE,
5551 swap_crypt_test_page_decrypt,
5552 &swap_crypt_ctx.decrypt);
5553 for (i = 0; i < 4096; i ++) {
5554 if (swap_crypt_test_page_decrypt[i] !=
5555 swap_crypt_test_page_ref[i]) {
5556 panic("in place encryption test failed");
5557 }
5558 }
5559
5560 swap_crypt_ctx_tested = TRUE;
5561 }
5562#endif /* DEBUG */
5563}
5564
5565/*
5566 * ENCRYPTED SWAP:
5567 * vm_page_encrypt:
5568 * Encrypt the given page, for secure paging.
5569 * The page might already be mapped at kernel virtual
5570 * address "kernel_mapping_offset". Otherwise, we need
5571 * to map it.
5572 *
5573 * Context:
5574 * The page's object is locked, but this lock will be released
5575 * and re-acquired.
5576 * The page is busy and not accessible by users (not entered in any pmap).
5577 */
5578void
5579vm_page_encrypt(
5580 vm_page_t page,
5581 vm_map_offset_t kernel_mapping_offset)
5582{
5583 int clear_refmod = 0;
5584 kern_return_t kr;
5585 boolean_t page_was_referenced;
5586 boolean_t page_was_modified;
5587 vm_map_size_t kernel_mapping_size;
5588 vm_offset_t kernel_vaddr;
5589 union {
5590 unsigned char aes_iv[AES_BLOCK_SIZE];
5591 struct {
5592 memory_object_t pager_object;
5593 vm_object_offset_t paging_offset;
5594 } vm;
5595 } encrypt_iv;
5596
5597 if (! vm_pages_encrypted) {
5598 vm_pages_encrypted = TRUE;
5599 }
5600
5601 assert(page->busy);
5602 assert(page->dirty || page->precious);
5603
5604 if (page->encrypted) {
5605 /*
5606 * Already encrypted: no need to do it again.
5607 */
5608 vm_page_encrypt_already_encrypted_counter++;
5609 return;
5610 }
5611 ASSERT_PAGE_DECRYPTED(page);
5612
5613 /*
5614 * Gather the "reference" and "modified" status of the page.
5615 * We'll restore these values after the encryption, so that
5616 * the encryption is transparent to the rest of the system
5617 * and doesn't impact the VM's LRU logic.
5618 */
5619 page_was_referenced =
5620 (page->reference || pmap_is_referenced(page->phys_page));
5621 page_was_modified =
5622 (page->dirty || pmap_is_modified(page->phys_page));
5623
5624 if (kernel_mapping_offset == 0) {
5625 /*
5626 * The page hasn't already been mapped in kernel space
5627 * by the caller. Map it now, so that we can access
5628 * its contents and encrypt them.
5629 */
5630 kernel_mapping_size = PAGE_SIZE;
5631 kr = vm_paging_map_object(&kernel_mapping_offset,
5632 page,
5633 page->object,
5634 page->offset,
5635 &kernel_mapping_size);
5636 if (kr != KERN_SUCCESS) {
5637 panic("vm_page_encrypt: "
5638 "could not map page in kernel: 0x%x\n",
5639 kr);
5640 }
5641 } else {
5642 kernel_mapping_size = 0;
5643 }
5644 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5645
5646 if (swap_crypt_ctx_initialized == FALSE) {
5647 swap_crypt_ctx_initialize();
5648 }
5649 assert(swap_crypt_ctx_initialized);
5650
5651 /*
5652 * Prepare an "initial vector" for the encryption.
5653 * We use the "pager" and the "paging_offset" for that
5654 * page to obfuscate the encrypted data a bit more and
5655 * prevent crackers from finding patterns that they could
5656 * use to break the key.
5657 */
5658 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
5659 encrypt_iv.vm.pager_object = page->object->pager;
5660 encrypt_iv.vm.paging_offset =
5661 page->object->paging_offset + page->offset;
5662
5663 vm_object_unlock(page->object);
5664
5665 /* encrypt the "initial vector" */
5666 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
5667 swap_crypt_null_iv,
5668 1,
5669 &encrypt_iv.aes_iv[0],
5670 &swap_crypt_ctx.encrypt);
5671
5672 /*
5673 * Encrypt the page.
5674 */
5675 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
5676 &encrypt_iv.aes_iv[0],
5677 PAGE_SIZE / AES_BLOCK_SIZE,
5678 (unsigned char *) kernel_vaddr,
5679 &swap_crypt_ctx.encrypt);
5680
5681 vm_page_encrypt_counter++;
5682
5683 vm_object_lock(page->object);
5684
5685 /*
5686 * Unmap the page from the kernel's address space,
5687 * if we had to map it ourselves. Otherwise, let
5688 * the caller undo the mapping if needed.
5689 */
5690 if (kernel_mapping_size != 0) {
5691 vm_paging_unmap_object(page->object,
5692 kernel_mapping_offset,
5693 kernel_mapping_offset + kernel_mapping_size);
5694 }
5695
5696 /*
5697 * Restore the "reference" and "modified" bits.
5698 * This should clean up any impact the encryption had
5699 * on them.
5700 */
5701 if (! page_was_referenced) {
5702 clear_refmod |= VM_MEM_REFERENCED;
5703 page->reference = FALSE;
5704 }
5705 if (! page_was_modified) {
5706 clear_refmod |= VM_MEM_MODIFIED;
5707 page->dirty = FALSE;
5708 }
5709 if (clear_refmod)
5710 pmap_clear_refmod(page->phys_page, clear_refmod);
5711
5712 page->encrypted = TRUE;
5713}
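/*
 * Illustrative sketch, not part of the original file: the per-page
 * "initial vector" derivation used by vm_page_encrypt() and
 * vm_page_decrypt(), pulled out as a helper.  The (pager, paging
 * offset) pair is zero-padded to one AES block and then encrypted with
 * the swap key and an all-zeroes IV, so that pages at different paging
 * offsets never share an IV.  The helper name is hypothetical.
 */
#if 0	/* illustration only */
static void
swap_crypt_derive_iv_sketch(
	vm_page_t	page,
	unsigned char	iv_out[AES_BLOCK_SIZE])
{
	union {
		unsigned char		aes_iv[AES_BLOCK_SIZE];
		struct {
			memory_object_t		pager_object;
			vm_object_offset_t	paging_offset;
		} vm;
	} iv;

	bzero(&iv.aes_iv[0], sizeof (iv.aes_iv));
	iv.vm.pager_object = page->object->pager;
	iv.vm.paging_offset =
		page->object->paging_offset + page->offset;

	/* one AES block: encrypt the (pager, offset) pair in place */
	aes_encrypt_cbc((const unsigned char *) &iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);
	memcpy(iv_out, &iv.aes_iv[0], AES_BLOCK_SIZE);
}
#endif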
5714
5715/*
5716 * ENCRYPTED SWAP:
5717 * vm_page_decrypt:
5718 * Decrypt the given page.
5719 * The page might already be mapped at kernel virtual
5720 * address "kernel_mapping_offset". Otherwise, we need
5721 * to map it.
5722 *
5723 * Context:
5724 * The page's VM object is locked but will be unlocked and relocked.
5725 * The page is busy and not accessible by users (not entered in any pmap).
5726 */
5727void
5728vm_page_decrypt(
5729 vm_page_t page,
5730 vm_map_offset_t kernel_mapping_offset)
5731{
5732 int clear_refmod = 0;
5733 kern_return_t kr;
5734 vm_map_size_t kernel_mapping_size;
5735 vm_offset_t kernel_vaddr;
5736 boolean_t page_was_referenced;
5737 union {
5738 unsigned char aes_iv[AES_BLOCK_SIZE];
5739 struct {
5740 memory_object_t pager_object;
5741 vm_object_offset_t paging_offset;
5742 } vm;
5743 } decrypt_iv;
5744
5745 assert(page->busy);
5746 assert(page->encrypted);
5747
5748 /*
5749 * Gather the "reference" status of the page.
5750 * We'll restore its value after the decryption, so that
5751 * the decryption is transparent to the rest of the system
5752 * and doesn't impact the VM's LRU logic.
5753 */
5754 page_was_referenced =
5755 (page->reference || pmap_is_referenced(page->phys_page));
5756
5757 if (kernel_mapping_offset == 0) {
5758 /*
5759 * The page hasn't already been mapped in kernel space
5760 * by the caller. Map it now, so that we can access
5761 * its contents and decrypt them.
5762 */
5763 kernel_mapping_size = PAGE_SIZE;
5764 kr = vm_paging_map_object(&kernel_mapping_offset,
5765 page,
5766 page->object,
5767 page->offset,
5768 &kernel_mapping_size);
5769 if (kr != KERN_SUCCESS) {
5770 panic("vm_page_decrypt: "
5771 "could not map page in kernel: 0x%x\n");
5772 }
5773 } else {
5774 kernel_mapping_size = 0;
5775 }
5776 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
5777
5778 assert(swap_crypt_ctx_initialized);
5779
5780 /*
5781 * Prepare an "initial vector" for the decryption.
5782 * It has to be the same as the "initial vector" we
5783 * used to encrypt that page.
5784 */
5785 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
5786 decrypt_iv.vm.pager_object = page->object->pager;
5787 decrypt_iv.vm.paging_offset =
5788 page->object->paging_offset + page->offset;
5789
5790 vm_object_unlock(page->object);
5791
5792 /* encrypt the "initial vector" */
5793 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
5794 swap_crypt_null_iv,
5795 1,
5796 &decrypt_iv.aes_iv[0],
5797 &swap_crypt_ctx.encrypt);
5798
5799 /*
5800 * Decrypt the page.
5801 */
5802 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
5803 &decrypt_iv.aes_iv[0],
5804 PAGE_SIZE / AES_BLOCK_SIZE,
5805 (unsigned char *) kernel_vaddr,
5806 &swap_crypt_ctx.decrypt);
5807 vm_page_decrypt_counter++;
5808
5809 vm_object_lock(page->object);
5810
5811 /*
5812 * Unmap the page from the kernel's address space,
5813 * if we had to map it ourselves. Otherwise, let
5814 * the caller undo the mapping if needed.
5815 */
5816 if (kernel_mapping_size != 0) {
5817 vm_paging_unmap_object(page->object,
5818 kernel_vaddr,
5819 kernel_vaddr + PAGE_SIZE);
5820 }
5821
5822 /*
5823 * After decryption, the page is actually clean.
5824 * It was encrypted as part of paging, which "cleans"
5825 * the "dirty" pages.
 5826	 * No one could have accessed it after it was encrypted,
 5827	 * and the decryption itself doesn't count as dirtying it.
5828 */
5829 page->dirty = FALSE;
5830 clear_refmod = VM_MEM_MODIFIED;
5831
5832 /* restore the "reference" bit */
5833 if (! page_was_referenced) {
5834 page->reference = FALSE;
5835 clear_refmod |= VM_MEM_REFERENCED;
5836 }
5837 pmap_clear_refmod(page->phys_page, clear_refmod);
5838
5839 page->encrypted = FALSE;
5840
5841 /*
5842 * We've just modified the page's contents via the data cache and part
5843 * of the new contents might still be in the cache and not yet in RAM.
5844 * Since the page is now available and might get gathered in a UPL to
5845 * be part of a DMA transfer from a driver that expects the memory to
5846 * be coherent at this point, we have to flush the data cache.
5847 */
5848 pmap_sync_page_data_phys(page->phys_page);
5849 /*
5850 * Since the page is not mapped yet, some code might assume that it
5851 * doesn't need to invalidate the instruction cache when writing to
5852 * that page. That code relies on "no_isync" being set, so that the
 5853	 * caches get synchronized when the page is first mapped.  So we need
5854 * to set "no_isync" here too, despite the fact that we just
5855 * synchronized the caches above...
5856 */
5857 page->no_isync = TRUE;
5858}
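/*
 * Illustrative sketch, not part of the original file: the calling
 * convention that vm_page_encrypt() and vm_page_decrypt() expect.
 * The caller already holds the page's object lock, has marked the
 * page busy and has removed it from all pmaps; passing 0 as the
 * kernel mapping offset asks the routine to map the page itself.
 * The helper name is hypothetical.
 */
#if 0	/* illustration only */
static void
decrypt_if_needed_sketch(
	vm_page_t	page)
{
	/* caller: object locked, page busy, page not entered in any pmap */
	assert(page->busy);

	if (page->encrypted) {
		/*
		 * The object lock is dropped and re-taken around the
		 * actual AES work inside vm_page_decrypt().
		 */
		vm_page_decrypt(page, 0);
	}
	assert(!page->encrypted);
}
#endif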
5859
5860unsigned long upl_encrypt_upls = 0;
5861unsigned long upl_encrypt_pages = 0;
5862
5863/*
5864 * ENCRYPTED SWAP:
5865 *
5866 * upl_encrypt:
5867 * Encrypts all the pages in the UPL, within the specified range.
5868 *
5869 */
5870void
5871upl_encrypt(
5872 upl_t upl,
5873 upl_offset_t crypt_offset,
5874 upl_size_t crypt_size)
5875{
5876 upl_size_t upl_size;
5877 upl_offset_t upl_offset;
5878 vm_object_t upl_object;
5879 vm_page_t page;
5880 vm_object_t shadow_object;
5881 vm_object_offset_t shadow_offset;
5882 vm_object_offset_t paging_offset;
5883 vm_object_offset_t base_offset;
5884
5885 upl_encrypt_upls++;
5886 upl_encrypt_pages += crypt_size / PAGE_SIZE;
5887
5888 upl_lock(upl);
5889
5890 upl_object = upl->map_object;
5891 upl_offset = upl->offset;
5892 upl_size = upl->size;
5893
5894 upl_unlock(upl);
5895
5896 vm_object_lock(upl_object);
5897
5898 /*
5899 * Find the VM object that contains the actual pages.
5900 */
5901 if (upl_object->pageout) {
5902 shadow_object = upl_object->shadow;
5903 /*
5904 * The offset in the shadow object is actually also
5905 * accounted for in upl->offset. It possibly shouldn't be
5906 * this way, but for now don't account for it twice.
5907 */
5908 shadow_offset = 0;
5909 assert(upl_object->paging_offset == 0); /* XXX ? */
5910 vm_object_lock(shadow_object);
5911 } else {
5912 shadow_object = upl_object;
5913 shadow_offset = 0;
5914 }
5915
5916 paging_offset = shadow_object->paging_offset;
5917 vm_object_paging_begin(shadow_object);
5918
5919 if (shadow_object != upl_object) {
5920 vm_object_unlock(shadow_object);
5921 }
5922 vm_object_unlock(upl_object);
5923
5924 base_offset = shadow_offset;
5925 base_offset += upl_offset;
5926 base_offset += crypt_offset;
5927 base_offset -= paging_offset;
5928 /*
5929 * Unmap the pages, so that nobody can continue accessing them while
5930 * they're encrypted. After that point, all accesses to these pages
5931 * will cause a page fault and block while the page is being encrypted
5932 * (busy). After the encryption completes, any access will cause a
5933 * page fault and the page gets decrypted at that time.
5934 */
5935 assert(crypt_offset + crypt_size <= upl_size);
5936 vm_object_pmap_protect(shadow_object,
5937 base_offset,
5938 (vm_object_size_t)crypt_size,
5939 PMAP_NULL,
5940 0,
5941 VM_PROT_NONE);
5942
5943 /* XXX FBDP could the object have changed significantly here ? */
5944 vm_object_lock(shadow_object);
5945
5946 for (upl_offset = 0;
5947 upl_offset < crypt_size;
5948 upl_offset += PAGE_SIZE) {
5949 page = vm_page_lookup(shadow_object,
5950 base_offset + upl_offset);
5951 if (page == VM_PAGE_NULL) {
5952 panic("upl_encrypt: "
5953 "no page for (obj=%p,off=%lld+%d)!\n",
5954 shadow_object,
5955 base_offset,
5956 upl_offset);
5957 }
5958 vm_page_encrypt(page, 0);
5959 }
5960
5961 vm_object_paging_end(shadow_object);
5962 vm_object_unlock(shadow_object);
5963}
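/*
 * Illustrative sketch, not part of the original file: encrypting every
 * page covered by a UPL by passing its full range to upl_encrypt(), as
 * a pageout path might do before handing the pages to the pager.  The
 * offset arithmetic above resolves a UPL-relative offset into a page
 * lookup in the shadow object:
 *	shadow_offset + upl->offset + crypt_offset - paging_offset.
 * The helper name is hypothetical.
 */
#if 0	/* illustration only */
static void
encrypt_whole_upl_sketch(
	upl_t	upl)
{
	upl_size_t	size;

	upl_lock(upl);
	size = upl->size;
	upl_unlock(upl);

	/* encrypt from the start of the UPL through its last page */
	upl_encrypt(upl, 0, size);
}
#endif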
5964
5965vm_size_t
5966upl_get_internal_pagelist_offset(void)
5967{
5968 return sizeof(struct upl);
5969}
5970
5971void
5972upl_set_dirty(
5973 upl_t upl)
5974{
5975 upl->flags |= UPL_CLEAR_DIRTY;
5976}
5977
5978void
5979upl_clear_dirty(
5980 upl_t upl)
5981{
5982 upl->flags &= ~UPL_CLEAR_DIRTY;
5983}
5984
5985
5986#ifdef MACH_BSD
5987
5988boolean_t upl_page_present(upl_page_info_t *upl, int index)
5989{
5990 return(UPL_PAGE_PRESENT(upl, index));
5991}
5992boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
5993{
5994 return(UPL_DIRTY_PAGE(upl, index));
5995}
5996boolean_t upl_valid_page(upl_page_info_t *upl, int index)
5997{
5998 return(UPL_VALID_PAGE(upl, index));
5999}
91447636 6000ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
1c79356b 6001{
91447636 6002 return(UPL_PHYS_PAGE(upl, index));
6003}
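/*
 * Illustrative sketch, not part of the original file: walking the
 * upl_page_info_t array of a UPL with the accessors above and counting
 * the pages that are present, valid and dirty.  The function name is
 * hypothetical.
 */
#if 0	/* illustration only */
static int
count_dirty_upl_pages_sketch(
	upl_page_info_t	*pl,
	int		page_count)
{
	int	i;
	int	ndirty = 0;

	for (i = 0; i < page_count; i++) {
		if (upl_page_present(pl, i) &&
		    upl_valid_page(pl, i) &&
		    upl_dirty_page(pl, i))
			ndirty++;
	}
	return ndirty;
}
#endif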
6004
6005void
6006vm_countdirtypages(void)
6007{
6008 vm_page_t m;
6009 int dpages;
6010 int pgopages;
6011 int precpages;
6012
6013
6014 dpages=0;
6015 pgopages=0;
6016 precpages=0;
6017
6018 vm_page_lock_queues();
6019 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6020 do {
6021 if (m ==(vm_page_t )0) break;
6022
6023 if(m->dirty) dpages++;
6024 if(m->pageout) pgopages++;
6025 if(m->precious) precpages++;
6026
91447636 6027 assert(m->object != kernel_object);
6028 m = (vm_page_t) queue_next(&m->pageq);
6029 if (m ==(vm_page_t )0) break;
6030
6031 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m));
6032 vm_page_unlock_queues();
6033
6034 vm_page_lock_queues();
6035 m = (vm_page_t) queue_first(&vm_page_queue_zf);
6036 do {
6037 if (m ==(vm_page_t )0) break;
6038
6039 if(m->dirty) dpages++;
6040 if(m->pageout) pgopages++;
6041 if(m->precious) precpages++;
6042
91447636 6043 assert(m->object != kernel_object);
6044 m = (vm_page_t) queue_next(&m->pageq);
6045 if (m ==(vm_page_t )0) break;
6046
6047 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m));
6048 vm_page_unlock_queues();
6049
6050 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
6051
6052 dpages=0;
6053 pgopages=0;
6054 precpages=0;
6055
6056 vm_page_lock_queues();
6057 m = (vm_page_t) queue_first(&vm_page_queue_active);
6058
6059 do {
6060 if(m == (vm_page_t )0) break;
6061 if(m->dirty) dpages++;
6062 if(m->pageout) pgopages++;
6063 if(m->precious) precpages++;
6064
91447636 6065 assert(m->object != kernel_object);
6066 m = (vm_page_t) queue_next(&m->pageq);
6067 if(m == (vm_page_t )0) break;
6068
6069 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m));
6070 vm_page_unlock_queues();
6071
6072 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
6073
6074}
6075#endif /* MACH_BSD */
6076
91447636 6077#ifdef UPL_DEBUG
6078kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2)
6079{
6080 upl->ubc_alias1 = alias1;
6081 upl->ubc_alias2 = alias2;
6082 return KERN_SUCCESS;
6083}
6084int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2)
6085{
6086 if(al)
6087 *al = upl->ubc_alias1;
6088 if(al2)
6089 *al2 = upl->ubc_alias2;
6090 return KERN_SUCCESS;
6091}
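/*
 * Illustrative sketch, not part of the original file: tagging a UPL
 * with two debugging aliases and reading them back, using the
 * UPL_DEBUG-only routines above.  The function name and alias values
 * are hypothetical.
 */
#if 0	/* illustration only */
static void
tag_upl_sketch(
	upl_t	upl)
{
	unsigned int	a1, a2;

	upl_ubc_alias_set(upl, 0x55504c31, 0x55504c32);	/* 'UPL1', 'UPL2' */
	upl_ubc_alias_get(upl, &a1, &a2);
	assert(a1 == 0x55504c31 && a2 == 0x55504c32);
}
#endif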
91447636 6092#endif /* UPL_DEBUG */
6093
6094
6095
6096#if MACH_KDB
6097#include <ddb/db_output.h>
6098#include <ddb/db_print.h>
6099#include <vm/vm_print.h>
6100
6101#define printf kdbprintf
6102void db_pageout(void);
6103
6104void
6105db_vm(void)
6106{
6107
6108 iprintf("VM Statistics:\n");
6109 db_indent += 2;
6110 iprintf("pages:\n");
6111 db_indent += 2;
6112 iprintf("activ %5d inact %5d free %5d",
6113 vm_page_active_count, vm_page_inactive_count,
6114 vm_page_free_count);
6115 printf(" wire %5d gobbl %5d\n",
6116 vm_page_wire_count, vm_page_gobble_count);
6117 db_indent -= 2;
6118 iprintf("target:\n");
6119 db_indent += 2;
6120 iprintf("min %5d inact %5d free %5d",
6121 vm_page_free_min, vm_page_inactive_target,
6122 vm_page_free_target);
6123 printf(" resrv %5d\n", vm_page_free_reserved);
6124 db_indent -= 2;
1c79356b 6125 iprintf("pause:\n");
6126 db_pageout();
6127 db_indent -= 2;
6128}
6129
1c79356b 6130#if MACH_COUNTERS
91447636 6131extern int c_laundry_pages_freed;
6132#endif /* MACH_COUNTERS */
6133
6134void
6135db_pageout(void)
6136{
6137 iprintf("Pageout Statistics:\n");
6138 db_indent += 2;
6139 iprintf("active %5d inactv %5d\n",
6140 vm_pageout_active, vm_pageout_inactive);
6141 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
6142 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
6143 vm_pageout_inactive_busy, vm_pageout_inactive_absent);
6144 iprintf("used %5d clean %5d dirty %5d\n",
6145 vm_pageout_inactive_used, vm_pageout_inactive_clean,
6146 vm_pageout_inactive_dirty);
6147#if MACH_COUNTERS
6148 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
6149#endif /* MACH_COUNTERS */
6150#if MACH_CLUSTER_STATS
6151 iprintf("Cluster Statistics:\n");
6152 db_indent += 2;
6153 iprintf("dirtied %5d cleaned %5d collisions %5d\n",
6154 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned,
6155 vm_pageout_cluster_collisions);
6156 iprintf("clusters %5d conversions %5d\n",
6157 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions);
6158 db_indent -= 2;
6159 iprintf("Target Statistics:\n");
6160 db_indent += 2;
6161 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
6162 vm_pageout_target_collisions, vm_pageout_target_page_dirtied,
6163 vm_pageout_target_page_freed);
6164 db_indent -= 2;
6165#endif /* MACH_CLUSTER_STATS */
6166 db_indent -= 2;
6167}
6168
1c79356b 6169#endif /* MACH_KDB */