/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>			/* For pmap_clear_modify */
#include <vm/vm_kern.h>			/* For kernel_map, vm_move */
#include <vm/vm_map.h>			/* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */

#if MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>

memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
decl_lck_mtx_data(,	memory_manager_default_lock)
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Determine whether the given page should be returned,
 *	based on the page's state and on the given return policy.
 *
 *	We should return the page if one of the following is true:
 *
 *	1. Page is dirty and should_return is not RETURN_NONE.
 *	2. Page is precious and should_return is RETURN_ALL.
 *	3. Should_return is RETURN_ANYTHING.
 *
 *	As a side effect, m->dirty will be made consistent
 *	with pmap_is_modified(m), if should_return is not
 *	MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
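
/*
 * Illustrative sketch (not part of the original source): a hypothetical,
 * unrolled function-style equivalent of the macro above, shown only to make
 * the evaluation order explicit -- in particular that m->dirty is refreshed
 * from pmap_is_modified() before the precious/ANYTHING checks.  Disabled
 * with #if 0 so it is never compiled.
 */
#if 0
static boolean_t
memory_object_should_return_page_sketch(
	vm_page_t		m,
	memory_object_return_t	should_return)
{
	if (should_return == MEMORY_OBJECT_RETURN_NONE)
		return FALSE;				/* never return pages */

	/* side effect: make m->dirty agree with the pmap modify bit */
	if (!m->dirty)
		m->dirty = pmap_is_modified(m->phys_page);

	if (m->dirty)
		return TRUE;				/* case 1: dirty page */
	if (m->precious && should_return == MEMORY_OBJECT_RETURN_ALL)
		return TRUE;				/* case 2: precious page */
	if (should_return == MEMORY_OBJECT_RETURN_ANYTHING)
		return TRUE;				/* case 3: return everything */

	return FALSE;
}
#endif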
typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE		0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK	1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN	2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN	3
memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Perform the appropriate lock operations on the
 *	given page.  See the description of
 *	"memory_object_lock_request" for the meanings
 *	of the arguments.
 *
 *	Returns an indication that the operation
 *	completed, blocked, or that the page must
 *	be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    m, should_return, should_flush, prot, 0);
	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning) {
		if (m->list_req_pending &&
		    (m->pageout || m->cleaning) &&
		    should_return == MEMORY_OBJECT_RETURN_NONE &&
		    should_flush == TRUE) {
			/*
			 * if pageout is set, page was earmarked by vm_pageout_scan
			 * to be cleaned and stolen... if cleaning is set, we're
			 * pre-cleaning pages for a hibernate...
			 * in either case, we're going
			 * to take it back since we are being asked to
			 * flush the page w/o cleaning it (i.e. we don't
			 * care that it's dirty, we want it gone from
			 * the cache) and we don't want to stall
			 * waiting for it to be cleaned for 2 reasons...
			 * 1 - no use paging it out since we're probably
			 *     shrinking the file at this point or we no
			 *     longer care about the data in the page
			 * 2 - if we stall, we may cause a deadlock in
			 *     the FS trying to acquire its locks
			 *     on the VNOP_PAGEOUT path presuming that
			 *     those locks are already held on the truncate
			 *     path before calling through to this function
			 *
			 * so undo all of the state that vm_pageout_scan
			 * has set up for it
			 */
			vm_pageout_queue_steal(m, FALSE);
		}
		return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}
	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if (m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no      */
			/* relevant data to return          */
			if ( !VM_PAGE_WIRED(m)) {
				VM_PAGE_FREE(m);
				return (MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return (MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);

	/*
	 *	If the page is wired, just clean or return the page if needed.
	 *	Wired pages don't get flushed or disconnected from the pmap.
	 */

	if (VM_PAGE_WIRED(m)) {
		if (memory_object_should_return_page(m, should_return)) {
			if (m->dirty)
				return (MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
			else
				return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
		}
		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
	}
	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 *
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
	}

	/*
	 *	Handle page returning.
	 */
	if (memory_object_should_return_page(m, should_return)) {
		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */
		vm_page_lockspin_queues();
		VM_PAGE_QUEUES_REMOVE(m);
		vm_page_unlock_queues();

		pmap_disconnect(m->phys_page);

		if (m->dirty)
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	XXX Make clean but not flush a paging hint,
	 *	and deactivate the pages.  This is a hack
	 *	because it overloads flush/clean with
	 *	implementation-dependent meaning. This only
	 *	happens to pages that are already clean.
	 */

	if (vm_page_deactivate_hint &&
	    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
		vm_page_lockspin_queues();
		vm_page_deactivate(m);
		vm_page_unlock_queues();
	}

	return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)    \
MACRO_BEGIN								\
									\
	register int		upl_flags;				\
	memory_object_t		pager;					\
									\
	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
		vm_object_paging_begin(object);				\
		vm_object_unlock(object);				\
									\
		if (iosync)						\
			upl_flags = UPL_MSYNC | UPL_IOSYNC;		\
		else							\
			upl_flags = UPL_MSYNC;				\
									\
		(void) memory_object_data_return(pager,			\
			po,						\
			(memory_object_cluster_size_t)data_cnt,		\
			ro,						\
			ioerr,						\
			(action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\
			!should_flush,					\
			upl_flags);					\
									\
		vm_object_lock(object);					\
		vm_object_paging_end(object);				\
	}								\
MACRO_END
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Control use of the data associated with the given
 *	memory object.  For each page in the given range,
 *	perform the following operations, in order:
 *		1)  restrict access to the page (disallow
 *		    forms specified by "prot");
 *		2)  return data to the manager (if "should_return"
 *		    is RETURN_DIRTY and the page is dirty, or
 *		    "should_return" is RETURN_ALL and the page
 *		    is either dirty or precious); and,
 *		3)  flush the cached copy (if "should_flush"
 *		    is asserted).
 *	The set of pages is defined by a starting offset
 *	("offset") and size ("size").  Only pages with the
 *	same page alignment as the starting offset are
 *	considered.
 *
 *	A single acknowledgement is sent (to the "reply_to"
 *	port) when these actions are complete.  If successful,
 *	the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t	*	resid_offset,
	int			*	io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t	object;

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
			flags |= MEMORY_OBJECT_DATA_FLUSH;
		}
	}
	offset -= object->paging_offset;

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
		vm_object_reap_pages(object, REAP_DATA_FLUSH);
	else
		(void)vm_object_update(object, offset, size, resid_offset,
				       io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
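
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller that asks for an msync-style "write back dirty pages and then
 * drop the cached copies" operation over one page-aligned range of a
 * memory object.  The helper name and its callers are made up for the
 * example; it is disabled with #if 0 so it is never compiled.
 */
#if 0
static kern_return_t
example_flush_range(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	memory_object_size_t	size)
{
	/*
	 * Return dirty/precious pages to the pager, flush the cached
	 * copies, and leave page protection unchanged.
	 */
	return memory_object_lock_request(control,
					  offset,
					  size,
					  NULL,		/* resid_offset: not needed */
					  NULL,		/* io_errno: not needed */
					  MEMORY_OBJECT_RETURN_ALL,
					  MEMORY_OBJECT_DATA_FLUSH,
					  VM_PROT_NO_CHANGE);
}
#endif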
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement.
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1, i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}
/*
 *	Routine:	memory_object_destroy [user interface]
 *
 *	Shut down a memory object, despite the
 *	presence of address map (or other) references
 *	to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should_flush is false and should_return is true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *	FALSE		if no pages were returned to the pager
 *	TRUE		otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
			      MEMORY_OBJECT_RETURN_ALL :
			      MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	memory_object_cluster_size_t	data_cnt = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	next_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {

		/*
		 * Limit the number of pages to be cleaned at once to a contiguous
		 * run, or at most MAX_UPL_TRANSFER size
		 */
		if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {
			LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
					       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
			data_cnt = 0;
		}
		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    object, offset, page_lock_result, 0, 0);

			switch (page_lock_result)
			{
			case MEMORY_OBJECT_LOCK_RESULT_DONE:
				/*
				 *	End of a cluster of dirty pages.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				/*
				 *	Since it is necessary to block,
				 *	clean any dirty pages now.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				/*
				 * The clean and return cases are similar.
				 *
				 * if this would form a discontiguous block,
				 * clean the old pages and start anew.
				 */
				if (data_cnt && pageout_action != page_lock_result) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				if (m->cleaning) {
					PAGE_SLEEP(object, m, THREAD_UNINT);
					continue;
				}
				if (data_cnt == 0) {
					pageout_action = page_lock_result;
					paging_offset = offset;
				}
				data_cnt += PAGE_SIZE;
				next_offset = offset + PAGE_SIZE_64;

				m->list_req_pending = TRUE;

				if (should_flush &&
				    /* let's not flush a wired page... */
				    !VM_PAGE_WIRED(m)) {
					/*
					 * and add additional state
					 */
					vm_page_lockspin_queues();
					vm_page_unlock_queues();
				}
				retval = 1;
				break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
				       data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}
/*
 *	Routine:	vm_object_update
 *
 *	Work function for m_o_lock_request(), vm_o_sync().
 *
 *	Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t		copy_object = VM_OBJECT_NULL;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	vm_fault_return_t	result;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];

	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 *	XXX	NOTE: May want to consider converting this to a page list
	 *	XXX	vm_map_copy interface.  Need to understand object
	 *	XXX	coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			    !(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);

	if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
		int collisions = 0;
		while ((copy_object = object->copy) != VM_OBJECT_NULL) {
			/*
			 * need to do a try here since we're swimming upstream
			 * against the normal lock ordering... however, we need
			 * to hold the object stable until we gain control of the
			 * copy object so we have to be careful how we approach this
			 */
			if (vm_object_lock_try(copy_object)) {
				/*
				 * we 'won' the lock on the copy object...
				 * no need to hold the object lock any longer...
				 * take a real reference on the copy object because
				 * we're going to call vm_fault_page on it which may
				 * under certain conditions drop the lock and the paging
				 * reference we're about to take... the reference
				 * will keep the copy object from going away if that happens
				 */
				vm_object_unlock(object);
				vm_object_reference_locked(copy_object);
				break;
			}
			vm_object_unlock(object);

			collisions++;
			mutex_pause(collisions);

			vm_object_lock(object);
		}
	}
	if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;
		struct vm_object_fault_info fault_info;

		if (copy_object != VM_OBJECT_NULL) {
			/*
			 * translate offset with respect to shadow's offset
			 */
			copy_offset = (offset >= copy_object->shadow_offset) ?
			    (vm_map_offset_t)(offset - copy_object->shadow_offset) :
			    (vm_map_offset_t) 0;

			if (copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/*
			 * clip size with respect to shadow offset
			 */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size - (copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}
			copy_size += copy_offset;

		} else {
			copy_object = object;

			copy_size   = offset + size;
			copy_offset = offset;
		}
		fault_info.interruptible = THREAD_UNINT;
		fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
		fault_info.user_tag  = 0;
		fault_info.lo_offset = copy_offset;
		fault_info.hi_offset = copy_size;
		fault_info.no_cache  = FALSE;
		fault_info.stealth = TRUE;
		fault_info.mark_zf_absent = FALSE;

		vm_object_paging_begin(copy_object);
848 for (i
= copy_offset
; i
< copy_size
; i
+= PAGE_SIZE
) {
849 RETRY_COW_OF_LOCK_REQUEST
:
850 fault_info
.cluster_size
= (vm_size_t
) (copy_size
- i
);
851 assert(fault_info
.cluster_size
== copy_size
- i
);
853 prot
= VM_PROT_WRITE
|VM_PROT_READ
;
854 result
= vm_fault_page(copy_object
, i
,
855 VM_PROT_WRITE
|VM_PROT_READ
,
866 case VM_FAULT_SUCCESS
:
869 page
->object
, top_page
);
870 vm_object_lock(copy_object
);
871 vm_object_paging_begin(copy_object
);
876 vm_page_lockspin_queues();
880 vm_page_deactivate(page
);
881 vm_page_unlock_queues();
883 PAGE_WAKEUP_DONE(page
);
886 prot
= VM_PROT_WRITE
|VM_PROT_READ
;
887 vm_object_lock(copy_object
);
888 vm_object_paging_begin(copy_object
);
889 goto RETRY_COW_OF_LOCK_REQUEST
;
890 case VM_FAULT_INTERRUPTED
:
891 prot
= VM_PROT_WRITE
|VM_PROT_READ
;
892 vm_object_lock(copy_object
);
893 vm_object_paging_begin(copy_object
);
894 goto RETRY_COW_OF_LOCK_REQUEST
;
895 case VM_FAULT_MEMORY_SHORTAGE
:
897 prot
= VM_PROT_WRITE
|VM_PROT_READ
;
898 vm_object_lock(copy_object
);
899 vm_object_paging_begin(copy_object
);
900 goto RETRY_COW_OF_LOCK_REQUEST
;
901 case VM_FAULT_FICTITIOUS_SHORTAGE
:
902 vm_page_more_fictitious();
903 prot
= VM_PROT_WRITE
|VM_PROT_READ
;
904 vm_object_lock(copy_object
);
905 vm_object_paging_begin(copy_object
);
906 goto RETRY_COW_OF_LOCK_REQUEST
;
907 case VM_FAULT_SUCCESS_NO_VM_PAGE
:
908 /* success but no VM page: fail */
909 vm_object_paging_end(copy_object
);
910 vm_object_unlock(copy_object
);
912 case VM_FAULT_MEMORY_ERROR
:
913 if (object
!= copy_object
)
914 vm_object_deallocate(copy_object
);
915 vm_object_lock(object
);
916 goto BYPASS_COW_COPYIN
;
918 panic("vm_object_update: unexpected error 0x%x"
919 " from vm_fault_page()\n", result
);
923 vm_object_paging_end(copy_object
);
	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		if (copy_object != VM_OBJECT_NULL && copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_deallocate(copy_object);
			vm_object_lock(object);
		}
		return KERN_SUCCESS;
	}
	if (copy_object != VM_OBJECT_NULL && copy_object != object) {
		if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
			copy_object->shadow_severed = TRUE;
			copy_object->shadowed = FALSE;
			copy_object->shadow = NULL;
			/*
			 * delete the ref the COW was holding on the target object
			 */
			vm_object_deallocate(object);
		}
		vm_object_unlock(copy_object);
		vm_object_deallocate(copy_object);
		vm_object_lock(object);
	}

BYPASS_COW_COPYIN:
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		m;
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);
->memq
, (queue_entry_t
) m
)) {
989 next
= (vm_page_t
) queue_next(&m
->listq
);
991 if ((m
->offset
>= start
) && (m
->offset
< end
)) {
993 * this is a page we're interested in
994 * try to fit it into a current extent
996 for (n
= 0; n
< num_of_extents
; n
++) {
997 if ((m
->offset
& e_mask
) == extents
[n
].e_base
) {
999 * use (PAGE_SIZE - 1) to determine the
1000 * max offset so that we don't wrap if
1001 * we're at the last page of the space
1003 if (m
->offset
< extents
[n
].e_min
)
1004 extents
[n
].e_min
= m
->offset
;
1005 else if ((m
->offset
+ (PAGE_SIZE
- 1)) > extents
[n
].e_max
)
1006 extents
[n
].e_max
= m
->offset
+ (PAGE_SIZE
- 1);
1010 if (n
== num_of_extents
) {
1012 * didn't find a current extent that can encompass
1015 if (n
< MAX_EXTENTS
) {
1017 * if we still have room,
1018 * create a new extent
1020 extents
[n
].e_base
= m
->offset
& e_mask
;
1021 extents
[n
].e_min
= m
->offset
;
1022 extents
[n
].e_max
= m
->offset
+ (PAGE_SIZE
- 1);
1027 * no room to create a new extent...
1028 * fall back to a single extent based
1029 * on the min and max page offsets
1030 * we find in the range we're interested in...
1031 * first, look through the extent list and
1032 * develop the overall min and max for the
1033 * pages we've looked at up to this point
1035 for (n
= 1; n
< num_of_extents
; n
++) {
1036 if (extents
[n
].e_min
< extents
[0].e_min
)
1037 extents
[0].e_min
= extents
[n
].e_min
;
1038 if (extents
[n
].e_max
> extents
[0].e_max
)
1039 extents
[0].e_max
= extents
[n
].e_max
;
1042 * now setup to run through the remaining pages
1043 * to determine the overall min and max
1044 * offset for the specified range
1046 extents
[0].e_base
= 0;
1051 * by continuing, we'll reprocess the
1052 * page that forced us to abandon trying
1053 * to develop multiple extents
1062 extents
[0].e_min
= offset
;
1063 extents
[0].e_max
= offset
+ (size
- 1);
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
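
/*
 * Illustrative sketch (not part of the original source): the extent
 * pre-flight above buckets resident pages into EXTENT_SIZE-aligned regions.
 * The hypothetical helper below just walks through the e_base/e_min/e_max
 * arithmetic for one made-up page offset; it is disabled with #if 0 so it
 * is never compiled.
 */
#if 0
static void
example_extent_bucketing(void)
{
	vm_object_size_t	e_mask;
	vm_object_offset_t	page_offset;
	vm_object_offset_t	e_base, e_min, e_max;

	/* EXTENT_SIZE is 256MB (2^28), so e_mask clears the low 28 bits */
	e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

	/* a page at 256MB + 3 pages falls in the second 256MB bucket */
	page_offset = (vm_object_offset_t)EXTENT_SIZE + 3 * PAGE_SIZE_64;

	e_base = page_offset & e_mask;		/* bucket base (0x10000000)        */
	e_min  = page_offset;			/* lowest page offset seen so far  */
	e_max  = page_offset + (PAGE_SIZE - 1);	/* avoids wrapping at end of space */

	(void)e_base; (void)e_min; (void)e_max;
}
#endif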
/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	memory_object_size_t	length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
static kern_return_t
vm_object_set_attributes_common(
	vm_object_t	object,
	boolean_t	may_cache,
	memory_object_copy_strategy_t copy_strategy,
	boolean_t	temporary,
	boolean_t	silent_overwrite,
	boolean_t	advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
	case MEMORY_OBJECT_COPY_DELAY:
		break;
	default:
		return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */

	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed. See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
	invalidate = object->invalidate;

	vm_object_unlock(object);

	switch (flavor) {
1244 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO
:
1246 old_memory_object_behave_info_t behave
;
1248 if (count
!= OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1249 result
= KERN_INVALID_ARGUMENT
;
1253 behave
= (old_memory_object_behave_info_t
) attributes
;
1255 temporary
= behave
->temporary
;
1256 invalidate
= behave
->invalidate
;
1257 copy_strategy
= behave
->copy_strategy
;
1262 case MEMORY_OBJECT_BEHAVIOR_INFO
:
1264 memory_object_behave_info_t behave
;
1266 if (count
!= MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1267 result
= KERN_INVALID_ARGUMENT
;
1271 behave
= (memory_object_behave_info_t
) attributes
;
1273 temporary
= behave
->temporary
;
1274 invalidate
= behave
->invalidate
;
1275 copy_strategy
= behave
->copy_strategy
;
1276 silent_overwrite
= behave
->silent_overwrite
;
1277 advisory_pageout
= behave
->advisory_pageout
;
1281 case MEMORY_OBJECT_PERFORMANCE_INFO
:
1283 memory_object_perf_info_t perf
;
1285 if (count
!= MEMORY_OBJECT_PERF_INFO_COUNT
) {
1286 result
= KERN_INVALID_ARGUMENT
;
1290 perf
= (memory_object_perf_info_t
) attributes
;
1292 may_cache
= perf
->may_cache
;
1297 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO
:
1299 old_memory_object_attr_info_t attr
;
1301 if (count
!= OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1302 result
= KERN_INVALID_ARGUMENT
;
1306 attr
= (old_memory_object_attr_info_t
) attributes
;
1308 may_cache
= attr
->may_cache
;
1309 copy_strategy
= attr
->copy_strategy
;
1314 case MEMORY_OBJECT_ATTRIBUTE_INFO
:
1316 memory_object_attr_info_t attr
;
1318 if (count
!= MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1319 result
= KERN_INVALID_ARGUMENT
;
1323 attr
= (memory_object_attr_info_t
) attributes
;
1325 copy_strategy
= attr
->copy_strategy
;
1326 may_cache
= attr
->may_cache_object
;
1327 temporary
= attr
->temporary
;
1333 result
= KERN_INVALID_ARGUMENT
;
1337 if (result
!= KERN_SUCCESS
)
1340 if (copy_strategy
== MEMORY_OBJECT_COPY_TEMPORARY
) {
1341 copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
1348 * XXX may_cache may become a tri-valued variable to handle
1349 * XXX uncache if not in use.
1351 return (vm_object_set_attributes_common(object
,
1360 memory_object_get_attributes(
1361 memory_object_control_t control
,
1362 memory_object_flavor_t flavor
,
1363 memory_object_info_t attributes
, /* pointer to OUT array */
1364 mach_msg_type_number_t
*count
) /* IN/OUT */
1366 kern_return_t ret
= KERN_SUCCESS
;
1369 object
= memory_object_control_to_vm_object(control
);
1370 if (object
== VM_OBJECT_NULL
)
1371 return (KERN_INVALID_ARGUMENT
);
1373 vm_object_lock(object
);
1376 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO
:
1378 old_memory_object_behave_info_t behave
;
1380 if (*count
< OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1381 ret
= KERN_INVALID_ARGUMENT
;
1385 behave
= (old_memory_object_behave_info_t
) attributes
;
1386 behave
->copy_strategy
= object
->copy_strategy
;
1387 behave
->temporary
= object
->temporary
;
1388 #if notyet /* remove when vm_msync complies and clean in place fini */
1389 behave
->invalidate
= object
->invalidate
;
1391 behave
->invalidate
= FALSE
;
1394 *count
= OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
1398 case MEMORY_OBJECT_BEHAVIOR_INFO
:
1400 memory_object_behave_info_t behave
;
1402 if (*count
< MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1403 ret
= KERN_INVALID_ARGUMENT
;
1407 behave
= (memory_object_behave_info_t
) attributes
;
1408 behave
->copy_strategy
= object
->copy_strategy
;
1409 behave
->temporary
= object
->temporary
;
1410 #if notyet /* remove when vm_msync complies and clean in place fini */
1411 behave
->invalidate
= object
->invalidate
;
1413 behave
->invalidate
= FALSE
;
1415 behave
->advisory_pageout
= object
->advisory_pageout
;
1416 behave
->silent_overwrite
= object
->silent_overwrite
;
1417 *count
= MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
1421 case MEMORY_OBJECT_PERFORMANCE_INFO
:
1423 memory_object_perf_info_t perf
;
1425 if (*count
< MEMORY_OBJECT_PERF_INFO_COUNT
) {
1426 ret
= KERN_INVALID_ARGUMENT
;
1430 perf
= (memory_object_perf_info_t
) attributes
;
1431 perf
->cluster_size
= PAGE_SIZE
;
1432 perf
->may_cache
= object
->can_persist
;
1434 *count
= MEMORY_OBJECT_PERF_INFO_COUNT
;
1438 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO
:
1440 old_memory_object_attr_info_t attr
;
1442 if (*count
< OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1443 ret
= KERN_INVALID_ARGUMENT
;
1447 attr
= (old_memory_object_attr_info_t
) attributes
;
1448 attr
->may_cache
= object
->can_persist
;
1449 attr
->copy_strategy
= object
->copy_strategy
;
1451 *count
= OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
;
1455 case MEMORY_OBJECT_ATTRIBUTE_INFO
:
1457 memory_object_attr_info_t attr
;
1459 if (*count
< MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1460 ret
= KERN_INVALID_ARGUMENT
;
1464 attr
= (memory_object_attr_info_t
) attributes
;
1465 attr
->copy_strategy
= object
->copy_strategy
;
1466 attr
->cluster_size
= PAGE_SIZE
;
1467 attr
->may_cache_object
= object
->can_persist
;
1468 attr
->temporary
= object
->temporary
;
1470 *count
= MEMORY_OBJECT_ATTR_INFO_COUNT
;
1475 ret
= KERN_INVALID_ARGUMENT
;
1479 vm_object_unlock(object
);
1486 memory_object_iopl_request(
1488 memory_object_offset_t offset
,
1489 upl_size_t
*upl_size
,
1491 upl_page_info_array_t user_page_list
,
1492 unsigned int *page_list_count
,
1499 caller_flags
= *flags
;
1501 if (caller_flags
& ~UPL_VALID_FLAGS
) {
1503 * For forward compatibility's sake,
1504 * reject any unknown flag.
1506 return KERN_INVALID_VALUE
;
1509 if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
1510 vm_named_entry_t named_entry
;
1512 named_entry
= (vm_named_entry_t
)port
->ip_kobject
;
1513 /* a few checks to make sure user is obeying rules */
1514 if(*upl_size
== 0) {
1515 if(offset
>= named_entry
->size
)
1516 return(KERN_INVALID_RIGHT
);
1517 *upl_size
= (upl_size_t
)(named_entry
->size
- offset
);
1518 if (*upl_size
!= named_entry
->size
- offset
)
1519 return KERN_INVALID_ARGUMENT
;
1521 if(caller_flags
& UPL_COPYOUT_FROM
) {
1522 if((named_entry
->protection
& VM_PROT_READ
)
1524 return(KERN_INVALID_RIGHT
);
1527 if((named_entry
->protection
&
1528 (VM_PROT_READ
| VM_PROT_WRITE
))
1529 != (VM_PROT_READ
| VM_PROT_WRITE
)) {
1530 return(KERN_INVALID_RIGHT
);
1533 if(named_entry
->size
< (offset
+ *upl_size
))
1534 return(KERN_INVALID_ARGUMENT
);
1536 /* the callers parameter offset is defined to be the */
1537 /* offset from beginning of named entry offset in object */
1538 offset
= offset
+ named_entry
->offset
;
1540 if(named_entry
->is_sub_map
)
1541 return (KERN_INVALID_ARGUMENT
);
1543 named_entry_lock(named_entry
);
1545 if (named_entry
->is_pager
) {
1546 object
= vm_object_enter(named_entry
->backing
.pager
,
1547 named_entry
->offset
+ named_entry
->size
,
1548 named_entry
->internal
,
1551 if (object
== VM_OBJECT_NULL
) {
1552 named_entry_unlock(named_entry
);
1553 return(KERN_INVALID_OBJECT
);
1556 /* JMM - drop reference on pager here? */
1558 /* create an extra reference for the named entry */
1559 vm_object_lock(object
);
1560 vm_object_reference_locked(object
);
1561 named_entry
->backing
.object
= object
;
1562 named_entry
->is_pager
= FALSE
;
1563 named_entry_unlock(named_entry
);
1565 /* wait for object to be ready */
1566 while (!object
->pager_ready
) {
1567 vm_object_wait(object
,
1568 VM_OBJECT_EVENT_PAGER_READY
,
1570 vm_object_lock(object
);
1572 vm_object_unlock(object
);
1574 /* This is the case where we are going to map */
1575 /* an already mapped object. If the object is */
1576 /* not ready it is internal. An external */
1577 /* object cannot be mapped until it is ready */
1578 /* we can therefore avoid the ready check */
1580 object
= named_entry
->backing
.object
;
1581 vm_object_reference(object
);
1582 named_entry_unlock(named_entry
);
1584 } else if (ip_kotype(port
) == IKOT_MEM_OBJ_CONTROL
) {
1585 memory_object_control_t control
;
1586 control
= (memory_object_control_t
) port
;
1587 if (control
== NULL
)
1588 return (KERN_INVALID_ARGUMENT
);
1589 object
= memory_object_control_to_vm_object(control
);
1590 if (object
== VM_OBJECT_NULL
)
1591 return (KERN_INVALID_ARGUMENT
);
1592 vm_object_reference(object
);
1594 return KERN_INVALID_ARGUMENT
;
1596 if (object
== VM_OBJECT_NULL
)
1597 return (KERN_INVALID_ARGUMENT
);
1599 if (!object
->private) {
1600 if (*upl_size
> (MAX_UPL_TRANSFER
*PAGE_SIZE
))
1601 *upl_size
= (MAX_UPL_TRANSFER
*PAGE_SIZE
);
1602 if (object
->phys_contiguous
) {
1603 *flags
= UPL_PHYS_CONTIG
;
1608 *flags
= UPL_DEV_MEMORY
| UPL_PHYS_CONTIG
;
1611 ret
= vm_object_iopl_request(object
,
1618 vm_object_deallocate(object
);
/*
 *	Routine:	memory_object_upl_request [interface]
 *
 *	Cause the population of a portion of a vm_object.
 *	Depending on the nature of the request, the pages
 *	returned may contain valid data or be uninitialized.
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_TERMINATED);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}

/*
 *	Routine:	memory_object_super_upl_request [interface]
 *
 *	Cause the population of a portion of a vm_object
 *	in much the same way as memory_object_upl_request.
 *	Depending on the nature of the request, the pages
 *	returned may contain valid data or be uninitialized.
 *	However, the region may be expanded up to the super
 *	cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}
kern_return_t
memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
			   vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL || object->paging_offset > *start)
		return (KERN_INVALID_ARGUMENT);

	*start -= object->paging_offset;

	vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);

	*start += object->paging_offset;

	return (KERN_SUCCESS);
}


int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *	Routine:	host_default_memory_manager [interface]
 *
 *	set/get the default memory manager port and default cluster
 *	size.
 *
 *	If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	__unused memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;
	kern_return_t result = KERN_SUCCESS;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	returned_manager = MEMORY_OBJECT_DEFAULT_NULL;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		returned_manager = current_manager;
		memory_object_default_reference(returned_manager);
	} else {
		/*
		 *	If this is the first non-null manager, start
		 *	up the internal pager support.
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
			result = vm_pageout_internal_start();
			if (result != KERN_SUCCESS)
				goto out;
		}

		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	manager.
		 */
		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);

		/*
		 * Now that we have a default pager for anonymous memory,
		 * reactivate all the throttled pages (i.e. dirty pages with
		 * no pager).
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
			vm_page_reactivate_all_throttled();
		}
	}
 out:
	lck_mtx_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return (result);
}
/*
 *	Routine:	memory_manager_default_reference
 *
 *	Returns a naked send right for the default
 *	memory manager.  The returned right is always
 *	valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(void)
{
	memory_object_default_t current_manager;

	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = lck_mtx_sleep(&memory_manager_default_lock,
				    LCK_SLEEP_DEFAULT,
				    (event_t) &memory_manager_default,
				    THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	lck_mtx_unlock(&memory_manager_default_lock);

	return current_manager;
}

/*
 *	Routine:	memory_manager_default_check
 *
 *	Check whether a default memory manager has been set
 *	up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *	and KERN_FAILURE if dmm does not exist.
 *
 *	If there is no default memory manager, log an error,
 *	but only the first time.
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	lck_mtx_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;

		logged = TRUE;
		lck_mtx_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		lck_mtx_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}

__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_page_op(object, offset, ops, phys_entry, flags);
}

/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_range_op(object,
				  offset_beg,
				  offset_end,
				  ops,
				  (uint32_t *) range);
}
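
/*
 * Illustrative sketch (not part of the original source): the comment above
 * says memory_object_range_op covers a whole range in one call where a
 * caller would otherwise loop over memory_object_page_op.  The "ops" value
 * is left abstract here (the real op bits are defined with the UPL
 * interfaces, not in this file).  Disabled with #if 0 so it is never
 * compiled.
 */
#if 0
static kern_return_t
example_op_over_range(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops)
{
	memory_object_offset_t	offset;
	kern_return_t		kr;
	ppnum_t			phys_entry;
	int			flags;
	int			range = 0;

	/* one call per page: pays the per-call overhead repeatedly */
	for (offset = offset_beg; offset < offset_end; offset += PAGE_SIZE_64) {
		kr = memory_object_page_op(control, offset, ops, &phys_entry, &flags);
		if (kr != KERN_SUCCESS)
			return kr;
	}

	/* one call for the whole range: no per-page state comes back */
	return memory_object_range_op(control, offset_beg, offset_end, ops, &range);
}
#endif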
kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t		*has_pages_resident)
{
	vm_object_t	object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}
kern_return_t
memory_object_signed(
	memory_object_control_t	control,
	boolean_t		is_signed)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_lock(object);
	object->code_signed = is_signed;
	vm_object_unlock(object);

	return KERN_SUCCESS;
}

static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
}
1977 __private_extern__ memory_object_control_t
1978 memory_object_control_allocate(
1981 memory_object_control_t control
;
1983 control
= (memory_object_control_t
)zalloc(mem_obj_control_zone
);
1984 if (control
!= MEMORY_OBJECT_CONTROL_NULL
) {
1985 control
->moc_object
= object
;
1986 control
->moc_ikot
= IKOT_MEM_OBJ_CONTROL
; /* fake ip_kotype */
1991 __private_extern__
void
1992 memory_object_control_collapse(
1993 memory_object_control_t control
,
1996 assert((control
->moc_object
!= VM_OBJECT_NULL
) &&
1997 (control
->moc_object
!= object
));
1998 control
->moc_object
= object
;
2001 __private_extern__ vm_object_t
2002 memory_object_control_to_vm_object(
2003 memory_object_control_t control
)
2005 if (control
== MEMORY_OBJECT_CONTROL_NULL
||
2006 control
->moc_ikot
!= IKOT_MEM_OBJ_CONTROL
)
2007 return VM_OBJECT_NULL
;
2009 return (control
->moc_object
);
2012 memory_object_control_t
2013 convert_port_to_mo_control(
2014 __unused mach_port_t port
)
2016 return MEMORY_OBJECT_CONTROL_NULL
;
2021 convert_mo_control_to_port(
2022 __unused memory_object_control_t control
)
2024 return MACH_PORT_NULL
;
2028 memory_object_control_reference(
2029 __unused memory_object_control_t control
)
/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch to real reference
 * counting in true port-less EMMI).
 */
2040 memory_object_control_deallocate(
2041 memory_object_control_t control
)
2043 zfree(mem_obj_control_zone
, control
);
2047 memory_object_control_disable(
2048 memory_object_control_t control
)
2050 assert(control
->moc_object
!= VM_OBJECT_NULL
);
2051 control
->moc_object
= VM_OBJECT_NULL
;
2055 memory_object_default_reference(
2056 memory_object_default_t dmm
)
2058 ipc_port_make_send(dmm
);
2062 memory_object_default_deallocate(
2063 memory_object_default_t dmm
)
2065 ipc_port_release_send(dmm
);
2069 convert_port_to_memory_object(
2070 __unused mach_port_t port
)
2072 return (MEMORY_OBJECT_NULL
);
2077 convert_memory_object_to_port(
2078 __unused memory_object_t object
)
2080 return (MACH_PORT_NULL
);
2084 /* Routine memory_object_reference */
2085 void memory_object_reference(
2086 memory_object_t memory_object
)
2088 (memory_object
->mo_pager_ops
->memory_object_reference
)(
2092 /* Routine memory_object_deallocate */
2093 void memory_object_deallocate(
2094 memory_object_t memory_object
)
2096 (memory_object
->mo_pager_ops
->memory_object_deallocate
)(
2101 /* Routine memory_object_init */
2102 kern_return_t memory_object_init
2104 memory_object_t memory_object
,
2105 memory_object_control_t memory_control
,
2106 memory_object_cluster_size_t memory_object_page_size
2109 return (memory_object
->mo_pager_ops
->memory_object_init
)(
2112 memory_object_page_size
);
2115 /* Routine memory_object_terminate */
2116 kern_return_t memory_object_terminate
2118 memory_object_t memory_object
2121 return (memory_object
->mo_pager_ops
->memory_object_terminate
)(
2125 /* Routine memory_object_data_request */
2126 kern_return_t memory_object_data_request
2128 memory_object_t memory_object
,
2129 memory_object_offset_t offset
,
2130 memory_object_cluster_size_t length
,
2131 vm_prot_t desired_access
,
2132 memory_object_fault_info_t fault_info
2135 return (memory_object
->mo_pager_ops
->memory_object_data_request
)(
2143 /* Routine memory_object_data_return */
2144 kern_return_t memory_object_data_return
2146 memory_object_t memory_object
,
2147 memory_object_offset_t offset
,
2148 memory_object_cluster_size_t size
,
2149 memory_object_offset_t
*resid_offset
,
2152 boolean_t kernel_copy
,
2156 return (memory_object
->mo_pager_ops
->memory_object_data_return
)(
2167 /* Routine memory_object_data_initialize */
2168 kern_return_t memory_object_data_initialize
2170 memory_object_t memory_object
,
2171 memory_object_offset_t offset
,
2172 memory_object_cluster_size_t size
2175 return (memory_object
->mo_pager_ops
->memory_object_data_initialize
)(
2181 /* Routine memory_object_data_unlock */
2182 kern_return_t memory_object_data_unlock
2184 memory_object_t memory_object
,
2185 memory_object_offset_t offset
,
2186 memory_object_size_t size
,
2187 vm_prot_t desired_access
2190 return (memory_object
->mo_pager_ops
->memory_object_data_unlock
)(
2197 /* Routine memory_object_synchronize */
2198 kern_return_t memory_object_synchronize
2200 memory_object_t memory_object
,
2201 memory_object_offset_t offset
,
2202 memory_object_size_t size
,
2203 vm_sync_t sync_flags
2206 return (memory_object
->mo_pager_ops
->memory_object_synchronize
)(
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * reclaimed.
 */

/* Routine memory_object_map */
kern_return_t memory_object_map
(
	memory_object_t memory_object,
	vm_prot_t prot
)
{
	return (memory_object->mo_pager_ops->memory_object_map)(
		memory_object,
		prot);
}

/* Routine memory_object_last_unmap */
kern_return_t memory_object_last_unmap
(
	memory_object_t memory_object
)
{
	return (memory_object->mo_pager_ops->memory_object_last_unmap)(
		memory_object);
}
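
/*
 * Illustrative sketch (not part of the original source): the comment above
 * describes how a pager can count mappings via memory_object_map() /
 * memory_object_last_unmap().  The structure and function names below are
 * hypothetical (no real pager is quoted); the point is only that the two
 * calls arrive serialized, so a simple per-object counter suffices.
 * Disabled with #if 0 so it is never compiled.
 */
#if 0
/* hypothetical per-memory-object bookkeeping kept by a pager */
struct example_pager_state {
	memory_object_t		ep_mem_obj;	/* the memory object we track      */
	int			ep_map_count;	/* mappings currently outstanding  */
};

static kern_return_t
example_pager_map(struct example_pager_state *state, vm_prot_t prot)
{
	/* calls are serialized by VM, so no extra locking is shown here */
	state->ep_map_count++;
	(void) prot;		/* a real pager might remember max protections */
	return KERN_SUCCESS;
}

static kern_return_t
example_pager_last_unmap(struct example_pager_state *state)
{
	state->ep_map_count = 0;
	/*
	 * all mappings are gone: the pager may now decide to drop the extra
	 * "named" reference it holds so the VM object can be reclaimed
	 */
	return KERN_SUCCESS;
}
#endif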
/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}

upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t	upl;

	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
		return (upl_t) NULL;
	}
	upl = (upl_t) port->ip_kobject;
	return upl;
}

mach_port_t
convert_upl_to_port(
	__unused upl_t	upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t		port,
	__unused mach_port_mscount_t	mscount)
{
	return;
}