/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */

#if MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>
memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
decl_lck_mtx_data(,	memory_manager_default_lock)
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
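/*
 * Illustrative sketch (added commentary, not part of the original source):
 * how a caller might consult the policy macro above for a single page.
 * "example_should_clean" is a hypothetical helper that exists only for
 * illustration; the snippet is compiled out.
 */
#if 0	/* example only */
static boolean_t
example_should_clean(vm_page_t m)
{
	/* dirty or precious pages are returned under RETURN_ALL */
	return memory_object_should_return_page(m, MEMORY_OBJECT_RETURN_ALL) ?
	       TRUE : FALSE;
}
#endif	/* example only */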
typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE		0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK	1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN	2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN	3
memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    m, should_return, should_flush, prot, 0);
	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning) {
		if (m->list_req_pending && m->pageout &&
		    should_return == MEMORY_OBJECT_RETURN_NONE &&
		    should_flush == TRUE) {
			/*
			 * page was earmarked by vm_pageout_scan
			 * to be cleaned and stolen... we're going
			 * to take it back since we are being asked to
			 * flush the page w/o cleaning it (i.e. we don't
			 * care that it's dirty, we want it gone from
			 * the cache) and we don't want to stall
			 * waiting for it to be cleaned for 2 reasons...
			 * 1 - no use paging it out since we're probably
			 *     shrinking the file at this point or we no
			 *     longer care about the data in the page
			 * 2 - if we stall, we may cause a deadlock in
			 *     the FS trying to acquire its locks
			 *     on the VNOP_PAGEOUT path presuming that
			 *     those locks are already held on the truncate
			 *     path before calling through to this function
			 *
			 * so undo all of the state that vm_pageout_scan
			 * has set up for the page... if we end up flushing
			 * the page, it will be freed... if we don't, it will
			 * get cleaned in the next pass
			 */
			vm_pageout_queue_steal(m, FALSE);
		} else
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}
	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if (m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no */
			/* relevant data to return */
			if ( !VM_PAGE_WIRED(m)) {
				VM_PAGE_FREE(m);
				return (MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return (MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);
	/*
	 *	If the page is wired, just clean or return the page if needed.
	 *	Wired pages don't get flushed or disconnected from the pmap.
	 */

	if (VM_PAGE_WIRED(m)) {
		if (memory_object_should_return_page(m, should_return)) {
			if (m->dirty)
				return (MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
			else
				return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
		}

		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
	}
	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 */
	if (should_flush)
		prot = VM_PROT_ALL;

	/*
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
	}

	/*
	 *	Handle page returning.
	 */
	if (memory_object_should_return_page(m, should_return)) {
		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */
		if (!should_flush) {
			vm_page_lockspin_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			vm_page_unlock_queues();

			pmap_disconnect(m->phys_page);
		}

		if (m->dirty)
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}
	/*
	 *	XXX Make clean but not flush a paging hint,
	 *	and deactivate the pages.  This is a hack
	 *	because it overloads flush/clean with
	 *	implementation-dependent meaning.  This only
	 *	happens to pages that are already clean.
	 */

	if (vm_page_deactivate_hint &&
	    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
		vm_page_lockspin_queues();
		vm_page_deactivate(m);
		vm_page_unlock_queues();
	}

	return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	register int		upl_flags;				\
	memory_object_t		pager;					\
									\
	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
		vm_object_paging_begin(object);				\
		vm_object_unlock(object);				\
									\
		if (iosync)						\
			upl_flags = UPL_MSYNC | UPL_IOSYNC;		\
		else							\
			upl_flags = UPL_MSYNC;				\
									\
		(void) memory_object_data_return(pager,			\
			po,						\
			(memory_object_cluster_size_t)data_cnt,		\
			ro,						\
			ioerr,						\
			(action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\
			!should_flush,					\
			upl_flags);					\
									\
		vm_object_lock(object);					\
		vm_object_paging_end(object);				\
	}								\
MACRO_END
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t		*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t	object;

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
			flags |= MEMORY_OBJECT_DATA_FLUSH;
		}
	}

	offset -= object->paging_offset;

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
		vm_object_reap_pages(object, REAP_DATA_FLUSH);
	else
		(void)vm_object_update(object, offset, size, resid_offset,
				       io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */
kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}
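/*
 * Illustrative sketch (added commentary, not part of the original source):
 * dropping a "named" reference only if the object is otherwise idle, per the
 * TERMINATE_IDLE behavior described above.  "ctl" is hypothetical and the
 * exact flag constant name is an assumption; the snippet is compiled out.
 */
#if 0	/* example only */
	(void) memory_object_release_name(ctl, MEMORY_OBJECT_TERMINATE_IDLE);
#endif	/* example only */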
/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *		FALSE		if no pages were returned to the pager
 *		TRUE		otherwise.
 */
boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
			      MEMORY_OBJECT_RETURN_ALL :
			      MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
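/*
 * Illustrative sketch (added commentary, not part of the original source):
 * an msync-style writeback of one page that waits for the I/O to complete.
 * "obj" is a hypothetical vm_object_t reference; the snippet is compiled out.
 */
#if 0	/* example only */
	boolean_t	returned;

	returned = vm_object_sync(obj,
				  (vm_object_offset_t) 0,
				  (vm_object_size_t) PAGE_SIZE,
				  TRUE,		/* should_flush */
				  TRUE,		/* should_return */
				  TRUE);	/* should_iosync */
#endif	/* example only */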
static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	memory_object_cluster_size_t	data_cnt = 0;
	vm_object_offset_t		paging_offset = 0;
	vm_object_offset_t		next_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {
		/*
		 * Limit the number of pages to be cleaned at once to a contiguous
		 * run, or at most MAX_UPL_TRANSFER size
		 */
		if (data_cnt) {
			if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {
				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
						       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
				data_cnt = 0;
			}
		}
		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    object, offset, page_lock_result, 0, 0);
			switch (page_lock_result)
			{
			case MEMORY_OBJECT_LOCK_RESULT_DONE:
				/*
				 *	End of a cluster of dirty pages.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				/*
				 *	Since it is necessary to block,
				 *	clean any dirty pages now.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				/*
				 * The clean and return cases are similar.
				 *
				 * if this would form a discontiguous block,
				 * clean the old pages and start anew.
				 */
				if (data_cnt && pageout_action != page_lock_result) {
					LIST_REQ_PAGEOUT_PAGES(object,
							       data_cnt, pageout_action,
							       paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				if (m->cleaning) {
					PAGE_SLEEP(object, m, THREAD_UNINT);
					continue;
				}
				if (data_cnt == 0) {
					pageout_action = page_lock_result;
					paging_offset = offset;
				}
				data_cnt += PAGE_SIZE;
				next_offset = offset + PAGE_SIZE_64;
				/*
				 * mark the page for cleaning
				 */
				m->list_req_pending = TRUE;
				m->cleaning = TRUE;

				if (should_flush &&
				    /* let's not flush a wired page... */
				    !VM_PAGE_WIRED(m)) {
					/*
					 * and add additional state
					 * for the flush
					 */
					m->busy = TRUE;
					m->pageout = TRUE;

					vm_page_lockspin_queues();
					vm_page_wire(m);
					vm_page_unlock_queues();
				}
				retval = 1;
				break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
				       data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}
/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t		copy_object = VM_OBJECT_NULL;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	vm_fault_return_t	result;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];
	/*
	 * To avoid blocking while scanning for pages, save
	 * dirty pages to be cleaned all at once.
	 *
	 * XXXO A similar strategy could be used to limit the
	 * number of times that a scan must be restarted for
	 * other reasons.  Those pages that would require blocking
	 * could be temporarily collected in another list, or
	 * their offsets could be recorded in a small array.
	 */

	/*
	 * XXX NOTE: May want to consider converting this to a page list
	 * XXX vm_map_copy interface.  Need to understand object
	 * XXX coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			    !(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);
	if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
		int collisions = 0;

		while ((copy_object = object->copy) != VM_OBJECT_NULL) {
			/*
			 * need to do a try here since we're swimming upstream
			 * against the normal lock ordering... however, we need
			 * to hold the object stable until we gain control of the
			 * copy object so we have to be careful how we approach this
			 */
			if (vm_object_lock_try(copy_object)) {
				/*
				 * we 'won' the lock on the copy object...
				 * no need to hold the object lock any longer...
				 * take a real reference on the copy object because
				 * we're going to call vm_fault_page on it which may
				 * under certain conditions drop the lock and the paging
				 * reference we're about to take... the reference
				 * will keep the copy object from going away if that happens
				 */
				vm_object_unlock(object);
				vm_object_reference_locked(copy_object);

				break;
			}
			vm_object_unlock(object);

			collisions++;
			mutex_pause(collisions);

			vm_object_lock(object);
		}
	}
	if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;
		struct vm_object_fault_info fault_info;

		if (copy_object != VM_OBJECT_NULL) {
			/*
			 * translate offset with respect to shadow's offset
			 */
			copy_offset = (offset >= copy_object->shadow_offset) ?
				(vm_map_offset_t)(offset - copy_object->shadow_offset) :
				(vm_map_offset_t) 0;

			if (copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/*
			 * clip size with respect to shadow offset
			 */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size - (copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}
			copy_size += copy_offset;

		} else {
			copy_object = object;

			copy_size   = offset + size;
			copy_offset = offset;
		}
		fault_info.interruptible = THREAD_UNINT;
		fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
		fault_info.user_tag  = 0;
		fault_info.lo_offset = copy_offset;
		fault_info.hi_offset = copy_size;
		fault_info.no_cache  = FALSE;
		fault_info.stealth = TRUE;

		vm_object_paging_begin(copy_object);
		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			fault_info.cluster_size = (vm_size_t) (copy_size - i);
			assert(fault_info.cluster_size == copy_size - i);
			prot =	VM_PROT_WRITE|VM_PROT_READ;
			result = vm_fault_page(copy_object, i,
					       VM_PROT_WRITE|VM_PROT_READ,
					       FALSE,
					       &prot,
					       &page,
					       &top_page,
					       (int *)0,
					       &error,
					       FALSE,
					       FALSE, &fault_info);

			switch (result) {
			case VM_FAULT_SUCCESS:
				if (top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				}
				vm_page_lockspin_queues();

				if (!page->active && !page->inactive &&
				    !page->throttled)
					vm_page_deactivate(page);
				vm_page_unlock_queues();

				PAGE_WAKEUP_DONE(page);
				break;
			case VM_FAULT_RETRY:
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_SUCCESS_NO_VM_PAGE:
				/* success but no VM page: fail */
				vm_object_paging_end(copy_object);
				vm_object_unlock(copy_object);
				/*FALLTHROUGH*/
			case VM_FAULT_MEMORY_ERROR:
				if (object != copy_object)
					vm_object_deallocate(copy_object);
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			default:
				panic("vm_object_update: unexpected error 0x%x"
				      " from vm_fault_page()\n", result);
			}
		}
		vm_object_paging_end(copy_object);
	}
	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		if (copy_object != VM_OBJECT_NULL && copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_deallocate(copy_object);
			vm_object_lock(object);
		}
		return KERN_SUCCESS;
	}
	if (copy_object != VM_OBJECT_NULL && copy_object != object) {
		if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
			copy_object->shadow_severed = TRUE;
			copy_object->shadowed = FALSE;
			copy_object->shadow = NULL;
			/*
			 * delete the ref the COW was holding on the target object
			 */
			vm_object_deallocate(object);
		}
		vm_object_unlock(copy_object);
		vm_object_deallocate(copy_object);
		vm_object_lock(object);
	}
BYPASS_COW_COPYIN:
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		m;
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);

			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;

						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
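/*
 * Illustrative sketch (added commentary, not part of the original source):
 * how an offset maps to an extent base with the EXTENT_SIZE mask used above.
 * With 8 extents of 256MB each, the pre-flight can track resident runs spread
 * across 2GB of the object before falling back to a single extent.  The
 * values below are examples only and the snippet is compiled out.
 */
#if 0	/* example only */
	vm_object_size_t	e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
	vm_object_offset_t	off    = (vm_object_offset_t)(300ULL << 20);	/* 300 MB */
	vm_object_offset_t	base   = off & e_mask;	/* 256 MB: base of the page's extent */
#endif	/* example only */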
/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	memory_object_size_t	length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
static kern_return_t
vm_object_set_attributes_common(
	vm_object_t	object,
	boolean_t	may_cache,
	memory_object_copy_strategy_t copy_strategy,
	boolean_t	temporary,
	boolean_t	silent_overwrite,
	boolean_t	advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
	case MEMORY_OBJECT_COPY_DELAY:
		break;
	default:
		return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */
	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed.  See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
#if notyet
	invalidate = object->invalidate;
#endif
	vm_object_unlock(object);
	switch (flavor) {
	case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	{
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	}

	case MEMORY_OBJECT_BEHAVIOR_INFO:
	{
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;
		break;
	}

	case MEMORY_OBJECT_PERFORMANCE_INFO:
	{
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;

		break;
	}

	case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	{
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;

		break;
	}

	case MEMORY_OBJECT_ATTRIBUTE_INFO:
	{
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		temporary = attr->temporary;

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return(result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						silent_overwrite,
						advisory_pageout));
}
kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);
	switch (flavor) {
	case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	{
		old_memory_object_behave_info_t	behave;

		if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif

		*count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	}

	case MEMORY_OBJECT_BEHAVIOR_INFO:
	{
		memory_object_behave_info_t	behave;

		if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif
		behave->advisory_pageout = object->advisory_pageout;
		behave->silent_overwrite = object->silent_overwrite;
		*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	}

	case MEMORY_OBJECT_PERFORMANCE_INFO:
	{
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = PAGE_SIZE;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	}

	case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	{
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	}

	case MEMORY_OBJECT_ATTRIBUTE_INFO:
	{
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = PAGE_SIZE;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	}

	default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return (ret);
}
kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t		object;
	kern_return_t		ret;
	int			caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}
) == IKOT_NAMED_ENTRY
) {
1507 vm_named_entry_t named_entry
;
1509 named_entry
= (vm_named_entry_t
)port
->ip_kobject
;
1510 /* a few checks to make sure user is obeying rules */
1511 if(*upl_size
== 0) {
1512 if(offset
>= named_entry
->size
)
1513 return(KERN_INVALID_RIGHT
);
1514 *upl_size
= (upl_size_t
)(named_entry
->size
- offset
);
1515 if (*upl_size
!= named_entry
->size
- offset
)
1516 return KERN_INVALID_ARGUMENT
;
1518 if(caller_flags
& UPL_COPYOUT_FROM
) {
1519 if((named_entry
->protection
& VM_PROT_READ
)
1521 return(KERN_INVALID_RIGHT
);
1524 if((named_entry
->protection
&
1525 (VM_PROT_READ
| VM_PROT_WRITE
))
1526 != (VM_PROT_READ
| VM_PROT_WRITE
)) {
1527 return(KERN_INVALID_RIGHT
);
1530 if(named_entry
->size
< (offset
+ *upl_size
))
1531 return(KERN_INVALID_ARGUMENT
);
1533 /* the callers parameter offset is defined to be the */
1534 /* offset from beginning of named entry offset in object */
1535 offset
= offset
+ named_entry
->offset
;
1537 if(named_entry
->is_sub_map
)
1538 return (KERN_INVALID_ARGUMENT
);
1540 named_entry_lock(named_entry
);
1542 if (named_entry
->is_pager
) {
1543 object
= vm_object_enter(named_entry
->backing
.pager
,
1544 named_entry
->offset
+ named_entry
->size
,
1545 named_entry
->internal
,
1548 if (object
== VM_OBJECT_NULL
) {
1549 named_entry_unlock(named_entry
);
1550 return(KERN_INVALID_OBJECT
);
1553 /* JMM - drop reference on pager here? */
1555 /* create an extra reference for the named entry */
1556 vm_object_lock(object
);
1557 vm_object_reference_locked(object
);
1558 named_entry
->backing
.object
= object
;
1559 named_entry
->is_pager
= FALSE
;
1560 named_entry_unlock(named_entry
);
1562 /* wait for object to be ready */
1563 while (!object
->pager_ready
) {
1564 vm_object_wait(object
,
1565 VM_OBJECT_EVENT_PAGER_READY
,
1567 vm_object_lock(object
);
1569 vm_object_unlock(object
);
1571 /* This is the case where we are going to map */
1572 /* an already mapped object. If the object is */
1573 /* not ready it is internal. An external */
1574 /* object cannot be mapped until it is ready */
1575 /* we can therefore avoid the ready check */
1577 object
= named_entry
->backing
.object
;
1578 vm_object_reference(object
);
1579 named_entry_unlock(named_entry
);
1581 } else if (ip_kotype(port
) == IKOT_MEM_OBJ_CONTROL
) {
1582 memory_object_control_t control
;
1583 control
= (memory_object_control_t
) port
;
1584 if (control
== NULL
)
1585 return (KERN_INVALID_ARGUMENT
);
1586 object
= memory_object_control_to_vm_object(control
);
1587 if (object
== VM_OBJECT_NULL
)
1588 return (KERN_INVALID_ARGUMENT
);
1589 vm_object_reference(object
);
1591 return KERN_INVALID_ARGUMENT
;
1593 if (object
== VM_OBJECT_NULL
)
1594 return (KERN_INVALID_ARGUMENT
);
	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);

	return ret;
}
/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_TERMINATED);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}
/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}
kern_return_t
memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
			   vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL || object->paging_offset > *start)
		return (KERN_INVALID_ARGUMENT);

	*start -= object->paging_offset;

	vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);

	*start += object->paging_offset;

	return (KERN_SUCCESS);
}
int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	__unused memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;
	kern_return_t result = KERN_SUCCESS;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	returned_manager = MEMORY_OBJECT_DEFAULT_NULL;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		returned_manager = current_manager;
		memory_object_default_reference(returned_manager);
	} else {

		/*
		 *	If this is the first non-null manager, start
		 *	up the internal pager support.
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
			result = vm_pageout_internal_start();
			if (result != KERN_SUCCESS)
				goto out;
		}

		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	manager.
		 */
		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);

		/*
		 * Now that we have a default pager for anonymous memory,
		 * reactivate all the throttled pages (i.e. dirty pages with
		 * no pager).
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
			vm_page_reactivate_all_throttled();
		}
	}
 out:
	lck_mtx_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return(result);
}
/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(void)
{
	memory_object_default_t current_manager;

	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = lck_mtx_sleep(&memory_manager_default_lock,
				    LCK_SLEEP_DEFAULT,
				    (event_t) &memory_manager_default,
				    THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	lck_mtx_unlock(&memory_manager_default_lock);

	return current_manager;
}
/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not.  Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *		If there is no default memory manager, log an error,
 *		but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	lck_mtx_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;

		lck_mtx_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		lck_mtx_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}
__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_page_op(object, offset, ops, phys_entry, flags);
}
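/*
 * Illustrative sketch (added commentary, not part of the original source):
 * querying the physical page number backing one object offset.  "ctl" is a
 * hypothetical control reference and UPL_POP_PHYSICAL is the assumed op flag
 * for this query; the snippet is compiled out.
 */
#if 0	/* example only */
	ppnum_t	phys = 0;
	int	op_flags = 0;

	(void) memory_object_page_op(ctl,
				     (memory_object_offset_t) 0,
				     UPL_POP_PHYSICAL,
				     &phys,
				     &op_flags);
#endif	/* example only */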
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_range_op(object,
				  offset_beg,
				  offset_end,
				  ops,
				  (uint32_t *) range);
}
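/*
 * Illustrative sketch (added commentary, not part of the original source):
 * probing sixteen pages with one range_op call instead of sixteen page_op
 * calls.  "ctl" is hypothetical and UPL_ROP_PRESENT is the assumed op flag;
 * the snippet is compiled out.
 */
#if 0	/* example only */
	int	range = 0;

	(void) memory_object_range_op(ctl,
				      (memory_object_offset_t) 0,
				      (memory_object_offset_t) (16 * PAGE_SIZE),
				      UPL_ROP_PRESENT,
				      &range);
#endif	/* example only */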
kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t		*has_pages_resident)
{
	vm_object_t	object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}
kern_return_t
memory_object_signed(
	memory_object_control_t	control,
	boolean_t		is_signed)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_lock(object);
	object->code_signed = is_signed;
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	return;
}
__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t	object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL) {
		control->moc_object = object;
		control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
	}
	return (control);
}
__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t	control,
	vm_object_t		object)
{
	assert((control->moc_object != VM_OBJECT_NULL) &&
	       (control->moc_object != object));
	control->moc_object = object;
}
__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL ||
	    control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
		return VM_OBJECT_NULL;

	return (control->moc_object);
}
memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}
void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}
void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->moc_object != VM_OBJECT_NULL);
	control->moc_object = VM_OBJECT_NULL;
}
void
memory_object_default_reference(
	memory_object_default_t	dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t	dmm)
{
	ipc_port_release_send(dmm);
}
memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{
	(memory_object->mo_pager_ops->memory_object_reference)(
		memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{
	(memory_object->mo_pager_ops->memory_object_deallocate)(
		memory_object);
}
/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
	return (memory_object->mo_pager_ops->memory_object_init)(
		memory_object,
		memory_control,
		memory_object_page_size);
}
/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
	return (memory_object->mo_pager_ops->memory_object_terminate)(
		memory_object);
}
2122 kern_return_t memory_object_data_request
2124 memory_object_t memory_object
,
2125 memory_object_offset_t offset
,
2126 memory_object_cluster_size_t length
,
2127 vm_prot_t desired_access
,
2128 memory_object_fault_info_t fault_info
2131 return (memory_object
->mo_pager_ops
->memory_object_data_request
)(
2139 /* Routine memory_object_data_return */
2140 kern_return_t memory_object_data_return
2142 memory_object_t memory_object
,
2143 memory_object_offset_t offset
,
2144 memory_object_cluster_size_t size
,
2145 memory_object_offset_t
*resid_offset
,
2148 boolean_t kernel_copy
,
2152 return (memory_object
->mo_pager_ops
->memory_object_data_return
)(
2163 /* Routine memory_object_data_initialize */
2164 kern_return_t memory_object_data_initialize
2166 memory_object_t memory_object
,
2167 memory_object_offset_t offset
,
2168 memory_object_cluster_size_t size
2171 return (memory_object
->mo_pager_ops
->memory_object_data_initialize
)(
2177 /* Routine memory_object_data_unlock */
2178 kern_return_t memory_object_data_unlock
2180 memory_object_t memory_object
,
2181 memory_object_offset_t offset
,
2182 memory_object_size_t size
,
2183 vm_prot_t desired_access
2186 return (memory_object
->mo_pager_ops
->memory_object_data_unlock
)(
2193 /* Routine memory_object_synchronize */
2194 kern_return_t memory_object_synchronize
2196 memory_object_t memory_object
,
2197 memory_object_offset_t offset
,
2198 memory_object_size_t size
,
2199 vm_sync_t sync_flags
2202 return (memory_object
->mo_pager_ops
->memory_object_synchronize
)(
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * reclaimed.
 */

/* Routine memory_object_map */
kern_return_t memory_object_map
(
	memory_object_t memory_object,
	vm_prot_t prot
)
{
	return (memory_object->mo_pager_ops->memory_object_map)(
		memory_object,
		prot);
}
/* Routine memory_object_last_unmap */
kern_return_t memory_object_last_unmap
(
	memory_object_t memory_object
)
{
	return (memory_object->mo_pager_ops->memory_object_last_unmap)(
		memory_object);
}
/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}
upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t upl;

	ip_lock(port);
	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
		ip_unlock(port);
		return (upl_t)NULL;
	}
	upl = (upl_t) port->ip_kobject;
	ip_unlock(port);
	upl_lock(upl);
	upl->ref_count += 1;
	upl_unlock(upl);
	return upl;
}
mach_port_t
convert_upl_to_port(
	__unused upl_t	upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t			port,
	__unused mach_port_mscount_t		mscount)
{
	return;
}