/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */
#include <vm/vm_shared_region.h>

#if MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>
memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
decl_lck_mtx_data(,	memory_manager_default_lock)
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
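
/*
 * Illustrative sketch (not part of the original file, never compiled): how
 * the policy encoded by memory_object_should_return_page() maps page state
 * and return policy to a decision.  The helper name and standalone booleans
 * are hypothetical; the real macro works directly on a vm_page_t and may
 * update m->dirty from the pmap as a side effect.
 */
#if 0
static boolean_t
should_return_page_example(boolean_t dirty, boolean_t precious,
			   memory_object_return_t should_return)
{
	if (should_return == MEMORY_OBJECT_RETURN_NONE)
		return FALSE;		/* never return pages */
	if (dirty)
		return TRUE;		/* RETURN_DIRTY and stronger policies */
	if (precious && should_return == MEMORY_OBJECT_RETURN_ALL)
		return TRUE;		/* precious pages only on RETURN_ALL */
	return should_return == MEMORY_OBJECT_RETURN_ANYTHING;
}
#endif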
typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE			0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK		1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN		2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE		3
memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    m, should_return, should_flush, prot, 0);
	if (m->busy || m->cleaning) {

		if (m->list_req_pending &&
		    should_return == MEMORY_OBJECT_RETURN_NONE &&
		    should_flush == TRUE) {
			/*
			 * this is the list_req_pending | absent | busy case
			 * which originates from vm_fault_page.
			 * Combine that with should_flush == TRUE and we
			 * have a case where we need to toss the page from
			 * the object.
			 */
			if (!VM_PAGE_WIRED(m)) {
				return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
			}
			return (MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
		if (m->pageout || m->cleaning) {
			/*
			 * if pageout is set, page was earmarked by vm_pageout_scan
			 * to be cleaned and stolen... if cleaning is set, we're
			 * pre-cleaning pages for a hibernate...
			 * in either case, we're going
			 * to take it back since we are being asked to
			 * flush the page w/o cleaning it (i.e. we don't
			 * care that it's dirty, we want it gone from
			 * the cache) and we don't want to stall
			 * waiting for it to be cleaned for 2 reasons...
			 * 1 - no use paging it out since we're probably
			 *     shrinking the file at this point or we no
			 *     longer care about the data in the page
			 * 2 - if we stall, we may cause a deadlock in
			 *     the FS trying to acquire its locks
			 *     on the VNOP_PAGEOUT path presuming that
			 *     those locks are already held on the truncate
			 *     path before calling through to this function
			 *
			 * so undo all of the state that vm_pageout_scan
			 * has set up for the page
			 */
			vm_pageout_queue_steal(m, FALSE);

		} else if (m->list_req_pending)
			panic("list_req_pending on page %p without absent/pageout/cleaning set\n", m);

		return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}
	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */
	if (m->absent || m->error || m->restart) {
		if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
			/*
			 * dump the page, pager wants us to
			 * clean it up and there is no
			 * relevant data to return
			 */
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
		}
		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
	}
	assert(!m->fictitious);

	if (VM_PAGE_WIRED(m)) {
		/*
		 * The page is wired... just clean or return the page if needed.
		 * Wired pages don't get flushed or disconnected from the pmap.
		 */
		if (memory_object_should_return_page(m, should_return))
			return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);

		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
	}
	if (should_flush) {
		/*
		 * must do the pmap_disconnect before determining the
		 * need to return the page... otherwise it's possible
		 * for the page to go from the clean to the dirty state
		 * after we've made our decision
		 */
		if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
			m->dirty = TRUE;
	} else {
		/*
		 * If we are decreasing permission, do it now;
		 * let the fault handler take care of increases
		 * (pmap_page_protect may not increase protection).
		 */
		if (prot != VM_PROT_NO_CHANGE)
			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
	}
	/*
	 *	Handle returning dirty or precious pages
	 */
	if (memory_object_should_return_page(m, should_return)) {
		/*
		 * we used to do a pmap_disconnect here in support
		 * of memory_object_lock_request, but that routine
		 * no longer requires this...  in any event, in
		 * our world, it would turn into a big noop since
		 * we don't lock the page in any way and as soon
		 * as we drop the object lock, the page can be
		 * faulted back into an address space
		 *
		 *	pmap_disconnect(m->phys_page);
		 */
		return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	Handle flushing clean pages
	 */
	if (should_flush)
		return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
	/*
	 * we used to deactivate clean pages at this point,
	 * but we do not believe that an msync should change
	 * the 'age' of a page in the cache... here is the
	 * original comment and code concerning this...
	 *
	 *	XXX Make clean but not flush a paging hint,
	 *	and deactivate the pages.  This is a hack
	 *	because it overloads flush/clean with
	 *	implementation-dependent meaning.  This only
	 *	happens to pages that are already clean.
	 *
	 *	if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
	 *		return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
	 */

	return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
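
/*
 * Illustrative sketch (not part of the original file, never compiled): a
 * caller of memory_object_lock_page() is expected to dispatch on the four
 * MEMORY_OBJECT_LOCK_RESULT_* values.  The function name below is a
 * hypothetical placeholder; the real consumer of these results is
 * vm_object_update_extent(), later in this file.
 */
#if 0
static void
handle_lock_result_example(memory_object_lock_result_t result)
{
	switch (result) {
	case MEMORY_OBJECT_LOCK_RESULT_DONE:
		/* nothing further to do for this page */
		break;
	case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
		/* page is busy or being cleaned: wait and retry the lookup */
		break;
	case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
		/* dirty/precious data must be pushed back to the pager */
		break;
	case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
		/* page carries no data worth keeping: discard it */
		break;
	}
}
#endif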
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	memory_object_size_t	size,
	memory_object_offset_t	*resid_offset,
	int			*io_errno,
	memory_object_return_t	should_return,
	int			flags,
	vm_prot_t		prot)
{
	vm_object_t	object;

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
			flags |= MEMORY_OBJECT_DATA_FLUSH;
		}
	}

	offset -= object->paging_offset;

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
		vm_object_reap_pages(object, REAP_DATA_FLUSH);
	else
		(void)vm_object_update(object, offset, size, resid_offset,
				       io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
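
/*
 * Illustrative sketch (not part of the original file, never compiled): what
 * an msync-style caller of memory_object_lock_request() might look like.
 * The control handle, offset and size are assumed to come from the caller;
 * the flag combination below (return dirty/precious data, then flush the
 * cached copies) mirrors the description above and is only one possible
 * policy.
 */
#if 0
static kern_return_t
flush_range_example(memory_object_control_t control,
		    memory_object_offset_t offset,
		    memory_object_size_t size)
{
	return memory_object_lock_request(control,
					  offset,
					  size,
					  NULL,		/* resid_offset: not needed */
					  NULL,		/* io_errno: not needed */
					  MEMORY_OBJECT_RETURN_ALL,
					  MEMORY_OBJECT_DATA_FLUSH,
					  VM_PROT_NO_CHANGE);
}
#endif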
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */
kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}
/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *		FALSE	if no pages were returned to the pager
 *		TRUE	otherwise.
 */
boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
			      MEMORY_OBJECT_RETURN_ALL :
			      MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
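
/*
 * Illustrative sketch (not part of the original file, never compiled): how
 * the should_flush / should_return / should_iosync knobs of vm_object_sync()
 * combine.  The wrapper name and the policy chosen are hypothetical; the
 * real kernel callers are the msync-style paths in the VM map code.
 */
#if 0
static boolean_t
sync_and_flush_example(vm_object_t object,
		       vm_object_offset_t offset,
		       vm_object_size_t size)
{
	/*
	 * TRUE, TRUE, TRUE: push dirty and precious pages to the pager,
	 * discard the cached copies, and wait for the I/O to complete.
	 */
	return vm_object_sync(object, offset, size,
			      TRUE,	/* should_flush */
			      TRUE,	/* should_return */
			      TRUE);	/* should_iosync */
}
#endif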
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	int		upl_flags;					\
	memory_object_t	pager;						\
									\
	if (object == slide_info.slide_object) {			\
		panic("Objects with slid pages not allowed\n");		\
	}								\
									\
	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
		vm_object_paging_begin(object);				\
		vm_object_unlock(object);				\
									\
		if (iosync)						\
			upl_flags = UPL_MSYNC | UPL_IOSYNC;		\
		else							\
			upl_flags = UPL_MSYNC;				\
									\
		(void) memory_object_data_return(pager,			\
			po,						\
			(memory_object_cluster_size_t)data_cnt,		\
			ro,						\
			ioerr,						\
			FALSE,						\
			FALSE,						\
			upl_flags);					\
									\
		vm_object_lock(object);					\
		vm_object_paging_end(object);				\
	}								\
MACRO_END
static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	next_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_cluster_size_t	data_cnt = 0;
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp;
	int		dw_count;
	int		dw_limit;

	dwp = &dw_array[0];
	dw_count = 0;
	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {
		/*
		 * Limit the number of pages to be cleaned at once to a contiguous
		 * run, or at most MAX_UPL_TRANSFER size
		 */
		if (data_cnt) {
			if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {

				if (dw_count) {
					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
					dwp = &dw_array[0];
					dw_count = 0;
				}
				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
						       paging_offset, offset_resid, io_errno, should_iosync);
				data_cnt = 0;
			}
		}
		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {

			dwp->dw_mask = 0;

			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
				/*
				 *	End of a run of dirty/precious pages.
				 */
				if (dw_count) {
					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
					dwp = &dw_array[0];
					dw_count = 0;
				}
				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
						       paging_offset, offset_resid, io_errno, should_iosync);
				/*
				 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
				 * allow the state of page 'm' to change... we need to re-lookup
				 * the current offset
				 */
				data_cnt = 0;
				continue;
			}
			switch (page_lock_result) {

			case MEMORY_OBJECT_LOCK_RESULT_DONE:
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
				dwp->dw_mask |= DW_vm_page_free;
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				if (data_cnt == 0)
					paging_offset = offset;

				data_cnt += PAGE_SIZE;
				next_offset = offset + PAGE_SIZE_64;

				m->list_req_pending = TRUE;

				/*
				 * wired pages shouldn't be flushed and
				 * since they aren't on any queue,
				 * no need to remove them
				 */
				if (!VM_PAGE_WIRED(m)) {

					if (should_flush) {
						/*
						 * add additional state for the flush
						 */
						dwp->dw_mask |= DW_vm_page_wire;
					}
					/*
					 * we used to remove the page from the queues at this
					 * point, but we do not believe that an msync
					 * should cause the 'age' of a page to be changed
					 *
					 *	dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
					 */
				}
				retval = 1;
				break;
			}
			if (dwp->dw_mask) {
				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

				if (dw_count >= dw_limit) {
					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
					dwp = &dw_array[0];
					dw_count = 0;
				}
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
				       paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}
/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_offset_t	*resid_offset,
	int			*io_errno,
	memory_object_return_t	should_return,
	int			flags,
	vm_prot_t		protection)
{
	vm_object_t		copy_object = VM_OBJECT_NULL;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	vm_fault_return_t	result;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];
	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 *	XXX NOTE: May want to consider converting this to a page list
	 *	XXX vm_map_copy interface.  Need to understand object
	 *	XXX coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			    !(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);
	if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
		int collisions = 0;

		while ((copy_object = object->copy) != VM_OBJECT_NULL) {
			/*
			 * need to do a try here since we're swimming upstream
			 * against the normal lock ordering... however, we need
			 * to hold the object stable until we gain control of the
			 * copy object so we have to be careful how we approach this
			 */
			if (vm_object_lock_try(copy_object)) {
				/*
				 * we 'won' the lock on the copy object...
				 * no need to hold the object lock any longer...
				 * take a real reference on the copy object because
				 * we're going to call vm_fault_page on it which may
				 * under certain conditions drop the lock and the paging
				 * reference we're about to take... the reference
				 * will keep the copy object from going away if that happens
				 */
				vm_object_unlock(object);
				vm_object_reference_locked(copy_object);

				break;
			}
			vm_object_unlock(object);

			collisions++;
			mutex_pause(collisions);

			vm_object_lock(object);
		}
	}
	if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;
		struct vm_object_fault_info fault_info;

		if (copy_object != VM_OBJECT_NULL) {
			/*
			 * translate offset with respect to shadow's offset
			 */
			copy_offset = (offset >= copy_object->vo_shadow_offset) ?
				(vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
				(vm_map_offset_t) 0;

			if (copy_offset > copy_object->vo_size)
				copy_offset = copy_object->vo_size;

			/*
			 * clip size with respect to shadow offset
			 */
			if (offset >= copy_object->vo_shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->vo_shadow_offset - offset) {
				copy_size = size - (copy_object->vo_shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->vo_size) {
				if (copy_object->vo_size >= copy_offset) {
					copy_size = copy_object->vo_size - copy_offset;
				} else {
					copy_size = 0;
				}
			}
			copy_size += copy_offset;

		} else {
			copy_object = object;

			copy_size   = offset + size;
			copy_offset = offset;
		}
		fault_info.interruptible = THREAD_UNINT;
		fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
		fault_info.user_tag = 0;
		fault_info.lo_offset = copy_offset;
		fault_info.hi_offset = copy_size;
		fault_info.no_cache = FALSE;
		fault_info.stealth = TRUE;
		fault_info.io_sync = FALSE;
		fault_info.cs_bypass = FALSE;
		fault_info.mark_zf_absent = FALSE;

		vm_object_paging_begin(copy_object);
		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			fault_info.cluster_size = (vm_size_t) (copy_size - i);
			assert(fault_info.cluster_size == copy_size - i);

			prot = VM_PROT_WRITE|VM_PROT_READ;
			result = vm_fault_page(copy_object, i,
					       VM_PROT_WRITE|VM_PROT_READ,
					       FALSE,
					       &prot,
					       &page,
					       &top_page,
					       (int *)0,
					       &error,
					       FALSE,
					       FALSE, &fault_info);

			switch (result) {
			case VM_FAULT_SUCCESS:
				if (top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				}
				if (!page->active &&
				    !page->inactive &&
				    !page->throttled) {
					vm_page_lockspin_queues();
					if (!page->active &&
					    !page->inactive &&
					    !page->throttled)
						vm_page_deactivate(page);
					vm_page_unlock_queues();
				}
				PAGE_WAKEUP_DONE(page);
				break;
			case VM_FAULT_RETRY:
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_SUCCESS_NO_VM_PAGE:
				/* success but no VM page: fail */
				vm_object_paging_end(copy_object);
				vm_object_unlock(copy_object);
				/*FALLTHROUGH*/
			case VM_FAULT_MEMORY_ERROR:
				if (object != copy_object)
					vm_object_deallocate(copy_object);
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			default:
				panic("vm_object_update: unexpected error 0x%x"
				      " from vm_fault_page()\n", result);
			}
		}
		vm_object_paging_end(copy_object);
	}
	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		if (copy_object != VM_OBJECT_NULL && copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_deallocate(copy_object);
			vm_object_lock(object);
		}
		return KERN_SUCCESS;
	}
	if (copy_object != VM_OBJECT_NULL && copy_object != object) {
		if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
			copy_object->shadow_severed = TRUE;
			copy_object->shadowed = FALSE;
			copy_object->shadow = NULL;
			/*
			 * delete the ref the COW was holding on the target object
			 */
			vm_object_deallocate(object);
		}
		vm_object_unlock(copy_object);
		vm_object_deallocate(copy_object);
		vm_object_lock(object);
	}
BYPASS_COW_COPYIN:
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */
	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		next;
		vm_page_t		m;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);

			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;
						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	memory_object_size_t	length)
{
	vm_object_t	object;
	msync_req_t	msr;
	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
static kern_return_t
vm_object_set_attributes_common(
	vm_object_t	object,
	boolean_t	may_cache,
	memory_object_copy_strategy_t copy_strategy,
	boolean_t	temporary,
	boolean_t	silent_overwrite,
	boolean_t	advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);
	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
	case MEMORY_OBJECT_COPY_DELAY:
		break;
	default:
		return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */

	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */
	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	     in place, and cluster work is completed. See ifdef notyet
 *	     below and note that vm_object_set_attributes_common()
 *	     may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,
	mach_msg_type_number_t	count)
{
	vm_object_t		object;
	kern_return_t		result = KERN_SUCCESS;
	boolean_t		temporary;
	boolean_t		may_cache;
	boolean_t		invalidate;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t		silent_overwrite;
	boolean_t		advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
	invalidate = object->invalidate;
	vm_object_unlock(object);
	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;

		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;

		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		temporary = attr->temporary;

		break;
	    }

	    default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return (result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						silent_overwrite,
						advisory_pageout));
}
kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif

		*count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif
		behave->advisory_pageout = object->advisory_pageout;
		behave->silent_overwrite = object->silent_overwrite;
		*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = PAGE_SIZE;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = PAGE_SIZE;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return (ret);
}
kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t		object;
	kern_return_t		ret;
	int			caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}
	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t)port->ip_kobject;
		/* a few checks to make sure user is obeying rules */
		if (*upl_size == 0) {
			if (offset >= named_entry->size)
				return (KERN_INVALID_RIGHT);
			*upl_size = (upl_size_t)(named_entry->size - offset);
			if (*upl_size != named_entry->size - offset)
				return KERN_INVALID_ARGUMENT;
		}
		if (caller_flags & UPL_COPYOUT_FROM) {
			if ((named_entry->protection & VM_PROT_READ)
			    != VM_PROT_READ) {
				return (KERN_INVALID_RIGHT);
			}
		} else {
			if ((named_entry->protection &
			     (VM_PROT_READ | VM_PROT_WRITE))
			    != (VM_PROT_READ | VM_PROT_WRITE)) {
				return (KERN_INVALID_RIGHT);
			}
		}
		if (named_entry->size < (offset + *upl_size))
			return (KERN_INVALID_ARGUMENT);

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if (named_entry->is_sub_map)
			return (KERN_INVALID_ARGUMENT);

		named_entry_lock(named_entry);

		if (named_entry->is_pager) {
			object = vm_object_enter(named_entry->backing.pager,
						 named_entry->offset + named_entry->size,
						 named_entry->internal,
						 FALSE,
						 FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return (KERN_INVALID_OBJECT);
			}

			/* JMM - drop reference on pager here? */

			/* create an extra reference for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			/* wait for object to be ready */
			while (!object->pager_ready) {
				vm_object_wait(object,
					       VM_OBJECT_EVENT_PAGER_READY,
					       THREAD_UNINT);
				vm_object_lock(object);
			}
			vm_object_unlock(object);
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			object = named_entry->backing.object;
			vm_object_reference(object);
			named_entry_unlock(named_entry);
		}
	} else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
		memory_object_control_t	control;
		control = (memory_object_control_t) port;
		if (control == NULL)
			return (KERN_INVALID_ARGUMENT);
		object = memory_object_control_to_vm_object(control);
		if (object == VM_OBJECT_NULL)
			return (KERN_INVALID_ARGUMENT);
		vm_object_reference(object);
	} else {
		return KERN_INVALID_ARGUMENT;
	}
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);
	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);

	return ret;
}
/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 */
kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_TERMINATED);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}
/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */
kern_return_t
memory_object_super_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}
kern_return_t
memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
			   vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);

	if (object == VM_OBJECT_NULL || object->paging_offset > *start)
		return (KERN_INVALID_ARGUMENT);

	*start -= object->paging_offset;

	vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);

	*start += object->paging_offset;

	return (KERN_SUCCESS);
}
int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	__unused memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t	current_manager;
	memory_object_default_t	new_manager;
	memory_object_default_t	returned_manager;
	kern_return_t		result = KERN_SUCCESS;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	returned_manager = MEMORY_OBJECT_DEFAULT_NULL;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		returned_manager = current_manager;
		memory_object_default_reference(returned_manager);
	} else {
		/*
		 *	If this is the first non-null manager, start
		 *	up the internal pager support.
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
			result = vm_pageout_internal_start();
			if (result != KERN_SUCCESS)
				goto out;
		}

		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	value.
		 */
		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */
		thread_wakeup((event_t) &memory_manager_default);

#ifndef CONFIG_FREEZE
		/*
		 * Now that we have a default pager for anonymous memory,
		 * reactivate all the throttled pages (i.e. dirty pages with
		 * no pager).
		 */
		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
		{
			vm_page_reactivate_all_throttled();
		}
#endif
	}
 out:
	lck_mtx_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;

	return (result);
}
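
/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * get/set convention of host_default_memory_manager().  Passing
 * MEMORY_OBJECT_DEFAULT_NULL in *default_manager retrieves the current
 * default pager (with a new send right); passing a real port installs it
 * and hands back the previous one.  The wrapper name below is hypothetical.
 */
#if 0
static kern_return_t
query_default_pager_example(host_priv_t host_priv,
			    memory_object_default_t *current)
{
	*current = MEMORY_OBJECT_DEFAULT_NULL;	/* request a "get" */
	return host_default_memory_manager(host_priv, current, 0);
}
#endif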
/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(void)
{
	memory_object_default_t	current_manager;

	lck_mtx_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = lck_mtx_sleep(&memory_manager_default_lock,
				    LCK_SLEEP_DEFAULT,
				    (event_t) &memory_manager_default,
				    THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	lck_mtx_unlock(&memory_manager_default_lock);

	return current_manager;
}
/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *		If there is no default memory manager, log an error,
 *		but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t	current;

	lck_mtx_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;
		lck_mtx_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		lck_mtx_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}
__private_extern__
void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_page_op(object, offset, ops, phys_entry, flags);
}
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_range_op(object,
				  offset_beg,
				  offset_end,
				  ops,
				  (uint32_t *) range);
}
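
/*
 * Illustrative sketch (not part of the original file, never compiled): using
 * memory_object_range_op() to apply the same page operation across a range
 * instead of looping over memory_object_page_op() one page at a time.  The
 * UPL_POP_DUMP op and the wrapper name are used here only as an example of
 * an op that needs no per-page state back from the call.
 */
#if 0
static kern_return_t
toss_range_example(memory_object_control_t control,
		   memory_object_offset_t offset_beg,
		   memory_object_offset_t offset_end)
{
	int range = 0;

	return memory_object_range_op(control,
				      offset_beg,
				      offset_end,
				      UPL_POP_DUMP,
				      &range);
}
#endif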
void
memory_object_mark_used(
	memory_object_control_t	control)
{
	vm_object_t	object;

	if (control == NULL)
		return;

	object = memory_object_control_to_vm_object(control);

	if (object != VM_OBJECT_NULL)
		vm_object_cache_remove(object);
}


void
memory_object_mark_unused(
	memory_object_control_t	control,
	__unused boolean_t	rage)
{
	vm_object_t	object;

	if (control == NULL)
		return;

	object = memory_object_control_to_vm_object(control);

	if (object != VM_OBJECT_NULL)
		vm_object_cache_add(object);
}
kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t		*has_pages_resident)
{
	vm_object_t	object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}
kern_return_t
memory_object_signed(
	memory_object_control_t	control,
	boolean_t		is_signed)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_lock(object);
	object->code_signed = is_signed;
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
boolean_t
memory_object_is_slid(
	memory_object_control_t	control)
{
	vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t	slide_object = slide_info.slide_object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return FALSE;

	return (object == slide_object);
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
	zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
	return;
}
__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t	object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL) {
		control->moc_object = object;
		control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
	}
	return (control);
}
__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t	control,
	vm_object_t		object)
{
	assert((control->moc_object != VM_OBJECT_NULL) &&
	       (control->moc_object != object));
	control->moc_object = object;
}
__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL ||
	    control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
		return VM_OBJECT_NULL;

	return (control->moc_object);
}
memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}

void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->moc_object != VM_OBJECT_NULL);
	control->moc_object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
	memory_object_default_t	dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t	dmm)
{
	ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{
	(memory_object->mo_pager_ops->memory_object_reference)(
		memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{
	(memory_object->mo_pager_ops->memory_object_deallocate)(
		memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
	return (memory_object->mo_pager_ops->memory_object_init)(
		memory_object,
		memory_control,
		memory_object_page_size);
}
/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
	return (memory_object->mo_pager_ops->memory_object_terminate)(
		memory_object);
}
/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
	vm_prot_t desired_access,
	memory_object_fault_info_t fault_info
)
{
	return (memory_object->mo_pager_ops->memory_object_data_request)(
		memory_object,
		offset,
		length,
		desired_access,
		fault_info);
}
/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t size,
	memory_object_offset_t *resid_offset,
	int	*io_error,
	boolean_t dirty,
	boolean_t kernel_copy,
	int	upl_flags
)
{
	return (memory_object->mo_pager_ops->memory_object_data_return)(
		memory_object,
		offset,
		size,
		resid_offset,
		io_error,
		dirty,
		kernel_copy,
		upl_flags);
}
/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t size
)
{
	return (memory_object->mo_pager_ops->memory_object_data_initialize)(
		memory_object,
		offset,
		size);
}
/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_size_t size,
	vm_prot_t desired_access
)
{
	return (memory_object->mo_pager_ops->memory_object_data_unlock)(
		memory_object,
		offset,
		size,
		desired_access);
}
/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_size_t size,
	vm_sync_t sync_flags
)
{
	return (memory_object->mo_pager_ops->memory_object_synchronize)(
		memory_object,
		offset,
		size,
		sync_flags);
}
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * reclaimed.
 */
2294 kern_return_t memory_object_map
2296 memory_object_t memory_object
,
2300 return (memory_object
->mo_pager_ops
->memory_object_map
)(
2305 /* Routine memory_object_last_unmap */
2306 kern_return_t memory_object_last_unmap
2308 memory_object_t memory_object
2311 return (memory_object
->mo_pager_ops
->memory_object_last_unmap
)(
2315 /* Routine memory_object_data_reclaim */
2316 kern_return_t memory_object_data_reclaim
2318 memory_object_t memory_object
,
2319 boolean_t reclaim_backing_store
2322 if (memory_object
->mo_pager_ops
->memory_object_data_reclaim
== NULL
)
2323 return KERN_NOT_SUPPORTED
;
2324 return (memory_object
->mo_pager_ops
->memory_object_data_reclaim
)(
2326 reclaim_backing_store
);
2329 /* Routine memory_object_create */
2330 kern_return_t memory_object_create
2332 memory_object_default_t default_memory_manager
,
2333 vm_size_t new_memory_object_size
,
2334 memory_object_t
*new_memory_object
2337 return default_pager_memory_object_create(default_memory_manager
,
2338 new_memory_object_size
,
2343 convert_port_to_upl(
2349 if (!ip_active(port
) || (ip_kotype(port
) != IKOT_UPL
)) {
2353 upl
= (upl_t
) port
->ip_kobject
;
2362 convert_upl_to_port(
2365 return MACH_PORT_NULL
;
2368 __private_extern__
void
2370 __unused ipc_port_t port
,
2371 __unused mach_port_mscount_t mscount
)