/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  File:   vm/memory_object.c
 *  Author: Michael Wayne Young
 *
 *  External memory management interface control functions.
 */
#include <advisory_pageout.h>
/*
 *  Interface dependencies:
 */

#include <mach/std_types.h>     /* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>
/*
 *  Implementation dependencies:
 */
#include <string.h>             /* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>        /* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>            /* For pmap_clear_modify */
#include <vm/vm_kern.h>         /* For kernel_map, vm_move */
#include <vm/vm_map.h>          /* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>   /* Needed by some vm_page.h macros */
#include <vm/vm_shared_region.h>

#if MACH_PAGEMAP
#include <vm/vm_external.h>
#endif  /* MACH_PAGEMAP */

#include <vm/vm_protos.h>
memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
decl_lck_mtx_data(,     memory_manager_default_lock)
/*
 *  Routine:    memory_object_should_return_page
 *
 *  Description:
 *      Determine whether the given page should be returned,
 *      based on the page's state and on the given return policy.
 *
 *      We should return the page if one of the following is true:
 *
 *      1. Page is dirty and should_return is not RETURN_NONE.
 *      2. Page is precious and should_return is RETURN_ALL.
 *      3. Should_return is RETURN_ANYTHING.
 *
 *      As a side effect, m->dirty will be made consistent
 *      with pmap_is_modified(m), if should_return is not
 *      MEMORY_OBJECT_RETURN_NONE.
 */

#define memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
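/*
 * Illustrative summary (not part of the original source) of how the policy
 * encoded by the macro above resolves for a few representative cases:
 *
 *  page state                   should_return       result
 *  -------------------------    ----------------    -----------
 *  dirty (or pmap-modified)     RETURN_DIRTY        return page
 *  clean, precious              RETURN_DIRTY        keep
 *  clean, precious              RETURN_ALL          return page
 *  clean, not precious          RETURN_ANYTHING     return page
 *  any                          RETURN_NONE         keep
 */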
typedef int memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE     3
memory_object_lock_result_t memory_object_lock_page(
                vm_page_t               m,
                memory_object_return_t  should_return,
                boolean_t               should_flush,
                vm_prot_t               prot);

/*
 *  Routine:    memory_object_lock_page
 *
 *  Description:
 *      Perform the appropriate lock operations on the
 *      given page.  See the description of
 *      "memory_object_lock_request" for the meanings
 *      of the arguments.
 *
 *      Returns an indication that the operation
 *      completed, blocked, or that the page must
 *      be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
    vm_page_t               m,
    memory_object_return_t  should_return,
    boolean_t               should_flush,
    vm_prot_t               prot)
{
    XPR(XPR_MEMORY_OBJECT,
        "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
        m, should_return, should_flush, prot, 0);
    if (m->busy || m->cleaning)
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

    vm_pageout_steal_laundry(m, FALSE);

    /*
     *  Don't worry about pages for which the kernel
     *  does not have any data.
     */
    if (m->absent || m->error || m->restart) {
        if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
            /*
             * dump the page, pager wants us to
             * clean it up and there is no
             * relevant data to return
             */
            return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
        }
        return (MEMORY_OBJECT_LOCK_RESULT_DONE);
    }
    assert(!m->fictitious);

    if (VM_PAGE_WIRED(m)) {
        /*
         * The page is wired... just clean or return the page if needed.
         * Wired pages don't get flushed or disconnected from the pmap.
         */
        if (memory_object_should_return_page(m, should_return))
            return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);

        return (MEMORY_OBJECT_LOCK_RESULT_DONE);
    }

    /*
     * must do the pmap_disconnect before determining the
     * need to return the page... otherwise it's possible
     * for the page to go from the clean to the dirty state
     * after we've made our decision
     */
    if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
        SET_PAGE_DIRTY(m, FALSE);
    }

    /*
     * If we are decreasing permission, do it now;
     * let the fault handler take care of increases
     * (pmap_page_protect may not increase protection).
     */
    if (prot != VM_PROT_NO_CHANGE)
        pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);

    /*
     *  Handle returning dirty or precious pages
     */
    if (memory_object_should_return_page(m, should_return)) {
        /*
         * we used to do a pmap_disconnect here in support
         * of memory_object_lock_request, but that routine
         * no longer requires this...  in any event, in
         * our world, it would turn into a big noop since
         * we don't lock the page in any way and as soon
         * as we drop the object lock, the page can be
         * faulted back into an address space
         *
         *  - pmap_disconnect(m->phys_page);
         */
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
    }

    /*
     *  Handle flushing clean pages
     */
    if (should_flush)
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);

    /*
     * we used to deactivate clean pages at this point,
     * but we do not believe that an msync should change
     * the 'age' of a page in the cache... here is the
     * original comment and code concerning this...
     *
     *  XXX Make clean but not flush a paging hint,
     *  and deactivate the pages.  This is a hack
     *  because it overloads flush/clean with
     *  implementation-dependent meaning.  This only
     *  happens to pages that are already clean.
     *
     *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
     *      return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
     */

    return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
/*
 *  Routine:    memory_object_lock_request [user interface]
 *
 *  Description:
 *      Control use of the data associated with the given
 *      memory object.  For each page in the given range,
 *      perform the following operations, in order:
 *          1)  restrict access to the page (disallow
 *              forms specified by "prot");
 *          2)  return data to the manager (if "should_return"
 *              is RETURN_DIRTY and the page is dirty, or
 *              "should_return" is RETURN_ALL and the page
 *              is either dirty or precious); and,
 *          3)  flush the cached copy (if "should_flush"
 *              is asserted).
 *      The set of pages is defined by a starting offset
 *      ("offset") and size ("size").  Only pages with the
 *      same page alignment as the starting offset are
 *      considered.
 *
 *      A single acknowledgement is sent (to the "reply_to"
 *      port) when these actions are complete.  If successful,
 *      the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    memory_object_size_t    size,
    memory_object_offset_t  *resid_offset,
    int                     *io_errno,
    memory_object_return_t  should_return,
    int                     flags,
    vm_prot_t               prot)
{
    vm_object_t object;

    /*
     *  Check for bogus arguments.
     */
    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
        return (KERN_INVALID_ARGUMENT);

    size = round_page_64(size);

    /*
     *  Lock the object, and acquire a paging reference to
     *  prevent the memory_object reference from being released.
     */
    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
        if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
            flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
            flags |= MEMORY_OBJECT_DATA_FLUSH;
        }
    }
    offset -= object->paging_offset;

    if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
        vm_object_reap_pages(object, REAP_DATA_FLUSH);
    else
        (void)vm_object_update(object, offset, size, resid_offset,
                               io_errno, should_return, flags, prot);

    vm_object_paging_end(object);
    vm_object_unlock(object);

    return (KERN_SUCCESS);
}
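/*
 * Illustrative sketch (not from the original source): a pager that already
 * holds a memory object control port could ask the kernel to push any dirty
 * pages in the first megabyte back to it and then discard the cached copies
 * with a request along these lines.  "moc" stands in for that control port;
 * the resid/io_errno out-parameters simply receive status for the caller.
 *
 *  memory_object_offset_t  resid;
 *  int                     io_errno;
 *
 *  (void) memory_object_lock_request(moc,
 *      (memory_object_offset_t) 0,
 *      (memory_object_size_t) (1024 * 1024),
 *      &resid, &io_errno,
 *      MEMORY_OBJECT_RETURN_DIRTY,
 *      MEMORY_OBJECT_DATA_FLUSH,
 *      VM_PROT_NO_CHANGE);
 */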
/*
 *  memory_object_release_name:  [interface]
 *
 *  Enforces name semantic on memory_object reference count decrement.
 *  This routine should not be called unless the caller holds a name
 *  reference gained through the memory_object_named_create or the
 *  memory_object_rename call.
 *  If the TERMINATE_IDLE flag is set, the call will return if the
 *  reference count is not 1, i.e. idle with the only remaining reference
 *  being the name.
 *  If the decision is made to proceed, the name field flag is set to
 *  false and the reference count is decremented.  If the RESPECT_CACHE
 *  flag is set and the reference count has gone to zero, the
 *  memory_object is checked to see if it is cacheable; otherwise, when
 *  the reference count is zero, it is simply terminated.
 */
kern_return_t
memory_object_release_name(
    memory_object_control_t control,
    int                     flags)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_release_name(object, flags);
}
/*
 *  Routine:    memory_object_destroy [user interface]
 *  Purpose:
 *      Shut down a memory object, despite the
 *      presence of address map (or other) references
 *      to the vm_object.
 */
kern_return_t
memory_object_destroy(
    memory_object_control_t control,
    kern_return_t           reason)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return (vm_object_destroy(object, reason));
}
/*
 *  Routine:    vm_object_sync
 *
 *      Kernel internal function to synch out pages in a given
 *      range within an object to its memory manager.  Much the
 *      same as memory_object_lock_request but page protection
 *      is not changed.
 *
 *      If the should_flush and should_return flags are true, pages
 *      are flushed, that is dirty & precious pages are written to
 *      the memory manager and then discarded.  If should_return
 *      is false, only precious pages are returned to the memory
 *      manager.
 *
 *      If should_flush is false and should_return is true, the memory
 *      manager's copy of the pages is updated.  If should_return
 *      is also false, only the precious pages are updated.  This
 *      last option is of limited utility.
 *
 *  Returns:
 *      FALSE       if no pages were returned to the pager
 *      TRUE        otherwise.
 */
boolean_t
vm_object_sync(
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_object_size_t        size,
    boolean_t               should_flush,
    boolean_t               should_return,
    boolean_t               should_iosync)
{
    boolean_t   rv;
    int         flags;

    XPR(XPR_VM_OBJECT,
        "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
        object, offset, size, should_flush, should_return);

    /*
     * Lock the object, and acquire a paging reference to
     * prevent the memory_object and control ports from
     * being destroyed.
     */
    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (should_flush)
        flags = MEMORY_OBJECT_DATA_FLUSH;
    else
        flags = 0;

    if (should_iosync)
        flags |= MEMORY_OBJECT_IO_SYNC;

    rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
                          (should_return) ?
                          MEMORY_OBJECT_RETURN_ALL :
                          MEMORY_OBJECT_RETURN_NONE,
                          flags,
                          VM_PROT_NO_CHANGE);

    vm_object_paging_end(object);
    vm_object_unlock(object);
    return rv;
}
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
MACRO_BEGIN                                                                \
                                                                           \
        int             upl_flags;                                         \
        memory_object_t pager;                                             \
                                                                           \
        if (object == slide_info.slide_object) {                           \
                panic("Objects with slid pages not allowed\n");            \
        }                                                                  \
                                                                           \
        if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {             \
                vm_object_paging_begin(object);                            \
                vm_object_unlock(object);                                  \
                                                                           \
                if (iosync)                                                \
                        upl_flags = UPL_MSYNC | UPL_IOSYNC;                \
                else                                                       \
                        upl_flags = UPL_MSYNC;                             \
                                                                           \
                (void) memory_object_data_return(pager,                    \
                        po,                                                \
                        (memory_object_cluster_size_t)data_cnt,            \
                        ro,                                                \
                        ioerr,                                             \
                        FALSE,                                             \
                        FALSE,                                             \
                        upl_flags);                                        \
                                                                           \
                vm_object_lock(object);                                    \
                vm_object_paging_end(object);                              \
        }                                                                  \
MACRO_END
static int
vm_object_update_extent(
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_object_offset_t      offset_end,
    vm_object_offset_t      *offset_resid,
    int                     *io_errno,
    boolean_t               should_flush,
    memory_object_return_t  should_return,
    boolean_t               should_iosync,
    vm_prot_t               prot)
{
    vm_page_t                       m;
    int                             retval = 0;
    vm_object_offset_t              paging_offset = 0;
    vm_object_offset_t              next_offset = offset;
    memory_object_lock_result_t     page_lock_result;
    memory_object_cluster_size_t    data_cnt = 0;
    struct vm_page_delayed_work     dw_array[DEFAULT_DELAYED_WORK_LIMIT];
    struct vm_page_delayed_work     *dwp;
    int                             dw_count;
    int                             dw_limit;

    dwp = &dw_array[0];
    dw_count = 0;
    dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

    for (;
         offset < offset_end && object->resident_page_count;
         offset += PAGE_SIZE_64) {
        /*
         * Limit the number of pages to be cleaned at once to a contiguous
         * run, or at most MAX_UPL_TRANSFER size
         */
        if (data_cnt) {
            if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {

                if (dw_count) {
                    vm_page_do_delayed_work(object, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
                LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                                       paging_offset, offset_resid, io_errno, should_iosync);
                data_cnt = 0;
            }
        }
        while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {

            dwp->dw_mask = 0;

            page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

            if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
                /*
                 *  End of a run of dirty/precious pages.
                 */
                if (dw_count) {
                    vm_page_do_delayed_work(object, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
                LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                                       paging_offset, offset_resid, io_errno, should_iosync);
                /*
                 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
                 * allow the state of page 'm' to change... we need to re-lookup
                 * the current offset
                 */
                data_cnt = 0;
                continue;
            }

            switch (page_lock_result) {

            case MEMORY_OBJECT_LOCK_RESULT_DONE:
                break;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
                dwp->dw_mask |= DW_vm_page_free;
                break;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
                PAGE_SLEEP(object, m, THREAD_UNINT);
                continue;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
                if (data_cnt == 0)
                    paging_offset = offset;

                data_cnt += PAGE_SIZE;
                next_offset = offset + PAGE_SIZE_64;

                /*
                 * wired pages shouldn't be flushed and
                 * since they aren't on any queue,
                 * no need to remove them
                 */
                if (!VM_PAGE_WIRED(m)) {

                    if (should_flush) {
                        /*
                         * add additional state for the flush
                         */
                        m->pageout = TRUE;
                    }
                    /*
                     * we used to remove the page from the queues at this
                     * point, but we do not believe that an msync
                     * should cause the 'age' of a page to be changed
                     *
                     *    dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
                     */
                }
                retval = 1;
                break;
            }
            if (dwp->dw_mask) {
                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

                if (dw_count >= dw_limit) {
                    vm_page_do_delayed_work(object, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
            }
            break;
        }
    }
    /*
     *  We have completed the scan for applicable pages.
     *  Clean any pages that have been saved.
     */
    if (dw_count)
        vm_page_do_delayed_work(object, &dw_array[0], dw_count);

    if (data_cnt) {
        LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                               paging_offset, offset_resid, io_errno, should_iosync);
    }
    return (retval);
}
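/*
 * Worked example (not from the original source): vm_object_update_extent()
 * batches contiguous MUST_RETURN pages into a single LIST_REQ_PAGEOUT_PAGES
 * call.  Assuming a 4K page size and a MAX_UPL_TRANSFER of 256 pages, a run
 * is pushed to the pager once it reaches 1MB (256 * 4096 bytes), or sooner
 * if the next page to be returned is not contiguous with the current run.
 * The MAX_UPL_TRANSFER value is configuration-dependent; 256 is only an
 * assumption used for the arithmetic here.
 */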
/*
 *  Routine:    vm_object_update
 *  Description:
 *      Work function for m_o_lock_request(), vm_o_sync().
 *
 *      Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_object_size_t        size,
    vm_object_offset_t      *resid_offset,
    int                     *io_errno,
    memory_object_return_t  should_return,
    int                     flags,
    vm_prot_t               protection)
{
    vm_object_t         copy_object = VM_OBJECT_NULL;
    boolean_t           data_returned = FALSE;
    boolean_t           update_cow;
    boolean_t           should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
    boolean_t           should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
    vm_fault_return_t   result;
    int                 num_of_extents;
    int                 n;
#define MAX_EXTENTS     8
#define EXTENT_SIZE     (1024 * 1024 * 256)
#define RESIDENT_LIMIT  (1024 * 32)
    struct extent {
        vm_object_offset_t e_base;
        vm_object_offset_t e_min;
        vm_object_offset_t e_max;
    } extents[MAX_EXTENTS];
    /*
     *  To avoid blocking while scanning for pages, save
     *  dirty pages to be cleaned all at once.
     *
     *  XXXO A similar strategy could be used to limit the
     *  number of times that a scan must be restarted for
     *  other reasons.  Those pages that would require blocking
     *  could be temporarily collected in another list, or
     *  their offsets could be recorded in a small array.
     */

    /*
     * XXX  NOTE: May want to consider converting this to a page list
     * XXX  vm_map_copy interface.  Need to understand object
     * XXX  coalescing implications before doing so.
     */

    update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
                    && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
                        !(flags & MEMORY_OBJECT_DATA_PURGE)))
                    || (flags & MEMORY_OBJECT_COPY_SYNC);
    if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
        int collisions = 0;

        while ((copy_object = object->copy) != VM_OBJECT_NULL) {
            /*
             * need to do a try here since we're swimming upstream
             * against the normal lock ordering... however, we need
             * to hold the object stable until we gain control of the
             * copy object so we have to be careful how we approach this
             */
            if (vm_object_lock_try(copy_object)) {
                /*
                 * we 'won' the lock on the copy object...
                 * no need to hold the object lock any longer...
                 * take a real reference on the copy object because
                 * we're going to call vm_fault_page on it which may
                 * under certain conditions drop the lock and the paging
                 * reference we're about to take... the reference
                 * will keep the copy object from going away if that happens
                 */
                vm_object_unlock(object);
                vm_object_reference_locked(copy_object);
                break;
            }
            vm_object_unlock(object);

            collisions++;
            mutex_pause(collisions);

            vm_object_lock(object);
        }
    }
    if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
        vm_map_size_t       i;
        vm_map_size_t       copy_size;
        vm_map_offset_t     copy_offset;
        vm_prot_t           prot;
        vm_page_t           page;
        vm_page_t           top_page;
        kern_return_t       error = 0;
        struct vm_object_fault_info fault_info;

        if (copy_object != VM_OBJECT_NULL) {
            /*
             * translate offset with respect to shadow's offset
             */
            copy_offset = (offset >= copy_object->vo_shadow_offset) ?
                (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
                (vm_map_offset_t) 0;

            if (copy_offset > copy_object->vo_size)
                copy_offset = copy_object->vo_size;

            /*
             * clip size with respect to shadow offset
             */
            if (offset >= copy_object->vo_shadow_offset) {
                copy_size = size;
            } else if (size >= copy_object->vo_shadow_offset - offset) {
                copy_size = size - (copy_object->vo_shadow_offset - offset);
            } else {
                copy_size = 0;
            }

            if (copy_offset + copy_size > copy_object->vo_size) {
                if (copy_object->vo_size >= copy_offset) {
                    copy_size = copy_object->vo_size - copy_offset;
                } else {
                    copy_size = 0;
                }
            }
            copy_size += copy_offset;

        } else {
            copy_object = object;

            copy_size   = offset + size;
            copy_offset = offset;
        }
        fault_info.interruptible = THREAD_UNINT;
        fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
        fault_info.user_tag  = 0;
        fault_info.lo_offset = copy_offset;
        fault_info.hi_offset = copy_size;
        fault_info.no_cache  = FALSE;
        fault_info.stealth = TRUE;
        fault_info.io_sync = FALSE;
        fault_info.cs_bypass = FALSE;
        fault_info.mark_zf_absent = FALSE;
        fault_info.batch_pmap_op = FALSE;

        vm_object_paging_begin(copy_object);
        for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
    RETRY_COW_OF_LOCK_REQUEST:
            fault_info.cluster_size = (vm_size_t) (copy_size - i);
            assert(fault_info.cluster_size == copy_size - i);

            prot = VM_PROT_WRITE|VM_PROT_READ;
            result = vm_fault_page(copy_object, i,
                                   VM_PROT_WRITE|VM_PROT_READ,
                                   FALSE,
                                   &prot,
                                   &page,
                                   &top_page,
                                   (int *)0,
                                   &error,
                                   FALSE,
                                   FALSE, &fault_info);

            switch (result) {
            case VM_FAULT_SUCCESS:
                if (top_page) {
                    vm_fault_cleanup(
                        page->object, top_page);
                    vm_object_lock(copy_object);
                    vm_object_paging_begin(copy_object);
                }
                if ((!VM_PAGE_WIRED(page)) && (!page->dirty) && (!page->precious)) {

                    vm_page_lockspin_queues();

                    if ((!VM_PAGE_WIRED(page)) && (!page->dirty) && (!page->precious)) {
                        vm_page_deactivate(page);
                    }
                    vm_page_unlock_queues();
                }
                PAGE_WAKEUP_DONE(page);
                break;
            case VM_FAULT_RETRY:
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_INTERRUPTED:
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_MEMORY_SHORTAGE:
                VM_PAGE_WAIT();
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_SUCCESS_NO_VM_PAGE:
                /* success but no VM page: fail */
                vm_object_paging_end(copy_object);
                vm_object_unlock(copy_object);
                /*FALLTHROUGH*/
            case VM_FAULT_MEMORY_ERROR:
                if (object != copy_object)
                    vm_object_deallocate(copy_object);
                vm_object_lock(object);
                goto BYPASS_COW_COPYIN;
            default:
                panic("vm_object_update: unexpected error 0x%x"
                      " from vm_fault_page()\n", result);
            }
        }
        vm_object_paging_end(copy_object);
    }
    if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
        if (copy_object != VM_OBJECT_NULL && copy_object != object) {
            vm_object_unlock(copy_object);
            vm_object_deallocate(copy_object);
            vm_object_lock(object);
        }
        return KERN_SUCCESS;
    }
    if (copy_object != VM_OBJECT_NULL && copy_object != object) {
        if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
            copy_object->shadow_severed = TRUE;
            copy_object->shadowed = FALSE;
            copy_object->shadow = NULL;
            /*
             * delete the ref the COW was holding on the target object
             */
            vm_object_deallocate(object);
        }
        vm_object_unlock(copy_object);
        vm_object_deallocate(copy_object);
        vm_object_lock(object);
    }
BYPASS_COW_COPYIN:
    /*
     * when we have a really large range to check relative
     * to the number of actual resident pages, we'd like
     * to use the resident page list to drive our checks
     * however, the object lock will get dropped while processing
     * the page which means the resident queue can change which
     * means we can't walk the queue as we process the pages
     * we also want to do the processing in offset order to allow
     * 'runs' of pages to be collected if we're being told to
     * flush to disk... the resident page queue is NOT ordered.
     *
     * a temporary solution (until we figure out how to deal with
     * large address spaces more generically) is to pre-flight
     * the resident page queue (if it's small enough) and develop
     * a collection of extents (that encompass actual resident pages)
     * to visit.  This will at least allow us to deal with some of the
     * more pathological cases in a more efficient manner.  The current
     * worst case (a single resident page at the end of an extremely large
     * range) can take minutes to complete for ranges in the terabyte
     * category... since this routine is called when truncating a file,
     * and we currently support files up to 16 Tbytes in size, this
     * is not a theoretical problem
     */

    if ((object->resident_page_count < RESIDENT_LIMIT) &&
        (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
        vm_page_t           next;
        vm_page_t           m;
        vm_object_offset_t  start;
        vm_object_offset_t  end;
        vm_object_size_t    e_mask;

        start = offset;
        end   = offset + size;
        num_of_extents = 0;
        e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
        m = (vm_page_t) queue_first(&object->memq);

        while (!queue_end(&object->memq, (queue_entry_t) m)) {
            next = (vm_page_t) queue_next(&m->listq);

            if ((m->offset >= start) && (m->offset < end)) {
                /*
                 * this is a page we're interested in
                 * try to fit it into a current extent
                 */
                for (n = 0; n < num_of_extents; n++) {
                    if ((m->offset & e_mask) == extents[n].e_base) {
                        /*
                         * use (PAGE_SIZE - 1) to determine the
                         * max offset so that we don't wrap if
                         * we're at the last page of the space
                         */
                        if (m->offset < extents[n].e_min)
                            extents[n].e_min = m->offset;
                        else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
                            extents[n].e_max = m->offset + (PAGE_SIZE - 1);
                        break;
                    }
                }
                if (n == num_of_extents) {
                    /*
                     * didn't find a current extent that can encompass
                     * this page
                     */
                    if (n < MAX_EXTENTS) {
                        /*
                         * if we still have room,
                         * create a new extent
                         */
                        extents[n].e_base = m->offset & e_mask;
                        extents[n].e_min  = m->offset;
                        extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

                        num_of_extents++;
                    } else {
                        /*
                         * no room to create a new extent...
                         * fall back to a single extent based
                         * on the min and max page offsets
                         * we find in the range we're interested in...
                         * first, look through the extent list and
                         * develop the overall min and max for the
                         * pages we've looked at up to this point
                         */
                        for (n = 1; n < num_of_extents; n++) {
                            if (extents[n].e_min < extents[0].e_min)
                                extents[0].e_min = extents[n].e_min;
                            if (extents[n].e_max > extents[0].e_max)
                                extents[0].e_max = extents[n].e_max;
                        }
                        /*
                         * now setup to run through the remaining pages
                         * to determine the overall min and max
                         * offset for the specified range
                         */
                        extents[0].e_base = 0;
                        e_mask = 0;
                        num_of_extents = 1;
                        continue;
                        /*
                         * by continuing, we'll reprocess the
                         * page that forced us to abandon trying
                         * to develop multiple extents
                         */
                    }
                }
            }
            m = next;
        }
[0].e_min
= offset
;
1021 extents
[0].e_max
= offset
+ (size
- 1);
1025 for (n
= 0; n
< num_of_extents
; n
++) {
1026 if (vm_object_update_extent(object
, extents
[n
].e_min
, extents
[n
].e_max
, resid_offset
, io_errno
,
1027 should_flush
, should_return
, should_iosync
, protection
))
1028 data_returned
= TRUE
;
1030 return (data_returned
);
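/*
 * Worked example (not part of the original source), using the constants
 * defined above: EXTENT_SIZE is 256MB, so e_mask groups resident pages into
 * 256MB-aligned buckets and at most MAX_EXTENTS (8) such buckets are tracked;
 * the pre-flight is only attempted when fewer than RESIDENT_LIMIT (32K) pages
 * are resident.  For the pathological case described above -- a multi-terabyte
 * range with a single resident page near its end -- the scan collapses to one
 * extent covering just that page, instead of a page-by-page walk of the
 * entire range.
 */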
/*
 *  Routine:    memory_object_synchronize_completed [user interface]
 *
 *  Tell kernel that previously synchronized data
 *  (memory_object_synchronize) has been queued or placed on the
 *  backing storage.
 *
 *  Note: there may be multiple synchronize requests for a given
 *  memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    memory_object_size_t    length)
{
    vm_object_t object;
    msync_req_t msr;

    object = memory_object_control_to_vm_object(control);

    XPR(XPR_MEMORY_OBJECT,
        "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
        object, offset, length, 0, 0);

    /*
     *  Look for bogus arguments.
     */
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    vm_object_lock(object);

    /*
     *  search for sync request structure
     */
    queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
        if (msr->offset == offset && msr->length == length) {
            queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
            break;
        }
    }/* queue_iterate */

    if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
        vm_object_unlock(object);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_unlock(object);
    msr->flag = VM_MSYNC_DONE;
    thread_wakeup((event_t) msr);

    return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
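/*
 * Illustrative sketch (not from the original source) of the pager-side half
 * of this handshake: once a pager's memory_object_synchronize() handler has
 * pushed the requested range to backing store, it reports completion with
 * the same offset/length so the msync_req queued on the object can be found
 * and the waiting thread woken.  "moc" stands in for the pager's control port.
 *
 *  // in the pager, after the range [offset, offset + length) is stable:
 *  (void) memory_object_synchronize_completed(moc, offset, length);
 */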
static kern_return_t
vm_object_set_attributes_common(
    vm_object_t     object,
    boolean_t       may_cache,
    memory_object_copy_strategy_t copy_strategy,
    boolean_t       temporary,
    boolean_t       silent_overwrite,
    boolean_t       advisory_pageout)
{
    boolean_t   object_became_ready;

    XPR(XPR_MEMORY_OBJECT,
        "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
        object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);

    if (object == VM_OBJECT_NULL)
        return(KERN_INVALID_ARGUMENT);

    /*
     *  Verify the attributes of importance
     */
    switch(copy_strategy) {
    case MEMORY_OBJECT_COPY_NONE:
    case MEMORY_OBJECT_COPY_DELAY:
        break;
    default:
        return(KERN_INVALID_ARGUMENT);
    }

#if !ADVISORY_PAGEOUT
    if (silent_overwrite || advisory_pageout)
        return(KERN_INVALID_ARGUMENT);
#endif  /* !ADVISORY_PAGEOUT */

    vm_object_lock(object);

    /*
     *  Copy the attributes
     */
    assert(!object->internal);
    object_became_ready = !object->pager_ready;
    object->copy_strategy = copy_strategy;
    object->can_persist = may_cache;
    object->temporary = temporary;
    object->silent_overwrite = silent_overwrite;
    object->advisory_pageout = advisory_pageout;

    /*
     *  Wake up anyone waiting for the ready attribute
     *  to become asserted.
     */
    if (object_became_ready) {
        object->pager_ready = TRUE;
        vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
    }

    vm_object_unlock(object);

    return(KERN_SUCCESS);
}
/*
 *  Set the memory object attribute as provided.
 *
 *  XXX This routine cannot be completed until the vm_msync, clean
 *      in place, and cluster work is completed. See ifdef notyet
 *      below and note that vm_object_set_attributes_common()
 *      may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
    memory_object_control_t control,
    memory_object_flavor_t  flavor,
    memory_object_info_t    attributes,
    mach_msg_type_number_t  count)
{
    vm_object_t                     object;
    kern_return_t                   result = KERN_SUCCESS;
    boolean_t                       temporary;
    boolean_t                       may_cache;
    boolean_t                       invalidate;
    memory_object_copy_strategy_t   copy_strategy;
    boolean_t                       silent_overwrite;
    boolean_t                       advisory_pageout;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    vm_object_lock(object);

    temporary = object->temporary;
    may_cache = object->can_persist;
    copy_strategy = object->copy_strategy;
    silent_overwrite = object->silent_overwrite;
    advisory_pageout = object->advisory_pageout;
#if notyet
    invalidate = object->invalidate;
#endif
    vm_object_unlock(object);

    switch (flavor) {
    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
    {
        old_memory_object_behave_info_t behave;

        if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (old_memory_object_behave_info_t) attributes;

        temporary = behave->temporary;
        invalidate = behave->invalidate;
        copy_strategy = behave->copy_strategy;

        break;
    }

    case MEMORY_OBJECT_BEHAVIOR_INFO:
    {
        memory_object_behave_info_t behave;

        if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (memory_object_behave_info_t) attributes;

        temporary = behave->temporary;
        invalidate = behave->invalidate;
        copy_strategy = behave->copy_strategy;
        silent_overwrite = behave->silent_overwrite;
        advisory_pageout = behave->advisory_pageout;

        break;
    }

    case MEMORY_OBJECT_PERFORMANCE_INFO:
    {
        memory_object_perf_info_t perf;

        if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        perf = (memory_object_perf_info_t) attributes;

        may_cache = perf->may_cache;

        break;
    }

    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
    {
        old_memory_object_attr_info_t attr;

        if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (old_memory_object_attr_info_t) attributes;

        may_cache = attr->may_cache;
        copy_strategy = attr->copy_strategy;

        break;
    }

    case MEMORY_OBJECT_ATTRIBUTE_INFO:
    {
        memory_object_attr_info_t attr;

        if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (memory_object_attr_info_t) attributes;

        copy_strategy = attr->copy_strategy;
        may_cache = attr->may_cache_object;
        temporary = attr->temporary;

        break;
    }

    default:
        result = KERN_INVALID_ARGUMENT;
        break;
    }

    if (result != KERN_SUCCESS)
        return(result);

    if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
        copy_strategy = MEMORY_OBJECT_COPY_DELAY;
        temporary = TRUE;
    } else {
        temporary = FALSE;
    }

    /*
     * XXX  may_cache may become a tri-valued variable to handle
     * XXX  uncache if not in use.
     */
    return (vm_object_set_attributes_common(object,
                                            may_cache,
                                            copy_strategy,
                                            temporary,
                                            silent_overwrite,
                                            advisory_pageout));
}
kern_return_t
memory_object_get_attributes(
    memory_object_control_t control,
    memory_object_flavor_t  flavor,
    memory_object_info_t    attributes, /* pointer to OUT array */
    mach_msg_type_number_t  *count)     /* IN/OUT */
{
    kern_return_t   ret = KERN_SUCCESS;
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    vm_object_lock(object);

    switch (flavor) {
    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
    {
        old_memory_object_behave_info_t behave;

        if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (old_memory_object_behave_info_t) attributes;
        behave->copy_strategy = object->copy_strategy;
        behave->temporary = object->temporary;
#if notyet  /* remove when vm_msync complies and clean in place fini */
        behave->invalidate = object->invalidate;
#else
        behave->invalidate = FALSE;
#endif

        *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
        break;
    }

    case MEMORY_OBJECT_BEHAVIOR_INFO:
    {
        memory_object_behave_info_t behave;

        if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (memory_object_behave_info_t) attributes;
        behave->copy_strategy = object->copy_strategy;
        behave->temporary = object->temporary;
#if notyet  /* remove when vm_msync complies and clean in place fini */
        behave->invalidate = object->invalidate;
#else
        behave->invalidate = FALSE;
#endif
        behave->advisory_pageout = object->advisory_pageout;
        behave->silent_overwrite = object->silent_overwrite;
        *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
        break;
    }

    case MEMORY_OBJECT_PERFORMANCE_INFO:
    {
        memory_object_perf_info_t perf;

        if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        perf = (memory_object_perf_info_t) attributes;
        perf->cluster_size = PAGE_SIZE;
        perf->may_cache = object->can_persist;

        *count = MEMORY_OBJECT_PERF_INFO_COUNT;
        break;
    }

    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
    {
        old_memory_object_attr_info_t attr;

        if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (old_memory_object_attr_info_t) attributes;
        attr->may_cache = object->can_persist;
        attr->copy_strategy = object->copy_strategy;

        *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
        break;
    }

    case MEMORY_OBJECT_ATTRIBUTE_INFO:
    {
        memory_object_attr_info_t attr;

        if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (memory_object_attr_info_t) attributes;
        attr->copy_strategy = object->copy_strategy;
        attr->cluster_size = PAGE_SIZE;
        attr->may_cache_object = object->can_persist;
        attr->temporary = object->temporary;

        *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
        break;
    }

    default:
        ret = KERN_INVALID_ARGUMENT;
        break;
    }

    vm_object_unlock(object);

    return(ret);
}
kern_return_t
memory_object_iopl_request(
    ipc_port_t              port,
    memory_object_offset_t  offset,
    upl_size_t              *upl_size,
    upl_t                   *upl_ptr,
    upl_page_info_array_t   user_page_list,
    unsigned int            *page_list_count,
    int                     *flags)
{
    vm_object_t     object;
    kern_return_t   ret;
    int             caller_flags;

    caller_flags = *flags;

    if (caller_flags & ~UPL_VALID_FLAGS) {
        /*
         * For forward compatibility's sake,
         * reject any unknown flag.
         */
        return KERN_INVALID_VALUE;
    }

    if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
        vm_named_entry_t named_entry;

        named_entry = (vm_named_entry_t)port->ip_kobject;
        /* a few checks to make sure user is obeying rules */
        if (*upl_size == 0) {
            if (offset >= named_entry->size)
                return(KERN_INVALID_RIGHT);
            *upl_size = (upl_size_t)(named_entry->size - offset);
            if (*upl_size != named_entry->size - offset)
                return KERN_INVALID_ARGUMENT;
        }
        if (caller_flags & UPL_COPYOUT_FROM) {
            if ((named_entry->protection & VM_PROT_READ)
                        != VM_PROT_READ) {
                return(KERN_INVALID_RIGHT);
            }
        } else {
            if ((named_entry->protection &
                (VM_PROT_READ | VM_PROT_WRITE))
                != (VM_PROT_READ | VM_PROT_WRITE)) {
                return(KERN_INVALID_RIGHT);
            }
        }
        if (named_entry->size < (offset + *upl_size))
            return(KERN_INVALID_ARGUMENT);

        /* the callers parameter offset is defined to be the */
        /* offset from beginning of named entry offset in object */
        offset = offset + named_entry->offset;

        if (named_entry->is_sub_map)
            return (KERN_INVALID_ARGUMENT);

        named_entry_lock(named_entry);

        if (named_entry->is_pager) {
            object = vm_object_enter(named_entry->backing.pager,
                    named_entry->offset + named_entry->size,
                    named_entry->internal,
                    FALSE,
                    FALSE);
            if (object == VM_OBJECT_NULL) {
                named_entry_unlock(named_entry);
                return(KERN_INVALID_OBJECT);
            }

            /* JMM - drop reference on pager here? */

            /* create an extra reference for the named entry */
            vm_object_lock(object);
            vm_object_reference_locked(object);
            named_entry->backing.object = object;
            named_entry->is_pager = FALSE;
            named_entry_unlock(named_entry);

            /* wait for object to be ready */
            while (!object->pager_ready) {
                vm_object_wait(object,
                               VM_OBJECT_EVENT_PAGER_READY,
                               THREAD_UNINT);
                vm_object_lock(object);
            }
            vm_object_unlock(object);
        } else {
            /* This is the case where we are going to map */
            /* an already mapped object.  If the object is */
            /* not ready it is internal.  An external      */
            /* object cannot be mapped until it is ready   */
            /* we can therefore avoid the ready check      */
            /* in this case.                               */
            object = named_entry->backing.object;
            vm_object_reference(object);
            named_entry_unlock(named_entry);
        }
    } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
        memory_object_control_t control;
        control = (memory_object_control_t) port;
        if (control == NULL)
            return (KERN_INVALID_ARGUMENT);
        object = memory_object_control_to_vm_object(control);
        if (object == VM_OBJECT_NULL)
            return (KERN_INVALID_ARGUMENT);
        vm_object_reference(object);
    } else {
        return KERN_INVALID_ARGUMENT;
    }
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (!object->private) {
        if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
            *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
        if (object->phys_contiguous) {
            *flags = UPL_PHYS_CONTIG;
        } else {
            *flags = 0;
        }
    } else {
        *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
    }

    ret = vm_object_iopl_request(object,
                                 offset,
                                 *upl_size,
                                 upl_ptr,
                                 user_page_list,
                                 page_list_count,
                                 caller_flags);
    vm_object_deallocate(object);
    return ret;
}
/*
 *  Routine:    memory_object_upl_request [interface]
 *  Purpose:
 *      Cause the population of a portion of a vm_object.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 */

kern_return_t
memory_object_upl_request(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    upl_size_t              size,
    upl_t                   *upl_ptr,
    upl_page_info_array_t   user_page_list,
    unsigned int            *page_list_count,
    int                     cntrl_flags)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_TERMINATED);

    return vm_object_upl_request(object,
                                 offset,
                                 size,
                                 upl_ptr,
                                 user_page_list,
                                 page_list_count,
                                 cntrl_flags);
}
/*
 *  Routine:    memory_object_super_upl_request [interface]
 *  Purpose:
 *      Cause the population of a portion of a vm_object
 *      in much the same way as memory_object_upl_request.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 *      However, the region may be expanded up to the super
 *      cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    upl_size_t              size,
    upl_size_t              super_cluster,
    upl_t                   *upl,
    upl_page_info_t         *user_page_list,
    unsigned int            *page_list_count,
    int                     cntrl_flags)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_super_upl_request(object,
                                       offset,
                                       size,
                                       super_cluster,
                                       upl,
                                       user_page_list,
                                       page_list_count,
                                       cntrl_flags);
}
kern_return_t
memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
                           vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL || object->paging_offset > *start)
        return (KERN_INVALID_ARGUMENT);

    *start -= object->paging_offset;

    vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);

    *start += object->paging_offset;

    return (KERN_SUCCESS);
}
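/*
 * Worked example (not part of the original source): *start is expressed in
 * pager (memory object) offsets, while the cluster computation runs on VM
 * object offsets.  If object->paging_offset is 0x2000 and the pager asks
 * about *start == 0x6000, the cluster is computed at object offset 0x4000
 * and the resulting start is shifted back up by 0x2000 before returning.
 */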
int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *  Routine:    host_default_memory_manager [interface]
 *  Purpose:
 *      set/get the default memory manager port and default cluster
 *      size.
 *
 *      If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
    host_priv_t             host_priv,
    memory_object_default_t *default_manager,
    __unused memory_object_cluster_size_t cluster_size)
{
    memory_object_default_t current_manager;
    memory_object_default_t new_manager;
    memory_object_default_t returned_manager;
    kern_return_t result = KERN_SUCCESS;

    if (host_priv == HOST_PRIV_NULL)
        return(KERN_INVALID_HOST);

    assert(host_priv == &realhost);

    new_manager = *default_manager;
    lck_mtx_lock(&memory_manager_default_lock);
    current_manager = memory_manager_default;
    returned_manager = MEMORY_OBJECT_DEFAULT_NULL;

    if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
        /*
         *  Retrieve the current value.
         */
        returned_manager = current_manager;
        memory_object_default_reference(returned_manager);
    } else {
        /*
         *  If this is the first non-null manager, start
         *  up the internal pager support.
         */
        if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
            result = vm_pageout_internal_start();
            if (result != KERN_SUCCESS)
                goto out;
        }

        /*
         *  Retrieve the current value,
         *  and replace it with the supplied value.
         *  We return the old reference to the caller
         *  but we have to take a reference on the new
         *  one.
         */
        returned_manager = current_manager;
        memory_manager_default = new_manager;
        memory_object_default_reference(new_manager);

        /*
         *  In case anyone's been waiting for a memory
         *  manager to be established, wake them up.
         */
        thread_wakeup((event_t) &memory_manager_default);

        /*
         * Now that we have a default pager for anonymous memory,
         * reactivate all the throttled pages (i.e. dirty pages with
         * no pager).
         */
        if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
        {
            vm_page_reactivate_all_throttled();
        }
    }
 out:
    lck_mtx_unlock(&memory_manager_default_lock);

    *default_manager = returned_manager;
    return(result);
}
/*
 *  Routine:    memory_manager_default_reference
 *  Purpose:
 *      Returns a naked send right for the default
 *      memory manager.  The returned right is always
 *      valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(void)
{
    memory_object_default_t current_manager;

    lck_mtx_lock(&memory_manager_default_lock);
    current_manager = memory_manager_default;
    while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
        wait_result_t res;

        res = lck_mtx_sleep(&memory_manager_default_lock,
                            LCK_SLEEP_DEFAULT,
                            (event_t) &memory_manager_default,
                            THREAD_UNINT);
        assert(res == THREAD_AWAKENED);
        current_manager = memory_manager_default;
    }
    memory_object_default_reference(current_manager);
    lck_mtx_unlock(&memory_manager_default_lock);

    return current_manager;
}
/*
 *  Routine:    memory_manager_default_check
 *
 *  Purpose:
 *      Check whether a default memory manager has been set
 *      up yet, or not.  Returns KERN_SUCCESS if dmm exists,
 *      and KERN_FAILURE if dmm does not exist.
 *
 *      If there is no default memory manager, log an error,
 *      but only the first time.
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
    memory_object_default_t current;

    lck_mtx_lock(&memory_manager_default_lock);
    current = memory_manager_default;
    if (current == MEMORY_OBJECT_DEFAULT_NULL) {
        static boolean_t logged;    /* initialized to 0 */
        boolean_t   complain = !logged;
        logged = TRUE;
        lck_mtx_unlock(&memory_manager_default_lock);
        if (complain)
            printf("Warning: No default memory manager\n");
        return(KERN_FAILURE);
    } else {
        lck_mtx_unlock(&memory_manager_default_lock);
        return(KERN_SUCCESS);
    }
}
__private_extern__ void
memory_manager_default_init(void)
{
    memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
    lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    int                     ops,
    ppnum_t                 *phys_entry,
    int                     *flags)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_page_op(object, offset, ops, phys_entry, flags);
}
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
    memory_object_control_t control,
    memory_object_offset_t  offset_beg,
    memory_object_offset_t  offset_end,
    int                     ops,
    int                     *range)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_range_op(object,
                              offset_beg,
                              offset_end,
                              ops,
                              (uint32_t *) range);
}
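/*
 * Illustrative sketch (not from the original source), assuming the
 * UPL_POP_* / UPL_ROP_* operation codes from <mach/memory_object_types.h>:
 * a single-page state query versus a range scan on the same control port.
 * "moc", "off" and "len" stand in for values the caller already has.
 *
 *  ppnum_t phys;
 *  int     pg_flags;
 *  int     range;
 *
 *  // query the state of one page (no set/clear operations requested)
 *  (void) memory_object_page_op(moc, off, 0, &phys, &pg_flags);
 *
 *  // measure how far a run of absent pages extends, with no per-page state
 *  (void) memory_object_range_op(moc, off, off + len, UPL_ROP_ABSENT, &range);
 */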
void
memory_object_mark_used(
    memory_object_control_t control)
{
    vm_object_t object;

    if (control == NULL)
        return;

    object = memory_object_control_to_vm_object(control);

    if (object != VM_OBJECT_NULL)
        vm_object_cache_remove(object);
}


void
memory_object_mark_unused(
    memory_object_control_t control,
    __unused boolean_t      rage)
{
    vm_object_t object;

    if (control == NULL)
        return;

    object = memory_object_control_to_vm_object(control);

    if (object != VM_OBJECT_NULL)
        vm_object_cache_add(object);
}
kern_return_t
memory_object_pages_resident(
    memory_object_control_t control,
    boolean_t               *has_pages_resident)
{
    vm_object_t object;

    *has_pages_resident = FALSE;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (object->resident_page_count)
        *has_pages_resident = TRUE;

    return (KERN_SUCCESS);
}
kern_return_t
memory_object_signed(
    memory_object_control_t control,
    boolean_t               is_signed)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return KERN_INVALID_ARGUMENT;

    vm_object_lock(object);
    object->code_signed = is_signed;
    vm_object_unlock(object);

    return KERN_SUCCESS;
}
boolean_t
memory_object_is_slid(
    memory_object_control_t control)
{
    vm_object_t object = VM_OBJECT_NULL;
    vm_object_t slide_object = slide_info.slide_object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return FALSE;

    return (object == slide_object);
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
    int i;

    i = (vm_size_t) sizeof (struct memory_object_control);
    mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
    zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
    zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
    return;
}
__private_extern__ memory_object_control_t
memory_object_control_allocate(
    vm_object_t object)
{
    memory_object_control_t control;

    control = (memory_object_control_t)zalloc(mem_obj_control_zone);
    if (control != MEMORY_OBJECT_CONTROL_NULL) {
        control->moc_object = object;
        control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
    }
    return (control);
}
__private_extern__ void
memory_object_control_collapse(
    memory_object_control_t control,
    vm_object_t             object)
{
    assert((control->moc_object != VM_OBJECT_NULL) &&
           (control->moc_object != object));
    control->moc_object = object;
}
__private_extern__ vm_object_t
memory_object_control_to_vm_object(
    memory_object_control_t control)
{
    if (control == MEMORY_OBJECT_CONTROL_NULL ||
        control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
        return VM_OBJECT_NULL;

    return (control->moc_object);
}
memory_object_control_t
convert_port_to_mo_control(
    __unused mach_port_t port)
{
    return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
    __unused memory_object_control_t control)
{
    return MACH_PORT_NULL;
}
void
memory_object_control_reference(
    __unused memory_object_control_t control)
{
    return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
    memory_object_control_t control)
{
    zfree(mem_obj_control_zone, control);
}
void
memory_object_control_disable(
    memory_object_control_t control)
{
    assert(control->moc_object != VM_OBJECT_NULL);
    control->moc_object = VM_OBJECT_NULL;
}
void
memory_object_default_reference(
    memory_object_default_t dmm)
{
    ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
    memory_object_default_t dmm)
{
    ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
    __unused mach_port_t port)
{
    return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
    __unused memory_object_t object)
{
    return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
    memory_object_t memory_object)
{
    (memory_object->mo_pager_ops->memory_object_reference)(
        memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
    memory_object_t memory_object)
{
    (memory_object->mo_pager_ops->memory_object_deallocate)(
        memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
    memory_object_t memory_object,
    memory_object_control_t memory_control,
    memory_object_cluster_size_t memory_object_page_size
)
{
    return (memory_object->mo_pager_ops->memory_object_init)(
        memory_object,
        memory_control,
        memory_object_page_size);
}
/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
    memory_object_t memory_object
)
{
    return (memory_object->mo_pager_ops->memory_object_terminate)(
        memory_object);
}

/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t desired_access,
    memory_object_fault_info_t fault_info
)
{
    return (memory_object->mo_pager_ops->memory_object_data_request)(
        memory_object,
        offset,
        length,
        desired_access,
        fault_info);
}
2151 kern_return_t memory_object_data_return
2153 memory_object_t memory_object
,
2154 memory_object_offset_t offset
,
2155 memory_object_cluster_size_t size
,
2156 memory_object_offset_t
*resid_offset
,
2159 boolean_t kernel_copy
,
2163 return (memory_object
->mo_pager_ops
->memory_object_data_return
)(
2174 /* Routine memory_object_data_initialize */
2175 kern_return_t memory_object_data_initialize
2177 memory_object_t memory_object
,
2178 memory_object_offset_t offset
,
2179 memory_object_cluster_size_t size
2182 return (memory_object
->mo_pager_ops
->memory_object_data_initialize
)(
2188 /* Routine memory_object_data_unlock */
2189 kern_return_t memory_object_data_unlock
2191 memory_object_t memory_object
,
2192 memory_object_offset_t offset
,
2193 memory_object_size_t size
,
2194 vm_prot_t desired_access
2197 return (memory_object
->mo_pager_ops
->memory_object_data_unlock
)(
/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_size_t size,
    vm_sync_t sync_flags
)
{
    return (memory_object->mo_pager_ops->memory_object_synchronize)(
        memory_object,
        offset,
        size,
        sync_flags);
}
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * released.
 */
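/*
 * Illustrative sketch (not from the original source) of the bookkeeping the
 * comment above describes, as a pager might implement it.  The "pgr" state,
 * its lock and the routine names are hypothetical.
 *
 *  kern_return_t
 *  my_pager_map(__unused memory_object_t mem_obj, vm_prot_t prot)
 *  {
 *      lck_mtx_lock(&pgr->lock);
 *      pgr->mapped = TRUE;         // object is mapped somewhere
 *      pgr->max_prot |= prot;      // remember the protections used
 *      lck_mtx_unlock(&pgr->lock);
 *      return KERN_SUCCESS;
 *  }
 *
 *  kern_return_t
 *  my_pager_last_unmap(__unused memory_object_t mem_obj)
 *  {
 *      lck_mtx_lock(&pgr->lock);
 *      pgr->mapped = FALSE;        // last mapping just went away
 *      lck_mtx_unlock(&pgr->lock);
 *      // the pager may now decide to drop its "named" reference
 *      return KERN_SUCCESS;
 *  }
 */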
/* Routine memory_object_map */
kern_return_t memory_object_map
(
    memory_object_t memory_object,
    vm_prot_t prot
)
{
    return (memory_object->mo_pager_ops->memory_object_map)(
        memory_object,
        prot);
}

/* Routine memory_object_last_unmap */
kern_return_t memory_object_last_unmap
(
    memory_object_t memory_object
)
{
    return (memory_object->mo_pager_ops->memory_object_last_unmap)(
        memory_object);
}
/* Routine memory_object_data_reclaim */
kern_return_t memory_object_data_reclaim
(
    memory_object_t memory_object,
    boolean_t reclaim_backing_store
)
{
    if (memory_object->mo_pager_ops->memory_object_data_reclaim == NULL)
        return KERN_NOT_SUPPORTED;
    return (memory_object->mo_pager_ops->memory_object_data_reclaim)(
        memory_object,
        reclaim_backing_store);
}
/* Routine memory_object_create */
kern_return_t memory_object_create
(
    memory_object_default_t default_memory_manager,
    vm_size_t new_memory_object_size,
    memory_object_t *new_memory_object
)
{
    return default_pager_memory_object_create(default_memory_manager,
                                              new_memory_object_size,
                                              new_memory_object);
}
upl_t
convert_port_to_upl(
    ipc_port_t port)
{
    upl_t upl;

    if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
        return (upl_t) NULL;
    }
    upl = (upl_t) port->ip_kobject;

    return upl;
}
mach_port_t
convert_upl_to_port(
    __unused upl_t upl)
{
    return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
    __unused ipc_port_t port,
    __unused mach_port_mscount_t mscount)
{
    return;
}