/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  File:   vm/memory_object.c
 *  Author: Michael Wayne Young
 *
 *  External memory management interface control functions.
 */

/*
 *  Interface dependencies:
 */
#include <mach/std_types.h>     /* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>
/*
 *  Implementation dependencies:
 */
#include <string.h>             /* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>        /* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>            /* For pmap_clear_modify */
#include <vm/vm_kern.h>         /* For kernel_map, vm_move */
#include <vm/vm_map.h>          /* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>   /* Needed by some vm_page.h macros */
#include <vm/vm_shared_region.h>

#include <vm/vm_external.h>

#include <vm/vm_protos.h>
memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
decl_lck_mtx_data(, memory_manager_default_lock)
/*
 *  Routine:    memory_object_should_return_page
 *
 *  Description:
 *      Determine whether the given page should be returned,
 *      based on the page's state and on the given return policy.
 *
 *      We should return the page if one of the following is true:
 *
 *      1. Page is dirty and should_return is not RETURN_NONE.
 *      2. Page is precious and should_return is RETURN_ALL.
 *      3. Should_return is RETURN_ANYTHING.
 *
 *      As a side effect, m->dirty will be made consistent
 *      with pmap_is_modified(m), if should_return is not
 *      MEMORY_OBJECT_RETURN_NONE.
 */

#define memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))

typedef int memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE     3
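
/*
 * Illustrative sketch (not part of the original source): the return policy
 * described in the comment above, written out as an explicit equivalent of
 * the macro.  Unlike the macro it does not write back m->dirty.  The helper
 * name is hypothetical and the block is deliberately compiled out.
 */
#if 0   /* example only */
static boolean_t
example_should_return_page(vm_page_t m, memory_object_return_t should_return)
{
    if (should_return == MEMORY_OBJECT_RETURN_NONE)
        return FALSE;                                   /* never return under RETURN_NONE */
    if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
        return TRUE;                                    /* rule 1: dirty page */
    if (m->precious && should_return == MEMORY_OBJECT_RETURN_ALL)
        return TRUE;                                    /* rule 2: precious page under RETURN_ALL */
    return should_return == MEMORY_OBJECT_RETURN_ANYTHING; /* rule 3 */
}
#endif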
memory_object_lock_result_t memory_object_lock_page(
                vm_page_t               m,
                memory_object_return_t  should_return,
                boolean_t               should_flush,
                vm_prot_t               prot);

/*
 *  Routine:    memory_object_lock_page
 *
 *  Description:
 *      Perform the appropriate lock operations on the
 *      given page.  See the description of
 *      "memory_object_lock_request" for the meanings
 *      of the arguments.
 *
 *      Returns an indication that the operation
 *      completed, blocked, or that the page must
 *      be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
    vm_page_t               m,
    memory_object_return_t  should_return,
    boolean_t               should_flush,
    vm_prot_t               prot)
{
    XPR(XPR_MEMORY_OBJECT,
        "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
        m, should_return, should_flush, prot, 0);


    if (m->busy || m->cleaning)
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

    if (m->laundry)
        vm_pageout_steal_laundry(m, FALSE);

    /*
     *  Don't worry about pages for which the kernel
     *  does not have any data.
     */
    if (m->absent || m->error || m->restart) {
        if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
            /*
             * dump the page, pager wants us to
             * clean it up and there is no
             * relevant data to return
             */
            return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
        }
        return (MEMORY_OBJECT_LOCK_RESULT_DONE);
    }
    assert(!m->fictitious);

    if (VM_PAGE_WIRED(m)) {
        /*
         * The page is wired... just clean or return the page if needed.
         * Wired pages don't get flushed or disconnected from the pmap.
         */
        if (memory_object_should_return_page(m, should_return))
            return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);

        return (MEMORY_OBJECT_LOCK_RESULT_DONE);
    }

    if (should_flush) {
        /*
         * must do the pmap_disconnect before determining the
         * need to return the page... otherwise it's possible
         * for the page to go from the clean to the dirty state
         * after we've made our decision
         */
        if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
            SET_PAGE_DIRTY(m, FALSE);
        }
    } else {
        /*
         * If we are decreasing permission, do it now;
         * let the fault handler take care of increases
         * (pmap_page_protect may not increase protection).
         */
        if (prot != VM_PROT_NO_CHANGE)
            pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
    }

    /*
     *  Handle returning dirty or precious pages
     */
    if (memory_object_should_return_page(m, should_return)) {
        /*
         * we used to do a pmap_disconnect here in support
         * of memory_object_lock_request, but that routine
         * no longer requires this...  in any event, in
         * our world, it would turn into a big noop since
         * we don't lock the page in any way and as soon
         * as we drop the object lock, the page can be
         * faulted back into an address space
         *
         *  pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
         */
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
    }

    /*
     *  Handle flushing clean pages
     */
    if (should_flush)
        return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);

    /*
     * we used to deactivate clean pages at this point,
     * but we do not believe that an msync should change
     * the 'age' of a page in the cache... here is the
     * original comment and code concerning this...
     *
     *  XXX Make clean but not flush a paging hint,
     *  and deactivate the pages.  This is a hack
     *  because it overloads flush/clean with
     *  implementation-dependent meaning.  This only
     *  happens to pages that are already clean.
     *
     *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
     *       return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
     */

    return (MEMORY_OBJECT_LOCK_RESULT_DONE);
}
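
/*
 * Illustrative sketch (not part of the original source): the general shape of
 * a caller dispatching on memory_object_lock_page()'s result, much as
 * vm_object_update_extent() does further down in this file.  All names other
 * than the lock-result constants and memory_object_lock_page() itself are
 * hypothetical; the block is deliberately compiled out.
 */
#if 0   /* example only */
static void
example_handle_page(vm_page_t m, memory_object_return_t should_return,
                    boolean_t should_flush, vm_prot_t prot)
{
    switch (memory_object_lock_page(m, should_return, should_flush, prot)) {
    case MEMORY_OBJECT_LOCK_RESULT_DONE:
        break;                  /* nothing more to do for this page */
    case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
        /* page is busy or being cleaned: the caller waits and retries */
        break;
    case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
        /* accumulate the page into a run to be pushed to the pager */
        break;
    case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
        /* clean page being flushed: it can simply be freed */
        break;
    }
}
#endif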
/*
 *  Routine:    memory_object_lock_request [user interface]
 *
 *  Description:
 *      Control use of the data associated with the given
 *      memory object.  For each page in the given range,
 *      perform the following operations, in order:
 *          1)  restrict access to the page (disallow
 *              forms specified by "prot");
 *          2)  return data to the manager (if "should_return"
 *              is RETURN_DIRTY and the page is dirty, or
 *              "should_return" is RETURN_ALL and the page
 *              is either dirty or precious); and,
 *          3)  flush the cached copy (if "should_flush"
 *              is asserted).
 *      The set of pages is defined by a starting offset
 *      ("offset") and size ("size").  Only pages with the
 *      same page alignment as the starting offset are
 *      considered.
 *
 *      A single acknowledgement is sent (to the "reply_to"
 *      port) when these actions are complete.  If successful,
 *      the naked send right for reply_to is consumed.
 */

kern_return_t
memory_object_lock_request(
    memory_object_control_t     control,
    memory_object_offset_t      offset,
    memory_object_size_t        size,
    memory_object_offset_t      *resid_offset,
    int                         *io_errno,
    memory_object_return_t      should_return,
    int                         flags,
    vm_prot_t                   prot)
{
    vm_object_t object;

    /*
     *  Check for bogus arguments.
     */
    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
        return (KERN_INVALID_ARGUMENT);

    size = round_page_64(size);

    /*
     *  Lock the object, and acquire a paging reference to
     *  prevent the memory_object reference from being released.
     */
    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
        if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
            flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
            flags |= MEMORY_OBJECT_DATA_FLUSH;
        }
    }
    offset -= object->paging_offset;

    if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL)
        vm_object_reap_pages(object, REAP_DATA_FLUSH);
    else
        (void)vm_object_update(object, offset, size, resid_offset,
                               io_errno, should_return, flags, prot);

    vm_object_paging_end(object);
    vm_object_unlock(object);

    return (KERN_SUCCESS);
}
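
/*
 * Illustrative sketch (not part of the original source): a minimal call that
 * asks the pager to write back all dirty/precious pages in a range and then
 * flush them, without changing protections.  The wrapper name and its
 * arguments are hypothetical; the block is deliberately compiled out.
 */
#if 0   /* example only */
static kern_return_t
example_flush_range(memory_object_control_t control,
                    memory_object_offset_t offset,
                    memory_object_size_t size)
{
    return memory_object_lock_request(control, offset, size,
                                      NULL,     /* resid_offset: not needed */
                                      NULL,     /* io_errno: not needed */
                                      MEMORY_OBJECT_RETURN_ALL,
                                      MEMORY_OBJECT_DATA_FLUSH,
                                      VM_PROT_NO_CHANGE);
}
#endif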
/*
 *  memory_object_release_name:  [interface]
 *
 *  Enforces name semantic on memory_object reference count decrement.
 *  This routine should not be called unless the caller holds a name
 *  reference gained through the memory_object_named_create or the
 *  memory_object_rename call.
 *  If the TERMINATE_IDLE flag is set, the call will return if the
 *  reference count is not 1, i.e. idle with the only remaining reference
 *  being the name.
 *  If the decision is made to proceed, the name field flag is set to
 *  false and the reference count is decremented.  If the RESPECT_CACHE
 *  flag is set and the reference count has gone to zero, the
 *  memory_object is checked to see if it is cacheable; otherwise when
 *  the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
    memory_object_control_t control,
    int                     flags)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_release_name(object, flags);
}
/*
 *  Routine:    memory_object_destroy [user interface]
 *  Purpose:
 *      Shut down a memory object, despite the
 *      presence of address map (or other) references
 *      to the vm_object.
 */
kern_return_t
memory_object_destroy(
    memory_object_control_t control,
    kern_return_t           reason)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return (vm_object_destroy(object, reason));
}
/*
 *  Routine:    vm_object_sync
 *
 *      Kernel internal function to synch out pages in a given
 *      range within an object to its memory manager.  Much the
 *      same as memory_object_lock_request but page protection
 *      is not changed.
 *
 *      If the should_flush and should_return flags are true, pages
 *      are flushed, that is dirty & precious pages are written to
 *      the memory manager and then discarded.  If should_return
 *      is false, only precious pages are returned to the memory
 *      manager.
 *
 *      If should_flush is false and should_return true, the memory
 *      manager's copy of the pages is updated.  If should_return
 *      is also false, only the precious pages are updated.  This
 *      last option is of limited utility.
 *
 *  Returns:
 *      FALSE   if no pages were returned to the pager
 *      TRUE    otherwise.
 */

boolean_t
vm_object_sync(
    vm_object_t         object,
    vm_object_offset_t  offset,
    vm_object_size_t    size,
    boolean_t           should_flush,
    boolean_t           should_return,
    boolean_t           should_iosync)
{
    boolean_t   rv;
    int         flags;

    XPR(XPR_VM_OBJECT,
        "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
        object, offset, size, should_flush, should_return);

    /*
     * Lock the object, and acquire a paging reference to
     * prevent the memory_object and control ports from
     * being destroyed.
     */
    vm_object_lock(object);
    vm_object_paging_begin(object);

    if (should_flush) {
        flags = MEMORY_OBJECT_DATA_FLUSH;
        /*
         * This flush is from an msync(), not a truncate(), so the
         * contents of the file are not affected.
         * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
         * that the data is not changed and that there's no need to
         * push the old contents to a copy object.
         */
        flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
    } else
        flags = 0;

    if (should_iosync)
        flags |= MEMORY_OBJECT_IO_SYNC;

    rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
                          (should_return) ?
                          MEMORY_OBJECT_RETURN_ALL :
                          MEMORY_OBJECT_RETURN_NONE,
                          flags,
                          VM_PROT_NO_CHANGE);


    vm_object_paging_end(object);
    vm_object_unlock(object);
    return rv;
}
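
/*
 * Illustrative sketch (not part of the original source): the flag/return
 * combinations described in the comment above, shown as concrete
 * vm_object_sync() calls.  The object/offset/size values are hypothetical
 * and the block is deliberately compiled out.
 */
#if 0   /* example only */
static void
example_sync_modes(vm_object_t object, vm_object_offset_t offset,
                   vm_object_size_t size)
{
    /* msync-style: write dirty & precious pages back, then discard them */
    (void) vm_object_sync(object, offset, size, TRUE, TRUE, FALSE);

    /* update the pager's copy but keep the pages resident */
    (void) vm_object_sync(object, offset, size, FALSE, TRUE, FALSE);
}
#endif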
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
MACRO_BEGIN                                                                \
                                                                           \
    int                 upl_flags;                                         \
    memory_object_t     pager;                                             \
                                                                           \
    if (object->object_slid) {                                             \
        panic("Objects with slid pages not allowed\n");                    \
    }                                                                      \
                                                                           \
    if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {                 \
        vm_object_paging_begin(object);                                    \
        vm_object_unlock(object);                                          \
                                                                           \
        if (iosync)                                                        \
            upl_flags = UPL_MSYNC | UPL_IOSYNC;                            \
        else                                                               \
            upl_flags = UPL_MSYNC;                                         \
                                                                           \
        (void) memory_object_data_return(pager,                            \
            po,                                                            \
            (memory_object_cluster_size_t)data_cnt,                        \
            ro,                                                            \
            ioerr,                                                         \
            FALSE,                                                         \
            FALSE,                                                         \
            upl_flags);                                                    \
                                                                           \
        vm_object_lock(object);                                            \
        vm_object_paging_end(object);                                      \
    }                                                                      \
MACRO_END

extern struct vnode *
vnode_pager_lookup_vnode(memory_object_t);
static int
vm_object_update_extent(
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_object_offset_t      offset_end,
    vm_object_offset_t      *offset_resid,
    int                     *io_errno,
    boolean_t               should_flush,
    memory_object_return_t  should_return,
    boolean_t               should_iosync,
    vm_prot_t               prot)
{
    vm_page_t   m;
    int         retval = 0;
    vm_object_offset_t              paging_offset = 0;
    vm_object_offset_t              next_offset = offset;
    memory_object_lock_result_t     page_lock_result;
    memory_object_cluster_size_t    data_cnt = 0;
    struct vm_page_delayed_work     dw_array[DEFAULT_DELAYED_WORK_LIMIT];
    struct vm_page_delayed_work     *dwp;
    int         dw_count;
    int         dw_limit;
    int         dirty_count;

    dwp = &dw_array[0];
    dw_count = 0;
    dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
    dirty_count = 0;

    for (;
         offset < offset_end && object->resident_page_count;
         offset += PAGE_SIZE_64) {

        /*
         * Limit the number of pages to be cleaned at once to a contiguous
         * run, or at most MAX_UPL_TRANSFER_BYTES
         */
        if (data_cnt) {
            if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {

                if (dw_count) {
                    vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
                LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                                       paging_offset, offset_resid, io_errno, should_iosync);
                data_cnt = 0;
            }
        }
        while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {

            dwp->dw_mask = 0;

            page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

            if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
                /*
                 *  End of a run of dirty/precious pages.
                 */
                if (dw_count) {
                    vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
                LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                                       paging_offset, offset_resid, io_errno, should_iosync);
                /*
                 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
                 * allow the state of page 'm' to change... we need to re-lookup
                 * the current offset
                 */
                data_cnt = 0;
                continue;
            }

            switch (page_lock_result) {

            case MEMORY_OBJECT_LOCK_RESULT_DONE:
                break;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
                if (m->dirty == TRUE)
                    dirty_count++;
                dwp->dw_mask |= DW_vm_page_free;
                break;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
                PAGE_SLEEP(object, m, THREAD_UNINT);
                continue;

            case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
                if (data_cnt == 0)
                    paging_offset = offset;

                data_cnt += PAGE_SIZE;
                next_offset = offset + PAGE_SIZE_64;

                /*
                 * wired pages shouldn't be flushed and
                 * since they aren't on any queue,
                 * no need to remove them
                 */
                if (!VM_PAGE_WIRED(m)) {

                    if (should_flush) {
                        /*
                         * add additional state for the flush
                         */
                        m->free_when_done = TRUE;
                    }
                    /*
                     * we used to remove the page from the queues at this
                     * point, but we do not believe that an msync
                     * should cause the 'age' of a page to be changed
                     *
                     *    dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
                     */
                }
                retval = 1;
                break;
            }
            if (dwp->dw_mask) {
                VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

                if (dw_count >= dw_limit) {
                    vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
                    dwp = &dw_array[0];
                    dw_count = 0;
                }
            }
            break;
        }
    }

    if (object->pager)
        task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
    /*
     *  We have completed the scan for applicable pages.
     *  Clean any pages that have been saved.
     */
    if (dw_count)
        vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);

    if (data_cnt) {
        LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
                               paging_offset, offset_resid, io_errno, should_iosync);
    }
    return (retval);
}
/*
 *  Routine:    vm_object_update
 *  Description:
 *      Work function for m_o_lock_request(), vm_o_sync().
 *
 *      Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
    vm_object_t             object,
    vm_object_offset_t      offset,
    vm_object_size_t        size,
    vm_object_offset_t      *resid_offset,
    int                     *io_errno,
    memory_object_return_t  should_return,
    int                     flags,
    vm_prot_t               protection)
{
    vm_object_t         copy_object = VM_OBJECT_NULL;
    boolean_t           data_returned = FALSE;
    boolean_t           update_cow;
    boolean_t           should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
    boolean_t           should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
    vm_fault_return_t   result;
    int                 num_of_extents;
    int                 n;
#define MAX_EXTENTS     8
#define EXTENT_SIZE     (1024 * 1024 * 256)
#define RESIDENT_LIMIT  (1024 * 32)
    struct extent {
        vm_object_offset_t e_base;
        vm_object_offset_t e_min;
        vm_object_offset_t e_max;
    } extents[MAX_EXTENTS];

    /*
     *  To avoid blocking while scanning for pages, save
     *  dirty pages to be cleaned all at once.
     *
     *  XXXO A similar strategy could be used to limit the
     *  number of times that a scan must be restarted for
     *  other reasons.  Those pages that would require blocking
     *  could be temporarily collected in another list, or
     *  their offsets could be recorded in a small array.
     */

    /*
     * XXX  NOTE: May want to consider converting this to a page list
     * XXX  vm_map_copy interface.  Need to understand object
     * XXX  coalescing implications before doing so.
     */

    update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
            && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
            !(flags & MEMORY_OBJECT_DATA_PURGE)))
            || (flags & MEMORY_OBJECT_COPY_SYNC);

    if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
        int collisions = 0;

        while ((copy_object = object->copy) != VM_OBJECT_NULL) {
            /*
             * need to do a try here since we're swimming upstream
             * against the normal lock ordering... however, we need
             * to hold the object stable until we gain control of the
             * copy object so we have to be careful how we approach this
             */
            if (vm_object_lock_try(copy_object)) {
                /*
                 * we 'won' the lock on the copy object...
                 * no need to hold the object lock any longer...
                 * take a real reference on the copy object because
                 * we're going to call vm_fault_page on it which may
                 * under certain conditions drop the lock and the paging
                 * reference we're about to take... the reference
                 * will keep the copy object from going away if that happens
                 */
                vm_object_unlock(object);
                vm_object_reference_locked(copy_object);

                break;
            }
            vm_object_unlock(object);

            collisions++;
            mutex_pause(collisions);

            vm_object_lock(object);
        }
    }
    if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
        vm_map_size_t       i;
        vm_map_size_t       copy_size;
        vm_map_offset_t     copy_offset;
        vm_prot_t           prot;
        vm_page_t           page;
        vm_page_t           top_page;
        kern_return_t       error = 0;
        struct vm_object_fault_info fault_info;

        if (copy_object != VM_OBJECT_NULL) {
            /*
             * translate offset with respect to shadow's offset
             */
            copy_offset = (offset >= copy_object->vo_shadow_offset) ?
                (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
                (vm_map_offset_t) 0;

            if (copy_offset > copy_object->vo_size)
                copy_offset = copy_object->vo_size;

            /*
             * clip size with respect to shadow offset
             */
            if (offset >= copy_object->vo_shadow_offset) {
                copy_size = size;
            } else if (size >= copy_object->vo_shadow_offset - offset) {
                copy_size = size - (copy_object->vo_shadow_offset - offset);
            } else {
                copy_size = 0;
            }

            if (copy_offset + copy_size > copy_object->vo_size) {
                if (copy_object->vo_size >= copy_offset) {
                    copy_size = copy_object->vo_size - copy_offset;
                } else {
                    copy_size = 0;
                }
            }
            copy_size += copy_offset;

        } else {
            copy_object = object;

            copy_size   = offset + size;
            copy_offset = offset;
        }
        fault_info.interruptible = THREAD_UNINT;
        fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
        fault_info.user_tag  = 0;
        fault_info.pmap_options = 0;
        fault_info.lo_offset = copy_offset;
        fault_info.hi_offset = copy_size;
        fault_info.no_cache  = FALSE;
        fault_info.stealth = TRUE;
        fault_info.io_sync = FALSE;
        fault_info.cs_bypass = FALSE;
        fault_info.mark_zf_absent = FALSE;
        fault_info.batch_pmap_op = FALSE;

        vm_object_paging_begin(copy_object);

        for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
    RETRY_COW_OF_LOCK_REQUEST:
            fault_info.cluster_size = (vm_size_t) (copy_size - i);
            assert(fault_info.cluster_size == copy_size - i);

            prot = VM_PROT_WRITE|VM_PROT_READ;

            result = vm_fault_page(copy_object, i,
                                   VM_PROT_WRITE|VM_PROT_READ,
                                   FALSE,
                                   FALSE, /* page not looked up */
                                   &prot,
                                   &page,
                                   &top_page,
                                   (int *)0,
                                   &error,
                                   FALSE,
                                   FALSE, &fault_info);

            switch (result) {
            case VM_FAULT_SUCCESS:
                if (top_page) {
                    vm_fault_cleanup(
                        VM_PAGE_OBJECT(page), top_page);
                    vm_object_lock(copy_object);
                    vm_object_paging_begin(copy_object);
                }
                if (( !VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {

                    vm_page_lockspin_queues();

                    if (( !VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
                        vm_page_deactivate(page);
                    }
                    vm_page_unlock_queues();
                }
                PAGE_WAKEUP_DONE(page);
                break;
            case VM_FAULT_RETRY:
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_INTERRUPTED:
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_MEMORY_SHORTAGE:
                VM_PAGE_WAIT();
                prot = VM_PROT_WRITE|VM_PROT_READ;
                vm_object_lock(copy_object);
                vm_object_paging_begin(copy_object);
                goto RETRY_COW_OF_LOCK_REQUEST;
            case VM_FAULT_SUCCESS_NO_VM_PAGE:
                /* success but no VM page: fail */
                vm_object_paging_end(copy_object);
                vm_object_unlock(copy_object);
                /*FALLTHROUGH*/
            case VM_FAULT_MEMORY_ERROR:
                if (object != copy_object)
                    vm_object_deallocate(copy_object);
                vm_object_lock(object);
                goto BYPASS_COW_COPYIN;
            default:
                panic("vm_object_update: unexpected error 0x%x"
                      " from vm_fault_page()\n", result);
            }
        }
        vm_object_paging_end(copy_object);
    }
    if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
        if (copy_object != VM_OBJECT_NULL && copy_object != object) {
            vm_object_unlock(copy_object);
            vm_object_deallocate(copy_object);
            vm_object_lock(object);
        }
        return KERN_SUCCESS;
    }
    if (copy_object != VM_OBJECT_NULL && copy_object != object) {
        if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
            vm_object_lock_assert_exclusive(copy_object);
            copy_object->shadow_severed = TRUE;
            copy_object->shadowed = FALSE;
            copy_object->shadow = NULL;
            /*
             * delete the ref the COW was holding on the target object
             */
            vm_object_deallocate(object);
        }
        vm_object_unlock(copy_object);
        vm_object_deallocate(copy_object);
        vm_object_lock(object);
    }
BYPASS_COW_COPYIN:

    /*
     * when we have a really large range to check relative
     * to the number of actual resident pages, we'd like
     * to use the resident page list to drive our checks
     * however, the object lock will get dropped while processing
     * the page which means the resident queue can change which
     * means we can't walk the queue as we process the pages
     * we also want to do the processing in offset order to allow
     * 'runs' of pages to be collected if we're being told to
     * flush to disk... the resident page queue is NOT ordered.
     *
     * a temporary solution (until we figure out how to deal with
     * large address spaces more generically) is to pre-flight
     * the resident page queue (if it's small enough) and develop
     * a collection of extents (that encompass actual resident pages)
     * to visit.  This will at least allow us to deal with some of the
     * more pathological cases in a more efficient manner.  The current
     * worst case (a single resident page at the end of an extremely large
     * range) can take minutes to complete for ranges in the terabyte
     * category... since this routine is called when truncating a file,
     * and we currently support files up to 16 Tbytes in size, this
     * is not a theoretical problem
     */

    if ((object->resident_page_count < RESIDENT_LIMIT) &&
        (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
        vm_page_t           next;
        vm_object_offset_t  start;
        vm_object_offset_t  end;
        vm_object_size_t    e_mask;
        vm_page_t           m;

        start = offset;
        end   = offset + size;
        num_of_extents = 0;
        e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

        m = (vm_page_t) vm_page_queue_first(&object->memq);

        while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) {
            next = (vm_page_t) vm_page_queue_next(&m->listq);

            if ((m->offset >= start) && (m->offset < end)) {
                /*
                 * this is a page we're interested in
                 * try to fit it into a current extent
                 */
                for (n = 0; n < num_of_extents; n++) {
                    if ((m->offset & e_mask) == extents[n].e_base) {
                        /*
                         * use (PAGE_SIZE - 1) to determine the
                         * max offset so that we don't wrap if
                         * we're at the last page of the space
                         */
                        if (m->offset < extents[n].e_min)
                            extents[n].e_min = m->offset;
                        else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
                            extents[n].e_max = m->offset + (PAGE_SIZE - 1);
                        break;
                    }
                }
                if (n == num_of_extents) {
                    /*
                     * didn't find a current extent that can encompass
                     * this page
                     */
                    if (n < MAX_EXTENTS) {
                        /*
                         * if we still have room,
                         * create a new extent
                         */
                        extents[n].e_base = m->offset & e_mask;
                        extents[n].e_min  = m->offset;
                        extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

                        num_of_extents++;
                    } else {
                        /*
                         * no room to create a new extent...
                         * fall back to a single extent based
                         * on the min and max page offsets
                         * we find in the range we're interested in...
                         * first, look through the extent list and
                         * develop the overall min and max for the
                         * pages we've looked at up to this point
                         */
                        for (n = 1; n < num_of_extents; n++) {
                            if (extents[n].e_min < extents[0].e_min)
                                extents[0].e_min = extents[n].e_min;
                            if (extents[n].e_max > extents[0].e_max)
                                extents[0].e_max = extents[n].e_max;
                        }
                        /*
                         * now setup to run through the remaining pages
                         * to determine the overall min and max
                         * offset for the specified range
                         */
                        extents[0].e_base = 0;
                        e_mask = 0;
                        num_of_extents = 1;

                        /*
                         * by continuing, we'll reprocess the
                         * page that forced us to abandon trying
                         * to develop multiple extents
                         */
                        continue;
                    }
                }
            }
            m = next;
        }
    } else {
        extents[0].e_min = offset;
        extents[0].e_max = offset + (size - 1);

        num_of_extents = 1;
    }
    for (n = 0; n < num_of_extents; n++) {
        if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
                                    should_flush, should_return, should_iosync, protection))
            data_returned = TRUE;
    }
    return (data_returned);
}
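
/*
 * Illustrative sketch (not part of the original source): the extent
 * bucketing arithmetic used above.  Pages are grouped by the 256MB-aligned
 * base of their offset (EXTENT_SIZE), and each extent keeps a running
 * [e_min, e_max] of the page offsets it has absorbed.  The helper name is
 * hypothetical; the block is deliberately compiled out.
 */
#if 0   /* example only */
static void
example_extent_bucket(vm_object_offset_t page_offset)
{
    vm_object_size_t   e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
    vm_object_offset_t e_base = page_offset & e_mask;           /* 256MB bucket */
    vm_object_offset_t e_min  = page_offset;
    vm_object_offset_t e_max  = page_offset + (PAGE_SIZE - 1);

    /* a later page in the same bucket only widens [e_min, e_max] */
    (void)e_base; (void)e_min; (void)e_max;
}
#endif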
static kern_return_t
vm_object_set_attributes_common(
    vm_object_t     object,
    boolean_t       may_cache,
    memory_object_copy_strategy_t copy_strategy)
{
    boolean_t   object_became_ready;

    XPR(XPR_MEMORY_OBJECT,
        "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
        object, (may_cache&1), copy_strategy, 0, 0);

    if (object == VM_OBJECT_NULL)
        return(KERN_INVALID_ARGUMENT);

    /*
     *  Verify the attributes of importance
     */

    switch(copy_strategy) {
    case MEMORY_OBJECT_COPY_NONE:
    case MEMORY_OBJECT_COPY_DELAY:
        break;
    default:
        return(KERN_INVALID_ARGUMENT);
    }

    vm_object_lock(object);

    /*
     *  Copy the attributes
     */
    assert(!object->internal);
    object_became_ready = !object->pager_ready;
    object->copy_strategy = copy_strategy;
    object->can_persist = may_cache;

    /*
     *  Wake up anyone waiting for the ready attribute
     *  to become asserted.
     */

    if (object_became_ready) {
        object->pager_ready = TRUE;
        vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
    }

    vm_object_unlock(object);

    return(KERN_SUCCESS);
}
kern_return_t
memory_object_synchronize_completed(
    __unused memory_object_control_t    control,
    __unused memory_object_offset_t     offset,
    __unused memory_object_size_t       length)
{
    panic("memory_object_synchronize_completed no longer supported\n");
    return(KERN_FAILURE);
}
/*
 *  Set the memory object attribute as provided.
 *
 *  XXX This routine cannot be completed until the vm_msync, clean
 *      in place, and cluster work is completed. See ifdef notyet
 *      below and note that vm_object_set_attributes_common()
 *      may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
    memory_object_control_t     control,
    memory_object_flavor_t      flavor,
    memory_object_info_t        attributes,
    mach_msg_type_number_t      count)
{
    vm_object_t                     object;
    kern_return_t                   result = KERN_SUCCESS;
    boolean_t                       may_cache;
    boolean_t                       invalidate;
    memory_object_copy_strategy_t   copy_strategy;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    vm_object_lock(object);

    may_cache = object->can_persist;
    copy_strategy = object->copy_strategy;
#if notyet
    invalidate = object->invalidate;
#endif
    vm_object_unlock(object);

    switch (flavor) {
        case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
        {
        old_memory_object_behave_info_t behave;

        if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (old_memory_object_behave_info_t) attributes;

        invalidate = behave->invalidate;
        copy_strategy = behave->copy_strategy;

        break;
        }

        case MEMORY_OBJECT_BEHAVIOR_INFO:
        {
        memory_object_behave_info_t behave;

        if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (memory_object_behave_info_t) attributes;

        invalidate = behave->invalidate;
        copy_strategy = behave->copy_strategy;
        break;
        }

        case MEMORY_OBJECT_PERFORMANCE_INFO:
        {
        memory_object_perf_info_t   perf;

        if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        perf = (memory_object_perf_info_t) attributes;

        may_cache = perf->may_cache;

        break;
        }

        case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
        {
        old_memory_object_attr_info_t   attr;

        if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (old_memory_object_attr_info_t) attributes;

        may_cache = attr->may_cache;
        copy_strategy = attr->copy_strategy;

        break;
        }

        case MEMORY_OBJECT_ATTRIBUTE_INFO:
        {
        memory_object_attr_info_t   attr;

        if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (memory_object_attr_info_t) attributes;

        copy_strategy = attr->copy_strategy;
        may_cache = attr->may_cache_object;

        break;
        }

        default:
        result = KERN_INVALID_ARGUMENT;
        break;
    }

    if (result != KERN_SUCCESS)
        return(result);

    if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
        copy_strategy = MEMORY_OBJECT_COPY_DELAY;
    }

    /*
     * XXX  may_cache may become a tri-valued variable to handle
     * XXX  uncache if not in use.
     */
    return (vm_object_set_attributes_common(object,
                                            may_cache,
                                            copy_strategy));
}
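
/*
 * Illustrative sketch (not part of the original source): filling in a
 * MEMORY_OBJECT_ATTRIBUTE_INFO request for memory_object_change_attributes().
 * The struct type memory_object_attr_info_data_t is assumed to come from
 * <mach/memory_object_types.h>; the wrapper name is hypothetical and the
 * block is deliberately compiled out.
 */
#if 0   /* example only */
static kern_return_t
example_set_copy_delay(memory_object_control_t control)
{
    memory_object_attr_info_data_t  attr;

    attr.copy_strategy    = MEMORY_OBJECT_COPY_DELAY;
    attr.cluster_size     = PAGE_SIZE;
    attr.may_cache_object = TRUE;
    attr.temporary        = FALSE;

    return memory_object_change_attributes(control,
                                           MEMORY_OBJECT_ATTRIBUTE_INFO,
                                           (memory_object_info_t) &attr,
                                           MEMORY_OBJECT_ATTR_INFO_COUNT);
}
#endif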
kern_return_t
memory_object_get_attributes(
    memory_object_control_t control,
    memory_object_flavor_t  flavor,
    memory_object_info_t    attributes, /* pointer to OUT array */
    mach_msg_type_number_t  *count)     /* IN/OUT */
{
    kern_return_t   ret = KERN_SUCCESS;
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    vm_object_lock(object);

    switch (flavor) {
        case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
        {
        old_memory_object_behave_info_t behave;

        if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (old_memory_object_behave_info_t) attributes;
        behave->copy_strategy = object->copy_strategy;
        behave->temporary = FALSE;
#if notyet  /* remove when vm_msync complies and clean in place fini */
        behave->invalidate = object->invalidate;
#else
        behave->invalidate = FALSE;
#endif

        *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
        break;
        }

        case MEMORY_OBJECT_BEHAVIOR_INFO:
        {
        memory_object_behave_info_t behave;

        if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        behave = (memory_object_behave_info_t) attributes;
        behave->copy_strategy = object->copy_strategy;
        behave->temporary = FALSE;
#if notyet  /* remove when vm_msync complies and clean in place fini */
        behave->invalidate = object->invalidate;
#else
        behave->invalidate = FALSE;
#endif
        behave->advisory_pageout = FALSE;
        behave->silent_overwrite = FALSE;
        *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
        break;
        }

        case MEMORY_OBJECT_PERFORMANCE_INFO:
        {
        memory_object_perf_info_t   perf;

        if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        perf = (memory_object_perf_info_t) attributes;
        perf->cluster_size = PAGE_SIZE;
        perf->may_cache = object->can_persist;

        *count = MEMORY_OBJECT_PERF_INFO_COUNT;
        break;
        }

        case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
        {
        old_memory_object_attr_info_t   attr;

        if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (old_memory_object_attr_info_t) attributes;
        attr->may_cache = object->can_persist;
        attr->copy_strategy = object->copy_strategy;

        *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
        break;
        }

        case MEMORY_OBJECT_ATTRIBUTE_INFO:
        {
        memory_object_attr_info_t   attr;

        if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
            ret = KERN_INVALID_ARGUMENT;
            break;
        }

        attr = (memory_object_attr_info_t) attributes;
        attr->copy_strategy = object->copy_strategy;
        attr->cluster_size = PAGE_SIZE;
        attr->may_cache_object = object->can_persist;
        attr->temporary = FALSE;

        *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
        break;
        }

        default:
        ret = KERN_INVALID_ARGUMENT;
        break;
    }

    vm_object_unlock(object);

    return(ret);
}
kern_return_t
memory_object_iopl_request(
    ipc_port_t              port,
    memory_object_offset_t  offset,
    upl_size_t              *upl_size,
    upl_t                   *upl_ptr,
    upl_page_info_array_t   user_page_list,
    unsigned int            *page_list_count,
    upl_control_flags_t     *flags,
    vm_tag_t                tag)
{
    vm_object_t             object;
    kern_return_t           ret;
    upl_control_flags_t     caller_flags;

    caller_flags = *flags;

    if (caller_flags & ~UPL_VALID_FLAGS) {
        /*
         * For forward compatibility's sake,
         * reject any unknown flag.
         */
        return KERN_INVALID_VALUE;
    }

    if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
        vm_named_entry_t    named_entry;

        named_entry = (vm_named_entry_t)port->ip_kobject;
        /* a few checks to make sure user is obeying rules */
        if(*upl_size == 0) {
            if(offset >= named_entry->size)
                return(KERN_INVALID_RIGHT);
            *upl_size = (upl_size_t)(named_entry->size - offset);
            if (*upl_size != named_entry->size - offset)
                return KERN_INVALID_ARGUMENT;
        }
        if(caller_flags & UPL_COPYOUT_FROM) {
            if((named_entry->protection & VM_PROT_READ)
                        != VM_PROT_READ) {
                return(KERN_INVALID_RIGHT);
            }
        } else {
            if((named_entry->protection &
                (VM_PROT_READ | VM_PROT_WRITE))
                != (VM_PROT_READ | VM_PROT_WRITE)) {
                return(KERN_INVALID_RIGHT);
            }
        }
        if(named_entry->size < (offset + *upl_size))
            return(KERN_INVALID_ARGUMENT);

        /* the callers parameter offset is defined to be the */
        /* offset from beginning of named entry offset in object */
        offset = offset + named_entry->offset;

        if (named_entry->is_sub_map ||
            named_entry->is_copy)
            return KERN_INVALID_ARGUMENT;

        named_entry_lock(named_entry);

        object = named_entry->backing.object;
        vm_object_reference(object);
        named_entry_unlock(named_entry);
    } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
        memory_object_control_t control;
        control = (memory_object_control_t) port;
        if (control == NULL)
            return (KERN_INVALID_ARGUMENT);
        object = memory_object_control_to_vm_object(control);
        if (object == VM_OBJECT_NULL)
            return (KERN_INVALID_ARGUMENT);
        vm_object_reference(object);
    } else {
        return KERN_INVALID_ARGUMENT;
    }
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (!object->private) {
        if (object->phys_contiguous) {
            *flags = UPL_PHYS_CONTIG;
        } else {
            *flags = 0;
        }
    } else {
        *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
    }

    ret = vm_object_iopl_request(object,
                                 offset,
                                 *upl_size,
                                 upl_ptr,
                                 user_page_list,
                                 page_list_count,
                                 caller_flags,
                                 tag);
    vm_object_deallocate(object);
    return ret;
}
/*
 *  Routine:    memory_object_upl_request [interface]
 *  Purpose:
 *      Cause the population of a portion of a vm_object.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 */

kern_return_t
memory_object_upl_request(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    upl_size_t              size,
    upl_t                   *upl_ptr,
    upl_page_info_array_t   user_page_list,
    unsigned int            *page_list_count,
    int                     cntrl_flags,
    int                     tag)
{
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_TERMINATED);

    return vm_object_upl_request(object,
                                 offset,
                                 size,
                                 upl_ptr,
                                 user_page_list,
                                 page_list_count,
                                 (upl_control_flags_t)(unsigned int) cntrl_flags,
                                 tag);
}
/*
 *  Routine:    memory_object_super_upl_request [interface]
 *  Purpose:
 *      Cause the population of a portion of a vm_object
 *      in much the same way as memory_object_upl_request.
 *      Depending on the nature of the request, the pages
 *      returned may contain valid data or be uninitialized.
 *      However, the region may be expanded up to the super
 *      cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    upl_size_t              size,
    upl_size_t              super_cluster,
    upl_t                   *upl,
    upl_page_info_t         *user_page_list,
    unsigned int            *page_list_count,
    int                     cntrl_flags,
    int                     tag)
{
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_super_upl_request(object,
                                       offset,
                                       size,
                                       super_cluster,
                                       upl,
                                       user_page_list,
                                       page_list_count,
                                       (upl_control_flags_t)(unsigned int) cntrl_flags,
                                       tag);
}
kern_return_t
memory_object_cluster_size(
    memory_object_control_t     control,
    memory_object_offset_t      *start,
    vm_size_t                   *length,
    uint32_t                    *io_streaming,
    memory_object_fault_info_t  fault_info)
{
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL || object->paging_offset > *start)
        return (KERN_INVALID_ARGUMENT);

    *start -= object->paging_offset;

    vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming);

    *start += object->paging_offset;

    return (KERN_SUCCESS);
}
/*
 *  Routine:    host_default_memory_manager [interface]
 *  Purpose:
 *      set/get the default memory manager port and default cluster
 *      size.
 *
 *      If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
    host_priv_t             host_priv,
    memory_object_default_t *default_manager,
    __unused memory_object_cluster_size_t cluster_size)
{
    memory_object_default_t current_manager;
    memory_object_default_t new_manager;
    memory_object_default_t returned_manager;
    kern_return_t result = KERN_SUCCESS;

    if (host_priv == HOST_PRIV_NULL)
        return(KERN_INVALID_HOST);

    assert(host_priv == &realhost);

    new_manager = *default_manager;
    lck_mtx_lock(&memory_manager_default_lock);
    current_manager = memory_manager_default;
    returned_manager = MEMORY_OBJECT_DEFAULT_NULL;

    if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
        /*
         *  Retrieve the current value.
         */
        returned_manager = current_manager;
        memory_object_default_reference(returned_manager);
    } else {
        /*
         *  Only allow the kernel to change the value.
         */
        extern task_t kernel_task;
        if (current_task() != kernel_task) {
            result = KERN_NO_ACCESS;
            goto out;
        }

        /*
         *  If this is the first non-null manager, start
         *  up the internal pager support.
         */
        if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
            result = vm_pageout_internal_start();
            if (result != KERN_SUCCESS)
                goto out;
        }

        /*
         *  Retrieve the current value,
         *  and replace it with the supplied value.
         *  We return the old reference to the caller
         *  but we have to take a reference on the new
         *  one.
         */
        returned_manager = current_manager;
        memory_manager_default = new_manager;
        memory_object_default_reference(new_manager);

        /*
         *  In case anyone's been waiting for a memory
         *  manager to be established, wake them up.
         */

        thread_wakeup((event_t) &memory_manager_default);

        /*
         * Now that we have a default pager for anonymous memory,
         * reactivate all the throttled pages (i.e. dirty pages with
         * no pager).
         */
        if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
        {
            vm_page_reactivate_all_throttled();
        }
    }
 out:
    lck_mtx_unlock(&memory_manager_default_lock);

    *default_manager = returned_manager;
    return(result);
}
/*
 *  Routine:    memory_manager_default_reference
 *  Purpose:
 *      Returns a naked send right for the default
 *      memory manager.  The returned right is always
 *      valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(void)
{
    memory_object_default_t current_manager;

    lck_mtx_lock(&memory_manager_default_lock);
    current_manager = memory_manager_default;
    while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
        wait_result_t res;

        res = lck_mtx_sleep(&memory_manager_default_lock,
                            LCK_SLEEP_DEFAULT,
                            (event_t) &memory_manager_default,
                            THREAD_UNINT);
        assert(res == THREAD_AWAKENED);
        current_manager = memory_manager_default;
    }
    memory_object_default_reference(current_manager);
    lck_mtx_unlock(&memory_manager_default_lock);

    return current_manager;
}
/*
 *  Routine:    memory_manager_default_check
 *
 *  Purpose:
 *      Check whether a default memory manager has been set
 *      up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *      and KERN_FAILURE if dmm does not exist.
 *
 *      If there is no default memory manager, log an error,
 *      but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
    memory_object_default_t current;

    lck_mtx_lock(&memory_manager_default_lock);
    current = memory_manager_default;
    if (current == MEMORY_OBJECT_DEFAULT_NULL) {
        static boolean_t logged;    /* initialized to 0 */
        boolean_t   complain = !logged;
        logged = TRUE;
        lck_mtx_unlock(&memory_manager_default_lock);
        if (complain)
            printf("Warning: No default memory manager\n");
        return(KERN_FAILURE);
    } else {
        lck_mtx_unlock(&memory_manager_default_lock);
        return(KERN_SUCCESS);
    }
}
__private_extern__ void
memory_manager_default_init(void)
{
    memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
    lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    int                     ops,
    ppnum_t                 *phys_entry,
    int                     *flags)
{
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_page_op(object, offset, ops, phys_entry, flags);
}
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
    memory_object_control_t control,
    memory_object_offset_t  offset_beg,
    memory_object_offset_t  offset_end,
    int                     ops,
    int                     *range)
{
    vm_object_t     object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    return vm_object_range_op(object,
                              offset_beg,
                              offset_end,
                              ops,
                              (uint32_t *) range);
}
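
/*
 * Illustrative sketch (not part of the original source): the calling shape of
 * the two _op interfaces described above.  The "ops" bit-mask is assumed to
 * come from the UPL_POP_* family in <mach/memory_object_types.h>; the wrapper
 * name and offsets are hypothetical; the block is deliberately compiled out.
 */
#if 0   /* example only */
static void
example_page_vs_range_op(memory_object_control_t control, int ops)
{
    ppnum_t phys;
    int     flags;
    int     range = 0;

    /* single page: per-page state comes back through phys/flags */
    (void) memory_object_page_op(control, 0, ops, &phys, &flags);

    /* many pages: no per-page state, so no UPL bookkeeping is needed */
    (void) memory_object_range_op(control, 0, 16 * PAGE_SIZE_64, ops, &range);
}
#endif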
void
memory_object_mark_used(
    memory_object_control_t control)
{
    vm_object_t object;

    if (control == NULL)
        return;

    object = memory_object_control_to_vm_object(control);

    if (object != VM_OBJECT_NULL)
        vm_object_cache_remove(object);
}


void
memory_object_mark_unused(
    memory_object_control_t control,
    __unused boolean_t      rage)
{
    vm_object_t object;

    if (control == NULL)
        return;

    object = memory_object_control_to_vm_object(control);

    if (object != VM_OBJECT_NULL)
        vm_object_cache_add(object);
}

void
memory_object_mark_io_tracking(
    memory_object_control_t control)
{
    vm_object_t object;

    if (control == NULL)
        return;
    object = memory_object_control_to_vm_object(control);

    if (object != VM_OBJECT_NULL) {
        vm_object_lock(object);
        object->io_tracking = TRUE;
        vm_object_unlock(object);
    }
}
#if CONFIG_SECLUDED_MEMORY
void
memory_object_mark_eligible_for_secluded(
    memory_object_control_t control,
    boolean_t               eligible_for_secluded)
{
    vm_object_t object;

    if (control == NULL)
        return;
    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL) {
        return;
    }

    vm_object_lock(object);
    if (eligible_for_secluded &&
        secluded_for_filecache && /* global boot-arg */
        !object->eligible_for_secluded) {
        object->eligible_for_secluded = TRUE;
        vm_page_secluded.eligible_for_secluded += object->resident_page_count;
    } else if (!eligible_for_secluded &&
               object->eligible_for_secluded) {
        object->eligible_for_secluded = FALSE;
        vm_page_secluded.eligible_for_secluded -= object->resident_page_count;
        if (object->resident_page_count) {
            /* XXX FBDP TODO: flush pages from secluded queue? */
            // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object);
        }
    }
    vm_object_unlock(object);
}
#endif /* CONFIG_SECLUDED_MEMORY */
kern_return_t
memory_object_pages_resident(
    memory_object_control_t control,
    boolean_t       *       has_pages_resident)
{
    vm_object_t object;

    *has_pages_resident = FALSE;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (object->resident_page_count)
        *has_pages_resident = TRUE;

    return (KERN_SUCCESS);
}

kern_return_t
memory_object_signed(
    memory_object_control_t control,
    boolean_t               is_signed)
{
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return KERN_INVALID_ARGUMENT;

    vm_object_lock(object);
    object->code_signed = is_signed;
    vm_object_unlock(object);

    return KERN_SUCCESS;
}

boolean_t
memory_object_is_signed(
    memory_object_control_t control)
{
    boolean_t   is_signed;
    vm_object_t object;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return FALSE;

    vm_object_lock_shared(object);
    is_signed = object->code_signed;
    vm_object_unlock(object);

    return is_signed;
}

boolean_t
memory_object_is_slid(
    memory_object_control_t control)
{
    vm_object_t object = VM_OBJECT_NULL;

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL)
        return FALSE;

    return object->object_slid;
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
    int i;

    i = (vm_size_t) sizeof (struct memory_object_control);
    mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
    zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
    zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
    return;
}

__private_extern__ memory_object_control_t
memory_object_control_allocate(
    vm_object_t     object)
{
    memory_object_control_t control;

    control = (memory_object_control_t)zalloc(mem_obj_control_zone);
    if (control != MEMORY_OBJECT_CONTROL_NULL) {
        control->moc_object = object;
        control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
    }
    return (control);
}

__private_extern__ void
memory_object_control_collapse(
    memory_object_control_t control,
    vm_object_t             object)
{
    assert((control->moc_object != VM_OBJECT_NULL) &&
           (control->moc_object != object));
    control->moc_object = object;
}

__private_extern__ vm_object_t
memory_object_control_to_vm_object(
    memory_object_control_t control)
{
    if (control == MEMORY_OBJECT_CONTROL_NULL ||
        control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
        return VM_OBJECT_NULL;

    return (control->moc_object);
}

__private_extern__ vm_object_t
memory_object_to_vm_object(
    memory_object_t mem_obj)
{
    memory_object_control_t mo_control;

    if (mem_obj == MEMORY_OBJECT_NULL) {
        return VM_OBJECT_NULL;
    }
    mo_control = mem_obj->mo_control;
    if (mo_control == NULL) {
        return VM_OBJECT_NULL;
    }
    return memory_object_control_to_vm_object(mo_control);
}
memory_object_control_t
convert_port_to_mo_control(
    __unused mach_port_t    port)
{
    return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
    __unused memory_object_control_t    control)
{
    return MACH_PORT_NULL;
}

void
memory_object_control_reference(
    __unused memory_object_control_t    control)
{
    return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
    memory_object_control_t control)
{
    zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
    memory_object_control_t control)
{
    assert(control->moc_object != VM_OBJECT_NULL);
    control->moc_object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
    memory_object_default_t dmm)
{
    ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
    memory_object_default_t dmm)
{
    ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
    __unused mach_port_t    port)
{
    return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
    __unused memory_object_t    object)
{
    return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
    memory_object_t memory_object)
{
    (memory_object->mo_pager_ops->memory_object_reference)(
        memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
    memory_object_t memory_object)
{
    (memory_object->mo_pager_ops->memory_object_deallocate)(
        memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
    memory_object_t memory_object,
    memory_object_control_t memory_control,
    memory_object_cluster_size_t memory_object_page_size
)
{
    return (memory_object->mo_pager_ops->memory_object_init)(
        memory_object,
        memory_control,
        memory_object_page_size);
}

/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
    memory_object_t memory_object
)
{
    return (memory_object->mo_pager_ops->memory_object_terminate)(
        memory_object);
}

/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t desired_access,
    memory_object_fault_info_t fault_info
)
{
    return (memory_object->mo_pager_ops->memory_object_data_request)(
        memory_object,
        offset,
        length,
        desired_access,
        fault_info);
}

/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_cluster_size_t size,
    memory_object_offset_t *resid_offset,
    int *io_error,
    boolean_t dirty,
    boolean_t kernel_copy,
    int upl_flags
)
{
    return (memory_object->mo_pager_ops->memory_object_data_return)(
        memory_object,
        offset,
        size,
        resid_offset,
        io_error,
        dirty,
        kernel_copy,
        upl_flags);
}
/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_cluster_size_t size
)
{
    return (memory_object->mo_pager_ops->memory_object_data_initialize)(
        memory_object,
        offset,
        size);
}

/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_size_t size,
    vm_prot_t desired_access
)
{
    return (memory_object->mo_pager_ops->memory_object_data_unlock)(
        memory_object,
        offset,
        size,
        desired_access);
}

/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
    memory_object_t memory_object,
    memory_object_offset_t offset,
    memory_object_size_t size,
    vm_sync_t sync_flags
)
{
    panic("memory_object_synchronize no longer supported\n");

    return (memory_object->mo_pager_ops->memory_object_synchronize)(
        memory_object,
        offset,
        size,
        sync_flags);
}
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * reclaimed.
 */

/* Routine memory_object_map */
kern_return_t memory_object_map
(
    memory_object_t memory_object,
    vm_prot_t prot
)
{
    return (memory_object->mo_pager_ops->memory_object_map)(
        memory_object,
        prot);
}

/* Routine memory_object_last_unmap */
kern_return_t memory_object_last_unmap
(
    memory_object_t memory_object
)
{
    return (memory_object->mo_pager_ops->memory_object_last_unmap)(
        memory_object);
}

/* Routine memory_object_data_reclaim */
kern_return_t memory_object_data_reclaim
(
    memory_object_t memory_object,
    boolean_t       reclaim_backing_store
)
{
    if (memory_object->mo_pager_ops->memory_object_data_reclaim == NULL)
        return KERN_NOT_SUPPORTED;
    return (memory_object->mo_pager_ops->memory_object_data_reclaim)(
        memory_object,
        reclaim_backing_store);
}
upl_t
convert_port_to_upl(
    ipc_port_t  port)
{
    upl_t upl;

    ip_lock(port);
    if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
        ip_unlock(port);
        return (upl_t) NULL;
    }
    upl = (upl_t) port->ip_kobject;
    ip_unlock(port);

    return upl;
}

mach_port_t
convert_upl_to_port(
    __unused upl_t  upl)
{
    return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
    __unused ipc_port_t             port,
    __unused mach_port_mscount_t    mscount)
{
    return;
}