/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>     /* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>             /* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>        /* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>            /* For pmap_clear_modify */
#include <vm/vm_kern.h>         /* For kernel_map, vm_move */
#include <vm/vm_map.h>          /* For vm_map_pageable */
#include <vm/vm_purgeable_internal.h>   /* Needed by some vm_page.h macros */
#include <vm/vm_shared_region.h>

#include <vm/vm_external.h>

#include <vm/vm_protos.h>
memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;

LCK_MTX_EARLY_DECLARE(memory_manager_default_lock, &vm_object_lck_grp);
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->vmp_dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define memory_object_should_return_page(m, should_return) \
	(should_return != MEMORY_OBJECT_RETURN_NONE && \
	 (((m)->vmp_dirty || ((m)->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
	  ((m)->vmp_precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
	  (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
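
/*
 * Illustrative sketch (not part of the build): the macro above encodes the
 * return policy as a single expression with a side effect on vmp_dirty.  The
 * user-space mock below restates the same policy as a pure function over
 * plain booleans so the three cases are easier to see.  The names here
 * (page_state, should_return_page, the RETURN_* constants) are hypothetical
 * stand-ins for the kernel types, not kernel interfaces.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

enum return_policy { RETURN_NONE, RETURN_DIRTY, RETURN_ALL, RETURN_ANYTHING };

struct page_state {
	bool dirty;     /* page modified since it was last cleaned */
	bool precious;  /* pager holds no other copy of this data */
};

/* Mirror of the policy: dirty pages for any policy but NONE, precious
 * pages only for ALL, and everything for ANYTHING. */
static bool
should_return_page(const struct page_state *p, enum return_policy pol)
{
	if (pol == RETURN_NONE) {
		return false;
	}
	if (p->dirty) {
		return true;
	}
	if (p->precious && pol == RETURN_ALL) {
		return true;
	}
	return pol == RETURN_ANYTHING;
}

int
main(void)
{
	struct page_state clean_precious = { .dirty = false, .precious = true };

	printf("RETURN_DIRTY: %d\n", should_return_page(&clean_precious, RETURN_DIRTY)); /* 0 */
	printf("RETURN_ALL:   %d\n", should_return_page(&clean_precious, RETURN_ALL));   /* 1 */
	return 0;
}
#endif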
typedef int     memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE                  0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK            1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN           2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE             3
memory_object_lock_result_t memory_object_lock_page(
	vm_page_t               m,
	memory_object_return_t  should_return,
	boolean_t               should_flush,
	vm_prot_t               prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t               m,
	memory_object_return_t  should_return,
	boolean_t               should_flush,
	vm_prot_t               prot)
{
	if (m->vmp_busy || m->vmp_cleaning) {
		return MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK;
	}

	if (m->vmp_laundry) {
		vm_pageout_steal_laundry(m, FALSE);
	}

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */
	if (m->vmp_absent || m->vmp_error || m->vmp_restart) {
		if (m->vmp_error && should_flush && !VM_PAGE_WIRED(m)) {
			/*
			 * dump the page, pager wants us to
			 * clean it up and there is no
			 * relevant data to return
			 */
			return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
		}
		return MEMORY_OBJECT_LOCK_RESULT_DONE;
	}
	assert(!m->vmp_fictitious);

	if (VM_PAGE_WIRED(m)) {
		/*
		 * The page is wired... just clean or return the page if needed.
		 * Wired pages don't get flushed or disconnected from the pmap.
		 */
		if (memory_object_should_return_page(m, should_return)) {
			return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
		}

		return MEMORY_OBJECT_LOCK_RESULT_DONE;
	}

	/*
	 * must do the pmap_disconnect before determining the
	 * need to return the page... otherwise it's possible
	 * for the page to go from the clean to the dirty state
	 * after we've made our decision
	 */
	if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
		SET_PAGE_DIRTY(m, FALSE);
	}

	/*
	 * If we are decreasing permission, do it now;
	 * let the fault handler take care of increases
	 * (pmap_page_protect may not increase protection).
	 */
	if (prot != VM_PROT_NO_CHANGE) {
		pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
	}

	/*
	 *	Handle returning dirty or precious pages
	 */
	if (memory_object_should_return_page(m, should_return)) {
		/*
		 * we used to do a pmap_disconnect here in support
		 * of memory_object_lock_request, but that routine
		 * no longer requires this...  in any event, in
		 * our world, it would turn into a big noop since
		 * we don't lock the page in any way and as soon
		 * as we drop the object lock, the page can be
		 * faulted back into an address space
		 *
		 *	pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
		 */
		return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
	}

	/*
	 *	Handle flushing clean pages
	 */
	if (should_flush) {
		return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
	}

	/*
	 * we used to deactivate clean pages at this point,
	 * but we do not believe that an msync should change
	 * the 'age' of a page in the cache...  here is the
	 * original comment and code concerning this...
	 *
	 *	XXX Make clean but not flush a paging hint,
	 *	and deactivate the pages.  This is a hack
	 *	because it overloads flush/clean with
	 *	implementation-dependent meaning.  This only
	 *	happens to pages that are already clean.
	 *
	 *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
	 *	return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
	 */

	return MEMORY_OBJECT_LOCK_RESULT_DONE;
}
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
	memory_object_control_t         control,
	memory_object_offset_t          offset,
	memory_object_size_t            size,
	memory_object_offset_t  *       resid_offset,
	int                     *       io_errno,
	memory_object_return_t          should_return,
	int                             flags,
	vm_prot_t                       prot)
{
	vm_object_t     object;

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) {
		return KERN_INVALID_ARGUMENT;
	}

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
			flags |= MEMORY_OBJECT_DATA_FLUSH;
		}
	}
	offset -= object->paging_offset;

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		vm_object_reap_pages(object, REAP_DATA_FLUSH);
	} else {
		(void)vm_object_update(object, offset, size, resid_offset,
		    io_errno, should_return, flags, prot);
	}

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement.
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1, i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed, the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable; otherwise, when
 *	the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
	memory_object_control_t control,
	int                     flags)
{
	vm_object_t object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	return vm_object_release_name(object, flags);
}
/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t control,
	kern_return_t           reason)
{
	vm_object_t object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	return vm_object_destroy(object, reason);
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *	FALSE		if no pages were returned to the pager
 *	TRUE		otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	boolean_t               should_flush,
	boolean_t               should_return,
	boolean_t               should_iosync)
{
	boolean_t       rv;
	int             flags;

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush) {
		flags = MEMORY_OBJECT_DATA_FLUSH;
		/*
		 * This flush is from an msync(), not a truncate(), so the
		 * contents of the file are not affected.
		 * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
		 * that the data is not changed and that there's no need to
		 * push the old contents to a copy object.
		 */
		flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
	} else {
		flags = 0;
	}

	if (should_iosync) {
		flags |= MEMORY_OBJECT_IO_SYNC;
	}

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
	    (should_return) ?
	    MEMORY_OBJECT_RETURN_ALL :
	    MEMORY_OBJECT_RETURN_NONE,
	    flags,
	    VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
MACRO_BEGIN                                                             \
									\
	int			upl_flags;                              \
	memory_object_t		pager;                                  \
									\
	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {          \
		vm_object_paging_begin(object);                         \
		vm_object_unlock(object);                               \
									\
		if (iosync) {                                           \
			upl_flags = UPL_MSYNC | UPL_IOSYNC;             \
		} else {                                                \
			upl_flags = UPL_MSYNC;                          \
		}                                                       \
									\
		(void) memory_object_data_return(pager,                 \
			po,                                             \
			(memory_object_cluster_size_t)data_cnt,         \
			ro,                                             \
			ioerr,                                          \
			FALSE,                                          \
			FALSE,                                          \
			upl_flags);                                     \
									\
		vm_object_lock(object);                                 \
		vm_object_paging_end(object);                           \
	}                                                               \
MACRO_END

extern struct vnode *
vnode_pager_lookup_vnode(memory_object_t);
518 vm_object_update_extent(
520 vm_object_offset_t offset
,
521 vm_object_offset_t offset_end
,
522 vm_object_offset_t
*offset_resid
,
524 boolean_t should_flush
,
525 memory_object_return_t should_return
,
526 boolean_t should_iosync
,
531 vm_object_offset_t paging_offset
= 0;
532 vm_object_offset_t next_offset
= offset
;
533 memory_object_lock_result_t page_lock_result
;
534 memory_object_cluster_size_t data_cnt
= 0;
535 struct vm_page_delayed_work dw_array
;
536 struct vm_page_delayed_work
*dwp
, *dwp_start
;
537 bool dwp_finish_ctx
= TRUE
;
542 dwp_start
= dwp
= NULL
;
544 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
545 dwp_start
= vm_page_delayed_work_get_ctx();
546 if (dwp_start
== NULL
) {
547 dwp_start
= &dw_array
;
549 dwp_finish_ctx
= FALSE
;
556 offset
< offset_end
&& object
->resident_page_count
;
557 offset
+= PAGE_SIZE_64
) {
559 * Limit the number of pages to be cleaned at once to a contiguous
560 * run, or at most MAX_UPL_TRANSFER_BYTES
563 if ((data_cnt
>= MAX_UPL_TRANSFER_BYTES
) || (next_offset
!= offset
)) {
565 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
569 LIST_REQ_PAGEOUT_PAGES(object
, data_cnt
,
570 paging_offset
, offset_resid
, io_errno
, should_iosync
);
574 while ((m
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
577 page_lock_result
= memory_object_lock_page(m
, should_return
, should_flush
, prot
);
579 if (data_cnt
&& page_lock_result
!= MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN
) {
581 * End of a run of dirty/precious pages.
584 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
588 LIST_REQ_PAGEOUT_PAGES(object
, data_cnt
,
589 paging_offset
, offset_resid
, io_errno
, should_iosync
);
591 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
592 * allow the state of page 'm' to change... we need to re-lookup
599 switch (page_lock_result
) {
600 case MEMORY_OBJECT_LOCK_RESULT_DONE
:
603 case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE
:
604 if (m
->vmp_dirty
== TRUE
) {
607 dwp
->dw_mask
|= DW_vm_page_free
;
610 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK
:
611 PAGE_SLEEP(object
, m
, THREAD_UNINT
);
614 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN
:
616 paging_offset
= offset
;
619 data_cnt
+= PAGE_SIZE
;
620 next_offset
= offset
+ PAGE_SIZE_64
;
623 * wired pages shouldn't be flushed and
624 * since they aren't on any queue,
625 * no need to remove them
627 if (!VM_PAGE_WIRED(m
)) {
630 * add additional state for the flush
632 m
->vmp_free_when_done
= TRUE
;
635 * we use to remove the page from the queues at this
636 * point, but we do not believe that an msync
637 * should cause the 'age' of a page to be changed
640 * dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
647 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
, dw_count
);
649 if (dw_count
>= dw_limit
) {
650 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
660 task_update_logical_writes(current_task(), (dirty_count
* PAGE_SIZE
), TASK_WRITE_INVALIDATED
, vnode_pager_lookup_vnode(object
->pager
));
663 * We have completed the scan for applicable pages.
664 * Clean any pages that have been saved.
667 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
671 LIST_REQ_PAGEOUT_PAGES(object
, data_cnt
,
672 paging_offset
, offset_resid
, io_errno
, should_iosync
);
675 if (dwp_start
&& dwp_finish_ctx
) {
676 vm_page_delayed_work_finish_ctx(dwp_start
);
677 dwp_start
= dwp
= NULL
;
686 * Routine: vm_object_update
688 * Work function for m_o_lock_request(), vm_o_sync().
690 * Called with object locked and paging ref taken.
695 vm_object_offset_t offset
,
696 vm_object_size_t size
,
697 vm_object_offset_t
*resid_offset
,
699 memory_object_return_t should_return
,
701 vm_prot_t protection
)
703 vm_object_t copy_object
= VM_OBJECT_NULL
;
704 boolean_t data_returned
= FALSE
;
705 boolean_t update_cow
;
706 boolean_t should_flush
= (flags
& MEMORY_OBJECT_DATA_FLUSH
) ? TRUE
: FALSE
;
707 boolean_t should_iosync
= (flags
& MEMORY_OBJECT_IO_SYNC
) ? TRUE
: FALSE
;
708 vm_fault_return_t result
;
711 #define MAX_EXTENTS 8
712 #define EXTENT_SIZE (1024 * 1024 * 256)
713 #define RESIDENT_LIMIT (1024 * 32)
715 vm_object_offset_t e_base
;
716 vm_object_offset_t e_min
;
717 vm_object_offset_t e_max
;
718 } extents
[MAX_EXTENTS
];
721 * To avoid blocking while scanning for pages, save
722 * dirty pages to be cleaned all at once.
724 * XXXO A similar strategy could be used to limit the
725 * number of times that a scan must be restarted for
726 * other reasons. Those pages that would require blocking
727 * could be temporarily collected in another list, or
728 * their offsets could be recorded in a small array.
732 * XXX NOTE: May want to consider converting this to a page list
733 * XXX vm_map_copy interface. Need to understand object
734 * XXX coalescing implications before doing so.
737 update_cow
= ((flags
& MEMORY_OBJECT_DATA_FLUSH
)
738 && (!(flags
& MEMORY_OBJECT_DATA_NO_CHANGE
) &&
739 !(flags
& MEMORY_OBJECT_DATA_PURGE
)))
740 || (flags
& MEMORY_OBJECT_COPY_SYNC
);
742 if (update_cow
|| (flags
& (MEMORY_OBJECT_DATA_PURGE
| MEMORY_OBJECT_DATA_SYNC
))) {
745 while ((copy_object
= object
->copy
) != VM_OBJECT_NULL
) {
747 * need to do a try here since we're swimming upstream
748 * against the normal lock ordering... however, we need
749 * to hold the object stable until we gain control of the
750 * copy object so we have to be careful how we approach this
752 if (vm_object_lock_try(copy_object
)) {
754 * we 'won' the lock on the copy object...
755 * no need to hold the object lock any longer...
756 * take a real reference on the copy object because
757 * we're going to call vm_fault_page on it which may
758 * under certain conditions drop the lock and the paging
759 * reference we're about to take... the reference
760 * will keep the copy object from going away if that happens
762 vm_object_unlock(object
);
763 vm_object_reference_locked(copy_object
);
766 vm_object_unlock(object
);
769 mutex_pause(collisions
);
771 vm_object_lock(object
);
774 if ((copy_object
!= VM_OBJECT_NULL
&& update_cow
) || (flags
& MEMORY_OBJECT_DATA_SYNC
)) {
775 vm_object_offset_t i
;
776 vm_object_size_t copy_size
;
777 vm_object_offset_t copy_offset
;
781 kern_return_t error
= 0;
782 struct vm_object_fault_info fault_info
= {};
784 if (copy_object
!= VM_OBJECT_NULL
) {
786 * translate offset with respect to shadow's offset
788 copy_offset
= (offset
>= copy_object
->vo_shadow_offset
) ?
789 (offset
- copy_object
->vo_shadow_offset
) : 0;
791 if (copy_offset
> copy_object
->vo_size
) {
792 copy_offset
= copy_object
->vo_size
;
796 * clip size with respect to shadow offset
798 if (offset
>= copy_object
->vo_shadow_offset
) {
800 } else if (size
>= copy_object
->vo_shadow_offset
- offset
) {
801 copy_size
= (size
- (copy_object
->vo_shadow_offset
- offset
));
806 if (copy_offset
+ copy_size
> copy_object
->vo_size
) {
807 if (copy_object
->vo_size
>= copy_offset
) {
808 copy_size
= copy_object
->vo_size
- copy_offset
;
813 copy_size
+= copy_offset
;
815 copy_object
= object
;
817 copy_size
= offset
+ size
;
818 copy_offset
= offset
;
820 fault_info
.interruptible
= THREAD_UNINT
;
821 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
822 fault_info
.lo_offset
= copy_offset
;
823 fault_info
.hi_offset
= copy_size
;
824 fault_info
.stealth
= TRUE
;
825 assert(fault_info
.cs_bypass
== FALSE
);
826 assert(fault_info
.pmap_cs_associated
== FALSE
);
828 vm_object_paging_begin(copy_object
);
830 for (i
= copy_offset
; i
< copy_size
; i
+= PAGE_SIZE
) {
831 RETRY_COW_OF_LOCK_REQUEST
:
832 fault_info
.cluster_size
= (vm_size_t
) (copy_size
- i
);
833 assert(fault_info
.cluster_size
== copy_size
- i
);
835 prot
= VM_PROT_WRITE
| VM_PROT_READ
;
837 result
= vm_fault_page(copy_object
, i
,
838 VM_PROT_WRITE
| VM_PROT_READ
,
840 FALSE
, /* page not looked up */
850 case VM_FAULT_SUCCESS
:
853 VM_PAGE_OBJECT(page
), top_page
);
854 vm_object_lock(copy_object
);
855 vm_object_paging_begin(copy_object
);
857 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page
))) {
858 vm_page_lockspin_queues();
860 if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page
))) {
861 vm_page_deactivate(page
);
863 vm_page_unlock_queues();
865 PAGE_WAKEUP_DONE(page
);
868 prot
= VM_PROT_WRITE
| VM_PROT_READ
;
869 vm_object_lock(copy_object
);
870 vm_object_paging_begin(copy_object
);
871 goto RETRY_COW_OF_LOCK_REQUEST
;
872 case VM_FAULT_INTERRUPTED
:
873 prot
= VM_PROT_WRITE
| VM_PROT_READ
;
874 vm_object_lock(copy_object
);
875 vm_object_paging_begin(copy_object
);
876 goto RETRY_COW_OF_LOCK_REQUEST
;
877 case VM_FAULT_MEMORY_SHORTAGE
:
879 prot
= VM_PROT_WRITE
| VM_PROT_READ
;
880 vm_object_lock(copy_object
);
881 vm_object_paging_begin(copy_object
);
882 goto RETRY_COW_OF_LOCK_REQUEST
;
883 case VM_FAULT_SUCCESS_NO_VM_PAGE
:
884 /* success but no VM page: fail */
885 vm_object_paging_end(copy_object
);
886 vm_object_unlock(copy_object
);
888 case VM_FAULT_MEMORY_ERROR
:
889 if (object
!= copy_object
) {
890 vm_object_deallocate(copy_object
);
892 vm_object_lock(object
);
893 goto BYPASS_COW_COPYIN
;
895 panic("vm_object_update: unexpected error 0x%x"
896 " from vm_fault_page()\n", result
);
899 vm_object_paging_end(copy_object
);
901 if ((flags
& (MEMORY_OBJECT_DATA_SYNC
| MEMORY_OBJECT_COPY_SYNC
))) {
902 if (copy_object
!= VM_OBJECT_NULL
&& copy_object
!= object
) {
903 vm_object_unlock(copy_object
);
904 vm_object_deallocate(copy_object
);
905 vm_object_lock(object
);
909 if (copy_object
!= VM_OBJECT_NULL
&& copy_object
!= object
) {
910 if ((flags
& MEMORY_OBJECT_DATA_PURGE
)) {
911 vm_object_lock_assert_exclusive(copy_object
);
912 copy_object
->shadow_severed
= TRUE
;
913 copy_object
->shadowed
= FALSE
;
914 copy_object
->shadow
= NULL
;
916 * delete the ref the COW was holding on the target object
918 vm_object_deallocate(object
);
920 vm_object_unlock(copy_object
);
921 vm_object_deallocate(copy_object
);
922 vm_object_lock(object
);
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */
950 if ((object
->resident_page_count
< RESIDENT_LIMIT
) &&
951 (atop_64(size
) > (unsigned)(object
->resident_page_count
/ (8 * MAX_EXTENTS
)))) {
953 vm_object_offset_t start
;
954 vm_object_offset_t end
;
955 vm_object_size_t e_mask
;
961 e_mask
= ~((vm_object_size_t
)(EXTENT_SIZE
- 1));
963 m
= (vm_page_t
) vm_page_queue_first(&object
->memq
);
965 while (!vm_page_queue_end(&object
->memq
, (vm_page_queue_entry_t
) m
)) {
966 next
= (vm_page_t
) vm_page_queue_next(&m
->vmp_listq
);
968 if ((m
->vmp_offset
>= start
) && (m
->vmp_offset
< end
)) {
970 * this is a page we're interested in
971 * try to fit it into a current extent
973 for (n
= 0; n
< num_of_extents
; n
++) {
974 if ((m
->vmp_offset
& e_mask
) == extents
[n
].e_base
) {
976 * use (PAGE_SIZE - 1) to determine the
977 * max offset so that we don't wrap if
978 * we're at the last page of the space
980 if (m
->vmp_offset
< extents
[n
].e_min
) {
981 extents
[n
].e_min
= m
->vmp_offset
;
982 } else if ((m
->vmp_offset
+ (PAGE_SIZE
- 1)) > extents
[n
].e_max
) {
983 extents
[n
].e_max
= m
->vmp_offset
+ (PAGE_SIZE
- 1);
988 if (n
== num_of_extents
) {
990 * didn't find a current extent that can encompass
993 if (n
< MAX_EXTENTS
) {
995 * if we still have room,
996 * create a new extent
998 extents
[n
].e_base
= m
->vmp_offset
& e_mask
;
999 extents
[n
].e_min
= m
->vmp_offset
;
1000 extents
[n
].e_max
= m
->vmp_offset
+ (PAGE_SIZE
- 1);
1005 * no room to create a new extent...
1006 * fall back to a single extent based
1007 * on the min and max page offsets
1008 * we find in the range we're interested in...
1009 * first, look through the extent list and
1010 * develop the overall min and max for the
1011 * pages we've looked at up to this point
1013 for (n
= 1; n
< num_of_extents
; n
++) {
1014 if (extents
[n
].e_min
< extents
[0].e_min
) {
1015 extents
[0].e_min
= extents
[n
].e_min
;
1017 if (extents
[n
].e_max
> extents
[0].e_max
) {
1018 extents
[0].e_max
= extents
[n
].e_max
;
1022 * now setup to run through the remaining pages
1023 * to determine the overall min and max
1024 * offset for the specified range
1026 extents
[0].e_base
= 0;
1031 * by continuing, we'll reprocess the
1032 * page that forced us to abandon trying
1033 * to develop multiple extents
1042 extents
[0].e_min
= offset
;
1043 extents
[0].e_max
= offset
+ (size
- 1);
1047 for (n
= 0; n
< num_of_extents
; n
++) {
1048 if (vm_object_update_extent(object
, extents
[n
].e_min
, extents
[n
].e_max
, resid_offset
, io_errno
,
1049 should_flush
, should_return
, should_iosync
, protection
)) {
1050 data_returned
= TRUE
;
1053 return data_returned
;
1057 static kern_return_t
1058 vm_object_set_attributes_common(
1060 boolean_t may_cache
,
1061 memory_object_copy_strategy_t copy_strategy
)
1063 boolean_t object_became_ready
;
1065 if (object
== VM_OBJECT_NULL
) {
1066 return KERN_INVALID_ARGUMENT
;
1070 * Verify the attributes of importance
1073 switch (copy_strategy
) {
1074 case MEMORY_OBJECT_COPY_NONE
:
1075 case MEMORY_OBJECT_COPY_DELAY
:
1078 return KERN_INVALID_ARGUMENT
;
1085 vm_object_lock(object
);
1088 * Copy the attributes
1090 assert(!object
->internal
);
1091 object_became_ready
= !object
->pager_ready
;
1092 object
->copy_strategy
= copy_strategy
;
1093 object
->can_persist
= may_cache
;
1096 * Wake up anyone waiting for the ready attribute
1097 * to become asserted.
1100 if (object_became_ready
) {
1101 object
->pager_ready
= TRUE
;
1102 vm_object_wakeup(object
, VM_OBJECT_EVENT_PAGER_READY
);
1105 vm_object_unlock(object
);
1107 return KERN_SUCCESS
;
1112 memory_object_synchronize_completed(
1113 __unused memory_object_control_t control
,
1114 __unused memory_object_offset_t offset
,
1115 __unused memory_object_size_t length
)
1117 panic("memory_object_synchronize_completed no longer supported\n");
1118 return KERN_FAILURE
;
1123 * Set the memory object attribute as provided.
1125 * XXX This routine cannot be completed until the vm_msync, clean
1126 * in place, and cluster work is completed. See ifdef notyet
1127 * below and note that vm_object_set_attributes_common()
1128 * may have to be expanded.
1131 memory_object_change_attributes(
1132 memory_object_control_t control
,
1133 memory_object_flavor_t flavor
,
1134 memory_object_info_t attributes
,
1135 mach_msg_type_number_t count
)
1138 kern_return_t result
= KERN_SUCCESS
;
1139 boolean_t may_cache
;
1140 boolean_t invalidate
;
1141 memory_object_copy_strategy_t copy_strategy
;
1143 object
= memory_object_control_to_vm_object(control
);
1144 if (object
== VM_OBJECT_NULL
) {
1145 return KERN_INVALID_ARGUMENT
;
1148 vm_object_lock(object
);
1150 may_cache
= object
->can_persist
;
1151 copy_strategy
= object
->copy_strategy
;
1153 invalidate
= object
->invalidate
;
1155 vm_object_unlock(object
);
1158 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO
:
1160 old_memory_object_behave_info_t behave
;
1162 if (count
!= OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1163 result
= KERN_INVALID_ARGUMENT
;
1167 behave
= (old_memory_object_behave_info_t
) attributes
;
1169 invalidate
= behave
->invalidate
;
1170 copy_strategy
= behave
->copy_strategy
;
1175 case MEMORY_OBJECT_BEHAVIOR_INFO
:
1177 memory_object_behave_info_t behave
;
1179 if (count
!= MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1180 result
= KERN_INVALID_ARGUMENT
;
1184 behave
= (memory_object_behave_info_t
) attributes
;
1186 invalidate
= behave
->invalidate
;
1187 copy_strategy
= behave
->copy_strategy
;
1191 case MEMORY_OBJECT_PERFORMANCE_INFO
:
1193 memory_object_perf_info_t perf
;
1195 if (count
!= MEMORY_OBJECT_PERF_INFO_COUNT
) {
1196 result
= KERN_INVALID_ARGUMENT
;
1200 perf
= (memory_object_perf_info_t
) attributes
;
1202 may_cache
= perf
->may_cache
;
1207 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO
:
1209 old_memory_object_attr_info_t attr
;
1211 if (count
!= OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1212 result
= KERN_INVALID_ARGUMENT
;
1216 attr
= (old_memory_object_attr_info_t
) attributes
;
1218 may_cache
= attr
->may_cache
;
1219 copy_strategy
= attr
->copy_strategy
;
1224 case MEMORY_OBJECT_ATTRIBUTE_INFO
:
1226 memory_object_attr_info_t attr
;
1228 if (count
!= MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1229 result
= KERN_INVALID_ARGUMENT
;
1233 attr
= (memory_object_attr_info_t
) attributes
;
1235 copy_strategy
= attr
->copy_strategy
;
1236 may_cache
= attr
->may_cache_object
;
1242 result
= KERN_INVALID_ARGUMENT
;
1246 if (result
!= KERN_SUCCESS
) {
1250 if (copy_strategy
== MEMORY_OBJECT_COPY_TEMPORARY
) {
1251 copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
1255 * XXX may_cache may become a tri-valued variable to handle
1256 * XXX uncache if not in use.
1258 return vm_object_set_attributes_common(object
,
1264 memory_object_get_attributes(
1265 memory_object_control_t control
,
1266 memory_object_flavor_t flavor
,
1267 memory_object_info_t attributes
, /* pointer to OUT array */
1268 mach_msg_type_number_t
*count
) /* IN/OUT */
1270 kern_return_t ret
= KERN_SUCCESS
;
1273 object
= memory_object_control_to_vm_object(control
);
1274 if (object
== VM_OBJECT_NULL
) {
1275 return KERN_INVALID_ARGUMENT
;
1278 vm_object_lock(object
);
1281 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO
:
1283 old_memory_object_behave_info_t behave
;
1285 if (*count
< OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1286 ret
= KERN_INVALID_ARGUMENT
;
1290 behave
= (old_memory_object_behave_info_t
) attributes
;
1291 behave
->copy_strategy
= object
->copy_strategy
;
1292 behave
->temporary
= FALSE
;
1293 #if notyet /* remove when vm_msync complies and clean in place fini */
1294 behave
->invalidate
= object
->invalidate
;
1296 behave
->invalidate
= FALSE
;
1299 *count
= OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
1303 case MEMORY_OBJECT_BEHAVIOR_INFO
:
1305 memory_object_behave_info_t behave
;
1307 if (*count
< MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1308 ret
= KERN_INVALID_ARGUMENT
;
1312 behave
= (memory_object_behave_info_t
) attributes
;
1313 behave
->copy_strategy
= object
->copy_strategy
;
1314 behave
->temporary
= FALSE
;
1315 #if notyet /* remove when vm_msync complies and clean in place fini */
1316 behave
->invalidate
= object
->invalidate
;
1318 behave
->invalidate
= FALSE
;
1320 behave
->advisory_pageout
= FALSE
;
1321 behave
->silent_overwrite
= FALSE
;
1322 *count
= MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
1326 case MEMORY_OBJECT_PERFORMANCE_INFO
:
1328 memory_object_perf_info_t perf
;
1330 if (*count
< MEMORY_OBJECT_PERF_INFO_COUNT
) {
1331 ret
= KERN_INVALID_ARGUMENT
;
1335 perf
= (memory_object_perf_info_t
) attributes
;
1336 perf
->cluster_size
= PAGE_SIZE
;
1337 perf
->may_cache
= object
->can_persist
;
1339 *count
= MEMORY_OBJECT_PERF_INFO_COUNT
;
1343 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO
:
1345 old_memory_object_attr_info_t attr
;
1347 if (*count
< OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1348 ret
= KERN_INVALID_ARGUMENT
;
1352 attr
= (old_memory_object_attr_info_t
) attributes
;
1353 attr
->may_cache
= object
->can_persist
;
1354 attr
->copy_strategy
= object
->copy_strategy
;
1356 *count
= OLD_MEMORY_OBJECT_ATTR_INFO_COUNT
;
1360 case MEMORY_OBJECT_ATTRIBUTE_INFO
:
1362 memory_object_attr_info_t attr
;
1364 if (*count
< MEMORY_OBJECT_ATTR_INFO_COUNT
) {
1365 ret
= KERN_INVALID_ARGUMENT
;
1369 attr
= (memory_object_attr_info_t
) attributes
;
1370 attr
->copy_strategy
= object
->copy_strategy
;
1371 attr
->cluster_size
= PAGE_SIZE
;
1372 attr
->may_cache_object
= object
->can_persist
;
1373 attr
->temporary
= FALSE
;
1375 *count
= MEMORY_OBJECT_ATTR_INFO_COUNT
;
1380 ret
= KERN_INVALID_ARGUMENT
;
1384 vm_object_unlock(object
);
1391 memory_object_iopl_request(
1393 memory_object_offset_t offset
,
1394 upl_size_t
*upl_size
,
1396 upl_page_info_array_t user_page_list
,
1397 unsigned int *page_list_count
,
1398 upl_control_flags_t
*flags
,
1403 upl_control_flags_t caller_flags
;
1405 caller_flags
= *flags
;
1407 if (caller_flags
& ~UPL_VALID_FLAGS
) {
1409 * For forward compatibility's sake,
1410 * reject any unknown flag.
1412 return KERN_INVALID_VALUE
;
1415 if (ip_kotype(port
) == IKOT_NAMED_ENTRY
) {
1416 vm_named_entry_t named_entry
;
1418 named_entry
= (vm_named_entry_t
) ip_get_kobject(port
);
1419 /* a few checks to make sure user is obeying rules */
1420 if (*upl_size
== 0) {
1421 if (offset
>= named_entry
->size
) {
1422 return KERN_INVALID_RIGHT
;
1424 *upl_size
= (upl_size_t
)(named_entry
->size
- offset
);
1425 if (*upl_size
!= named_entry
->size
- offset
) {
1426 return KERN_INVALID_ARGUMENT
;
1429 if (caller_flags
& UPL_COPYOUT_FROM
) {
1430 if ((named_entry
->protection
& VM_PROT_READ
)
1432 return KERN_INVALID_RIGHT
;
1435 if ((named_entry
->protection
&
1436 (VM_PROT_READ
| VM_PROT_WRITE
))
1437 != (VM_PROT_READ
| VM_PROT_WRITE
)) {
1438 return KERN_INVALID_RIGHT
;
1441 if (named_entry
->size
< (offset
+ *upl_size
)) {
1442 return KERN_INVALID_ARGUMENT
;
1445 /* the callers parameter offset is defined to be the */
1446 /* offset from beginning of named entry offset in object */
1447 offset
= offset
+ named_entry
->offset
;
1448 offset
+= named_entry
->data_offset
;
1450 if (named_entry
->is_sub_map
||
1451 named_entry
->is_copy
) {
1452 return KERN_INVALID_ARGUMENT
;
1454 if (!named_entry
->is_object
) {
1455 return KERN_INVALID_ARGUMENT
;
1458 named_entry_lock(named_entry
);
1460 object
= vm_named_entry_to_vm_object(named_entry
);
1461 assert(object
!= VM_OBJECT_NULL
);
1462 vm_object_reference(object
);
1463 named_entry_unlock(named_entry
);
1464 } else if (ip_kotype(port
) == IKOT_MEM_OBJ_CONTROL
) {
1465 memory_object_control_t control
;
1466 control
= (memory_object_control_t
) port
;
1467 if (control
== NULL
) {
1468 return KERN_INVALID_ARGUMENT
;
1470 object
= memory_object_control_to_vm_object(control
);
1471 if (object
== VM_OBJECT_NULL
) {
1472 return KERN_INVALID_ARGUMENT
;
1474 vm_object_reference(object
);
1476 return KERN_INVALID_ARGUMENT
;
1478 if (object
== VM_OBJECT_NULL
) {
1479 return KERN_INVALID_ARGUMENT
;
1482 if (!object
->private) {
1483 if (object
->phys_contiguous
) {
1484 *flags
= UPL_PHYS_CONTIG
;
1489 *flags
= UPL_DEV_MEMORY
| UPL_PHYS_CONTIG
;
1492 ret
= vm_object_iopl_request(object
,
1500 vm_object_deallocate(object
);
/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 */
1514 memory_object_upl_request(
1515 memory_object_control_t control
,
1516 memory_object_offset_t offset
,
1519 upl_page_info_array_t user_page_list
,
1520 unsigned int *page_list_count
,
1525 vm_tag_t vmtag
= (vm_tag_t
)tag
;
1526 assert(vmtag
== tag
);
1528 object
= memory_object_control_to_vm_object(control
);
1529 if (object
== VM_OBJECT_NULL
) {
1530 return KERN_TERMINATED
;
1533 return vm_object_upl_request(object
,
1539 (upl_control_flags_t
)(unsigned int) cntrl_flags
,
/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */
1555 memory_object_super_upl_request(
1556 memory_object_control_t control
,
1557 memory_object_offset_t offset
,
1559 upl_size_t super_cluster
,
1561 upl_page_info_t
*user_page_list
,
1562 unsigned int *page_list_count
,
1567 vm_tag_t vmtag
= (vm_tag_t
)tag
;
1568 assert(vmtag
== tag
);
1570 object
= memory_object_control_to_vm_object(control
);
1571 if (object
== VM_OBJECT_NULL
) {
1572 return KERN_INVALID_ARGUMENT
;
1575 return vm_object_super_upl_request(object
,
1582 (upl_control_flags_t
)(unsigned int) cntrl_flags
,
1587 memory_object_cluster_size(
1588 memory_object_control_t control
,
1589 memory_object_offset_t
*start
,
1591 uint32_t *io_streaming
,
1592 memory_object_fault_info_t mo_fault_info
)
1595 vm_object_fault_info_t fault_info
;
1597 object
= memory_object_control_to_vm_object(control
);
1599 if (object
== VM_OBJECT_NULL
|| object
->paging_offset
> *start
) {
1600 return KERN_INVALID_ARGUMENT
;
1603 *start
-= object
->paging_offset
;
1605 fault_info
= (vm_object_fault_info_t
)(uintptr_t) mo_fault_info
;
1606 vm_object_cluster_size(object
,
1607 (vm_object_offset_t
*)start
,
1612 *start
+= object
->paging_offset
;
1614 return KERN_SUCCESS
;
1619 * Routine: host_default_memory_manager [interface]
1621 * set/get the default memory manager port and default cluster
1624 * If successful, consumes the supplied naked send right.
1627 host_default_memory_manager(
1628 host_priv_t host_priv
,
1629 memory_object_default_t
*default_manager
,
1630 __unused memory_object_cluster_size_t cluster_size
)
1632 memory_object_default_t current_manager
;
1633 memory_object_default_t new_manager
;
1634 memory_object_default_t returned_manager
;
1635 kern_return_t result
= KERN_SUCCESS
;
1637 if (host_priv
== HOST_PRIV_NULL
) {
1638 return KERN_INVALID_HOST
;
1641 assert(host_priv
== &realhost
);
1643 new_manager
= *default_manager
;
1644 lck_mtx_lock(&memory_manager_default_lock
);
1645 current_manager
= memory_manager_default
;
1646 returned_manager
= MEMORY_OBJECT_DEFAULT_NULL
;
1648 if (new_manager
== MEMORY_OBJECT_DEFAULT_NULL
) {
1650 * Retrieve the current value.
1652 returned_manager
= current_manager
;
1653 memory_object_default_reference(returned_manager
);
1656 * Only allow the kernel to change the value.
1658 extern task_t kernel_task
;
1659 if (current_task() != kernel_task
) {
1660 result
= KERN_NO_ACCESS
;
1665 * If this is the first non-null manager, start
1666 * up the internal pager support.
1668 if (current_manager
== MEMORY_OBJECT_DEFAULT_NULL
) {
1669 result
= vm_pageout_internal_start();
1670 if (result
!= KERN_SUCCESS
) {
1676 * Retrieve the current value,
1677 * and replace it with the supplied value.
1678 * We return the old reference to the caller
1679 * but we have to take a reference on the new
1682 returned_manager
= current_manager
;
1683 memory_manager_default
= new_manager
;
1684 memory_object_default_reference(new_manager
);
1687 * In case anyone's been waiting for a memory
1688 * manager to be established, wake them up.
1691 thread_wakeup((event_t
) &memory_manager_default
);
1694 * Now that we have a default pager for anonymous memory,
1695 * reactivate all the throttled pages (i.e. dirty pages with
1698 if (current_manager
== MEMORY_OBJECT_DEFAULT_NULL
) {
1699 vm_page_reactivate_all_throttled();
1703 lck_mtx_unlock(&memory_manager_default_lock
);
1705 *default_manager
= returned_manager
;
1710 * Routine: memory_manager_default_reference
1712 * Returns a naked send right for the default
1713 * memory manager. The returned right is always
1714 * valid (not IP_NULL or IP_DEAD).
1717 __private_extern__ memory_object_default_t
1718 memory_manager_default_reference(void)
1720 memory_object_default_t current_manager
;
1722 lck_mtx_lock(&memory_manager_default_lock
);
1723 current_manager
= memory_manager_default
;
1724 while (current_manager
== MEMORY_OBJECT_DEFAULT_NULL
) {
1727 res
= lck_mtx_sleep(&memory_manager_default_lock
,
1729 (event_t
) &memory_manager_default
,
1731 assert(res
== THREAD_AWAKENED
);
1732 current_manager
= memory_manager_default
;
1734 memory_object_default_reference(current_manager
);
1735 lck_mtx_unlock(&memory_manager_default_lock
);
1737 return current_manager
;
1741 * Routine: memory_manager_default_check
1744 * Check whether a default memory manager has been set
1745 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1746 * and KERN_FAILURE if dmm does not exist.
1748 * If there is no default memory manager, log an error,
1749 * but only the first time.
1752 __private_extern__ kern_return_t
1753 memory_manager_default_check(void)
1755 memory_object_default_t current
;
1757 lck_mtx_lock(&memory_manager_default_lock
);
1758 current
= memory_manager_default
;
1759 if (current
== MEMORY_OBJECT_DEFAULT_NULL
) {
1760 static boolean_t logged
; /* initialized to 0 */
1761 boolean_t complain
= !logged
;
1763 lck_mtx_unlock(&memory_manager_default_lock
);
1765 printf("Warning: No default memory manager\n");
1767 return KERN_FAILURE
;
1769 lck_mtx_unlock(&memory_manager_default_lock
);
1770 return KERN_SUCCESS
;
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */
1778 memory_object_page_op(
1779 memory_object_control_t control
,
1780 memory_object_offset_t offset
,
1782 ppnum_t
*phys_entry
,
1787 object
= memory_object_control_to_vm_object(control
);
1788 if (object
== VM_OBJECT_NULL
) {
1789 return KERN_INVALID_ARGUMENT
;
1792 return vm_object_page_op(object
, offset
, ops
, phys_entry
, flags
);
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */
1806 memory_object_range_op(
1807 memory_object_control_t control
,
1808 memory_object_offset_t offset_beg
,
1809 memory_object_offset_t offset_end
,
1815 object
= memory_object_control_to_vm_object(control
);
1816 if (object
== VM_OBJECT_NULL
) {
1817 return KERN_INVALID_ARGUMENT
;
1820 return vm_object_range_op(object
,
1824 (uint32_t *) range
);
1829 memory_object_mark_used(
1830 memory_object_control_t control
)
1834 if (control
== NULL
) {
1838 object
= memory_object_control_to_vm_object(control
);
1840 if (object
!= VM_OBJECT_NULL
) {
1841 vm_object_cache_remove(object
);
1847 memory_object_mark_unused(
1848 memory_object_control_t control
,
1849 __unused boolean_t rage
)
1853 if (control
== NULL
) {
1857 object
= memory_object_control_to_vm_object(control
);
1859 if (object
!= VM_OBJECT_NULL
) {
1860 vm_object_cache_add(object
);
1865 memory_object_mark_io_tracking(
1866 memory_object_control_t control
)
1870 if (control
== NULL
) {
1873 object
= memory_object_control_to_vm_object(control
);
1875 if (object
!= VM_OBJECT_NULL
) {
1876 vm_object_lock(object
);
1877 object
->io_tracking
= TRUE
;
1878 vm_object_unlock(object
);
1883 memory_object_mark_trusted(
1884 memory_object_control_t control
)
1888 if (control
== NULL
) {
1891 object
= memory_object_control_to_vm_object(control
);
1893 if (object
!= VM_OBJECT_NULL
) {
1894 vm_object_lock(object
);
1895 object
->pager_trusted
= TRUE
;
1896 vm_object_unlock(object
);
1900 #if CONFIG_SECLUDED_MEMORY
1902 memory_object_mark_eligible_for_secluded(
1903 memory_object_control_t control
,
1904 boolean_t eligible_for_secluded
)
1908 if (control
== NULL
) {
1911 object
= memory_object_control_to_vm_object(control
);
1913 if (object
== VM_OBJECT_NULL
) {
1917 vm_object_lock(object
);
1918 if (eligible_for_secluded
&&
1919 secluded_for_filecache
&& /* global boot-arg */
1920 !object
->eligible_for_secluded
) {
1921 object
->eligible_for_secluded
= TRUE
;
1922 vm_page_secluded
.eligible_for_secluded
+= object
->resident_page_count
;
1923 } else if (!eligible_for_secluded
&&
1924 object
->eligible_for_secluded
) {
1925 object
->eligible_for_secluded
= FALSE
;
1926 vm_page_secluded
.eligible_for_secluded
-= object
->resident_page_count
;
1927 if (object
->resident_page_count
) {
1928 /* XXX FBDP TODO: flush pages from secluded queue? */
1929 // printf("FBDP TODO: flush %d pages from %p from secluded queue\n", object->resident_page_count, object);
1932 vm_object_unlock(object
);
1934 #endif /* CONFIG_SECLUDED_MEMORY */
1937 memory_object_pages_resident(
1938 memory_object_control_t control
,
1939 boolean_t
* has_pages_resident
)
1943 *has_pages_resident
= FALSE
;
1945 object
= memory_object_control_to_vm_object(control
);
1946 if (object
== VM_OBJECT_NULL
) {
1947 return KERN_INVALID_ARGUMENT
;
1950 if (object
->resident_page_count
) {
1951 *has_pages_resident
= TRUE
;
1954 return KERN_SUCCESS
;
1958 memory_object_signed(
1959 memory_object_control_t control
,
1960 boolean_t is_signed
)
1964 object
= memory_object_control_to_vm_object(control
);
1965 if (object
== VM_OBJECT_NULL
) {
1966 return KERN_INVALID_ARGUMENT
;
1969 vm_object_lock(object
);
1970 object
->code_signed
= is_signed
;
1971 vm_object_unlock(object
);
1973 return KERN_SUCCESS
;
1977 memory_object_is_signed(
1978 memory_object_control_t control
)
1980 boolean_t is_signed
;
1983 object
= memory_object_control_to_vm_object(control
);
1984 if (object
== VM_OBJECT_NULL
) {
1988 vm_object_lock_shared(object
);
1989 is_signed
= object
->code_signed
;
1990 vm_object_unlock(object
);
1996 memory_object_is_shared_cache(
1997 memory_object_control_t control
)
1999 vm_object_t object
= VM_OBJECT_NULL
;
2001 object
= memory_object_control_to_vm_object(control
);
2002 if (object
== VM_OBJECT_NULL
) {
2006 return object
->object_is_shared_cache
;
2009 static ZONE_DECLARE(mem_obj_control_zone
, "mem_obj_control",
2010 sizeof(struct memory_object_control
), ZC_NOENCRYPT
);
2012 __private_extern__ memory_object_control_t
2013 memory_object_control_allocate(
2016 memory_object_control_t control
;
2018 control
= (memory_object_control_t
)zalloc(mem_obj_control_zone
);
2019 if (control
!= MEMORY_OBJECT_CONTROL_NULL
) {
2020 control
->moc_object
= object
;
2021 control
->moc_ikot
= IKOT_MEM_OBJ_CONTROL
; /* fake ip_kotype */
2026 __private_extern__
void
2027 memory_object_control_collapse(
2028 memory_object_control_t control
,
2031 assert((control
->moc_object
!= VM_OBJECT_NULL
) &&
2032 (control
->moc_object
!= object
));
2033 control
->moc_object
= object
;
2036 __private_extern__ vm_object_t
2037 memory_object_control_to_vm_object(
2038 memory_object_control_t control
)
2040 if (control
== MEMORY_OBJECT_CONTROL_NULL
||
2041 control
->moc_ikot
!= IKOT_MEM_OBJ_CONTROL
) {
2042 return VM_OBJECT_NULL
;
2045 return control
->moc_object
;
2048 __private_extern__ vm_object_t
2049 memory_object_to_vm_object(
2050 memory_object_t mem_obj
)
2052 memory_object_control_t mo_control
;
2054 if (mem_obj
== MEMORY_OBJECT_NULL
) {
2055 return VM_OBJECT_NULL
;
2057 mo_control
= mem_obj
->mo_control
;
2058 if (mo_control
== NULL
) {
2059 return VM_OBJECT_NULL
;
2061 return memory_object_control_to_vm_object(mo_control
);
2064 memory_object_control_t
2065 convert_port_to_mo_control(
2066 __unused mach_port_t port
)
2068 return MEMORY_OBJECT_CONTROL_NULL
;
2073 convert_mo_control_to_port(
2074 __unused memory_object_control_t control
)
2076 return MACH_PORT_NULL
;
2080 memory_object_control_reference(
2081 __unused memory_object_control_t control
)
/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
2092 memory_object_control_deallocate(
2093 memory_object_control_t control
)
2095 zfree(mem_obj_control_zone
, control
);
2099 memory_object_control_disable(
2100 memory_object_control_t control
)
2102 assert(control
->moc_object
!= VM_OBJECT_NULL
);
2103 control
->moc_object
= VM_OBJECT_NULL
;
2107 memory_object_default_reference(
2108 memory_object_default_t dmm
)
2110 ipc_port_make_send(dmm
);
2114 memory_object_default_deallocate(
2115 memory_object_default_t dmm
)
2117 ipc_port_release_send(dmm
);
2121 convert_port_to_memory_object(
2122 __unused mach_port_t port
)
2124 return MEMORY_OBJECT_NULL
;
2129 convert_memory_object_to_port(
2130 __unused memory_object_t object
)
2132 return MACH_PORT_NULL
;
2136 /* Routine memory_object_reference */
2138 memory_object_reference(
2139 memory_object_t memory_object
)
2141 (memory_object
->mo_pager_ops
->memory_object_reference
)(
2145 /* Routine memory_object_deallocate */
2147 memory_object_deallocate(
2148 memory_object_t memory_object
)
2150 (memory_object
->mo_pager_ops
->memory_object_deallocate
)(
2155 /* Routine memory_object_init */
2159 memory_object_t memory_object
,
2160 memory_object_control_t memory_control
,
2161 memory_object_cluster_size_t memory_object_page_size
2164 return (memory_object
->mo_pager_ops
->memory_object_init
)(
2167 memory_object_page_size
);
2170 /* Routine memory_object_terminate */
2172 memory_object_terminate
2174 memory_object_t memory_object
2177 return (memory_object
->mo_pager_ops
->memory_object_terminate
)(
2181 /* Routine memory_object_data_request */
2183 memory_object_data_request
2185 memory_object_t memory_object
,
2186 memory_object_offset_t offset
,
2187 memory_object_cluster_size_t length
,
2188 vm_prot_t desired_access
,
2189 memory_object_fault_info_t fault_info
2192 return (memory_object
->mo_pager_ops
->memory_object_data_request
)(
2200 /* Routine memory_object_data_return */
2202 memory_object_data_return
2204 memory_object_t memory_object
,
2205 memory_object_offset_t offset
,
2206 memory_object_cluster_size_t size
,
2207 memory_object_offset_t
*resid_offset
,
2210 boolean_t kernel_copy
,
2214 return (memory_object
->mo_pager_ops
->memory_object_data_return
)(
2225 /* Routine memory_object_data_initialize */
2227 memory_object_data_initialize
2229 memory_object_t memory_object
,
2230 memory_object_offset_t offset
,
2231 memory_object_cluster_size_t size
2234 return (memory_object
->mo_pager_ops
->memory_object_data_initialize
)(
2240 /* Routine memory_object_data_unlock */
2242 memory_object_data_unlock
2244 memory_object_t memory_object
,
2245 memory_object_offset_t offset
,
2246 memory_object_size_t size
,
2247 vm_prot_t desired_access
2250 return (memory_object
->mo_pager_ops
->memory_object_data_unlock
)(
2257 /* Routine memory_object_synchronize */
2259 memory_object_synchronize
2261 memory_object_t memory_object
,
2262 memory_object_offset_t offset
,
2263 memory_object_size_t size
,
2264 vm_sync_t sync_flags
2267 panic("memory_object_syncrhonize no longer supported\n");
2269 return (memory_object
->mo_pager_ops
->memory_object_synchronize
)(
/*
 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
 * each time a "named" VM object gets mapped directly or indirectly
 * (copy-on-write mapping).  A "named" VM object has an extra reference held
 * by the pager to keep it alive until the pager decides that the
 * memory object (and its VM object) can be reclaimed.
 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
 * the mappings of that memory object have been removed.
 *
 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
 * are serialized (through object->mapping_in_progress), to ensure that the
 * pager gets a consistent view of the mapping status of the memory object.
 *
 * This allows the pager to keep track of how many times a memory object
 * has been mapped and with which protections, to decide when it can be
 * reclaimed.
 */
2295 /* Routine memory_object_map */
2299 memory_object_t memory_object
,
2303 return (memory_object
->mo_pager_ops
->memory_object_map
)(
2308 /* Routine memory_object_last_unmap */
2310 memory_object_last_unmap
2312 memory_object_t memory_object
2315 return (memory_object
->mo_pager_ops
->memory_object_last_unmap
)(
2319 /* Routine memory_object_data_reclaim */
2321 memory_object_data_reclaim
2323 memory_object_t memory_object
,
2324 boolean_t reclaim_backing_store
2327 if (memory_object
->mo_pager_ops
->memory_object_data_reclaim
== NULL
) {
2328 return KERN_NOT_SUPPORTED
;
2330 return (memory_object
->mo_pager_ops
->memory_object_data_reclaim
)(
2332 reclaim_backing_store
);
2336 memory_object_backing_object
2338 memory_object_t memory_object
,
2339 memory_object_offset_t offset
,
2340 vm_object_t
*backing_object
,
2341 vm_object_offset_t
*backing_offset
)
2343 if (memory_object
->mo_pager_ops
->memory_object_backing_object
== NULL
) {
2346 return (memory_object
->mo_pager_ops
->memory_object_backing_object
)(
2354 convert_port_to_upl(
2360 if (!ip_active(port
) || (ip_kotype(port
) != IKOT_UPL
)) {
2364 upl
= (upl_t
) ip_get_kobject(port
);
2367 upl
->ref_count
+= 1;
2373 convert_upl_to_port(
2376 return MACH_PORT_NULL
;
2379 __private_extern__
void
2381 __unused ipc_port_t port
,
2382 __unused mach_port_mscount_t mscount
)