/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */

#if	MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>
memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
vm_size_t		memory_manager_default_cluster = 0;
decl_mutex_data(,	memory_manager_default_lock)
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))

typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE		0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK	1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN	2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN	3
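/*
 * Illustrative sketch (not part of the original source; it only assumes the
 * page fields used elsewhere in this file): the macro above is typically
 * applied to a freshly looked-up page, e.g.
 *
 *	if (memory_object_should_return_page(m, MEMORY_OBJECT_RETURN_DIRTY))
 *		... the page must be pushed back to its pager ...
 *
 * Note the side effect: m->dirty is refreshed from pmap_is_modified() before
 * the policy is applied, so a page dirtied only through its pmap mappings is
 * still returned.
 */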
memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);
/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    (integer_t)m, should_return, should_flush, prot, 0);
	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning)
		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if(m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no */
			/* relevant data to return */
			if(m->wire_count == 0) {
				VM_PAGE_FREE(m);
				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);

	if (m->wire_count != 0) {
		/*
		 *	If no change would take place
		 *	anyway, return successfully.
		 *
		 *	No change means:
		 *
		 *	No change to page lock [2 checks]  AND
		 *	Should not return page
		 *
		 * XXX	This doesn't handle sending a copy of a wired
		 * XXX	page to the pager, but that will require some
		 * XXX	significant surgery.
		 */
		if (
		    (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
		    ! memory_object_should_return_page(m, should_return)) {

			/*
			 *	Restart page unlock requests,
			 *	even though no change took place.
			 *	[Memory managers may be expecting
			 *	to see new requests.]
			 */
			m->unlock_request = VM_PROT_NONE;
			PAGE_WAKEUP(m);

			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}

		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}
	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 */

	if (should_flush)
		prot = VM_PROT_ALL;

	/*
	 *	Set the page lock.
	 *
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		if ((m->page_lock ^ prot) & prot) {
			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
		}
#if 0
		/* code associated with the vestigial
		 * memory_object_data_unlock
		 */
		m->page_lock = prot;
		m->lock_supplied = TRUE;
		if (prot != VM_PROT_NONE)
			m->unusual = TRUE;
		else
			m->unusual = FALSE;

		/*
		 *	Restart any past unlock requests, even if no
		 *	change resulted.  If the manager explicitly
		 *	requested no protection change, then it is assumed
		 *	to be remembering past requests.
		 */

		m->unlock_request = VM_PROT_NONE;
		PAGE_WAKEUP(m);
#endif	/* 0 */
	}

	/*
	 *	Handle page returning.
	 */

	if (memory_object_should_return_page(m, should_return)) {

		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */

		vm_page_lock_queues();
		VM_PAGE_QUEUES_REMOVE(m);
		vm_page_unlock_queues();

		if (!should_flush)
			pmap_disconnect(m->phys_page);

		if (m->dirty)
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	Handle flushing
	 */

	if (should_flush) {
		VM_PAGE_FREE(m);
	} else {
		/*
		 *	XXX Make clean but not flush a paging hint,
		 *	and deactivate the pages.  This is a hack
		 *	because it overloads flush/clean with
		 *	implementation-dependent meaning. This only
		 *	happens to pages that are already clean.
		 */

		if (vm_page_deactivate_hint &&
		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
		}
	}

	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
}
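/*
 * Illustrative summary (a sketch inferred from the routine above, not
 * original text): callers interpret the result roughly as
 *
 *	MUST_BLOCK  -> page is busy/cleaning or wired; retry after sleeping
 *	MUST_CLEAN  -> page is dirty; it must be pushed to the pager
 *	MUST_RETURN -> page must be sent to the pager but was not dirty
 *	DONE        -> nothing further to do for this page
 */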
#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	register int		upl_flags;				\
									\
	vm_object_unlock(object);					\
									\
	if (iosync)							\
		upl_flags = UPL_MSYNC | UPL_IOSYNC;			\
	else								\
		upl_flags = UPL_MSYNC;					\
									\
	(void) memory_object_data_return(object->pager,		\
		po,							\
		data_cnt,						\
		ro,							\
		ioerr,							\
		(action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),	\
		!should_flush,						\
		upl_flags);						\
									\
	vm_object_lock(object);						\
MACRO_END
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */
kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t	*	resid_offset,
	int			*	io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t	object;
	__unused boolean_t should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)control, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);
	offset -= object->paging_offset;

	(void)vm_object_update(object,
		offset, size, resid_offset, io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
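/*
 * Usage sketch (an assumption for illustration only; "ctl" is a hypothetical
 * control reference already held by the caller): flush and return all dirty
 * pages in the first megabyte of the object:
 *
 *	kr = memory_object_lock_request(ctl, 0, 1024*1024,
 *					NULL, NULL,
 *					MEMORY_OBJECT_RETURN_ALL,
 *					MEMORY_OBJECT_DATA_FLUSH,
 *					VM_PROT_NO_CHANGE);
 */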
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */
kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}
/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *	FALSE		if no pages were returned to the pager
 *	TRUE		otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    (integer_t)object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
			       MEMORY_OBJECT_RETURN_ALL :
			       MEMORY_OBJECT_RETURN_NONE,
			       flags,
			       VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
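/*
 * Illustrative mapping (a sketch based only on the code above): an
 * msync-style "flush, return and wait" request would come through here as
 *
 *	vm_object_sync(object, offset, size,
 *		       TRUE,	-- should_flush  -> MEMORY_OBJECT_DATA_FLUSH
 *		       TRUE,	-- should_return -> MEMORY_OBJECT_RETURN_ALL
 *		       TRUE);	-- should_iosync -> MEMORY_OBJECT_IO_SYNC
 */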
static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	vm_size_t	data_cnt = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	last_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {

		/*
		 * Limit the number of pages to be cleaned at once.
		 */
		if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
			LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
					       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
			data_cnt = 0;
		}
		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    (integer_t)object, offset, page_lock_result, 0, 0);
			switch (page_lock_result)
			{
			  case MEMORY_OBJECT_LOCK_RESULT_DONE:
				/*
				 *	End of a cluster of dirty pages.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				break;

			  case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				/*
				 *	Since it is necessary to block,
				 *	clean any dirty pages now.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;
			  case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			  case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				/*
				 * The clean and return cases are similar.
				 *
				 * if this would form a discontiguous block,
				 * clean the old pages and start anew.
				 *
				 * Mark the page busy since we will unlock the
				 * object if we issue the LIST_REQ_PAGEOUT
				 */
				m->busy = TRUE;
				if (data_cnt &&
				    ((last_offset != offset) || (pageout_action != page_lock_result))) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
				}
				m->busy = FALSE;

				if (m->cleaning) {
					PAGE_SLEEP(object, m, THREAD_UNINT);
					continue;
				}
				if (data_cnt == 0) {
					pageout_action = page_lock_result;
					paging_offset = offset;
				}
				data_cnt += PAGE_SIZE;
				last_offset = offset + PAGE_SIZE_64;

				vm_page_lock_queues();
				/*
				 * mark the page for cleaning
				 */
				m->list_req_pending = TRUE;
				m->cleaning = TRUE;

				if (should_flush) {
					/*
					 * and add additional state
					 * for the flush
					 */
					m->busy = TRUE;
					m->pageout = TRUE;
					vm_page_wire(m);
				}
				vm_page_unlock_queues();

				retval = 1;
				break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
			data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}
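/*
 * Behavioral sketch (not in the original source): contiguous pages that share
 * the same lock result are batched into a single data_return.  For example,
 * dirty pages at offsets 0x0000..0x6000 produce one LIST_REQ_PAGEOUT_PAGES
 * call covering seven pages, while a gap in the run, or a switch between
 * MUST_CLEAN and MUST_RETURN, starts a new batch.
 */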
/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t		copy_object;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];
	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 * XXX	NOTE: May want to consider converting this to a page list
	 * XXX	vm_map_copy interface.  Need to understand object
	 * XXX	coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			 !(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);
	if((((copy_object = object->copy) != NULL) && update_cow) ||
			(flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;

		if(copy_object != NULL) {
			/* translate offset with respect to shadow's offset */
			copy_offset = (offset >= copy_object->shadow_offset)?
				(vm_map_offset_t)(offset - copy_object->shadow_offset) :
				(vm_map_offset_t) 0;
			if(copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/* clip size with respect to shadow offset */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size -
					(copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}

			copy_size += copy_offset;

			vm_object_unlock(object);
			vm_object_lock(copy_object);
		} else {
			copy_object = object;

			copy_size = offset + size;
			copy_offset = offset;
		}

		vm_object_paging_begin(copy_object);
		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			prot =	VM_PROT_WRITE|VM_PROT_READ;
			switch (vm_fault_page(copy_object, i,
				VM_PROT_WRITE|VM_PROT_READ,
				FALSE,
				THREAD_UNINT,
				copy_offset,
				copy_offset + copy_size,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot,
				&page,
				&top_page,
				(int *)0,
				&error,
				FALSE,
				FALSE, NULL, 0)) {

			case VM_FAULT_SUCCESS:
				if(top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				} else {
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
				}
				break;
			case VM_FAULT_RETRY:
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				prot =	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_ERROR:
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			}
		}
		vm_object_paging_end(copy_object);
		if(copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_lock(object);
		}
	}
	if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		return KERN_SUCCESS;
	}
	if(((copy_object = object->copy) != NULL) &&
	   (flags & MEMORY_OBJECT_DATA_PURGE)) {
		copy_object->shadow_severed = TRUE;
		copy_object->shadowed = FALSE;
		copy_object->shadow = NULL;
		/* delete the ref the COW was holding on the target object */
		vm_object_deallocate(object);
	}
BYPASS_COW_COPYIN:
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;
		vm_page_t		m;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);
			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;

						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
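/*
 * Worked example (illustrative only): with EXTENT_SIZE = 256MB, e_mask clears
 * the low 28 bits, so a resident page at offset 0x12345000 lands in the extent
 * whose e_base is 0x10000000; its e_min/e_max are then narrowed or widened
 * page by page.  Once more than MAX_EXTENTS (8) distinct 256MB regions are
 * seen, the code collapses everything into a single [min,max] extent rather
 * than giving up on the pre-flight.
 */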
/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	vm_offset_t		length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    (integer_t)object, offset, length, 0, 0);
	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
static kern_return_t
vm_object_set_attributes_common(
	vm_object_t			object,
	boolean_t			may_cache,
	memory_object_copy_strategy_t	copy_strategy,
	boolean_t			temporary,
	memory_object_cluster_size_t	cluster_size,
	boolean_t			silent_overwrite,
	boolean_t			advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    (integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);
	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
		case MEMORY_OBJECT_COPY_NONE:
		case MEMORY_OBJECT_COPY_DELAY:
			break;
		default:
			return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */
	if (may_cache)
		may_cache = TRUE;
	if (temporary)
		temporary = TRUE;
	if (cluster_size != 0) {
		int	pages_per_cluster;
		pages_per_cluster = atop_32(cluster_size);
		/*
		 * Cluster size must be integral multiple of page size,
		 * and be a power of 2 number of pages.
		 */
		if ((cluster_size & (PAGE_SIZE-1)) ||
		    ((pages_per_cluster-1) & pages_per_cluster))
			return KERN_INVALID_ARGUMENT;
	}
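	/*
	 * Quick check of the power-of-two test above (illustrative): for
	 * pages_per_cluster == 8, (8-1) & 8 == 0, so the size is accepted;
	 * for pages_per_cluster == 6, (6-1) & 6 == 4, so it is rejected.
	 */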
	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;
	if (cluster_size == 0)
		cluster_size = PAGE_SIZE;
	object->cluster_size = cluster_size;

	assert(cluster_size >= PAGE_SIZE &&
	       cluster_size % PAGE_SIZE == 0);

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed. See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_cluster_size_t	cluster_size;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
#if notyet
	invalidate = object->invalidate;
#endif
	cluster_size = object->cluster_size;
	vm_object_unlock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;
		break;
	    }
	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;
		cluster_size = round_page_32(perf->cluster_size);

		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;
		cluster_size = page_size;

		break;
	    }
	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		cluster_size = attr->cluster_size;
		temporary = attr->temporary;

		break;
	    }

	    default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return(result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						cluster_size,
						silent_overwrite,
						advisory_pageout));
}
kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	switch (flavor) {
:
1346 old_memory_object_behave_info_t behave
;
1348 if (*count
< OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1349 ret
= KERN_INVALID_ARGUMENT
;
1353 behave
= (old_memory_object_behave_info_t
) attributes
;
1354 behave
->copy_strategy
= object
->copy_strategy
;
1355 behave
->temporary
= object
->temporary
;
1356 #if notyet /* remove when vm_msync complies and clean in place fini */
1357 behave
->invalidate
= object
->invalidate
;
1359 behave
->invalidate
= FALSE
;
1362 *count
= OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
1366 case MEMORY_OBJECT_BEHAVIOR_INFO
:
1368 memory_object_behave_info_t behave
;
1370 if (*count
< MEMORY_OBJECT_BEHAVE_INFO_COUNT
) {
1371 ret
= KERN_INVALID_ARGUMENT
;
1375 behave
= (memory_object_behave_info_t
) attributes
;
1376 behave
->copy_strategy
= object
->copy_strategy
;
1377 behave
->temporary
= object
->temporary
;
1378 #if notyet /* remove when vm_msync complies and clean in place fini */
1379 behave
->invalidate
= object
->invalidate
;
1381 behave
->invalidate
= FALSE
;
1383 behave
->advisory_pageout
= object
->advisory_pageout
;
1384 behave
->silent_overwrite
= object
->silent_overwrite
;
1385 *count
= MEMORY_OBJECT_BEHAVE_INFO_COUNT
;
	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = object->cluster_size;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }
	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = object->cluster_size;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return(ret);
}
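/*
 * Usage sketch (an illustration only, not from the original source; "ctl" is
 * a hypothetical control reference):
 *
 *	memory_object_perf_info_data_t	perf;
 *	mach_msg_type_number_t		count = MEMORY_OBJECT_PERF_INFO_COUNT;
 *
 *	kr = memory_object_get_attributes(ctl, MEMORY_OBJECT_PERFORMANCE_INFO,
 *					  (memory_object_info_t)&perf, &count);
 */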
kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t		object;
	kern_return_t		ret;
	int			caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}
	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t)port->ip_kobject;
		/* a few checks to make sure user is obeying rules */
		if(*upl_size == 0) {
			if(offset >= named_entry->size)
				return(KERN_INVALID_RIGHT);
			*upl_size = named_entry->size - offset;
		}
		if(caller_flags & UPL_COPYOUT_FROM) {
			if((named_entry->protection & VM_PROT_READ)
						!= VM_PROT_READ) {
				return(KERN_INVALID_RIGHT);
			}
		} else {
			if((named_entry->protection &
				(VM_PROT_READ | VM_PROT_WRITE))
				!= (VM_PROT_READ | VM_PROT_WRITE)) {
				return(KERN_INVALID_RIGHT);
			}
		}
		if(named_entry->size < (offset + *upl_size))
			return(KERN_INVALID_ARGUMENT);

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if(named_entry->is_sub_map)
			return (KERN_INVALID_ARGUMENT);

		named_entry_lock(named_entry);
		if (named_entry->is_pager) {
			object = vm_object_enter(named_entry->backing.pager,
					named_entry->offset + named_entry->size,
					named_entry->internal,
					FALSE,
					FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return(KERN_INVALID_OBJECT);
			}

			/* JMM - drop reference on pager here? */

			/* create an extra reference for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			/* wait for object to be ready */
			while (!object->pager_ready) {
				vm_object_wait(object,
						VM_OBJECT_EVENT_PAGER_READY,
						THREAD_UNINT);
				vm_object_lock(object);
			}
			vm_object_unlock(object);
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			object = named_entry->backing.object;
			vm_object_reference(object);
			named_entry_unlock(named_entry);
		}
	} else {
		memory_object_control_t	control;
		control = (memory_object_control_t)port->ip_kobject;
		if (control == NULL)
			return (KERN_INVALID_ARGUMENT);
		object = memory_object_control_to_vm_object(control);
		if (object == VM_OBJECT_NULL)
			return (KERN_INVALID_ARGUMENT);
		vm_object_reference(object);
	}
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);
	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);

	return ret;
}
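/*
 * Note (a summary sketch of the logic above, not original text): the port
 * argument may name either a named entry or a memory object control; in both
 * cases the routine resolves it to a vm_object, takes a reference, hands the
 * real work to vm_object_iopl_request(), and drops the reference once the
 * UPL has been built.
 */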
/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}
/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}
int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		memory_object_default_reference(current_manager);
		returned_manager = current_manager;
	} else {
		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	value.
		 */

		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		if (cluster_size % PAGE_SIZE != 0) {
#if 0
			mutex_unlock(&memory_manager_default_lock);
			return KERN_INVALID_ARGUMENT;
#else
			cluster_size = round_page_32(cluster_size);
#endif
		}
		memory_manager_default_cluster = cluster_size;

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);
	}

	mutex_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return(KERN_SUCCESS);
}
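/*
 * Illustrative calling patterns (assumptions for clarity, not original
 * source): passing MEMORY_OBJECT_DEFAULT_NULL reads the current manager,
 *
 *	memory_object_default_t dmm = MEMORY_OBJECT_DEFAULT_NULL;
 *	host_default_memory_manager(host_priv_self(), &dmm, 0);
 *
 * while passing a valid send right installs it and hands back the previous
 * manager in the same in/out parameter.
 */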
/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(
	memory_object_cluster_size_t *cluster_size)
{
	memory_object_default_t current_manager;

	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = thread_sleep_mutex((event_t) &memory_manager_default,
					 &memory_manager_default_lock,
					 THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	*cluster_size = memory_manager_default_cluster;
	mutex_unlock(&memory_manager_default_lock);

	return current_manager;
}
/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *		If there is no default memory manager, log an error,
 *		but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	mutex_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;
		mutex_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		mutex_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}
__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	mutex_init(&memory_manager_default_lock, 0);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);
	vm_object_lock(object);

	if(ops & UPL_POP_PHYSICAL) {
		if(object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if(object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while(TRUE) {
		if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* Sync up on getting the busy bit */
		if((dst_page->busy || dst_page->cleaning) &&
		   (((ops & UPL_POP_SET) &&
		   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			vm_page_lock_queues();

			if (dst_page->no_isync == FALSE)
				pmap_disconnect(dst_page->phys_page);
			vm_page_free(dst_page);

			vm_page_unlock_queues();
			break;
		}
		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
			if(dst_page->busy) *flags |= UPL_POP_BUSY;
		}
		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if(ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}
		if(ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}
->encrypted
) {
1920 * We need to decrypt this encrypted page before the
1921 * caller can access its contents.
1922 * But if the caller really wants to access the page's
1923 * contents, they have to keep the page "busy".
1924 * Otherwise, the page could get recycled or re-encrypted
1927 if ((ops
& UPL_POP_SET
) && (ops
& UPL_POP_BUSY
) &&
1930 * The page is stable enough to be accessed by
1931 * the caller, so make sure its contents are
1934 vm_page_decrypt(dst_page
, 0);
1937 * The page is not busy, so don't bother
1938 * decrypting it, since anything could
1939 * happen to it between now and when the
1940 * caller wants to access it.
1941 * We should not give the caller access
1944 assert(!phys_entry
);
1950 * The physical page number will remain valid
1951 * only if the page is kept busy.
1952 * ENCRYPTED SWAP: make sure we don't let the
1953 * caller access an encrypted page.
1955 assert(dst_page
->busy
);
1956 assert(!dst_page
->encrypted
);
1957 *phys_entry
= dst_page
->phys_page
;
1963 vm_object_unlock(object
);
1964 return KERN_SUCCESS
;
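/*
 * Usage sketch (illustrative only): a caller that wants the physical page
 * number and a stable page would typically pass
 *
 *	ops = UPL_POP_SET | UPL_POP_BUSY;
 *	memory_object_page_op(ctl, offset, ops, &phys, &flags);
 *
 * and later clear the busy bit with UPL_POP_CLR | UPL_POP_BUSY, which also
 * issues the PAGE_WAKEUP seen above.
 */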
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	memory_object_offset_t	offset;
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count == 0) {
		if (ops & UPL_ROP_PRESENT)
			*range = 0;
		else
			*range = offset_end - offset_beg;

		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object,
						dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				vm_page_lock_queues();

				if (dst_page->no_isync == FALSE)
					pmap_disconnect(dst_page->phys_page);
				vm_page_free(dst_page);

				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range)
		*range = offset - offset_beg;

	return KERN_SUCCESS;
}
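/*
 * Illustrative example (not from the original source): to discard every
 * resident page in a range and learn how far the scan got,
 *
 *	memory_object_range_op(ctl, start, end, UPL_ROP_DUMP, &range);
 *
 * "range" then holds the number of bytes actually traversed, which is less
 * than (end - start) only when UPL_ROP_ABSENT or UPL_ROP_PRESENT cut the
 * walk short.
 */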
kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t	*	has_pages_resident)
{
	vm_object_t		object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	return;
}
__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t		object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL)
		control->object = object;
	return (control);
}
__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t control,
	vm_object_t		object)
{
	assert((control->object != VM_OBJECT_NULL) &&
	       (control->object != object));
	control->object = object;
}
__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return VM_OBJECT_NULL;

	return (control->object);
}
memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}

mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}

void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->object != VM_OBJECT_NULL);
	control->object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
	memory_object_default_t dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t dmm)
{
	ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}

mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_reference(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_reference(memory_object);
	} else
#endif
		dp_memory_object_reference(memory_object);
}
/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_deallocate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_deallocate(memory_object);
	} else
#endif
		dp_memory_object_deallocate(memory_object);
}
/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_init(memory_object,
					memory_control,
					memory_object_page_size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_init(memory_object,
					 memory_control,
					 memory_object_page_size);
	} else
#endif
		return dp_memory_object_init(memory_object,
					     memory_control,
					     memory_object_page_size);
}
/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_terminate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_terminate(memory_object);
	} else
#endif
		return dp_memory_object_terminate(memory_object);
}
/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_request(memory_object,
						offset,
						length,
						desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_request(memory_object,
						 offset,
						 length,
						 desired_access);
	} else
#endif
		return dp_memory_object_data_request(memory_object,
						     offset,
						     length,
						     desired_access);
}
/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	memory_object_offset_t *resid_offset,
	int	*io_error,
	boolean_t dirty,
	boolean_t kernel_copy,
	int	upl_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_return(memory_object,
					       offset,
					       size,
					       resid_offset,
					       io_error,
					       dirty,
					       kernel_copy,
					       upl_flags);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_return(memory_object,
						offset,
						size,
						resid_offset,
						io_error,
						dirty,
						kernel_copy,
						upl_flags);
	} else
#endif
		return dp_memory_object_data_return(memory_object,
						    offset,
						    size,
						    resid_offset,
						    io_error,
						    dirty,
						    kernel_copy,
						    upl_flags);
}
/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_initialize(memory_object,
						   offset,
						   size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_initialize(memory_object,
						    offset,
						    size);
	} else
#endif
		return dp_memory_object_data_initialize(memory_object,
							offset,
							size);
}
/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_unlock(memory_object,
					       offset,
					       size,
					       desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_unlock(memory_object,
						offset,
						size,
						desired_access);
	} else
#endif
		return dp_memory_object_data_unlock(memory_object,
						    offset,
						    size,
						    desired_access);
}
/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_sync_t sync_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_synchronize(memory_object,
					       offset,
					       size,
					       sync_flags);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_synchronize(memory_object,
						offset,
						size,
						sync_flags);
	} else
#endif
		return dp_memory_object_synchronize(memory_object,
						    offset,
						    size,
						    sync_flags);
}
/* Routine memory_object_unmap */
kern_return_t memory_object_unmap
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_unmap(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_unmap(memory_object);
	} else
#endif
		return dp_memory_object_unmap(memory_object);
}
/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}
upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t upl;

	ip_lock(port);
	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
		ip_unlock(port);
		return (upl_t)NULL;
	}
	upl = (upl_t) port->ip_kobject;
	ip_unlock(port);

	return upl;
}
mach_port_t
convert_upl_to_port(
	__unused upl_t		upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t			port,
	__unused mach_port_mscount_t		mscount)
{
	return;
}