/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */

#if	MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>

memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
vm_size_t		memory_manager_default_cluster = 0;
decl_mutex_data(,	memory_manager_default_lock)

/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))

typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3

memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    (integer_t)m, should_return, should_flush, prot, 0);

	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning)
		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if(m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no */
			/* relevant data to return */
			if(m->wire_count == 0) {
				VM_PAGE_FREE(m);
				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);

	if (m->wire_count != 0) {
		/*
		 *	If no change would take place
		 *	anyway, return successfully.
		 *
		 *	No change means:
		 *		Not flushing AND
		 *		No change to page lock [2 checks]  AND
		 *		Should not return page
		 *
		 * XXX	This doesn't handle sending a copy of a wired
		 * XXX	page to the pager, but that will require some
		 * XXX	significant surgery.
		 */
		if (!should_flush &&
		    (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
		    ! memory_object_should_return_page(m, should_return)) {

			/*
			 *	Restart page unlock requests,
			 *	even though no change took place.
			 *	[Memory managers may be expecting
			 *	to see new requests.]
			 */
			m->unlock_request = VM_PROT_NONE;
			PAGE_WAKEUP(m);

			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}

		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}

	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 */

	if (should_flush)
		prot = VM_PROT_ALL;

	/*
	 *	Set the page lock.
	 *
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		if ((m->page_lock ^ prot) & prot) {
			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
		}
#if 0
		/* code associated with the vestigial
		 * memory_object_data_unlock
		 */
		m->page_lock = prot;
		m->lock_supplied = TRUE;
		if (prot != VM_PROT_NONE)
			m->unusual = TRUE;
		else
			m->unusual = FALSE;

		/*
		 *	Restart any past unlock requests, even if no
		 *	change resulted.  If the manager explicitly
		 *	requested no protection change, then it is assumed
		 *	to be remembering past requests.
		 */

		m->unlock_request = VM_PROT_NONE;
		PAGE_WAKEUP(m);
#endif	/* 0 */
	}

	/*
	 *	Handle page returning.
	 */

	if (memory_object_should_return_page(m, should_return)) {

		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */
		if (!should_flush) {
			vm_page_lock_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			vm_page_unlock_queues();

			if (!m->active && !m->inactive)
				pmap_disconnect(m->phys_page);
		}

		if (m->dirty)
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	Handle flushing
	 */
	if (should_flush) {
		VM_PAGE_FREE(m);
	} else {
		/*
		 *	XXX Make clean but not flush a paging hint,
		 *	and deactivate the pages.  This is a hack
		 *	because it overloads flush/clean with
		 *	implementation-dependent meaning.  This only
		 *	happens to pages that are already clean.
		 */
		if (vm_page_deactivate_hint &&
		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
		}
	}

	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
}

#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	register int		upl_flags;				\
									\
	vm_object_unlock(object);					\
									\
	if (iosync)							\
		upl_flags = UPL_MSYNC | UPL_IOSYNC;			\
	else								\
		upl_flags = UPL_MSYNC;					\
									\
	(void) memory_object_data_return(object->pager,		\
		po,							\
		data_cnt,						\
		ro,							\
		ioerr,							\
		(action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),	\
		!should_flush,						\
		upl_flags);						\
									\
	vm_object_lock(object);						\
MACRO_END

/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */

kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t	*	resid_offset,
	int			*	io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t		object;
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)control, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);
	offset -= object->paging_offset;

	(void)vm_object_update(object,
		offset, size, resid_offset, io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
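
/*
 * Illustrative sketch (not part of the original source): a typical
 * msync-style caller of memory_object_lock_request() returns the dirty
 * pages in a range and flushes the cached copies.  The control handle and
 * the start/end offsets are hypothetical; only the flag, return-policy and
 * protection constants come from this interface.
 *
 *	kern_return_t	kr;
 *
 *	kr = memory_object_lock_request(control,
 *					trunc_page_64(start),
 *					round_page_64(end) - trunc_page_64(start),
 *					NULL,		// resid_offset not wanted
 *					NULL,		// io_errno not wanted
 *					MEMORY_OBJECT_RETURN_DIRTY,
 *					MEMORY_OBJECT_DATA_FLUSH,
 *					VM_PROT_NO_CHANGE);
 *	if (kr != KERN_SUCCESS)
 *		return (kr);
 */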

/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}

/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}

/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *	FALSE		if no pages were returned to the pager
 *	TRUE		otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    (integer_t)object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
					MEMORY_OBJECT_RETURN_ALL :
					MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
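
/*
 * Illustrative sketch (not from the original source): vm_object_sync() is
 * the kernel-internal flavor of the lock request above.  A hypothetical
 * caller pushing a dirty range synchronously to the pager would look like:
 *
 *	boolean_t	returned;
 *
 *	returned = vm_object_sync(object,
 *				  offset,
 *				  size,
 *				  TRUE,		// should_flush
 *				  TRUE,		// should_return
 *				  TRUE);	// should_iosync
 *
 * which this routine translates into MEMORY_OBJECT_DATA_FLUSH |
 * MEMORY_OBJECT_IO_SYNC and MEMORY_OBJECT_RETURN_ALL for vm_object_update().
 */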

static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	vm_size_t	data_cnt = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	last_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {

		/*
		 * Limit the number of pages to be cleaned at once.
		 */
		if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
			LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
					       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
			data_cnt = 0;
		}

		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    (integer_t)object, offset, page_lock_result, 0, 0);

			switch (page_lock_result)
			{
			  case MEMORY_OBJECT_LOCK_RESULT_DONE:
			    /*
			     *	End of a cluster of dirty pages.
			     */
			    if (data_cnt) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
				    continue;
			    }
			    break;

			  case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
			    /*
			     *	Since it is necessary to block,
			     *	clean any dirty pages now.
			     */
			    if (data_cnt) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
				    continue;
			    }
			    PAGE_SLEEP(object, m, THREAD_UNINT);
			    continue;

			  case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			  case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
			    /*
			     * The clean and return cases are similar.
			     *
			     * if this would form a discontiguous block,
			     * clean the old pages and start anew.
			     *
			     * Mark the page busy since we will unlock the
			     * object if we issue the LIST_REQ_PAGEOUT
			     */
			    m->busy = TRUE;
			    if (data_cnt &&
				((last_offset != offset) || (pageout_action != page_lock_result))) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
			    }
			    m->busy = FALSE;

			    if (m->cleaning) {
				    PAGE_SLEEP(object, m, THREAD_UNINT);
				    continue;
			    }
			    if (data_cnt == 0) {
				    pageout_action = page_lock_result;
				    paging_offset = offset;
			    }
			    data_cnt += PAGE_SIZE;
			    last_offset = offset + PAGE_SIZE_64;

			    vm_page_lock_queues();
			    /*
			     * Clean
			     */
			    m->list_req_pending = TRUE;
			    m->cleaning = TRUE;

			    if (should_flush) {
				    /*
				     * and add additional state
				     * for the flush
				     */
				    m->busy = TRUE;
				    m->pageout = TRUE;
				    vm_page_wire(m);
			    }
			    vm_page_unlock_queues();

			    retval = 1;
			    break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
				       data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}

/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t		copy_object;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];

	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 * XXX	NOTE: May want to consider converting this to a page list
	 * XXX	vm_map_copy interface.  Need to understand object
	 * XXX	coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			!(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);

	if((((copy_object = object->copy) != NULL) && update_cow) ||
			(flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;

		if(copy_object != NULL) {
			/* translate offset with respect to shadow's offset */
			copy_offset = (offset >= copy_object->shadow_offset)?
				(vm_map_offset_t)(offset - copy_object->shadow_offset) :
				(vm_map_offset_t) 0;
			if(copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/* clip size with respect to shadow offset */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size -
					(copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}

			copy_size+=copy_offset;

			vm_object_unlock(object);
			vm_object_lock(copy_object);
		} else {
			copy_object = object;

			copy_size   = offset + size;
			copy_offset = offset;
		}

		vm_object_paging_begin(copy_object);
		for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			prot = 	VM_PROT_WRITE|VM_PROT_READ;
			switch (vm_fault_page(copy_object, i,
				VM_PROT_WRITE|VM_PROT_READ,
				FALSE,
				THREAD_UNINT,
				copy_offset,
				copy_offset+copy_size,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot,
				&page,
				&top_page,
				(int *)0,
				&error,
				FALSE,
				FALSE, NULL, 0)) {

			case VM_FAULT_SUCCESS:
				if(top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				} else {
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
				}
				break;
			case VM_FAULT_RETRY:
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_ERROR:
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			}
		}
		vm_object_paging_end(copy_object);
		if(copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_lock(object);
		}
	}
	if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		return KERN_SUCCESS;
	}
	if(((copy_object = object->copy) != NULL) &&
			(flags & MEMORY_OBJECT_DATA_PURGE)) {
		copy_object->shadow_severed = TRUE;
		copy_object->shadowed = FALSE;
		copy_object->shadow = NULL;
		/* delete the ref the COW was holding on the target object */
		vm_object_deallocate(object);
	}
BYPASS_COW_COPYIN:

	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terrabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;
		vm_page_t		m;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);

			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;

						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
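
/*
 * Worked example for the extent pre-flight above (illustrative, not part of
 * the original source).  With EXTENT_SIZE = 256MB (0x10000000), e_mask
 * clears the low 28 bits, so every resident page hashes to the
 * 256MB-aligned bucket that contains it:
 *
 *	e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
 *
 *	// page at offset 0x10002000 -> e_base 0x10000000
 *	// page at offset 0x1FFFF000 -> e_base 0x10000000 (same extent)
 *	// page at offset 0x20001000 -> e_base 0x20000000 (new extent)
 *
 * Up to MAX_EXTENTS (8) such buckets are tracked; each bucket's e_min/e_max
 * shrink the range that vm_object_update_extent() must walk, so a sparse
 * object does not force a page-by-page scan of the entire request.
 */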

/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	vm_offset_t		length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    (integer_t)object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */

static kern_return_t
vm_object_set_attributes_common(
	vm_object_t			object,
	boolean_t			may_cache,
	memory_object_copy_strategy_t	copy_strategy,
	boolean_t			temporary,
	memory_object_cluster_size_t	cluster_size,
	boolean_t			silent_overwrite,
	boolean_t			advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    (integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
		case MEMORY_OBJECT_COPY_NONE:
		case MEMORY_OBJECT_COPY_DELAY:
			break;
		default:
			return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */
	if (may_cache)
		may_cache = TRUE;
	if (temporary)
		temporary = TRUE;
	if (cluster_size != 0) {
		int	pages_per_cluster;
		pages_per_cluster = atop_32(cluster_size);
		/*
		 * Cluster size must be integral multiple of page size,
		 * and be a power of 2 number of pages.
		 */
		if ((cluster_size & (PAGE_SIZE-1)) ||
		    ((pages_per_cluster-1) & pages_per_cluster))
			return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;
	if (cluster_size == 0)
		cluster_size = PAGE_SIZE;
	object->cluster_size = cluster_size;

	assert(cluster_size >= PAGE_SIZE &&
	       cluster_size % PAGE_SIZE == 0);

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
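
/*
 * Illustrative note (not from the original source) on the cluster-size
 * check above: a value is accepted only if it is page-aligned and a
 * power-of-two number of pages, because (n - 1) & n is zero exactly for
 * powers of two.  For example, with 4K pages:
 *
 *	cluster_size = 0x8000 (8 pages):   (8 - 1) & 8   == 0  -> accepted
 *	cluster_size = 0xC000 (12 pages):  (12 - 1) & 12 == 8  -> rejected
 */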

/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed. See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_cluster_size_t	cluster_size;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
#if notyet
	invalidate = object->invalidate;
#endif
	cluster_size = object->cluster_size;
	vm_object_unlock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;
		cluster_size = round_page_32(perf->cluster_size);

		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;
		cluster_size = page_size;

		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		cluster_size = attr->cluster_size;
		temporary = attr->temporary;

		break;
	    }

	    default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return(result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						cluster_size,
						silent_overwrite,
						advisory_pageout));
}

kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif

		*count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif
		behave->advisory_pageout = object->advisory_pageout;
		behave->silent_overwrite = object->silent_overwrite;
		*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = object->cluster_size;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = object->cluster_size;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return(ret);
}

kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t		object;
	kern_return_t		ret;
	int			caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t)port->ip_kobject;
		/* a few checks to make sure user is obeying rules */
		if(*upl_size == 0) {
			if(offset >= named_entry->size)
				return(KERN_INVALID_RIGHT);
			*upl_size = named_entry->size - offset;
		}
		if(caller_flags & UPL_COPYOUT_FROM) {
			if((named_entry->protection & VM_PROT_READ)
						!= VM_PROT_READ) {
				return(KERN_INVALID_RIGHT);
			}
		} else {
			if((named_entry->protection &
				(VM_PROT_READ | VM_PROT_WRITE))
				!= (VM_PROT_READ | VM_PROT_WRITE)) {
				return(KERN_INVALID_RIGHT);
			}
		}
		if(named_entry->size < (offset + *upl_size))
			return(KERN_INVALID_ARGUMENT);

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if(named_entry->is_sub_map)
			return (KERN_INVALID_ARGUMENT);

		named_entry_lock(named_entry);

		if (named_entry->is_pager) {
			object = vm_object_enter(named_entry->backing.pager,
					named_entry->offset + named_entry->size,
					named_entry->internal,
					FALSE,
					FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return(KERN_INVALID_OBJECT);
			}

			/* JMM - drop reference on pager here? */

			/* create an extra reference for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			/* wait for object to be ready */
			while (!object->pager_ready) {
				vm_object_wait(object,
					VM_OBJECT_EVENT_PAGER_READY,
					THREAD_UNINT);
				vm_object_lock(object);
			}
			vm_object_unlock(object);
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			object = named_entry->backing.object;
			vm_object_reference(object);
			named_entry_unlock(named_entry);
		}
	} else {
		memory_object_control_t	control;
		control = (memory_object_control_t)port->ip_kobject;
		if (control == NULL)
			return (KERN_INVALID_ARGUMENT);
		object = memory_object_control_to_vm_object(control);
		if (object == VM_OBJECT_NULL)
			return (KERN_INVALID_ARGUMENT);
		vm_object_reference(object);
	}
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);

	return ret;
}

/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}

/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}

int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;

/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		memory_object_default_reference(current_manager);
		returned_manager = current_manager;
	} else {
		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	manager.
		 */

		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		if (cluster_size % PAGE_SIZE != 0) {
#if 0
			mutex_unlock(&memory_manager_default_lock);
			return KERN_INVALID_ARGUMENT;
#else
			cluster_size = round_page_32(cluster_size);
#endif
		}
		memory_manager_default_cluster = cluster_size;

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);
	}

	mutex_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return(KERN_SUCCESS);
}
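
/*
 * Illustrative sketch (not part of the original source): a default pager
 * registers itself roughly like this, handing over a naked send right and
 * receiving the previously installed manager back in the same in/out
 * parameter.  The new_manager_port variable is hypothetical.
 *
 *	memory_object_default_t	dmm = new_manager_port;
 *	kern_return_t		kr;
 *
 *	kr = host_default_memory_manager(host_priv_self(), &dmm, 0);
 *	// on success, dmm now holds the manager that was installed before
 */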

/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(
	memory_object_cluster_size_t *cluster_size)
{
	memory_object_default_t current_manager;

	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = thread_sleep_mutex((event_t) &memory_manager_default,
					 &memory_manager_default_lock,
					 THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	*cluster_size = memory_manager_default_cluster;
	mutex_unlock(&memory_manager_default_lock);

	return current_manager;
}

/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *	If there is no default memory manager, log an error,
 *	but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	mutex_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;
		mutex_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		mutex_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}

__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	mutex_init(&memory_manager_default_lock, 0);
}


/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	if(ops & UPL_POP_PHYSICAL) {
		if(object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if(object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while(TRUE) {
		if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if((dst_page->busy || dst_page->cleaning) &&
			   (((ops & UPL_POP_SET) &&
			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			vm_page_lock_queues();

			if (dst_page->no_isync == FALSE)
				pmap_disconnect(dst_page->phys_page);
			vm_page_free(dst_page);

			vm_page_unlock_queues();
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
			if(dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if(ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}

		if(ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
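
/*
 * Illustrative sketch (not from the original source): a caller that wants a
 * stable physical page number must set UPL_POP_BUSY in the same call, per
 * the convention asserted above, and clear it again when finished:
 *
 *	ppnum_t	pnum;
 *	int	pflags;
 *
 *	if (memory_object_page_op(control, offset,
 *				  UPL_POP_SET | UPL_POP_BUSY,
 *				  &pnum, &pflags) == KERN_SUCCESS) {
 *		// use pnum while the page remains busy
 *		memory_object_page_op(control, offset,
 *				      UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
 *	}
 */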

/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	memory_object_offset_t	offset;
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count == 0) {
		if (range) {
			if (ops & UPL_ROP_PRESENT)
				*range = 0;
			else
				*range = offset_end - offset_beg;
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object,
						dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				vm_page_lock_queues();

				if (dst_page->no_isync == FALSE)
					pmap_disconnect(dst_page->phys_page);
				vm_page_free(dst_page);

				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range)
		*range = offset - offset_beg;

	return KERN_SUCCESS;
}
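
/*
 * Illustrative sketch (not part of the original source): probing how far a
 * resident run extends from a starting offset, without building a UPL.
 * The start and probe_size values are hypothetical.
 *
 *	int	range = 0;
 *
 *	memory_object_range_op(control,
 *			       start,
 *			       start + probe_size,
 *			       UPL_ROP_PRESENT,
 *			       &range);
 *	// 'range' now holds the number of bytes, starting at 'start',
 *	// that are backed by resident pages before the first absent one.
 */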

kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t	*	has_pages_resident)
{
	vm_object_t		object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}

static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	return;
}

__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t		object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL)
		control->object = object;
	return (control);
}

__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t	control,
	vm_object_t		object)
{
	assert((control->object != VM_OBJECT_NULL) &&
	       (control->object != object));
	control->object = object;
}

__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return VM_OBJECT_NULL;

	return (control->object);
}

memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}

void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->object != VM_OBJECT_NULL);
	control->object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
	memory_object_default_t dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t dmm)
{
	ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}


/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_reference(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_reference(memory_object);
	} else
#endif
		dp_memory_object_reference(memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_deallocate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_deallocate(memory_object);
	} else
#endif
		dp_memory_object_deallocate(memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_init(memory_object,
					memory_control,
					memory_object_page_size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_init(memory_object,
					 memory_control,
					 memory_object_page_size);
	} else
#endif
		return dp_memory_object_init(memory_object,
					     memory_control,
					     memory_object_page_size);
}

/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_terminate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_terminate(memory_object);
	} else
#endif
		return dp_memory_object_terminate(memory_object);
}

/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_request(memory_object,
						offset,
						length,
						desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_request(memory_object,
						 offset,
						 length,
						 desired_access);
	} else
#endif
		return dp_memory_object_data_request(memory_object,
						     offset,
						     length,
						     desired_access);
}

/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	memory_object_offset_t *resid_offset,
	int	*io_error,
	boolean_t dirty,
	boolean_t kernel_copy,
	int	upl_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_return(memory_object,
					       offset,
					       size,
					       resid_offset,
					       io_error,
					       dirty,
					       kernel_copy,
					       upl_flags);
	} else if (memory_object->pager == &device_pager_workaround) {

		return device_pager_data_return(memory_object,
						offset,
						size,
						dirty,
						kernel_copy,
						upl_flags);
	}
	else
#endif
	{
		return dp_memory_object_data_return(memory_object,
						    offset,
						    size,
						    NULL,
						    NULL,
						    FALSE,
						    FALSE,
						    0);
	}
}

/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_initialize(memory_object,
						   offset,
						   size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_initialize(memory_object,
						    offset,
						    size);
	} else
#endif
		return dp_memory_object_data_initialize(memory_object,
							offset,
							size);
}

/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_unlock(memory_object,
					       offset,
					       size,
					       desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_unlock(memory_object,
						offset,
						size,
						desired_access);
	} else
#endif
		return dp_memory_object_data_unlock(memory_object,
						    offset,
						    size,
						    desired_access);
}

/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_sync_t sync_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_synchronize(memory_object,
					       offset,
					       size,
					       sync_flags);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_synchronize(memory_object,
						offset,
						size,
						sync_flags);
	} else
#endif
		return dp_memory_object_synchronize(memory_object,
						    offset,
						    size,
						    sync_flags);
}

/* Routine memory_object_unmap */
kern_return_t memory_object_unmap
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_unmap(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_unmap(memory_object);
	} else
#endif
		return dp_memory_object_unmap(memory_object);
}

/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}

upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t upl;

	ip_lock(port);
	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
			ip_unlock(port);
			return (upl_t)NULL;
	}
	upl = (upl_t) port->ip_kobject;
	ip_unlock(port);
	upl_lock(upl);
	upl->ref_count += 1;
	upl_unlock(upl);
	return upl;
}

mach_port_t
convert_upl_to_port(
	__unused upl_t		upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t			port,
	__unused mach_port_mscount_t		mscount)
{
	return;
}