/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */

#if MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>
memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
vm_size_t		memory_manager_default_cluster = 0;
decl_mutex_data(,	memory_manager_default_lock)
/*
 *	Routine:	memory_object_should_return_page
 *
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))

typedef int memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE		0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK	1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN	2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN	3

memory_object_lock_result_t memory_object_lock_page(
			vm_page_t		m,
			memory_object_return_t	should_return,
			boolean_t		should_flush,
			vm_prot_t		prot);
/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    (integer_t)m, should_return, should_flush, prot, 0);

	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning)
		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if (m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no */
			/* relevant data to return */
			if (m->wire_count == 0) {
				VM_PAGE_FREE(m);
				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);

	if (m->wire_count != 0) {
		/*
		 *	If no change would take place
		 *	anyway, return successfully.
		 *
		 *	No change means:
		 *		Not flushing AND
		 *		No change to page lock [2 checks]  AND
		 *		Should not return page
		 *
		 * XXX	This doesn't handle sending a copy of a wired
		 * XXX	page to the pager, but that will require some
		 * XXX	significant surgery.
		 */
		if (!should_flush &&
		    (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
		    ! memory_object_should_return_page(m, should_return)) {

			/*
			 *	Restart page unlock requests,
			 *	even though no change took place.
			 *	[Memory managers may be expecting
			 *	to see new requests.]
			 */
			m->unlock_request = VM_PROT_NONE;
			PAGE_WAKEUP(m);

			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}

		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}

	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 */

	if (should_flush)
		prot = VM_PROT_ALL;

	/*
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		if ((m->page_lock ^ prot) & prot) {
			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
		}
#if 0
		/* code associated with the vestigial
		 * memory_object_data_unlock
		 */
		m->page_lock = prot;
		m->lock_supplied = TRUE;
		if (prot != VM_PROT_NONE)
			m->unusual = TRUE;
		else
			m->unusual = FALSE;

		/*
		 *	Restart any past unlock requests, even if no
		 *	change resulted.  If the manager explicitly
		 *	requested no protection change, then it is assumed
		 *	to be remembering past requests.
		 */

		m->unlock_request = VM_PROT_NONE;
		PAGE_WAKEUP(m);
#endif	/* 0 */
	}

	/*
	 *	Handle page returning.
	 */

	if (memory_object_should_return_page(m, should_return)) {

		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */

		if (!should_flush) {
			vm_page_lock_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			vm_page_unlock_queues();
		}

		pmap_disconnect(m->phys_page);

		if (m->dirty)
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	Handle flushing
	 */
	if (should_flush) {
		VM_PAGE_FREE(m);
	} else {
		/*
		 *	XXX Make clean but not flush a paging hint,
		 *	and deactivate the pages.  This is a hack
		 *	because it overloads flush/clean with
		 *	implementation-dependent meaning.  This only
		 *	happens to pages that are already clean.
		 */

		if (vm_page_deactivate_hint &&
		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
		}
	}

	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
}
#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	register int	upl_flags;					\
									\
	vm_object_unlock(object);					\
									\
	if (iosync)							\
		upl_flags = UPL_MSYNC | UPL_IOSYNC;			\
	else								\
		upl_flags = UPL_MSYNC;					\
									\
	(void) memory_object_data_return(object->pager,		\
		po,							\
		data_cnt,						\
		ro,							\
		ioerr,							\
		(action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),	\
		!should_flush,						\
		upl_flags);						\
									\
	vm_object_lock(object);						\
MACRO_END
/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */

kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t	*	resid_offset,
	int			*	io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t		object;
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)control, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);
	offset -= object->paging_offset;

	(void)vm_object_update(object,
		offset, size, resid_offset, io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
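
/*
 * Example (illustrative sketch, not taken from any specific caller): a
 * client holding a memory_object_control_t that wants every dirty page in
 * the first megabyte pushed back to the pager and dropped from the cache
 * could issue
 *
 *	kr = memory_object_lock_request(control,
 *		(memory_object_offset_t) 0,
 *		(memory_object_size_t) (1024 * 1024),
 *		NULL, NULL,
 *		MEMORY_OBJECT_RETURN_DIRTY,
 *		MEMORY_OBJECT_DATA_FLUSH,
 *		VM_PROT_NO_CHANGE);
 *
 * where the NULL resid_offset/io_errno arguments simply decline the
 * residual-offset and I/O-error reporting.
 */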
/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement.
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1, i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed, the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable; otherwise, when
 *	the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}
/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}
/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *		FALSE	if no pages were returned to the pager
 *		TRUE	otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    (integer_t)object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
				      MEMORY_OBJECT_RETURN_ALL :
				      MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
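
/*
 * Example (illustrative sketch): a caller that wants a range both written
 * back to the pager and discarded, waiting for the I/O to finish, passes
 * should_flush, should_return and should_iosync all TRUE:
 *
 *	(void) vm_object_sync(object, sync_offset, sync_size,
 *			      TRUE, TRUE, TRUE);
 *
 * sync_offset and sync_size here stand for whatever range the caller is
 * invalidating; they are not names used elsewhere in this file.
 */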
static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	vm_size_t	data_cnt = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	last_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {

		/*
		 * Limit the number of pages to be cleaned at once.
		 */
		if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
			LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
				pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
			data_cnt = 0;
		}

		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    (integer_t)object, offset, page_lock_result, 0, 0);

			switch (page_lock_result)
			{
			case MEMORY_OBJECT_LOCK_RESULT_DONE:
				/*
				 *	End of a cluster of dirty pages.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				/*
				 *	Since it is necessary to block,
				 *	clean any dirty pages now.
				 */
				if (data_cnt) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
					continue;
				}
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				/*
				 * The clean and return cases are similar.
				 *
				 * if this would form a discontiguous block,
				 * clean the old pages and start anew.
				 *
				 * Mark the page busy since we will unlock the
				 * object if we issue the LIST_REQ_PAGEOUT
				 */
				m->busy = TRUE;
				if (data_cnt &&
				    ((last_offset != offset) || (pageout_action != page_lock_result))) {
					LIST_REQ_PAGEOUT_PAGES(object,
						data_cnt, pageout_action,
						paging_offset, offset_resid, io_errno, should_iosync);
					data_cnt = 0;
				}
				m->busy = FALSE;

				if (m->cleaning) {
					PAGE_SLEEP(object, m, THREAD_UNINT);
					continue;
				}
				if (data_cnt == 0) {
					pageout_action = page_lock_result;
					paging_offset = offset;
				}
				data_cnt += PAGE_SIZE;
				last_offset = offset + PAGE_SIZE_64;

				vm_page_lock_queues();

				m->list_req_pending = TRUE;
				m->cleaning = TRUE;

				if (should_flush) {
					/*
					 * and add additional state
					 * for the flush
					 */
					m->busy = TRUE;
					m->pageout = TRUE;
					vm_page_wire(m);
				}
				vm_page_unlock_queues();

				retval = 1;
				break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
			data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}
/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t	copy_object;
	boolean_t	data_returned = FALSE;
	boolean_t	update_cow;
	boolean_t	should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t	should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	int		num_of_extents;
	int		n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];

	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 * XXX	NOTE: May want to consider converting this to a page list
	 * XXX	vm_map_copy interface.  Need to understand object
	 * XXX	coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			!(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);

	if ((((copy_object = object->copy) != NULL) && update_cow) ||
			(flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;

		if (copy_object != NULL) {
			/* translate offset with respect to shadow's offset */
			copy_offset = (offset >= copy_object->shadow_offset) ?
				(vm_map_offset_t)(offset - copy_object->shadow_offset) :
				(vm_map_offset_t) 0;

			if (copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/* clip size with respect to shadow offset */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size -
					(copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}
			copy_size += copy_offset;

			vm_object_unlock(object);
			vm_object_lock(copy_object);
		} else {
			copy_object = object;

			copy_size = offset + size;
			copy_offset = offset;
		}

		vm_object_paging_begin(copy_object);
		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			prot = VM_PROT_WRITE|VM_PROT_READ;
			switch (vm_fault_page(copy_object, i,
				VM_PROT_WRITE|VM_PROT_READ,
				FALSE,
				THREAD_UNINT,
				copy_offset,
				copy_offset + copy_size,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot,
				&page,
				&top_page,
				(int *)0,
				&error,
				FALSE,
				FALSE, NULL, 0)) {

			case VM_FAULT_SUCCESS:
				if (top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				} else {
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
				}
				break;
			case VM_FAULT_RETRY:
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				prot = VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_ERROR:
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			}
		}
		vm_object_paging_end(copy_object);
		if (copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_lock(object);
		}
	}
	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		return KERN_SUCCESS;
	}
	if (((copy_object = object->copy) != NULL) &&
			(flags & MEMORY_OBJECT_DATA_PURGE)) {
		copy_object->shadow_severed = TRUE;
		copy_object->shadowed = FALSE;
		copy_object->shadow = NULL;
		/* delete the ref the COW was holding on the target object */
		vm_object_deallocate(object);
	}
BYPASS_COW_COPYIN:
	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		m;
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);

			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;

						num_of_extents = 1;
						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	vm_offset_t		length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    (integer_t)object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */
static kern_return_t
vm_object_set_attributes_common(
	vm_object_t	object,
	boolean_t	may_cache,
	memory_object_copy_strategy_t copy_strategy,
	boolean_t	temporary,
	memory_object_cluster_size_t cluster_size,
	boolean_t	silent_overwrite,
	boolean_t	advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    (integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
	case MEMORY_OBJECT_COPY_DELAY:
		break;
	default:
		return(KERN_INVALID_ARGUMENT);
	}

#if !ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);
#endif	/* !ADVISORY_PAGEOUT */

	if (cluster_size != 0) {
		int	pages_per_cluster;
		pages_per_cluster = atop_32(cluster_size);
		/*
		 * Cluster size must be integral multiple of page size,
		 * and be a power of 2 number of pages.
		 */
		if ((cluster_size & (PAGE_SIZE-1)) ||
		    ((pages_per_cluster-1) & pages_per_cluster))
			return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;
	if (cluster_size == 0)
		cluster_size = PAGE_SIZE;
	object->cluster_size = cluster_size;

	assert(cluster_size >= PAGE_SIZE &&
	       cluster_size % PAGE_SIZE == 0);

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed. See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_cluster_size_t	cluster_size;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;

	invalidate = object->invalidate;

	cluster_size = object->cluster_size;
	vm_object_unlock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;
		cluster_size = round_page_32(perf->cluster_size);

		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;
		cluster_size = page_size;

		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		cluster_size = attr->cluster_size;
		temporary = attr->temporary;

		break;
	    }

	    default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return(result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						cluster_size,
						silent_overwrite,
						advisory_pageout));
}
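
/*
 * Example (illustrative sketch): a pager that wants its object to remain
 * cached and to use 128K clusters could hand in a performance record:
 *
 *	memory_object_perf_info_data_t	perf;
 *
 *	perf.cluster_size = 128 * 1024;
 *	perf.may_cache    = TRUE;
 *	kr = memory_object_change_attributes(control,
 *		MEMORY_OBJECT_PERFORMANCE_INFO,
 *		(memory_object_info_t) &perf,
 *		MEMORY_OBJECT_PERF_INFO_COUNT);
 */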
kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif

		*count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif
		behave->advisory_pageout = object->advisory_pageout;
		behave->silent_overwrite = object->silent_overwrite;
		*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = object->cluster_size;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = object->cluster_size;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return (ret);
}
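
/*
 * Example (illustrative sketch): reading back the same attributes the
 * change_attributes example above sets:
 *
 *	memory_object_perf_info_data_t	perf;
 *	mach_msg_type_number_t		count = MEMORY_OBJECT_PERF_INFO_COUNT;
 *
 *	kr = memory_object_get_attributes(control,
 *		MEMORY_OBJECT_PERFORMANCE_INFO,
 *		(memory_object_info_t) &perf, &count);
 */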
kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t	object;
	kern_return_t	ret;
	int		caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t)port->ip_kobject;
		/* a few checks to make sure user is obeying rules */
		if (*upl_size == 0) {
			if (offset >= named_entry->size)
				return(KERN_INVALID_RIGHT);
			*upl_size = named_entry->size - offset;
		}
		if (caller_flags & UPL_COPYOUT_FROM) {
			if ((named_entry->protection & VM_PROT_READ)
					!= VM_PROT_READ) {
				return(KERN_INVALID_RIGHT);
			}
		} else {
			if ((named_entry->protection &
				(VM_PROT_READ | VM_PROT_WRITE))
				!= (VM_PROT_READ | VM_PROT_WRITE)) {
				return(KERN_INVALID_RIGHT);
			}
		}
		if (named_entry->size < (offset + *upl_size))
			return(KERN_INVALID_ARGUMENT);

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if (named_entry->is_sub_map)
			return (KERN_INVALID_ARGUMENT);

		named_entry_lock(named_entry);

		if (named_entry->is_pager) {
			object = vm_object_enter(named_entry->backing.pager,
					named_entry->offset + named_entry->size,
					named_entry->internal,
					FALSE,
					FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return(KERN_INVALID_OBJECT);
			}

			/* JMM - drop reference on pager here? */

			/* create an extra reference for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			/* wait for object to be ready */
			while (!object->pager_ready) {
				vm_object_wait(object,
					       VM_OBJECT_EVENT_PAGER_READY,
					       THREAD_UNINT);
				vm_object_lock(object);
			}
			vm_object_unlock(object);
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			object = named_entry->backing.object;
			vm_object_reference(object);
			named_entry_unlock(named_entry);
		}
	} else {
		memory_object_control_t	control;
		control = (memory_object_control_t)port->ip_kobject;
		if (control == NULL)
			return (KERN_INVALID_ARGUMENT);
		object = memory_object_control_to_vm_object(control);
		if (object == VM_OBJECT_NULL)
			return (KERN_INVALID_ARGUMENT);
		vm_object_reference(object);
	}
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);
	return ret;
}
/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}
/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}

int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;
/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		memory_object_default_reference(current_manager);
		returned_manager = current_manager;
	} else {
		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	value.
		 */

		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		if (cluster_size % PAGE_SIZE != 0) {
#if 0
			mutex_unlock(&memory_manager_default_lock);
			return KERN_INVALID_ARGUMENT;
#else
			cluster_size = round_page_32(cluster_size);
#endif
		}
		memory_manager_default_cluster = cluster_size;

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);
	}

	mutex_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return(KERN_SUCCESS);
}
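
/*
 * Example (illustrative sketch, assuming kernel-internal context where
 * host_priv_self() is available): passing MEMORY_OBJECT_DEFAULT_NULL simply
 * queries the current setting, returning a new send right in dmm:
 *
 *	memory_object_default_t	dmm = MEMORY_OBJECT_DEFAULT_NULL;
 *
 *	kr = host_default_memory_manager(host_priv_self(), &dmm, 0);
 *
 * A privileged default pager instead passes its own port to install itself,
 * receiving the previous manager (if any) back through dmm.
 */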
/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(
	memory_object_cluster_size_t *cluster_size)
{
	memory_object_default_t current_manager;

	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = thread_sleep_mutex((event_t) &memory_manager_default,
					 &memory_manager_default_lock,
					 THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	*cluster_size = memory_manager_default_cluster;
	mutex_unlock(&memory_manager_default_lock);

	return current_manager;
}
/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not.  Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *		If there is no default memory manager, log an error,
 *		but only the first time.
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	mutex_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;
		mutex_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		mutex_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}
__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	mutex_init(&memory_manager_default_lock, 0);
}
/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t	object;
	vm_page_t	dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	if (ops & UPL_POP_PHYSICAL) {
		if (object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while (TRUE) {
		if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if ((dst_page->busy || dst_page->cleaning) &&
			   (((ops & UPL_POP_SET) &&
			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			vm_page_lock_queues();

			if (dst_page->no_isync == FALSE)
				pmap_disconnect(dst_page->phys_page);
			vm_page_free(dst_page);

			vm_page_unlock_queues();
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if (dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if (dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if (dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if (dst_page->absent) *flags |= UPL_POP_ABSENT;
			if (dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if (ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}

		if (ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
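
/*
 * Example (illustrative sketch): a caller that wants to pin a page's state
 * while it inspects the physical page number typically sets BUSY first,
 * per the convention asserted above, and clears it (also waking any
 * waiters) when done.  "po_offset" here is just whatever object offset
 * the caller cares about:
 *
 *	ppnum_t	pnum;
 *	int	pflags;
 *
 *	kr = memory_object_page_op(control, po_offset,
 *		UPL_POP_SET | UPL_POP_BUSY, &pnum, &pflags);
 *	...use pnum while the page stays busy...
 *	kr = memory_object_page_op(control, po_offset,
 *		UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
 */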
/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	memory_object_offset_t	offset;
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count == 0) {
		if (range) {
			if (ops & UPL_ROP_PRESENT)
				*range = 0;
			else
				*range = offset_end - offset_beg;
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object,
						   dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				vm_page_lock_queues();

				if (dst_page->no_isync == FALSE)
					pmap_disconnect(dst_page->phys_page);
				vm_page_free(dst_page);

				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range)
		*range = offset - offset_beg;

	return KERN_SUCCESS;
}
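
/*
 * Example (illustrative sketch): tossing every resident page in a range
 * without caring about per-page state, and learning how far the scan got:
 *
 *	int	range = 0;
 *
 *	kr = memory_object_range_op(control, start, end,
 *				    UPL_ROP_DUMP, &range);
 *
 * Passing UPL_ROP_PRESENT (or UPL_ROP_ABSENT) instead stops the scan at the
 * first non-resident (or first resident) page, so "range" reports the length
 * of the leading run of present (or absent) pages.  "start" and "end" are
 * simply the caller's chosen bounds.
 */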
kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t *		has_pages_resident)
{
	vm_object_t	object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}
static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	return;
}

__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t	object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL)
		control->object = object;
	return (control);
}

__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t	control,
	vm_object_t		object)
{
	assert((control->object != VM_OBJECT_NULL) &&
	       (control->object != object));
	control->object = object;
}

__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return VM_OBJECT_NULL;

	return (control->object);
}

memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}

mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}
void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->object != VM_OBJECT_NULL);
	control->object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
	memory_object_default_t	dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t	dmm)
{
	ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}

mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}
/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_reference(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_reference(memory_object);
	} else
#endif
		dp_memory_object_reference(memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_deallocate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_deallocate(memory_object);
	} else
#endif
		dp_memory_object_deallocate(memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_init(memory_object,
					memory_control,
					memory_object_page_size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_init(memory_object,
					 memory_control,
					 memory_object_page_size);
	} else
#endif
		return dp_memory_object_init(memory_object,
					     memory_control,
					     memory_object_page_size);
}

/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_terminate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_terminate(memory_object);
	} else
#endif
		return dp_memory_object_terminate(memory_object);
}

/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_request(memory_object,
						offset,
						length,
						desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_request(memory_object,
						 offset,
						 length,
						 desired_access);
	} else
#endif
		return dp_memory_object_data_request(memory_object,
						     offset,
						     length,
						     desired_access);
}

/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	memory_object_offset_t *resid_offset,
	int	*io_error,
	boolean_t dirty,
	boolean_t kernel_copy,
	int	upl_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_return(memory_object,
					       offset,
					       size,
					       resid_offset,
					       io_error,
					       dirty,
					       kernel_copy,
					       upl_flags);
	} else if (memory_object->pager == &device_pager_workaround) {

		return device_pager_data_return(memory_object,
						offset,
						size,
						dirty,
						kernel_copy,
						upl_flags);
	} else
#endif
	{
		return dp_memory_object_data_return(memory_object,
						    offset,
						    size,
						    resid_offset,
						    io_error,
						    dirty,
						    kernel_copy,
						    upl_flags);
	}
}

/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_initialize(memory_object,
						   offset,
						   size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_initialize(memory_object,
						    offset,
						    size);
	} else
#endif
		return dp_memory_object_data_initialize(memory_object,
							offset,
							size);
}

/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_unlock(memory_object,
					       offset,
					       size,
					       desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_unlock(memory_object,
						offset,
						size,
						desired_access);
	} else
#endif
		return dp_memory_object_data_unlock(memory_object,
						    offset,
						    size,
						    desired_access);
}

/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_sync_t sync_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_synchronize(memory_object,
					       offset,
					       size,
					       sync_flags);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_synchronize(memory_object,
						offset,
						size,
						sync_flags);
	} else
#endif
		return dp_memory_object_synchronize(memory_object,
						    offset,
						    size,
						    sync_flags);
}

/* Routine memory_object_unmap */
kern_return_t memory_object_unmap
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_unmap(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_unmap(memory_object);
	} else
#endif
		return dp_memory_object_unmap(memory_object);
}

/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}

upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t upl;

	ip_lock(port);
	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
		ip_unlock(port);
		return (upl_t) NULL;
	}
	upl = (upl_t) port->ip_kobject;
	ip_unlock(port);

	return upl;
}

mach_port_t
convert_upl_to_port(
	__unused upl_t	upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t		port,
	__unused mach_port_mscount_t	mscount)
{
	return;
}