1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65 #include <advisory_pageout.h>
66
67 /*
68 * Interface dependencies:
69 */
70
71 #include <mach/std_types.h> /* For pointer_t */
72 #include <mach/mach_types.h>
73
74 #include <mach/mig.h>
75 #include <mach/kern_return.h>
76 #include <mach/memory_object.h>
77 #include <mach/memory_object_default.h>
78 #include <mach/memory_object_control_server.h>
79 #include <mach/host_priv_server.h>
80 #include <mach/boolean.h>
81 #include <mach/vm_prot.h>
82 #include <mach/message.h>
83
84 /*
85 * Implementation dependencies:
86 */
87 #include <string.h> /* For memcpy() */
88
89 #include <kern/xpr.h>
90 #include <kern/host.h>
91 #include <kern/thread.h> /* For current_thread() */
92 #include <kern/ipc_mig.h>
93 #include <kern/misc_protos.h>
94
95 #include <vm/vm_object.h>
96 #include <vm/vm_fault.h>
97 #include <vm/memory_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h> /* For pmap_clear_modify */
101 #include <vm/vm_kern.h> /* For kernel_map, vm_move */
102 #include <vm/vm_map.h> /* For vm_map_pageable */
103 #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
104
105 #if MACH_PAGEMAP
106 #include <vm/vm_external.h>
107 #endif /* MACH_PAGEMAP */
108
109 #include <vm/vm_protos.h>
110
111
112 memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
113 decl_mutex_data(, memory_manager_default_lock)
114
115
116 /*
117 * Routine: memory_object_should_return_page
118 *
119 * Description:
120 * Determine whether the given page should be returned,
121 * based on the page's state and on the given return policy.
122 *
123 * We should return the page if one of the following is true:
124 *
125 * 1. Page is dirty and should_return is not RETURN_NONE.
126 * 2. Page is precious and should_return is RETURN_ALL.
127 * 3. Should_return is RETURN_ANYTHING.
128 *
129 * As a side effect, m->dirty will be made consistent
130 * with pmap_is_modified(m), if should_return is not
131 * MEMORY_OBJECT_RETURN_NONE.
132 */
133
134 #define memory_object_should_return_page(m, should_return) \
135 (should_return != MEMORY_OBJECT_RETURN_NONE && \
136 (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
137 ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
138 (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
139
140 typedef int memory_object_lock_result_t;
141
142 #define MEMORY_OBJECT_LOCK_RESULT_DONE 0
143 #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
144 #define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
145 #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
146
147 memory_object_lock_result_t memory_object_lock_page(
148 vm_page_t m,
149 memory_object_return_t should_return,
150 boolean_t should_flush,
151 vm_prot_t prot);
152
153 /*
154 * Routine: memory_object_lock_page
155 *
156 * Description:
157 * Perform the appropriate lock operations on the
158 * given page. See the description of
159 * "memory_object_lock_request" for the meanings
160 * of the arguments.
161 *
162 * Returns an indication that the operation
163 * completed, blocked, or that the page must
164 * be cleaned.
165 */
166 memory_object_lock_result_t
167 memory_object_lock_page(
168 vm_page_t m,
169 memory_object_return_t should_return,
170 boolean_t should_flush,
171 vm_prot_t prot)
172 {
173 XPR(XPR_MEMORY_OBJECT,
174 "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
175 (integer_t)m, should_return, should_flush, prot, 0);
176
177 /*
178 * If we cannot change access to the page,
179 * either because a mapping is in progress
180 * (busy page) or because a mapping has been
181 * wired, then give up.
182 */
183
184 if (m->busy || m->cleaning)
185 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
186
187 /*
188 * Don't worry about pages for which the kernel
189 * does not have any data.
190 */
191
192 if (m->absent || m->error || m->restart) {
193 if(m->error && should_flush) {
194 /* dump the page, pager wants us to */
195 /* clean it up and there is no */
196 /* relevant data to return */
197 if(m->wire_count == 0) {
198 VM_PAGE_FREE(m);
199 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
200 }
201 } else {
202 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
203 }
204 }
205
206 assert(!m->fictitious);
207
208 /*
209 * If the page is wired, just clean or return the page if needed.
210 * Wired pages don't get flushed or disconnected from the pmap.
211 */
212
213 if (m->wire_count != 0) {
214 if (memory_object_should_return_page(m, should_return)) {
215 if (m->dirty)
216 return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
217 else
218 return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
219 }
220
221 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
222 }
223
224 /*
225 * If the page is to be flushed, allow
226 * that to be done as part of the protection.
227 */
228
229 if (should_flush)
230 prot = VM_PROT_ALL;
231
232 /*
233 * Set the page lock.
234 *
235 * If we are decreasing permission, do it now;
236 * let the fault handler take care of increases
237 * (pmap_page_protect may not increase protection).
238 */
239
240 if (prot != VM_PROT_NO_CHANGE) {
241 pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
242
243 PAGE_WAKEUP(m);
244 }
245
246 /*
247 * Handle page returning.
248 */
249 if (memory_object_should_return_page(m, should_return)) {
250
251 /*
252 * If we weren't planning
253 * to flush the page anyway,
254 * we may need to remove the
255 * page from the pageout
256 * system and from physical
257 * maps now.
258 */
259
260 vm_page_lockspin_queues();
261 VM_PAGE_QUEUES_REMOVE(m);
262 vm_page_unlock_queues();
263
264 if (!should_flush)
265 pmap_disconnect(m->phys_page);
266
267 if (m->dirty)
268 return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
269 else
270 return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
271 }
272
273 /*
274 * Handle flushing
275 */
276 if (should_flush) {
277 VM_PAGE_FREE(m);
278 } else {
279 /*
280 * XXX Treat 'clean but do not flush' as a paging hint,
281 * and deactivate the pages. This is a hack
282 * because it overloads flush/clean with
283 * implementation-dependent meaning. This only
284 * happens to pages that are already clean.
285 */
286
287 if (vm_page_deactivate_hint &&
288 (should_return != MEMORY_OBJECT_RETURN_NONE)) {
289 vm_page_lock_queues();
290 vm_page_deactivate(m);
291 vm_page_unlock_queues();
292 }
293 }
294
295 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
296 }
297
298 #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync) \
299 MACRO_BEGIN \
300 \
301 register int upl_flags; \
302 memory_object_t pager; \
303 \
304 if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) { \
305 vm_object_paging_begin(object); \
306 vm_object_unlock(object); \
307 \
308 if (iosync) \
309 upl_flags = UPL_MSYNC | UPL_IOSYNC; \
310 else \
311 upl_flags = UPL_MSYNC; \
312 \
313 (void) memory_object_data_return(pager, \
314 po, \
315 data_cnt, \
316 ro, \
317 ioerr, \
318 (action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\
319 !should_flush, \
320 upl_flags); \
321 \
322 vm_object_lock(object); \
323 vm_object_paging_end(object); \
324 } \
325 MACRO_END
326
327 /*
328 * Routine: memory_object_lock_request [user interface]
329 *
330 * Description:
331 * Control use of the data associated with the given
332 * memory object. For each page in the given range,
333 * perform the following operations, in order:
334 * 1) restrict access to the page (disallow
335 * forms specified by "prot");
336 * 2) return data to the manager (if "should_return"
337 * is RETURN_DIRTY and the page is dirty, or
338 * "should_return" is RETURN_ALL and the page
339 * is either dirty or precious); and,
340 * 3) flush the cached copy (if "should_flush"
341 * is asserted).
342 * The set of pages is defined by a starting offset
343 * ("offset") and size ("size"). Only pages with the
344 * same page alignment as the starting offset are
345 * considered.
346 *
347 * A single acknowledgement is sent (to the "reply_to"
348 * port) when these actions are complete. If successful,
349 * the naked send right for reply_to is consumed.
350 */
351
352 kern_return_t
353 memory_object_lock_request(
354 memory_object_control_t control,
355 memory_object_offset_t offset,
356 memory_object_size_t size,
357 memory_object_offset_t * resid_offset,
358 int * io_errno,
359 memory_object_return_t should_return,
360 int flags,
361 vm_prot_t prot)
362 {
363 vm_object_t object;
364 __unused boolean_t should_flush;
365
366 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
367
368 XPR(XPR_MEMORY_OBJECT,
369 "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
370 (integer_t)control, offset, size,
371 (((should_return&1)<<1)|should_flush), prot);
372
373 /*
374 * Check for bogus arguments.
375 */
376 object = memory_object_control_to_vm_object(control);
377 if (object == VM_OBJECT_NULL)
378 return (KERN_INVALID_ARGUMENT);
379
380 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
381 return (KERN_INVALID_ARGUMENT);
382
383 size = round_page_64(size);
384
385 /*
386 * Lock the object, and acquire a paging reference to
387 * prevent the memory_object reference from being released.
388 */
389 vm_object_lock(object);
390 vm_object_paging_begin(object);
391 offset -= object->paging_offset;
392
393 (void)vm_object_update(object,
394 offset, size, resid_offset, io_errno, should_return, flags, prot);
395
396 vm_object_paging_end(object);
397 vm_object_unlock(object);
398
399 return (KERN_SUCCESS);
400 }
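/*
 * Illustrative usage (editorial sketch, not part of the original source):
 * a kernel caller that wants to push every dirty or precious page in the
 * first megabyte of an object back to its pager and then discard the
 * cached copies could issue a request along these lines; "control" is
 * assumed to be a memory_object_control_t obtained elsewhere.
 *
 *	kern_return_t kr;
 *
 *	kr = memory_object_lock_request(control,
 *					(memory_object_offset_t) 0,
 *					(memory_object_size_t) (1024 * 1024),
 *					NULL,		// no residual offset wanted
 *					NULL,		// no I/O error code wanted
 *					MEMORY_OBJECT_RETURN_ALL,
 *					MEMORY_OBJECT_DATA_FLUSH,
 *					VM_PROT_NO_CHANGE);
 */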
401
402 /*
403 * memory_object_release_name: [interface]
404 *
405 * Enforces the name semantic on memory_object reference count decrements.
406 * This routine should not be called unless the caller holds a name
407 * reference gained through the memory_object_named_create or the
408 * memory_object_rename call.
409 * If the TERMINATE_IDLE flag is set, the call simply returns if the
410 * reference count is not 1, i.e. unless the object is idle with the
411 * name being the only remaining reference.
412 * If the decision is made to proceed, the name field flag is set to
413 * false and the reference count is decremented. If the RESPECT_CACHE
414 * flag is set and the reference count has gone to zero, the
415 * memory_object is checked to see if it is cacheable; otherwise, when
416 * the reference count is zero, it is simply terminated.
417 */
418
419 kern_return_t
420 memory_object_release_name(
421 memory_object_control_t control,
422 int flags)
423 {
424 vm_object_t object;
425
426 object = memory_object_control_to_vm_object(control);
427 if (object == VM_OBJECT_NULL)
428 return (KERN_INVALID_ARGUMENT);
429
430 return vm_object_release_name(object, flags);
431 }
432
433
434
435 /*
436 * Routine: memory_object_destroy [user interface]
437 * Purpose:
438 * Shut down a memory object, despite the
439 * presence of address map (or other) references
440 * to the vm_object.
441 */
442 kern_return_t
443 memory_object_destroy(
444 memory_object_control_t control,
445 kern_return_t reason)
446 {
447 vm_object_t object;
448
449 object = memory_object_control_to_vm_object(control);
450 if (object == VM_OBJECT_NULL)
451 return (KERN_INVALID_ARGUMENT);
452
453 return (vm_object_destroy(object, reason));
454 }
455
456 /*
457 * Routine: vm_object_sync
458 *
459 * Kernel internal function to synch out pages in a given
460 * range within an object to its memory manager. Much the
461 * same as memory_object_lock_request but page protection
462 * is not changed.
463 *
464 * If the should_flush and should_return flags are true, pages
465 * are flushed; that is, dirty and precious pages are written to
466 * the memory manager and then discarded. If should_return
467 * is false, only precious pages are returned to the memory
468 * manager.
469 *
470 * If should_flush is false and should_return is true, the memory
471 * manager's copy of the pages is updated. If should_return
472 * is also false, only the precious pages are updated. This
473 * last option is of limited utility.
474 *
475 * Returns:
476 * FALSE if no pages were returned to the pager
477 * TRUE otherwise.
478 */
479
480 boolean_t
481 vm_object_sync(
482 vm_object_t object,
483 vm_object_offset_t offset,
484 vm_object_size_t size,
485 boolean_t should_flush,
486 boolean_t should_return,
487 boolean_t should_iosync)
488 {
489 boolean_t rv;
490 int flags;
491
492 XPR(XPR_VM_OBJECT,
493 "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
494 (integer_t)object, offset, size, should_flush, should_return);
495
496 /*
497 * Lock the object, and acquire a paging reference to
498 * prevent the memory_object and control ports from
499 * being destroyed.
500 */
501 vm_object_lock(object);
502 vm_object_paging_begin(object);
503
504 if (should_flush)
505 flags = MEMORY_OBJECT_DATA_FLUSH;
506 else
507 flags = 0;
508
509 if (should_iosync)
510 flags |= MEMORY_OBJECT_IO_SYNC;
511
512 rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
513 (should_return) ?
514 MEMORY_OBJECT_RETURN_ALL :
515 MEMORY_OBJECT_RETURN_NONE,
516 flags,
517 VM_PROT_NO_CHANGE);
518
519
520 vm_object_paging_end(object);
521 vm_object_unlock(object);
522 return rv;
523 }
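/*
 * Illustrative usage (editorial sketch, not part of the original source):
 * to synchronously write the dirty and precious pages of the first "len"
 * bytes of "object" to its memory manager without discarding them, a
 * kernel caller could use the following; "object" and "len" are
 * placeholders for the caller's own state.
 *
 *	(void) vm_object_sync(object,
 *			      (vm_object_offset_t) 0,
 *			      (vm_object_size_t) len,
 *			      FALSE,	// should_flush: keep the pages resident
 *			      TRUE,	// should_return: push dirty/precious pages
 *			      TRUE);	// should_iosync: wait for the I/O
 */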
524
525
526
527
528 static int
529 vm_object_update_extent(
530 vm_object_t object,
531 vm_object_offset_t offset,
532 vm_object_offset_t offset_end,
533 vm_object_offset_t *offset_resid,
534 int *io_errno,
535 boolean_t should_flush,
536 memory_object_return_t should_return,
537 boolean_t should_iosync,
538 vm_prot_t prot)
539 {
540 vm_page_t m;
541 int retval = 0;
542 vm_size_t data_cnt = 0;
543 vm_object_offset_t paging_offset = 0;
544 vm_object_offset_t last_offset = offset;
545 memory_object_lock_result_t page_lock_result;
546 memory_object_lock_result_t pageout_action;
547
548 pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
549
550 for (;
551 offset < offset_end && object->resident_page_count;
552 offset += PAGE_SIZE_64) {
553
554 /*
555 * Limit the number of pages to be cleaned at once.
556 */
557 if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
558 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
559 pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
560 data_cnt = 0;
561 }
562
563 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
564 page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
565
566 XPR(XPR_MEMORY_OBJECT,
567 "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
568 (integer_t)object, offset, page_lock_result, 0, 0);
569
570 switch (page_lock_result)
571 {
572 case MEMORY_OBJECT_LOCK_RESULT_DONE:
573 /*
574 * End of a cluster of dirty pages.
575 */
576 if (data_cnt) {
577 LIST_REQ_PAGEOUT_PAGES(object,
578 data_cnt, pageout_action,
579 paging_offset, offset_resid, io_errno, should_iosync);
580 data_cnt = 0;
581 continue;
582 }
583 break;
584
585 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
586 /*
587 * Since it is necessary to block,
588 * clean any dirty pages now.
589 */
590 if (data_cnt) {
591 LIST_REQ_PAGEOUT_PAGES(object,
592 data_cnt, pageout_action,
593 paging_offset, offset_resid, io_errno, should_iosync);
594 data_cnt = 0;
595 continue;
596 }
597 PAGE_SLEEP(object, m, THREAD_UNINT);
598 continue;
599
600 case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
601 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
602 /*
603 * The clean and return cases are similar.
604 *
605 * if this would form a discontiguous block,
606 * clean the old pages and start anew.
607 *
608 * Mark the page busy since we will unlock the
609 * object if we issue the LIST_REQ_PAGEOUT
610 */
611 m->busy = TRUE;
612 if (data_cnt &&
613 ((last_offset != offset) || (pageout_action != page_lock_result))) {
614 LIST_REQ_PAGEOUT_PAGES(object,
615 data_cnt, pageout_action,
616 paging_offset, offset_resid, io_errno, should_iosync);
617 data_cnt = 0;
618 }
619 m->busy = FALSE;
620
621 if (m->cleaning) {
622 PAGE_SLEEP(object, m, THREAD_UNINT);
623 continue;
624 }
625 if (data_cnt == 0) {
626 pageout_action = page_lock_result;
627 paging_offset = offset;
628 }
629 data_cnt += PAGE_SIZE;
630 last_offset = offset + PAGE_SIZE_64;
631
632 vm_page_lockspin_queues();
633 /*
634 * Clean
635 */
636 m->list_req_pending = TRUE;
637 m->cleaning = TRUE;
638
639 if (should_flush &&
640 /* let's not flush a wired page... */
641 !m->wire_count) {
642 /*
643 * and add additional state
644 * for the flush
645 */
646 m->busy = TRUE;
647 m->pageout = TRUE;
648 vm_page_wire(m);
649 }
650 vm_page_unlock_queues();
651
652 retval = 1;
653 break;
654 }
655 break;
656 }
657 }
658 /*
659 * We have completed the scan for applicable pages.
660 * Clean any pages that have been saved.
661 */
662 if (data_cnt) {
663 LIST_REQ_PAGEOUT_PAGES(object,
664 data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
665 }
666 return (retval);
667 }
668
669
670
671 /*
672 * Routine: vm_object_update
673 * Description:
674 * Work function for m_o_lock_request(), vm_o_sync().
675 *
676 * Called with object locked and paging ref taken.
677 */
678 kern_return_t
679 vm_object_update(
680 register vm_object_t object,
681 register vm_object_offset_t offset,
682 register vm_object_size_t size,
683 register vm_object_offset_t *resid_offset,
684 int *io_errno,
685 memory_object_return_t should_return,
686 int flags,
687 vm_prot_t protection)
688 {
689 vm_object_t copy_object = VM_OBJECT_NULL;
690 boolean_t data_returned = FALSE;
691 boolean_t update_cow;
692 boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
693 boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
694 int num_of_extents;
695 int n;
696 #define MAX_EXTENTS 8
697 #define EXTENT_SIZE (1024 * 1024 * 256)
698 #define RESIDENT_LIMIT (1024 * 32)
699 struct extent {
700 vm_object_offset_t e_base;
701 vm_object_offset_t e_min;
702 vm_object_offset_t e_max;
703 } extents[MAX_EXTENTS];
704
705 /*
706 * To avoid blocking while scanning for pages, save
707 * dirty pages to be cleaned all at once.
708 *
709 * XXXO A similar strategy could be used to limit the
710 * number of times that a scan must be restarted for
711 * other reasons. Those pages that would require blocking
712 * could be temporarily collected in another list, or
713 * their offsets could be recorded in a small array.
714 */
715
716 /*
717 * XXX NOTE: May want to consider converting this to a page list
718 * XXX vm_map_copy interface. Need to understand object
719 * XXX coalescing implications before doing so.
720 */
721
722 update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
723 && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
724 !(flags & MEMORY_OBJECT_DATA_PURGE)))
725 || (flags & MEMORY_OBJECT_COPY_SYNC);
726
727 if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
728 int collisions = 0;
729
730 while ((copy_object = object->copy) != VM_OBJECT_NULL) {
731 /*
732 * need to do a try here since we're swimming upstream
733 * against the normal lock ordering... however, we need
734 * to hold the object stable until we gain control of the
735 * copy object so we have to be careful how we approach this
736 */
737 if (vm_object_lock_try(copy_object)) {
738 /*
739 * we 'won' the lock on the copy object...
740 * no need to hold the object lock any longer...
741 * take a real reference on the copy object because
742 * we're going to call vm_fault_page on it which may
743 * under certain conditions drop the lock and the paging
744 * reference we're about to take... the reference
745 * will keep the copy object from going away if that happens
746 */
747 vm_object_unlock(object);
748 vm_object_reference_locked(copy_object);
749 break;
750 }
751 vm_object_unlock(object);
752
753 collisions++;
754 mutex_pause(collisions);
755
756 vm_object_lock(object);
757 }
758 }
759 if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
760 vm_map_size_t i;
761 vm_map_size_t copy_size;
762 vm_map_offset_t copy_offset;
763 vm_prot_t prot;
764 vm_page_t page;
765 vm_page_t top_page;
766 kern_return_t error = 0;
767 struct vm_object_fault_info fault_info;
768
769 if (copy_object != VM_OBJECT_NULL) {
770 /*
771 * translate offset with respect to shadow's offset
772 */
773 copy_offset = (offset >= copy_object->shadow_offset) ?
774 (vm_map_offset_t)(offset - copy_object->shadow_offset) :
775 (vm_map_offset_t) 0;
776
777 if (copy_offset > copy_object->size)
778 copy_offset = copy_object->size;
779
780 /*
781 * clip size with respect to shadow offset
782 */
783 if (offset >= copy_object->shadow_offset) {
784 copy_size = size;
785 } else if (size >= copy_object->shadow_offset - offset) {
786 copy_size = size - (copy_object->shadow_offset - offset);
787 } else {
788 copy_size = 0;
789 }
790
791 if (copy_offset + copy_size > copy_object->size) {
792 if (copy_object->size >= copy_offset) {
793 copy_size = copy_object->size - copy_offset;
794 } else {
795 copy_size = 0;
796 }
797 }
798 copy_size+=copy_offset;
799
800 } else {
801 copy_object = object;
802
803 copy_size = offset + size;
804 copy_offset = offset;
805 }
806 fault_info.interruptible = THREAD_UNINT;
807 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
808 fault_info.user_tag = 0;
809 fault_info.lo_offset = copy_offset;
810 fault_info.hi_offset = copy_size;
811 fault_info.no_cache = FALSE;
812
813 vm_object_paging_begin(copy_object);
814
815 for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
816 RETRY_COW_OF_LOCK_REQUEST:
817 fault_info.cluster_size = copy_size - i;
818
819 prot = VM_PROT_WRITE|VM_PROT_READ;
820 switch (vm_fault_page(copy_object, i,
821 VM_PROT_WRITE|VM_PROT_READ,
822 FALSE,
823 &prot,
824 &page,
825 &top_page,
826 (int *)0,
827 &error,
828 FALSE,
829 FALSE, &fault_info)) {
830
831 case VM_FAULT_SUCCESS:
832 if (top_page) {
833 vm_fault_cleanup(
834 page->object, top_page);
835 vm_object_lock(copy_object);
836 vm_object_paging_begin(copy_object);
837 }
838 vm_page_lock_queues();
839 if (!page->active && !page->inactive)
840 vm_page_deactivate(page);
841 vm_page_unlock_queues();
842
843 PAGE_WAKEUP_DONE(page);
844 break;
845 case VM_FAULT_RETRY:
846 prot = VM_PROT_WRITE|VM_PROT_READ;
847 vm_object_lock(copy_object);
848 vm_object_paging_begin(copy_object);
849 goto RETRY_COW_OF_LOCK_REQUEST;
850 case VM_FAULT_INTERRUPTED:
851 prot = VM_PROT_WRITE|VM_PROT_READ;
852 vm_object_lock(copy_object);
853 vm_object_paging_begin(copy_object);
854 goto RETRY_COW_OF_LOCK_REQUEST;
855 case VM_FAULT_MEMORY_SHORTAGE:
856 VM_PAGE_WAIT();
857 prot = VM_PROT_WRITE|VM_PROT_READ;
858 vm_object_lock(copy_object);
859 vm_object_paging_begin(copy_object);
860 goto RETRY_COW_OF_LOCK_REQUEST;
861 case VM_FAULT_FICTITIOUS_SHORTAGE:
862 vm_page_more_fictitious();
863 prot = VM_PROT_WRITE|VM_PROT_READ;
864 vm_object_lock(copy_object);
865 vm_object_paging_begin(copy_object);
866 goto RETRY_COW_OF_LOCK_REQUEST;
867 case VM_FAULT_MEMORY_ERROR:
868 if (object != copy_object)
869 vm_object_deallocate(copy_object);
870 vm_object_lock(object);
871 goto BYPASS_COW_COPYIN;
872 }
873
874 }
875 vm_object_paging_end(copy_object);
876 }
877 if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
878 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
879 vm_object_unlock(copy_object);
880 vm_object_deallocate(copy_object);
881 vm_object_lock(object);
882 }
883 return KERN_SUCCESS;
884 }
885 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
886 if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
887 copy_object->shadow_severed = TRUE;
888 copy_object->shadowed = FALSE;
889 copy_object->shadow = NULL;
890 /*
891 * delete the ref the COW was holding on the target object
892 */
893 vm_object_deallocate(object);
894 }
895 vm_object_unlock(copy_object);
896 vm_object_deallocate(copy_object);
897 vm_object_lock(object);
898 }
899 BYPASS_COW_COPYIN:
900
901 /*
902 * When we have a really large range to check relative
903 * to the number of actual resident pages, we'd like
904 * to use the resident page list to drive our checks.
905 * However, the object lock will get dropped while processing
906 * a page, which means the resident queue can change, which
907 * means we can't walk the queue as we process the pages.
908 * We also want to do the processing in offset order to allow
909 * 'runs' of pages to be collected if we're being told to
910 * flush to disk... the resident page queue is NOT ordered.
911 *
912 * A temporary solution (until we figure out how to deal with
913 * large address spaces more generically) is to pre-flight
914 * the resident page queue (if it's small enough) and develop
915 * a collection of extents (that encompass actual resident pages)
916 * to visit. This will at least allow us to deal with some of the
917 * more pathological cases in a more efficient manner. The current
918 * worst case (a single resident page at the end of an extremely large
919 * range) can take minutes to complete for ranges in the terabyte
920 * category... since this routine is called when truncating a file,
921 * and we currently support files up to 16 Tbytes in size, this
922 * is not a theoretical problem.
923 */
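/*
 * Worked example (editorial note, derived only from the constants above):
 * EXTENT_SIZE is 256MB (0x10000000), so e_mask is ~0x0FFFFFFF.  A resident
 * page at offset 0x123456000 therefore lands in the extent whose e_base is
 * 0x120000000, and any other resident page sharing that 256MB-aligned base
 * is folded into the same extent by widening e_min/e_max instead of
 * consuming another of the MAX_EXTENTS slots.
 */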
924
925 if ((object->resident_page_count < RESIDENT_LIMIT) &&
926 (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
927 vm_page_t next;
928 vm_object_offset_t start;
929 vm_object_offset_t end;
930 vm_object_size_t e_mask;
931 vm_page_t m;
932
933 start = offset;
934 end = offset + size;
935 num_of_extents = 0;
936 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
937
938 m = (vm_page_t) queue_first(&object->memq);
939
940 while (!queue_end(&object->memq, (queue_entry_t) m)) {
941 next = (vm_page_t) queue_next(&m->listq);
942
943 if ((m->offset >= start) && (m->offset < end)) {
944 /*
945 * this is a page we're interested in
946 * try to fit it into a current extent
947 */
948 for (n = 0; n < num_of_extents; n++) {
949 if ((m->offset & e_mask) == extents[n].e_base) {
950 /*
951 * use (PAGE_SIZE - 1) to determine the
952 * max offset so that we don't wrap if
953 * we're at the last page of the space
954 */
955 if (m->offset < extents[n].e_min)
956 extents[n].e_min = m->offset;
957 else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
958 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
959 break;
960 }
961 }
962 if (n == num_of_extents) {
963 /*
964 * didn't find a current extent that can encompass
965 * this page
966 */
967 if (n < MAX_EXTENTS) {
968 /*
969 * if we still have room,
970 * create a new extent
971 */
972 extents[n].e_base = m->offset & e_mask;
973 extents[n].e_min = m->offset;
974 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
975
976 num_of_extents++;
977 } else {
978 /*
979 * no room to create a new extent...
980 * fall back to a single extent based
981 * on the min and max page offsets
982 * we find in the range we're interested in...
983 * first, look through the extent list and
984 * develop the overall min and max for the
985 * pages we've looked at up to this point
986 */
987 for (n = 1; n < num_of_extents; n++) {
988 if (extents[n].e_min < extents[0].e_min)
989 extents[0].e_min = extents[n].e_min;
990 if (extents[n].e_max > extents[0].e_max)
991 extents[0].e_max = extents[n].e_max;
992 }
993 /*
994 * now setup to run through the remaining pages
995 * to determine the overall min and max
996 * offset for the specified range
997 */
998 extents[0].e_base = 0;
999 e_mask = 0;
1000 num_of_extents = 1;
1001
1002 /*
1003 * by continuing, we'll reprocess the
1004 * page that forced us to abandon trying
1005 * to develop multiple extents
1006 */
1007 continue;
1008 }
1009 }
1010 }
1011 m = next;
1012 }
1013 } else {
1014 extents[0].e_min = offset;
1015 extents[0].e_max = offset + (size - 1);
1016
1017 num_of_extents = 1;
1018 }
1019 for (n = 0; n < num_of_extents; n++) {
1020 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1021 should_flush, should_return, should_iosync, protection))
1022 data_returned = TRUE;
1023 }
1024 return (data_returned);
1025 }
1026
1027
1028 /*
1029 * Routine: memory_object_synchronize_completed [user interface]
1030 *
1031 * Tell kernel that previously synchronized data
1032 * (memory_object_synchronize) has been queued or placed on the
1033 * backing storage.
1034 *
1035 * Note: there may be multiple synchronize requests for a given
1036 * memory object outstanding but they will not overlap.
1037 */
1038
1039 kern_return_t
1040 memory_object_synchronize_completed(
1041 memory_object_control_t control,
1042 memory_object_offset_t offset,
1043 vm_offset_t length)
1044 {
1045 vm_object_t object;
1046 msync_req_t msr;
1047
1048 object = memory_object_control_to_vm_object(control);
1049
1050 XPR(XPR_MEMORY_OBJECT,
1051 "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
1052 (integer_t)object, offset, length, 0, 0);
1053
1054 /*
1055 * Look for bogus arguments
1056 */
1057
1058 if (object == VM_OBJECT_NULL)
1059 return (KERN_INVALID_ARGUMENT);
1060
1061 vm_object_lock(object);
1062
1063 /*
1064 * search for sync request structure
1065 */
1066 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
1067 if (msr->offset == offset && msr->length == length) {
1068 queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
1069 break;
1070 }
1071 }/* queue_iterate */
1072
1073 if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
1074 vm_object_unlock(object);
1075 return KERN_INVALID_ARGUMENT;
1076 }
1077
1078 msr_lock(msr);
1079 vm_object_unlock(object);
1080 msr->flag = VM_MSYNC_DONE;
1081 msr_unlock(msr);
1082 thread_wakeup((event_t) msr);
1083
1084 return KERN_SUCCESS;
1085 }/* memory_object_synchronize_completed */
1086
1087 static kern_return_t
1088 vm_object_set_attributes_common(
1089 vm_object_t object,
1090 boolean_t may_cache,
1091 memory_object_copy_strategy_t copy_strategy,
1092 boolean_t temporary,
1093 boolean_t silent_overwrite,
1094 boolean_t advisory_pageout)
1095 {
1096 boolean_t object_became_ready;
1097
1098 XPR(XPR_MEMORY_OBJECT,
1099 "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
1100 (integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);
1101
1102 if (object == VM_OBJECT_NULL)
1103 return(KERN_INVALID_ARGUMENT);
1104
1105 /*
1106 * Verify the attributes of importance
1107 */
1108
1109 switch(copy_strategy) {
1110 case MEMORY_OBJECT_COPY_NONE:
1111 case MEMORY_OBJECT_COPY_DELAY:
1112 break;
1113 default:
1114 return(KERN_INVALID_ARGUMENT);
1115 }
1116
1117 #if !ADVISORY_PAGEOUT
1118 if (silent_overwrite || advisory_pageout)
1119 return(KERN_INVALID_ARGUMENT);
1120
1121 #endif /* !ADVISORY_PAGEOUT */
1122 if (may_cache)
1123 may_cache = TRUE;
1124 if (temporary)
1125 temporary = TRUE;
1126
1127 vm_object_lock(object);
1128
1129 /*
1130 * Copy the attributes
1131 */
1132 assert(!object->internal);
1133 object_became_ready = !object->pager_ready;
1134 object->copy_strategy = copy_strategy;
1135 object->can_persist = may_cache;
1136 object->temporary = temporary;
1137 object->silent_overwrite = silent_overwrite;
1138 object->advisory_pageout = advisory_pageout;
1139
1140 /*
1141 * Wake up anyone waiting for the ready attribute
1142 * to become asserted.
1143 */
1144
1145 if (object_became_ready) {
1146 object->pager_ready = TRUE;
1147 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1148 }
1149
1150 vm_object_unlock(object);
1151
1152 return(KERN_SUCCESS);
1153 }
1154
1155 /*
1156 * Set the memory object attribute as provided.
1157 *
1158 * XXX This routine cannot be completed until the vm_msync, clean
1159 * in place, and cluster work is completed. See ifdef notyet
1160 * below and note that vm_object_set_attributes_common()
1161 * may have to be expanded.
1162 */
1163 kern_return_t
1164 memory_object_change_attributes(
1165 memory_object_control_t control,
1166 memory_object_flavor_t flavor,
1167 memory_object_info_t attributes,
1168 mach_msg_type_number_t count)
1169 {
1170 vm_object_t object;
1171 kern_return_t result = KERN_SUCCESS;
1172 boolean_t temporary;
1173 boolean_t may_cache;
1174 boolean_t invalidate;
1175 memory_object_copy_strategy_t copy_strategy;
1176 boolean_t silent_overwrite;
1177 boolean_t advisory_pageout;
1178
1179 object = memory_object_control_to_vm_object(control);
1180 if (object == VM_OBJECT_NULL)
1181 return (KERN_INVALID_ARGUMENT);
1182
1183 vm_object_lock(object);
1184
1185 temporary = object->temporary;
1186 may_cache = object->can_persist;
1187 copy_strategy = object->copy_strategy;
1188 silent_overwrite = object->silent_overwrite;
1189 advisory_pageout = object->advisory_pageout;
1190 #if notyet
1191 invalidate = object->invalidate;
1192 #endif
1193 vm_object_unlock(object);
1194
1195 switch (flavor) {
1196 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1197 {
1198 old_memory_object_behave_info_t behave;
1199
1200 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1201 result = KERN_INVALID_ARGUMENT;
1202 break;
1203 }
1204
1205 behave = (old_memory_object_behave_info_t) attributes;
1206
1207 temporary = behave->temporary;
1208 invalidate = behave->invalidate;
1209 copy_strategy = behave->copy_strategy;
1210
1211 break;
1212 }
1213
1214 case MEMORY_OBJECT_BEHAVIOR_INFO:
1215 {
1216 memory_object_behave_info_t behave;
1217
1218 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1219 result = KERN_INVALID_ARGUMENT;
1220 break;
1221 }
1222
1223 behave = (memory_object_behave_info_t) attributes;
1224
1225 temporary = behave->temporary;
1226 invalidate = behave->invalidate;
1227 copy_strategy = behave->copy_strategy;
1228 silent_overwrite = behave->silent_overwrite;
1229 advisory_pageout = behave->advisory_pageout;
1230 break;
1231 }
1232
1233 case MEMORY_OBJECT_PERFORMANCE_INFO:
1234 {
1235 memory_object_perf_info_t perf;
1236
1237 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1238 result = KERN_INVALID_ARGUMENT;
1239 break;
1240 }
1241
1242 perf = (memory_object_perf_info_t) attributes;
1243
1244 may_cache = perf->may_cache;
1245
1246 break;
1247 }
1248
1249 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1250 {
1251 old_memory_object_attr_info_t attr;
1252
1253 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1254 result = KERN_INVALID_ARGUMENT;
1255 break;
1256 }
1257
1258 attr = (old_memory_object_attr_info_t) attributes;
1259
1260 may_cache = attr->may_cache;
1261 copy_strategy = attr->copy_strategy;
1262
1263 break;
1264 }
1265
1266 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1267 {
1268 memory_object_attr_info_t attr;
1269
1270 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1271 result = KERN_INVALID_ARGUMENT;
1272 break;
1273 }
1274
1275 attr = (memory_object_attr_info_t) attributes;
1276
1277 copy_strategy = attr->copy_strategy;
1278 may_cache = attr->may_cache_object;
1279 temporary = attr->temporary;
1280
1281 break;
1282 }
1283
1284 default:
1285 result = KERN_INVALID_ARGUMENT;
1286 break;
1287 }
1288
1289 if (result != KERN_SUCCESS)
1290 return(result);
1291
1292 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1293 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1294 temporary = TRUE;
1295 } else {
1296 temporary = FALSE;
1297 }
1298
1299 /*
1300 * XXX may_cache may become a tri-valued variable to handle
1301 * XXX uncache if not in use.
1302 */
1303 return (vm_object_set_attributes_common(object,
1304 may_cache,
1305 copy_strategy,
1306 temporary,
1307 silent_overwrite,
1308 advisory_pageout));
1309 }
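/*
 * Illustrative usage (editorial sketch, not part of the original source):
 * a pager that wants its object cached with delayed-copy semantics could
 * fill in the MEMORY_OBJECT_ATTRIBUTE_INFO flavor and call this routine.
 * The memory_object_attr_info_data_t type is assumed here from the Mach
 * memory object headers rather than from this file.
 *
 *	memory_object_attr_info_data_t	attr;
 *	kern_return_t			kr;
 *
 *	attr.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
 *	attr.cluster_size = PAGE_SIZE;
 *	attr.may_cache_object = TRUE;
 *	attr.temporary = FALSE;
 *
 *	kr = memory_object_change_attributes(control,
 *					     MEMORY_OBJECT_ATTRIBUTE_INFO,
 *					     (memory_object_info_t) &attr,
 *					     MEMORY_OBJECT_ATTR_INFO_COUNT);
 */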
1310
1311 kern_return_t
1312 memory_object_get_attributes(
1313 memory_object_control_t control,
1314 memory_object_flavor_t flavor,
1315 memory_object_info_t attributes, /* pointer to OUT array */
1316 mach_msg_type_number_t *count) /* IN/OUT */
1317 {
1318 kern_return_t ret = KERN_SUCCESS;
1319 vm_object_t object;
1320
1321 object = memory_object_control_to_vm_object(control);
1322 if (object == VM_OBJECT_NULL)
1323 return (KERN_INVALID_ARGUMENT);
1324
1325 vm_object_lock(object);
1326
1327 switch (flavor) {
1328 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1329 {
1330 old_memory_object_behave_info_t behave;
1331
1332 if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1333 ret = KERN_INVALID_ARGUMENT;
1334 break;
1335 }
1336
1337 behave = (old_memory_object_behave_info_t) attributes;
1338 behave->copy_strategy = object->copy_strategy;
1339 behave->temporary = object->temporary;
1340 #if notyet /* remove when vm_msync complies and clean in place fini */
1341 behave->invalidate = object->invalidate;
1342 #else
1343 behave->invalidate = FALSE;
1344 #endif
1345
1346 *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1347 break;
1348 }
1349
1350 case MEMORY_OBJECT_BEHAVIOR_INFO:
1351 {
1352 memory_object_behave_info_t behave;
1353
1354 if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1355 ret = KERN_INVALID_ARGUMENT;
1356 break;
1357 }
1358
1359 behave = (memory_object_behave_info_t) attributes;
1360 behave->copy_strategy = object->copy_strategy;
1361 behave->temporary = object->temporary;
1362 #if notyet /* remove when vm_msync complies and clean in place fini */
1363 behave->invalidate = object->invalidate;
1364 #else
1365 behave->invalidate = FALSE;
1366 #endif
1367 behave->advisory_pageout = object->advisory_pageout;
1368 behave->silent_overwrite = object->silent_overwrite;
1369 *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1370 break;
1371 }
1372
1373 case MEMORY_OBJECT_PERFORMANCE_INFO:
1374 {
1375 memory_object_perf_info_t perf;
1376
1377 if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
1378 ret = KERN_INVALID_ARGUMENT;
1379 break;
1380 }
1381
1382 perf = (memory_object_perf_info_t) attributes;
1383 perf->cluster_size = PAGE_SIZE;
1384 perf->may_cache = object->can_persist;
1385
1386 *count = MEMORY_OBJECT_PERF_INFO_COUNT;
1387 break;
1388 }
1389
1390 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1391 {
1392 old_memory_object_attr_info_t attr;
1393
1394 if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1395 ret = KERN_INVALID_ARGUMENT;
1396 break;
1397 }
1398
1399 attr = (old_memory_object_attr_info_t) attributes;
1400 attr->may_cache = object->can_persist;
1401 attr->copy_strategy = object->copy_strategy;
1402
1403 *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
1404 break;
1405 }
1406
1407 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1408 {
1409 memory_object_attr_info_t attr;
1410
1411 if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
1412 ret = KERN_INVALID_ARGUMENT;
1413 break;
1414 }
1415
1416 attr = (memory_object_attr_info_t) attributes;
1417 attr->copy_strategy = object->copy_strategy;
1418 attr->cluster_size = PAGE_SIZE;
1419 attr->may_cache_object = object->can_persist;
1420 attr->temporary = object->temporary;
1421
1422 *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
1423 break;
1424 }
1425
1426 default:
1427 ret = KERN_INVALID_ARGUMENT;
1428 break;
1429 }
1430
1431 vm_object_unlock(object);
1432
1433 return(ret);
1434 }
1435
1436
1437 kern_return_t
1438 memory_object_iopl_request(
1439 ipc_port_t port,
1440 memory_object_offset_t offset,
1441 upl_size_t *upl_size,
1442 upl_t *upl_ptr,
1443 upl_page_info_array_t user_page_list,
1444 unsigned int *page_list_count,
1445 int *flags)
1446 {
1447 vm_object_t object;
1448 kern_return_t ret;
1449 int caller_flags;
1450
1451 caller_flags = *flags;
1452
1453 if (caller_flags & ~UPL_VALID_FLAGS) {
1454 /*
1455 * For forward compatibility's sake,
1456 * reject any unknown flag.
1457 */
1458 return KERN_INVALID_VALUE;
1459 }
1460
1461 if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
1462 vm_named_entry_t named_entry;
1463
1464 named_entry = (vm_named_entry_t)port->ip_kobject;
1465 /* a few checks to make sure the user is obeying the rules */
1466 if(*upl_size == 0) {
1467 if(offset >= named_entry->size)
1468 return(KERN_INVALID_RIGHT);
1469 *upl_size = named_entry->size - offset;
1470 }
1471 if(caller_flags & UPL_COPYOUT_FROM) {
1472 if((named_entry->protection & VM_PROT_READ)
1473 != VM_PROT_READ) {
1474 return(KERN_INVALID_RIGHT);
1475 }
1476 } else {
1477 if((named_entry->protection &
1478 (VM_PROT_READ | VM_PROT_WRITE))
1479 != (VM_PROT_READ | VM_PROT_WRITE)) {
1480 return(KERN_INVALID_RIGHT);
1481 }
1482 }
1483 if(named_entry->size < (offset + *upl_size))
1484 return(KERN_INVALID_ARGUMENT);
1485
1486 /* the caller's offset parameter is defined to be the */
1487 /* offset from the beginning of the named entry within the object */
1488 offset = offset + named_entry->offset;
1489
1490 if(named_entry->is_sub_map)
1491 return (KERN_INVALID_ARGUMENT);
1492
1493 named_entry_lock(named_entry);
1494
1495 if (named_entry->is_pager) {
1496 object = vm_object_enter(named_entry->backing.pager,
1497 named_entry->offset + named_entry->size,
1498 named_entry->internal,
1499 FALSE,
1500 FALSE);
1501 if (object == VM_OBJECT_NULL) {
1502 named_entry_unlock(named_entry);
1503 return(KERN_INVALID_OBJECT);
1504 }
1505
1506 /* JMM - drop reference on pager here? */
1507
1508 /* create an extra reference for the named entry */
1509 vm_object_lock(object);
1510 vm_object_reference_locked(object);
1511 named_entry->backing.object = object;
1512 named_entry->is_pager = FALSE;
1513 named_entry_unlock(named_entry);
1514
1515 /* wait for object to be ready */
1516 while (!object->pager_ready) {
1517 vm_object_wait(object,
1518 VM_OBJECT_EVENT_PAGER_READY,
1519 THREAD_UNINT);
1520 vm_object_lock(object);
1521 }
1522 vm_object_unlock(object);
1523 } else {
1524 /* This is the case where we are going to map */
1525 /* an already mapped object. If the object is */
1526 /* not ready, it is internal. An external */
1527 /* object cannot be mapped until it is ready, */
1528 /* so we can avoid the ready check */
1529 /* in this case. */
1530 object = named_entry->backing.object;
1531 vm_object_reference(object);
1532 named_entry_unlock(named_entry);
1533 }
1534 } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
1535 memory_object_control_t control;
1536 control = (memory_object_control_t) port;
1537 if (control == NULL)
1538 return (KERN_INVALID_ARGUMENT);
1539 object = memory_object_control_to_vm_object(control);
1540 if (object == VM_OBJECT_NULL)
1541 return (KERN_INVALID_ARGUMENT);
1542 vm_object_reference(object);
1543 } else {
1544 return KERN_INVALID_ARGUMENT;
1545 }
1546 if (object == VM_OBJECT_NULL)
1547 return (KERN_INVALID_ARGUMENT);
1548
1549 if (!object->private) {
1550 if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
1551 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
1552 if (object->phys_contiguous) {
1553 *flags = UPL_PHYS_CONTIG;
1554 } else {
1555 *flags = 0;
1556 }
1557 } else {
1558 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1559 }
1560
1561 ret = vm_object_iopl_request(object,
1562 offset,
1563 *upl_size,
1564 upl_ptr,
1565 user_page_list,
1566 page_list_count,
1567 caller_flags);
1568 vm_object_deallocate(object);
1569 return ret;
1570 }
1571
1572 /*
1573 * Routine: memory_object_upl_request [interface]
1574 * Purpose:
1575 * Cause the population of a portion of a vm_object.
1576 * Depending on the nature of the request, the pages
1577 * returned may contain valid data or be uninitialized.
1578 *
1579 */
1580
1581 kern_return_t
1582 memory_object_upl_request(
1583 memory_object_control_t control,
1584 memory_object_offset_t offset,
1585 upl_size_t size,
1586 upl_t *upl_ptr,
1587 upl_page_info_array_t user_page_list,
1588 unsigned int *page_list_count,
1589 int cntrl_flags)
1590 {
1591 vm_object_t object;
1592
1593 object = memory_object_control_to_vm_object(control);
1594 if (object == VM_OBJECT_NULL)
1595 return (KERN_INVALID_ARGUMENT);
1596
1597 return vm_object_upl_request(object,
1598 offset,
1599 size,
1600 upl_ptr,
1601 user_page_list,
1602 page_list_count,
1603 cntrl_flags);
1604 }
1605
1606 /*
1607 * Routine: memory_object_super_upl_request [interface]
1608 * Purpose:
1609 * Cause the population of a portion of a vm_object
1610 * in much the same way as memory_object_upl_request.
1611 * Depending on the nature of the request, the pages
1612 * returned may contain valid data or be uninitialized.
1613 * However, the region may be expanded up to the super
1614 * cluster size provided.
1615 */
1616
1617 kern_return_t
1618 memory_object_super_upl_request(
1619 memory_object_control_t control,
1620 memory_object_offset_t offset,
1621 upl_size_t size,
1622 upl_size_t super_cluster,
1623 upl_t *upl,
1624 upl_page_info_t *user_page_list,
1625 unsigned int *page_list_count,
1626 int cntrl_flags)
1627 {
1628 vm_object_t object;
1629
1630 object = memory_object_control_to_vm_object(control);
1631 if (object == VM_OBJECT_NULL)
1632 return (KERN_INVALID_ARGUMENT);
1633
1634 return vm_object_super_upl_request(object,
1635 offset,
1636 size,
1637 super_cluster,
1638 upl,
1639 user_page_list,
1640 page_list_count,
1641 cntrl_flags);
1642 }
1643
1644 kern_return_t
1645 memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
1646 vm_size_t *length, memory_object_fault_info_t fault_info)
1647 {
1648 vm_object_t object;
1649
1650 object = memory_object_control_to_vm_object(control);
1651
1652 if (object == VM_OBJECT_NULL || object->paging_offset > *start)
1653 return (KERN_INVALID_ARGUMENT);
1654
1655 *start -= object->paging_offset;
1656
1657 vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info);
1658
1659 *start += object->paging_offset;
1660
1661 return (KERN_SUCCESS);
1662 }
1663
1664
1665 int vm_stat_discard_cleared_reply = 0;
1666 int vm_stat_discard_cleared_unset = 0;
1667 int vm_stat_discard_cleared_too_late = 0;
1668
1669
1670
1671 /*
1672 * Routine: host_default_memory_manager [interface]
1673 * Purpose:
1674 * set/get the default memory manager port and default cluster
1675 * size.
1676 *
1677 * If successful, consumes the supplied naked send right.
1678 */
1679 kern_return_t
1680 host_default_memory_manager(
1681 host_priv_t host_priv,
1682 memory_object_default_t *default_manager,
1683 __unused memory_object_cluster_size_t cluster_size)
1684 {
1685 memory_object_default_t current_manager;
1686 memory_object_default_t new_manager;
1687 memory_object_default_t returned_manager;
1688 kern_return_t result = KERN_SUCCESS;
1689
1690 if (host_priv == HOST_PRIV_NULL)
1691 return(KERN_INVALID_HOST);
1692
1693 assert(host_priv == &realhost);
1694
1695 new_manager = *default_manager;
1696 mutex_lock(&memory_manager_default_lock);
1697 current_manager = memory_manager_default;
1698 returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
1699
1700 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1701 /*
1702 * Retrieve the current value.
1703 */
1704 returned_manager = current_manager;
1705 memory_object_default_reference(returned_manager);
1706 } else {
1707
1708 /*
1709 * If this is the first non-null manager, start
1710 * up the internal pager support.
1711 */
1712 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1713 result = vm_pageout_internal_start();
1714 if (result != KERN_SUCCESS)
1715 goto out;
1716 }
1717
1718 /*
1719 * Retrieve the current value,
1720 * and replace it with the supplied value.
1721 * We return the old reference to the caller
1722 * but we have to take a reference on the new
1723 * one.
1724 */
1725 returned_manager = current_manager;
1726 memory_manager_default = new_manager;
1727 memory_object_default_reference(new_manager);
1728
1729 /*
1730 * In case anyone's been waiting for a memory
1731 * manager to be established, wake them up.
1732 */
1733
1734 thread_wakeup((event_t) &memory_manager_default);
1735 }
1736 out:
1737 mutex_unlock(&memory_manager_default_lock);
1738
1739 *default_manager = returned_manager;
1740 return(result);
1741 }
1742
1743 /*
1744 * Routine: memory_manager_default_reference
1745 * Purpose:
1746 * Returns a naked send right for the default
1747 * memory manager. The returned right is always
1748 * valid (not IP_NULL or IP_DEAD).
1749 */
1750
1751 __private_extern__ memory_object_default_t
1752 memory_manager_default_reference(void)
1753 {
1754 memory_object_default_t current_manager;
1755
1756 mutex_lock(&memory_manager_default_lock);
1757 current_manager = memory_manager_default;
1758 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1759 wait_result_t res;
1760
1761 res = thread_sleep_mutex((event_t) &memory_manager_default,
1762 &memory_manager_default_lock,
1763 THREAD_UNINT);
1764 assert(res == THREAD_AWAKENED);
1765 current_manager = memory_manager_default;
1766 }
1767 memory_object_default_reference(current_manager);
1768 mutex_unlock(&memory_manager_default_lock);
1769
1770 return current_manager;
1771 }
1772
1773 /*
1774 * Routine: memory_manager_default_check
1775 *
1776 * Purpose:
1777 * Check whether a default memory manager has been set
1778 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1779 * and KERN_FAILURE if dmm does not exist.
1780 *
1781 * If there is no default memory manager, log an error,
1782 * but only the first time.
1783 *
1784 */
1785 __private_extern__ kern_return_t
1786 memory_manager_default_check(void)
1787 {
1788 memory_object_default_t current;
1789
1790 mutex_lock(&memory_manager_default_lock);
1791 current = memory_manager_default;
1792 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1793 static boolean_t logged; /* initialized to 0 */
1794 boolean_t complain = !logged;
1795 logged = TRUE;
1796 mutex_unlock(&memory_manager_default_lock);
1797 if (complain)
1798 printf("Warning: No default memory manager\n");
1799 return(KERN_FAILURE);
1800 } else {
1801 mutex_unlock(&memory_manager_default_lock);
1802 return(KERN_SUCCESS);
1803 }
1804 }
1805
1806 __private_extern__ void
1807 memory_manager_default_init(void)
1808 {
1809 memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
1810 mutex_init(&memory_manager_default_lock, 0);
1811 }
1812
1813
1814
1815 /* Allow manipulation of individual page state. This is actually part of */
1816 /* the UPL regimen but takes place on the object rather than on a UPL */
1817
1818 kern_return_t
1819 memory_object_page_op(
1820 memory_object_control_t control,
1821 memory_object_offset_t offset,
1822 int ops,
1823 ppnum_t *phys_entry,
1824 int *flags)
1825 {
1826 vm_object_t object;
1827
1828 object = memory_object_control_to_vm_object(control);
1829 if (object == VM_OBJECT_NULL)
1830 return (KERN_INVALID_ARGUMENT);
1831
1832 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1833 }
1834
1835 /*
1836 * memory_object_range_op offers a performance enhancement over
1837 * memory_object_page_op for page_op functions which do not require page
1838 * level state to be returned from the call. Page_op was created to provide
1839 * a low-cost alternative to page manipulation via UPLs when only a single
1840 * page was involved. The range_op call establishes the ability in the _op
1841 * family of functions to work on multiple pages where the lack of page level
1842 * state handling allows the caller to avoid the overhead of the upl structures.
1843 */
1844
1845 kern_return_t
1846 memory_object_range_op(
1847 memory_object_control_t control,
1848 memory_object_offset_t offset_beg,
1849 memory_object_offset_t offset_end,
1850 int ops,
1851 int *range)
1852 {
1853 vm_object_t object;
1854
1855 object = memory_object_control_to_vm_object(control);
1856 if (object == VM_OBJECT_NULL)
1857 return (KERN_INVALID_ARGUMENT);
1858
1859 return vm_object_range_op(object,
1860 offset_beg,
1861 offset_end,
1862 ops,
1863 range);
1864 }
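/*
 * Illustrative usage (editorial sketch, not part of the original source):
 * to discard any resident pages covering [offset_beg, offset_end) without
 * needing per-page state back, a caller could use the range form.  The
 * UPL_ROP_DUMP operation code is assumed here from the UPL interface
 * headers rather than from this file.
 *
 *	int range = 0;
 *
 *	kr = memory_object_range_op(control,
 *				    offset_beg,
 *				    offset_end,
 *				    UPL_ROP_DUMP,
 *				    &range);
 */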
1865
1866
1867 kern_return_t
1868 memory_object_pages_resident(
1869 memory_object_control_t control,
1870 boolean_t * has_pages_resident)
1871 {
1872 vm_object_t object;
1873
1874 *has_pages_resident = FALSE;
1875
1876 object = memory_object_control_to_vm_object(control);
1877 if (object == VM_OBJECT_NULL)
1878 return (KERN_INVALID_ARGUMENT);
1879
1880 if (object->resident_page_count)
1881 *has_pages_resident = TRUE;
1882
1883 return (KERN_SUCCESS);
1884 }
1885
1886 kern_return_t
1887 memory_object_signed(
1888 memory_object_control_t control,
1889 boolean_t is_signed)
1890 {
1891 vm_object_t object;
1892
1893 object = memory_object_control_to_vm_object(control);
1894 if (object == VM_OBJECT_NULL)
1895 return KERN_INVALID_ARGUMENT;
1896
1897 vm_object_lock(object);
1898 object->code_signed = is_signed;
1899 vm_object_unlock(object);
1900
1901 return KERN_SUCCESS;
1902 }
1903
1904 static zone_t mem_obj_control_zone;
1905
1906 __private_extern__ void
1907 memory_object_control_bootstrap(void)
1908 {
1909 int i;
1910
1911 i = (vm_size_t) sizeof (struct memory_object_control);
1912 mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
1913 return;
1914 }
1915
1916 __private_extern__ memory_object_control_t
1917 memory_object_control_allocate(
1918 vm_object_t object)
1919 {
1920 memory_object_control_t control;
1921
1922 control = (memory_object_control_t)zalloc(mem_obj_control_zone);
1923 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1924 control->moc_object = object;
1925 control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
1926 }
1927 return (control);
1928 }
1929
1930 __private_extern__ void
1931 memory_object_control_collapse(
1932 memory_object_control_t control,
1933 vm_object_t object)
1934 {
1935 assert((control->moc_object != VM_OBJECT_NULL) &&
1936 (control->moc_object != object));
1937 control->moc_object = object;
1938 }
1939
1940 __private_extern__ vm_object_t
1941 memory_object_control_to_vm_object(
1942 memory_object_control_t control)
1943 {
1944 if (control == MEMORY_OBJECT_CONTROL_NULL ||
1945 control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
1946 return VM_OBJECT_NULL;
1947
1948 return (control->moc_object);
1949 }
1950
1951 memory_object_control_t
1952 convert_port_to_mo_control(
1953 __unused mach_port_t port)
1954 {
1955 return MEMORY_OBJECT_CONTROL_NULL;
1956 }
1957
1958
1959 mach_port_t
1960 convert_mo_control_to_port(
1961 __unused memory_object_control_t control)
1962 {
1963 return MACH_PORT_NULL;
1964 }
1965
1966 void
1967 memory_object_control_reference(
1968 __unused memory_object_control_t control)
1969 {
1970 return;
1971 }
1972
1973 /*
1974 * We only ever issue one of these references, so kill it
1975 * when that gets released (we should switch to the real reference
1976 * counting in true port-less EMMI).
1977 */
1978 void
1979 memory_object_control_deallocate(
1980 memory_object_control_t control)
1981 {
1982 zfree(mem_obj_control_zone, control);
1983 }
1984
1985 void
1986 memory_object_control_disable(
1987 memory_object_control_t control)
1988 {
1989 assert(control->moc_object != VM_OBJECT_NULL);
1990 control->moc_object = VM_OBJECT_NULL;
1991 }
1992
1993 void
1994 memory_object_default_reference(
1995 memory_object_default_t dmm)
1996 {
1997 ipc_port_make_send(dmm);
1998 }
1999
2000 void
2001 memory_object_default_deallocate(
2002 memory_object_default_t dmm)
2003 {
2004 ipc_port_release_send(dmm);
2005 }
2006
2007 memory_object_t
2008 convert_port_to_memory_object(
2009 __unused mach_port_t port)
2010 {
2011 return (MEMORY_OBJECT_NULL);
2012 }
2013
2014
2015 mach_port_t
2016 convert_memory_object_to_port(
2017 __unused memory_object_t object)
2018 {
2019 return (MACH_PORT_NULL);
2020 }
2021
2022
2023 /* Routine memory_object_reference */
2024 void memory_object_reference(
2025 memory_object_t memory_object)
2026 {
2027 (memory_object->mo_pager_ops->memory_object_reference)(
2028 memory_object);
2029 }
2030
2031 /* Routine memory_object_deallocate */
2032 void memory_object_deallocate(
2033 memory_object_t memory_object)
2034 {
2035 (memory_object->mo_pager_ops->memory_object_deallocate)(
2036 memory_object);
2037 }
2038
2039
2040 /* Routine memory_object_init */
2041 kern_return_t memory_object_init
2042 (
2043 memory_object_t memory_object,
2044 memory_object_control_t memory_control,
2045 memory_object_cluster_size_t memory_object_page_size
2046 )
2047 {
2048 return (memory_object->mo_pager_ops->memory_object_init)(
2049 memory_object,
2050 memory_control,
2051 memory_object_page_size);
2052 }
2053
2054 /* Routine memory_object_terminate */
2055 kern_return_t memory_object_terminate
2056 (
2057 memory_object_t memory_object
2058 )
2059 {
2060 return (memory_object->mo_pager_ops->memory_object_terminate)(
2061 memory_object);
2062 }
2063
2064 /* Routine memory_object_data_request */
2065 kern_return_t memory_object_data_request
2066 (
2067 memory_object_t memory_object,
2068 memory_object_offset_t offset,
2069 memory_object_cluster_size_t length,
2070 vm_prot_t desired_access,
2071 memory_object_fault_info_t fault_info
2072 )
2073 {
2074 return (memory_object->mo_pager_ops->memory_object_data_request)(
2075 memory_object,
2076 offset,
2077 length,
2078 desired_access,
2079 fault_info);
2080 }
2081
2082 /* Routine memory_object_data_return */
2083 kern_return_t memory_object_data_return
2084 (
2085 memory_object_t memory_object,
2086 memory_object_offset_t offset,
2087 vm_size_t size,
2088 memory_object_offset_t *resid_offset,
2089 int *io_error,
2090 boolean_t dirty,
2091 boolean_t kernel_copy,
2092 int upl_flags
2093 )
2094 {
2095 return (memory_object->mo_pager_ops->memory_object_data_return)(
2096 memory_object,
2097 offset,
2098 size,
2099 resid_offset,
2100 io_error,
2101 dirty,
2102 kernel_copy,
2103 upl_flags);
2104 }
2105
2106 /* Routine memory_object_data_initialize */
2107 kern_return_t memory_object_data_initialize
2108 (
2109 memory_object_t memory_object,
2110 memory_object_offset_t offset,
2111 vm_size_t size
2112 )
2113 {
2114 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2115 memory_object,
2116 offset,
2117 size);
2118 }
2119
2120 /* Routine memory_object_data_unlock */
2121 kern_return_t memory_object_data_unlock
2122 (
2123 memory_object_t memory_object,
2124 memory_object_offset_t offset,
2125 vm_size_t size,
2126 vm_prot_t desired_access
2127 )
2128 {
2129 return (memory_object->mo_pager_ops->memory_object_data_unlock)(
2130 memory_object,
2131 offset,
2132 size,
2133 desired_access);
2134 }
2135
2136 /* Routine memory_object_synchronize */
2137 kern_return_t memory_object_synchronize
2138 (
2139 memory_object_t memory_object,
2140 memory_object_offset_t offset,
2141 vm_size_t size,
2142 vm_sync_t sync_flags
2143 )
2144 {
2145 return (memory_object->mo_pager_ops->memory_object_synchronize)(
2146 memory_object,
2147 offset,
2148 size,
2149 sync_flags);
2150 }
2151
2152
2153 /*
2154 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
2155 * each time a "named" VM object gets mapped directly or indirectly
2156 * (copy-on-write mapping). A "named" VM object has an extra reference held
2157 * by the pager to keep it alive until the pager decides that the
2158 * memory object (and its VM object) can be reclaimed.
2159 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
2160 * the mappings of that memory object have been removed.
2161 *
2162 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
2163 * are serialized (through object->mapping_in_progress), to ensure that the
2164 * pager gets a consistent view of the mapping status of the memory object.
2165 *
2166 * This allows the pager to keep track of how many times a memory object
2167 * has been mapped and with which protections, to decide when it can be
2168 * reclaimed.
2169 */
2170
2171 /* Routine memory_object_map */
2172 kern_return_t memory_object_map
2173 (
2174 memory_object_t memory_object,
2175 vm_prot_t prot
2176 )
2177 {
2178 return (memory_object->mo_pager_ops->memory_object_map)(
2179 memory_object,
2180 prot);
2181 }
2182
2183 /* Routine memory_object_last_unmap */
2184 kern_return_t memory_object_last_unmap
2185 (
2186 memory_object_t memory_object
2187 )
2188 {
2189 return (memory_object->mo_pager_ops->memory_object_last_unmap)(
2190 memory_object);
2191 }
2192
2193 /* Routine memory_object_create */
2194 kern_return_t memory_object_create
2195 (
2196 memory_object_default_t default_memory_manager,
2197 vm_size_t new_memory_object_size,
2198 memory_object_t *new_memory_object
2199 )
2200 {
2201 return default_pager_memory_object_create(default_memory_manager,
2202 new_memory_object_size,
2203 new_memory_object);
2204 }
2205
2206 upl_t
2207 convert_port_to_upl(
2208 ipc_port_t port)
2209 {
2210 upl_t upl;
2211
2212 ip_lock(port);
2213 if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
2214 ip_unlock(port);
2215 return (upl_t)NULL;
2216 }
2217 upl = (upl_t) port->ip_kobject;
2218 ip_unlock(port);
2219 upl_lock(upl);
2220 upl->ref_count+=1;
2221 upl_unlock(upl);
2222 return upl;
2223 }
2224
2225 mach_port_t
2226 convert_upl_to_port(
2227 __unused upl_t upl)
2228 {
2229 return MACH_PORT_NULL;
2230 }
2231
2232 __private_extern__ void
2233 upl_no_senders(
2234 __unused ipc_port_t port,
2235 __unused mach_port_mscount_t mscount)
2236 {
2237 return;
2238 }