]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/memory_object.c
xnu-1228.5.20.tar.gz
[apple/xnu.git] / osfmk / vm / memory_object.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65 #include <advisory_pageout.h>
66
67 /*
68 * Interface dependencies:
69 */
70
71 #include <mach/std_types.h> /* For pointer_t */
72 #include <mach/mach_types.h>
73
74 #include <mach/mig.h>
75 #include <mach/kern_return.h>
76 #include <mach/memory_object.h>
77 #include <mach/memory_object_default.h>
78 #include <mach/memory_object_control_server.h>
79 #include <mach/host_priv_server.h>
80 #include <mach/boolean.h>
81 #include <mach/vm_prot.h>
82 #include <mach/message.h>
83
84 /*
85 * Implementation dependencies:
86 */
87 #include <string.h> /* For memcpy() */
88
89 #include <kern/xpr.h>
90 #include <kern/host.h>
91 #include <kern/thread.h> /* For current_thread() */
92 #include <kern/ipc_mig.h>
93 #include <kern/misc_protos.h>
94
95 #include <vm/vm_object.h>
96 #include <vm/vm_fault.h>
97 #include <vm/memory_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h> /* For pmap_clear_modify */
101 #include <vm/vm_kern.h> /* For kernel_map, vm_move */
102 #include <vm/vm_map.h> /* For vm_map_pageable */
103 #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
104
105 #if MACH_PAGEMAP
106 #include <vm/vm_external.h>
107 #endif /* MACH_PAGEMAP */
108
109 #include <vm/vm_protos.h>
110
111
/*
 * Handle for the default memory manager.  It starts out unset
 * (MEMORY_OBJECT_DEFAULT_NULL).
 */
memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
/*
 * NOTE(review): presumably this mutex guards memory_manager_default;
 * its users are not visible in this part of the file -- confirm.
 */
decl_mutex_data(,	memory_manager_default_lock)
114
115
/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Decide whether page "m" has to be handed back to its
 *		memory manager under the return policy "should_return".
 *
 *		Nothing is ever returned when should_return is
 *		MEMORY_OBJECT_RETURN_NONE.  Otherwise the page is
 *		returned if any of the following holds:
 *
 *		1. The page is dirty.
 *		2. The page is precious and should_return is
 *		   MEMORY_OBJECT_RETURN_ALL.
 *		3. should_return is MEMORY_OBJECT_RETURN_ANYTHING.
 *
 *		Side effect: when should_return is not RETURN_NONE and
 *		m->dirty is clear, m->dirty is reloaded from
 *		pmap_is_modified(m->phys_page).
 *
 *		Both arguments are evaluated more than once, so they
 *		must not have side effects.  Every use is parenthesized
 *		so that a compound expression may be passed safely.
 */

#define	memory_object_should_return_page(m, should_return) \
	((should_return) != MEMORY_OBJECT_RETURN_NONE && \
	 (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
	  ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
	  (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
139
/*
 * Outcome of memory_object_lock_page() for a single page.
 */
typedef	int	memory_object_lock_result_t;

/* nothing further has to be done for the page */
#define MEMORY_OBJECT_LOCK_RESULT_DONE		0
/* the page is busy or being cleaned; the caller has to wait for it */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK	1
/* the page has to be returned to the pager and is dirty */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN	2
/* the page has to be returned to the pager but is not dirty */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN	3

/* forward declaration; the definition follows below */
memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);
152
153 /*
154 * Routine: memory_object_lock_page
155 *
156 * Description:
157 * Perform the appropriate lock operations on the
158 * given page. See the description of
159 * "memory_object_lock_request" for the meanings
160 * of the arguments.
161 *
162 * Returns an indication that the operation
163 * completed, blocked, or that the page must
164 * be cleaned.
165 */
166 memory_object_lock_result_t
167 memory_object_lock_page(
168 vm_page_t m,
169 memory_object_return_t should_return,
170 boolean_t should_flush,
171 vm_prot_t prot)
172 {
173 XPR(XPR_MEMORY_OBJECT,
174 "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
175 (integer_t)m, should_return, should_flush, prot, 0);
176
177 /*
178 * If we cannot change access to the page,
179 * either because a mapping is in progress
180 * (busy page) or because a mapping has been
181 * wired, then give up.
182 */
183
184 if (m->busy || m->cleaning)
185 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
186
187 /*
188 * Don't worry about pages for which the kernel
189 * does not have any data.
190 */
191
192 if (m->absent || m->error || m->restart) {
193 if(m->error && should_flush) {
194 /* dump the page, pager wants us to */
195 /* clean it up and there is no */
196 /* relevant data to return */
197 if(m->wire_count == 0) {
198 VM_PAGE_FREE(m);
199 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
200 }
201 } else {
202 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
203 }
204 }
205
206 assert(!m->fictitious);
207
208 /*
209 * If the page is wired, just clean or return the page if needed.
210 * Wired pages don't get flushed or disconnected from the pmap.
211 */
212
213 if (m->wire_count != 0) {
214 if (memory_object_should_return_page(m, should_return)) {
215 if (m->dirty)
216 return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
217 else
218 return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
219 }
220
221 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
222 }
223
224 /*
225 * If the page is to be flushed, allow
226 * that to be done as part of the protection.
227 */
228
229 if (should_flush)
230 prot = VM_PROT_ALL;
231
232 /*
233 * Set the page lock.
234 *
235 * If we are decreasing permission, do it now;
236 * let the fault handler take care of increases
237 * (pmap_page_protect may not increase protection).
238 */
239
240 if (prot != VM_PROT_NO_CHANGE) {
241 pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
242
243 PAGE_WAKEUP(m);
244 }
245
246 /*
247 * Handle page returning.
248 */
249 if (memory_object_should_return_page(m, should_return)) {
250
251 /*
252 * If we weren't planning
253 * to flush the page anyway,
254 * we may need to remove the
255 * page from the pageout
256 * system and from physical
257 * maps now.
258 */
259
260 vm_page_lockspin_queues();
261 VM_PAGE_QUEUES_REMOVE(m);
262 vm_page_unlock_queues();
263
264 if (!should_flush)
265 pmap_disconnect(m->phys_page);
266
267 if (m->dirty)
268 return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
269 else
270 return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
271 }
272
273 /*
274 * Handle flushing
275 */
276 if (should_flush) {
277 VM_PAGE_FREE(m);
278 } else {
279 /*
280 * XXX Make clean but not flush a paging hint,
281 * and deactivate the pages. This is a hack
282 * because it overloads flush/clean with
283 * implementation-dependent meaning. This only
284 * happens to pages that are already clean.
285 */
286
287 if (vm_page_deactivate_hint &&
288 (should_return != MEMORY_OBJECT_RETURN_NONE)) {
289 vm_page_lock_queues();
290 vm_page_deactivate(m);
291 vm_page_unlock_queues();
292 }
293 }
294
295 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
296 }
297
/*
 *	LIST_REQ_PAGEOUT_PAGES:
 *
 *	If "object" has a pager, push "data_cnt" bytes starting at
 *	offset "po" to it through memory_object_data_return().  The
 *	object must be locked on entry; it is unlocked around the
 *	call (protected by a paging reference) and locked again
 *	before the macro completes.  The result of the data_return
 *	call is discarded.
 *
 *	Besides its arguments the macro reads a variable named
 *	"should_flush" from the scope in which it is expanded.
 */
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)    \
MACRO_BEGIN								\
									\
	int			upl_flags;				\
	memory_object_t		pager = (object)->pager;		\
									\
	if (pager != MEMORY_OBJECT_NULL) {				\
		vm_object_paging_begin(object);				\
		vm_object_unlock(object);				\
									\
		upl_flags = (iosync) ? (UPL_MSYNC | UPL_IOSYNC)		\
				     : UPL_MSYNC;			\
									\
		(void) memory_object_data_return(pager,			\
			po,						\
			data_cnt,					\
			ro,						\
			ioerr,						\
			(action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\
			!should_flush,					\
			upl_flags);					\
									\
		vm_object_lock(object);					\
		vm_object_paging_end(object);				\
	}								\
MACRO_END
326
327 /*
328 * Routine: memory_object_lock_request [user interface]
329 *
330 * Description:
331 * Control use of the data associated with the given
332 * memory object. For each page in the given range,
333 * perform the following operations, in order:
334 * 1) restrict access to the page (disallow
335 * forms specified by "prot");
336 * 2) return data to the manager (if "should_return"
337 * is RETURN_DIRTY and the page is dirty, or
338 * "should_return" is RETURN_ALL and the page
339 * is either dirty or precious); and,
340 * 3) flush the cached copy (if "should_flush"
341 * is asserted).
342 * The set of pages is defined by a starting offset
343 * ("offset") and size ("size"). Only pages with the
344 * same page alignment as the starting offset are
345 * considered.
346 *
347 * A single acknowledgement is sent (to the "reply_to"
348 * port) when these actions are complete. If successful,
349 * the naked send right for reply_to is consumed.
350 */
351
352 kern_return_t
353 memory_object_lock_request(
354 memory_object_control_t control,
355 memory_object_offset_t offset,
356 memory_object_size_t size,
357 memory_object_offset_t * resid_offset,
358 int * io_errno,
359 memory_object_return_t should_return,
360 int flags,
361 vm_prot_t prot)
362 {
363 vm_object_t object;
364 __unused boolean_t should_flush;
365
366 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
367
368 XPR(XPR_MEMORY_OBJECT,
369 "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
370 (integer_t)control, offset, size,
371 (((should_return&1)<<1)|should_flush), prot);
372
373 /*
374 * Check for bogus arguments.
375 */
376 object = memory_object_control_to_vm_object(control);
377 if (object == VM_OBJECT_NULL)
378 return (KERN_INVALID_ARGUMENT);
379
380 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
381 return (KERN_INVALID_ARGUMENT);
382
383 size = round_page_64(size);
384
385 /*
386 * Lock the object, and acquire a paging reference to
387 * prevent the memory_object reference from being released.
388 */
389 vm_object_lock(object);
390 vm_object_paging_begin(object);
391 offset -= object->paging_offset;
392
393 (void)vm_object_update(object,
394 offset, size, resid_offset, io_errno, should_return, flags, prot);
395
396 vm_object_paging_end(object);
397 vm_object_unlock(object);
398
399 return (KERN_SUCCESS);
400 }
401
402 /*
403 * memory_object_release_name: [interface]
404 *
405 * Enforces name semantic on memory_object reference count decrement
406 * This routine should not be called unless the caller holds a name
407 * reference gained through the memory_object_named_create or the
408 * memory_object_rename call.
409 * If the TERMINATE_IDLE flag is set, the call will return if the
410 * reference count is not 1. i.e. idle with the only remaining reference
411 * being the name.
412 * If the decision is made to proceed the name field flag is set to
413 * false and the reference count is decremented. If the RESPECT_CACHE
414 * flag is set and the reference count has gone to zero, the
415 * memory_object is checked to see if it is cacheable otherwise when
416 * the reference count is zero, it is simply terminated.
417 */
418
419 kern_return_t
420 memory_object_release_name(
421 memory_object_control_t control,
422 int flags)
423 {
424 vm_object_t object;
425
426 object = memory_object_control_to_vm_object(control);
427 if (object == VM_OBJECT_NULL)
428 return (KERN_INVALID_ARGUMENT);
429
430 return vm_object_release_name(object, flags);
431 }
432
433
434
435 /*
436 * Routine: memory_object_destroy [user interface]
437 * Purpose:
438 * Shut down a memory object, despite the
439 * presence of address map (or other) references
440 * to the vm_object.
441 */
442 kern_return_t
443 memory_object_destroy(
444 memory_object_control_t control,
445 kern_return_t reason)
446 {
447 vm_object_t object;
448
449 object = memory_object_control_to_vm_object(control);
450 if (object == VM_OBJECT_NULL)
451 return (KERN_INVALID_ARGUMENT);
452
453 return (vm_object_destroy(object, reason));
454 }
455
456 /*
457 * Routine: vm_object_sync
458 *
459 * Kernel internal function to synch out pages in a given
460 * range within an object to its memory manager. Much the
461 * same as memory_object_lock_request but page protection
462 * is not changed.
463 *
464 * If the should_flush and should_return flags are true pages
465 * are flushed, that is dirty & precious pages are written to
466 * the memory manager and then discarded. If should_return
467 * is false, only precious pages are returned to the memory
468 * manager.
469 *
470 * If should flush is false and should_return true, the memory
471 * manager's copy of the pages is updated. If should_return
472 * is also false, only the precious pages are updated. This
473 * last option is of limited utility.
474 *
475 * Returns:
476 * FALSE if no pages were returned to the pager
477 * TRUE otherwise.
478 */
479
480 boolean_t
481 vm_object_sync(
482 vm_object_t object,
483 vm_object_offset_t offset,
484 vm_object_size_t size,
485 boolean_t should_flush,
486 boolean_t should_return,
487 boolean_t should_iosync)
488 {
489 boolean_t rv;
490 int flags;
491
492 XPR(XPR_VM_OBJECT,
493 "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
494 (integer_t)object, offset, size, should_flush, should_return);
495
496 /*
497 * Lock the object, and acquire a paging reference to
498 * prevent the memory_object and control ports from
499 * being destroyed.
500 */
501 vm_object_lock(object);
502 vm_object_paging_begin(object);
503
504 if (should_flush)
505 flags = MEMORY_OBJECT_DATA_FLUSH;
506 else
507 flags = 0;
508
509 if (should_iosync)
510 flags |= MEMORY_OBJECT_IO_SYNC;
511
512 rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
513 (should_return) ?
514 MEMORY_OBJECT_RETURN_ALL :
515 MEMORY_OBJECT_RETURN_NONE,
516 flags,
517 VM_PROT_NO_CHANGE);
518
519
520 vm_object_paging_end(object);
521 vm_object_unlock(object);
522 return rv;
523 }
524
525
526
527
528 static int
529 vm_object_update_extent(
530 vm_object_t object,
531 vm_object_offset_t offset,
532 vm_object_offset_t offset_end,
533 vm_object_offset_t *offset_resid,
534 int *io_errno,
535 boolean_t should_flush,
536 memory_object_return_t should_return,
537 boolean_t should_iosync,
538 vm_prot_t prot)
539 {
540 vm_page_t m;
541 int retval = 0;
542 vm_size_t data_cnt = 0;
543 vm_object_offset_t paging_offset = 0;
544 vm_object_offset_t last_offset = offset;
545 memory_object_lock_result_t page_lock_result;
546 memory_object_lock_result_t pageout_action;
547
548 pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
549
550 for (;
551 offset < offset_end && object->resident_page_count;
552 offset += PAGE_SIZE_64) {
553
554 /*
555 * Limit the number of pages to be cleaned at once.
556 */
557 if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
558 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
559 pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
560 data_cnt = 0;
561 }
562
563 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
564 page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
565
566 XPR(XPR_MEMORY_OBJECT,
567 "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
568 (integer_t)object, offset, page_lock_result, 0, 0);
569
570 switch (page_lock_result)
571 {
572 case MEMORY_OBJECT_LOCK_RESULT_DONE:
573 /*
574 * End of a cluster of dirty pages.
575 */
576 if (data_cnt) {
577 LIST_REQ_PAGEOUT_PAGES(object,
578 data_cnt, pageout_action,
579 paging_offset, offset_resid, io_errno, should_iosync);
580 data_cnt = 0;
581 continue;
582 }
583 break;
584
585 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
586 /*
587 * Since it is necessary to block,
588 * clean any dirty pages now.
589 */
590 if (data_cnt) {
591 LIST_REQ_PAGEOUT_PAGES(object,
592 data_cnt, pageout_action,
593 paging_offset, offset_resid, io_errno, should_iosync);
594 data_cnt = 0;
595 continue;
596 }
597 PAGE_SLEEP(object, m, THREAD_UNINT);
598 continue;
599
600 case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
601 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
602 /*
603 * The clean and return cases are similar.
604 *
605 * if this would form a discontiguous block,
606 * clean the old pages and start anew.
607 *
608 * Mark the page busy since we will unlock the
609 * object if we issue the LIST_REQ_PAGEOUT
610 */
611 m->busy = TRUE;
612 if (data_cnt &&
613 ((last_offset != offset) || (pageout_action != page_lock_result))) {
614 LIST_REQ_PAGEOUT_PAGES(object,
615 data_cnt, pageout_action,
616 paging_offset, offset_resid, io_errno, should_iosync);
617 data_cnt = 0;
618 }
619 m->busy = FALSE;
620
621 if (m->cleaning) {
622 PAGE_SLEEP(object, m, THREAD_UNINT);
623 continue;
624 }
625 if (data_cnt == 0) {
626 pageout_action = page_lock_result;
627 paging_offset = offset;
628 }
629 data_cnt += PAGE_SIZE;
630 last_offset = offset + PAGE_SIZE_64;
631
632 vm_page_lockspin_queues();
633 /*
634 * Clean
635 */
636 m->list_req_pending = TRUE;
637 m->cleaning = TRUE;
638
639 if (should_flush) {
640 /*
641 * and add additional state
642 * for the flush
643 */
644 m->busy = TRUE;
645 m->pageout = TRUE;
646 vm_page_wire(m);
647 }
648 vm_page_unlock_queues();
649
650 retval = 1;
651 break;
652 }
653 break;
654 }
655 }
656 /*
657 * We have completed the scan for applicable pages.
658 * Clean any pages that have been saved.
659 */
660 if (data_cnt) {
661 LIST_REQ_PAGEOUT_PAGES(object,
662 data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
663 }
664 return (retval);
665 }
666
667
668
669 /*
670 * Routine: vm_object_update
671 * Description:
672 * Work function for m_o_lock_request(), vm_o_sync().
673 *
674 * Called with object locked and paging ref taken.
675 */
676 kern_return_t
677 vm_object_update(
678 register vm_object_t object,
679 register vm_object_offset_t offset,
680 register vm_object_size_t size,
681 register vm_object_offset_t *resid_offset,
682 int *io_errno,
683 memory_object_return_t should_return,
684 int flags,
685 vm_prot_t protection)
686 {
687 vm_object_t copy_object = VM_OBJECT_NULL;
688 boolean_t data_returned = FALSE;
689 boolean_t update_cow;
690 boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
691 boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
692 int num_of_extents;
693 int n;
694 #define MAX_EXTENTS 8
695 #define EXTENT_SIZE (1024 * 1024 * 256)
696 #define RESIDENT_LIMIT (1024 * 32)
697 struct extent {
698 vm_object_offset_t e_base;
699 vm_object_offset_t e_min;
700 vm_object_offset_t e_max;
701 } extents[MAX_EXTENTS];
702
703 /*
704 * To avoid blocking while scanning for pages, save
705 * dirty pages to be cleaned all at once.
706 *
707 * XXXO A similar strategy could be used to limit the
708 * number of times that a scan must be restarted for
709 * other reasons. Those pages that would require blocking
710 * could be temporarily collected in another list, or
711 * their offsets could be recorded in a small array.
712 */
713
714 /*
715 * XXX NOTE: May want to consider converting this to a page list
716 * XXX vm_map_copy interface. Need to understand object
717 * XXX coalescing implications before doing so.
718 */
719
720 update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
721 && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
722 !(flags & MEMORY_OBJECT_DATA_PURGE)))
723 || (flags & MEMORY_OBJECT_COPY_SYNC);
724
725 if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
726 int collisions = 0;
727
728 while ((copy_object = object->copy) != VM_OBJECT_NULL) {
729 /*
730 * need to do a try here since we're swimming upstream
731 * against the normal lock ordering... however, we need
732 * to hold the object stable until we gain control of the
733 * copy object so we have to be careful how we approach this
734 */
735 if (vm_object_lock_try(copy_object)) {
736 /*
737 * we 'won' the lock on the copy object...
738 * no need to hold the object lock any longer...
739 * take a real reference on the copy object because
740 * we're going to call vm_fault_page on it which may
741 * under certain conditions drop the lock and the paging
742 * reference we're about to take... the reference
743 * will keep the copy object from going away if that happens
744 */
745 vm_object_unlock(object);
746 vm_object_reference_locked(copy_object);
747 break;
748 }
749 vm_object_unlock(object);
750
751 collisions++;
752 mutex_pause(collisions);
753
754 vm_object_lock(object);
755 }
756 }
757 if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
758 vm_map_size_t i;
759 vm_map_size_t copy_size;
760 vm_map_offset_t copy_offset;
761 vm_prot_t prot;
762 vm_page_t page;
763 vm_page_t top_page;
764 kern_return_t error = 0;
765 struct vm_object_fault_info fault_info;
766
767 if (copy_object != VM_OBJECT_NULL) {
768 /*
769 * translate offset with respect to shadow's offset
770 */
771 copy_offset = (offset >= copy_object->shadow_offset) ?
772 (vm_map_offset_t)(offset - copy_object->shadow_offset) :
773 (vm_map_offset_t) 0;
774
775 if (copy_offset > copy_object->size)
776 copy_offset = copy_object->size;
777
778 /*
779 * clip size with respect to shadow offset
780 */
781 if (offset >= copy_object->shadow_offset) {
782 copy_size = size;
783 } else if (size >= copy_object->shadow_offset - offset) {
784 copy_size = size - (copy_object->shadow_offset - offset);
785 } else {
786 copy_size = 0;
787 }
788
789 if (copy_offset + copy_size > copy_object->size) {
790 if (copy_object->size >= copy_offset) {
791 copy_size = copy_object->size - copy_offset;
792 } else {
793 copy_size = 0;
794 }
795 }
796 copy_size+=copy_offset;
797
798 } else {
799 copy_object = object;
800
801 copy_size = offset + size;
802 copy_offset = offset;
803 }
804 fault_info.interruptible = THREAD_UNINT;
805 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
806 fault_info.user_tag = 0;
807 fault_info.lo_offset = copy_offset;
808 fault_info.hi_offset = copy_size;
809 fault_info.no_cache = FALSE;
810
811 vm_object_paging_begin(copy_object);
812
813 for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
814 RETRY_COW_OF_LOCK_REQUEST:
815 fault_info.cluster_size = copy_size - i;
816
817 prot = VM_PROT_WRITE|VM_PROT_READ;
818 switch (vm_fault_page(copy_object, i,
819 VM_PROT_WRITE|VM_PROT_READ,
820 FALSE,
821 &prot,
822 &page,
823 &top_page,
824 (int *)0,
825 &error,
826 FALSE,
827 FALSE, &fault_info)) {
828
829 case VM_FAULT_SUCCESS:
830 if (top_page) {
831 vm_fault_cleanup(
832 page->object, top_page);
833 vm_object_lock(copy_object);
834 vm_object_paging_begin(copy_object);
835 }
836 vm_page_lock_queues();
837 if (!page->active && !page->inactive)
838 vm_page_deactivate(page);
839 vm_page_unlock_queues();
840
841 PAGE_WAKEUP_DONE(page);
842 break;
843 case VM_FAULT_RETRY:
844 prot = VM_PROT_WRITE|VM_PROT_READ;
845 vm_object_lock(copy_object);
846 vm_object_paging_begin(copy_object);
847 goto RETRY_COW_OF_LOCK_REQUEST;
848 case VM_FAULT_INTERRUPTED:
849 prot = VM_PROT_WRITE|VM_PROT_READ;
850 vm_object_lock(copy_object);
851 vm_object_paging_begin(copy_object);
852 goto RETRY_COW_OF_LOCK_REQUEST;
853 case VM_FAULT_MEMORY_SHORTAGE:
854 VM_PAGE_WAIT();
855 prot = VM_PROT_WRITE|VM_PROT_READ;
856 vm_object_lock(copy_object);
857 vm_object_paging_begin(copy_object);
858 goto RETRY_COW_OF_LOCK_REQUEST;
859 case VM_FAULT_FICTITIOUS_SHORTAGE:
860 vm_page_more_fictitious();
861 prot = VM_PROT_WRITE|VM_PROT_READ;
862 vm_object_lock(copy_object);
863 vm_object_paging_begin(copy_object);
864 goto RETRY_COW_OF_LOCK_REQUEST;
865 case VM_FAULT_MEMORY_ERROR:
866 if (object != copy_object)
867 vm_object_deallocate(copy_object);
868 vm_object_lock(object);
869 goto BYPASS_COW_COPYIN;
870 }
871
872 }
873 vm_object_paging_end(copy_object);
874 }
875 if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
876 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
877 vm_object_unlock(copy_object);
878 vm_object_deallocate(copy_object);
879 vm_object_lock(object);
880 }
881 return KERN_SUCCESS;
882 }
883 if (copy_object != VM_OBJECT_NULL && copy_object != object) {
884 if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
885 copy_object->shadow_severed = TRUE;
886 copy_object->shadowed = FALSE;
887 copy_object->shadow = NULL;
888 /*
889 * delete the ref the COW was holding on the target object
890 */
891 vm_object_deallocate(object);
892 }
893 vm_object_unlock(copy_object);
894 vm_object_deallocate(copy_object);
895 vm_object_lock(object);
896 }
897 BYPASS_COW_COPYIN:
898
899 /*
900 * when we have a really large range to check relative
901 * to the number of actual resident pages, we'd like
902 * to use the resident page list to drive our checks
903 * however, the object lock will get dropped while processing
904 * the page which means the resident queue can change which
905 * means we can't walk the queue as we process the pages
906 * we also want to do the processing in offset order to allow
907 * 'runs' of pages to be collected if we're being told to
908 * flush to disk... the resident page queue is NOT ordered.
909 *
910 * a temporary solution (until we figure out how to deal with
911 * large address spaces more generically) is to pre-flight
912 * the resident page queue (if it's small enough) and develop
913 * a collection of extents (that encompass actual resident pages)
914 * to visit. This will at least allow us to deal with some of the
915 * more pathological cases in a more efficient manner. The current
916 * worst case (a single resident page at the end of an extremely large
917 * range) can take minutes to complete for ranges in the terrabyte
918 * category... since this routine is called when truncating a file,
919 * and we currently support files up to 16 Tbytes in size, this
920 * is not a theoretical problem
921 */
922
923 if ((object->resident_page_count < RESIDENT_LIMIT) &&
924 (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
925 vm_page_t next;
926 vm_object_offset_t start;
927 vm_object_offset_t end;
928 vm_object_size_t e_mask;
929 vm_page_t m;
930
931 start = offset;
932 end = offset + size;
933 num_of_extents = 0;
934 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
935
936 m = (vm_page_t) queue_first(&object->memq);
937
938 while (!queue_end(&object->memq, (queue_entry_t) m)) {
939 next = (vm_page_t) queue_next(&m->listq);
940
941 if ((m->offset >= start) && (m->offset < end)) {
942 /*
943 * this is a page we're interested in
944 * try to fit it into a current extent
945 */
946 for (n = 0; n < num_of_extents; n++) {
947 if ((m->offset & e_mask) == extents[n].e_base) {
948 /*
949 * use (PAGE_SIZE - 1) to determine the
950 * max offset so that we don't wrap if
951 * we're at the last page of the space
952 */
953 if (m->offset < extents[n].e_min)
954 extents[n].e_min = m->offset;
955 else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
956 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
957 break;
958 }
959 }
960 if (n == num_of_extents) {
961 /*
962 * didn't find a current extent that can encompass
963 * this page
964 */
965 if (n < MAX_EXTENTS) {
966 /*
967 * if we still have room,
968 * create a new extent
969 */
970 extents[n].e_base = m->offset & e_mask;
971 extents[n].e_min = m->offset;
972 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
973
974 num_of_extents++;
975 } else {
976 /*
977 * no room to create a new extent...
978 * fall back to a single extent based
979 * on the min and max page offsets
980 * we find in the range we're interested in...
981 * first, look through the extent list and
982 * develop the overall min and max for the
983 * pages we've looked at up to this point
984 */
985 for (n = 1; n < num_of_extents; n++) {
986 if (extents[n].e_min < extents[0].e_min)
987 extents[0].e_min = extents[n].e_min;
988 if (extents[n].e_max > extents[0].e_max)
989 extents[0].e_max = extents[n].e_max;
990 }
991 /*
992 * now setup to run through the remaining pages
993 * to determine the overall min and max
994 * offset for the specified range
995 */
996 extents[0].e_base = 0;
997 e_mask = 0;
998 num_of_extents = 1;
999
1000 /*
1001 * by continuing, we'll reprocess the
1002 * page that forced us to abandon trying
1003 * to develop multiple extents
1004 */
1005 continue;
1006 }
1007 }
1008 }
1009 m = next;
1010 }
1011 } else {
1012 extents[0].e_min = offset;
1013 extents[0].e_max = offset + (size - 1);
1014
1015 num_of_extents = 1;
1016 }
1017 for (n = 0; n < num_of_extents; n++) {
1018 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1019 should_flush, should_return, should_iosync, protection))
1020 data_returned = TRUE;
1021 }
1022 return (data_returned);
1023 }
1024
1025
1026 /*
1027 * Routine: memory_object_synchronize_completed [user interface]
1028 *
1029 * Tell kernel that previously synchronized data
1030 * (memory_object_synchronize) has been queue or placed on the
1031 * backing storage.
1032 *
1033 * Note: there may be multiple synchronize requests for a given
1034 * memory object outstanding but they will not overlap.
1035 */
1036
1037 kern_return_t
1038 memory_object_synchronize_completed(
1039 memory_object_control_t control,
1040 memory_object_offset_t offset,
1041 vm_offset_t length)
1042 {
1043 vm_object_t object;
1044 msync_req_t msr;
1045
1046 object = memory_object_control_to_vm_object(control);
1047
1048 XPR(XPR_MEMORY_OBJECT,
1049 "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
1050 (integer_t)object, offset, length, 0, 0);
1051
1052 /*
1053 * Look for bogus arguments
1054 */
1055
1056 if (object == VM_OBJECT_NULL)
1057 return (KERN_INVALID_ARGUMENT);
1058
1059 vm_object_lock(object);
1060
1061 /*
1062 * search for sync request structure
1063 */
1064 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
1065 if (msr->offset == offset && msr->length == length) {
1066 queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
1067 break;
1068 }
1069 }/* queue_iterate */
1070
1071 if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
1072 vm_object_unlock(object);
1073 return KERN_INVALID_ARGUMENT;
1074 }
1075
1076 msr_lock(msr);
1077 vm_object_unlock(object);
1078 msr->flag = VM_MSYNC_DONE;
1079 msr_unlock(msr);
1080 thread_wakeup((event_t) msr);
1081
1082 return KERN_SUCCESS;
1083 }/* memory_object_synchronize_completed */
1084
1085 static kern_return_t
1086 vm_object_set_attributes_common(
1087 vm_object_t object,
1088 boolean_t may_cache,
1089 memory_object_copy_strategy_t copy_strategy,
1090 boolean_t temporary,
1091 boolean_t silent_overwrite,
1092 boolean_t advisory_pageout)
1093 {
1094 boolean_t object_became_ready;
1095
1096 XPR(XPR_MEMORY_OBJECT,
1097 "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
1098 (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
1099
1100 if (object == VM_OBJECT_NULL)
1101 return(KERN_INVALID_ARGUMENT);
1102
1103 /*
1104 * Verify the attributes of importance
1105 */
1106
1107 switch(copy_strategy) {
1108 case MEMORY_OBJECT_COPY_NONE:
1109 case MEMORY_OBJECT_COPY_DELAY:
1110 break;
1111 default:
1112 return(KERN_INVALID_ARGUMENT);
1113 }
1114
1115 #if !ADVISORY_PAGEOUT
1116 if (silent_overwrite || advisory_pageout)
1117 return(KERN_INVALID_ARGUMENT);
1118
1119 #endif /* !ADVISORY_PAGEOUT */
1120 if (may_cache)
1121 may_cache = TRUE;
1122 if (temporary)
1123 temporary = TRUE;
1124
1125 vm_object_lock(object);
1126
1127 /*
1128 * Copy the attributes
1129 */
1130 assert(!object->internal);
1131 object_became_ready = !object->pager_ready;
1132 object->copy_strategy = copy_strategy;
1133 object->can_persist = may_cache;
1134 object->temporary = temporary;
1135 object->silent_overwrite = silent_overwrite;
1136 object->advisory_pageout = advisory_pageout;
1137
1138 /*
1139 * Wake up anyone waiting for the ready attribute
1140 * to become asserted.
1141 */
1142
1143 if (object_became_ready) {
1144 object->pager_ready = TRUE;
1145 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1146 }
1147
1148 vm_object_unlock(object);
1149
1150 return(KERN_SUCCESS);
1151 }
1152
1153 /*
1154 * Set the memory object attribute as provided.
1155 *
1156 * XXX This routine cannot be completed until the vm_msync, clean
1157 * in place, and cluster work is completed. See ifdef notyet
1158 * below and note that vm_object_set_attributes_common()
1159 * may have to be expanded.
1160 */
1161 kern_return_t
1162 memory_object_change_attributes(
1163 memory_object_control_t control,
1164 memory_object_flavor_t flavor,
1165 memory_object_info_t attributes,
1166 mach_msg_type_number_t count)
1167 {
1168 vm_object_t object;
1169 kern_return_t result = KERN_SUCCESS;
1170 boolean_t temporary;
1171 boolean_t may_cache;
1172 boolean_t invalidate;
1173 memory_object_copy_strategy_t copy_strategy;
1174 boolean_t silent_overwrite;
1175 boolean_t advisory_pageout;
1176
1177 object = memory_object_control_to_vm_object(control);
1178 if (object == VM_OBJECT_NULL)
1179 return (KERN_INVALID_ARGUMENT);
1180
1181 vm_object_lock(object);
1182
1183 temporary = object->temporary;
1184 may_cache = object->can_persist;
1185 copy_strategy = object->copy_strategy;
1186 silent_overwrite = object->silent_overwrite;
1187 advisory_pageout = object->advisory_pageout;
1188 #if notyet
1189 invalidate = object->invalidate;
1190 #endif
1191 vm_object_unlock(object);
1192
1193 switch (flavor) {
1194 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1195 {
1196 old_memory_object_behave_info_t behave;
1197
1198 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1199 result = KERN_INVALID_ARGUMENT;
1200 break;
1201 }
1202
1203 behave = (old_memory_object_behave_info_t) attributes;
1204
1205 temporary = behave->temporary;
1206 invalidate = behave->invalidate;
1207 copy_strategy = behave->copy_strategy;
1208
1209 break;
1210 }
1211
1212 case MEMORY_OBJECT_BEHAVIOR_INFO:
1213 {
1214 memory_object_behave_info_t behave;
1215
1216 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1217 result = KERN_INVALID_ARGUMENT;
1218 break;
1219 }
1220
1221 behave = (memory_object_behave_info_t) attributes;
1222
1223 temporary = behave->temporary;
1224 invalidate = behave->invalidate;
1225 copy_strategy = behave->copy_strategy;
1226 silent_overwrite = behave->silent_overwrite;
1227 advisory_pageout = behave->advisory_pageout;
1228 break;
1229 }
1230
1231 case MEMORY_OBJECT_PERFORMANCE_INFO:
1232 {
1233 memory_object_perf_info_t perf;
1234
1235 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1236 result = KERN_INVALID_ARGUMENT;
1237 break;
1238 }
1239
1240 perf = (memory_object_perf_info_t) attributes;
1241
1242 may_cache = perf->may_cache;
1243
1244 break;
1245 }
1246
1247 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1248 {
1249 old_memory_object_attr_info_t attr;
1250
1251 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1252 result = KERN_INVALID_ARGUMENT;
1253 break;
1254 }
1255
1256 attr = (old_memory_object_attr_info_t) attributes;
1257
1258 may_cache = attr->may_cache;
1259 copy_strategy = attr->copy_strategy;
1260
1261 break;
1262 }
1263
1264 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1265 {
1266 memory_object_attr_info_t attr;
1267
1268 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1269 result = KERN_INVALID_ARGUMENT;
1270 break;
1271 }
1272
1273 attr = (memory_object_attr_info_t) attributes;
1274
1275 copy_strategy = attr->copy_strategy;
1276 may_cache = attr->may_cache_object;
1277 temporary = attr->temporary;
1278
1279 break;
1280 }
1281
1282 default:
1283 result = KERN_INVALID_ARGUMENT;
1284 break;
1285 }
1286
1287 if (result != KERN_SUCCESS)
1288 return(result);
1289
1290 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1291 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1292 temporary = TRUE;
1293 } else {
1294 temporary = FALSE;
1295 }
1296
1297 /*
1298 * XXX may_cache may become a tri-valued variable to handle
1299 * XXX uncache if not in use.
1300 */
1301 return (vm_object_set_attributes_common(object,
1302 may_cache,
1303 copy_strategy,
1304 temporary,
1305 silent_overwrite,
1306 advisory_pageout));
1307 }
1308
1309 kern_return_t
1310 memory_object_get_attributes(
1311 memory_object_control_t control,
1312 memory_object_flavor_t flavor,
1313 memory_object_info_t attributes, /* pointer to OUT array */
1314 mach_msg_type_number_t *count) /* IN/OUT */
1315 {
1316 kern_return_t ret = KERN_SUCCESS;
1317 vm_object_t object;
1318
1319 object = memory_object_control_to_vm_object(control);
1320 if (object == VM_OBJECT_NULL)
1321 return (KERN_INVALID_ARGUMENT);
1322
1323 vm_object_lock(object);
1324
1325 switch (flavor) {
1326 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1327 {
1328 old_memory_object_behave_info_t behave;
1329
1330 if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1331 ret = KERN_INVALID_ARGUMENT;
1332 break;
1333 }
1334
1335 behave = (old_memory_object_behave_info_t) attributes;
1336 behave->copy_strategy = object->copy_strategy;
1337 behave->temporary = object->temporary;
1338 #if notyet /* remove when vm_msync complies and clean in place fini */
1339 behave->invalidate = object->invalidate;
1340 #else
1341 behave->invalidate = FALSE;
1342 #endif
1343
1344 *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1345 break;
1346 }
1347
1348 case MEMORY_OBJECT_BEHAVIOR_INFO:
1349 {
1350 memory_object_behave_info_t behave;
1351
1352 if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1353 ret = KERN_INVALID_ARGUMENT;
1354 break;
1355 }
1356
1357 behave = (memory_object_behave_info_t) attributes;
1358 behave->copy_strategy = object->copy_strategy;
1359 behave->temporary = object->temporary;
1360 #if notyet /* remove when vm_msync complies and clean in place fini */
1361 behave->invalidate = object->invalidate;
1362 #else
1363 behave->invalidate = FALSE;
1364 #endif
1365 behave->advisory_pageout = object->advisory_pageout;
1366 behave->silent_overwrite = object->silent_overwrite;
1367 *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1368 break;
1369 }
1370
1371 case MEMORY_OBJECT_PERFORMANCE_INFO:
1372 {
1373 memory_object_perf_info_t perf;
1374
1375 if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
1376 ret = KERN_INVALID_ARGUMENT;
1377 break;
1378 }
1379
1380 perf = (memory_object_perf_info_t) attributes;
1381 perf->cluster_size = PAGE_SIZE;
1382 perf->may_cache = object->can_persist;
1383
1384 *count = MEMORY_OBJECT_PERF_INFO_COUNT;
1385 break;
1386 }
1387
1388 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1389 {
1390 old_memory_object_attr_info_t attr;
1391
1392 if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1393 ret = KERN_INVALID_ARGUMENT;
1394 break;
1395 }
1396
1397 attr = (old_memory_object_attr_info_t) attributes;
1398 attr->may_cache = object->can_persist;
1399 attr->copy_strategy = object->copy_strategy;
1400
1401 *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
1402 break;
1403 }
1404
1405 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1406 {
1407 memory_object_attr_info_t attr;
1408
1409 if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
1410 ret = KERN_INVALID_ARGUMENT;
1411 break;
1412 }
1413
1414 attr = (memory_object_attr_info_t) attributes;
1415 attr->copy_strategy = object->copy_strategy;
1416 attr->cluster_size = PAGE_SIZE;
1417 attr->may_cache_object = object->can_persist;
1418 attr->temporary = object->temporary;
1419
1420 *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
1421 break;
1422 }
1423
1424 default:
1425 ret = KERN_INVALID_ARGUMENT;
1426 break;
1427 }
1428
1429 vm_object_unlock(object);
1430
1431 return(ret);
1432 }
1433
1434
1435 kern_return_t
1436 memory_object_iopl_request(
1437 ipc_port_t port,
1438 memory_object_offset_t offset,
1439 upl_size_t *upl_size,
1440 upl_t *upl_ptr,
1441 upl_page_info_array_t user_page_list,
1442 unsigned int *page_list_count,
1443 int *flags)
1444 {
1445 vm_object_t object;
1446 kern_return_t ret;
1447 int caller_flags;
1448
1449 caller_flags = *flags;
1450
1451 if (caller_flags & ~UPL_VALID_FLAGS) {
1452 /*
1453 * For forward compatibility's sake,
1454 * reject any unknown flag.
1455 */
1456 return KERN_INVALID_VALUE;
1457 }
1458
1459 if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
1460 vm_named_entry_t named_entry;
1461
1462 named_entry = (vm_named_entry_t)port->ip_kobject;
1463 /* a few checks to make sure user is obeying rules */
1464 if(*upl_size == 0) {
1465 if(offset >= named_entry->size)
1466 return(KERN_INVALID_RIGHT);
1467 *upl_size = named_entry->size - offset;
1468 }
1469 if(caller_flags & UPL_COPYOUT_FROM) {
1470 if((named_entry->protection & VM_PROT_READ)
1471 != VM_PROT_READ) {
1472 return(KERN_INVALID_RIGHT);
1473 }
1474 } else {
1475 if((named_entry->protection &
1476 (VM_PROT_READ | VM_PROT_WRITE))
1477 != (VM_PROT_READ | VM_PROT_WRITE)) {
1478 return(KERN_INVALID_RIGHT);
1479 }
1480 }
1481 if(named_entry->size < (offset + *upl_size))
1482 return(KERN_INVALID_ARGUMENT);
1483
1484 /* the callers parameter offset is defined to be the */
1485 /* offset from beginning of named entry offset in object */
1486 offset = offset + named_entry->offset;
1487
1488 if(named_entry->is_sub_map)
1489 return (KERN_INVALID_ARGUMENT);
1490
1491 named_entry_lock(named_entry);
1492
1493 if (named_entry->is_pager) {
1494 object = vm_object_enter(named_entry->backing.pager,
1495 named_entry->offset + named_entry->size,
1496 named_entry->internal,
1497 FALSE,
1498 FALSE);
1499 if (object == VM_OBJECT_NULL) {
1500 named_entry_unlock(named_entry);
1501 return(KERN_INVALID_OBJECT);
1502 }
1503
1504 /* JMM - drop reference on pager here? */
1505
1506 /* create an extra reference for the named entry */
1507 vm_object_lock(object);
1508 vm_object_reference_locked(object);
1509 named_entry->backing.object = object;
1510 named_entry->is_pager = FALSE;
1511 named_entry_unlock(named_entry);
1512
1513 /* wait for object to be ready */
1514 while (!object->pager_ready) {
1515 vm_object_wait(object,
1516 VM_OBJECT_EVENT_PAGER_READY,
1517 THREAD_UNINT);
1518 vm_object_lock(object);
1519 }
1520 vm_object_unlock(object);
1521 } else {
1522 /* This is the case where we are going to map */
1523 /* an already mapped object. If the object is */
1524 /* not ready it is internal. An external */
1525 /* object cannot be mapped until it is ready */
1526 /* we can therefore avoid the ready check */
1527 /* in this case. */
1528 object = named_entry->backing.object;
1529 vm_object_reference(object);
1530 named_entry_unlock(named_entry);
1531 }
1532 } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
1533 memory_object_control_t control;
1534 control = (memory_object_control_t) port;
1535 if (control == NULL)
1536 return (KERN_INVALID_ARGUMENT);
1537 object = memory_object_control_to_vm_object(control);
1538 if (object == VM_OBJECT_NULL)
1539 return (KERN_INVALID_ARGUMENT);
1540 vm_object_reference(object);
1541 } else {
1542 return KERN_INVALID_ARGUMENT;
1543 }
1544 if (object == VM_OBJECT_NULL)
1545 return (KERN_INVALID_ARGUMENT);
1546
1547 if (!object->private) {
1548 if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
1549 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
1550 if (object->phys_contiguous) {
1551 *flags = UPL_PHYS_CONTIG;
1552 } else {
1553 *flags = 0;
1554 }
1555 } else {
1556 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1557 }
1558
1559 ret = vm_object_iopl_request(object,
1560 offset,
1561 *upl_size,
1562 upl_ptr,
1563 user_page_list,
1564 page_list_count,
1565 caller_flags);
1566 vm_object_deallocate(object);
1567 return ret;
1568 }
1569
1570 /*
1571 * Routine: memory_object_upl_request [interface]
1572 * Purpose:
1573 * Cause the population of a portion of a vm_object.
1574 * Depending on the nature of the request, the pages
1575 * returned may be contain valid data or be uninitialized.
1576 *
1577 */
1578
1579 kern_return_t
1580 memory_object_upl_request(
1581 memory_object_control_t control,
1582 memory_object_offset_t offset,
1583 upl_size_t size,
1584 upl_t *upl_ptr,
1585 upl_page_info_array_t user_page_list,
1586 unsigned int *page_list_count,
1587 int cntrl_flags)
1588 {
1589 vm_object_t object;
1590
1591 object = memory_object_control_to_vm_object(control);
1592 if (object == VM_OBJECT_NULL)
1593 return (KERN_INVALID_ARGUMENT);
1594
1595 return vm_object_upl_request(object,
1596 offset,
1597 size,
1598 upl_ptr,
1599 user_page_list,
1600 page_list_count,
1601 cntrl_flags);
1602 }
1603
1604 /*
1605 * Routine: memory_object_super_upl_request [interface]
1606 * Purpose:
1607 * Cause the population of a portion of a vm_object
1608 * in much the same way as memory_object_upl_request.
1609 * Depending on the nature of the request, the pages
1610 * returned may be contain valid data or be uninitialized.
1611 * However, the region may be expanded up to the super
1612 * cluster size provided.
1613 */
1614
1615 kern_return_t
1616 memory_object_super_upl_request(
1617 memory_object_control_t control,
1618 memory_object_offset_t offset,
1619 upl_size_t size,
1620 upl_size_t super_cluster,
1621 upl_t *upl,
1622 upl_page_info_t *user_page_list,
1623 unsigned int *page_list_count,
1624 int cntrl_flags)
1625 {
1626 vm_object_t object;
1627
1628 object = memory_object_control_to_vm_object(control);
1629 if (object == VM_OBJECT_NULL)
1630 return (KERN_INVALID_ARGUMENT);
1631
1632 return vm_object_super_upl_request(object,
1633 offset,
1634 size,
1635 super_cluster,
1636 upl,
1637 user_page_list,
1638 page_list_count,
1639 cntrl_flags);
1640 }
1641
1642 kern_return_t
1643 memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start,
1644 vm_size_t *length, memory_object_fault_info_t fault_info)
1645 {
1646 vm_object_t object;
1647
1648 object = memory_object_control_to_vm_object(control);
1649
1650 if (object == VM_OBJECT_NULL || object->paging_offset > *start)
1651 return (KERN_INVALID_ARGUMENT);
1652
1653 *start -= object->paging_offset;
1654
1655 vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info);
1656
1657 *start += object->paging_offset;
1658
1659 return (KERN_SUCCESS);
1660 }
1661
1662
/*
 * Global counters, all starting at zero.  Nothing in the visible part
 * of this file reads or updates them.
 * NOTE(review): judging by the names these presumably count outcomes
 * of "discard" requests handled elsewhere -- confirm before relying
 * on or removing them.
 */
1663 int vm_stat_discard_cleared_reply = 0;
1664 int vm_stat_discard_cleared_unset = 0;
1665 int vm_stat_discard_cleared_too_late = 0;
1666
1667
1668
1669 /*
1670 * Routine: host_default_memory_manager [interface]
1671 * Purpose:
1672 * set/get the default memory manager port and default cluster
1673 * size.
1674 *
1675 * If successful, consumes the supplied naked send right.
1676 */
1677 kern_return_t
1678 host_default_memory_manager(
1679 host_priv_t host_priv,
1680 memory_object_default_t *default_manager,
1681 __unused memory_object_cluster_size_t cluster_size)
1682 {
1683 memory_object_default_t current_manager;
1684 memory_object_default_t new_manager;
1685 memory_object_default_t returned_manager;
1686 kern_return_t result = KERN_SUCCESS;
1687
1688 if (host_priv == HOST_PRIV_NULL)
1689 return(KERN_INVALID_HOST);
1690
1691 assert(host_priv == &realhost);
1692
1693 new_manager = *default_manager;
1694 mutex_lock(&memory_manager_default_lock);
1695 current_manager = memory_manager_default;
1696 returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
1697
1698 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1699 /*
1700 * Retrieve the current value.
1701 */
1702 returned_manager = current_manager;
1703 memory_object_default_reference(returned_manager);
1704 } else {
1705
1706 /*
1707 * If this is the first non-null manager, start
1708 * up the internal pager support.
1709 */
1710 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1711 result = vm_pageout_internal_start();
1712 if (result != KERN_SUCCESS)
1713 goto out;
1714 }
1715
1716 /*
1717 * Retrieve the current value,
1718 * and replace it with the supplied value.
1719 * We return the old reference to the caller
1720 * but we have to take a reference on the new
1721 * one.
1722 */
1723 returned_manager = current_manager;
1724 memory_manager_default = new_manager;
1725 memory_object_default_reference(new_manager);
1726
1727 /*
1728 * In case anyone's been waiting for a memory
1729 * manager to be established, wake them up.
1730 */
1731
1732 thread_wakeup((event_t) &memory_manager_default);
1733 }
1734 out:
1735 mutex_unlock(&memory_manager_default_lock);
1736
1737 *default_manager = returned_manager;
1738 return(result);
1739 }
1740
1741 /*
1742 * Routine: memory_manager_default_reference
1743 * Purpose:
1744 * Returns a naked send right for the default
1745 * memory manager. The returned right is always
1746 * valid (not IP_NULL or IP_DEAD).
1747 */
1748
1749 __private_extern__ memory_object_default_t
1750 memory_manager_default_reference(void)
1751 {
1752 memory_object_default_t current_manager;
1753
1754 mutex_lock(&memory_manager_default_lock);
1755 current_manager = memory_manager_default;
1756 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1757 wait_result_t res;
1758
1759 res = thread_sleep_mutex((event_t) &memory_manager_default,
1760 &memory_manager_default_lock,
1761 THREAD_UNINT);
1762 assert(res == THREAD_AWAKENED);
1763 current_manager = memory_manager_default;
1764 }
1765 memory_object_default_reference(current_manager);
1766 mutex_unlock(&memory_manager_default_lock);
1767
1768 return current_manager;
1769 }
1770
1771 /*
1772 * Routine: memory_manager_default_check
1773 *
1774 * Purpose:
1775 * Check whether a default memory manager has been set
1776 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1777 * and KERN_FAILURE if dmm does not exist.
1778 *
1779 * If there is no default memory manager, log an error,
1780 * but only the first time.
1781 *
1782 */
1783 __private_extern__ kern_return_t
1784 memory_manager_default_check(void)
1785 {
1786 memory_object_default_t current;
1787
1788 mutex_lock(&memory_manager_default_lock);
1789 current = memory_manager_default;
1790 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1791 static boolean_t logged; /* initialized to 0 */
1792 boolean_t complain = !logged;
1793 logged = TRUE;
1794 mutex_unlock(&memory_manager_default_lock);
1795 if (complain)
1796 printf("Warning: No default memory manager\n");
1797 return(KERN_FAILURE);
1798 } else {
1799 mutex_unlock(&memory_manager_default_lock);
1800 return(KERN_SUCCESS);
1801 }
1802 }
1803
1804 __private_extern__ void
1805 memory_manager_default_init(void)
1806 {
1807 memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
1808 mutex_init(&memory_manager_default_lock, 0);
1809 }
1810
1811
1812
1813 /* Allow manipulation of individual page state. This is actually part of */
1814 /* the UPL regimen but takes place on the object rather than on a UPL */
1815
1816 kern_return_t
1817 memory_object_page_op(
1818 memory_object_control_t control,
1819 memory_object_offset_t offset,
1820 int ops,
1821 ppnum_t *phys_entry,
1822 int *flags)
1823 {
1824 vm_object_t object;
1825
1826 object = memory_object_control_to_vm_object(control);
1827 if (object == VM_OBJECT_NULL)
1828 return (KERN_INVALID_ARGUMENT);
1829
1830 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1831 }
1832
1833 /*
1834 * memory_object_range_op offers performance enhancement over
1835 * memory_object_page_op for page_op functions which do not require page
1836 * level state to be returned from the call. Page_op was created to provide
1837 * a low-cost alternative to page manipulation via UPLs when only a single
1838 * page was involved. The range_op call establishes the ability in the _op
1839 * family of functions to work on multiple pages where the lack of page level
1840 * state handling allows the caller to avoid the overhead of the upl structures.
1841 */
1842
1843 kern_return_t
1844 memory_object_range_op(
1845 memory_object_control_t control,
1846 memory_object_offset_t offset_beg,
1847 memory_object_offset_t offset_end,
1848 int ops,
1849 int *range)
1850 {
1851 vm_object_t object;
1852
1853 object = memory_object_control_to_vm_object(control);
1854 if (object == VM_OBJECT_NULL)
1855 return (KERN_INVALID_ARGUMENT);
1856
1857 return vm_object_range_op(object,
1858 offset_beg,
1859 offset_end,
1860 ops,
1861 range);
1862 }
1863
1864
1865 kern_return_t
1866 memory_object_pages_resident(
1867 memory_object_control_t control,
1868 boolean_t * has_pages_resident)
1869 {
1870 vm_object_t object;
1871
1872 *has_pages_resident = FALSE;
1873
1874 object = memory_object_control_to_vm_object(control);
1875 if (object == VM_OBJECT_NULL)
1876 return (KERN_INVALID_ARGUMENT);
1877
1878 if (object->resident_page_count)
1879 *has_pages_resident = TRUE;
1880
1881 return (KERN_SUCCESS);
1882 }
1883
1884 kern_return_t
1885 memory_object_signed(
1886 memory_object_control_t control,
1887 boolean_t is_signed)
1888 {
1889 vm_object_t object;
1890
1891 object = memory_object_control_to_vm_object(control);
1892 if (object == VM_OBJECT_NULL)
1893 return KERN_INVALID_ARGUMENT;
1894
1895 vm_object_lock(object);
1896 object->code_signed = is_signed;
1897 vm_object_unlock(object);
1898
1899 return KERN_SUCCESS;
1900 }
1901
1902 static zone_t mem_obj_control_zone;
1903
1904 __private_extern__ void
1905 memory_object_control_bootstrap(void)
1906 {
1907 int i;
1908
1909 i = (vm_size_t) sizeof (struct memory_object_control);
1910 mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
1911 return;
1912 }
1913
1914 __private_extern__ memory_object_control_t
1915 memory_object_control_allocate(
1916 vm_object_t object)
1917 {
1918 memory_object_control_t control;
1919
1920 control = (memory_object_control_t)zalloc(mem_obj_control_zone);
1921 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1922 control->moc_object = object;
1923 control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
1924 }
1925 return (control);
1926 }
1927
1928 __private_extern__ void
1929 memory_object_control_collapse(
1930 memory_object_control_t control,
1931 vm_object_t object)
1932 {
1933 assert((control->moc_object != VM_OBJECT_NULL) &&
1934 (control->moc_object != object));
1935 control->moc_object = object;
1936 }
1937
1938 __private_extern__ vm_object_t
1939 memory_object_control_to_vm_object(
1940 memory_object_control_t control)
1941 {
1942 if (control == MEMORY_OBJECT_CONTROL_NULL ||
1943 control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
1944 return VM_OBJECT_NULL;
1945
1946 return (control->moc_object);
1947 }
1948
1949 memory_object_control_t
1950 convert_port_to_mo_control(
1951 __unused mach_port_t port)
1952 {
1953 return MEMORY_OBJECT_CONTROL_NULL;
1954 }
1955
1956
1957 mach_port_t
1958 convert_mo_control_to_port(
1959 __unused memory_object_control_t control)
1960 {
1961 return MACH_PORT_NULL;
1962 }
1963
1964 void
1965 memory_object_control_reference(
1966 __unused memory_object_control_t control)
1967 {
1968 return;
1969 }
1970
1971 /*
1972 * We only every issue one of these references, so kill it
1973 * when that gets released (should switch the real reference
1974 * counting in true port-less EMMI).
1975 */
1976 void
1977 memory_object_control_deallocate(
1978 memory_object_control_t control)
1979 {
1980 zfree(mem_obj_control_zone, control);
1981 }
1982
1983 void
1984 memory_object_control_disable(
1985 memory_object_control_t control)
1986 {
1987 assert(control->moc_object != VM_OBJECT_NULL);
1988 control->moc_object = VM_OBJECT_NULL;
1989 }
1990
1991 void
1992 memory_object_default_reference(
1993 memory_object_default_t dmm)
1994 {
1995 ipc_port_make_send(dmm);
1996 }
1997
1998 void
1999 memory_object_default_deallocate(
2000 memory_object_default_t dmm)
2001 {
2002 ipc_port_release_send(dmm);
2003 }
2004
2005 memory_object_t
2006 convert_port_to_memory_object(
2007 __unused mach_port_t port)
2008 {
2009 return (MEMORY_OBJECT_NULL);
2010 }
2011
2012
2013 mach_port_t
2014 convert_memory_object_to_port(
2015 __unused memory_object_t object)
2016 {
2017 return (MACH_PORT_NULL);
2018 }
2019
2020
2021 /* Routine memory_object_reference */
2022 void memory_object_reference(
2023 memory_object_t memory_object)
2024 {
2025 (memory_object->mo_pager_ops->memory_object_reference)(
2026 memory_object);
2027 }
2028
2029 /* Routine memory_object_deallocate */
2030 void memory_object_deallocate(
2031 memory_object_t memory_object)
2032 {
2033 (memory_object->mo_pager_ops->memory_object_deallocate)(
2034 memory_object);
2035 }
2036
2037
2038 /* Routine memory_object_init */
2039 kern_return_t memory_object_init
2040 (
2041 memory_object_t memory_object,
2042 memory_object_control_t memory_control,
2043 memory_object_cluster_size_t memory_object_page_size
2044 )
2045 {
2046 return (memory_object->mo_pager_ops->memory_object_init)(
2047 memory_object,
2048 memory_control,
2049 memory_object_page_size);
2050 }
2051
2052 /* Routine memory_object_terminate */
2053 kern_return_t memory_object_terminate
2054 (
2055 memory_object_t memory_object
2056 )
2057 {
2058 return (memory_object->mo_pager_ops->memory_object_terminate)(
2059 memory_object);
2060 }
2061
2062 /* Routine memory_object_data_request */
2063 kern_return_t memory_object_data_request
2064 (
2065 memory_object_t memory_object,
2066 memory_object_offset_t offset,
2067 memory_object_cluster_size_t length,
2068 vm_prot_t desired_access,
2069 memory_object_fault_info_t fault_info
2070 )
2071 {
2072 return (memory_object->mo_pager_ops->memory_object_data_request)(
2073 memory_object,
2074 offset,
2075 length,
2076 desired_access,
2077 fault_info);
2078 }
2079
2080 /* Routine memory_object_data_return */
2081 kern_return_t memory_object_data_return
2082 (
2083 memory_object_t memory_object,
2084 memory_object_offset_t offset,
2085 vm_size_t size,
2086 memory_object_offset_t *resid_offset,
2087 int *io_error,
2088 boolean_t dirty,
2089 boolean_t kernel_copy,
2090 int upl_flags
2091 )
2092 {
2093 return (memory_object->mo_pager_ops->memory_object_data_return)(
2094 memory_object,
2095 offset,
2096 size,
2097 resid_offset,
2098 io_error,
2099 dirty,
2100 kernel_copy,
2101 upl_flags);
2102 }
2103
2104 /* Routine memory_object_data_initialize */
2105 kern_return_t memory_object_data_initialize
2106 (
2107 memory_object_t memory_object,
2108 memory_object_offset_t offset,
2109 vm_size_t size
2110 )
2111 {
2112 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2113 memory_object,
2114 offset,
2115 size);
2116 }
2117
2118 /* Routine memory_object_data_unlock */
2119 kern_return_t memory_object_data_unlock
2120 (
2121 memory_object_t memory_object,
2122 memory_object_offset_t offset,
2123 vm_size_t size,
2124 vm_prot_t desired_access
2125 )
2126 {
2127 return (memory_object->mo_pager_ops->memory_object_data_unlock)(
2128 memory_object,
2129 offset,
2130 size,
2131 desired_access);
2132 }
2133
2134 /* Routine memory_object_synchronize */
2135 kern_return_t memory_object_synchronize
2136 (
2137 memory_object_t memory_object,
2138 memory_object_offset_t offset,
2139 vm_size_t size,
2140 vm_sync_t sync_flags
2141 )
2142 {
2143 return (memory_object->mo_pager_ops->memory_object_synchronize)(
2144 memory_object,
2145 offset,
2146 size,
2147 sync_flags);
2148 }
2149
2150 /* Routine memory_object_unmap */
2151 kern_return_t memory_object_unmap
2152 (
2153 memory_object_t memory_object
2154 )
2155 {
2156 return (memory_object->mo_pager_ops->memory_object_unmap)(
2157 memory_object);
2158 }
2159
2160 /* Routine memory_object_create */
2161 kern_return_t memory_object_create
2162 (
2163 memory_object_default_t default_memory_manager,
2164 vm_size_t new_memory_object_size,
2165 memory_object_t *new_memory_object
2166 )
2167 {
2168 return default_pager_memory_object_create(default_memory_manager,
2169 new_memory_object_size,
2170 new_memory_object);
2171 }
2172
2173 upl_t
2174 convert_port_to_upl(
2175 ipc_port_t port)
2176 {
2177 upl_t upl;
2178
2179 ip_lock(port);
2180 if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
2181 ip_unlock(port);
2182 return (upl_t)NULL;
2183 }
2184 upl = (upl_t) port->ip_kobject;
2185 ip_unlock(port);
2186 upl_lock(upl);
2187 upl->ref_count+=1;
2188 upl_unlock(upl);
2189 return upl;
2190 }
2191
2192 mach_port_t
2193 convert_upl_to_port(
2194 __unused upl_t upl)
2195 {
2196 return MACH_PORT_NULL;
2197 }
2198
2199 __private_extern__ void
2200 upl_no_senders(
2201 __unused ipc_port_t port,
2202 __unused mach_port_mscount_t mscount)
2203 {
2204 return;
2205 }