1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65 #include <advisory_pageout.h>
66
67 /*
68 * Interface dependencies:
69 */
70
71 #include <mach/std_types.h> /* For pointer_t */
72 #include <mach/mach_types.h>
73
74 #include <mach/mig.h>
75 #include <mach/kern_return.h>
76 #include <mach/memory_object.h>
77 #include <mach/memory_object_default.h>
78 #include <mach/memory_object_control_server.h>
79 #include <mach/host_priv_server.h>
80 #include <mach/boolean.h>
81 #include <mach/vm_prot.h>
82 #include <mach/message.h>
83
84 /*
85 * Implementation dependencies:
86 */
87 #include <string.h> /* For memcpy() */
88
89 #include <kern/xpr.h>
90 #include <kern/host.h>
91 #include <kern/thread.h> /* For current_thread() */
92 #include <kern/ipc_mig.h>
93 #include <kern/misc_protos.h>
94
95 #include <vm/vm_object.h>
96 #include <vm/vm_fault.h>
97 #include <vm/memory_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h> /* For pmap_clear_modify */
101 #include <vm/vm_kern.h> /* For kernel_map, vm_move */
102 #include <vm/vm_map.h> /* For vm_map_pageable */
103
104 #if MACH_PAGEMAP
105 #include <vm/vm_external.h>
106 #endif /* MACH_PAGEMAP */
107
108 #include <vm/vm_protos.h>
109
110
111 memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
112 vm_size_t memory_manager_default_cluster = 0;
113 decl_mutex_data(, memory_manager_default_lock)
114
115
116 /*
117 * Routine: memory_object_should_return_page
118 *
119 * Description:
120 * Determine whether the given page should be returned,
121 * based on the page's state and on the given return policy.
122 *
123 * We should return the page if one of the following is true:
124 *
125 * 1. Page is dirty and should_return is not RETURN_NONE.
126 * 2. Page is precious and should_return is RETURN_ALL.
127 * 3. Should_return is RETURN_ANYTHING.
128 *
129 * As a side effect, m->dirty will be made consistent
130 * with pmap_is_modified(m), if should_return is not
131 * MEMORY_OBJECT_RETURN_NONE.
132 */
133
134 #define memory_object_should_return_page(m, should_return) \
135 (should_return != MEMORY_OBJECT_RETURN_NONE && \
136 (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
137 ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
138 (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
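
/*
 * Illustrative example (not from the original source): with should_return
 * equal to MEMORY_OBJECT_RETURN_DIRTY, a clean precious page is not
 * returned, while MEMORY_OBJECT_RETURN_ALL does return it.  A page whose
 * pmap modify bit is set but whose m->dirty is still FALSE has m->dirty
 * set TRUE as a side effect of the first clause and is then returned
 * under any policy other than MEMORY_OBJECT_RETURN_NONE.
 */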
139
140 typedef int memory_object_lock_result_t;
141
142 #define MEMORY_OBJECT_LOCK_RESULT_DONE 0
143 #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
144 #define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
145 #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
146
147 memory_object_lock_result_t memory_object_lock_page(
148 vm_page_t m,
149 memory_object_return_t should_return,
150 boolean_t should_flush,
151 vm_prot_t prot);
152
153 /*
154 * Routine: memory_object_lock_page
155 *
156 * Description:
157 * Perform the appropriate lock operations on the
158 * given page. See the description of
159 * "memory_object_lock_request" for the meanings
160 * of the arguments.
161 *
162 * Returns an indication that the operation
163 * completed, blocked, or that the page must
164 * be cleaned.
165 */
166 memory_object_lock_result_t
167 memory_object_lock_page(
168 vm_page_t m,
169 memory_object_return_t should_return,
170 boolean_t should_flush,
171 vm_prot_t prot)
172 {
173 XPR(XPR_MEMORY_OBJECT,
174 "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
175 (integer_t)m, should_return, should_flush, prot, 0);
176
177 /*
178 * If we cannot change access to the page,
179 * either because a mapping is in progress
180 * (busy page) or because a mapping has been
181 * wired, then give up.
182 */
183
184 if (m->busy || m->cleaning)
185 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
186
187 /*
188 * Don't worry about pages for which the kernel
189 * does not have any data.
190 */
191
192 if (m->absent || m->error || m->restart) {
193 if(m->error && should_flush) {
194 /* dump the page, pager wants us to */
195 /* clean it up and there is no */
196 /* relevant data to return */
197 if(m->wire_count == 0) {
198 VM_PAGE_FREE(m);
199 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
200 }
201 } else {
202 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
203 }
204 }
205
206 assert(!m->fictitious);
207
208 if (m->wire_count != 0) {
209 /*
210 * If no change would take place
211 * anyway, return successfully.
212 *
213 * No change means:
214 * Not flushing AND
215 * No change to page lock [2 checks] AND
216 * Should not return page
217 *
218 * XXX This doesn't handle sending a copy of a wired
219 * XXX page to the pager, but that will require some
220 * XXX significant surgery.
221 */
222 if (!should_flush &&
223 (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
224 ! memory_object_should_return_page(m, should_return)) {
225
226 /*
227 * Restart page unlock requests,
228 * even though no change took place.
229 * [Memory managers may be expecting
230 * to see new requests.]
231 */
232 m->unlock_request = VM_PROT_NONE;
233 PAGE_WAKEUP(m);
234
235 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
236 }
237
238 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
239 }
240
241 /*
242 * If the page is to be flushed, allow
243 * that to be done as part of the protection.
244 */
245
246 if (should_flush)
247 prot = VM_PROT_ALL;
248
249 /*
250 * Set the page lock.
251 *
252 * If we are decreasing permission, do it now;
253 * let the fault handler take care of increases
254 * (pmap_page_protect may not increase protection).
255 */
256
257 if (prot != VM_PROT_NO_CHANGE) {
258 if ((m->page_lock ^ prot) & prot) {
259 pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
260 }
261 #if 0
262 /* code associated with the vestigial
263 * memory_object_data_unlock
264 */
265 m->page_lock = prot;
266 m->lock_supplied = TRUE;
267 if (prot != VM_PROT_NONE)
268 m->unusual = TRUE;
269 else
270 m->unusual = FALSE;
271
272 /*
273 * Restart any past unlock requests, even if no
274 * change resulted. If the manager explicitly
275 * requested no protection change, then it is assumed
276 * to be remembering past requests.
277 */
278
279 m->unlock_request = VM_PROT_NONE;
280 #endif /* 0 */
281 PAGE_WAKEUP(m);
282 }
283
284 /*
285 * Handle page returning.
286 */
287
288 if (memory_object_should_return_page(m, should_return)) {
289
290 /*
291 * If we weren't planning
292 * to flush the page anyway,
293 * we may need to remove the
294 * page from the pageout
295 * system and from physical
296 * maps now.
297 */
298
299 vm_page_lock_queues();
300 VM_PAGE_QUEUES_REMOVE(m);
301 vm_page_unlock_queues();
302
303 if (!should_flush)
304 pmap_disconnect(m->phys_page);
305
306 if (m->dirty)
307 return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
308 else
309 return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
310 }
311
312 /*
313 * Handle flushing
314 */
315
316 if (should_flush) {
317 VM_PAGE_FREE(m);
318 } else {
319 /*
320 * XXX Make clean but not flush a paging hint,
321 * and deactivate the pages. This is a hack
322 * because it overloads flush/clean with
323 * implementation-dependent meaning. This only
324 * happens to pages that are already clean.
325 */
326
327 if (vm_page_deactivate_hint &&
328 (should_return != MEMORY_OBJECT_RETURN_NONE)) {
329 vm_page_lock_queues();
330 vm_page_deactivate(m);
331 vm_page_unlock_queues();
332 }
333 }
334
335 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
336 }
337
338 #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync) \
339 MACRO_BEGIN \
340 \
341 register int upl_flags; \
342 \
343 vm_object_unlock(object); \
344 \
345 if (iosync) \
346 upl_flags = UPL_MSYNC | UPL_IOSYNC; \
347 else \
348 upl_flags = UPL_MSYNC; \
349 \
350 (void) memory_object_data_return(object->pager, \
351 po, \
352 data_cnt, \
353 ro, \
354 ioerr, \
355 (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
356 !should_flush, \
357 upl_flags); \
358 \
359 vm_object_lock(object); \
360 MACRO_END
361
362 /*
363 * Routine: memory_object_lock_request [user interface]
364 *
365 * Description:
366 * Control use of the data associated with the given
367 * memory object. For each page in the given range,
368 * perform the following operations, in order:
369 * 1) restrict access to the page (disallow
370 * forms specified by "prot");
371 * 2) return data to the manager (if "should_return"
372 * is RETURN_DIRTY and the page is dirty, or
373 * "should_return" is RETURN_ALL and the page
374 * is either dirty or precious); and,
375 * 3) flush the cached copy (if "should_flush"
376 * is asserted).
377 * The set of pages is defined by a starting offset
378 * ("offset") and size ("size"). Only pages with the
379 * same page alignment as the starting offset are
380 * considered.
381 *
382  * This interface historically sent a single acknowledgement to a
383  * "reply_to" port when these actions were complete; in this version
384  * the call simply returns once the requested range has been processed.
385  */
386
387 kern_return_t
388 memory_object_lock_request(
389 memory_object_control_t control,
390 memory_object_offset_t offset,
391 memory_object_size_t size,
392 memory_object_offset_t * resid_offset,
393 int * io_errno,
394 memory_object_return_t should_return,
395 int flags,
396 vm_prot_t prot)
397 {
398 vm_object_t object;
399 __unused boolean_t should_flush;
400
401 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
402
403 XPR(XPR_MEMORY_OBJECT,
404 "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
405 (integer_t)control, offset, size,
406 (((should_return&1)<<1)|should_flush), prot);
407
408 /*
409 * Check for bogus arguments.
410 */
411 object = memory_object_control_to_vm_object(control);
412 if (object == VM_OBJECT_NULL)
413 return (KERN_INVALID_ARGUMENT);
414
415 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
416 return (KERN_INVALID_ARGUMENT);
417
418 size = round_page_64(size);
419
420 /*
421 * Lock the object, and acquire a paging reference to
422 * prevent the memory_object reference from being released.
423 */
424 vm_object_lock(object);
425 vm_object_paging_begin(object);
426 offset -= object->paging_offset;
427
428 (void)vm_object_update(object,
429 offset, size, resid_offset, io_errno, should_return, flags, prot);
430
431 vm_object_paging_end(object);
432 vm_object_unlock(object);
433
434 return (KERN_SUCCESS);
435 }
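
/*
 * Illustrative sketch (not from the original source): how a kernel client
 * holding a memory_object_control_t might use memory_object_lock_request()
 * to push back and then discard a page-aligned range.  The helper name and
 * its arguments are hypothetical; the flag and return-policy constants are
 * the ones used elsewhere in this file.
 */
#if 0
static kern_return_t
example_flush_range(
	memory_object_control_t	control,
	memory_object_offset_t	offset,		/* page aligned */
	memory_object_size_t	size)
{
	memory_object_offset_t	resid_offset = 0;
	int			io_errno = 0;

	/*
	 * Return dirty and precious pages to the pager, request
	 * synchronous I/O for the returned data, and flush the
	 * cached copies.  Protection is left unchanged.
	 */
	return memory_object_lock_request(control,
					  offset,
					  size,
					  &resid_offset,
					  &io_errno,
					  MEMORY_OBJECT_RETURN_ALL,
					  MEMORY_OBJECT_DATA_FLUSH |
					      MEMORY_OBJECT_IO_SYNC,
					  VM_PROT_NO_CHANGE);
}
#endif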
436
437 /*
438 * memory_object_release_name: [interface]
439 *
440  * Enforces "name" semantics on a memory_object reference count decrement.
441  * This routine should not be called unless the caller holds a name
442  * reference gained through the memory_object_named_create or the
443  * memory_object_rename call.
444  * If the TERMINATE_IDLE flag is set, the call returns unless the
445  * reference count is 1, i.e. the object is idle with the only remaining
446  * reference being the name.
447  * If the decision is made to proceed, the name flag is cleared and
448  * the reference count is decremented. If the RESPECT_CACHE flag is
449  * set and the reference count has gone to zero, the memory_object is
450  * checked to see if it is cacheable; otherwise, when the reference
451  * count reaches zero, it is simply terminated.
452 */
453
454 kern_return_t
455 memory_object_release_name(
456 memory_object_control_t control,
457 int flags)
458 {
459 vm_object_t object;
460
461 object = memory_object_control_to_vm_object(control);
462 if (object == VM_OBJECT_NULL)
463 return (KERN_INVALID_ARGUMENT);
464
465 return vm_object_release_name(object, flags);
466 }
467
468
469
470 /*
471 * Routine: memory_object_destroy [user interface]
472 * Purpose:
473 * Shut down a memory object, despite the
474 * presence of address map (or other) references
475 * to the vm_object.
476 */
477 kern_return_t
478 memory_object_destroy(
479 memory_object_control_t control,
480 kern_return_t reason)
481 {
482 vm_object_t object;
483
484 object = memory_object_control_to_vm_object(control);
485 if (object == VM_OBJECT_NULL)
486 return (KERN_INVALID_ARGUMENT);
487
488 return (vm_object_destroy(object, reason));
489 }
490
491 /*
492 * Routine: vm_object_sync
493 *
494 * Kernel internal function to synch out pages in a given
495 * range within an object to its memory manager. Much the
496 * same as memory_object_lock_request but page protection
497 * is not changed.
498 *
499  * If the should_flush and should_return flags are true, pages
500  * are flushed; that is, dirty and precious pages are written to
501  * the memory manager and then discarded. If should_return
502  * is false, only precious pages are returned to the memory
503  * manager.
504  *
505  * If should_flush is false and should_return is true, the memory
506  * manager's copy of the pages is updated. If should_return
507  * is also false, only the precious pages are updated. This
508  * last option is of limited utility.
509 *
510 * Returns:
511 * FALSE if no pages were returned to the pager
512 * TRUE otherwise.
513 */
514
515 boolean_t
516 vm_object_sync(
517 vm_object_t object,
518 vm_object_offset_t offset,
519 vm_object_size_t size,
520 boolean_t should_flush,
521 boolean_t should_return,
522 boolean_t should_iosync)
523 {
524 boolean_t rv;
525 int flags;
526
527 XPR(XPR_VM_OBJECT,
528 "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
529 (integer_t)object, offset, size, should_flush, should_return);
530
531 /*
532 * Lock the object, and acquire a paging reference to
533 * prevent the memory_object and control ports from
534 * being destroyed.
535 */
536 vm_object_lock(object);
537 vm_object_paging_begin(object);
538
539 if (should_flush)
540 flags = MEMORY_OBJECT_DATA_FLUSH;
541 else
542 flags = 0;
543
544 if (should_iosync)
545 flags |= MEMORY_OBJECT_IO_SYNC;
546
547 rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
548 (should_return) ?
549 MEMORY_OBJECT_RETURN_ALL :
550 MEMORY_OBJECT_RETURN_NONE,
551 flags,
552 VM_PROT_NO_CHANGE);
553
554
555 vm_object_paging_end(object);
556 vm_object_unlock(object);
557 return rv;
558 }
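
/*
 * Illustrative sketch (not from the original source): an msync-style
 * writeback of a range through vm_object_sync().  The helper name and its
 * arguments are hypothetical placeholders.
 */
#if 0
static void
example_object_writeback(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	/*
	 * Push dirty and precious pages in the range to the memory
	 * manager synchronously, but keep the resident copies
	 * (no flush).
	 */
	(void) vm_object_sync(object, offset, size,
			      FALSE,	/* should_flush */
			      TRUE,	/* should_return */
			      TRUE);	/* should_iosync */
}
#endif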
559
560
561
562
563 static int
564 vm_object_update_extent(
565 vm_object_t object,
566 vm_object_offset_t offset,
567 vm_object_offset_t offset_end,
568 vm_object_offset_t *offset_resid,
569 int *io_errno,
570 boolean_t should_flush,
571 memory_object_return_t should_return,
572 boolean_t should_iosync,
573 vm_prot_t prot)
574 {
575 vm_page_t m;
576 int retval = 0;
577 vm_size_t data_cnt = 0;
578 vm_object_offset_t paging_offset = 0;
579 vm_object_offset_t last_offset = offset;
580 memory_object_lock_result_t page_lock_result;
581 memory_object_lock_result_t pageout_action;
582
583 pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
584
585 for (;
586 offset < offset_end && object->resident_page_count;
587 offset += PAGE_SIZE_64) {
588
589 /*
590 * Limit the number of pages to be cleaned at once.
591 */
592 if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
593 LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
594 pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
595 data_cnt = 0;
596 }
597
598 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
599 page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
600
601 XPR(XPR_MEMORY_OBJECT,
602 "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
603 (integer_t)object, offset, page_lock_result, 0, 0);
604
605 switch (page_lock_result)
606 {
607 case MEMORY_OBJECT_LOCK_RESULT_DONE:
608 /*
609 * End of a cluster of dirty pages.
610 */
611 if (data_cnt) {
612 LIST_REQ_PAGEOUT_PAGES(object,
613 data_cnt, pageout_action,
614 paging_offset, offset_resid, io_errno, should_iosync);
615 data_cnt = 0;
616 continue;
617 }
618 break;
619
620 case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
621 /*
622 * Since it is necessary to block,
623 * clean any dirty pages now.
624 */
625 if (data_cnt) {
626 LIST_REQ_PAGEOUT_PAGES(object,
627 data_cnt, pageout_action,
628 paging_offset, offset_resid, io_errno, should_iosync);
629 data_cnt = 0;
630 continue;
631 }
632 PAGE_SLEEP(object, m, THREAD_UNINT);
633 continue;
634
635 case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
636 case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
637 /*
638 * The clean and return cases are similar.
639 *
640 * if this would form a discontiguous block,
641 * clean the old pages and start anew.
642 *
643 * Mark the page busy since we will unlock the
644 * object if we issue the LIST_REQ_PAGEOUT
645 */
646 m->busy = TRUE;
647 if (data_cnt &&
648 ((last_offset != offset) || (pageout_action != page_lock_result))) {
649 LIST_REQ_PAGEOUT_PAGES(object,
650 data_cnt, pageout_action,
651 paging_offset, offset_resid, io_errno, should_iosync);
652 data_cnt = 0;
653 }
654 m->busy = FALSE;
655
656 if (m->cleaning) {
657 PAGE_SLEEP(object, m, THREAD_UNINT);
658 continue;
659 }
660 if (data_cnt == 0) {
661 pageout_action = page_lock_result;
662 paging_offset = offset;
663 }
664 data_cnt += PAGE_SIZE;
665 last_offset = offset + PAGE_SIZE_64;
666
667 vm_page_lock_queues();
668 /*
669 * Clean
670 */
671 m->list_req_pending = TRUE;
672 m->cleaning = TRUE;
673
674 if (should_flush) {
675 /*
676 * and add additional state
677 * for the flush
678 */
679 m->busy = TRUE;
680 m->pageout = TRUE;
681 vm_page_wire(m);
682 }
683 vm_page_unlock_queues();
684
685 retval = 1;
686 break;
687 }
688 break;
689 }
690 }
691 /*
692 * We have completed the scan for applicable pages.
693 * Clean any pages that have been saved.
694 */
695 if (data_cnt) {
696 LIST_REQ_PAGEOUT_PAGES(object,
697 data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
698 }
699 return (retval);
700 }
701
702
703
704 /*
705 * Routine: vm_object_update
706 * Description:
707 * Work function for m_o_lock_request(), vm_o_sync().
708 *
709 * Called with object locked and paging ref taken.
710 */
711 kern_return_t
712 vm_object_update(
713 register vm_object_t object,
714 register vm_object_offset_t offset,
715 register vm_object_size_t size,
716 register vm_object_offset_t *resid_offset,
717 int *io_errno,
718 memory_object_return_t should_return,
719 int flags,
720 vm_prot_t protection)
721 {
722 vm_object_t copy_object;
723 boolean_t data_returned = FALSE;
724 boolean_t update_cow;
725 boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
726 boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
727 int num_of_extents;
728 int n;
729 #define MAX_EXTENTS 8
730 #define EXTENT_SIZE (1024 * 1024 * 256)
731 #define RESIDENT_LIMIT (1024 * 32)
732 struct extent {
733 vm_object_offset_t e_base;
734 vm_object_offset_t e_min;
735 vm_object_offset_t e_max;
736 } extents[MAX_EXTENTS];
737
738 /*
739 * To avoid blocking while scanning for pages, save
740 * dirty pages to be cleaned all at once.
741 *
742 * XXXO A similar strategy could be used to limit the
743 * number of times that a scan must be restarted for
744 * other reasons. Those pages that would require blocking
745 * could be temporarily collected in another list, or
746 * their offsets could be recorded in a small array.
747 */
748
749 /*
750 * XXX NOTE: May want to consider converting this to a page list
751 * XXX vm_map_copy interface. Need to understand object
752 * XXX coalescing implications before doing so.
753 */
754
755 update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
756 && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
757 !(flags & MEMORY_OBJECT_DATA_PURGE)))
758 || (flags & MEMORY_OBJECT_COPY_SYNC);
759
760
761 if((((copy_object = object->copy) != NULL) && update_cow) ||
762 (flags & MEMORY_OBJECT_DATA_SYNC)) {
763 vm_map_size_t i;
764 vm_map_size_t copy_size;
765 vm_map_offset_t copy_offset;
766 vm_prot_t prot;
767 vm_page_t page;
768 vm_page_t top_page;
769 kern_return_t error = 0;
770
771 if(copy_object != NULL) {
772 /* translate offset with respect to shadow's offset */
773 copy_offset = (offset >= copy_object->shadow_offset)?
774 (vm_map_offset_t)(offset - copy_object->shadow_offset) :
775 (vm_map_offset_t) 0;
776 if(copy_offset > copy_object->size)
777 copy_offset = copy_object->size;
778
779 /* clip size with respect to shadow offset */
780 if (offset >= copy_object->shadow_offset) {
781 copy_size = size;
782 } else if (size >= copy_object->shadow_offset - offset) {
783 copy_size = size -
784 (copy_object->shadow_offset - offset);
785 } else {
786 copy_size = 0;
787 }
788
789 if (copy_offset + copy_size > copy_object->size) {
790 if (copy_object->size >= copy_offset) {
791 copy_size = copy_object->size - copy_offset;
792 } else {
793 copy_size = 0;
794 }
795 }
796
797 copy_size+=copy_offset;
798
799 vm_object_unlock(object);
800 vm_object_lock(copy_object);
801 } else {
802 copy_object = object;
803
804 copy_size = offset + size;
805 copy_offset = offset;
806 }
807
808 vm_object_paging_begin(copy_object);
809 for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
810 RETRY_COW_OF_LOCK_REQUEST:
811 prot = VM_PROT_WRITE|VM_PROT_READ;
812 switch (vm_fault_page(copy_object, i,
813 VM_PROT_WRITE|VM_PROT_READ,
814 FALSE,
815 THREAD_UNINT,
816 copy_offset,
817 copy_offset+copy_size,
818 VM_BEHAVIOR_SEQUENTIAL,
819 &prot,
820 &page,
821 &top_page,
822 (int *)0,
823 &error,
824 FALSE,
825 FALSE, NULL, 0)) {
826
827 case VM_FAULT_SUCCESS:
828 if(top_page) {
829 vm_fault_cleanup(
830 page->object, top_page);
831 PAGE_WAKEUP_DONE(page);
832 vm_page_lock_queues();
833 if (!page->active && !page->inactive)
834 vm_page_activate(page);
835 vm_page_unlock_queues();
836 vm_object_lock(copy_object);
837 vm_object_paging_begin(copy_object);
838 } else {
839 PAGE_WAKEUP_DONE(page);
840 vm_page_lock_queues();
841 if (!page->active && !page->inactive)
842 vm_page_activate(page);
843 vm_page_unlock_queues();
844 }
845 break;
846 case VM_FAULT_RETRY:
847 prot = VM_PROT_WRITE|VM_PROT_READ;
848 vm_object_lock(copy_object);
849 vm_object_paging_begin(copy_object);
850 goto RETRY_COW_OF_LOCK_REQUEST;
851 case VM_FAULT_INTERRUPTED:
852 prot = VM_PROT_WRITE|VM_PROT_READ;
853 vm_object_lock(copy_object);
854 vm_object_paging_begin(copy_object);
855 goto RETRY_COW_OF_LOCK_REQUEST;
856 case VM_FAULT_MEMORY_SHORTAGE:
857 VM_PAGE_WAIT();
858 prot = VM_PROT_WRITE|VM_PROT_READ;
859 vm_object_lock(copy_object);
860 vm_object_paging_begin(copy_object);
861 goto RETRY_COW_OF_LOCK_REQUEST;
862 case VM_FAULT_FICTITIOUS_SHORTAGE:
863 vm_page_more_fictitious();
864 prot = VM_PROT_WRITE|VM_PROT_READ;
865 vm_object_lock(copy_object);
866 vm_object_paging_begin(copy_object);
867 goto RETRY_COW_OF_LOCK_REQUEST;
868 case VM_FAULT_MEMORY_ERROR:
869 vm_object_lock(object);
870 goto BYPASS_COW_COPYIN;
871 }
872
873 }
874 vm_object_paging_end(copy_object);
875 if(copy_object != object) {
876 vm_object_unlock(copy_object);
877 vm_object_lock(object);
878 }
879 }
880 if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
881 return KERN_SUCCESS;
882 }
883 if(((copy_object = object->copy) != NULL) &&
884 (flags & MEMORY_OBJECT_DATA_PURGE)) {
885 copy_object->shadow_severed = TRUE;
886 copy_object->shadowed = FALSE;
887 copy_object->shadow = NULL;
888 /* delete the ref the COW was holding on the target object */
889 vm_object_deallocate(object);
890 }
891 BYPASS_COW_COPYIN:
892
893 /*
894 * when we have a really large range to check relative
895 * to the number of actual resident pages, we'd like
896 * to use the resident page list to drive our checks
897 * however, the object lock will get dropped while processing
898 * the page which means the resident queue can change which
899 * means we can't walk the queue as we process the pages
900 * we also want to do the processing in offset order to allow
901 * 'runs' of pages to be collected if we're being told to
902 * flush to disk... the resident page queue is NOT ordered.
903 *
904 * a temporary solution (until we figure out how to deal with
905 * large address spaces more generically) is to pre-flight
906 * the resident page queue (if it's small enough) and develop
907 * a collection of extents (that encompass actual resident pages)
908 * to visit. This will at least allow us to deal with some of the
909 * more pathological cases in a more efficient manner. The current
910 * worst case (a single resident page at the end of an extremely large
911  * range) can take minutes to complete for ranges in the terabyte
912 * category... since this routine is called when truncating a file,
913 * and we currently support files up to 16 Tbytes in size, this
914 * is not a theoretical problem
915 */
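
/*
 * Worked example (illustrative, not from the original source): with
 * EXTENT_SIZE of 256MB, resident pages at offsets 0x1000 and 0x5000 land
 * in the extent whose e_base is 0, giving e_min 0x1000 and e_max
 * 0x5000 + (PAGE_SIZE - 1); a resident page at offset 0x10000000 starts a
 * second extent with e_base 0x10000000.  Once more than MAX_EXTENTS (8)
 * distinct 256MB regions are encountered, the code below collapses
 * everything into a single extent covering the overall min/max offsets.
 */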
916
917 if ((object->resident_page_count < RESIDENT_LIMIT) &&
918 (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
919 vm_page_t next;
920 vm_object_offset_t start;
921 vm_object_offset_t end;
922 vm_object_size_t e_mask;
923 vm_page_t m;
924
925 start = offset;
926 end = offset + size;
927 num_of_extents = 0;
928 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
929
930 m = (vm_page_t) queue_first(&object->memq);
931
932 while (!queue_end(&object->memq, (queue_entry_t) m)) {
933 next = (vm_page_t) queue_next(&m->listq);
934
935 if ((m->offset >= start) && (m->offset < end)) {
936 /*
937 * this is a page we're interested in
938 * try to fit it into a current extent
939 */
940 for (n = 0; n < num_of_extents; n++) {
941 if ((m->offset & e_mask) == extents[n].e_base) {
942 /*
943 * use (PAGE_SIZE - 1) to determine the
944 * max offset so that we don't wrap if
945 * we're at the last page of the space
946 */
947 if (m->offset < extents[n].e_min)
948 extents[n].e_min = m->offset;
949 else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
950 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
951 break;
952 }
953 }
954 if (n == num_of_extents) {
955 /*
956 * didn't find a current extent that can encompass
957 * this page
958 */
959 if (n < MAX_EXTENTS) {
960 /*
961 * if we still have room,
962 * create a new extent
963 */
964 extents[n].e_base = m->offset & e_mask;
965 extents[n].e_min = m->offset;
966 extents[n].e_max = m->offset + (PAGE_SIZE - 1);
967
968 num_of_extents++;
969 } else {
970 /*
971 * no room to create a new extent...
972 * fall back to a single extent based
973 * on the min and max page offsets
974 * we find in the range we're interested in...
975 * first, look through the extent list and
976 * develop the overall min and max for the
977 * pages we've looked at up to this point
978 */
979 for (n = 1; n < num_of_extents; n++) {
980 if (extents[n].e_min < extents[0].e_min)
981 extents[0].e_min = extents[n].e_min;
982 if (extents[n].e_max > extents[0].e_max)
983 extents[0].e_max = extents[n].e_max;
984 }
985 /*
986 * now setup to run through the remaining pages
987 * to determine the overall min and max
988 * offset for the specified range
989 */
990 extents[0].e_base = 0;
991 e_mask = 0;
992 num_of_extents = 1;
993
994 /*
995 * by continuing, we'll reprocess the
996 * page that forced us to abandon trying
997 * to develop multiple extents
998 */
999 continue;
1000 }
1001 }
1002 }
1003 m = next;
1004 }
1005 } else {
1006 extents[0].e_min = offset;
1007 extents[0].e_max = offset + (size - 1);
1008
1009 num_of_extents = 1;
1010 }
1011 for (n = 0; n < num_of_extents; n++) {
1012 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1013 should_flush, should_return, should_iosync, protection))
1014 data_returned = TRUE;
1015 }
1016 return (data_returned);
1017 }
1018
1019
1020 /*
1021 * Routine: memory_object_synchronize_completed [user interface]
1022 *
1023  * Tell the kernel that previously synchronized data
1024  * (memory_object_synchronize) has been queued or placed on the
1025 * backing storage.
1026 *
1027 * Note: there may be multiple synchronize requests for a given
1028 * memory object outstanding but they will not overlap.
1029 */
1030
1031 kern_return_t
1032 memory_object_synchronize_completed(
1033 memory_object_control_t control,
1034 memory_object_offset_t offset,
1035 vm_offset_t length)
1036 {
1037 vm_object_t object;
1038 msync_req_t msr;
1039
1040 object = memory_object_control_to_vm_object(control);
1041
1042 XPR(XPR_MEMORY_OBJECT,
1043 "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
1044 (integer_t)object, offset, length, 0, 0);
1045
1046 /*
1047 * Look for bogus arguments
1048 */
1049
1050 if (object == VM_OBJECT_NULL)
1051 return (KERN_INVALID_ARGUMENT);
1052
1053 vm_object_lock(object);
1054
1055 /*
1056 * search for sync request structure
1057 */
1058 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
1059 if (msr->offset == offset && msr->length == length) {
1060 queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
1061 break;
1062 }
1063 }/* queue_iterate */
1064
1065 if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
1066 vm_object_unlock(object);
1067 return KERN_INVALID_ARGUMENT;
1068 }
1069
1070 msr_lock(msr);
1071 vm_object_unlock(object);
1072 msr->flag = VM_MSYNC_DONE;
1073 msr_unlock(msr);
1074 thread_wakeup((event_t) msr);
1075
1076 return KERN_SUCCESS;
1077 }/* memory_object_synchronize_completed */
1078
1079 static kern_return_t
1080 vm_object_set_attributes_common(
1081 vm_object_t object,
1082 boolean_t may_cache,
1083 memory_object_copy_strategy_t copy_strategy,
1084 boolean_t temporary,
1085 memory_object_cluster_size_t cluster_size,
1086 boolean_t silent_overwrite,
1087 boolean_t advisory_pageout)
1088 {
1089 boolean_t object_became_ready;
1090
1091 XPR(XPR_MEMORY_OBJECT,
1092 "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
1093 		(integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);
1094
1095 if (object == VM_OBJECT_NULL)
1096 return(KERN_INVALID_ARGUMENT);
1097
1098 /*
1099 * Verify the attributes of importance
1100 */
1101
1102 switch(copy_strategy) {
1103 case MEMORY_OBJECT_COPY_NONE:
1104 case MEMORY_OBJECT_COPY_DELAY:
1105 break;
1106 default:
1107 return(KERN_INVALID_ARGUMENT);
1108 }
1109
1110 #if !ADVISORY_PAGEOUT
1111 if (silent_overwrite || advisory_pageout)
1112 return(KERN_INVALID_ARGUMENT);
1113
1114 #endif /* !ADVISORY_PAGEOUT */
1115 if (may_cache)
1116 may_cache = TRUE;
1117 if (temporary)
1118 temporary = TRUE;
1119 if (cluster_size != 0) {
1120 int pages_per_cluster;
1121 pages_per_cluster = atop_32(cluster_size);
1122 /*
1123 * Cluster size must be integral multiple of page size,
1124 * and be a power of 2 number of pages.
1125 */
1126 if ((cluster_size & (PAGE_SIZE-1)) ||
1127 ((pages_per_cluster-1) & pages_per_cluster))
1128 return KERN_INVALID_ARGUMENT;
1129 }
1130
1131 vm_object_lock(object);
1132
1133 /*
1134 * Copy the attributes
1135 */
1136 assert(!object->internal);
1137 object_became_ready = !object->pager_ready;
1138 object->copy_strategy = copy_strategy;
1139 object->can_persist = may_cache;
1140 object->temporary = temporary;
1141 object->silent_overwrite = silent_overwrite;
1142 object->advisory_pageout = advisory_pageout;
1143 if (cluster_size == 0)
1144 cluster_size = PAGE_SIZE;
1145 object->cluster_size = cluster_size;
1146
1147 assert(cluster_size >= PAGE_SIZE &&
1148 cluster_size % PAGE_SIZE == 0);
1149
1150 /*
1151 * Wake up anyone waiting for the ready attribute
1152 * to become asserted.
1153 */
1154
1155 if (object_became_ready) {
1156 object->pager_ready = TRUE;
1157 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1158 }
1159
1160 vm_object_unlock(object);
1161
1162 return(KERN_SUCCESS);
1163 }
1164
1165 /*
1166 * Set the memory object attribute as provided.
1167 *
1168 * XXX This routine cannot be completed until the vm_msync, clean
1169 * in place, and cluster work is completed. See ifdef notyet
1170 * below and note that vm_object_set_attributes_common()
1171 * may have to be expanded.
1172 */
1173 kern_return_t
1174 memory_object_change_attributes(
1175 memory_object_control_t control,
1176 memory_object_flavor_t flavor,
1177 memory_object_info_t attributes,
1178 mach_msg_type_number_t count)
1179 {
1180 vm_object_t object;
1181 kern_return_t result = KERN_SUCCESS;
1182 boolean_t temporary;
1183 boolean_t may_cache;
1184 boolean_t invalidate;
1185 memory_object_cluster_size_t cluster_size;
1186 memory_object_copy_strategy_t copy_strategy;
1187 boolean_t silent_overwrite;
1188 boolean_t advisory_pageout;
1189
1190 object = memory_object_control_to_vm_object(control);
1191 if (object == VM_OBJECT_NULL)
1192 return (KERN_INVALID_ARGUMENT);
1193
1194 vm_object_lock(object);
1195
1196 temporary = object->temporary;
1197 may_cache = object->can_persist;
1198 copy_strategy = object->copy_strategy;
1199 silent_overwrite = object->silent_overwrite;
1200 advisory_pageout = object->advisory_pageout;
1201 #if notyet
1202 invalidate = object->invalidate;
1203 #endif
1204 cluster_size = object->cluster_size;
1205 vm_object_unlock(object);
1206
1207 switch (flavor) {
1208 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1209 {
1210 old_memory_object_behave_info_t behave;
1211
1212 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1213 result = KERN_INVALID_ARGUMENT;
1214 break;
1215 }
1216
1217 behave = (old_memory_object_behave_info_t) attributes;
1218
1219 temporary = behave->temporary;
1220 invalidate = behave->invalidate;
1221 copy_strategy = behave->copy_strategy;
1222
1223 break;
1224 }
1225
1226 case MEMORY_OBJECT_BEHAVIOR_INFO:
1227 {
1228 memory_object_behave_info_t behave;
1229
1230 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1231 result = KERN_INVALID_ARGUMENT;
1232 break;
1233 }
1234
1235 behave = (memory_object_behave_info_t) attributes;
1236
1237 temporary = behave->temporary;
1238 invalidate = behave->invalidate;
1239 copy_strategy = behave->copy_strategy;
1240 silent_overwrite = behave->silent_overwrite;
1241 advisory_pageout = behave->advisory_pageout;
1242 break;
1243 }
1244
1245 case MEMORY_OBJECT_PERFORMANCE_INFO:
1246 {
1247 memory_object_perf_info_t perf;
1248
1249 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1250 result = KERN_INVALID_ARGUMENT;
1251 break;
1252 }
1253
1254 perf = (memory_object_perf_info_t) attributes;
1255
1256 may_cache = perf->may_cache;
1257 cluster_size = round_page_32(perf->cluster_size);
1258
1259 break;
1260 }
1261
1262 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1263 {
1264 old_memory_object_attr_info_t attr;
1265
1266 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1267 result = KERN_INVALID_ARGUMENT;
1268 break;
1269 }
1270
1271 attr = (old_memory_object_attr_info_t) attributes;
1272
1273 may_cache = attr->may_cache;
1274 copy_strategy = attr->copy_strategy;
1275 cluster_size = page_size;
1276
1277 break;
1278 }
1279
1280 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1281 {
1282 memory_object_attr_info_t attr;
1283
1284 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1285 result = KERN_INVALID_ARGUMENT;
1286 break;
1287 }
1288
1289 attr = (memory_object_attr_info_t) attributes;
1290
1291 copy_strategy = attr->copy_strategy;
1292 may_cache = attr->may_cache_object;
1293 cluster_size = attr->cluster_size;
1294 temporary = attr->temporary;
1295
1296 break;
1297 }
1298
1299 default:
1300 result = KERN_INVALID_ARGUMENT;
1301 break;
1302 }
1303
1304 if (result != KERN_SUCCESS)
1305 return(result);
1306
1307 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1308 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1309 temporary = TRUE;
1310 } else {
1311 temporary = FALSE;
1312 }
1313
1314 /*
1315 * XXX may_cache may become a tri-valued variable to handle
1316 * XXX uncache if not in use.
1317 */
1318 return (vm_object_set_attributes_common(object,
1319 may_cache,
1320 copy_strategy,
1321 temporary,
1322 cluster_size,
1323 silent_overwrite,
1324 advisory_pageout));
1325 }
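
/*
 * Illustrative sketch (not from the original source): setting the
 * performance attributes (caching policy and cluster size) of a managed
 * object.  The memory_object_perf_info_data_t type is assumed to come from
 * the standard memory_object headers; the helper name, control handle and
 * 16-page cluster size are hypothetical.
 */
#if 0
static kern_return_t
example_set_perf_info(
	memory_object_control_t	control)
{
	memory_object_perf_info_data_t	perf;

	perf.cluster_size = 16 * PAGE_SIZE;	/* power-of-2 number of pages */
	perf.may_cache = TRUE;

	return memory_object_change_attributes(control,
					       MEMORY_OBJECT_PERFORMANCE_INFO,
					       (memory_object_info_t) &perf,
					       MEMORY_OBJECT_PERF_INFO_COUNT);
}
#endif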
1326
1327 kern_return_t
1328 memory_object_get_attributes(
1329 memory_object_control_t control,
1330 memory_object_flavor_t flavor,
1331 memory_object_info_t attributes, /* pointer to OUT array */
1332 mach_msg_type_number_t *count) /* IN/OUT */
1333 {
1334 kern_return_t ret = KERN_SUCCESS;
1335 vm_object_t object;
1336
1337 object = memory_object_control_to_vm_object(control);
1338 if (object == VM_OBJECT_NULL)
1339 return (KERN_INVALID_ARGUMENT);
1340
1341 vm_object_lock(object);
1342
1343 switch (flavor) {
1344 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1345 {
1346 old_memory_object_behave_info_t behave;
1347
1348 if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1349 ret = KERN_INVALID_ARGUMENT;
1350 break;
1351 }
1352
1353 behave = (old_memory_object_behave_info_t) attributes;
1354 behave->copy_strategy = object->copy_strategy;
1355 behave->temporary = object->temporary;
1356 #if notyet /* remove when vm_msync complies and clean in place fini */
1357 behave->invalidate = object->invalidate;
1358 #else
1359 behave->invalidate = FALSE;
1360 #endif
1361
1362 *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1363 break;
1364 }
1365
1366 case MEMORY_OBJECT_BEHAVIOR_INFO:
1367 {
1368 memory_object_behave_info_t behave;
1369
1370 if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1371 ret = KERN_INVALID_ARGUMENT;
1372 break;
1373 }
1374
1375 behave = (memory_object_behave_info_t) attributes;
1376 behave->copy_strategy = object->copy_strategy;
1377 behave->temporary = object->temporary;
1378 #if notyet /* remove when vm_msync complies and clean in place fini */
1379 behave->invalidate = object->invalidate;
1380 #else
1381 behave->invalidate = FALSE;
1382 #endif
1383 behave->advisory_pageout = object->advisory_pageout;
1384 behave->silent_overwrite = object->silent_overwrite;
1385 *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1386 break;
1387 }
1388
1389 case MEMORY_OBJECT_PERFORMANCE_INFO:
1390 {
1391 memory_object_perf_info_t perf;
1392
1393 if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
1394 ret = KERN_INVALID_ARGUMENT;
1395 break;
1396 }
1397
1398 perf = (memory_object_perf_info_t) attributes;
1399 perf->cluster_size = object->cluster_size;
1400 perf->may_cache = object->can_persist;
1401
1402 *count = MEMORY_OBJECT_PERF_INFO_COUNT;
1403 break;
1404 }
1405
1406 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1407 {
1408 old_memory_object_attr_info_t attr;
1409
1410 if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1411 ret = KERN_INVALID_ARGUMENT;
1412 break;
1413 }
1414
1415 attr = (old_memory_object_attr_info_t) attributes;
1416 attr->may_cache = object->can_persist;
1417 attr->copy_strategy = object->copy_strategy;
1418
1419 *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
1420 break;
1421 }
1422
1423 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1424 {
1425 memory_object_attr_info_t attr;
1426
1427 if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
1428 ret = KERN_INVALID_ARGUMENT;
1429 break;
1430 }
1431
1432 attr = (memory_object_attr_info_t) attributes;
1433 attr->copy_strategy = object->copy_strategy;
1434 attr->cluster_size = object->cluster_size;
1435 attr->may_cache_object = object->can_persist;
1436 attr->temporary = object->temporary;
1437
1438 *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
1439 break;
1440 }
1441
1442 default:
1443 ret = KERN_INVALID_ARGUMENT;
1444 break;
1445 }
1446
1447 vm_object_unlock(object);
1448
1449 return(ret);
1450 }
1451
1452
1453 kern_return_t
1454 memory_object_iopl_request(
1455 ipc_port_t port,
1456 memory_object_offset_t offset,
1457 upl_size_t *upl_size,
1458 upl_t *upl_ptr,
1459 upl_page_info_array_t user_page_list,
1460 unsigned int *page_list_count,
1461 int *flags)
1462 {
1463 vm_object_t object;
1464 kern_return_t ret;
1465 int caller_flags;
1466
1467 caller_flags = *flags;
1468
1469 if (caller_flags & ~UPL_VALID_FLAGS) {
1470 /*
1471 * For forward compatibility's sake,
1472 * reject any unknown flag.
1473 */
1474 return KERN_INVALID_VALUE;
1475 }
1476
1477 if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
1478 vm_named_entry_t named_entry;
1479
1480 named_entry = (vm_named_entry_t)port->ip_kobject;
1481 /* a few checks to make sure user is obeying rules */
1482 if(*upl_size == 0) {
1483 if(offset >= named_entry->size)
1484 return(KERN_INVALID_RIGHT);
1485 *upl_size = named_entry->size - offset;
1486 }
1487 if(caller_flags & UPL_COPYOUT_FROM) {
1488 if((named_entry->protection & VM_PROT_READ)
1489 != VM_PROT_READ) {
1490 return(KERN_INVALID_RIGHT);
1491 }
1492 } else {
1493 if((named_entry->protection &
1494 (VM_PROT_READ | VM_PROT_WRITE))
1495 != (VM_PROT_READ | VM_PROT_WRITE)) {
1496 return(KERN_INVALID_RIGHT);
1497 }
1498 }
1499 if(named_entry->size < (offset + *upl_size))
1500 return(KERN_INVALID_ARGUMENT);
1501
1502 		/* the caller's parameter offset is defined to be the */
1503 		/* offset from the beginning of the named entry's offset in the object */
1504 offset = offset + named_entry->offset;
1505
1506 if(named_entry->is_sub_map)
1507 return (KERN_INVALID_ARGUMENT);
1508
1509 named_entry_lock(named_entry);
1510
1511 if (named_entry->is_pager) {
1512 object = vm_object_enter(named_entry->backing.pager,
1513 named_entry->offset + named_entry->size,
1514 named_entry->internal,
1515 FALSE,
1516 FALSE);
1517 if (object == VM_OBJECT_NULL) {
1518 named_entry_unlock(named_entry);
1519 return(KERN_INVALID_OBJECT);
1520 }
1521
1522 /* JMM - drop reference on pager here? */
1523
1524 /* create an extra reference for the named entry */
1525 vm_object_lock(object);
1526 vm_object_reference_locked(object);
1527 named_entry->backing.object = object;
1528 named_entry->is_pager = FALSE;
1529 named_entry_unlock(named_entry);
1530
1531 /* wait for object to be ready */
1532 while (!object->pager_ready) {
1533 vm_object_wait(object,
1534 VM_OBJECT_EVENT_PAGER_READY,
1535 THREAD_UNINT);
1536 vm_object_lock(object);
1537 }
1538 vm_object_unlock(object);
1539 } else {
1540 /* This is the case where we are going to map */
1541 /* an already mapped object. If the object is */
1542 /* not ready it is internal. An external */
1543 /* object cannot be mapped until it is ready */
1544 /* we can therefore avoid the ready check */
1545 /* in this case. */
1546 object = named_entry->backing.object;
1547 vm_object_reference(object);
1548 named_entry_unlock(named_entry);
1549 }
1550 } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
1551 memory_object_control_t control;
1552 control = (memory_object_control_t) port;
1553 if (control == NULL)
1554 return (KERN_INVALID_ARGUMENT);
1555 object = memory_object_control_to_vm_object(control);
1556 if (object == VM_OBJECT_NULL)
1557 return (KERN_INVALID_ARGUMENT);
1558 vm_object_reference(object);
1559 } else {
1560 return KERN_INVALID_ARGUMENT;
1561 }
1562 if (object == VM_OBJECT_NULL)
1563 return (KERN_INVALID_ARGUMENT);
1564
1565 if (!object->private) {
1566 if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
1567 *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
1568 if (object->phys_contiguous) {
1569 *flags = UPL_PHYS_CONTIG;
1570 } else {
1571 *flags = 0;
1572 }
1573 } else {
1574 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1575 }
1576
1577 ret = vm_object_iopl_request(object,
1578 offset,
1579 *upl_size,
1580 upl_ptr,
1581 user_page_list,
1582 page_list_count,
1583 caller_flags);
1584 vm_object_deallocate(object);
1585 return ret;
1586 }
1587
1588 /*
1589 * Routine: memory_object_upl_request [interface]
1590 * Purpose:
1591 * Cause the population of a portion of a vm_object.
1592 * Depending on the nature of the request, the pages
1593  *		returned may contain valid data or be uninitialized.
1594 *
1595 */
1596
1597 kern_return_t
1598 memory_object_upl_request(
1599 memory_object_control_t control,
1600 memory_object_offset_t offset,
1601 upl_size_t size,
1602 upl_t *upl_ptr,
1603 upl_page_info_array_t user_page_list,
1604 unsigned int *page_list_count,
1605 int cntrl_flags)
1606 {
1607 vm_object_t object;
1608
1609 object = memory_object_control_to_vm_object(control);
1610 if (object == VM_OBJECT_NULL)
1611 return (KERN_INVALID_ARGUMENT);
1612
1613 return vm_object_upl_request(object,
1614 offset,
1615 size,
1616 upl_ptr,
1617 user_page_list,
1618 page_list_count,
1619 cntrl_flags);
1620 }
1621
1622 /*
1623 * Routine: memory_object_super_upl_request [interface]
1624 * Purpose:
1625 * Cause the population of a portion of a vm_object
1626 * in much the same way as memory_object_upl_request.
1627 * Depending on the nature of the request, the pages
1628  *		returned may contain valid data or be uninitialized.
1629 * However, the region may be expanded up to the super
1630 * cluster size provided.
1631 */
1632
1633 kern_return_t
1634 memory_object_super_upl_request(
1635 memory_object_control_t control,
1636 memory_object_offset_t offset,
1637 upl_size_t size,
1638 upl_size_t super_cluster,
1639 upl_t *upl,
1640 upl_page_info_t *user_page_list,
1641 unsigned int *page_list_count,
1642 int cntrl_flags)
1643 {
1644 vm_object_t object;
1645
1646 object = memory_object_control_to_vm_object(control);
1647 if (object == VM_OBJECT_NULL)
1648 return (KERN_INVALID_ARGUMENT);
1649
1650 return vm_object_super_upl_request(object,
1651 offset,
1652 size,
1653 super_cluster,
1654 upl,
1655 user_page_list,
1656 page_list_count,
1657 cntrl_flags);
1658 }
1659
1660 int vm_stat_discard_cleared_reply = 0;
1661 int vm_stat_discard_cleared_unset = 0;
1662 int vm_stat_discard_cleared_too_late = 0;
1663
1664
1665
1666 /*
1667 * Routine: host_default_memory_manager [interface]
1668 * Purpose:
1669 * set/get the default memory manager port and default cluster
1670 * size.
1671 *
1672 * If successful, consumes the supplied naked send right.
1673 */
1674 kern_return_t
1675 host_default_memory_manager(
1676 host_priv_t host_priv,
1677 memory_object_default_t *default_manager,
1678 memory_object_cluster_size_t cluster_size)
1679 {
1680 memory_object_default_t current_manager;
1681 memory_object_default_t new_manager;
1682 memory_object_default_t returned_manager;
1683
1684 if (host_priv == HOST_PRIV_NULL)
1685 return(KERN_INVALID_HOST);
1686
1687 assert(host_priv == &realhost);
1688
1689 new_manager = *default_manager;
1690 mutex_lock(&memory_manager_default_lock);
1691 current_manager = memory_manager_default;
1692
1693 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1694 /*
1695 * Retrieve the current value.
1696 */
1697 memory_object_default_reference(current_manager);
1698 returned_manager = current_manager;
1699 } else {
1700 /*
1701 * Retrieve the current value,
1702 * and replace it with the supplied value.
1703 * We return the old reference to the caller
1704 * but we have to take a reference on the new
1705 * one.
1706 */
1707
1708 returned_manager = current_manager;
1709 memory_manager_default = new_manager;
1710 memory_object_default_reference(new_manager);
1711
1712 if (cluster_size % PAGE_SIZE != 0) {
1713 #if 0
1714 mutex_unlock(&memory_manager_default_lock);
1715 return KERN_INVALID_ARGUMENT;
1716 #else
1717 cluster_size = round_page_32(cluster_size);
1718 #endif
1719 }
1720 memory_manager_default_cluster = cluster_size;
1721
1722 /*
1723 * In case anyone's been waiting for a memory
1724 * manager to be established, wake them up.
1725 */
1726
1727 thread_wakeup((event_t) &memory_manager_default);
1728 }
1729
1730 mutex_unlock(&memory_manager_default_lock);
1731
1732 *default_manager = returned_manager;
1733 return(KERN_SUCCESS);
1734 }
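
/*
 * Illustrative sketch (not from the original source): querying the current
 * default memory manager without replacing it, by passing
 * MEMORY_OBJECT_DEFAULT_NULL as the new manager.  The helper name is
 * hypothetical; host_priv is assumed to be the caller's privileged host
 * port.
 */
#if 0
static memory_object_default_t
example_get_default_manager(
	host_priv_t	host_priv)
{
	memory_object_default_t	manager = MEMORY_OBJECT_DEFAULT_NULL;

	/*
	 * On success the existing manager stays in place and a new
	 * reference to it is handed back in 'manager'.
	 */
	if (host_default_memory_manager(host_priv, &manager, 0) != KERN_SUCCESS)
		return MEMORY_OBJECT_DEFAULT_NULL;

	return manager;
}
#endif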
1735
1736 /*
1737 * Routine: memory_manager_default_reference
1738 * Purpose:
1739 * Returns a naked send right for the default
1740 * memory manager. The returned right is always
1741 * valid (not IP_NULL or IP_DEAD).
1742 */
1743
1744 __private_extern__ memory_object_default_t
1745 memory_manager_default_reference(
1746 memory_object_cluster_size_t *cluster_size)
1747 {
1748 memory_object_default_t current_manager;
1749
1750 mutex_lock(&memory_manager_default_lock);
1751 current_manager = memory_manager_default;
1752 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1753 wait_result_t res;
1754
1755 res = thread_sleep_mutex((event_t) &memory_manager_default,
1756 &memory_manager_default_lock,
1757 THREAD_UNINT);
1758 assert(res == THREAD_AWAKENED);
1759 current_manager = memory_manager_default;
1760 }
1761 memory_object_default_reference(current_manager);
1762 *cluster_size = memory_manager_default_cluster;
1763 mutex_unlock(&memory_manager_default_lock);
1764
1765 return current_manager;
1766 }
1767
1768 /*
1769 * Routine: memory_manager_default_check
1770 *
1771 * Purpose:
1772 * Check whether a default memory manager has been set
1773 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1774 * and KERN_FAILURE if dmm does not exist.
1775 *
1776 * If there is no default memory manager, log an error,
1777 * but only the first time.
1778 *
1779 */
1780 __private_extern__ kern_return_t
1781 memory_manager_default_check(void)
1782 {
1783 memory_object_default_t current;
1784
1785 mutex_lock(&memory_manager_default_lock);
1786 current = memory_manager_default;
1787 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1788 static boolean_t logged; /* initialized to 0 */
1789 boolean_t complain = !logged;
1790 logged = TRUE;
1791 mutex_unlock(&memory_manager_default_lock);
1792 if (complain)
1793 printf("Warning: No default memory manager\n");
1794 return(KERN_FAILURE);
1795 } else {
1796 mutex_unlock(&memory_manager_default_lock);
1797 return(KERN_SUCCESS);
1798 }
1799 }
1800
1801 __private_extern__ void
1802 memory_manager_default_init(void)
1803 {
1804 memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
1805 mutex_init(&memory_manager_default_lock, 0);
1806 }
1807
1808
1809
1810 /* Allow manipulation of individual page state. This is actually part of */
1811 /* the UPL regimen but takes place on the object rather than on a UPL */
1812
1813 kern_return_t
1814 memory_object_page_op(
1815 memory_object_control_t control,
1816 memory_object_offset_t offset,
1817 int ops,
1818 ppnum_t *phys_entry,
1819 int *flags)
1820 {
1821 vm_object_t object;
1822
1823 object = memory_object_control_to_vm_object(control);
1824 if (object == VM_OBJECT_NULL)
1825 return (KERN_INVALID_ARGUMENT);
1826
1827 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1828 }
1829
1830 /*
1831  * memory_object_range_op offers a performance enhancement over
1832 * memory_object_page_op for page_op functions which do not require page
1833 * level state to be returned from the call. Page_op was created to provide
1834 * a low-cost alternative to page manipulation via UPLs when only a single
1835 * page was involved. The range_op call establishes the ability in the _op
1836 * family of functions to work on multiple pages where the lack of page level
1837 * state handling allows the caller to avoid the overhead of the upl structures.
1838 */
1839
1840 kern_return_t
1841 memory_object_range_op(
1842 memory_object_control_t control,
1843 memory_object_offset_t offset_beg,
1844 memory_object_offset_t offset_end,
1845 int ops,
1846 int *range)
1847 {
1848 vm_object_t object;
1849
1850 object = memory_object_control_to_vm_object(control);
1851 if (object == VM_OBJECT_NULL)
1852 return (KERN_INVALID_ARGUMENT);
1853
1854 return vm_object_range_op(object,
1855 offset_beg,
1856 offset_end,
1857 ops,
1858 range);
1859 }
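
/*
 * Illustrative sketch (not from the original source): discarding any
 * resident pages backing a byte range without building a UPL, which is
 * the sort of caller memory_object_range_op() is meant for.  UPL_ROP_DUMP
 * is assumed to be one of the range-op codes from the memory_object
 * headers; the helper name and offsets are hypothetical.
 */
#if 0
static void
example_dump_range(
	memory_object_control_t	control,
	memory_object_offset_t	start,
	memory_object_offset_t	end)
{
	int	range = 0;

	/* No per-page state is wanted back, so page_op is not needed. */
	(void) memory_object_range_op(control, start, end,
				      UPL_ROP_DUMP, &range);
}
#endif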
1860
1861
1862 kern_return_t
1863 memory_object_pages_resident(
1864 memory_object_control_t control,
1865 boolean_t * has_pages_resident)
1866 {
1867 vm_object_t object;
1868
1869 *has_pages_resident = FALSE;
1870
1871 object = memory_object_control_to_vm_object(control);
1872 if (object == VM_OBJECT_NULL)
1873 return (KERN_INVALID_ARGUMENT);
1874
1875 if (object->resident_page_count)
1876 *has_pages_resident = TRUE;
1877
1878 return (KERN_SUCCESS);
1879 }
1880
1881
1882 static zone_t mem_obj_control_zone;
1883
1884 __private_extern__ void
1885 memory_object_control_bootstrap(void)
1886 {
1887 int i;
1888
1889 i = (vm_size_t) sizeof (struct memory_object_control);
1890 mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
1891 return;
1892 }
1893
1894 __private_extern__ memory_object_control_t
1895 memory_object_control_allocate(
1896 vm_object_t object)
1897 {
1898 memory_object_control_t control;
1899
1900 control = (memory_object_control_t)zalloc(mem_obj_control_zone);
1901 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1902 control->moc_object = object;
1903 control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
1904 }
1905 return (control);
1906 }
1907
1908 __private_extern__ void
1909 memory_object_control_collapse(
1910 memory_object_control_t control,
1911 vm_object_t object)
1912 {
1913 assert((control->moc_object != VM_OBJECT_NULL) &&
1914 (control->moc_object != object));
1915 control->moc_object = object;
1916 }
1917
1918 __private_extern__ vm_object_t
1919 memory_object_control_to_vm_object(
1920 memory_object_control_t control)
1921 {
1922 if (control == MEMORY_OBJECT_CONTROL_NULL ||
1923 control->moc_ikot != IKOT_MEM_OBJ_CONTROL)
1924 return VM_OBJECT_NULL;
1925
1926 return (control->moc_object);
1927 }
1928
1929 memory_object_control_t
1930 convert_port_to_mo_control(
1931 __unused mach_port_t port)
1932 {
1933 return MEMORY_OBJECT_CONTROL_NULL;
1934 }
1935
1936
1937 mach_port_t
1938 convert_mo_control_to_port(
1939 __unused memory_object_control_t control)
1940 {
1941 return MACH_PORT_NULL;
1942 }
1943
1944 void
1945 memory_object_control_reference(
1946 __unused memory_object_control_t control)
1947 {
1948 return;
1949 }
1950
1951 /*
1952  * We only ever issue one of these references, so kill it
1953  * when that gets released (should switch to real reference
1954  * counting in true port-less EMMI).
1955 */
1956 void
1957 memory_object_control_deallocate(
1958 memory_object_control_t control)
1959 {
1960 zfree(mem_obj_control_zone, control);
1961 }
1962
1963 void
1964 memory_object_control_disable(
1965 memory_object_control_t control)
1966 {
1967 assert(control->moc_object != VM_OBJECT_NULL);
1968 control->moc_object = VM_OBJECT_NULL;
1969 }
1970
1971 void
1972 memory_object_default_reference(
1973 memory_object_default_t dmm)
1974 {
1975 ipc_port_make_send(dmm);
1976 }
1977
1978 void
1979 memory_object_default_deallocate(
1980 memory_object_default_t dmm)
1981 {
1982 ipc_port_release_send(dmm);
1983 }
1984
1985 memory_object_t
1986 convert_port_to_memory_object(
1987 __unused mach_port_t port)
1988 {
1989 return (MEMORY_OBJECT_NULL);
1990 }
1991
1992
1993 mach_port_t
1994 convert_memory_object_to_port(
1995 __unused memory_object_t object)
1996 {
1997 return (MACH_PORT_NULL);
1998 }
1999
2000
2001 /* Routine memory_object_reference */
2002 void memory_object_reference(
2003 memory_object_t memory_object)
2004 {
2005 (memory_object->mo_pager_ops->memory_object_reference)(
2006 memory_object);
2007 }
2008
2009 /* Routine memory_object_deallocate */
2010 void memory_object_deallocate(
2011 memory_object_t memory_object)
2012 {
2013 (memory_object->mo_pager_ops->memory_object_deallocate)(
2014 memory_object);
2015 }
2016
2017
2018 /* Routine memory_object_init */
2019 kern_return_t memory_object_init
2020 (
2021 memory_object_t memory_object,
2022 memory_object_control_t memory_control,
2023 memory_object_cluster_size_t memory_object_page_size
2024 )
2025 {
2026 return (memory_object->mo_pager_ops->memory_object_init)(
2027 memory_object,
2028 memory_control,
2029 memory_object_page_size);
2030 }
2031
2032 /* Routine memory_object_terminate */
2033 kern_return_t memory_object_terminate
2034 (
2035 memory_object_t memory_object
2036 )
2037 {
2038 return (memory_object->mo_pager_ops->memory_object_terminate)(
2039 memory_object);
2040 }
2041
2042 /* Routine memory_object_data_request */
2043 kern_return_t memory_object_data_request
2044 (
2045 memory_object_t memory_object,
2046 memory_object_offset_t offset,
2047 memory_object_cluster_size_t length,
2048 vm_prot_t desired_access
2049 )
2050 {
2051 return (memory_object->mo_pager_ops->memory_object_data_request)(
2052 memory_object,
2053 offset,
2054 length,
2055 desired_access);
2056 }
2057
2058 /* Routine memory_object_data_return */
2059 kern_return_t memory_object_data_return
2060 (
2061 memory_object_t memory_object,
2062 memory_object_offset_t offset,
2063 vm_size_t size,
2064 memory_object_offset_t *resid_offset,
2065 int *io_error,
2066 boolean_t dirty,
2067 boolean_t kernel_copy,
2068 int upl_flags
2069 )
2070 {
2071 return (memory_object->mo_pager_ops->memory_object_data_return)(
2072 memory_object,
2073 offset,
2074 size,
2075 resid_offset,
2076 io_error,
2077 dirty,
2078 kernel_copy,
2079 upl_flags);
2080 }
2081
2082 /* Routine memory_object_data_initialize */
2083 kern_return_t memory_object_data_initialize
2084 (
2085 memory_object_t memory_object,
2086 memory_object_offset_t offset,
2087 vm_size_t size
2088 )
2089 {
2090 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2091 memory_object,
2092 offset,
2093 size);
2094 }
2095
2096 /* Routine memory_object_data_unlock */
2097 kern_return_t memory_object_data_unlock
2098 (
2099 memory_object_t memory_object,
2100 memory_object_offset_t offset,
2101 vm_size_t size,
2102 vm_prot_t desired_access
2103 )
2104 {
2105 return (memory_object->mo_pager_ops->memory_object_data_unlock)(
2106 memory_object,
2107 offset,
2108 size,
2109 desired_access);
2110 }
2111
2112 /* Routine memory_object_synchronize */
2113 kern_return_t memory_object_synchronize
2114 (
2115 memory_object_t memory_object,
2116 memory_object_offset_t offset,
2117 vm_size_t size,
2118 vm_sync_t sync_flags
2119 )
2120 {
2121 return (memory_object->mo_pager_ops->memory_object_synchronize)(
2122 memory_object,
2123 offset,
2124 size,
2125 sync_flags);
2126 }
2127
2128 /* Routine memory_object_unmap */
2129 kern_return_t memory_object_unmap
2130 (
2131 memory_object_t memory_object
2132 )
2133 {
2134 return (memory_object->mo_pager_ops->memory_object_unmap)(
2135 memory_object);
2136 }
2137
2138 /* Routine memory_object_create */
2139 kern_return_t memory_object_create
2140 (
2141 memory_object_default_t default_memory_manager,
2142 vm_size_t new_memory_object_size,
2143 memory_object_t *new_memory_object
2144 )
2145 {
2146 return default_pager_memory_object_create(default_memory_manager,
2147 new_memory_object_size,
2148 new_memory_object);
2149 }
2150
2151 upl_t
2152 convert_port_to_upl(
2153 ipc_port_t port)
2154 {
2155 upl_t upl;
2156
2157 ip_lock(port);
2158 if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
2159 ip_unlock(port);
2160 return (upl_t)NULL;
2161 }
2162 upl = (upl_t) port->ip_kobject;
2163 ip_unlock(port);
2164 upl_lock(upl);
2165 upl->ref_count+=1;
2166 upl_unlock(upl);
2167 return upl;
2168 }
2169
2170 mach_port_t
2171 convert_upl_to_port(
2172 __unused upl_t upl)
2173 {
2174 return MACH_PORT_NULL;
2175 }
2176
2177 __private_extern__ void
2178 upl_no_senders(
2179 __unused ipc_port_t port,
2180 __unused mach_port_mscount_t mscount)
2181 {
2182 return;
2183 }