]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/bsd_vm.c
xnu-3789.1.32.tar.gz
[apple/xnu.git] / osfmk / vm / bsd_vm.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/thread.h>
45 #include <kern/ipc_kobject.h>
46
47 #include <ipc/ipc_port.h>
48 #include <ipc/ipc_space.h>
49
50 #include <vm/vm_map.h>
51 #include <vm/vm_pageout.h>
52 #include <vm/memory_object.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_purgeable_internal.h>
56
57
58 /* BSD VM COMPONENT INTERFACES */
59 int
60 get_map_nentries(
61 vm_map_t);
62
63 vm_offset_t
64 get_map_start(
65 vm_map_t);
66
67 vm_offset_t
68 get_map_end(
69 vm_map_t);
70
71 /*
72 *
73 */
74 int
75 get_map_nentries(
76 vm_map_t map)
77 {
78 return(map->hdr.nentries);
79 }
80
81 mach_vm_offset_t
82 mach_get_vm_start(vm_map_t map)
83 {
84 return( vm_map_first_entry(map)->vme_start);
85 }
86
87 mach_vm_offset_t
88 mach_get_vm_end(vm_map_t map)
89 {
90 return( vm_map_last_entry(map)->vme_end);
91 }
92
93 /*
94 * BSD VNODE PAGER
95 */
96
97 const struct memory_object_pager_ops vnode_pager_ops = {
98 vnode_pager_reference,
99 vnode_pager_deallocate,
100 vnode_pager_init,
101 vnode_pager_terminate,
102 vnode_pager_data_request,
103 vnode_pager_data_return,
104 vnode_pager_data_initialize,
105 vnode_pager_data_unlock,
106 vnode_pager_synchronize,
107 vnode_pager_map,
108 vnode_pager_last_unmap,
109 NULL, /* data_reclaim */
110 "vnode pager"
111 };
112
113 typedef struct vnode_pager {
114 struct ipc_object_header pager_header; /* fake ip_kotype() */
115 memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */
116 unsigned int ref_count; /* reference count */
117 memory_object_control_t control_handle; /* mem object control handle */
118 struct vnode *vnode_handle; /* vnode handle */
119 } *vnode_pager_t;
120
121
122 #define pager_ikot pager_header.io_bits
123
124
125 kern_return_t
126 vnode_pager_cluster_read( /* forward */
127 vnode_pager_t,
128 vm_object_offset_t,
129 vm_object_offset_t,
130 uint32_t,
131 vm_size_t);
132
133 void
134 vnode_pager_cluster_write( /* forward */
135 vnode_pager_t,
136 vm_object_offset_t,
137 vm_size_t,
138 vm_object_offset_t *,
139 int *,
140 int);
141
142
143 vnode_pager_t
144 vnode_object_create( /* forward */
145 struct vnode *);
146
147 vnode_pager_t
148 vnode_pager_lookup( /* forward */
149 memory_object_t);
150
151 struct vnode *
152 vnode_pager_lookup_vnode( /* forward */
153 memory_object_t);
154
155 zone_t vnode_pager_zone;
156
157
/* Null value for a vnode_pager_t. */
#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/*
 * TODO: Should be set dynamically by vnode_pager_init().
 * NOTE(review): no live code in the visible part of this file uses it.
 */
#define CLUSTER_SHIFT 	1

/*
 * TODO: Should be set dynamically by vnode_pager_bootstrap().
 * Used there to size the zone: MAX_VNODE * sizeof(struct vnode_pager).
 */
#define	MAX_VNODE		10000
165
166
#if DEBUG
/* Bit mask selecting which PAGER_DEBUG() messages are printed. */
int pagerdebug=0;

#define PAGER_ALL		0xffffffff
#define PAGER_INIT		0x00000001
#define PAGER_PAGEIN		0x00000002

/*
 * PAGER_DEBUG(LEVEL, A):
 *
 * Print the parenthesised printf argument list A when every bit of LEVEL
 * is set in pagerdebug.  Wrapped in do { } while (0) so that the macro
 * behaves as a single statement (safe in an unbraced if/else).
 * LEVEL is evaluated twice.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
#else
/* Debugging compiled out: expands to an empty statement. */
#define PAGER_DEBUG(LEVEL, A)	do { } while (0)
#endif
178
179 extern int proc_resetpcontrol(int);
180
181 #if DEVELOPMENT || DEBUG
182 extern unsigned long vm_cs_validated_resets;
183 #endif
184
185
186 extern int uiomove64(addr64_t, int, void *);
187 #define MAX_RUN 32
188
189 int
190 memory_object_control_uiomove(
191 memory_object_control_t control,
192 memory_object_offset_t offset,
193 void * uio,
194 int start_offset,
195 int io_requested,
196 int mark_dirty,
197 int take_reference)
198 {
199 vm_object_t object;
200 vm_page_t dst_page;
201 int xsize;
202 int retval = 0;
203 int cur_run;
204 int cur_needed;
205 int i;
206 int orig_offset;
207 vm_page_t page_run[MAX_RUN];
208 int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */
209
210 object = memory_object_control_to_vm_object(control);
211 if (object == VM_OBJECT_NULL) {
212 return (0);
213 }
214 assert(!object->internal);
215
216 vm_object_lock(object);
217
218 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
219 /*
220 * We can't modify the pages without honoring
221 * copy-on-write obligations first, so fall off
222 * this optimized path and fall back to the regular
223 * path.
224 */
225 vm_object_unlock(object);
226 return 0;
227 }
228 orig_offset = start_offset;
229
230 dirty_count = 0;
231 while (io_requested && retval == 0) {
232
233 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
234
235 if (cur_needed > MAX_RUN)
236 cur_needed = MAX_RUN;
237
238 for (cur_run = 0; cur_run < cur_needed; ) {
239
240 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
241 break;
242
243
244 if (dst_page->busy || dst_page->cleaning) {
245 /*
246 * someone else is playing with the page... if we've
247 * already collected pages into this run, go ahead
248 * and process now, we can't block on this
249 * page while holding other pages in the BUSY state
250 * otherwise we will wait
251 */
252 if (cur_run)
253 break;
254 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
255 continue;
256 }
257 if (dst_page->laundry)
258 vm_pageout_steal_laundry(dst_page, FALSE);
259
260 /*
261 * this routine is only called when copying
262 * to/from real files... no need to consider
263 * encrypted swap pages
264 */
265 assert(!dst_page->encrypted);
266
267 if (mark_dirty) {
268 if (dst_page->dirty == FALSE)
269 dirty_count++;
270 SET_PAGE_DIRTY(dst_page, FALSE);
271 if (dst_page->cs_validated &&
272 !dst_page->cs_tainted) {
273 /*
274 * CODE SIGNING:
275 * We're modifying a code-signed
276 * page: force revalidate
277 */
278 dst_page->cs_validated = FALSE;
279 #if DEVELOPMENT || DEBUG
280 vm_cs_validated_resets++;
281 #endif
282 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
283 }
284 }
285 dst_page->busy = TRUE;
286
287 page_run[cur_run++] = dst_page;
288
289 offset += PAGE_SIZE_64;
290 }
291 if (cur_run == 0)
292 /*
293 * we hit a 'hole' in the cache or
294 * a page we don't want to try to handle,
295 * so bail at this point
296 * we'll unlock the object below
297 */
298 break;
299 vm_object_unlock(object);
300
301 for (i = 0; i < cur_run; i++) {
302
303 dst_page = page_run[i];
304
305 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
306 xsize = io_requested;
307
308 if ( (retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio)) )
309 break;
310
311 io_requested -= xsize;
312 start_offset = 0;
313 }
314 vm_object_lock(object);
315
316 /*
317 * if we have more than 1 page to work on
318 * in the current run, or the original request
319 * started at offset 0 of the page, or we're
320 * processing multiple batches, we will move
321 * the pages to the tail of the inactive queue
322 * to implement an LRU for read/write accesses
323 *
324 * the check for orig_offset == 0 is there to
325 * mitigate the cost of small (< page_size) requests
326 * to the same page (this way we only move it once)
327 */
328 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
329
330 vm_page_lockspin_queues();
331
332 for (i = 0; i < cur_run; i++)
333 vm_page_lru(page_run[i]);
334
335 vm_page_unlock_queues();
336 }
337 for (i = 0; i < cur_run; i++) {
338 dst_page = page_run[i];
339
340 /*
341 * someone is explicitly referencing this page...
342 * update clustered and speculative state
343 *
344 */
345 if (dst_page->clustered)
346 VM_PAGE_CONSUME_CLUSTERED(dst_page);
347
348 PAGE_WAKEUP_DONE(dst_page);
349 }
350 orig_offset = 0;
351 }
352 if (object->pager)
353 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
354 vm_object_unlock(object);
355 return (retval);
356 }
357
358
359 /*
360 *
361 */
362 void
363 vnode_pager_bootstrap(void)
364 {
365 vm_size_t size;
366
367 size = (vm_size_t) sizeof(struct vnode_pager);
368 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
369 PAGE_SIZE, "vnode pager structures");
370 zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
371 zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
372
373
374 #if CONFIG_CODE_DECRYPTION
375 apple_protect_pager_bootstrap();
376 #endif /* CONFIG_CODE_DECRYPTION */
377 swapfile_pager_bootstrap();
378 return;
379 }
380
381 /*
382 *
383 */
384 memory_object_t
385 vnode_pager_setup(
386 struct vnode *vp,
387 __unused memory_object_t pager)
388 {
389 vnode_pager_t vnode_object;
390
391 vnode_object = vnode_object_create(vp);
392 if (vnode_object == VNODE_PAGER_NULL)
393 panic("vnode_pager_setup: vnode_object_create() failed");
394 return((memory_object_t)vnode_object);
395 }
396
397 /*
398 *
399 */
400 kern_return_t
401 vnode_pager_init(memory_object_t mem_obj,
402 memory_object_control_t control,
403 #if !DEBUG
404 __unused
405 #endif
406 memory_object_cluster_size_t pg_size)
407 {
408 vnode_pager_t vnode_object;
409 kern_return_t kr;
410 memory_object_attr_info_data_t attributes;
411
412
413 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
414
415 if (control == MEMORY_OBJECT_CONTROL_NULL)
416 return KERN_INVALID_ARGUMENT;
417
418 vnode_object = vnode_pager_lookup(mem_obj);
419
420 memory_object_control_reference(control);
421
422 vnode_object->control_handle = control;
423
424 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
425 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
426 attributes.cluster_size = (1 << (PAGE_SHIFT));
427 attributes.may_cache_object = TRUE;
428 attributes.temporary = TRUE;
429
430 kr = memory_object_change_attributes(
431 control,
432 MEMORY_OBJECT_ATTRIBUTE_INFO,
433 (memory_object_info_t) &attributes,
434 MEMORY_OBJECT_ATTR_INFO_COUNT);
435 if (kr != KERN_SUCCESS)
436 panic("vnode_pager_init: memory_object_change_attributes() failed");
437
438 return(KERN_SUCCESS);
439 }
440
441 /*
442 *
443 */
444 kern_return_t
445 vnode_pager_data_return(
446 memory_object_t mem_obj,
447 memory_object_offset_t offset,
448 memory_object_cluster_size_t data_cnt,
449 memory_object_offset_t *resid_offset,
450 int *io_error,
451 __unused boolean_t dirty,
452 __unused boolean_t kernel_copy,
453 int upl_flags)
454 {
455 vnode_pager_t vnode_object;
456
457 vnode_object = vnode_pager_lookup(mem_obj);
458
459 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
460
461 return KERN_SUCCESS;
462 }
463
464 kern_return_t
465 vnode_pager_data_initialize(
466 __unused memory_object_t mem_obj,
467 __unused memory_object_offset_t offset,
468 __unused memory_object_cluster_size_t data_cnt)
469 {
470 panic("vnode_pager_data_initialize");
471 return KERN_FAILURE;
472 }
473
474 kern_return_t
475 vnode_pager_data_unlock(
476 __unused memory_object_t mem_obj,
477 __unused memory_object_offset_t offset,
478 __unused memory_object_size_t size,
479 __unused vm_prot_t desired_access)
480 {
481 return KERN_FAILURE;
482 }
483
484 kern_return_t
485 vnode_pager_get_isinuse(
486 memory_object_t mem_obj,
487 uint32_t *isinuse)
488 {
489 vnode_pager_t vnode_object;
490
491 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
492 *isinuse = 1;
493 return KERN_INVALID_ARGUMENT;
494 }
495
496 vnode_object = vnode_pager_lookup(mem_obj);
497
498 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
499 return KERN_SUCCESS;
500 }
501
502 kern_return_t
503 vnode_pager_get_throttle_io_limit(
504 memory_object_t mem_obj,
505 uint32_t *limit)
506 {
507 vnode_pager_t vnode_object;
508
509 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
510 return KERN_INVALID_ARGUMENT;
511
512 vnode_object = vnode_pager_lookup(mem_obj);
513
514 (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
515 return KERN_SUCCESS;
516 }
517
518 kern_return_t
519 vnode_pager_get_isSSD(
520 memory_object_t mem_obj,
521 boolean_t *isSSD)
522 {
523 vnode_pager_t vnode_object;
524
525 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
526 return KERN_INVALID_ARGUMENT;
527
528 vnode_object = vnode_pager_lookup(mem_obj);
529
530 *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
531 return KERN_SUCCESS;
532 }
533
534 kern_return_t
535 vnode_pager_get_object_size(
536 memory_object_t mem_obj,
537 memory_object_offset_t *length)
538 {
539 vnode_pager_t vnode_object;
540
541 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
542 *length = 0;
543 return KERN_INVALID_ARGUMENT;
544 }
545
546 vnode_object = vnode_pager_lookup(mem_obj);
547
548 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
549 return KERN_SUCCESS;
550 }
551
552 kern_return_t
553 vnode_pager_get_object_name(
554 memory_object_t mem_obj,
555 char *pathname,
556 vm_size_t pathname_len,
557 char *filename,
558 vm_size_t filename_len,
559 boolean_t *truncated_path_p)
560 {
561 vnode_pager_t vnode_object;
562
563 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
564 return KERN_INVALID_ARGUMENT;
565 }
566
567 vnode_object = vnode_pager_lookup(mem_obj);
568
569 return vnode_pager_get_name(vnode_object->vnode_handle,
570 pathname,
571 pathname_len,
572 filename,
573 filename_len,
574 truncated_path_p);
575 }
576
577 kern_return_t
578 vnode_pager_get_object_mtime(
579 memory_object_t mem_obj,
580 struct timespec *mtime,
581 struct timespec *cs_mtime)
582 {
583 vnode_pager_t vnode_object;
584
585 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
586 return KERN_INVALID_ARGUMENT;
587 }
588
589 vnode_object = vnode_pager_lookup(mem_obj);
590
591 return vnode_pager_get_mtime(vnode_object->vnode_handle,
592 mtime,
593 cs_mtime);
594 }
595
#if CHECK_CS_VALIDATION_BITMAP
/*
 * vnode_pager_cs_check_validation_bitmap:
 *
 * Forward "offset" and "optype" to ubc_cs_check_validation_bitmap()
 * for the vnode behind "mem_obj" and return its result.
 * Returns KERN_INVALID_ARGUMENT if "mem_obj" is null or is not a
 * vnode pager.
 */
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	vnode_pager_t	pager;

	if (mem_obj == MEMORY_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;
	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	pager = vnode_pager_lookup(mem_obj);

	return ubc_cs_check_validation_bitmap(pager->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */
614
615 /*
616 *
617 */
618 kern_return_t
619 vnode_pager_data_request(
620 memory_object_t mem_obj,
621 memory_object_offset_t offset,
622 __unused memory_object_cluster_size_t length,
623 __unused vm_prot_t desired_access,
624 memory_object_fault_info_t fault_info)
625 {
626 vnode_pager_t vnode_object;
627 memory_object_offset_t base_offset;
628 vm_size_t size;
629 uint32_t io_streaming = 0;
630
631 vnode_object = vnode_pager_lookup(mem_obj);
632
633 size = MAX_UPL_TRANSFER_BYTES;
634 base_offset = offset;
635
636 if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
637 size = PAGE_SIZE;
638
639 assert(offset >= base_offset &&
640 offset < base_offset + size);
641
642 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
643 }
644
645 /*
646 *
647 */
648 void
649 vnode_pager_reference(
650 memory_object_t mem_obj)
651 {
652 vnode_pager_t vnode_object;
653 unsigned int new_ref_count;
654
655 vnode_object = vnode_pager_lookup(mem_obj);
656 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
657 assert(new_ref_count > 1);
658 }
659
660 /*
661 *
662 */
663 void
664 vnode_pager_deallocate(
665 memory_object_t mem_obj)
666 {
667 vnode_pager_t vnode_object;
668
669 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
670
671 vnode_object = vnode_pager_lookup(mem_obj);
672
673 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
674 if (vnode_object->vnode_handle != NULL) {
675 vnode_pager_vrele(vnode_object->vnode_handle);
676 }
677 zfree(vnode_pager_zone, vnode_object);
678 }
679 return;
680 }
681
682 /*
683 *
684 */
685 kern_return_t
686 vnode_pager_terminate(
687 #if !DEBUG
688 __unused
689 #endif
690 memory_object_t mem_obj)
691 {
692 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
693
694 return(KERN_SUCCESS);
695 }
696
697 /*
698 *
699 */
700 kern_return_t
701 vnode_pager_synchronize(
702 memory_object_t mem_obj,
703 memory_object_offset_t offset,
704 memory_object_size_t length,
705 __unused vm_sync_t sync_flags)
706 {
707 vnode_pager_t vnode_object;
708
709 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
710
711 vnode_object = vnode_pager_lookup(mem_obj);
712
713 memory_object_synchronize_completed(vnode_object->control_handle, offset, length);
714
715 return (KERN_SUCCESS);
716 }
717
718 /*
719 *
720 */
721 kern_return_t
722 vnode_pager_map(
723 memory_object_t mem_obj,
724 vm_prot_t prot)
725 {
726 vnode_pager_t vnode_object;
727 int ret;
728 kern_return_t kr;
729
730 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
731
732 vnode_object = vnode_pager_lookup(mem_obj);
733
734 ret = ubc_map(vnode_object->vnode_handle, prot);
735
736 if (ret != 0) {
737 kr = KERN_FAILURE;
738 } else {
739 kr = KERN_SUCCESS;
740 }
741
742 return kr;
743 }
744
745 kern_return_t
746 vnode_pager_last_unmap(
747 memory_object_t mem_obj)
748 {
749 vnode_pager_t vnode_object;
750
751 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
752
753 vnode_object = vnode_pager_lookup(mem_obj);
754
755 ubc_unmap(vnode_object->vnode_handle);
756 return KERN_SUCCESS;
757 }
758
759
760
761 /*
762 *
763 */
764 void
765 vnode_pager_cluster_write(
766 vnode_pager_t vnode_object,
767 vm_object_offset_t offset,
768 vm_size_t cnt,
769 vm_object_offset_t * resid_offset,
770 int * io_error,
771 int upl_flags)
772 {
773 vm_size_t size;
774 int errno;
775
776 if (upl_flags & UPL_MSYNC) {
777
778 upl_flags |= UPL_VNODE_PAGER;
779
780 if ( (upl_flags & UPL_IOSYNC) && io_error)
781 upl_flags |= UPL_KEEPCACHED;
782
783 while (cnt) {
784 size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
785
786 assert((upl_size_t) size == size);
787 vnode_pageout(vnode_object->vnode_handle,
788 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
789
790 if ( (upl_flags & UPL_KEEPCACHED) ) {
791 if ( (*io_error = errno) )
792 break;
793 }
794 cnt -= size;
795 offset += size;
796 }
797 if (resid_offset)
798 *resid_offset = offset;
799
800 } else {
801 vm_object_offset_t vnode_size;
802 vm_object_offset_t base_offset;
803
804 /*
805 * this is the pageout path
806 */
807 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
808
809 if (vnode_size > (offset + PAGE_SIZE)) {
810 /*
811 * preset the maximum size of the cluster
812 * and put us on a nice cluster boundary...
813 * and then clip the size to insure we
814 * don't request past the end of the underlying file
815 */
816 size = MAX_UPL_TRANSFER_BYTES;
817 base_offset = offset & ~((signed)(size - 1));
818
819 if ((base_offset + size) > vnode_size)
820 size = round_page(((vm_size_t)(vnode_size - base_offset)));
821 } else {
822 /*
823 * we've been requested to page out a page beyond the current
824 * end of the 'file'... don't try to cluster in this case...
825 * we still need to send this page through because it might
826 * be marked precious and the underlying filesystem may need
827 * to do something with it (besides page it out)...
828 */
829 base_offset = offset;
830 size = PAGE_SIZE;
831 }
832 assert((upl_size_t) size == size);
833 vnode_pageout(vnode_object->vnode_handle,
834 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
835 (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
836 }
837 }
838
839
840 /*
841 *
842 */
843 kern_return_t
844 vnode_pager_cluster_read(
845 vnode_pager_t vnode_object,
846 vm_object_offset_t base_offset,
847 vm_object_offset_t offset,
848 uint32_t io_streaming,
849 vm_size_t cnt)
850 {
851 int local_error = 0;
852 int kret;
853 int flags = 0;
854
855 assert(! (cnt & PAGE_MASK));
856
857 if (io_streaming)
858 flags |= UPL_IOSTREAMING;
859
860 assert((upl_size_t) cnt == cnt);
861 kret = vnode_pagein(vnode_object->vnode_handle,
862 (upl_t) NULL,
863 (upl_offset_t) (offset - base_offset),
864 base_offset,
865 (upl_size_t) cnt,
866 flags,
867 &local_error);
868 /*
869 if(kret == PAGER_ABSENT) {
870 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
871 defined in bsd/vm/vm_pager.h However, we should not be including
872 that file here it is a layering violation.
873 */
874 if (kret == 1) {
875 int uplflags;
876 upl_t upl = NULL;
877 unsigned int count = 0;
878 kern_return_t kr;
879
880 uplflags = (UPL_NO_SYNC |
881 UPL_CLEAN_IN_PLACE |
882 UPL_SET_INTERNAL);
883 count = 0;
884 assert((upl_size_t) cnt == cnt);
885 kr = memory_object_upl_request(vnode_object->control_handle,
886 base_offset, (upl_size_t) cnt,
887 &upl, NULL, &count, uplflags);
888 if (kr == KERN_SUCCESS) {
889 upl_abort(upl, 0);
890 upl_deallocate(upl);
891 } else {
892 /*
893 * We couldn't gather the page list, probably
894 * because the memory object doesn't have a link
895 * to a VM object anymore (forced unmount, for
896 * example). Just return an error to the vm_fault()
897 * path and let it handle it.
898 */
899 }
900
901 return KERN_FAILURE;
902 }
903
904 return KERN_SUCCESS;
905
906 }
907
908
909 /*
910 *
911 */
912 void
913 vnode_pager_release_from_cache(
914 int *cnt)
915 {
916 memory_object_free_from_cache(
917 &realhost, &vnode_pager_ops, cnt);
918 }
919
920 /*
921 *
922 */
923 vnode_pager_t
924 vnode_object_create(
925 struct vnode *vp)
926 {
927 vnode_pager_t vnode_object;
928
929 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
930 if (vnode_object == VNODE_PAGER_NULL)
931 return(VNODE_PAGER_NULL);
932
933 /*
934 * The vm_map call takes both named entry ports and raw memory
935 * objects in the same parameter. We need to make sure that
936 * vm_map does not see this object as a named entry port. So,
937 * we reserve the first word in the object for a fake ip_kotype
938 * setting - that will tell vm_map to use it as a memory object.
939 */
940 vnode_object->pager_ops = &vnode_pager_ops;
941 vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
942 vnode_object->ref_count = 1;
943 vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
944 vnode_object->vnode_handle = vp;
945
946 return(vnode_object);
947 }
948
949 /*
950 *
951 */
952 vnode_pager_t
953 vnode_pager_lookup(
954 memory_object_t name)
955 {
956 vnode_pager_t vnode_object;
957
958 vnode_object = (vnode_pager_t)name;
959 assert(vnode_object->pager_ops == &vnode_pager_ops);
960 return (vnode_object);
961 }
962
963
964 struct vnode *
965 vnode_pager_lookup_vnode(
966 memory_object_t name)
967 {
968 vnode_pager_t vnode_object;
969 vnode_object = (vnode_pager_t)name;
970 if(vnode_object->pager_ops == &vnode_pager_ops)
971 return (vnode_object->vnode_handle);
972 else
973 return NULL;
974 }
975
976 /*********************** proc_info implementation *************/
977
978 #include <sys/bsdtask_info.h>
979
980 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
981
982
983 int
984 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
985 {
986
987 vm_map_t map;
988 vm_map_offset_t address = (vm_map_offset_t )arg;
989 vm_map_entry_t tmp_entry;
990 vm_map_entry_t entry;
991 vm_map_offset_t start;
992 vm_region_extended_info_data_t extended;
993 vm_region_top_info_data_t top;
994
995 task_lock(task);
996 map = task->map;
997 if (map == VM_MAP_NULL)
998 {
999 task_unlock(task);
1000 return(0);
1001 }
1002 vm_map_reference(map);
1003 task_unlock(task);
1004
1005 vm_map_lock_read(map);
1006
1007 start = address;
1008 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1009 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1010 vm_map_unlock_read(map);
1011 vm_map_deallocate(map);
1012 return(0);
1013 }
1014 } else {
1015 entry = tmp_entry;
1016 }
1017
1018 start = entry->vme_start;
1019
1020 pinfo->pri_offset = VME_OFFSET(entry);
1021 pinfo->pri_protection = entry->protection;
1022 pinfo->pri_max_protection = entry->max_protection;
1023 pinfo->pri_inheritance = entry->inheritance;
1024 pinfo->pri_behavior = entry->behavior;
1025 pinfo->pri_user_wired_count = entry->user_wired_count;
1026 pinfo->pri_user_tag = VME_ALIAS(entry);
1027
1028 if (entry->is_sub_map) {
1029 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1030 } else {
1031 if (entry->is_shared)
1032 pinfo->pri_flags |= PROC_REGION_SHARED;
1033 }
1034
1035
1036 extended.protection = entry->protection;
1037 extended.user_tag = VME_ALIAS(entry);
1038 extended.pages_resident = 0;
1039 extended.pages_swapped_out = 0;
1040 extended.pages_shared_now_private = 0;
1041 extended.pages_dirtied = 0;
1042 extended.external_pager = 0;
1043 extended.shadow_depth = 0;
1044
1045 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended);
1046
1047 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1048 extended.share_mode = SM_PRIVATE;
1049
1050 top.private_pages_resident = 0;
1051 top.shared_pages_resident = 0;
1052 vm_map_region_top_walk(entry, &top);
1053
1054
1055 pinfo->pri_pages_resident = extended.pages_resident;
1056 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1057 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1058 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1059 pinfo->pri_ref_count = extended.ref_count;
1060 pinfo->pri_shadow_depth = extended.shadow_depth;
1061 pinfo->pri_share_mode = extended.share_mode;
1062
1063 pinfo->pri_private_pages_resident = top.private_pages_resident;
1064 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1065 pinfo->pri_obj_id = top.obj_id;
1066
1067 pinfo->pri_address = (uint64_t)start;
1068 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1069 pinfo->pri_depth = 0;
1070
1071 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1072 *vnodeaddr = (uintptr_t)0;
1073
1074 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1075 vm_map_unlock_read(map);
1076 vm_map_deallocate(map);
1077 return(1);
1078 }
1079 }
1080
1081 vm_map_unlock_read(map);
1082 vm_map_deallocate(map);
1083 return(1);
1084 }
1085
1086 int
1087 fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1088 {
1089
1090 vm_map_t map;
1091 vm_map_offset_t address = (vm_map_offset_t )arg;
1092 vm_map_entry_t tmp_entry;
1093 vm_map_entry_t entry;
1094
1095 task_lock(task);
1096 map = task->map;
1097 if (map == VM_MAP_NULL)
1098 {
1099 task_unlock(task);
1100 return(0);
1101 }
1102 vm_map_reference(map);
1103 task_unlock(task);
1104
1105 vm_map_lock_read(map);
1106
1107 if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
1108 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1109 vm_map_unlock_read(map);
1110 vm_map_deallocate(map);
1111 return(0);
1112 }
1113 } else {
1114 entry = tmp_entry;
1115 }
1116
1117 while (entry != vm_map_to_entry(map)) {
1118 *vnodeaddr = 0;
1119 *vid = 0;
1120
1121 if (entry->is_sub_map == 0) {
1122 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1123
1124 pinfo->pri_offset = VME_OFFSET(entry);
1125 pinfo->pri_protection = entry->protection;
1126 pinfo->pri_max_protection = entry->max_protection;
1127 pinfo->pri_inheritance = entry->inheritance;
1128 pinfo->pri_behavior = entry->behavior;
1129 pinfo->pri_user_wired_count = entry->user_wired_count;
1130 pinfo->pri_user_tag = VME_ALIAS(entry);
1131
1132 if (entry->is_shared)
1133 pinfo->pri_flags |= PROC_REGION_SHARED;
1134
1135 pinfo->pri_pages_resident = 0;
1136 pinfo->pri_pages_shared_now_private = 0;
1137 pinfo->pri_pages_swapped_out = 0;
1138 pinfo->pri_pages_dirtied = 0;
1139 pinfo->pri_ref_count = 0;
1140 pinfo->pri_shadow_depth = 0;
1141 pinfo->pri_share_mode = 0;
1142
1143 pinfo->pri_private_pages_resident = 0;
1144 pinfo->pri_shared_pages_resident = 0;
1145 pinfo->pri_obj_id = 0;
1146
1147 pinfo->pri_address = (uint64_t)entry->vme_start;
1148 pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
1149 pinfo->pri_depth = 0;
1150
1151 vm_map_unlock_read(map);
1152 vm_map_deallocate(map);
1153 return(1);
1154 }
1155 }
1156
1157 /* Keep searching for a vnode-backed mapping */
1158 entry = entry->vme_next;
1159 }
1160
1161 vm_map_unlock_read(map);
1162 vm_map_deallocate(map);
1163 return(0);
1164 }
1165
1166 static int
1167 fill_vnodeinfoforaddr(
1168 vm_map_entry_t entry,
1169 uintptr_t * vnodeaddr,
1170 uint32_t * vid)
1171 {
1172 vm_object_t top_object, object;
1173 memory_object_t memory_object;
1174 memory_object_pager_ops_t pager_ops;
1175 kern_return_t kr;
1176 int shadow_depth;
1177
1178
1179 if (entry->is_sub_map) {
1180 return(0);
1181 } else {
1182 /*
1183 * The last object in the shadow chain has the
1184 * relevant pager information.
1185 */
1186 top_object = VME_OBJECT(entry);
1187 if (top_object == VM_OBJECT_NULL) {
1188 object = VM_OBJECT_NULL;
1189 shadow_depth = 0;
1190 } else {
1191 vm_object_lock(top_object);
1192 for (object = top_object, shadow_depth = 0;
1193 object->shadow != VM_OBJECT_NULL;
1194 object = object->shadow, shadow_depth++) {
1195 vm_object_lock(object->shadow);
1196 vm_object_unlock(object);
1197 }
1198 }
1199 }
1200
1201 if (object == VM_OBJECT_NULL) {
1202 return(0);
1203 } else if (object->internal) {
1204 vm_object_unlock(object);
1205 return(0);
1206 } else if (! object->pager_ready ||
1207 object->terminating ||
1208 ! object->alive) {
1209 vm_object_unlock(object);
1210 return(0);
1211 } else {
1212 memory_object = object->pager;
1213 pager_ops = memory_object->mo_pager_ops;
1214 if (pager_ops == &vnode_pager_ops) {
1215 kr = vnode_pager_get_object_vnode(
1216 memory_object,
1217 vnodeaddr, vid);
1218 if (kr != KERN_SUCCESS) {
1219 vm_object_unlock(object);
1220 return(0);
1221 }
1222 } else {
1223 vm_object_unlock(object);
1224 return(0);
1225 }
1226 }
1227 vm_object_unlock(object);
1228 return(1);
1229 }
1230
1231 kern_return_t
1232 vnode_pager_get_object_vnode (
1233 memory_object_t mem_obj,
1234 uintptr_t * vnodeaddr,
1235 uint32_t * vid)
1236 {
1237 vnode_pager_t vnode_object;
1238
1239 vnode_object = vnode_pager_lookup(mem_obj);
1240 if (vnode_object->vnode_handle) {
1241 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1242 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1243
1244 return(KERN_SUCCESS);
1245 }
1246
1247 return(KERN_FAILURE);
1248 }
1249
#if CONFIG_IOSCHED
/*
 * vnode_pager_get_object_devvp:
 *
 * Look up the vnode behind "mem_obj" and store in *devvp the value
 * returned by vnode_mountdevvp() for it.
 *
 * Returns KERN_SUCCESS when *devvp ends up non-zero.  Returns
 * KERN_FAILURE if no vnode is attached (*devvp untouched) or if
 * vnode_mountdevvp() yields 0 (*devvp set to 0).
 */
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t 	mem_obj,
	uintptr_t 		*devvp)
{
	/*
	 * Receive the vnode address in a real uintptr_t and convert it,
	 * rather than writing a "struct vnode *" object through a
	 * uintptr_t pointer.
	 */
	uintptr_t	vnode_addr = 0;
	uint32_t	vid = 0;

	if (vnode_pager_get_object_vnode(mem_obj, &vnode_addr, &vid) != KERN_SUCCESS)
		return (KERN_FAILURE);

	*devvp = (uintptr_t)vnode_mountdevvp((struct vnode *)vnode_addr);
	if (*devvp)
		return (KERN_SUCCESS);
	return (KERN_FAILURE);
}
#endif
1267
1268 /*
1269 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1270 * object locked, otherwise return NULL with nothing locked.
1271 */
1272
1273 vm_object_t
1274 find_vnode_object(
1275 vm_map_entry_t entry
1276 )
1277 {
1278 vm_object_t top_object, object;
1279 memory_object_t memory_object;
1280 memory_object_pager_ops_t pager_ops;
1281
1282 if (!entry->is_sub_map) {
1283
1284 /*
1285 * The last object in the shadow chain has the
1286 * relevant pager information.
1287 */
1288
1289 top_object = VME_OBJECT(entry);
1290
1291 if (top_object) {
1292 vm_object_lock(top_object);
1293
1294 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1295 vm_object_lock(object->shadow);
1296 vm_object_unlock(object);
1297 }
1298
1299 if (object && !object->internal && object->pager_ready && !object->terminating &&
1300 object->alive) {
1301 memory_object = object->pager;
1302 pager_ops = memory_object->mo_pager_ops;
1303
1304 /*
1305 * If this object points to the vnode_pager_ops, then we found what we're
1306 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1307 * vnode and so we fall through to the bottom and return NULL.
1308 */
1309
1310 if (pager_ops == &vnode_pager_ops)
1311 return object; /* we return with the object locked */
1312 }
1313
1314 vm_object_unlock(object);
1315 }
1316
1317 }
1318
1319 return(VM_OBJECT_NULL);
1320 }