]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/bsd_vm.c
xnu-4570.61.1.tar.gz
[apple/xnu.git] / osfmk / vm / bsd_vm.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/ledger.h>
45 #include <kern/thread.h>
46 #include <kern/ipc_kobject.h>
47
48 #include <ipc/ipc_port.h>
49 #include <ipc/ipc_space.h>
50
51 #include <vm/vm_map.h>
52 #include <vm/vm_pageout.h>
53 #include <vm/memory_object.h>
54 #include <vm/vm_pageout.h>
55 #include <vm/vm_protos.h>
56 #include <vm/vm_purgeable_internal.h>
57
58
59 /* BSD VM COMPONENT INTERFACES */
60 int
61 get_map_nentries(
62 vm_map_t);
63
64 vm_offset_t
65 get_map_start(
66 vm_map_t);
67
68 vm_offset_t
69 get_map_end(
70 vm_map_t);
71
72 /*
73 *
74 */
75 int
76 get_map_nentries(
77 vm_map_t map)
78 {
79 return(map->hdr.nentries);
80 }
81
82 mach_vm_offset_t
83 mach_get_vm_start(vm_map_t map)
84 {
85 return( vm_map_first_entry(map)->vme_start);
86 }
87
88 mach_vm_offset_t
89 mach_get_vm_end(vm_map_t map)
90 {
91 return( vm_map_last_entry(map)->vme_end);
92 }
93
94 /*
95 * BSD VNODE PAGER
96 */
97
98 const struct memory_object_pager_ops vnode_pager_ops = {
99 vnode_pager_reference,
100 vnode_pager_deallocate,
101 vnode_pager_init,
102 vnode_pager_terminate,
103 vnode_pager_data_request,
104 vnode_pager_data_return,
105 vnode_pager_data_initialize,
106 vnode_pager_data_unlock,
107 vnode_pager_synchronize,
108 vnode_pager_map,
109 vnode_pager_last_unmap,
110 NULL, /* data_reclaim */
111 "vnode pager"
112 };
113
114 typedef struct vnode_pager {
115 /* mandatory generic header */
116 struct memory_object vn_pgr_hdr;
117
118 /* pager-specific */
119 unsigned int ref_count; /* reference count */
120 struct vnode *vnode_handle; /* vnode handle */
121 } *vnode_pager_t;
122
123
124 kern_return_t
125 vnode_pager_cluster_read( /* forward */
126 vnode_pager_t,
127 vm_object_offset_t,
128 vm_object_offset_t,
129 uint32_t,
130 vm_size_t);
131
132 void
133 vnode_pager_cluster_write( /* forward */
134 vnode_pager_t,
135 vm_object_offset_t,
136 vm_size_t,
137 vm_object_offset_t *,
138 int *,
139 int);
140
141
142 vnode_pager_t
143 vnode_object_create( /* forward */
144 struct vnode *);
145
146 vnode_pager_t
147 vnode_pager_lookup( /* forward */
148 memory_object_t);
149
150 struct vnode *
151 vnode_pager_lookup_vnode( /* forward */
152 memory_object_t);
153
154 zone_t vnode_pager_zone;
155
156
157 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
158
159 /* TODO: Should be set dynamically by vnode_pager_init() */
160 #define CLUSTER_SHIFT 1
161
162 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
163 #define MAX_VNODE 10000
164
165
#if DEBUG
/* Bit mask of enabled pager debug categories (PAGER_* below). */
int pagerdebug = 0;

#define PAGER_ALL	0xffffffff
#define PAGER_INIT	0x00000001
#define PAGER_PAGEIN	0x00000002

/*
 * PAGER_DEBUG(LEVEL, A): when every bit of LEVEL is set in pagerdebug,
 * call printf with the parenthesized argument list A, e.g.
 *
 *	PAGER_DEBUG(PAGER_INIT, ("value %d\n", v));
 *
 * LEVEL is parenthesized so that compound masks such as
 * (PAGER_INIT | PAGER_PAGEIN) are tested as a unit, and the body is
 * wrapped in do { } while (0) so the macro behaves as a single statement
 * (usable as the unbraced body of an if/else).  LEVEL is evaluated twice.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
#else
/* Non-DEBUG builds: expands to nothing; the arguments are not evaluated. */
#define PAGER_DEBUG(LEVEL, A)
#endif
177
178 extern int proc_resetpcontrol(int);
179
180 #if DEVELOPMENT || DEBUG
181 extern unsigned long vm_cs_validated_resets;
182 #endif
183
184
185 extern int uiomove64(addr64_t, int, void *);
186 #define MAX_RUN 32
187
188 int
189 memory_object_control_uiomove(
190 memory_object_control_t control,
191 memory_object_offset_t offset,
192 void * uio,
193 int start_offset,
194 int io_requested,
195 int mark_dirty,
196 int take_reference)
197 {
198 vm_object_t object;
199 vm_page_t dst_page;
200 int xsize;
201 int retval = 0;
202 int cur_run;
203 int cur_needed;
204 int i;
205 int orig_offset;
206 vm_page_t page_run[MAX_RUN];
207 int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */
208
209 object = memory_object_control_to_vm_object(control);
210 if (object == VM_OBJECT_NULL) {
211 return (0);
212 }
213 assert(!object->internal);
214
215 vm_object_lock(object);
216
217 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
218 /*
219 * We can't modify the pages without honoring
220 * copy-on-write obligations first, so fall off
221 * this optimized path and fall back to the regular
222 * path.
223 */
224 vm_object_unlock(object);
225 return 0;
226 }
227 orig_offset = start_offset;
228
229 dirty_count = 0;
230 while (io_requested && retval == 0) {
231
232 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
233
234 if (cur_needed > MAX_RUN)
235 cur_needed = MAX_RUN;
236
237 for (cur_run = 0; cur_run < cur_needed; ) {
238
239 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
240 break;
241
242
243 if (dst_page->busy || dst_page->cleaning) {
244 /*
245 * someone else is playing with the page... if we've
246 * already collected pages into this run, go ahead
247 * and process now, we can't block on this
248 * page while holding other pages in the BUSY state
249 * otherwise we will wait
250 */
251 if (cur_run)
252 break;
253 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
254 continue;
255 }
256 if (dst_page->laundry)
257 vm_pageout_steal_laundry(dst_page, FALSE);
258
259 if (mark_dirty) {
260 if (dst_page->dirty == FALSE)
261 dirty_count++;
262 SET_PAGE_DIRTY(dst_page, FALSE);
263 if (dst_page->cs_validated &&
264 !dst_page->cs_tainted) {
265 /*
266 * CODE SIGNING:
267 * We're modifying a code-signed
268 * page: force revalidate
269 */
270 dst_page->cs_validated = FALSE;
271 #if DEVELOPMENT || DEBUG
272 vm_cs_validated_resets++;
273 #endif
274 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
275 }
276 }
277 dst_page->busy = TRUE;
278
279 page_run[cur_run++] = dst_page;
280
281 offset += PAGE_SIZE_64;
282 }
283 if (cur_run == 0)
284 /*
285 * we hit a 'hole' in the cache or
286 * a page we don't want to try to handle,
287 * so bail at this point
288 * we'll unlock the object below
289 */
290 break;
291 vm_object_unlock(object);
292
293 for (i = 0; i < cur_run; i++) {
294
295 dst_page = page_run[i];
296
297 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
298 xsize = io_requested;
299
300 if ( (retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio)) )
301 break;
302
303 io_requested -= xsize;
304 start_offset = 0;
305 }
306 vm_object_lock(object);
307
308 /*
309 * if we have more than 1 page to work on
310 * in the current run, or the original request
311 * started at offset 0 of the page, or we're
312 * processing multiple batches, we will move
313 * the pages to the tail of the inactive queue
314 * to implement an LRU for read/write accesses
315 *
316 * the check for orig_offset == 0 is there to
317 * mitigate the cost of small (< page_size) requests
318 * to the same page (this way we only move it once)
319 */
320 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
321
322 vm_page_lockspin_queues();
323
324 for (i = 0; i < cur_run; i++)
325 vm_page_lru(page_run[i]);
326
327 vm_page_unlock_queues();
328 }
329 for (i = 0; i < cur_run; i++) {
330 dst_page = page_run[i];
331
332 /*
333 * someone is explicitly referencing this page...
334 * update clustered and speculative state
335 *
336 */
337 if (dst_page->clustered)
338 VM_PAGE_CONSUME_CLUSTERED(dst_page);
339
340 PAGE_WAKEUP_DONE(dst_page);
341 }
342 orig_offset = 0;
343 }
344 if (object->pager)
345 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
346 vm_object_unlock(object);
347 return (retval);
348 }
349
350
351 /*
352 *
353 */
354 void
355 vnode_pager_bootstrap(void)
356 {
357 vm_size_t size;
358
359 size = (vm_size_t) sizeof(struct vnode_pager);
360 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
361 PAGE_SIZE, "vnode pager structures");
362 zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
363 zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
364
365
366 #if CONFIG_CODE_DECRYPTION
367 apple_protect_pager_bootstrap();
368 #endif /* CONFIG_CODE_DECRYPTION */
369 swapfile_pager_bootstrap();
370 #if __arm64__
371 fourk_pager_bootstrap();
372 #endif /* __arm64__ */
373 return;
374 }
375
376 /*
377 *
378 */
379 memory_object_t
380 vnode_pager_setup(
381 struct vnode *vp,
382 __unused memory_object_t pager)
383 {
384 vnode_pager_t vnode_object;
385
386 vnode_object = vnode_object_create(vp);
387 if (vnode_object == VNODE_PAGER_NULL)
388 panic("vnode_pager_setup: vnode_object_create() failed");
389 return((memory_object_t)vnode_object);
390 }
391
392 /*
393 *
394 */
395 kern_return_t
396 vnode_pager_init(memory_object_t mem_obj,
397 memory_object_control_t control,
398 #if !DEBUG
399 __unused
400 #endif
401 memory_object_cluster_size_t pg_size)
402 {
403 vnode_pager_t vnode_object;
404 kern_return_t kr;
405 memory_object_attr_info_data_t attributes;
406
407
408 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
409
410 if (control == MEMORY_OBJECT_CONTROL_NULL)
411 return KERN_INVALID_ARGUMENT;
412
413 vnode_object = vnode_pager_lookup(mem_obj);
414
415 memory_object_control_reference(control);
416
417 vnode_object->vn_pgr_hdr.mo_control = control;
418
419 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
420 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
421 attributes.cluster_size = (1 << (PAGE_SHIFT));
422 attributes.may_cache_object = TRUE;
423 attributes.temporary = TRUE;
424
425 kr = memory_object_change_attributes(
426 control,
427 MEMORY_OBJECT_ATTRIBUTE_INFO,
428 (memory_object_info_t) &attributes,
429 MEMORY_OBJECT_ATTR_INFO_COUNT);
430 if (kr != KERN_SUCCESS)
431 panic("vnode_pager_init: memory_object_change_attributes() failed");
432
433 return(KERN_SUCCESS);
434 }
435
436 /*
437 *
438 */
439 kern_return_t
440 vnode_pager_data_return(
441 memory_object_t mem_obj,
442 memory_object_offset_t offset,
443 memory_object_cluster_size_t data_cnt,
444 memory_object_offset_t *resid_offset,
445 int *io_error,
446 __unused boolean_t dirty,
447 __unused boolean_t kernel_copy,
448 int upl_flags)
449 {
450 vnode_pager_t vnode_object;
451
452 vnode_object = vnode_pager_lookup(mem_obj);
453
454 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
455
456 return KERN_SUCCESS;
457 }
458
459 kern_return_t
460 vnode_pager_data_initialize(
461 __unused memory_object_t mem_obj,
462 __unused memory_object_offset_t offset,
463 __unused memory_object_cluster_size_t data_cnt)
464 {
465 panic("vnode_pager_data_initialize");
466 return KERN_FAILURE;
467 }
468
469 kern_return_t
470 vnode_pager_data_unlock(
471 __unused memory_object_t mem_obj,
472 __unused memory_object_offset_t offset,
473 __unused memory_object_size_t size,
474 __unused vm_prot_t desired_access)
475 {
476 return KERN_FAILURE;
477 }
478
479 kern_return_t
480 vnode_pager_get_isinuse(
481 memory_object_t mem_obj,
482 uint32_t *isinuse)
483 {
484 vnode_pager_t vnode_object;
485
486 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
487 *isinuse = 1;
488 return KERN_INVALID_ARGUMENT;
489 }
490
491 vnode_object = vnode_pager_lookup(mem_obj);
492
493 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
494 return KERN_SUCCESS;
495 }
496
497 kern_return_t
498 vnode_pager_get_throttle_io_limit(
499 memory_object_t mem_obj,
500 uint32_t *limit)
501 {
502 vnode_pager_t vnode_object;
503
504 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
505 return KERN_INVALID_ARGUMENT;
506
507 vnode_object = vnode_pager_lookup(mem_obj);
508
509 (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
510 return KERN_SUCCESS;
511 }
512
513 kern_return_t
514 vnode_pager_get_isSSD(
515 memory_object_t mem_obj,
516 boolean_t *isSSD)
517 {
518 vnode_pager_t vnode_object;
519
520 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
521 return KERN_INVALID_ARGUMENT;
522
523 vnode_object = vnode_pager_lookup(mem_obj);
524
525 *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
526 return KERN_SUCCESS;
527 }
528
529 kern_return_t
530 vnode_pager_get_object_size(
531 memory_object_t mem_obj,
532 memory_object_offset_t *length)
533 {
534 vnode_pager_t vnode_object;
535
536 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
537 *length = 0;
538 return KERN_INVALID_ARGUMENT;
539 }
540
541 vnode_object = vnode_pager_lookup(mem_obj);
542
543 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
544 return KERN_SUCCESS;
545 }
546
547 kern_return_t
548 vnode_pager_get_object_name(
549 memory_object_t mem_obj,
550 char *pathname,
551 vm_size_t pathname_len,
552 char *filename,
553 vm_size_t filename_len,
554 boolean_t *truncated_path_p)
555 {
556 vnode_pager_t vnode_object;
557
558 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
559 return KERN_INVALID_ARGUMENT;
560 }
561
562 vnode_object = vnode_pager_lookup(mem_obj);
563
564 return vnode_pager_get_name(vnode_object->vnode_handle,
565 pathname,
566 pathname_len,
567 filename,
568 filename_len,
569 truncated_path_p);
570 }
571
572 kern_return_t
573 vnode_pager_get_object_mtime(
574 memory_object_t mem_obj,
575 struct timespec *mtime,
576 struct timespec *cs_mtime)
577 {
578 vnode_pager_t vnode_object;
579
580 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
581 return KERN_INVALID_ARGUMENT;
582 }
583
584 vnode_object = vnode_pager_lookup(mem_obj);
585
586 return vnode_pager_get_mtime(vnode_object->vnode_handle,
587 mtime,
588 cs_mtime);
589 }
590
#if CHECK_CS_VALIDATION_BITMAP
/*
 * Forward to ubc_cs_check_validation_bitmap() for the vnode behind
 * "mem_obj" and return its result.  Returns KERN_INVALID_ARGUMENT when
 * "mem_obj" is null or is not a vnode pager.
 */
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	struct vnode *vp;

	if (mem_obj == MEMORY_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vp = vnode_pager_lookup(mem_obj)->vnode_handle;
	return ubc_cs_check_validation_bitmap(vp, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */
609
610 /*
611 *
612 */
613 kern_return_t
614 vnode_pager_data_request(
615 memory_object_t mem_obj,
616 memory_object_offset_t offset,
617 __unused memory_object_cluster_size_t length,
618 __unused vm_prot_t desired_access,
619 memory_object_fault_info_t fault_info)
620 {
621 vnode_pager_t vnode_object;
622 memory_object_offset_t base_offset;
623 vm_size_t size;
624 uint32_t io_streaming = 0;
625
626 vnode_object = vnode_pager_lookup(mem_obj);
627
628 size = MAX_UPL_TRANSFER_BYTES;
629 base_offset = offset;
630
631 if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
632 &base_offset, &size, &io_streaming,
633 fault_info) != KERN_SUCCESS)
634 size = PAGE_SIZE;
635
636 assert(offset >= base_offset &&
637 offset < base_offset + size);
638
639 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
640 }
641
642 /*
643 *
644 */
645 void
646 vnode_pager_reference(
647 memory_object_t mem_obj)
648 {
649 vnode_pager_t vnode_object;
650 unsigned int new_ref_count;
651
652 vnode_object = vnode_pager_lookup(mem_obj);
653 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
654 assert(new_ref_count > 1);
655 }
656
657 /*
658 *
659 */
660 void
661 vnode_pager_deallocate(
662 memory_object_t mem_obj)
663 {
664 vnode_pager_t vnode_object;
665
666 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
667
668 vnode_object = vnode_pager_lookup(mem_obj);
669
670 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
671 if (vnode_object->vnode_handle != NULL) {
672 vnode_pager_vrele(vnode_object->vnode_handle);
673 }
674 zfree(vnode_pager_zone, vnode_object);
675 }
676 return;
677 }
678
679 /*
680 *
681 */
682 kern_return_t
683 vnode_pager_terminate(
684 #if !DEBUG
685 __unused
686 #endif
687 memory_object_t mem_obj)
688 {
689 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
690
691 return(KERN_SUCCESS);
692 }
693
694 /*
695 *
696 */
697 kern_return_t
698 vnode_pager_synchronize(
699 __unused memory_object_t mem_obj,
700 __unused memory_object_offset_t offset,
701 __unused memory_object_size_t length,
702 __unused vm_sync_t sync_flags)
703 {
704 panic("vnode_pager_synchronize: memory_object_synchronize no longer supported\n");
705 return (KERN_FAILURE);
706 }
707
708 /*
709 *
710 */
711 kern_return_t
712 vnode_pager_map(
713 memory_object_t mem_obj,
714 vm_prot_t prot)
715 {
716 vnode_pager_t vnode_object;
717 int ret;
718 kern_return_t kr;
719
720 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
721
722 vnode_object = vnode_pager_lookup(mem_obj);
723
724 ret = ubc_map(vnode_object->vnode_handle, prot);
725
726 if (ret != 0) {
727 kr = KERN_FAILURE;
728 } else {
729 kr = KERN_SUCCESS;
730 }
731
732 return kr;
733 }
734
735 kern_return_t
736 vnode_pager_last_unmap(
737 memory_object_t mem_obj)
738 {
739 vnode_pager_t vnode_object;
740
741 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
742
743 vnode_object = vnode_pager_lookup(mem_obj);
744
745 ubc_unmap(vnode_object->vnode_handle);
746 return KERN_SUCCESS;
747 }
748
749
750
751 /*
752 *
753 */
754 void
755 vnode_pager_cluster_write(
756 vnode_pager_t vnode_object,
757 vm_object_offset_t offset,
758 vm_size_t cnt,
759 vm_object_offset_t * resid_offset,
760 int * io_error,
761 int upl_flags)
762 {
763 vm_size_t size;
764 int errno;
765
766 if (upl_flags & UPL_MSYNC) {
767
768 upl_flags |= UPL_VNODE_PAGER;
769
770 if ( (upl_flags & UPL_IOSYNC) && io_error)
771 upl_flags |= UPL_KEEPCACHED;
772
773 while (cnt) {
774 size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
775
776 assert((upl_size_t) size == size);
777 vnode_pageout(vnode_object->vnode_handle,
778 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
779
780 if ( (upl_flags & UPL_KEEPCACHED) ) {
781 if ( (*io_error = errno) )
782 break;
783 }
784 cnt -= size;
785 offset += size;
786 }
787 if (resid_offset)
788 *resid_offset = offset;
789
790 } else {
791 vm_object_offset_t vnode_size;
792 vm_object_offset_t base_offset;
793
794 /*
795 * this is the pageout path
796 */
797 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
798
799 if (vnode_size > (offset + PAGE_SIZE)) {
800 /*
801 * preset the maximum size of the cluster
802 * and put us on a nice cluster boundary...
803 * and then clip the size to insure we
804 * don't request past the end of the underlying file
805 */
806 size = MAX_UPL_TRANSFER_BYTES;
807 base_offset = offset & ~((signed)(size - 1));
808
809 if ((base_offset + size) > vnode_size)
810 size = round_page(((vm_size_t)(vnode_size - base_offset)));
811 } else {
812 /*
813 * we've been requested to page out a page beyond the current
814 * end of the 'file'... don't try to cluster in this case...
815 * we still need to send this page through because it might
816 * be marked precious and the underlying filesystem may need
817 * to do something with it (besides page it out)...
818 */
819 base_offset = offset;
820 size = PAGE_SIZE;
821 }
822 assert((upl_size_t) size == size);
823 vnode_pageout(vnode_object->vnode_handle,
824 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
825 (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
826 }
827 }
828
829
830 /*
831 *
832 */
833 kern_return_t
834 vnode_pager_cluster_read(
835 vnode_pager_t vnode_object,
836 vm_object_offset_t base_offset,
837 vm_object_offset_t offset,
838 uint32_t io_streaming,
839 vm_size_t cnt)
840 {
841 int local_error = 0;
842 int kret;
843 int flags = 0;
844
845 assert(! (cnt & PAGE_MASK));
846
847 if (io_streaming)
848 flags |= UPL_IOSTREAMING;
849
850 assert((upl_size_t) cnt == cnt);
851 kret = vnode_pagein(vnode_object->vnode_handle,
852 (upl_t) NULL,
853 (upl_offset_t) (offset - base_offset),
854 base_offset,
855 (upl_size_t) cnt,
856 flags,
857 &local_error);
858 /*
859 if(kret == PAGER_ABSENT) {
860 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
861 defined in bsd/vm/vm_pager.h However, we should not be including
862 that file here it is a layering violation.
863 */
864 if (kret == 1) {
865 int uplflags;
866 upl_t upl = NULL;
867 unsigned int count = 0;
868 kern_return_t kr;
869
870 uplflags = (UPL_NO_SYNC |
871 UPL_CLEAN_IN_PLACE |
872 UPL_SET_INTERNAL);
873 count = 0;
874 assert((upl_size_t) cnt == cnt);
875 kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
876 base_offset, (upl_size_t) cnt,
877 &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
878 if (kr == KERN_SUCCESS) {
879 upl_abort(upl, 0);
880 upl_deallocate(upl);
881 } else {
882 /*
883 * We couldn't gather the page list, probably
884 * because the memory object doesn't have a link
885 * to a VM object anymore (forced unmount, for
886 * example). Just return an error to the vm_fault()
887 * path and let it handle it.
888 */
889 }
890
891 return KERN_FAILURE;
892 }
893
894 return KERN_SUCCESS;
895
896 }
897
898 /*
899 *
900 */
901 vnode_pager_t
902 vnode_object_create(
903 struct vnode *vp)
904 {
905 vnode_pager_t vnode_object;
906
907 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
908 if (vnode_object == VNODE_PAGER_NULL)
909 return(VNODE_PAGER_NULL);
910
911 /*
912 * The vm_map call takes both named entry ports and raw memory
913 * objects in the same parameter. We need to make sure that
914 * vm_map does not see this object as a named entry port. So,
915 * we reserve the first word in the object for a fake ip_kotype
916 * setting - that will tell vm_map to use it as a memory object.
917 */
918 vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
919 vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
920 vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
921
922 vnode_object->ref_count = 1;
923 vnode_object->vnode_handle = vp;
924
925 return(vnode_object);
926 }
927
928 /*
929 *
930 */
931 vnode_pager_t
932 vnode_pager_lookup(
933 memory_object_t name)
934 {
935 vnode_pager_t vnode_object;
936
937 vnode_object = (vnode_pager_t)name;
938 assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
939 return (vnode_object);
940 }
941
942
943 struct vnode *
944 vnode_pager_lookup_vnode(
945 memory_object_t name)
946 {
947 vnode_pager_t vnode_object;
948 vnode_object = (vnode_pager_t)name;
949 if(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops)
950 return (vnode_object->vnode_handle);
951 else
952 return NULL;
953 }
954
955 /*********************** proc_info implementation *************/
956
957 #include <sys/bsdtask_info.h>
958
959 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
960
961
962 int
963 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
964 {
965
966 vm_map_t map;
967 vm_map_offset_t address = (vm_map_offset_t )arg;
968 vm_map_entry_t tmp_entry;
969 vm_map_entry_t entry;
970 vm_map_offset_t start;
971 vm_region_extended_info_data_t extended;
972 vm_region_top_info_data_t top;
973 boolean_t do_region_footprint;
974
975 task_lock(task);
976 map = task->map;
977 if (map == VM_MAP_NULL)
978 {
979 task_unlock(task);
980 return(0);
981 }
982 vm_map_reference(map);
983 task_unlock(task);
984
985 do_region_footprint = task_self_region_footprint();
986
987 vm_map_lock_read(map);
988
989 start = address;
990
991 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
992 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
993 if (do_region_footprint &&
994 address == tmp_entry->vme_end) {
995 ledger_amount_t nonvol, nonvol_compressed;
996
997 /*
998 * This request is right after the last valid
999 * memory region; instead of reporting the
1000 * end of the address space, report a fake
1001 * memory region to account for non-volatile
1002 * purgeable memory owned by this task.
1003 */
1004
1005 ledger_get_balance(
1006 task->ledger,
1007 task_ledgers.purgeable_nonvolatile,
1008 &nonvol);
1009 ledger_get_balance(
1010 task->ledger,
1011 task_ledgers.purgeable_nonvolatile_compressed,
1012 &nonvol_compressed);
1013 if (nonvol + nonvol_compressed == 0) {
1014 /* nothing to report */
1015 vm_map_unlock_read(map);
1016 vm_map_deallocate(map);
1017 return 0;
1018 }
1019 /* provide fake region for purgeable */
1020 pinfo->pri_offset = address;
1021 pinfo->pri_protection = VM_PROT_DEFAULT;
1022 pinfo->pri_max_protection = VM_PROT_DEFAULT;
1023 pinfo->pri_inheritance = VM_INHERIT_NONE;
1024 pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
1025 pinfo->pri_user_wired_count = 0;
1026 pinfo->pri_user_tag = -1;
1027 pinfo->pri_pages_resident =
1028 (uint32_t) (nonvol / PAGE_SIZE);
1029 pinfo->pri_pages_shared_now_private = 0;
1030 pinfo->pri_pages_swapped_out =
1031 (uint32_t) (nonvol_compressed / PAGE_SIZE);
1032 pinfo->pri_pages_dirtied =
1033 (uint32_t) (nonvol / PAGE_SIZE);
1034 pinfo->pri_ref_count = 1;
1035 pinfo->pri_shadow_depth = 0;
1036 pinfo->pri_share_mode = SM_PRIVATE;
1037 pinfo->pri_private_pages_resident =
1038 (uint32_t) (nonvol / PAGE_SIZE);
1039 pinfo->pri_shared_pages_resident = 0;
1040 pinfo->pri_obj_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
1041 pinfo->pri_address = address;
1042 pinfo->pri_size =
1043 (uint64_t) (nonvol + nonvol_compressed);
1044 pinfo->pri_depth = 0;
1045
1046 vm_map_unlock_read(map);
1047 vm_map_deallocate(map);
1048 return 1;
1049 }
1050 vm_map_unlock_read(map);
1051 vm_map_deallocate(map);
1052 return 0;
1053 }
1054 } else {
1055 entry = tmp_entry;
1056 }
1057
1058 start = entry->vme_start;
1059
1060 pinfo->pri_offset = VME_OFFSET(entry);
1061 pinfo->pri_protection = entry->protection;
1062 pinfo->pri_max_protection = entry->max_protection;
1063 pinfo->pri_inheritance = entry->inheritance;
1064 pinfo->pri_behavior = entry->behavior;
1065 pinfo->pri_user_wired_count = entry->user_wired_count;
1066 pinfo->pri_user_tag = VME_ALIAS(entry);
1067
1068 if (entry->is_sub_map) {
1069 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1070 } else {
1071 if (entry->is_shared)
1072 pinfo->pri_flags |= PROC_REGION_SHARED;
1073 }
1074
1075
1076 extended.protection = entry->protection;
1077 extended.user_tag = VME_ALIAS(entry);
1078 extended.pages_resident = 0;
1079 extended.pages_swapped_out = 0;
1080 extended.pages_shared_now_private = 0;
1081 extended.pages_dirtied = 0;
1082 extended.external_pager = 0;
1083 extended.shadow_depth = 0;
1084
1085 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);
1086
1087 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1088 extended.share_mode = SM_PRIVATE;
1089
1090 top.private_pages_resident = 0;
1091 top.shared_pages_resident = 0;
1092 vm_map_region_top_walk(entry, &top);
1093
1094
1095 pinfo->pri_pages_resident = extended.pages_resident;
1096 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1097 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1098 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1099 pinfo->pri_ref_count = extended.ref_count;
1100 pinfo->pri_shadow_depth = extended.shadow_depth;
1101 pinfo->pri_share_mode = extended.share_mode;
1102
1103 pinfo->pri_private_pages_resident = top.private_pages_resident;
1104 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1105 pinfo->pri_obj_id = top.obj_id;
1106
1107 pinfo->pri_address = (uint64_t)start;
1108 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1109 pinfo->pri_depth = 0;
1110
1111 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1112 *vnodeaddr = (uintptr_t)0;
1113
1114 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1115 vm_map_unlock_read(map);
1116 vm_map_deallocate(map);
1117 return(1);
1118 }
1119 }
1120
1121 vm_map_unlock_read(map);
1122 vm_map_deallocate(map);
1123 return(1);
1124 }
1125
1126 int
1127 fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1128 {
1129
1130 vm_map_t map;
1131 vm_map_offset_t address = (vm_map_offset_t )arg;
1132 vm_map_entry_t tmp_entry;
1133 vm_map_entry_t entry;
1134
1135 task_lock(task);
1136 map = task->map;
1137 if (map == VM_MAP_NULL)
1138 {
1139 task_unlock(task);
1140 return(0);
1141 }
1142 vm_map_reference(map);
1143 task_unlock(task);
1144
1145 vm_map_lock_read(map);
1146
1147 if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
1148 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1149 vm_map_unlock_read(map);
1150 vm_map_deallocate(map);
1151 return(0);
1152 }
1153 } else {
1154 entry = tmp_entry;
1155 }
1156
1157 while (entry != vm_map_to_entry(map)) {
1158 *vnodeaddr = 0;
1159 *vid = 0;
1160
1161 if (entry->is_sub_map == 0) {
1162 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1163
1164 pinfo->pri_offset = VME_OFFSET(entry);
1165 pinfo->pri_protection = entry->protection;
1166 pinfo->pri_max_protection = entry->max_protection;
1167 pinfo->pri_inheritance = entry->inheritance;
1168 pinfo->pri_behavior = entry->behavior;
1169 pinfo->pri_user_wired_count = entry->user_wired_count;
1170 pinfo->pri_user_tag = VME_ALIAS(entry);
1171
1172 if (entry->is_shared)
1173 pinfo->pri_flags |= PROC_REGION_SHARED;
1174
1175 pinfo->pri_pages_resident = 0;
1176 pinfo->pri_pages_shared_now_private = 0;
1177 pinfo->pri_pages_swapped_out = 0;
1178 pinfo->pri_pages_dirtied = 0;
1179 pinfo->pri_ref_count = 0;
1180 pinfo->pri_shadow_depth = 0;
1181 pinfo->pri_share_mode = 0;
1182
1183 pinfo->pri_private_pages_resident = 0;
1184 pinfo->pri_shared_pages_resident = 0;
1185 pinfo->pri_obj_id = 0;
1186
1187 pinfo->pri_address = (uint64_t)entry->vme_start;
1188 pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
1189 pinfo->pri_depth = 0;
1190
1191 vm_map_unlock_read(map);
1192 vm_map_deallocate(map);
1193 return(1);
1194 }
1195 }
1196
1197 /* Keep searching for a vnode-backed mapping */
1198 entry = entry->vme_next;
1199 }
1200
1201 vm_map_unlock_read(map);
1202 vm_map_deallocate(map);
1203 return(0);
1204 }
1205
1206 static int
1207 fill_vnodeinfoforaddr(
1208 vm_map_entry_t entry,
1209 uintptr_t * vnodeaddr,
1210 uint32_t * vid)
1211 {
1212 vm_object_t top_object, object;
1213 memory_object_t memory_object;
1214 memory_object_pager_ops_t pager_ops;
1215 kern_return_t kr;
1216 int shadow_depth;
1217
1218
1219 if (entry->is_sub_map) {
1220 return(0);
1221 } else {
1222 /*
1223 * The last object in the shadow chain has the
1224 * relevant pager information.
1225 */
1226 top_object = VME_OBJECT(entry);
1227 if (top_object == VM_OBJECT_NULL) {
1228 object = VM_OBJECT_NULL;
1229 shadow_depth = 0;
1230 } else {
1231 vm_object_lock(top_object);
1232 for (object = top_object, shadow_depth = 0;
1233 object->shadow != VM_OBJECT_NULL;
1234 object = object->shadow, shadow_depth++) {
1235 vm_object_lock(object->shadow);
1236 vm_object_unlock(object);
1237 }
1238 }
1239 }
1240
1241 if (object == VM_OBJECT_NULL) {
1242 return(0);
1243 } else if (object->internal) {
1244 vm_object_unlock(object);
1245 return(0);
1246 } else if (! object->pager_ready ||
1247 object->terminating ||
1248 ! object->alive) {
1249 vm_object_unlock(object);
1250 return(0);
1251 } else {
1252 memory_object = object->pager;
1253 pager_ops = memory_object->mo_pager_ops;
1254 if (pager_ops == &vnode_pager_ops) {
1255 kr = vnode_pager_get_object_vnode(
1256 memory_object,
1257 vnodeaddr, vid);
1258 if (kr != KERN_SUCCESS) {
1259 vm_object_unlock(object);
1260 return(0);
1261 }
1262 } else {
1263 vm_object_unlock(object);
1264 return(0);
1265 }
1266 }
1267 vm_object_unlock(object);
1268 return(1);
1269 }
1270
1271 kern_return_t
1272 vnode_pager_get_object_vnode (
1273 memory_object_t mem_obj,
1274 uintptr_t * vnodeaddr,
1275 uint32_t * vid)
1276 {
1277 vnode_pager_t vnode_object;
1278
1279 vnode_object = vnode_pager_lookup(mem_obj);
1280 if (vnode_object->vnode_handle) {
1281 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1282 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1283
1284 return(KERN_SUCCESS);
1285 }
1286
1287 return(KERN_FAILURE);
1288 }
1289
#if CONFIG_IOSCHED
/*
 * Store in *devvp the result of vnode_mountdevvp() for the vnode behind
 * "mem_obj".  Returns KERN_SUCCESS when that result is non-zero and
 * KERN_FAILURE otherwise; also returns KERN_FAILURE, without touching
 * *devvp, when the pager has no vnode.
 */
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t	mem_obj,
	uintptr_t	*devvp)
{
	struct vnode	*vp;
	uint32_t	vid;
	kern_return_t	kr;

	kr = vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid);
	if (kr != KERN_SUCCESS) {
		return KERN_FAILURE;
	}

	*devvp = (uintptr_t) vnode_mountdevvp(vp);

	return (*devvp) ? KERN_SUCCESS : KERN_FAILURE;
}
#endif
1307
1308 /*
1309 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1310 * object locked, otherwise return NULL with nothing locked.
1311 */
1312
1313 vm_object_t
1314 find_vnode_object(
1315 vm_map_entry_t entry
1316 )
1317 {
1318 vm_object_t top_object, object;
1319 memory_object_t memory_object;
1320 memory_object_pager_ops_t pager_ops;
1321
1322 if (!entry->is_sub_map) {
1323
1324 /*
1325 * The last object in the shadow chain has the
1326 * relevant pager information.
1327 */
1328
1329 top_object = VME_OBJECT(entry);
1330
1331 if (top_object) {
1332 vm_object_lock(top_object);
1333
1334 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1335 vm_object_lock(object->shadow);
1336 vm_object_unlock(object);
1337 }
1338
1339 if (object && !object->internal && object->pager_ready && !object->terminating &&
1340 object->alive) {
1341 memory_object = object->pager;
1342 pager_ops = memory_object->mo_pager_ops;
1343
1344 /*
1345 * If this object points to the vnode_pager_ops, then we found what we're
1346 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1347 * vnode and so we fall through to the bottom and return NULL.
1348 */
1349
1350 if (pager_ops == &vnode_pager_ops)
1351 return object; /* we return with the object locked */
1352 }
1353
1354 vm_object_unlock(object);
1355 }
1356
1357 }
1358
1359 return(VM_OBJECT_NULL);
1360 }