apple/xnu.git — osfmk/vm/bsd_vm.c (xnu-7195.101.1)
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/ledger.h>
45 #include <kern/thread.h>
46 #include <kern/ipc_kobject.h>
47 #include <os/refcnt.h>
48
49 #include <ipc/ipc_port.h>
50 #include <ipc/ipc_space.h>
51
52 #include <vm/vm_map.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/memory_object.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_protos.h>
57 #include <vm/vm_purgeable_internal.h>
58
59
60 /* BSD VM COMPONENT INTERFACES */
61 int
62 get_map_nentries(
63 vm_map_t);
64
65 vm_offset_t
66 get_map_start(
67 vm_map_t);
68
69 vm_offset_t
70 get_map_end(
71 vm_map_t);
72
73 /*
74 * Return the number of entries in the given VM map.
75 */
76 int
77 get_map_nentries(
78 vm_map_t map)
79 {
80 return map->hdr.nentries;
81 }
82
83 mach_vm_offset_t
84 mach_get_vm_start(vm_map_t map)
85 {
86 return vm_map_first_entry(map)->vme_start;
87 }
88
89 mach_vm_offset_t
90 mach_get_vm_end(vm_map_t map)
91 {
92 return vm_map_last_entry(map)->vme_end;
93 }
94
95 /*
96 * BSD VNODE PAGER
97 */
98
99 const struct memory_object_pager_ops vnode_pager_ops = {
100 .memory_object_reference = vnode_pager_reference,
101 .memory_object_deallocate = vnode_pager_deallocate,
102 .memory_object_init = vnode_pager_init,
103 .memory_object_terminate = vnode_pager_terminate,
104 .memory_object_data_request = vnode_pager_data_request,
105 .memory_object_data_return = vnode_pager_data_return,
106 .memory_object_data_initialize = vnode_pager_data_initialize,
107 .memory_object_data_unlock = vnode_pager_data_unlock,
108 .memory_object_synchronize = vnode_pager_synchronize,
109 .memory_object_map = vnode_pager_map,
110 .memory_object_last_unmap = vnode_pager_last_unmap,
111 .memory_object_data_reclaim = NULL,
112 .memory_object_backing_object = NULL,
113 .memory_object_pager_name = "vnode pager"
114 };
115
116 typedef struct vnode_pager {
117 /* mandatory generic header */
118 struct memory_object vn_pgr_hdr;
119
120 /* pager-specific */
121 #if MEMORY_OBJECT_HAS_REFCOUNT
122 #define vn_pgr_hdr_ref vn_pgr_hdr.mo_ref
123 #else
124 os_ref_atomic_t vn_pgr_hdr_ref;
125 #endif
126 struct vnode *vnode_handle; /* vnode handle */
127 } *vnode_pager_t;
128
129
130 kern_return_t
131 vnode_pager_cluster_read( /* forward */
132 vnode_pager_t,
133 vm_object_offset_t,
134 vm_object_offset_t,
135 uint32_t,
136 vm_size_t);
137
138 void
139 vnode_pager_cluster_write( /* forward */
140 vnode_pager_t,
141 vm_object_offset_t,
142 vm_size_t,
143 vm_object_offset_t *,
144 int *,
145 int);
146
147
148 vnode_pager_t
149 vnode_object_create( /* forward */
150 struct vnode *);
151
152 vnode_pager_t
153 vnode_pager_lookup( /* forward */
154 memory_object_t);
155
156 struct vnode *
157 vnode_pager_lookup_vnode( /* forward */
158 memory_object_t);
159
160 ZONE_DECLARE(vnode_pager_zone, "vnode pager structures",
161 sizeof(struct vnode_pager), ZC_NOENCRYPT);
162
163 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
164
165 /* TODO: Should be set dynamically by vnode_pager_init() */
166 #define CLUSTER_SHIFT 1
167
168
169 #if DEBUG
170 int pagerdebug = 0;
171
172 #define PAGER_ALL 0xffffffff
173 #define PAGER_INIT 0x00000001
174 #define PAGER_PAGEIN 0x00000002
175
176 #define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
177 #else
178 #define PAGER_DEBUG(LEVEL, A)
179 #endif
180
181 extern int proc_resetpcontrol(int);
182
183
184 extern int uiomove64(addr64_t, int, void *);
185 #define MAX_RUN 32
186
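/*
 * Copy up to "io_requested" bytes between the object's resident pages
 * (starting at "offset" + "start_offset") and the given uio, gathering
 * pages in runs of up to MAX_RUN and stopping at the first page that is
 * not resident.  Optionally marks the pages dirty and requeues them at
 * the tail of the inactive queue.  Returns 0 when this fast path cannot
 * be used, or the error returned by uiomove64().
 */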
187 int
188 memory_object_control_uiomove(
189 memory_object_control_t control,
190 memory_object_offset_t offset,
191 void * uio,
192 int start_offset,
193 int io_requested,
194 int mark_dirty,
195 int take_reference)
196 {
197 vm_object_t object;
198 vm_page_t dst_page;
199 int xsize;
200 int retval = 0;
201 int cur_run;
202 int cur_needed;
203 int i;
204 int orig_offset;
205 vm_page_t page_run[MAX_RUN];
206 int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */
207
208 object = memory_object_control_to_vm_object(control);
209 if (object == VM_OBJECT_NULL) {
210 return 0;
211 }
212 assert(!object->internal);
213
214 vm_object_lock(object);
215
216 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
217 /*
218 * We can't modify the pages without honoring
219 * copy-on-write obligations first, so fall off
220 * this optimized path and fall back to the regular
221 * path.
222 */
223 vm_object_unlock(object);
224 return 0;
225 }
226 orig_offset = start_offset;
227
228 dirty_count = 0;
229 while (io_requested && retval == 0) {
230 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
231
232 if (cur_needed > MAX_RUN) {
233 cur_needed = MAX_RUN;
234 }
235
236 for (cur_run = 0; cur_run < cur_needed;) {
237 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
238 break;
239 }
240
241
242 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
243 /*
244 * someone else is playing with the page... if we've
245 * already collected pages into this run, go ahead
246 * and process them now, since we can't block on this
247 * page while holding other pages in the BUSY state;
248 * otherwise, wait for the page to become available
249 */
250 if (cur_run) {
251 break;
252 }
253 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
254 continue;
255 }
256 if (dst_page->vmp_laundry) {
257 vm_pageout_steal_laundry(dst_page, FALSE);
258 }
259
260 if (mark_dirty) {
261 if (dst_page->vmp_dirty == FALSE) {
262 dirty_count++;
263 }
264 SET_PAGE_DIRTY(dst_page, FALSE);
265 if (dst_page->vmp_cs_validated &&
266 !dst_page->vmp_cs_tainted) {
267 /*
268 * CODE SIGNING:
269 * We're modifying a code-signed
270 * page: force revalidate
271 */
272 dst_page->vmp_cs_validated = VMP_CS_ALL_FALSE;
273
274 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
275
276 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
277 }
278 }
279 dst_page->vmp_busy = TRUE;
280
281 page_run[cur_run++] = dst_page;
282
283 offset += PAGE_SIZE_64;
284 }
285 if (cur_run == 0) {
286 /*
287 * we hit a 'hole' in the cache or
288 * a page we don't want to try to handle,
289 * so bail at this point
290 * we'll unlock the object below
291 */
292 break;
293 }
294 vm_object_unlock(object);
295
296 for (i = 0; i < cur_run; i++) {
297 dst_page = page_run[i];
298
299 if ((xsize = PAGE_SIZE - start_offset) > io_requested) {
300 xsize = io_requested;
301 }
302
303 if ((retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio))) {
304 break;
305 }
306
307 io_requested -= xsize;
308 start_offset = 0;
309 }
310 vm_object_lock(object);
311
312 /*
313 * if we have more than 1 page to work on
314 * in the current run, or the original request
315 * started at offset 0 of the page, or we're
316 * processing multiple batches, we will move
317 * the pages to the tail of the inactive queue
318 * to implement an LRU for read/write accesses
319 *
320 * the check for orig_offset == 0 is there to
321 * mitigate the cost of small (< page_size) requests
322 * to the same page (this way we only move it once)
323 */
324 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
325 vm_page_lockspin_queues();
326
327 for (i = 0; i < cur_run; i++) {
328 vm_page_lru(page_run[i]);
329 }
330
331 vm_page_unlock_queues();
332 }
333 for (i = 0; i < cur_run; i++) {
334 dst_page = page_run[i];
335
336 /*
337 * someone is explicitly referencing this page...
338 * update clustered and speculative state
339 *
340 */
341 if (dst_page->vmp_clustered) {
342 VM_PAGE_CONSUME_CLUSTERED(dst_page);
343 }
344
345 PAGE_WAKEUP_DONE(dst_page);
346 }
347 orig_offset = 0;
348 }
349 if (object->pager) {
350 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
351 }
352 vm_object_unlock(object);
353 return retval;
354 }
355
356
357 /*
358 * Create a vnode pager (memory object) backed by the given vnode.
359 */
360 memory_object_t
361 vnode_pager_setup(
362 struct vnode *vp,
363 __unused memory_object_t pager)
364 {
365 vnode_pager_t vnode_object;
366
367 vnode_object = vnode_object_create(vp);
368 if (vnode_object == VNODE_PAGER_NULL) {
369 panic("vnode_pager_setup: vnode_object_create() failed");
370 }
371 return (memory_object_t)vnode_object;
372 }
373
374 /*
375 * Attach the memory object control and set the object's caching attributes.
376 */
377 kern_return_t
378 vnode_pager_init(memory_object_t mem_obj,
379 memory_object_control_t control,
380 #if !DEBUG
381 __unused
382 #endif
383 memory_object_cluster_size_t pg_size)
384 {
385 vnode_pager_t vnode_object;
386 kern_return_t kr;
387 memory_object_attr_info_data_t attributes;
388
389
390 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
391
392 if (control == MEMORY_OBJECT_CONTROL_NULL) {
393 return KERN_INVALID_ARGUMENT;
394 }
395
396 vnode_object = vnode_pager_lookup(mem_obj);
397
398 memory_object_control_reference(control);
399
400 vnode_object->vn_pgr_hdr.mo_control = control;
401
402 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
403 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
404 attributes.cluster_size = (1 << (PAGE_SHIFT));
405 attributes.may_cache_object = TRUE;
406 attributes.temporary = TRUE;
407
408 kr = memory_object_change_attributes(
409 control,
410 MEMORY_OBJECT_ATTRIBUTE_INFO,
411 (memory_object_info_t) &attributes,
412 MEMORY_OBJECT_ATTR_INFO_COUNT);
413 if (kr != KERN_SUCCESS) {
414 panic("vnode_pager_init: memory_object_change_attributes() failed");
415 }
416
417 return KERN_SUCCESS;
418 }
419
420 /*
421 * data_return (pageout) request: push the pages back to the vnode.
422 */
423 kern_return_t
424 vnode_pager_data_return(
425 memory_object_t mem_obj,
426 memory_object_offset_t offset,
427 memory_object_cluster_size_t data_cnt,
428 memory_object_offset_t *resid_offset,
429 int *io_error,
430 __unused boolean_t dirty,
431 __unused boolean_t kernel_copy,
432 int upl_flags)
433 {
434 vnode_pager_t vnode_object;
435
436 assertf(page_aligned(offset), "offset 0x%llx\n", offset);
437
438 vnode_object = vnode_pager_lookup(mem_obj);
439
440 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
441
442 return KERN_SUCCESS;
443 }
444
445 kern_return_t
446 vnode_pager_data_initialize(
447 __unused memory_object_t mem_obj,
448 __unused memory_object_offset_t offset,
449 __unused memory_object_cluster_size_t data_cnt)
450 {
451 panic("vnode_pager_data_initialize");
452 return KERN_FAILURE;
453 }
454
455 kern_return_t
456 vnode_pager_data_unlock(
457 __unused memory_object_t mem_obj,
458 __unused memory_object_offset_t offset,
459 __unused memory_object_size_t size,
460 __unused vm_prot_t desired_access)
461 {
462 return KERN_FAILURE;
463 }
464
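/*
 * Notify the vnode layer that the given byte range of this memory
 * object has been dirtied.
 */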
465 void
466 vnode_pager_dirtied(
467 memory_object_t mem_obj,
468 vm_object_offset_t s_offset,
469 vm_object_offset_t e_offset)
470 {
471 vnode_pager_t vnode_object;
472
473 if (mem_obj && mem_obj->mo_pager_ops == &vnode_pager_ops) {
474 vnode_object = vnode_pager_lookup(mem_obj);
475 vnode_pager_was_dirtied(vnode_object->vnode_handle, s_offset, e_offset);
476 }
477 }
478
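/*
 * Report whether the backing vnode is currently in use; non-vnode
 * pagers are conservatively reported as in use.
 */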
479 kern_return_t
480 vnode_pager_get_isinuse(
481 memory_object_t mem_obj,
482 uint32_t *isinuse)
483 {
484 vnode_pager_t vnode_object;
485
486 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
487 *isinuse = 1;
488 return KERN_INVALID_ARGUMENT;
489 }
490
491 vnode_object = vnode_pager_lookup(mem_obj);
492
493 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
494 return KERN_SUCCESS;
495 }
496
497 kern_return_t
498 vnode_pager_get_throttle_io_limit(
499 memory_object_t mem_obj,
500 uint32_t *limit)
501 {
502 vnode_pager_t vnode_object;
503
504 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
505 return KERN_INVALID_ARGUMENT;
506 }
507
508 vnode_object = vnode_pager_lookup(mem_obj);
509
510 (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
511 return KERN_SUCCESS;
512 }
513
514 kern_return_t
515 vnode_pager_get_isSSD(
516 memory_object_t mem_obj,
517 boolean_t *isSSD)
518 {
519 vnode_pager_t vnode_object;
520
521 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
522 return KERN_INVALID_ARGUMENT;
523 }
524
525 vnode_object = vnode_pager_lookup(mem_obj);
526
527 *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
528 return KERN_SUCCESS;
529 }
530
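/*
 * Return the current size of the file backing this memory object.
 */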
531 kern_return_t
532 vnode_pager_get_object_size(
533 memory_object_t mem_obj,
534 memory_object_offset_t *length)
535 {
536 vnode_pager_t vnode_object;
537
538 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
539 *length = 0;
540 return KERN_INVALID_ARGUMENT;
541 }
542
543 vnode_object = vnode_pager_lookup(mem_obj);
544
545 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
546 return KERN_SUCCESS;
547 }
548
549 kern_return_t
550 vnode_pager_get_object_name(
551 memory_object_t mem_obj,
552 char *pathname,
553 vm_size_t pathname_len,
554 char *filename,
555 vm_size_t filename_len,
556 boolean_t *truncated_path_p)
557 {
558 vnode_pager_t vnode_object;
559
560 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
561 return KERN_INVALID_ARGUMENT;
562 }
563
564 vnode_object = vnode_pager_lookup(mem_obj);
565
566 return vnode_pager_get_name(vnode_object->vnode_handle,
567 pathname,
568 pathname_len,
569 filename,
570 filename_len,
571 truncated_path_p);
572 }
573
574 kern_return_t
575 vnode_pager_get_object_mtime(
576 memory_object_t mem_obj,
577 struct timespec *mtime,
578 struct timespec *cs_mtime)
579 {
580 vnode_pager_t vnode_object;
581
582 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
583 return KERN_INVALID_ARGUMENT;
584 }
585
586 vnode_object = vnode_pager_lookup(mem_obj);
587
588 return vnode_pager_get_mtime(vnode_object->vnode_handle,
589 mtime,
590 cs_mtime);
591 }
592
593 #if CHECK_CS_VALIDATION_BITMAP
594 kern_return_t
595 vnode_pager_cs_check_validation_bitmap(
596 memory_object_t mem_obj,
597 memory_object_offset_t offset,
598 int optype )
599 {
600 vnode_pager_t vnode_object;
601
602 if (mem_obj == MEMORY_OBJECT_NULL ||
603 mem_obj->mo_pager_ops != &vnode_pager_ops) {
604 return KERN_INVALID_ARGUMENT;
605 }
606
607 vnode_object = vnode_pager_lookup(mem_obj);
608 return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
609 }
610 #endif /* CHECK_CS_VALIDATION_BITMAP */
611
612 /*
613 * data_request (pagein) fault: size a cluster around the faulting offset and read it in.
614 */
615 kern_return_t
616 vnode_pager_data_request(
617 memory_object_t mem_obj,
618 memory_object_offset_t offset,
619 __unused memory_object_cluster_size_t length,
620 __unused vm_prot_t desired_access,
621 memory_object_fault_info_t fault_info)
622 {
623 vnode_pager_t vnode_object;
624 memory_object_offset_t base_offset;
625 vm_size_t size;
626 uint32_t io_streaming = 0;
627
628 assertf(page_aligned(offset), "offset 0x%llx\n", offset);
629
630 vnode_object = vnode_pager_lookup(mem_obj);
631
632 size = MAX_UPL_TRANSFER_BYTES;
633 base_offset = offset;
634
635 if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
636 &base_offset, &size, &io_streaming,
637 fault_info) != KERN_SUCCESS) {
638 size = PAGE_SIZE;
639 }
640
641 assert(offset >= base_offset &&
642 offset < base_offset + size);
643
644 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
645 }
646
647 /*
648 * Take an additional reference on the vnode pager.
649 */
650 void
651 vnode_pager_reference(
652 memory_object_t mem_obj)
653 {
654 vnode_pager_t vnode_object;
655
656 vnode_object = vnode_pager_lookup(mem_obj);
657 os_ref_retain_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
658 }
659
660 /*
661 * Drop a reference; on the last release, vrele the vnode and free the pager.
662 */
663 void
664 vnode_pager_deallocate(
665 memory_object_t mem_obj)
666 {
667 vnode_pager_t vnode_object;
668
669 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
670
671 vnode_object = vnode_pager_lookup(mem_obj);
672
673 if (os_ref_release_raw(&vnode_object->vn_pgr_hdr_ref, NULL) == 0) {
674 if (vnode_object->vnode_handle != NULL) {
675 vnode_pager_vrele(vnode_object->vnode_handle);
676 }
677 zfree(vnode_pager_zone, vnode_object);
678 }
679 }
680
681 /*
682 * Terminate the pager; nothing to clean up here.
683 */
684 kern_return_t
685 vnode_pager_terminate(
686 #if !DEBUG
687 __unused
688 #endif
689 memory_object_t mem_obj)
690 {
691 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
692
693 return KERN_SUCCESS;
694 }
695
696 /*
697 * memory_object_synchronize is no longer supported; this should never be called.
698 */
699 kern_return_t
700 vnode_pager_synchronize(
701 __unused memory_object_t mem_obj,
702 __unused memory_object_offset_t offset,
703 __unused memory_object_size_t length,
704 __unused vm_sync_t sync_flags)
705 {
706 panic("vnode_pager_synchronize: memory_object_synchronize no longer supported\n");
707 return KERN_FAILURE;
708 }
709
710 /*
711 * First mapping of the object: tell UBC the vnode is now mapped.
712 */
713 kern_return_t
714 vnode_pager_map(
715 memory_object_t mem_obj,
716 vm_prot_t prot)
717 {
718 vnode_pager_t vnode_object;
719 int ret;
720 kern_return_t kr;
721
722 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
723
724 vnode_object = vnode_pager_lookup(mem_obj);
725
726 ret = ubc_map(vnode_object->vnode_handle, prot);
727
728 if (ret != 0) {
729 kr = KERN_FAILURE;
730 } else {
731 kr = KERN_SUCCESS;
732 }
733
734 return kr;
735 }
736
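/*
 * Called when the last mapping of this memory object goes away:
 * tell UBC the vnode is no longer mapped.
 */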
737 kern_return_t
738 vnode_pager_last_unmap(
739 memory_object_t mem_obj)
740 {
741 vnode_pager_t vnode_object;
742
743 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
744
745 vnode_object = vnode_pager_lookup(mem_obj);
746
747 ubc_unmap(vnode_object->vnode_handle);
748 return KERN_SUCCESS;
749 }
750
751
752
753 /*
754 * Push a cluster of dirty pages out to the vnode (msync or pageout path).
755 */
756 void
757 vnode_pager_cluster_write(
758 vnode_pager_t vnode_object,
759 vm_object_offset_t offset,
760 vm_size_t cnt,
761 vm_object_offset_t * resid_offset,
762 int * io_error,
763 int upl_flags)
764 {
765 vm_size_t size;
766 int errno;
767
768 if (upl_flags & UPL_MSYNC) {
769 upl_flags |= UPL_VNODE_PAGER;
770
771 if ((upl_flags & UPL_IOSYNC) && io_error) {
772 upl_flags |= UPL_KEEPCACHED;
773 }
774
775 while (cnt) {
776 size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
777
778 assert((upl_size_t) size == size);
779 vnode_pageout(vnode_object->vnode_handle,
780 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
781
782 if ((upl_flags & UPL_KEEPCACHED)) {
783 if ((*io_error = errno)) {
784 break;
785 }
786 }
787 cnt -= size;
788 offset += size;
789 }
790 if (resid_offset) {
791 *resid_offset = offset;
792 }
793 } else {
794 vm_object_offset_t vnode_size;
795 vm_object_offset_t base_offset;
796
797 /*
798 * this is the pageout path
799 */
800 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
801
802 if (vnode_size > (offset + PAGE_SIZE)) {
803 /*
804 * preset the maximum size of the cluster
805 * and put us on a nice cluster boundary...
806 * and then clip the size to ensure we
807 * don't request past the end of the underlying file
808 */
809 size = MAX_UPL_TRANSFER_BYTES;
810 base_offset = offset & ~((signed)(size - 1));
811
812 if ((base_offset + size) > vnode_size) {
813 size = round_page(((vm_size_t)(vnode_size - base_offset)));
814 }
815 } else {
816 /*
817 * we've been requested to page out a page beyond the current
818 * end of the 'file'... don't try to cluster in this case...
819 * we still need to send this page through because it might
820 * be marked precious and the underlying filesystem may need
821 * to do something with it (besides page it out)...
822 */
823 base_offset = offset;
824 size = PAGE_SIZE;
825 }
826 assert((upl_size_t) size == size);
827 vnode_pageout(vnode_object->vnode_handle,
828 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
829 (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
830 }
831 }
832
833
834 /*
835 * Page in a cluster of pages from the vnode.
836 */
837 kern_return_t
838 vnode_pager_cluster_read(
839 vnode_pager_t vnode_object,
840 vm_object_offset_t base_offset,
841 vm_object_offset_t offset,
842 uint32_t io_streaming,
843 vm_size_t cnt)
844 {
845 int local_error = 0;
846 int kret;
847 int flags = 0;
848
849 assert(!(cnt & PAGE_MASK));
850
851 if (io_streaming) {
852 flags |= UPL_IOSTREAMING;
853 }
854
855 assert((upl_size_t) cnt == cnt);
856 kret = vnode_pagein(vnode_object->vnode_handle,
857 (upl_t) NULL,
858 (upl_offset_t) (offset - base_offset),
859 base_offset,
860 (upl_size_t) cnt,
861 flags,
862 &local_error);
863 /*
864 * if (kret == PAGER_ABSENT) {
865 * We need to work out the definitions here: 1 corresponds to PAGER_ABSENT
866 * as defined in bsd/vm/vm_pager.h.  However, we should not include
867 * that file here; that would be a layering violation.
868 */
869 if (kret == 1) {
870 int uplflags;
871 upl_t upl = NULL;
872 unsigned int count = 0;
873 kern_return_t kr;
874
875 uplflags = (UPL_NO_SYNC |
876 UPL_CLEAN_IN_PLACE |
877 UPL_SET_INTERNAL);
878 count = 0;
879 assert((upl_size_t) cnt == cnt);
880 kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
881 base_offset, (upl_size_t) cnt,
882 &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
883 if (kr == KERN_SUCCESS) {
884 upl_abort(upl, 0);
885 upl_deallocate(upl);
886 } else {
887 /*
888 * We couldn't gather the page list, probably
889 * because the memory object doesn't have a link
890 * to a VM object anymore (forced unmount, for
891 * example). Just return an error to the vm_fault()
892 * path and let it handle it.
893 */
894 }
895
896 return KERN_FAILURE;
897 }
898
899 return KERN_SUCCESS;
900 }
901
902 /*
903 * Allocate and initialize a vnode pager structure for the given vnode.
904 */
905 vnode_pager_t
906 vnode_object_create(
907 struct vnode *vp)
908 {
909 vnode_pager_t vnode_object;
910
911 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
912 if (vnode_object == VNODE_PAGER_NULL) {
913 return VNODE_PAGER_NULL;
914 }
915
916 /*
917 * The vm_map call takes both named entry ports and raw memory
918 * objects in the same parameter. We need to make sure that
919 * vm_map does not see this object as a named entry port. So,
920 * we reserve the first word in the object for a fake ip_kotype
921 * setting - that will tell vm_map to use it as a memory object.
922 */
923 vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
924 vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
925 vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
926
927 os_ref_init_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
928 vnode_object->vnode_handle = vp;
929
930 return vnode_object;
931 }
932
933 /*
934 * Convert a memory_object_t back to the vnode_pager_t that embeds it.
935 */
936 vnode_pager_t
937 vnode_pager_lookup(
938 memory_object_t name)
939 {
940 vnode_pager_t vnode_object;
941
942 vnode_object = (vnode_pager_t)name;
943 assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
944 return vnode_object;
945 }
946
947
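/*
 * Like vnode_pager_lookup(), but returns the backing vnode, or NULL if
 * the memory object is not managed by the vnode pager.
 */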
948 struct vnode *
949 vnode_pager_lookup_vnode(
950 memory_object_t name)
951 {
952 vnode_pager_t vnode_object;
953 vnode_object = (vnode_pager_t)name;
954 if (vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops) {
955 return vnode_object->vnode_handle;
956 } else {
957 return NULL;
958 }
959 }
960
961 /*********************** proc_info implementation *************/
962
963 #include <sys/bsdtask_info.h>
964
965 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
966
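/*
 * Fill in a proc_regioninfo_internal for the region containing (or, if
 * none, the one following) the given address in the task's map.  When
 * the region is backed by a vnode, also return its address and vid.
 * Returns 1 if a region was found, 0 otherwise.
 */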
967 int
968 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
969 {
970 vm_map_t map;
971 vm_map_offset_t address = (vm_map_offset_t)arg;
972 vm_map_entry_t tmp_entry;
973 vm_map_entry_t entry;
974 vm_map_offset_t start;
975 vm_region_extended_info_data_t extended;
976 vm_region_top_info_data_t top;
977 boolean_t do_region_footprint;
978 int effective_page_shift, effective_page_size;
979
980 task_lock(task);
981 map = task->map;
982 if (map == VM_MAP_NULL) {
983 task_unlock(task);
984 return 0;
985 }
986
987 effective_page_shift = vm_self_region_page_shift(map);
988 effective_page_size = (1 << effective_page_shift);
989
990 vm_map_reference(map);
991 task_unlock(task);
992
993 do_region_footprint = task_self_region_footprint();
994
995 vm_map_lock_read(map);
996
997 start = address;
998
999 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1000 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1001 if (do_region_footprint &&
1002 address == tmp_entry->vme_end) {
1003 ledger_amount_t ledger_resident;
1004 ledger_amount_t ledger_compressed;
1005
1006 /*
1007 * This request is right after the last valid
1008 * memory region; instead of reporting the
1009 * end of the address space, report a fake
1010 * memory region to account for non-volatile
1011 * purgeable and/or ledger-tagged memory
1012 * owned by this task.
1013 */
1014 task_ledgers_footprint(task->ledger,
1015 &ledger_resident,
1016 &ledger_compressed);
1017 if (ledger_resident + ledger_compressed == 0) {
1018 /* nothing to report */
1019 vm_map_unlock_read(map);
1020 vm_map_deallocate(map);
1021 return 0;
1022 }
1023
1024 /* provide fake region for purgeable */
1025 pinfo->pri_offset = address;
1026 pinfo->pri_protection = VM_PROT_DEFAULT;
1027 pinfo->pri_max_protection = VM_PROT_DEFAULT;
1028 pinfo->pri_inheritance = VM_INHERIT_NONE;
1029 pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
1030 pinfo->pri_user_wired_count = 0;
1031 pinfo->pri_user_tag = -1;
1032 pinfo->pri_pages_resident =
1033 (uint32_t) (ledger_resident / effective_page_size);
1034 pinfo->pri_pages_shared_now_private = 0;
1035 pinfo->pri_pages_swapped_out =
1036 (uint32_t) (ledger_compressed / effective_page_size);
1037 pinfo->pri_pages_dirtied =
1038 (uint32_t) (ledger_resident / effective_page_size);
1039 pinfo->pri_ref_count = 1;
1040 pinfo->pri_shadow_depth = 0;
1041 pinfo->pri_share_mode = SM_PRIVATE;
1042 pinfo->pri_private_pages_resident =
1043 (uint32_t) (ledger_resident / effective_page_size);
1044 pinfo->pri_shared_pages_resident = 0;
1045 pinfo->pri_obj_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
1046 pinfo->pri_address = address;
1047 pinfo->pri_size =
1048 (uint64_t) (ledger_resident + ledger_compressed);
1049 pinfo->pri_depth = 0;
1050
1051 vm_map_unlock_read(map);
1052 vm_map_deallocate(map);
1053 return 1;
1054 }
1055 vm_map_unlock_read(map);
1056 vm_map_deallocate(map);
1057 return 0;
1058 }
1059 } else {
1060 entry = tmp_entry;
1061 }
1062
1063 start = entry->vme_start;
1064
1065 pinfo->pri_offset = VME_OFFSET(entry);
1066 pinfo->pri_protection = entry->protection;
1067 pinfo->pri_max_protection = entry->max_protection;
1068 pinfo->pri_inheritance = entry->inheritance;
1069 pinfo->pri_behavior = entry->behavior;
1070 pinfo->pri_user_wired_count = entry->user_wired_count;
1071 pinfo->pri_user_tag = VME_ALIAS(entry);
1072
1073 if (entry->is_sub_map) {
1074 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1075 } else {
1076 if (entry->is_shared) {
1077 pinfo->pri_flags |= PROC_REGION_SHARED;
1078 }
1079 }
1080
1081
1082 extended.protection = entry->protection;
1083 extended.user_tag = VME_ALIAS(entry);
1084 extended.pages_resident = 0;
1085 extended.pages_swapped_out = 0;
1086 extended.pages_shared_now_private = 0;
1087 extended.pages_dirtied = 0;
1088 extended.external_pager = 0;
1089 extended.shadow_depth = 0;
1090
1091 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);
1092
1093 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) {
1094 extended.share_mode = SM_PRIVATE;
1095 }
1096
1097 top.private_pages_resident = 0;
1098 top.shared_pages_resident = 0;
1099 vm_map_region_top_walk(entry, &top);
1100
1101
1102 pinfo->pri_pages_resident = extended.pages_resident;
1103 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1104 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1105 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1106 pinfo->pri_ref_count = extended.ref_count;
1107 pinfo->pri_shadow_depth = extended.shadow_depth;
1108 pinfo->pri_share_mode = extended.share_mode;
1109
1110 pinfo->pri_private_pages_resident = top.private_pages_resident;
1111 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1112 pinfo->pri_obj_id = top.obj_id;
1113
1114 pinfo->pri_address = (uint64_t)start;
1115 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1116 pinfo->pri_depth = 0;
1117
1118 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1119 *vnodeaddr = (uintptr_t)0;
1120
1121 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
1122 vm_map_unlock_read(map);
1123 vm_map_deallocate(map);
1124 return 1;
1125 }
1126 }
1127
1128 vm_map_unlock_read(map);
1129 vm_map_deallocate(map);
1130 return 1;
1131 }
1132
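/*
 * Like fill_procregioninfo(), but skips forward to the first
 * vnode-backed, non-submap mapping at or after the given address.
 */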
1133 int
1134 fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1135 {
1136 vm_map_t map;
1137 vm_map_offset_t address = (vm_map_offset_t)arg;
1138 vm_map_entry_t tmp_entry;
1139 vm_map_entry_t entry;
1140
1141 task_lock(task);
1142 map = task->map;
1143 if (map == VM_MAP_NULL) {
1144 task_unlock(task);
1145 return 0;
1146 }
1147 vm_map_reference(map);
1148 task_unlock(task);
1149
1150 vm_map_lock_read(map);
1151
1152 if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
1153 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1154 vm_map_unlock_read(map);
1155 vm_map_deallocate(map);
1156 return 0;
1157 }
1158 } else {
1159 entry = tmp_entry;
1160 }
1161
1162 while (entry != vm_map_to_entry(map)) {
1163 *vnodeaddr = 0;
1164 *vid = 0;
1165
1166 if (entry->is_sub_map == 0) {
1167 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1168 pinfo->pri_offset = VME_OFFSET(entry);
1169 pinfo->pri_protection = entry->protection;
1170 pinfo->pri_max_protection = entry->max_protection;
1171 pinfo->pri_inheritance = entry->inheritance;
1172 pinfo->pri_behavior = entry->behavior;
1173 pinfo->pri_user_wired_count = entry->user_wired_count;
1174 pinfo->pri_user_tag = VME_ALIAS(entry);
1175
1176 if (entry->is_shared) {
1177 pinfo->pri_flags |= PROC_REGION_SHARED;
1178 }
1179
1180 pinfo->pri_pages_resident = 0;
1181 pinfo->pri_pages_shared_now_private = 0;
1182 pinfo->pri_pages_swapped_out = 0;
1183 pinfo->pri_pages_dirtied = 0;
1184 pinfo->pri_ref_count = 0;
1185 pinfo->pri_shadow_depth = 0;
1186 pinfo->pri_share_mode = 0;
1187
1188 pinfo->pri_private_pages_resident = 0;
1189 pinfo->pri_shared_pages_resident = 0;
1190 pinfo->pri_obj_id = 0;
1191
1192 pinfo->pri_address = (uint64_t)entry->vme_start;
1193 pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
1194 pinfo->pri_depth = 0;
1195
1196 vm_map_unlock_read(map);
1197 vm_map_deallocate(map);
1198 return 1;
1199 }
1200 }
1201
1202 /* Keep searching for a vnode-backed mapping */
1203 entry = entry->vme_next;
1204 }
1205
1206 vm_map_unlock_read(map);
1207 vm_map_deallocate(map);
1208 return 0;
1209 }
1210
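/*
 * Starting at "offset", find the next vnode-backed mapping in the
 * task's map and return its vnode, vid, start address and length.
 * Returns 1 if such a mapping was found, 0 otherwise.
 */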
1211 int
1212 find_region_details(task_t task, vm_map_offset_t offset,
1213 uintptr_t *vnodeaddr, uint32_t *vid,
1214 uint64_t *start, uint64_t *len)
1215 {
1216 vm_map_t map;
1217 vm_map_entry_t tmp_entry, entry;
1218 int rc = 0;
1219
1220 task_lock(task);
1221 map = task->map;
1222 if (map == VM_MAP_NULL) {
1223 task_unlock(task);
1224 return 0;
1225 }
1226 vm_map_reference(map);
1227 task_unlock(task);
1228
1229 vm_map_lock_read(map);
1230 if (!vm_map_lookup_entry(map, offset, &tmp_entry)) {
1231 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1232 rc = 0;
1233 goto ret;
1234 }
1235 } else {
1236 entry = tmp_entry;
1237 }
1238
1239 while (entry != vm_map_to_entry(map)) {
1240 *vnodeaddr = 0;
1241 *vid = 0;
1242 *start = 0;
1243 *len = 0;
1244
1245 if (entry->is_sub_map == 0) {
1246 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1247 *start = entry->vme_start;
1248 *len = entry->vme_end - entry->vme_start;
1249 rc = 1;
1250 goto ret;
1251 }
1252 }
1253
1254 entry = entry->vme_next;
1255 }
1256
1257 ret:
1258 vm_map_unlock_read(map);
1259 vm_map_deallocate(map);
1260 return rc;
1261 }
1262
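/*
 * Walk the shadow chain of the entry's VM object down to the backing
 * object and, if it is paged by the vnode pager, return the vnode
 * pointer and vnode id.  Returns 1 on success, 0 otherwise.
 */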
1263 static int
1264 fill_vnodeinfoforaddr(
1265 vm_map_entry_t entry,
1266 uintptr_t * vnodeaddr,
1267 uint32_t * vid)
1268 {
1269 vm_object_t top_object, object;
1270 memory_object_t memory_object;
1271 memory_object_pager_ops_t pager_ops;
1272 kern_return_t kr;
1273 int shadow_depth;
1274
1275
1276 if (entry->is_sub_map) {
1277 return 0;
1278 } else {
1279 /*
1280 * The last object in the shadow chain has the
1281 * relevant pager information.
1282 */
1283 top_object = VME_OBJECT(entry);
1284 if (top_object == VM_OBJECT_NULL) {
1285 object = VM_OBJECT_NULL;
1286 shadow_depth = 0;
1287 } else {
1288 vm_object_lock(top_object);
1289 for (object = top_object, shadow_depth = 0;
1290 object->shadow != VM_OBJECT_NULL;
1291 object = object->shadow, shadow_depth++) {
1292 vm_object_lock(object->shadow);
1293 vm_object_unlock(object);
1294 }
1295 }
1296 }
1297
1298 if (object == VM_OBJECT_NULL) {
1299 return 0;
1300 } else if (object->internal) {
1301 vm_object_unlock(object);
1302 return 0;
1303 } else if (!object->pager_ready ||
1304 object->terminating ||
1305 !object->alive) {
1306 vm_object_unlock(object);
1307 return 0;
1308 } else {
1309 memory_object = object->pager;
1310 pager_ops = memory_object->mo_pager_ops;
1311 if (pager_ops == &vnode_pager_ops) {
1312 kr = vnode_pager_get_object_vnode(
1313 memory_object,
1314 vnodeaddr, vid);
1315 if (kr != KERN_SUCCESS) {
1316 vm_object_unlock(object);
1317 return 0;
1318 }
1319 } else {
1320 vm_object_unlock(object);
1321 return 0;
1322 }
1323 }
1324 vm_object_unlock(object);
1325 return 1;
1326 }
1327
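/*
 * Return the vnode handle and vnode id backing the given memory object.
 */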
1328 kern_return_t
1329 vnode_pager_get_object_vnode(
1330 memory_object_t mem_obj,
1331 uintptr_t * vnodeaddr,
1332 uint32_t * vid)
1333 {
1334 vnode_pager_t vnode_object;
1335
1336 vnode_object = vnode_pager_lookup(mem_obj);
1337 if (vnode_object->vnode_handle) {
1338 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1339 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1340
1341 return KERN_SUCCESS;
1342 }
1343
1344 return KERN_FAILURE;
1345 }
1346
1347 #if CONFIG_IOSCHED
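/*
 * Return the device vnode of the mount containing the vnode that backs
 * this memory object.
 */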
1348 kern_return_t
1349 vnode_pager_get_object_devvp(
1350 memory_object_t mem_obj,
1351 uintptr_t *devvp)
1352 {
1353 struct vnode *vp;
1354 uint32_t vid;
1355
1356 if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS) {
1357 return KERN_FAILURE;
1358 }
1359 *devvp = (uintptr_t)vnode_mountdevvp(vp);
1360 if (*devvp) {
1361 return KERN_SUCCESS;
1362 }
1363 return KERN_FAILURE;
1364 }
1365 #endif
1366
1367 /*
1368 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1369 * object locked, otherwise return NULL with nothing locked.
1370 */
1371
1372 vm_object_t
1373 find_vnode_object(
1374 vm_map_entry_t entry
1375 )
1376 {
1377 vm_object_t top_object, object;
1378 memory_object_t memory_object;
1379 memory_object_pager_ops_t pager_ops;
1380
1381 if (!entry->is_sub_map) {
1382 /*
1383 * The last object in the shadow chain has the
1384 * relevant pager information.
1385 */
1386
1387 top_object = VME_OBJECT(entry);
1388
1389 if (top_object) {
1390 vm_object_lock(top_object);
1391
1392 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1393 vm_object_lock(object->shadow);
1394 vm_object_unlock(object);
1395 }
1396
1397 if (object && !object->internal && object->pager_ready && !object->terminating &&
1398 object->alive) {
1399 memory_object = object->pager;
1400 pager_ops = memory_object->mo_pager_ops;
1401
1402 /*
1403 * If this object points to the vnode_pager_ops, then we found what we're
1404 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1405 * vnode and so we fall through to the bottom and return NULL.
1406 */
1407
1408 if (pager_ops == &vnode_pager_ops) {
1409 return object; /* we return with the object locked */
1410 }
1411 }
1412
1413 vm_object_unlock(object);
1414 }
1415 }
1416
1417 return VM_OBJECT_NULL;
1418 }