/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/ledger.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>
#include <os/refcnt.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
    vm_map_t);

vm_offset_t
get_map_start(
    vm_map_t);

vm_offset_t
get_map_end(
    vm_map_t);

/*
 *
 */
int
get_map_nentries(
    vm_map_t map)
{
    return map->hdr.nentries;
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
    return vm_map_first_entry(map)->vme_start;
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
    return vm_map_last_entry(map)->vme_end;
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
    .memory_object_reference = vnode_pager_reference,
    .memory_object_deallocate = vnode_pager_deallocate,
    .memory_object_init = vnode_pager_init,
    .memory_object_terminate = vnode_pager_terminate,
    .memory_object_data_request = vnode_pager_data_request,
    .memory_object_data_return = vnode_pager_data_return,
    .memory_object_data_initialize = vnode_pager_data_initialize,
    .memory_object_data_unlock = vnode_pager_data_unlock,
    .memory_object_synchronize = vnode_pager_synchronize,
    .memory_object_map = vnode_pager_map,
    .memory_object_last_unmap = vnode_pager_last_unmap,
    .memory_object_data_reclaim = NULL,
    .memory_object_pager_name = "vnode pager"
};
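
/*
 * Illustrative sketch (not part of the original file): the VM layer never
 * calls the vnode pager directly; it dispatches through the ops table above.
 * The wrapper below is an assumed, simplified form of how a generic
 * memory_object_* entry point forwards a call to whichever pager owns the
 * object.  For a vnode-backed object, mo_pager_ops == &vnode_pager_ops, so
 * this ends up in vnode_pager_data_request().
 */
#if 0
static kern_return_t
example_dispatch_data_request(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t desired_access,
    memory_object_fault_info_t fault_info)
{
    /* indirect call through the pager's vtable */
    return mem_obj->mo_pager_ops->memory_object_data_request(
        mem_obj, offset, length, desired_access, fault_info);
}
#endif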

typedef struct vnode_pager {
    /* mandatory generic header */
    struct memory_object vn_pgr_hdr;

    /* pager-specific */
    struct os_refcnt ref_count;
    struct vnode *vnode_handle;     /* vnode handle */
} *vnode_pager_t;


kern_return_t
vnode_pager_cluster_read(       /* forward */
    vnode_pager_t,
    vm_object_offset_t,
    vm_object_offset_t,
    uint32_t,
    vm_size_t);

void
vnode_pager_cluster_write(      /* forward */
    vnode_pager_t,
    vm_object_offset_t,
    vm_size_t,
    vm_object_offset_t *,
    int *,
    int);


vnode_pager_t
vnode_object_create(            /* forward */
    struct vnode *);

vnode_pager_t
vnode_pager_lookup(             /* forward */
    memory_object_t);

struct vnode *
vnode_pager_lookup_vnode(       /* forward */
    memory_object_t);

ZONE_DECLARE(vnode_pager_zone, "vnode pager structures",
    sizeof(struct vnode_pager), ZC_NOENCRYPT);

#define VNODE_PAGER_NULL        ((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT   1


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL       0xffffffff
#define PAGER_INIT      0x00000001
#define PAGER_PAGEIN    0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL) {printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

extern int proc_resetpcontrol(int);


extern int uiomove64(addr64_t, int, void *);
#define MAX_RUN 32

int
memory_object_control_uiomove(
    memory_object_control_t control,
    memory_object_offset_t offset,
    void *uio,
    int start_offset,
    int io_requested,
    int mark_dirty,
    int take_reference)
{
    vm_object_t object;
    vm_page_t dst_page;
    int xsize;
    int retval = 0;
    int cur_run;
    int cur_needed;
    int i;
    int orig_offset;
    vm_page_t page_run[MAX_RUN];
    int dirty_count;    /* keeps track of number of pages dirtied as part of this uiomove */

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL) {
        return 0;
    }
    assert(!object->internal);

    vm_object_lock(object);

    if (mark_dirty && object->copy != VM_OBJECT_NULL) {
        /*
         * We can't modify the pages without honoring
         * copy-on-write obligations first, so fall off
         * this optimized path and fall back to the regular
         * path.
         */
        vm_object_unlock(object);
        return 0;
    }
    orig_offset = start_offset;

    dirty_count = 0;
    while (io_requested && retval == 0) {
        cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

        if (cur_needed > MAX_RUN) {
            cur_needed = MAX_RUN;
        }

        for (cur_run = 0; cur_run < cur_needed;) {
            if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
                break;
            }


            if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
                /*
                 * someone else is playing with the page... if we've
                 * already collected pages into this run, go ahead
                 * and process now, we can't block on this
                 * page while holding other pages in the BUSY state
                 * otherwise we will wait
                 */
                if (cur_run) {
                    break;
                }
                PAGE_SLEEP(object, dst_page, THREAD_UNINT);
                continue;
            }
            if (dst_page->vmp_laundry) {
                vm_pageout_steal_laundry(dst_page, FALSE);
            }

            if (mark_dirty) {
                if (dst_page->vmp_dirty == FALSE) {
                    dirty_count++;
                }
                SET_PAGE_DIRTY(dst_page, FALSE);
                if (dst_page->vmp_cs_validated &&
                    !dst_page->vmp_cs_tainted) {
                    /*
                     * CODE SIGNING:
                     * We're modifying a code-signed
                     * page: force revalidate
                     */
                    dst_page->vmp_cs_validated = VMP_CS_ALL_FALSE;

                    VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);

                    pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
                }
            }
            dst_page->vmp_busy = TRUE;

            page_run[cur_run++] = dst_page;

            offset += PAGE_SIZE_64;
        }
        if (cur_run == 0) {
            /*
             * we hit a 'hole' in the cache or
             * a page we don't want to try to handle,
             * so bail at this point
             * we'll unlock the object below
             */
            break;
        }
        vm_object_unlock(object);

        for (i = 0; i < cur_run; i++) {
            dst_page = page_run[i];

            if ((xsize = PAGE_SIZE - start_offset) > io_requested) {
                xsize = io_requested;
            }

            if ((retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio))) {
                break;
            }

            io_requested -= xsize;
            start_offset = 0;
        }
        vm_object_lock(object);

        /*
         * if we have more than 1 page to work on
         * in the current run, or the original request
         * started at offset 0 of the page, or we're
         * processing multiple batches, we will move
         * the pages to the tail of the inactive queue
         * to implement an LRU for read/write accesses
         *
         * the check for orig_offset == 0 is there to
         * mitigate the cost of small (< page_size) requests
         * to the same page (this way we only move it once)
         */
        if (take_reference && (cur_run > 1 || orig_offset == 0)) {
            vm_page_lockspin_queues();

            for (i = 0; i < cur_run; i++) {
                vm_page_lru(page_run[i]);
            }

            vm_page_unlock_queues();
        }
        for (i = 0; i < cur_run; i++) {
            dst_page = page_run[i];

            /*
             * someone is explicitly referencing this page...
             * update clustered and speculative state
             *
             */
            if (dst_page->vmp_clustered) {
                VM_PAGE_CONSUME_CLUSTERED(dst_page);
            }

            PAGE_WAKEUP_DONE(dst_page);
        }
        orig_offset = 0;
    }
    if (object->pager) {
        task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
    }
    vm_object_unlock(object);
    return retval;
}
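
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * BSD-layer read path built on memory_object_control_uiomove().  The helper
 * name and its shape are assumptions for the example; the point is that a
 * return of 0 with bytes still outstanding means "no cached page, or a copy
 * object is in the way", so the caller has to fall back to the regular
 * cluster I/O path for the remainder.
 */
#if 0
static int
example_cached_read(memory_object_control_t control, off_t f_offset,
    void *uio, int nbytes)
{
    int start_offset = (int)(f_offset & PAGE_MASK);
    int error;

    /* mark_dirty = 0 (read), take_reference = 1 (LRU the pages we touch) */
    error = memory_object_control_uiomove(control,
        f_offset - start_offset,        /* page-aligned object offset */
        uio, start_offset, nbytes,
        0, 1);
    /* The uio has been advanced only for pages that were resident;
     * any remainder must be read through the pager. */
    return error;
}
#endif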


/*
 *
 */
memory_object_t
vnode_pager_setup(
    struct vnode *vp,
    __unused memory_object_t pager)
{
    vnode_pager_t vnode_object;

    vnode_object = vnode_object_create(vp);
    if (vnode_object == VNODE_PAGER_NULL) {
        panic("vnode_pager_setup: vnode_object_create() failed");
    }
    return (memory_object_t)vnode_object;
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
#if !DEBUG
    __unused
#endif
    memory_object_cluster_size_t pg_size)
{
    vnode_pager_t vnode_object;
    kern_return_t kr;
    memory_object_attr_info_data_t attributes;


    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

    if (control == MEMORY_OBJECT_CONTROL_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    memory_object_control_reference(control);

    vnode_object->vn_pgr_hdr.mo_control = control;

    attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
    /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
    attributes.cluster_size = (1 << (PAGE_SHIFT));
    attributes.may_cache_object = TRUE;
    attributes.temporary = TRUE;

    kr = memory_object_change_attributes(
        control,
        MEMORY_OBJECT_ATTRIBUTE_INFO,
        (memory_object_info_t) &attributes,
        MEMORY_OBJECT_ATTR_INFO_COUNT);
    if (kr != KERN_SUCCESS) {
        panic("vnode_pager_init: memory_object_change_attributes() failed");
    }

    return KERN_SUCCESS;
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt,
    memory_object_offset_t *resid_offset,
    int *io_error,
    __unused boolean_t dirty,
    __unused boolean_t kernel_copy,
    int upl_flags)
{
    vnode_pager_t vnode_object;

    assertf(page_aligned(offset), "offset 0x%llx\n", offset);

    vnode_object = vnode_pager_lookup(mem_obj);

    vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_cluster_size_t data_cnt)
{
    panic("vnode_pager_data_initialize");
    return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_size_t size,
    __unused vm_prot_t desired_access)
{
    return KERN_FAILURE;
}

void
vnode_pager_dirtied(
    memory_object_t mem_obj,
    vm_object_offset_t s_offset,
    vm_object_offset_t e_offset)
{
    vnode_pager_t vnode_object;

    if (mem_obj && mem_obj->mo_pager_ops == &vnode_pager_ops) {
        vnode_object = vnode_pager_lookup(mem_obj);
        vnode_pager_was_dirtied(vnode_object->vnode_handle, s_offset, e_offset);
    }
}

kern_return_t
vnode_pager_get_isinuse(
    memory_object_t mem_obj,
    uint32_t *isinuse)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        *isinuse = 1;
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
    memory_object_t mem_obj,
    uint32_t *limit)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
    memory_object_t mem_obj,
    boolean_t *isSSD)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
    memory_object_t mem_obj,
    memory_object_offset_t *length)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        *length = 0;
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
    memory_object_t mem_obj,
    char *pathname,
    vm_size_t pathname_len,
    char *filename,
    vm_size_t filename_len,
    boolean_t *truncated_path_p)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    return vnode_pager_get_name(vnode_object->vnode_handle,
        pathname,
        pathname_len,
        filename,
        filename_len,
        truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
    memory_object_t mem_obj,
    struct timespec *mtime,
    struct timespec *cs_mtime)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    return vnode_pager_get_mtime(vnode_object->vnode_handle,
        mtime,
        cs_mtime);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    int optype)
{
    vnode_pager_t vnode_object;

    if (mem_obj == MEMORY_OBJECT_NULL ||
        mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);
    return ubc_cs_check_validation_bitmap(vnode_object->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 *
 */
kern_return_t
vnode_pager_data_request(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    __unused memory_object_cluster_size_t length,
    __unused vm_prot_t desired_access,
    memory_object_fault_info_t fault_info)
{
    vnode_pager_t vnode_object;
    memory_object_offset_t base_offset;
    vm_size_t size;
    uint32_t io_streaming = 0;

    assertf(page_aligned(offset), "offset 0x%llx\n", offset);

    vnode_object = vnode_pager_lookup(mem_obj);

    size = MAX_UPL_TRANSFER_BYTES;
    base_offset = offset;

    if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
        &base_offset, &size, &io_streaming,
        fault_info) != KERN_SUCCESS) {
        size = PAGE_SIZE;
    }

    assert(offset >= base_offset &&
        offset < base_offset + size);

    return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 *
 */
void
vnode_pager_reference(
    memory_object_t mem_obj)
{
    vnode_pager_t vnode_object;

    vnode_object = vnode_pager_lookup(mem_obj);
    os_ref_retain(&vnode_object->ref_count);
}

/*
 *
 */
void
vnode_pager_deallocate(
    memory_object_t mem_obj)
{
    vnode_pager_t vnode_object;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

    vnode_object = vnode_pager_lookup(mem_obj);

    if (os_ref_release(&vnode_object->ref_count) == 0) {
        if (vnode_object->vnode_handle != NULL) {
            vnode_pager_vrele(vnode_object->vnode_handle);
        }
        zfree(vnode_pager_zone, vnode_object);
    }
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
    __unused
#endif
    memory_object_t mem_obj)
{
    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

    return KERN_SUCCESS;
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
    __unused memory_object_t mem_obj,
    __unused memory_object_offset_t offset,
    __unused memory_object_size_t length,
    __unused vm_sync_t sync_flags)
{
    panic("vnode_pager_synchronize: memory_object_synchronize no longer supported\n");
    return KERN_FAILURE;
}

/*
 *
 */
kern_return_t
vnode_pager_map(
    memory_object_t mem_obj,
    vm_prot_t prot)
{
    vnode_pager_t vnode_object;
    int ret;
    kern_return_t kr;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

    vnode_object = vnode_pager_lookup(mem_obj);

    ret = ubc_map(vnode_object->vnode_handle, prot);

    if (ret != 0) {
        kr = KERN_FAILURE;
    } else {
        kr = KERN_SUCCESS;
    }

    return kr;
}

kern_return_t
vnode_pager_last_unmap(
    memory_object_t mem_obj)
{
    vnode_pager_t vnode_object;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

    vnode_object = vnode_pager_lookup(mem_obj);

    ubc_unmap(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}



/*
 *
 */
void
vnode_pager_cluster_write(
    vnode_pager_t vnode_object,
    vm_object_offset_t offset,
    vm_size_t cnt,
    vm_object_offset_t *resid_offset,
    int *io_error,
    int upl_flags)
{
    vm_size_t size;
    int errno;

    if (upl_flags & UPL_MSYNC) {
        upl_flags |= UPL_VNODE_PAGER;

        if ((upl_flags & UPL_IOSYNC) && io_error) {
            upl_flags |= UPL_KEEPCACHED;
        }

        while (cnt) {
            size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */

            assert((upl_size_t) size == size);
            vnode_pageout(vnode_object->vnode_handle,
                NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

            if ((upl_flags & UPL_KEEPCACHED)) {
                if ((*io_error = errno)) {
                    break;
                }
            }
            cnt -= size;
            offset += size;
        }
        if (resid_offset) {
            *resid_offset = offset;
        }
    } else {
        vm_object_offset_t vnode_size;
        vm_object_offset_t base_offset;

        /*
         * this is the pageout path
         */
        vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

        if (vnode_size > (offset + PAGE_SIZE)) {
            /*
             * preset the maximum size of the cluster
             * and put us on a nice cluster boundary...
             * and then clip the size to ensure we
             * don't request past the end of the underlying file
             */
            size = MAX_UPL_TRANSFER_BYTES;
            base_offset = offset & ~((signed)(size - 1));

            if ((base_offset + size) > vnode_size) {
                size = round_page(((vm_size_t)(vnode_size - base_offset)));
            }
        } else {
            /*
             * we've been requested to page out a page beyond the current
             * end of the 'file'... don't try to cluster in this case...
             * we still need to send this page through because it might
             * be marked precious and the underlying filesystem may need
             * to do something with it (besides page it out)...
             */
            base_offset = offset;
            size = PAGE_SIZE;
        }
        assert((upl_size_t) size == size);
        vnode_pageout(vnode_object->vnode_handle,
            NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
            (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
    }
}
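
/*
 * Worked example of the pageout-path clustering above (illustrative numbers;
 * the actual MAX_UPL_TRANSFER_BYTES value is configuration dependent).
 * Assume MAX_UPL_TRANSFER_BYTES == 0x100000, offset == 0x123000 and
 * vnode_size == 0x180000:
 *
 *   base_offset = 0x123000 & ~(0x100000 - 1) = 0x100000
 *   base_offset + size = 0x200000 > vnode_size, so
 *   size = round_page(0x180000 - 0x100000) = 0x80000
 *
 * vnode_pageout() is then issued for [0x100000, 0x180000) with the original
 * page at UPL offset 0x23000, so the cluster stays aligned and never extends
 * past the end of the file.
 */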


/*
 *
 */
kern_return_t
vnode_pager_cluster_read(
    vnode_pager_t vnode_object,
    vm_object_offset_t base_offset,
    vm_object_offset_t offset,
    uint32_t io_streaming,
    vm_size_t cnt)
{
    int local_error = 0;
    int kret;
    int flags = 0;

    assert(!(cnt & PAGE_MASK));

    if (io_streaming) {
        flags |= UPL_IOSTREAMING;
    }

    assert((upl_size_t) cnt == cnt);
    kret = vnode_pagein(vnode_object->vnode_handle,
        (upl_t) NULL,
        (upl_offset_t) (offset - base_offset),
        base_offset,
        (upl_size_t) cnt,
        flags,
        &local_error);
    /*
     * This check is conceptually "if (kret == PAGER_ABSENT)": the value 1
     * corresponds to PAGER_ABSENT as defined in bsd/vm/vm_pager.h, but we
     * should not include that file here because it would be a layering
     * violation.
     */
    if (kret == 1) {
        int uplflags;
        upl_t upl = NULL;
        unsigned int count = 0;
        kern_return_t kr;

        uplflags = (UPL_NO_SYNC |
            UPL_CLEAN_IN_PLACE |
            UPL_SET_INTERNAL);
        count = 0;
        assert((upl_size_t) cnt == cnt);
        kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
            base_offset, (upl_size_t) cnt,
            &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
        if (kr == KERN_SUCCESS) {
            upl_abort(upl, 0);
            upl_deallocate(upl);
        } else {
            /*
             * We couldn't gather the page list, probably
             * because the memory object doesn't have a link
             * to a VM object anymore (forced unmount, for
             * example). Just return an error to the vm_fault()
             * path and let it handle it.
             */
        }

        return KERN_FAILURE;
    }

    return KERN_SUCCESS;
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
    struct vnode *vp)
{
    vnode_pager_t vnode_object;

    vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
    if (vnode_object == VNODE_PAGER_NULL) {
        return VNODE_PAGER_NULL;
    }

    /*
     * The vm_map call takes both named entry ports and raw memory
     * objects in the same parameter. We need to make sure that
     * vm_map does not see this object as a named entry port. So,
     * we reserve the first word in the object for a fake ip_kotype
     * setting - that will tell vm_map to use it as a memory object.
     */
    vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
    vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
    vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

    os_ref_init(&vnode_object->ref_count, NULL);
    vnode_object->vnode_handle = vp;

    return vnode_object;
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
    memory_object_t name)
{
    vnode_pager_t vnode_object;

    vnode_object = (vnode_pager_t)name;
    assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
    return vnode_object;
}


struct vnode *
vnode_pager_lookup_vnode(
    memory_object_t name)
{
    vnode_pager_t vnode_object;
    vnode_object = (vnode_pager_t)name;
    if (vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops) {
        return vnode_object->vnode_handle;
    } else {
        return NULL;
    }
}
/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr(vm_map_entry_t entry, uintptr_t *vnodeaddr, uint32_t *vid);

int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
    vm_map_t map;
    vm_map_offset_t address = (vm_map_offset_t)arg;
    vm_map_entry_t tmp_entry;
    vm_map_entry_t entry;
    vm_map_offset_t start;
    vm_region_extended_info_data_t extended;
    vm_region_top_info_data_t top;
    boolean_t do_region_footprint;
    int effective_page_shift, effective_page_size;

    task_lock(task);
    map = task->map;
    if (map == VM_MAP_NULL) {
        task_unlock(task);
        return 0;
    }

    effective_page_shift = vm_self_region_page_shift(map);
    effective_page_size = (1 << effective_page_shift);

    vm_map_reference(map);
    task_unlock(task);

    do_region_footprint = task_self_region_footprint();

    vm_map_lock_read(map);

    start = address;

    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
        if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
            if (do_region_footprint &&
                address == tmp_entry->vme_end) {
                ledger_amount_t ledger_resident;
                ledger_amount_t ledger_compressed;

                /*
                 * This request is right after the last valid
                 * memory region; instead of reporting the
                 * end of the address space, report a fake
                 * memory region to account for non-volatile
                 * purgeable and/or ledger-tagged memory
                 * owned by this task.
                 */
                task_ledgers_footprint(task->ledger,
                    &ledger_resident,
                    &ledger_compressed);
                if (ledger_resident + ledger_compressed == 0) {
                    /* nothing to report */
                    vm_map_unlock_read(map);
                    vm_map_deallocate(map);
                    return 0;
                }

                /* provide fake region for purgeable */
                pinfo->pri_offset = address;
                pinfo->pri_protection = VM_PROT_DEFAULT;
                pinfo->pri_max_protection = VM_PROT_DEFAULT;
                pinfo->pri_inheritance = VM_INHERIT_NONE;
                pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
                pinfo->pri_user_wired_count = 0;
                pinfo->pri_user_tag = -1;
                pinfo->pri_pages_resident =
                    (uint32_t) (ledger_resident / effective_page_size);
                pinfo->pri_pages_shared_now_private = 0;
                pinfo->pri_pages_swapped_out =
                    (uint32_t) (ledger_compressed / effective_page_size);
                pinfo->pri_pages_dirtied =
                    (uint32_t) (ledger_resident / effective_page_size);
                pinfo->pri_ref_count = 1;
                pinfo->pri_shadow_depth = 0;
                pinfo->pri_share_mode = SM_PRIVATE;
                pinfo->pri_private_pages_resident =
                    (uint32_t) (ledger_resident / effective_page_size);
                pinfo->pri_shared_pages_resident = 0;
                pinfo->pri_obj_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
                pinfo->pri_address = address;
                pinfo->pri_size =
                    (uint64_t) (ledger_resident + ledger_compressed);
                pinfo->pri_depth = 0;

                vm_map_unlock_read(map);
                vm_map_deallocate(map);
                return 1;
            }
            vm_map_unlock_read(map);
            vm_map_deallocate(map);
            return 0;
        }
    } else {
        entry = tmp_entry;
    }

    start = entry->vme_start;

    pinfo->pri_offset = VME_OFFSET(entry);
    pinfo->pri_protection = entry->protection;
    pinfo->pri_max_protection = entry->max_protection;
    pinfo->pri_inheritance = entry->inheritance;
    pinfo->pri_behavior = entry->behavior;
    pinfo->pri_user_wired_count = entry->user_wired_count;
    pinfo->pri_user_tag = VME_ALIAS(entry);

    if (entry->is_sub_map) {
        pinfo->pri_flags |= PROC_REGION_SUBMAP;
    } else {
        if (entry->is_shared) {
            pinfo->pri_flags |= PROC_REGION_SHARED;
        }
    }


    extended.protection = entry->protection;
    extended.user_tag = VME_ALIAS(entry);
    extended.pages_resident = 0;
    extended.pages_swapped_out = 0;
    extended.pages_shared_now_private = 0;
    extended.pages_dirtied = 0;
    extended.external_pager = 0;
    extended.shadow_depth = 0;

    vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);

    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) {
        extended.share_mode = SM_PRIVATE;
    }

    top.private_pages_resident = 0;
    top.shared_pages_resident = 0;
    vm_map_region_top_walk(entry, &top);


    pinfo->pri_pages_resident = extended.pages_resident;
    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
    pinfo->pri_pages_dirtied = extended.pages_dirtied;
    pinfo->pri_ref_count = extended.ref_count;
    pinfo->pri_shadow_depth = extended.shadow_depth;
    pinfo->pri_share_mode = extended.share_mode;

    pinfo->pri_private_pages_resident = top.private_pages_resident;
    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
    pinfo->pri_obj_id = top.obj_id;

    pinfo->pri_address = (uint64_t)start;
    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
    pinfo->pri_depth = 0;

    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
        *vnodeaddr = (uintptr_t)0;

        if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
            vm_map_unlock_read(map);
            vm_map_deallocate(map);
            return 1;
        }
    }

    vm_map_unlock_read(map);
    vm_map_deallocate(map);
    return 1;
}

int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
    vm_map_t map;
    vm_map_offset_t address = (vm_map_offset_t)arg;
    vm_map_entry_t tmp_entry;
    vm_map_entry_t entry;

    task_lock(task);
    map = task->map;
    if (map == VM_MAP_NULL) {
        task_unlock(task);
        return 0;
    }
    vm_map_reference(map);
    task_unlock(task);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
        if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
            vm_map_unlock_read(map);
            vm_map_deallocate(map);
            return 0;
        }
    } else {
        entry = tmp_entry;
    }

    while (entry != vm_map_to_entry(map)) {
        *vnodeaddr = 0;
        *vid = 0;

        if (entry->is_sub_map == 0) {
            if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
                pinfo->pri_offset = VME_OFFSET(entry);
                pinfo->pri_protection = entry->protection;
                pinfo->pri_max_protection = entry->max_protection;
                pinfo->pri_inheritance = entry->inheritance;
                pinfo->pri_behavior = entry->behavior;
                pinfo->pri_user_wired_count = entry->user_wired_count;
                pinfo->pri_user_tag = VME_ALIAS(entry);

                if (entry->is_shared) {
                    pinfo->pri_flags |= PROC_REGION_SHARED;
                }

                pinfo->pri_pages_resident = 0;
                pinfo->pri_pages_shared_now_private = 0;
                pinfo->pri_pages_swapped_out = 0;
                pinfo->pri_pages_dirtied = 0;
                pinfo->pri_ref_count = 0;
                pinfo->pri_shadow_depth = 0;
                pinfo->pri_share_mode = 0;

                pinfo->pri_private_pages_resident = 0;
                pinfo->pri_shared_pages_resident = 0;
                pinfo->pri_obj_id = 0;

                pinfo->pri_address = (uint64_t)entry->vme_start;
                pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
                pinfo->pri_depth = 0;

                vm_map_unlock_read(map);
                vm_map_deallocate(map);
                return 1;
            }
        }

        /* Keep searching for a vnode-backed mapping */
        entry = entry->vme_next;
    }

    vm_map_unlock_read(map);
    vm_map_deallocate(map);
    return 0;
}

int
find_region_details(task_t task, vm_map_offset_t offset,
    uintptr_t *vnodeaddr, uint32_t *vid,
    uint64_t *start, uint64_t *len)
{
    vm_map_t map;
    vm_map_entry_t tmp_entry, entry;
    int rc = 0;

    task_lock(task);
    map = task->map;
    if (map == VM_MAP_NULL) {
        task_unlock(task);
        return 0;
    }
    vm_map_reference(map);
    task_unlock(task);

    vm_map_lock_read(map);
    if (!vm_map_lookup_entry(map, offset, &tmp_entry)) {
        if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
            rc = 0;
            goto ret;
        }
    } else {
        entry = tmp_entry;
    }

    while (entry != vm_map_to_entry(map)) {
        *vnodeaddr = 0;
        *vid = 0;
        *start = 0;
        *len = 0;

        if (entry->is_sub_map == 0) {
            if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
                *start = entry->vme_start;
                *len = entry->vme_end - entry->vme_start;
                rc = 1;
                goto ret;
            }
        }

        entry = entry->vme_next;
    }

ret:
    vm_map_unlock_read(map);
    vm_map_deallocate(map);
    return rc;
}

static int
fill_vnodeinfoforaddr(
    vm_map_entry_t entry,
    uintptr_t *vnodeaddr,
    uint32_t *vid)
{
    vm_object_t top_object, object;
    memory_object_t memory_object;
    memory_object_pager_ops_t pager_ops;
    kern_return_t kr;
    int shadow_depth;


    if (entry->is_sub_map) {
        return 0;
    } else {
        /*
         * The last object in the shadow chain has the
         * relevant pager information.
         */
        top_object = VME_OBJECT(entry);
        if (top_object == VM_OBJECT_NULL) {
            object = VM_OBJECT_NULL;
            shadow_depth = 0;
        } else {
            vm_object_lock(top_object);
            for (object = top_object, shadow_depth = 0;
                object->shadow != VM_OBJECT_NULL;
                object = object->shadow, shadow_depth++) {
                vm_object_lock(object->shadow);
                vm_object_unlock(object);
            }
        }
    }

    if (object == VM_OBJECT_NULL) {
        return 0;
    } else if (object->internal) {
        vm_object_unlock(object);
        return 0;
    } else if (!object->pager_ready ||
        object->terminating ||
        !object->alive) {
        vm_object_unlock(object);
        return 0;
    } else {
        memory_object = object->pager;
        pager_ops = memory_object->mo_pager_ops;
        if (pager_ops == &vnode_pager_ops) {
            kr = vnode_pager_get_object_vnode(
                memory_object,
                vnodeaddr, vid);
            if (kr != KERN_SUCCESS) {
                vm_object_unlock(object);
                return 0;
            }
        } else {
            vm_object_unlock(object);
            return 0;
        }
    }
    vm_object_unlock(object);
    return 1;
}

kern_return_t
vnode_pager_get_object_vnode(
    memory_object_t mem_obj,
    uintptr_t *vnodeaddr,
    uint32_t *vid)
{
    vnode_pager_t vnode_object;

    vnode_object = vnode_pager_lookup(mem_obj);
    if (vnode_object->vnode_handle) {
        *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
        *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

        return KERN_SUCCESS;
    }

    return KERN_FAILURE;
}

#if CONFIG_IOSCHED
kern_return_t
vnode_pager_get_object_devvp(
    memory_object_t mem_obj,
    uintptr_t *devvp)
{
    struct vnode *vp;
    uint32_t vid;

    if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS) {
        return KERN_FAILURE;
    }
    *devvp = (uintptr_t)vnode_mountdevvp(vp);
    if (*devvp) {
        return KERN_SUCCESS;
    }
    return KERN_FAILURE;
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
 * object locked, otherwise return NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
    vm_map_entry_t entry
    )
{
    vm_object_t top_object, object;
    memory_object_t memory_object;
    memory_object_pager_ops_t pager_ops;

    if (!entry->is_sub_map) {
        /*
         * The last object in the shadow chain has the
         * relevant pager information.
         */

        top_object = VME_OBJECT(entry);

        if (top_object) {
            vm_object_lock(top_object);

            for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
                vm_object_lock(object->shadow);
                vm_object_unlock(object);
            }

            if (object && !object->internal && object->pager_ready && !object->terminating &&
                object->alive) {
                memory_object = object->pager;
                pager_ops = memory_object->mo_pager_ops;

                /*
                 * If this object points to the vnode_pager_ops, then we found what we're
                 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
                 * vnode and so we fall through to the bottom and return NULL.
                 */

                if (pager_ops == &vnode_pager_ops) {
                    return object;      /* we return with the object locked */
                }
            }

            vm_object_unlock(object);
        }
    }

    return VM_OBJECT_NULL;
}
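
/*
 * Illustrative sketch (not part of the original file): find_vnode_object()
 * returns with the object locked, so the caller is responsible for dropping
 * that lock.  The helper below is an assumed example; "entry" would come
 * from a vm_map lookup performed under the map lock.
 */
#if 0
static struct vnode *
example_entry_to_vnode(vm_map_entry_t entry)
{
    vm_object_t object;
    struct vnode *vp = NULL;

    object = find_vnode_object(entry);
    if (object != VM_OBJECT_NULL) {
        /* object is locked here and its pager is known to be a vnode pager */
        vp = vnode_pager_lookup_vnode(object->pager);
        vm_object_unlock(object);
    }
    return vp;
}
#endif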