apple/xnu: osfmk/vm/bsd_vm.c (blob a7f17574a4d0971ea57c4b8b018aa791f6d30813)
/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 * Return the number of entries in the given VM map.
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim */
	"vnode pager"
};
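
/*
 * Illustrative note: the VM layer dispatches generic memory-object
 * operations through this ops vector rather than through Mach IPC.
 * A page fault on a vnode-backed object ends up doing, in effect:
 *
 *	object->pager->mo_pager_ops->memory_object_data_request(...);
 *
 * which lands in vnode_pager_data_request() below.  This is a sketch
 * of the dispatch pattern, not a literal line from the fault path.
 */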

typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype() */
	memory_object_pager_ops_t	pager_ops;	/* == &vnode_pager_ops */
	unsigned int			ref_count;	/* reference count */
	memory_object_control_t		control_handle;	/* mem object control handle */
	struct vnode			*vnode_handle;	/* vnode handle */
} *vnode_pager_t;

#define pager_ikot pager_header.io_bits

ipc_port_t
trigger_name_to_port(		/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(		/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(		/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define	CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL	0xffffffff
#define	PAGER_INIT	0x00000001
#define	PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
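
/*
 * Usage note: PAGER_DEBUG takes the printf argument list as a single
 * parenthesized macro argument, so call sites use double parentheses,
 * e.g.:
 *
 *	PAGER_DEBUG(PAGER_INIT, ("vnode_pager_init: %p\n", mem_obj));
 *
 * With pagerdebug left at 0, the output is compiled in but suppressed
 * on DEBUG kernels, and compiled out entirely otherwise.
 */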

extern int	proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
extern unsigned long vm_cs_validated_resets;
#endif

/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 */
		return default_pager_triggers(default_pager,
					0, 0,
					USE_EMERGENCY_SWAP_FILE_FIRST,
					IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
					0, 0,
					SWAP_FILE_CREATION_ERROR,
					trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the process we want to resume
		 * or unthrottle.  This is of course restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread.
	 * It is assumed, for the time being, that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}
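
/*
 * Illustration (not part of the original source): user space reaches
 * this handler through the macx_triggers() trap.  The historical
 * dynamic_pager daemon registered its high/low water alerts with a
 * call shaped roughly like the following; the water-mark byte counts
 * here are made up for the example:
 *
 *	macx_triggers(50000000, 10000000,
 *		      HI_WAT_ALERT | LO_WAT_ALERT, alert_port);
 *
 * where alert_port is a Mach receive right owned by the daemon.
 */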

/*
 * Convert a trigger port name in the current task's IPC space into a
 * port reference.  On success the port is returned locked and active
 * (ipc_port_translate_receive() returns it that way); the callers
 * above make a send right and then unlock it.
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if (ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
				       &trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32
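
/*
 * Note: MAX_RUN bounds how many resident pages
 * memory_object_control_uiomove() gathers and marks busy per batch
 * before dropping the object lock to copy data.  With 4KB pages (an
 * assumption; PAGE_SIZE is platform-dependent), a full run covers
 * 32 * 4KB = 128KB per object-lock hold.
 */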

int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;
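		/*
		 * Worked example (illustrative): with 4KB pages, a
		 * request of io_requested = 8192 bytes starting at
		 * start_offset = 512 touches bytes 512..8703, so
		 * cur_needed = (512 + 8192 + 4095) / 4096 = 3 pages.
		 */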

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now, since we can't block on this
				 * page while holding other pages in the BUSY state;
				 * otherwise, wait for the page
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point;
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 */
			if (dst_page->clustered)
				VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}


/*
 * Set up the zone used to allocate vnode pager structures, and
 * bootstrap the other external pagers that live alongside the
 * vnode pager.
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);


#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif /* CONFIG_CODE_DECRYPTION */
	swapfile_pager_bootstrap();
	return;
}

/*
 * Create and return a memory object (vnode pager) for the given vnode.
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 * Attach the pager to its memory object control handle and set the
 * object's caching attributes.
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 memory_object_cluster_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 * Page out (clean) the given range of the memory object by handing it
 * to vnode_pager_cluster_write().
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t		offset,
	__unused memory_object_cluster_size_t	data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t	size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_isinuse(
	memory_object_t		mem_obj,
	uint32_t		*isinuse)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t		mem_obj,
	uint32_t		*limit)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t		mem_obj,
	boolean_t		*isSSD)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
	memory_object_t		mem_obj,
	char			*pathname,
	vm_size_t		pathname_len,
	char			*filename,
	vm_size_t		filename_len,
	boolean_t		*truncated_path_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
				    pathname,
				    pathname_len,
				    filename,
				    filename_len,
				    truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t		mem_obj,
	struct timespec		*mtime,
	struct timespec		*cs_mtime)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
				     mtime,
				     cs_mtime);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 * Page in the requested range: ask the VM layer for a good cluster
 * around the faulting offset, then read it through
 * vnode_pager_cluster_read().
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	vnode_pager_t		vnode_object;
	memory_object_offset_t	base_offset;
	vm_size_t		size;
	uint32_t		io_streaming = 0;

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER_BYTES;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(offset >= base_offset &&
	       offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 * Take an additional reference on the pager.
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 * Drop a reference on the pager; on the last release, let go of the
 * vnode and free the pager structure.
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 * Terminate the pager; nothing to do here beyond debug logging.
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 * Complete a synchronize request against the memory object.
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t	length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 * Tell UBC that the vnode is being mapped with the given protection.
 */
kern_return_t
vnode_pager_map(
	memory_object_t		mem_obj,
	vm_prot_t		prot)
{
	vnode_pager_t		vnode_object;
	int			ret;
	kern_return_t		kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

/*
 * Called when the last mapping of the memory object goes away; tell
 * UBC the vnode is no longer mapped.
 */
kern_return_t
vnode_pager_last_unmap(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}



/*
 * Write the given range of the object back to the file, batching the
 * I/O into clusters of at most MAX_UPL_TRANSFER_BYTES.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t   *	resid_offset,
	int		     *	io_error,
	int			upl_flags)
{
	vm_size_t	size;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
				NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = MAX_UPL_TRANSFER_BYTES;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
		vnode_pageout(vnode_object->vnode_handle,
			NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
			(upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
	}
}
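
/*
 * Worked example of the cluster alignment above, assuming for
 * illustration that MAX_UPL_TRANSFER_BYTES is 0x40000 (256KB; the
 * real value is platform-dependent): a pageout at offset 0x43000 of
 * a large file gives base_offset = 0x43000 & ~0x3ffff = 0x40000, and
 * the page is written at upl_offset 0x3000 within a 256KB cluster.
 * If the file ends at 0x45000, the cluster is clipped to
 * round_page(0x45000 - 0x40000) = 0x5000 bytes.
 */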


/*
 * Read the given cluster from the file via vnode_pagein().
 * base_offset is the (aligned) start of the cluster; offset is the
 * faulting offset within it.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	base_offset,
	vm_object_offset_t	offset,
	uint32_t		io_streaming,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;
	int		flags = 0;

	assert(! (cnt & PAGE_MASK));

	if (io_streaming)
		flags |= UPL_IOSTREAMING;

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (upl_offset_t) (offset - base_offset),
			    base_offset,
			    (upl_size_t) cnt,
			    flags,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here: 1 corresponds to PAGER_ABSENT,
	defined in bsd/vm/vm_pager.h.  However, we should not be including
	that file here; it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       base_offset, (upl_size_t) cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 * Ask the VM layer to reap cached objects backed by the vnode pager.
 */
void
vnode_pager_release_from_cache(
		int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 * Allocate and initialize a vnode pager structure for the given vnode.
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 * Convert a memory object back to its vnode pager structure, asserting
 * that it really is one of ours.
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	 name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);


int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t )arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}

int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t )arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	while ((entry != vm_map_to_entry(map))) {
		*vnodeaddr = 0;
		*vid = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {

				pinfo->pri_offset = entry->offset;
				pinfo->pri_protection = entry->protection;
				pinfo->pri_max_protection = entry->max_protection;
				pinfo->pri_inheritance = entry->inheritance;
				pinfo->pri_behavior = entry->behavior;
				pinfo->pri_user_wired_count = entry->user_wired_count;
				pinfo->pri_user_tag = entry->alias;

				if (entry->is_shared)
					pinfo->pri_flags |= PROC_REGION_SHARED;

				pinfo->pri_pages_resident = 0;
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out = 0;
				pinfo->pri_pages_dirtied = 0;
				pinfo->pri_ref_count = 0;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = 0;

				pinfo->pri_private_pages_resident = 0;
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = 0;

				pinfo->pri_address = (uint64_t)entry->vme_start;
				pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return(1);
			}
		}

		/* Keep searching for a vnode-backed mapping */
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(0);
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t	entry,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vm_object_t	top_object, object;
	memory_object_t	memory_object;
	memory_object_pager_ops_t	pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
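			/*
			 * Note: the walk below uses hand-over-hand
			 * (lock-coupling) locking: each shadow object
			 * is locked before its parent is unlocked, so
			 * the chain cannot be torn down underneath us
			 * and we exit the loop holding only the
			 * terminal object's lock.
			 */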
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode(
	memory_object_t	mem_obj,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}

#if CONFIG_IOSCHED
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t	mem_obj,
	uintptr_t	*devvp)
{
	struct vnode	*vp;
	uint32_t	vid;

	if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS)
		return (KERN_FAILURE);
	*devvp = (uintptr_t)vnode_mountdevvp(vp);
	if (*devvp)
		return (KERN_SUCCESS);
	return (KERN_FAILURE);
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry.
 * If found, return with the object locked; otherwise return
 * VM_OBJECT_NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t			memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return VM_OBJECT_NULL.
				 */

				if (pager_ops == &vnode_pager_ops)
					return object;		/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}