osfmk/vm/bsd_vm.c (apple/xnu, xnu-3248.60.10)
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/thread.h>
45
46 #include <ipc/ipc_port.h>
47 #include <ipc/ipc_space.h>
48
49 #include <default_pager/default_pager_types.h>
50 #include <default_pager/default_pager_object_server.h>
51
52 #include <vm/vm_map.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/memory_object.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_protos.h>
57 #include <vm/vm_purgeable_internal.h>
58
59
60 /* BSD VM COMPONENT INTERFACES */
61 int
62 get_map_nentries(
63 vm_map_t);
64
65 vm_offset_t
66 get_map_start(
67 vm_map_t);
68
69 vm_offset_t
70 get_map_end(
71 vm_map_t);
72
73 /*
74 *
75 */
76 int
77 get_map_nentries(
78 vm_map_t map)
79 {
80 return(map->hdr.nentries);
81 }
82
83 mach_vm_offset_t
84 mach_get_vm_start(vm_map_t map)
85 {
86 return( vm_map_first_entry(map)->vme_start);
87 }
88
89 mach_vm_offset_t
90 mach_get_vm_end(vm_map_t map)
91 {
92 return( vm_map_last_entry(map)->vme_end);
93 }
94
95 /*
96 * BSD VNODE PAGER
97 */
98
99 const struct memory_object_pager_ops vnode_pager_ops = {
100 vnode_pager_reference,
101 vnode_pager_deallocate,
102 vnode_pager_init,
103 vnode_pager_terminate,
104 vnode_pager_data_request,
105 vnode_pager_data_return,
106 vnode_pager_data_initialize,
107 vnode_pager_data_unlock,
108 vnode_pager_synchronize,
109 vnode_pager_map,
110 vnode_pager_last_unmap,
111 NULL, /* data_reclaim */
112 "vnode pager"
113 };
114
115 typedef struct vnode_pager {
116 struct ipc_object_header pager_header; /* fake ip_kotype() */
117 memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */
118 unsigned int ref_count; /* reference count */
119 memory_object_control_t control_handle; /* mem object control handle */
120 struct vnode *vnode_handle; /* vnode handle */
121 } *vnode_pager_t;
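/*
 * Note: pager_header must remain the first field.  vnode_object_create()
 * stores IKOT_MEMORY_OBJECT in pager_ikot (an alias for pager_header.io_bits)
 * so that vm_map treats this pager as a memory object rather than a named
 * entry port.  ref_count is only manipulated with hw_atomic_add/hw_atomic_sub
 * (see vnode_pager_reference/vnode_pager_deallocate below).
 */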
122
123
124 #define pager_ikot pager_header.io_bits
125
126 ipc_port_t
127 trigger_name_to_port( /* forward */
128 mach_port_t);
129
130 kern_return_t
131 vnode_pager_cluster_read( /* forward */
132 vnode_pager_t,
133 vm_object_offset_t,
134 vm_object_offset_t,
135 uint32_t,
136 vm_size_t);
137
138 void
139 vnode_pager_cluster_write( /* forward */
140 vnode_pager_t,
141 vm_object_offset_t,
142 vm_size_t,
143 vm_object_offset_t *,
144 int *,
145 int);
146
147
148 vnode_pager_t
149 vnode_object_create( /* forward */
150 struct vnode *);
151
152 vnode_pager_t
153 vnode_pager_lookup( /* forward */
154 memory_object_t);
155
156 zone_t vnode_pager_zone;
157
158
159 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
160
161 /* TODO: Should be set dynamically by vnode_pager_init() */
162 #define CLUSTER_SHIFT 1
163
164 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
165 #define MAX_VNODE 10000
166
167
168 #if DEBUG
169 int pagerdebug=0;
170
171 #define PAGER_ALL 0xffffffff
172 #define PAGER_INIT 0x00000001
173 #define PAGER_PAGEIN 0x00000002
174
175 #define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
176 #else
177 #define PAGER_DEBUG(LEVEL, A)
178 #endif
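/*
 * Illustrative use (a sketch; pagerdebug is normally 0 and is only meant to
 * be turned on by hand on DEBUG kernels):
 *
 *	PAGER_DEBUG(PAGER_INIT, ("vnode_pager_init: %p\n", mem_obj));
 *
 * expands to a printf() only when all of the PAGER_INIT bits are set in
 * pagerdebug; on non-DEBUG kernels it compiles away entirely.
 */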
179
180 extern int proc_resetpcontrol(int);
181
182 #if DEVELOPMENT || DEBUG
183 extern unsigned long vm_cs_validated_resets;
184 #endif
185
186 /*
187 * Routine: mach_macx_triggers
188 * Function:
189 * Syscall interface to set the callbacks for the low and
190 * high water marks.
191 */
192 int
193 mach_macx_triggers(
194 struct macx_triggers_args *args)
195 {
196 int hi_water = args->hi_water;
197 int low_water = args->low_water;
198 int flags = args->flags;
199 mach_port_t trigger_name = args->alert_port;
200 kern_return_t kr;
201 memory_object_default_t default_pager;
202 ipc_port_t trigger_port;
203
204 default_pager = MEMORY_OBJECT_DEFAULT_NULL;
205 kr = host_default_memory_manager(host_priv_self(),
206 &default_pager, 0);
207 if(kr != KERN_SUCCESS) {
208 return EINVAL;
209 }
210
211 if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
212 ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
213 /* can't have it both ways */
214 return EINVAL;
215 }
216
217 if (default_pager_init_flag == 0) {
218 start_def_pager(NULL);
219 default_pager_init_flag = 1;
220 }
221
222 if (flags & SWAP_ENCRYPT_ON) {
223 /* ENCRYPTED SWAP: tell default_pager to encrypt */
224 default_pager_triggers(default_pager,
225 0, 0,
226 SWAP_ENCRYPT_ON,
227 IP_NULL);
228 } else if (flags & SWAP_ENCRYPT_OFF) {
229 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
230 default_pager_triggers(default_pager,
231 0, 0,
232 SWAP_ENCRYPT_OFF,
233 IP_NULL);
234 }
235
236 if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
237 /*
238 * Time to switch to the emergency segment.
239 */
240 return default_pager_triggers(default_pager,
241 0, 0,
242 USE_EMERGENCY_SWAP_FILE_FIRST,
243 IP_NULL);
244 }
245
246 if (flags & SWAP_FILE_CREATION_ERROR) {
247 /*
248 * For some reason, the dynamic pager failed to create a swap file.
249 */
250 trigger_port = trigger_name_to_port(trigger_name);
251 if(trigger_port == NULL) {
252 return EINVAL;
253 }
254 /* trigger_port is locked and active */
255 ipc_port_make_send_locked(trigger_port);
256 ip_unlock(trigger_port);
257 default_pager_triggers(default_pager,
258 0, 0,
259 SWAP_FILE_CREATION_ERROR,
260 trigger_port);
261 }
262
263 if (flags & HI_WAT_ALERT) {
264 trigger_port = trigger_name_to_port(trigger_name);
265 if(trigger_port == NULL) {
266 return EINVAL;
267 }
268 /* trigger_port is locked and active */
269 ipc_port_make_send_locked(trigger_port);
270 ip_unlock(trigger_port);
271 default_pager_triggers(default_pager,
272 hi_water, low_water,
273 HI_WAT_ALERT, trigger_port);
274 }
275
276 if (flags & LO_WAT_ALERT) {
277 trigger_port = trigger_name_to_port(trigger_name);
278 if(trigger_port == NULL) {
279 return EINVAL;
280 }
281 /* trigger_port is locked and active */
282 ipc_port_make_send_locked(trigger_port);
283 ip_unlock(trigger_port);
284 default_pager_triggers(default_pager,
285 hi_water, low_water,
286 LO_WAT_ALERT, trigger_port);
287 }
288
289
290 if (flags & PROC_RESUME) {
291
292 /*
293 * For this call, hi_water is used to pass in the pid of the process we want to resume
294 * or unthrottle. This is of course restricted to the superuser (checked inside of
295 * proc_resetpcontrol).
296 */
297
298 return proc_resetpcontrol(hi_water);
299 }
300
301 /*
302 * Set thread scheduling priority and policy for the current thread;
303 * it is assumed, for the time being, that the thread setting the alert
304 * is the same one that will be servicing it.
305 *
306 * XXX This does not belong in the kernel XXX
307 */
308 if (flags & HI_WAT_ALERT) {
309 thread_precedence_policy_data_t pre;
310 thread_extended_policy_data_t ext;
311
312 ext.timeshare = FALSE;
313 pre.importance = INT32_MAX;
314
315 thread_policy_set(current_thread(),
316 THREAD_EXTENDED_POLICY,
317 (thread_policy_t)&ext,
318 THREAD_EXTENDED_POLICY_COUNT);
319
320 thread_policy_set(current_thread(),
321 THREAD_PRECEDENCE_POLICY,
322 (thread_policy_t)&pre,
323 THREAD_PRECEDENCE_POLICY_COUNT);
324
325 current_thread()->options |= TH_OPT_VMPRIV;
326 }
327
328 if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
329 return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
330 }
331
332 return 0;
333 }
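/*
 * Rough userspace sketch (illustrative only; the user-level prototype and
 * header are not part of this file -- argument order follows
 * struct macx_triggers_args above).  A swap-management daemon such as
 * dynamic_pager might register for both watermark alerts on one port:
 *
 *	macx_triggers(hi_water, low_water,
 *	              HI_WAT_ALERT | LO_WAT_ALERT, alert_port);
 *
 * Note that hi_water is overloaded to carry a pid when PROC_RESUME is
 * passed, as described above.
 */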
334
335 /*
336 *
337 */
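/*
 * On success the port comes back locked and active (the
 * ipc_port_translate_receive() contract); the callers above take a send
 * right with ipc_port_make_send_locked() and then ip_unlock() it.
 */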
338 ipc_port_t
339 trigger_name_to_port(
340 mach_port_t trigger_name)
341 {
342 ipc_port_t trigger_port;
343 ipc_space_t space;
344
345 if (trigger_name == 0)
346 return (NULL);
347
348 space = current_space();
349 if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
350 &trigger_port) != KERN_SUCCESS)
351 return (NULL);
352 return trigger_port;
353 }
354
355
356 extern int uiomove64(addr64_t, int, void *);
357 #define MAX_RUN 32
358
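/*
 * memory_object_control_uiomove() copies data between a uio and the resident
 * pages of a file-backed (external) VM object.  It batches up to MAX_RUN
 * consecutive resident pages per pass: each page is marked busy while the
 * object lock is dropped for the per-page uiomove64() copies, after which the
 * pages are optionally aged to the tail of the inactive queue (take_reference)
 * and woken up.  A hole in the cache, or a busy/cleaning page once work has
 * already been collected, ends the current batch.
 */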
359 int
360 memory_object_control_uiomove(
361 memory_object_control_t control,
362 memory_object_offset_t offset,
363 void * uio,
364 int start_offset,
365 int io_requested,
366 int mark_dirty,
367 int take_reference)
368 {
369 vm_object_t object;
370 vm_page_t dst_page;
371 int xsize;
372 int retval = 0;
373 int cur_run;
374 int cur_needed;
375 int i;
376 int orig_offset;
377 vm_page_t page_run[MAX_RUN];
378 int dirty_count; /* keeps track of number of pages dirtied as part of this uiomove */
379
380 object = memory_object_control_to_vm_object(control);
381 if (object == VM_OBJECT_NULL) {
382 return (0);
383 }
384 assert(!object->internal);
385
386 vm_object_lock(object);
387
388 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
389 /*
390 * We can't modify the pages without honoring
391 * copy-on-write obligations first, so fall off
392 * this optimized path and fall back to the regular
393 * path.
394 */
395 vm_object_unlock(object);
396 return 0;
397 }
398 orig_offset = start_offset;
399
400 dirty_count = 0;
401 while (io_requested && retval == 0) {
402
403 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
404
405 if (cur_needed > MAX_RUN)
406 cur_needed = MAX_RUN;
407
408 for (cur_run = 0; cur_run < cur_needed; ) {
409
410 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
411 break;
412
413
414 if (dst_page->busy || dst_page->cleaning) {
415 /*
416 * someone else is playing with the page... if we've
417 * already collected pages into this run, process
418 * them now, since we can't block on this page while
419 * holding other pages in the BUSY state; otherwise,
420 * sleep until the page becomes available
421 */
422 if (cur_run)
423 break;
424 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
425 continue;
426 }
427 if (dst_page->laundry) {
428 dst_page->pageout = FALSE;
429
430 vm_pageout_steal_laundry(dst_page, FALSE);
431 }
432 /*
433 * this routine is only called when copying
434 * to/from real files... no need to consider
435 * encrypted swap pages
436 */
437 assert(!dst_page->encrypted);
438
439 if (mark_dirty) {
440 if (dst_page->dirty == FALSE)
441 dirty_count++;
442 SET_PAGE_DIRTY(dst_page, FALSE);
443 if (dst_page->cs_validated &&
444 !dst_page->cs_tainted) {
445 /*
446 * CODE SIGNING:
447 * We're modifying a code-signed
448 * page: force revalidate
449 */
450 dst_page->cs_validated = FALSE;
451 #if DEVELOPMENT || DEBUG
452 vm_cs_validated_resets++;
453 #endif
454 pmap_disconnect(dst_page->phys_page);
455 }
456 }
457 dst_page->busy = TRUE;
458
459 page_run[cur_run++] = dst_page;
460
461 offset += PAGE_SIZE_64;
462 }
463 if (cur_run == 0)
464 /*
465 * we hit a 'hole' in the cache or
466 * a page we don't want to try to handle,
467 * so bail at this point
468 * we'll unlock the object below
469 */
470 break;
471 vm_object_unlock(object);
472
473 for (i = 0; i < cur_run; i++) {
474
475 dst_page = page_run[i];
476
477 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
478 xsize = io_requested;
479
480 if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) )
481 break;
482
483 io_requested -= xsize;
484 start_offset = 0;
485 }
486 vm_object_lock(object);
487
488 /*
489 * if we have more than 1 page to work on
490 * in the current run, or the original request
491 * started at offset 0 of the page, or we're
492 * processing multiple batches, we will move
493 * the pages to the tail of the inactive queue
494 * to implement an LRU for read/write accesses
495 *
496 * the check for orig_offset == 0 is there to
497 * mitigate the cost of small (< page_size) requests
498 * to the same page (this way we only move it once)
499 */
500 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
501
502 vm_page_lockspin_queues();
503
504 for (i = 0; i < cur_run; i++)
505 vm_page_lru(page_run[i]);
506
507 vm_page_unlock_queues();
508 }
509 for (i = 0; i < cur_run; i++) {
510 dst_page = page_run[i];
511
512 /*
513 * someone is explicitly referencing this page...
514 * update clustered and speculative state
515 *
516 */
517 if (dst_page->clustered)
518 VM_PAGE_CONSUME_CLUSTERED(dst_page);
519
520 PAGE_WAKEUP_DONE(dst_page);
521 }
522 orig_offset = 0;
523 }
524 vm_object_unlock(object);
525 task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED);
526 return (retval);
527 }
528
529
530 /*
531 *
532 */
533 void
534 vnode_pager_bootstrap(void)
535 {
536 register vm_size_t size;
537
538 size = (vm_size_t) sizeof(struct vnode_pager);
539 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
540 PAGE_SIZE, "vnode pager structures");
541 zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
542 zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
543
544
545 #if CONFIG_CODE_DECRYPTION
546 apple_protect_pager_bootstrap();
547 #endif /* CONFIG_CODE_DECRYPTION */
548 swapfile_pager_bootstrap();
549 return;
550 }
551
552 /*
553 *
554 */
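/*
 * vnode_pager_setup() creates the pager that will back a vnode's UBC memory
 * object; the returned vnode_pager_t doubles as the memory_object_t handed
 * back to the VM layer.  The 'pager' argument is unused here -- a fresh
 * pager is always created.
 */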
555 memory_object_t
556 vnode_pager_setup(
557 struct vnode *vp,
558 __unused memory_object_t pager)
559 {
560 vnode_pager_t vnode_object;
561
562 vnode_object = vnode_object_create(vp);
563 if (vnode_object == VNODE_PAGER_NULL)
564 panic("vnode_pager_setup: vnode_object_create() failed");
565 return((memory_object_t)vnode_object);
566 }
567
568 /*
569 *
570 */
571 kern_return_t
572 vnode_pager_init(memory_object_t mem_obj,
573 memory_object_control_t control,
574 #if !DEBUG
575 __unused
576 #endif
577 memory_object_cluster_size_t pg_size)
578 {
579 vnode_pager_t vnode_object;
580 kern_return_t kr;
581 memory_object_attr_info_data_t attributes;
582
583
584 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
585
586 if (control == MEMORY_OBJECT_CONTROL_NULL)
587 return KERN_INVALID_ARGUMENT;
588
589 vnode_object = vnode_pager_lookup(mem_obj);
590
591 memory_object_control_reference(control);
592
593 vnode_object->control_handle = control;
594
595 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
596 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
597 attributes.cluster_size = (1 << (PAGE_SHIFT));
598 attributes.may_cache_object = TRUE;
599 attributes.temporary = TRUE;
600
601 kr = memory_object_change_attributes(
602 control,
603 MEMORY_OBJECT_ATTRIBUTE_INFO,
604 (memory_object_info_t) &attributes,
605 MEMORY_OBJECT_ATTR_INFO_COUNT);
606 if (kr != KERN_SUCCESS)
607 panic("vnode_pager_init: memory_object_change_attributes() failed");
608
609 return(KERN_SUCCESS);
610 }
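/*
 * The attributes set above make the object cacheable and temporary with a
 * delayed copy strategy; cluster_size stays pinned to a single page (the
 * CLUSTER_SHIFT-based value remains commented out, per the TODO near the
 * top of this file).
 */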
611
612 /*
613 *
614 */
615 kern_return_t
616 vnode_pager_data_return(
617 memory_object_t mem_obj,
618 memory_object_offset_t offset,
619 memory_object_cluster_size_t data_cnt,
620 memory_object_offset_t *resid_offset,
621 int *io_error,
622 __unused boolean_t dirty,
623 __unused boolean_t kernel_copy,
624 int upl_flags)
625 {
626 register vnode_pager_t vnode_object;
627
628 vnode_object = vnode_pager_lookup(mem_obj);
629
630 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
631
632 return KERN_SUCCESS;
633 }
634
635 kern_return_t
636 vnode_pager_data_initialize(
637 __unused memory_object_t mem_obj,
638 __unused memory_object_offset_t offset,
639 __unused memory_object_cluster_size_t data_cnt)
640 {
641 panic("vnode_pager_data_initialize");
642 return KERN_FAILURE;
643 }
644
645 kern_return_t
646 vnode_pager_data_unlock(
647 __unused memory_object_t mem_obj,
648 __unused memory_object_offset_t offset,
649 __unused memory_object_size_t size,
650 __unused vm_prot_t desired_access)
651 {
652 return KERN_FAILURE;
653 }
654
655 kern_return_t
656 vnode_pager_get_isinuse(
657 memory_object_t mem_obj,
658 uint32_t *isinuse)
659 {
660 vnode_pager_t vnode_object;
661
662 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
663 *isinuse = 1;
664 return KERN_INVALID_ARGUMENT;
665 }
666
667 vnode_object = vnode_pager_lookup(mem_obj);
668
669 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
670 return KERN_SUCCESS;
671 }
672
673 kern_return_t
674 vnode_pager_get_throttle_io_limit(
675 memory_object_t mem_obj,
676 uint32_t *limit)
677 {
678 vnode_pager_t vnode_object;
679
680 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
681 return KERN_INVALID_ARGUMENT;
682
683 vnode_object = vnode_pager_lookup(mem_obj);
684
685 (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
686 return KERN_SUCCESS;
687 }
688
689 kern_return_t
690 vnode_pager_get_isSSD(
691 memory_object_t mem_obj,
692 boolean_t *isSSD)
693 {
694 vnode_pager_t vnode_object;
695
696 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
697 return KERN_INVALID_ARGUMENT;
698
699 vnode_object = vnode_pager_lookup(mem_obj);
700
701 *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
702 return KERN_SUCCESS;
703 }
704
705 kern_return_t
706 vnode_pager_get_object_size(
707 memory_object_t mem_obj,
708 memory_object_offset_t *length)
709 {
710 vnode_pager_t vnode_object;
711
712 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
713 *length = 0;
714 return KERN_INVALID_ARGUMENT;
715 }
716
717 vnode_object = vnode_pager_lookup(mem_obj);
718
719 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
720 return KERN_SUCCESS;
721 }
722
723 kern_return_t
724 vnode_pager_get_object_name(
725 memory_object_t mem_obj,
726 char *pathname,
727 vm_size_t pathname_len,
728 char *filename,
729 vm_size_t filename_len,
730 boolean_t *truncated_path_p)
731 {
732 vnode_pager_t vnode_object;
733
734 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
735 return KERN_INVALID_ARGUMENT;
736 }
737
738 vnode_object = vnode_pager_lookup(mem_obj);
739
740 return vnode_pager_get_name(vnode_object->vnode_handle,
741 pathname,
742 pathname_len,
743 filename,
744 filename_len,
745 truncated_path_p);
746 }
747
748 kern_return_t
749 vnode_pager_get_object_mtime(
750 memory_object_t mem_obj,
751 struct timespec *mtime,
752 struct timespec *cs_mtime)
753 {
754 vnode_pager_t vnode_object;
755
756 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
757 return KERN_INVALID_ARGUMENT;
758 }
759
760 vnode_object = vnode_pager_lookup(mem_obj);
761
762 return vnode_pager_get_mtime(vnode_object->vnode_handle,
763 mtime,
764 cs_mtime);
765 }
766
767 kern_return_t
768 vnode_pager_get_object_cs_blobs(
769 memory_object_t mem_obj,
770 void **blobs)
771 {
772 vnode_pager_t vnode_object;
773
774 if (mem_obj == MEMORY_OBJECT_NULL ||
775 mem_obj->mo_pager_ops != &vnode_pager_ops) {
776 return KERN_INVALID_ARGUMENT;
777 }
778
779 vnode_object = vnode_pager_lookup(mem_obj);
780
781 return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
782 blobs);
783 }
784
785 #if CHECK_CS_VALIDATION_BITMAP
786 kern_return_t
787 vnode_pager_cs_check_validation_bitmap(
788 memory_object_t mem_obj,
789 memory_object_offset_t offset,
790 int optype )
791 {
792 vnode_pager_t vnode_object;
793
794 if (mem_obj == MEMORY_OBJECT_NULL ||
795 mem_obj->mo_pager_ops != &vnode_pager_ops) {
796 return KERN_INVALID_ARGUMENT;
797 }
798
799 vnode_object = vnode_pager_lookup(mem_obj);
800 return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
801 }
802 #endif /* CHECK_CS_VALIDATION_BITMAP */
803
804 /*
805 *
806 */
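/*
 * Page-in entry point: the faulting offset is expanded into a cluster by
 * asking memory_object_cluster_size() for a preferred (base_offset, size)
 * window, capped at MAX_UPL_TRANSFER_BYTES and falling back to a single
 * page, and the whole window is then read via vnode_pager_cluster_read().
 */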
807 kern_return_t
808 vnode_pager_data_request(
809 memory_object_t mem_obj,
810 memory_object_offset_t offset,
811 __unused memory_object_cluster_size_t length,
812 __unused vm_prot_t desired_access,
813 memory_object_fault_info_t fault_info)
814 {
815 vnode_pager_t vnode_object;
816 memory_object_offset_t base_offset;
817 vm_size_t size;
818 uint32_t io_streaming = 0;
819
820 vnode_object = vnode_pager_lookup(mem_obj);
821
822 size = MAX_UPL_TRANSFER_BYTES;
823 base_offset = offset;
824
825 if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
826 size = PAGE_SIZE;
827
828 assert(offset >= base_offset &&
829 offset < base_offset + size);
830
831 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
832 }
833
834 /*
835 *
836 */
837 void
838 vnode_pager_reference(
839 memory_object_t mem_obj)
840 {
841 register vnode_pager_t vnode_object;
842 unsigned int new_ref_count;
843
844 vnode_object = vnode_pager_lookup(mem_obj);
845 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
846 assert(new_ref_count > 1);
847 }
848
849 /*
850 *
851 */
852 void
853 vnode_pager_deallocate(
854 memory_object_t mem_obj)
855 {
856 register vnode_pager_t vnode_object;
857
858 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
859
860 vnode_object = vnode_pager_lookup(mem_obj);
861
862 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
863 if (vnode_object->vnode_handle != NULL) {
864 vnode_pager_vrele(vnode_object->vnode_handle);
865 }
866 zfree(vnode_pager_zone, vnode_object);
867 }
868 return;
869 }
870
871 /*
872 *
873 */
874 kern_return_t
875 vnode_pager_terminate(
876 #if !DEBUG
877 __unused
878 #endif
879 memory_object_t mem_obj)
880 {
881 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
882
883 return(KERN_SUCCESS);
884 }
885
886 /*
887 *
888 */
889 kern_return_t
890 vnode_pager_synchronize(
891 memory_object_t mem_obj,
892 memory_object_offset_t offset,
893 memory_object_size_t length,
894 __unused vm_sync_t sync_flags)
895 {
896 register vnode_pager_t vnode_object;
897
898 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
899
900 vnode_object = vnode_pager_lookup(mem_obj);
901
902 memory_object_synchronize_completed(vnode_object->control_handle, offset, length);
903
904 return (KERN_SUCCESS);
905 }
906
907 /*
908 *
909 */
910 kern_return_t
911 vnode_pager_map(
912 memory_object_t mem_obj,
913 vm_prot_t prot)
914 {
915 vnode_pager_t vnode_object;
916 int ret;
917 kern_return_t kr;
918
919 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
920
921 vnode_object = vnode_pager_lookup(mem_obj);
922
923 ret = ubc_map(vnode_object->vnode_handle, prot);
924
925 if (ret != 0) {
926 kr = KERN_FAILURE;
927 } else {
928 kr = KERN_SUCCESS;
929 }
930
931 return kr;
932 }
933
934 kern_return_t
935 vnode_pager_last_unmap(
936 memory_object_t mem_obj)
937 {
938 register vnode_pager_t vnode_object;
939
940 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
941
942 vnode_object = vnode_pager_lookup(mem_obj);
943
944 ubc_unmap(vnode_object->vnode_handle);
945 return KERN_SUCCESS;
946 }
947
948
949
950 /*
951 *
952 */
953 void
954 vnode_pager_cluster_write(
955 vnode_pager_t vnode_object,
956 vm_object_offset_t offset,
957 vm_size_t cnt,
958 vm_object_offset_t * resid_offset,
959 int * io_error,
960 int upl_flags)
961 {
962 vm_size_t size;
963 int errno;
964
965 if (upl_flags & UPL_MSYNC) {
966
967 upl_flags |= UPL_VNODE_PAGER;
968
969 if ( (upl_flags & UPL_IOSYNC) && io_error)
970 upl_flags |= UPL_KEEPCACHED;
971
972 while (cnt) {
973 size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
974
975 assert((upl_size_t) size == size);
976 vnode_pageout(vnode_object->vnode_handle,
977 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
978
979 if ( (upl_flags & UPL_KEEPCACHED) ) {
980 if ( (*io_error = errno) )
981 break;
982 }
983 cnt -= size;
984 offset += size;
985 }
986 if (resid_offset)
987 *resid_offset = offset;
988
989 } else {
990 vm_object_offset_t vnode_size;
991 vm_object_offset_t base_offset;
992
993 /*
994 * this is the pageout path
995 */
996 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
997
998 if (vnode_size > (offset + PAGE_SIZE)) {
999 /*
1000 * preset the maximum size of the cluster
1001 * and put us on a nice cluster boundary...
1002 * and then clip the size to ensure we
1003 * don't request past the end of the underlying file
1004 */
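/*
 * Worked example of the clipping below (values illustrative only):
 * with a transfer maximum of 0x100000 and offset 0x143000,
 * base_offset = 0x143000 & ~0xfffff = 0x100000; if the file ends at
 * 0x180000, size is clipped to round_page(0x80000).
 */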
1005 size = MAX_UPL_TRANSFER_BYTES;
1006 base_offset = offset & ~((signed)(size - 1));
1007
1008 if ((base_offset + size) > vnode_size)
1009 size = round_page(((vm_size_t)(vnode_size - base_offset)));
1010 } else {
1011 /*
1012 * we've been requested to page out a page beyond the current
1013 * end of the 'file'... don't try to cluster in this case...
1014 * we still need to send this page through because it might
1015 * be marked precious and the underlying filesystem may need
1016 * to do something with it (besides page it out)...
1017 */
1018 base_offset = offset;
1019 size = PAGE_SIZE;
1020 }
1021 assert((upl_size_t) size == size);
1022 vnode_pageout(vnode_object->vnode_handle,
1023 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
1024 (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
1025 }
1026 }
1027
1028
1029 /*
1030 *
1031 */
1032 kern_return_t
1033 vnode_pager_cluster_read(
1034 vnode_pager_t vnode_object,
1035 vm_object_offset_t base_offset,
1036 vm_object_offset_t offset,
1037 uint32_t io_streaming,
1038 vm_size_t cnt)
1039 {
1040 int local_error = 0;
1041 int kret;
1042 int flags = 0;
1043
1044 assert(! (cnt & PAGE_MASK));
1045
1046 if (io_streaming)
1047 flags |= UPL_IOSTREAMING;
1048
1049 assert((upl_size_t) cnt == cnt);
1050 kret = vnode_pagein(vnode_object->vnode_handle,
1051 (upl_t) NULL,
1052 (upl_offset_t) (offset - base_offset),
1053 base_offset,
1054 (upl_size_t) cnt,
1055 flags,
1056 &local_error);
1057 /*
1058 if(kret == PAGER_ABSENT) {
1059 Need to work out the defs here; 1 corresponds to PAGER_ABSENT
1060 as defined in bsd/vm/vm_pager.h.  However, we should not be including
1061 that file here, as that would be a layering violation.
1062 */
1063 if (kret == 1) {
1064 int uplflags;
1065 upl_t upl = NULL;
1066 unsigned int count = 0;
1067 kern_return_t kr;
1068
1069 uplflags = (UPL_NO_SYNC |
1070 UPL_CLEAN_IN_PLACE |
1071 UPL_SET_INTERNAL);
1072 count = 0;
1073 assert((upl_size_t) cnt == cnt);
1074 kr = memory_object_upl_request(vnode_object->control_handle,
1075 base_offset, (upl_size_t) cnt,
1076 &upl, NULL, &count, uplflags);
1077 if (kr == KERN_SUCCESS) {
1078 upl_abort(upl, 0);
1079 upl_deallocate(upl);
1080 } else {
1081 /*
1082 * We couldn't gather the page list, probably
1083 * because the memory object doesn't have a link
1084 * to a VM object anymore (forced unmount, for
1085 * example). Just return an error to the vm_fault()
1086 * path and let it handle it.
1087 */
1088 }
1089
1090 return KERN_FAILURE;
1091 }
1092
1093 return KERN_SUCCESS;
1094
1095 }
1096
1097
1098 /*
1099 *
1100 */
1101 void
1102 vnode_pager_release_from_cache(
1103 int *cnt)
1104 {
1105 memory_object_free_from_cache(
1106 &realhost, &vnode_pager_ops, cnt);
1107 }
1108
1109 /*
1110 *
1111 */
1112 vnode_pager_t
1113 vnode_object_create(
1114 struct vnode *vp)
1115 {
1116 register vnode_pager_t vnode_object;
1117
1118 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
1119 if (vnode_object == VNODE_PAGER_NULL)
1120 return(VNODE_PAGER_NULL);
1121
1122 /*
1123 * The vm_map call takes both named entry ports and raw memory
1124 * objects in the same parameter. We need to make sure that
1125 * vm_map does not see this object as a named entry port. So,
1126 * we reserve the first word in the object for a fake ip_kotype
1127 * setting - that will tell vm_map to use it as a memory object.
1128 */
1129 vnode_object->pager_ops = &vnode_pager_ops;
1130 vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
1131 vnode_object->ref_count = 1;
1132 vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
1133 vnode_object->vnode_handle = vp;
1134
1135 return(vnode_object);
1136 }
1137
1138 /*
1139 *
1140 */
1141 vnode_pager_t
1142 vnode_pager_lookup(
1143 memory_object_t name)
1144 {
1145 vnode_pager_t vnode_object;
1146
1147 vnode_object = (vnode_pager_t)name;
1148 assert(vnode_object->pager_ops == &vnode_pager_ops);
1149 return (vnode_object);
1150 }
1151
1152
1153 /*********************** proc_info implementation *************/
1154
1155 #include <sys/bsdtask_info.h>
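/*
 * These fill_* routines back the proc_info/libproc region queries: they
 * translate a task's vm_map entries into struct proc_regioninfo_internal
 * and, where possible, resolve the backing vnode (address + vid) through
 * the vnode pager.  The user-facing flavors live in the BSD proc_info
 * code, not here.
 */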
1156
1157 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
1158
1159
1160 int
1161 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1162 {
1163
1164 vm_map_t map;
1165 vm_map_offset_t address = (vm_map_offset_t )arg;
1166 vm_map_entry_t tmp_entry;
1167 vm_map_entry_t entry;
1168 vm_map_offset_t start;
1169 vm_region_extended_info_data_t extended;
1170 vm_region_top_info_data_t top;
1171
1172 task_lock(task);
1173 map = task->map;
1174 if (map == VM_MAP_NULL)
1175 {
1176 task_unlock(task);
1177 return(0);
1178 }
1179 vm_map_reference(map);
1180 task_unlock(task);
1181
1182 vm_map_lock_read(map);
1183
1184 start = address;
1185 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1186 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1187 vm_map_unlock_read(map);
1188 vm_map_deallocate(map);
1189 return(0);
1190 }
1191 } else {
1192 entry = tmp_entry;
1193 }
1194
1195 start = entry->vme_start;
1196
1197 pinfo->pri_offset = VME_OFFSET(entry);
1198 pinfo->pri_protection = entry->protection;
1199 pinfo->pri_max_protection = entry->max_protection;
1200 pinfo->pri_inheritance = entry->inheritance;
1201 pinfo->pri_behavior = entry->behavior;
1202 pinfo->pri_user_wired_count = entry->user_wired_count;
1203 pinfo->pri_user_tag = VME_ALIAS(entry);
1204
1205 if (entry->is_sub_map) {
1206 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1207 } else {
1208 if (entry->is_shared)
1209 pinfo->pri_flags |= PROC_REGION_SHARED;
1210 }
1211
1212
1213 extended.protection = entry->protection;
1214 extended.user_tag = VME_ALIAS(entry);
1215 extended.pages_resident = 0;
1216 extended.pages_swapped_out = 0;
1217 extended.pages_shared_now_private = 0;
1218 extended.pages_dirtied = 0;
1219 extended.external_pager = 0;
1220 extended.shadow_depth = 0;
1221
1222 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended);
1223
1224 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1225 extended.share_mode = SM_PRIVATE;
1226
1227 top.private_pages_resident = 0;
1228 top.shared_pages_resident = 0;
1229 vm_map_region_top_walk(entry, &top);
1230
1231
1232 pinfo->pri_pages_resident = extended.pages_resident;
1233 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1234 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1235 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1236 pinfo->pri_ref_count = extended.ref_count;
1237 pinfo->pri_shadow_depth = extended.shadow_depth;
1238 pinfo->pri_share_mode = extended.share_mode;
1239
1240 pinfo->pri_private_pages_resident = top.private_pages_resident;
1241 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1242 pinfo->pri_obj_id = top.obj_id;
1243
1244 pinfo->pri_address = (uint64_t)start;
1245 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1246 pinfo->pri_depth = 0;
1247
1248 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1249 *vnodeaddr = (uintptr_t)0;
1250
1251 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1252 vm_map_unlock_read(map);
1253 vm_map_deallocate(map);
1254 return(1);
1255 }
1256 }
1257
1258 vm_map_unlock_read(map);
1259 vm_map_deallocate(map);
1260 return(1);
1261 }
1262
1263 int
1264 fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1265 {
1266
1267 vm_map_t map;
1268 vm_map_offset_t address = (vm_map_offset_t )arg;
1269 vm_map_entry_t tmp_entry;
1270 vm_map_entry_t entry;
1271
1272 task_lock(task);
1273 map = task->map;
1274 if (map == VM_MAP_NULL)
1275 {
1276 task_unlock(task);
1277 return(0);
1278 }
1279 vm_map_reference(map);
1280 task_unlock(task);
1281
1282 vm_map_lock_read(map);
1283
1284 if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
1285 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1286 vm_map_unlock_read(map);
1287 vm_map_deallocate(map);
1288 return(0);
1289 }
1290 } else {
1291 entry = tmp_entry;
1292 }
1293
1294 while (entry != vm_map_to_entry(map)) {
1295 *vnodeaddr = 0;
1296 *vid = 0;
1297
1298 if (entry->is_sub_map == 0) {
1299 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
1300
1301 pinfo->pri_offset = VME_OFFSET(entry);
1302 pinfo->pri_protection = entry->protection;
1303 pinfo->pri_max_protection = entry->max_protection;
1304 pinfo->pri_inheritance = entry->inheritance;
1305 pinfo->pri_behavior = entry->behavior;
1306 pinfo->pri_user_wired_count = entry->user_wired_count;
1307 pinfo->pri_user_tag = VME_ALIAS(entry);
1308
1309 if (entry->is_shared)
1310 pinfo->pri_flags |= PROC_REGION_SHARED;
1311
1312 pinfo->pri_pages_resident = 0;
1313 pinfo->pri_pages_shared_now_private = 0;
1314 pinfo->pri_pages_swapped_out = 0;
1315 pinfo->pri_pages_dirtied = 0;
1316 pinfo->pri_ref_count = 0;
1317 pinfo->pri_shadow_depth = 0;
1318 pinfo->pri_share_mode = 0;
1319
1320 pinfo->pri_private_pages_resident = 0;
1321 pinfo->pri_shared_pages_resident = 0;
1322 pinfo->pri_obj_id = 0;
1323
1324 pinfo->pri_address = (uint64_t)entry->vme_start;
1325 pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
1326 pinfo->pri_depth = 0;
1327
1328 vm_map_unlock_read(map);
1329 vm_map_deallocate(map);
1330 return(1);
1331 }
1332 }
1333
1334 /* Keep searching for a vnode-backed mapping */
1335 entry = entry->vme_next;
1336 }
1337
1338 vm_map_unlock_read(map);
1339 vm_map_deallocate(map);
1340 return(0);
1341 }
1342
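/*
 * Walk the entry's shadow chain to the terminal object (taking the object
 * locks hand-over-hand) and, if that object is backed by the vnode pager,
 * report the vnode address and vid via vnode_pager_get_object_vnode().
 * Returns 1 on success; returns 0 for submaps, internal (anonymous)
 * objects, and objects whose pager is not ready or alive.
 */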
1343 static int
1344 fill_vnodeinfoforaddr(
1345 vm_map_entry_t entry,
1346 uintptr_t * vnodeaddr,
1347 uint32_t * vid)
1348 {
1349 vm_object_t top_object, object;
1350 memory_object_t memory_object;
1351 memory_object_pager_ops_t pager_ops;
1352 kern_return_t kr;
1353 int shadow_depth;
1354
1355
1356 if (entry->is_sub_map) {
1357 return(0);
1358 } else {
1359 /*
1360 * The last object in the shadow chain has the
1361 * relevant pager information.
1362 */
1363 top_object = VME_OBJECT(entry);
1364 if (top_object == VM_OBJECT_NULL) {
1365 object = VM_OBJECT_NULL;
1366 shadow_depth = 0;
1367 } else {
1368 vm_object_lock(top_object);
1369 for (object = top_object, shadow_depth = 0;
1370 object->shadow != VM_OBJECT_NULL;
1371 object = object->shadow, shadow_depth++) {
1372 vm_object_lock(object->shadow);
1373 vm_object_unlock(object);
1374 }
1375 }
1376 }
1377
1378 if (object == VM_OBJECT_NULL) {
1379 return(0);
1380 } else if (object->internal) {
1381 vm_object_unlock(object);
1382 return(0);
1383 } else if (! object->pager_ready ||
1384 object->terminating ||
1385 ! object->alive) {
1386 vm_object_unlock(object);
1387 return(0);
1388 } else {
1389 memory_object = object->pager;
1390 pager_ops = memory_object->mo_pager_ops;
1391 if (pager_ops == &vnode_pager_ops) {
1392 kr = vnode_pager_get_object_vnode(
1393 memory_object,
1394 vnodeaddr, vid);
1395 if (kr != KERN_SUCCESS) {
1396 vm_object_unlock(object);
1397 return(0);
1398 }
1399 } else {
1400 vm_object_unlock(object);
1401 return(0);
1402 }
1403 }
1404 vm_object_unlock(object);
1405 return(1);
1406 }
1407
1408 kern_return_t
1409 vnode_pager_get_object_vnode (
1410 memory_object_t mem_obj,
1411 uintptr_t * vnodeaddr,
1412 uint32_t * vid)
1413 {
1414 vnode_pager_t vnode_object;
1415
1416 vnode_object = vnode_pager_lookup(mem_obj);
1417 if (vnode_object->vnode_handle) {
1418 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1419 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1420
1421 return(KERN_SUCCESS);
1422 }
1423
1424 return(KERN_FAILURE);
1425 }
1426
1427 #if CONFIG_IOSCHED
1428 kern_return_t
1429 vnode_pager_get_object_devvp(
1430 memory_object_t mem_obj,
1431 uintptr_t *devvp)
1432 {
1433 struct vnode *vp;
1434 uint32_t vid;
1435
1436 if(vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS)
1437 return (KERN_FAILURE);
1438 *devvp = (uintptr_t)vnode_mountdevvp(vp);
1439 if (*devvp)
1440 return (KERN_SUCCESS);
1441 return (KERN_FAILURE);
1442 }
1443 #endif
1444
1445 /*
1446 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1447 * object locked; otherwise return NULL with nothing locked.
1448 */
1449
1450 vm_object_t
1451 find_vnode_object(
1452 vm_map_entry_t entry
1453 )
1454 {
1455 vm_object_t top_object, object;
1456 memory_object_t memory_object;
1457 memory_object_pager_ops_t pager_ops;
1458
1459 if (!entry->is_sub_map) {
1460
1461 /*
1462 * The last object in the shadow chain has the
1463 * relevant pager information.
1464 */
1465
1466 top_object = VME_OBJECT(entry);
1467
1468 if (top_object) {
1469 vm_object_lock(top_object);
1470
1471 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1472 vm_object_lock(object->shadow);
1473 vm_object_unlock(object);
1474 }
1475
1476 if (object && !object->internal && object->pager_ready && !object->terminating &&
1477 object->alive) {
1478 memory_object = object->pager;
1479 pager_ops = memory_object->mo_pager_ops;
1480
1481 /*
1482 * If this object points to the vnode_pager_ops, then we found what we're
1483 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1484 * vnode and so we fall through to the bottom and return NULL.
1485 */
1486
1487 if (pager_ops == &vnode_pager_ops)
1488 return object; /* we return with the object locked */
1489 }
1490
1491 vm_object_unlock(object);
1492 }
1493
1494 }
1495
1496 return(VM_OBJECT_NULL);
1497 }