apple/xnu.git / osfmk / vm / bsd_vm.c (xnu-1504.3.12)
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/thread.h>
45
46 #include <ipc/ipc_port.h>
47 #include <ipc/ipc_space.h>
48
49 #include <default_pager/default_pager_types.h>
50 #include <default_pager/default_pager_object_server.h>
51
52 #include <vm/vm_map.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/memory_object.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_protos.h>
57 #include <vm/vm_purgeable_internal.h>
58
59
60 /* BSD VM COMPONENT INTERFACES */
61 int
62 get_map_nentries(
63 vm_map_t);
64
65 vm_offset_t
66 get_map_start(
67 vm_map_t);
68
69 vm_offset_t
70 get_map_end(
71 vm_map_t);
72
73 /*
74 * Return the number of entries in the given VM map.
75 */
76 int
77 get_map_nentries(
78 vm_map_t map)
79 {
80 return(map->hdr.nentries);
81 }
82
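/*
 * Return the first and last mapped addresses of the given map in the
 * full-width mach_vm_offset_t format.
 */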
83 mach_vm_offset_t
84 mach_get_vm_start(vm_map_t map)
85 {
86 return( vm_map_first_entry(map)->vme_start);
87 }
88
89 mach_vm_offset_t
90 mach_get_vm_end(vm_map_t map)
91 {
92 return( vm_map_last_entry(map)->vme_end);
93 }
94
95 /*
96 * Legacy routines to get the start and end addresses of a vm_map_t.
97 * They return the addresses as vm_offset_t, so they should only be
98 * called on maps whose address size matches the kernel map's for
99 * accurate results.
100 */
101 vm_offset_t
102 get_vm_start(
103 vm_map_t map)
104 {
105 return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
106 }
107
108 vm_offset_t
109 get_vm_end(
110 vm_map_t map)
111 {
112 return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
113 }
114
115 /*
116 * BSD VNODE PAGER
117 */
118
119 const struct memory_object_pager_ops vnode_pager_ops = {
120 vnode_pager_reference,
121 vnode_pager_deallocate,
122 vnode_pager_init,
123 vnode_pager_terminate,
124 vnode_pager_data_request,
125 vnode_pager_data_return,
126 vnode_pager_data_initialize,
127 vnode_pager_data_unlock,
128 vnode_pager_synchronize,
129 vnode_pager_map,
130 vnode_pager_last_unmap,
131 "vnode pager"
132 };
133
134 typedef struct vnode_pager {
135 struct ipc_object_header pager_header; /* fake ip_kotype() */
136 memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */
137 unsigned int ref_count; /* reference count */
138 memory_object_control_t control_handle; /* mem object control handle */
139 struct vnode *vnode_handle; /* vnode handle */
140 } *vnode_pager_t;
141
142 #define pager_ikot pager_header.io_bits
143
144 ipc_port_t
145 trigger_name_to_port( /* forward */
146 mach_port_t);
147
148 kern_return_t
149 vnode_pager_cluster_read( /* forward */
150 vnode_pager_t,
151 vm_object_offset_t,
152 vm_object_offset_t,
153 uint32_t,
154 vm_size_t);
155
156 void
157 vnode_pager_cluster_write( /* forward */
158 vnode_pager_t,
159 vm_object_offset_t,
160 vm_size_t,
161 vm_object_offset_t *,
162 int *,
163 int);
164
165
166 vnode_pager_t
167 vnode_object_create( /* forward */
168 struct vnode *);
169
170 vnode_pager_t
171 vnode_pager_lookup( /* forward */
172 memory_object_t);
173
174 zone_t vnode_pager_zone;
175
176
177 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
178
179 /* TODO: Should be set dynamically by vnode_pager_init() */
180 #define CLUSTER_SHIFT 1
181
182 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
183 #define MAX_VNODE 10000
184
185
186 #if DEBUG
187 int pagerdebug=0;
188
189 #define PAGER_ALL 0xffffffff
190 #define PAGER_INIT 0x00000001
191 #define PAGER_PAGEIN 0x00000002
192
193 #define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
194 #else
195 #define PAGER_DEBUG(LEVEL, A)
196 #endif
197
198 extern int proc_resetpcontrol(int);
199
200 #if DEVELOPMENT || DEBUG
201 extern unsigned long vm_cs_validated_resets;
202 #endif
203
204 /*
205 * Routine: mach_macx_triggers
206 * Function:
207 * Syscall interface to set the callbacks for low and
208 * high water marks.
209 */
210 int
211 mach_macx_triggers(
212 struct macx_triggers_args *args)
213 {
214 int hi_water = args->hi_water;
215 int low_water = args->low_water;
216 int flags = args->flags;
217 mach_port_t trigger_name = args->alert_port;
218 kern_return_t kr;
219 memory_object_default_t default_pager;
220 ipc_port_t trigger_port;
221
222 default_pager = MEMORY_OBJECT_DEFAULT_NULL;
223 kr = host_default_memory_manager(host_priv_self(),
224 &default_pager, 0);
225 if(kr != KERN_SUCCESS) {
226 return EINVAL;
227 }
228
229 if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
230 ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
231 /* can't have it both ways */
232 return EINVAL;
233 }
234
235 if (default_pager_init_flag == 0) {
236 start_def_pager(NULL);
237 default_pager_init_flag = 1;
238 }
239
240 if (flags & SWAP_ENCRYPT_ON) {
241 /* ENCRYPTED SWAP: tell default_pager to encrypt */
242 default_pager_triggers(default_pager,
243 0, 0,
244 SWAP_ENCRYPT_ON,
245 IP_NULL);
246 } else if (flags & SWAP_ENCRYPT_OFF) {
247 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
248 default_pager_triggers(default_pager,
249 0, 0,
250 SWAP_ENCRYPT_OFF,
251 IP_NULL);
252 }
253
254 if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
255 /*
256 * Time to switch to the emergency segment.
257 */
258 return default_pager_triggers(default_pager,
259 0, 0,
260 USE_EMERGENCY_SWAP_FILE_FIRST,
261 IP_NULL);
262 }
263
264 if (flags & SWAP_FILE_CREATION_ERROR) {
265 /*
266 * For some reason, the dynamic pager failed to create a swap file.
267 */
268 trigger_port = trigger_name_to_port(trigger_name);
269 if(trigger_port == NULL) {
270 return EINVAL;
271 }
272 /* trigger_port is locked and active */
273 ipc_port_make_send_locked(trigger_port);
274 /* now unlocked */
275 default_pager_triggers(default_pager,
276 0, 0,
277 SWAP_FILE_CREATION_ERROR,
278 trigger_port);
279 }
280
281 if (flags & HI_WAT_ALERT) {
282 trigger_port = trigger_name_to_port(trigger_name);
283 if(trigger_port == NULL) {
284 return EINVAL;
285 }
286 /* trigger_port is locked and active */
287 ipc_port_make_send_locked(trigger_port);
288 /* now unlocked */
289 default_pager_triggers(default_pager,
290 hi_water, low_water,
291 HI_WAT_ALERT, trigger_port);
292 }
293
294 if (flags & LO_WAT_ALERT) {
295 trigger_port = trigger_name_to_port(trigger_name);
296 if(trigger_port == NULL) {
297 return EINVAL;
298 }
299 /* trigger_port is locked and active */
300 ipc_port_make_send_locked(trigger_port);
301 /* now unlocked */
302 default_pager_triggers(default_pager,
303 hi_water, low_water,
304 LO_WAT_ALERT, trigger_port);
305 }
306
307
308 if (flags & PROC_RESUME) {
309
310 /*
311 * For this call, hi_water is used to pass in the pid of the process we want to resume
312 * or unthrottle. This is of course restricted to the superuser (checked inside of
313 * proc_resetpcontrol).
314 */
315
316 return proc_resetpcontrol(hi_water);
317 }
318
319 /*
320 * Set the scheduling priority and policy for the current thread.
321 * It is assumed, for the time being, that the thread setting the alert
322 * is the same one that will be servicing it.
323 *
324 * XXX This does not belong in the kernel XXX
325 */
326 if (flags & HI_WAT_ALERT) {
327 thread_precedence_policy_data_t pre;
328 thread_extended_policy_data_t ext;
329
330 ext.timeshare = FALSE;
331 pre.importance = INT32_MAX;
332
333 thread_policy_set(current_thread(),
334 THREAD_EXTENDED_POLICY,
335 (thread_policy_t)&ext,
336 THREAD_EXTENDED_POLICY_COUNT);
337
338 thread_policy_set(current_thread(),
339 THREAD_PRECEDENCE_POLICY,
340 (thread_policy_t)&pre,
341 THREAD_PRECEDENCE_POLICY_COUNT);
342
343 current_thread()->options |= TH_OPT_VMPRIV;
344 }
345
346 if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
347 return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
348 }
349
350 return 0;
351 }
352
353 /*
354 * Translate a trigger port name in the caller's IPC space into an ipc_port_t.
355 */
356 ipc_port_t
357 trigger_name_to_port(
358 mach_port_t trigger_name)
359 {
360 ipc_port_t trigger_port;
361 ipc_space_t space;
362
363 if (trigger_name == 0)
364 return (NULL);
365
366 space = current_space();
367 if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
368 &trigger_port) != KERN_SUCCESS)
369 return (NULL);
370 return trigger_port;
371 }
372
373
374 extern int uiomove64(addr64_t, int, void *);
375 #define MAX_RUN 32
376
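/*
 * Copy data between a uio and the pages that are already resident in
 * the cache for this memory object.  This is a fast path used by the
 * filesystem read/write code: it only touches pages that can be used
 * without honoring copy-on-write or blocking on pages being gathered
 * into a UPL, and it bails out as soon as it hits a page it cannot
 * handle, leaving the rest of the request to the caller's slow path.
 */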
377 int
378 memory_object_control_uiomove(
379 memory_object_control_t control,
380 memory_object_offset_t offset,
381 void * uio,
382 int start_offset,
383 int io_requested,
384 int mark_dirty,
385 int take_reference)
386 {
387 vm_object_t object;
388 vm_page_t dst_page;
389 int xsize;
390 int retval = 0;
391 int cur_run;
392 int cur_needed;
393 int i;
394 int orig_offset;
395 vm_page_t page_run[MAX_RUN];
396
397 object = memory_object_control_to_vm_object(control);
398 if (object == VM_OBJECT_NULL) {
399 return (0);
400 }
401 assert(!object->internal);
402
403 vm_object_lock(object);
404
405 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
406 /*
407 * We can't modify the pages without honoring
408 * copy-on-write obligations first, so fall off
409 * this optimized path and fall back to the regular
410 * path.
411 */
412 vm_object_unlock(object);
413 return 0;
414 }
415 orig_offset = start_offset;
416
417 while (io_requested && retval == 0) {
418
419 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
420
421 if (cur_needed > MAX_RUN)
422 cur_needed = MAX_RUN;
423
424 for (cur_run = 0; cur_run < cur_needed; ) {
425
426 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
427 break;
428
429 /*
430 * if we're in this routine, we are inside a filesystem's
431 * locking model, so we don't ever want to wait for pages that have
432 * list_req_pending == TRUE since it means that the
433 * page is a candidate for some type of I/O operation,
434 * but that it has not yet been gathered into a UPL...
435 * this implies that it is still outside the domain
436 * of the filesystem and that whoever is responsible for
437 * grabbing it into a UPL may be stuck behind the filesystem
438 * lock this thread owns, or trying to take a lock exclusively
439 * and waiting for the readers to drain from a rw lock...
440 * if we block in those cases, we will deadlock
441 */
442 if (dst_page->list_req_pending) {
443
444 if (dst_page->absent) {
445 /*
446 * this is the list_req_pending | absent | busy case
447 * which originates from vm_fault_page... we want
448 * to fall out of the fast path and go back
449 * to the caller which will gather this page
450 * into a UPL and issue the I/O if no one
451 * else beats us to it
452 */
453 break;
454 }
455 if (dst_page->pageout) {
456 /*
457 * this is the list_req_pending | pageout | busy case
458 * which can originate from both the pageout_scan and
459 * msync worlds... we need to reset the state of this page to indicate
460 * it should stay in the cache marked dirty... nothing else we
461 * can do at this point... we can't block on it, we can't busy
462 * it and we can't clean it from this routine.
463 */
464 vm_page_lockspin_queues();
465
466 vm_pageout_queue_steal(dst_page, TRUE);
467 vm_page_deactivate(dst_page);
468
469 vm_page_unlock_queues();
470 }
471 /*
472 * this is the list_req_pending | cleaning case...
473 * we can go ahead and deal with this page since
474 * it's ok for us to mark this page busy... if a UPL
475 * tries to gather this page, it will block until the
476 * busy is cleared, thus allowing us safe use of the page...
477 * when we're done with it, we will clear busy and wake
478 * up anyone waiting on it, thus allowing the UPL creation
479 * to finish
480 */
481
482 } else if (dst_page->busy || dst_page->cleaning) {
483 /*
484 * someone else is playing with the page... if we've
485 * already collected pages into this run, go ahead
486 * and process now... we can't block on this
487 * page while holding other pages in the BUSY state,
488 * otherwise we will wait
489 */
490 if (cur_run)
491 break;
492 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
493 continue;
494 }
495
496 /*
497 * this routine is only called when copying
498 * to/from real files... no need to consider
499 * encrypted swap pages
500 */
501 assert(!dst_page->encrypted);
502
503 if (mark_dirty) {
504 dst_page->dirty = TRUE;
505 if (dst_page->cs_validated &&
506 !dst_page->cs_tainted) {
507 /*
508 * CODE SIGNING:
509 * We're modifying a code-signed
510 * page: force revalidate
511 */
512 dst_page->cs_validated = FALSE;
513 #if DEVELOPMENT || DEBUG
514 vm_cs_validated_resets++;
515 #endif
516 pmap_disconnect(dst_page->phys_page);
517 }
518 }
519 dst_page->busy = TRUE;
520
521 page_run[cur_run++] = dst_page;
522
523 offset += PAGE_SIZE_64;
524 }
525 if (cur_run == 0)
526 /*
527 * we hit a 'hole' in the cache or
528 * a page we don't want to try to handle,
529 * so bail at this point...
530 * we'll unlock the object below
531 */
532 break;
533 vm_object_unlock(object);
534
535 for (i = 0; i < cur_run; i++) {
536
537 dst_page = page_run[i];
538
539 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
540 xsize = io_requested;
541
542 if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
543 break;
544
545 io_requested -= xsize;
546 start_offset = 0;
547 }
548 vm_object_lock(object);
549
550 /*
551 * if we have more than 1 page to work on
552 * in the current run, or the original request
553 * started at offset 0 of the page, or we're
554 * processing multiple batches, we will move
555 * the pages to the tail of the inactive queue
556 * to implement an LRU for read/write accesses
557 *
558 * the check for orig_offset == 0 is there to
559 * mitigate the cost of small (< page_size) requests
560 * to the same page (this way we only move it once)
561 */
562 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
563
564 vm_page_lockspin_queues();
565
566 for (i = 0; i < cur_run; i++)
567 vm_page_lru(page_run[i]);
568
569 vm_page_unlock_queues();
570 }
571 for (i = 0; i < cur_run; i++) {
572 dst_page = page_run[i];
573
574 /*
575 * someone is explicitly referencing this page...
576 * update clustered and speculative state
577 *
578 */
579 VM_PAGE_CONSUME_CLUSTERED(dst_page);
580
581 PAGE_WAKEUP_DONE(dst_page);
582 }
583 orig_offset = 0;
584 }
585 vm_object_unlock(object);
586
587 return (retval);
588 }
589
590
591 /*
592 * Initialize the zone for vnode pager structures and bootstrap the other external pagers.
593 */
594 void
595 vnode_pager_bootstrap(void)
596 {
597 register vm_size_t size;
598
599 size = (vm_size_t) sizeof(struct vnode_pager);
600 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
601 PAGE_SIZE, "vnode pager structures");
602 #if CONFIG_CODE_DECRYPTION
603 apple_protect_pager_bootstrap();
604 #endif /* CONFIG_CODE_DECRYPTION */
605 swapfile_pager_bootstrap();
606 return;
607 }
608
609 /*
610 * Create a vnode pager (memory object) for the given vnode.
611 */
612 memory_object_t
613 vnode_pager_setup(
614 struct vnode *vp,
615 __unused memory_object_t pager)
616 {
617 vnode_pager_t vnode_object;
618
619 vnode_object = vnode_object_create(vp);
620 if (vnode_object == VNODE_PAGER_NULL)
621 panic("vnode_pager_setup: vnode_object_create() failed");
622 return((memory_object_t)vnode_object);
623 }
624
625 /*
626 * Attach the memory object control handle and set the default attributes.
627 */
628 kern_return_t
629 vnode_pager_init(memory_object_t mem_obj,
630 memory_object_control_t control,
631 #if !DEBUG
632 __unused
633 #endif
634 memory_object_cluster_size_t pg_size)
635 {
636 vnode_pager_t vnode_object;
637 kern_return_t kr;
638 memory_object_attr_info_data_t attributes;
639
640
641 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
642
643 if (control == MEMORY_OBJECT_CONTROL_NULL)
644 return KERN_INVALID_ARGUMENT;
645
646 vnode_object = vnode_pager_lookup(mem_obj);
647
648 memory_object_control_reference(control);
649
650 vnode_object->control_handle = control;
651
652 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
653 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
654 attributes.cluster_size = (1 << (PAGE_SHIFT));
655 attributes.may_cache_object = TRUE;
656 attributes.temporary = TRUE;
657
658 kr = memory_object_change_attributes(
659 control,
660 MEMORY_OBJECT_ATTRIBUTE_INFO,
661 (memory_object_info_t) &attributes,
662 MEMORY_OBJECT_ATTR_INFO_COUNT);
663 if (kr != KERN_SUCCESS)
664 panic("vnode_pager_init: memory_object_change_attributes() failed");
665
666 return(KERN_SUCCESS);
667 }
668
669 /*
670 * Handle a page-out request by pushing the data back to the vnode.
671 */
672 kern_return_t
673 vnode_pager_data_return(
674 memory_object_t mem_obj,
675 memory_object_offset_t offset,
676 memory_object_cluster_size_t data_cnt,
677 memory_object_offset_t *resid_offset,
678 int *io_error,
679 __unused boolean_t dirty,
680 __unused boolean_t kernel_copy,
681 int upl_flags)
682 {
683 register vnode_pager_t vnode_object;
684
685 vnode_object = vnode_pager_lookup(mem_obj);
686
687 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
688
689 return KERN_SUCCESS;
690 }
691
692 kern_return_t
693 vnode_pager_data_initialize(
694 __unused memory_object_t mem_obj,
695 __unused memory_object_offset_t offset,
696 __unused memory_object_cluster_size_t data_cnt)
697 {
698 panic("vnode_pager_data_initialize");
699 return KERN_FAILURE;
700 }
701
702 kern_return_t
703 vnode_pager_data_unlock(
704 __unused memory_object_t mem_obj,
705 __unused memory_object_offset_t offset,
706 __unused memory_object_size_t size,
707 __unused vm_prot_t desired_access)
708 {
709 return KERN_FAILURE;
710 }
711
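/*
 * Report whether the vnode backing this pager is currently in use.
 */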
712 kern_return_t
713 vnode_pager_get_isinuse(
714 memory_object_t mem_obj,
715 uint32_t *isinuse)
716 {
717 vnode_pager_t vnode_object;
718
719 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
720 *isinuse = 1;
721 return KERN_INVALID_ARGUMENT;
722 }
723
724 vnode_object = vnode_pager_lookup(mem_obj);
725
726 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
727 return KERN_SUCCESS;
728 }
729
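/*
 * Retrieve the hard-throttle I/O limit for the vnode backing this pager.
 */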
730 kern_return_t
731 vnode_pager_check_hard_throttle(
732 memory_object_t mem_obj,
733 uint32_t *limit,
734 uint32_t hard_throttle)
735 {
736 vnode_pager_t vnode_object;
737
738 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
739 return KERN_INVALID_ARGUMENT;
740
741 vnode_object = vnode_pager_lookup(mem_obj);
742
743 (void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle);
744 return KERN_SUCCESS;
745 }
746
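/*
 * Return the current size of the file backing this pager.
 */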
747 kern_return_t
748 vnode_pager_get_object_size(
749 memory_object_t mem_obj,
750 memory_object_offset_t *length)
751 {
752 vnode_pager_t vnode_object;
753
754 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
755 *length = 0;
756 return KERN_INVALID_ARGUMENT;
757 }
758
759 vnode_object = vnode_pager_lookup(mem_obj);
760
761 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
762 return KERN_SUCCESS;
763 }
764
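/*
 * Copy the pathname of the file backing this pager into the supplied buffer.
 */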
765 kern_return_t
766 vnode_pager_get_object_pathname(
767 memory_object_t mem_obj,
768 char *pathname,
769 vm_size_t *length_p)
770 {
771 vnode_pager_t vnode_object;
772
773 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
774 return KERN_INVALID_ARGUMENT;
775 }
776
777 vnode_object = vnode_pager_lookup(mem_obj);
778
779 return vnode_pager_get_pathname(vnode_object->vnode_handle,
780 pathname,
781 length_p);
782 }
783
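/*
 * Return the filename of the file backing this pager.
 */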
784 kern_return_t
785 vnode_pager_get_object_filename(
786 memory_object_t mem_obj,
787 const char **filename)
788 {
789 vnode_pager_t vnode_object;
790
791 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
792 return KERN_INVALID_ARGUMENT;
793 }
794
795 vnode_object = vnode_pager_lookup(mem_obj);
796
797 return vnode_pager_get_filename(vnode_object->vnode_handle,
798 filename);
799 }
800
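/*
 * Return the code-signing blobs attached to the vnode backing this pager.
 */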
801 kern_return_t
802 vnode_pager_get_object_cs_blobs(
803 memory_object_t mem_obj,
804 void **blobs)
805 {
806 vnode_pager_t vnode_object;
807
808 if (mem_obj == MEMORY_OBJECT_NULL ||
809 mem_obj->mo_pager_ops != &vnode_pager_ops) {
810 return KERN_INVALID_ARGUMENT;
811 }
812
813 vnode_object = vnode_pager_lookup(mem_obj);
814
815 return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
816 blobs);
817 }
818
819 /*
820 * Handle a page-in request, expanding it to a cluster when possible.
821 */
822 kern_return_t
823 vnode_pager_data_request(
824 memory_object_t mem_obj,
825 memory_object_offset_t offset,
826 __unused memory_object_cluster_size_t length,
827 __unused vm_prot_t desired_access,
828 memory_object_fault_info_t fault_info)
829 {
830 vnode_pager_t vnode_object;
831 memory_object_offset_t base_offset;
832 vm_size_t size;
833 uint32_t io_streaming = 0;
834
835 vnode_object = vnode_pager_lookup(mem_obj);
836
837 size = MAX_UPL_TRANSFER * PAGE_SIZE;
838 base_offset = offset;
839
840 if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
841 size = PAGE_SIZE;
842
843 assert(offset >= base_offset &&
844 offset < base_offset + size);
845
846 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
847 }
848
849 /*
850 * Take an additional reference on the vnode pager.
851 */
852 void
853 vnode_pager_reference(
854 memory_object_t mem_obj)
855 {
856 register vnode_pager_t vnode_object;
857 unsigned int new_ref_count;
858
859 vnode_object = vnode_pager_lookup(mem_obj);
860 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
861 assert(new_ref_count > 1);
862 }
863
864 /*
865 * Drop a reference; free the vnode pager when the last reference is released.
866 */
867 void
868 vnode_pager_deallocate(
869 memory_object_t mem_obj)
870 {
871 register vnode_pager_t vnode_object;
872
873 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
874
875 vnode_object = vnode_pager_lookup(mem_obj);
876
877 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
878 if (vnode_object->vnode_handle != NULL) {
879 vnode_pager_vrele(vnode_object->vnode_handle);
880 }
881 zfree(vnode_pager_zone, vnode_object);
882 }
883 return;
884 }
885
886 /*
887 * Terminate the pager; there is nothing to tear down here.
888 */
889 kern_return_t
890 vnode_pager_terminate(
891 #if !DEBUG
892 __unused
893 #endif
894 memory_object_t mem_obj)
895 {
896 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
897
898 return(KERN_SUCCESS);
899 }
900
901 /*
902 * Complete a synchronize request by notifying the memory object control.
903 */
904 kern_return_t
905 vnode_pager_synchronize(
906 memory_object_t mem_obj,
907 memory_object_offset_t offset,
908 memory_object_size_t length,
909 __unused vm_sync_t sync_flags)
910 {
911 register vnode_pager_t vnode_object;
912
913 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
914
915 vnode_object = vnode_pager_lookup(mem_obj);
916
917 memory_object_synchronize_completed(vnode_object->control_handle, offset, length);
918
919 return (KERN_SUCCESS);
920 }
921
922 /*
923 * Record that the vnode is being memory-mapped with the given protection.
924 */
925 kern_return_t
926 vnode_pager_map(
927 memory_object_t mem_obj,
928 vm_prot_t prot)
929 {
930 vnode_pager_t vnode_object;
931 int ret;
932 kern_return_t kr;
933
934 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
935
936 vnode_object = vnode_pager_lookup(mem_obj);
937
938 ret = ubc_map(vnode_object->vnode_handle, prot);
939
940 if (ret != 0) {
941 kr = KERN_FAILURE;
942 } else {
943 kr = KERN_SUCCESS;
944 }
945
946 return kr;
947 }
948
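/*
 * Called when the last mapping of this memory object goes away;
 * notify the UBC layer via ubc_unmap().
 */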
949 kern_return_t
950 vnode_pager_last_unmap(
951 memory_object_t mem_obj)
952 {
953 register vnode_pager_t vnode_object;
954
955 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
956
957 vnode_object = vnode_pager_lookup(mem_obj);
958
959 ubc_unmap(vnode_object->vnode_handle);
960 return KERN_SUCCESS;
961 }
962
963
964
965 /*
966 * Push a range of dirty data out to the vnode.
967 */
968 void
969 vnode_pager_cluster_write(
970 vnode_pager_t vnode_object,
971 vm_object_offset_t offset,
972 vm_size_t cnt,
973 vm_object_offset_t * resid_offset,
974 int * io_error,
975 int upl_flags)
976 {
977 vm_size_t size;
978 int errno;
979
980 if (upl_flags & UPL_MSYNC) {
981
982 upl_flags |= UPL_VNODE_PAGER;
983
984 if ( (upl_flags & UPL_IOSYNC) && io_error)
985 upl_flags |= UPL_KEEPCACHED;
986
987 while (cnt) {
988 size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */
989
990 assert((upl_size_t) size == size);
991 vnode_pageout(vnode_object->vnode_handle,
992 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
993
994 if ( (upl_flags & UPL_KEEPCACHED) ) {
995 if ( (*io_error = errno) )
996 break;
997 }
998 cnt -= size;
999 offset += size;
1000 }
1001 if (resid_offset)
1002 *resid_offset = offset;
1003
1004 } else {
1005 vm_object_offset_t vnode_size;
1006 vm_object_offset_t base_offset;
1007
1008 /*
1009 * this is the pageout path
1010 */
1011 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
1012
1013 if (vnode_size > (offset + PAGE_SIZE)) {
1014 /*
1015 * preset the maximum size of the cluster
1016 * and put us on a nice cluster boundary...
1017 * and then clip the size to insure we
1018 * don't request past the end of the underlying file
1019 */
1020 size = PAGE_SIZE * MAX_UPL_TRANSFER;
1021 base_offset = offset & ~((signed)(size - 1));
1022
1023 if ((base_offset + size) > vnode_size)
1024 size = round_page(((vm_size_t)(vnode_size - base_offset)));
1025 } else {
1026 /*
1027 * we've been requested to page out a page beyond the current
1028 * end of the 'file'... don't try to cluster in this case...
1029 * we still need to send this page through because it might
1030 * be marked precious and the underlying filesystem may need
1031 * to do something with it (besides page it out)...
1032 */
1033 base_offset = offset;
1034 size = PAGE_SIZE;
1035 }
1036 assert((upl_size_t) size == size);
1037 vnode_pageout(vnode_object->vnode_handle,
1038 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL);
1039 }
1040 }
1041
1042
1043 /*
1044 * Page a cluster of data in from the vnode.
1045 */
1046 kern_return_t
1047 vnode_pager_cluster_read(
1048 vnode_pager_t vnode_object,
1049 vm_object_offset_t base_offset,
1050 vm_object_offset_t offset,
1051 uint32_t io_streaming,
1052 vm_size_t cnt)
1053 {
1054 int local_error = 0;
1055 int kret;
1056 int flags = 0;
1057
1058 assert(! (cnt & PAGE_MASK));
1059
1060 if (io_streaming)
1061 flags |= UPL_IOSTREAMING;
1062
1063 assert((upl_size_t) cnt == cnt);
1064 kret = vnode_pagein(vnode_object->vnode_handle,
1065 (upl_t) NULL,
1066 (upl_offset_t) (offset - base_offset),
1067 base_offset,
1068 (upl_size_t) cnt,
1069 flags,
1070 &local_error);
1071 /*
1072 if(kret == PAGER_ABSENT) {
1073 Need to work out the defs here; 1 corresponds to PAGER_ABSENT
1074 as defined in bsd/vm/vm_pager.h. However, we should not include
1075 that file here; that would be a layering violation.
1076 */
1077 if (kret == 1) {
1078 int uplflags;
1079 upl_t upl = NULL;
1080 unsigned int count = 0;
1081 kern_return_t kr;
1082
1083 uplflags = (UPL_NO_SYNC |
1084 UPL_CLEAN_IN_PLACE |
1085 UPL_SET_INTERNAL);
1086 count = 0;
1087 assert((upl_size_t) cnt == cnt);
1088 kr = memory_object_upl_request(vnode_object->control_handle,
1089 base_offset, (upl_size_t) cnt,
1090 &upl, NULL, &count, uplflags);
1091 if (kr == KERN_SUCCESS) {
1092 upl_abort(upl, 0);
1093 upl_deallocate(upl);
1094 } else {
1095 /*
1096 * We couldn't gather the page list, probably
1097 * because the memory object doesn't have a link
1098 * to a VM object anymore (forced unmount, for
1099 * example). Just return an error to the vm_fault()
1100 * path and let it handle it.
1101 */
1102 }
1103
1104 return KERN_FAILURE;
1105 }
1106
1107 return KERN_SUCCESS;
1108
1109 }
1110
1111
1112 /*
1113 * Release cached memory objects that are backed by the vnode pager.
1114 */
1115 void
1116 vnode_pager_release_from_cache(
1117 int *cnt)
1118 {
1119 memory_object_free_from_cache(
1120 &realhost, &vnode_pager_ops, cnt);
1121 }
1122
1123 /*
1124 * Allocate and initialize a vnode pager structure for the given vnode.
1125 */
1126 vnode_pager_t
1127 vnode_object_create(
1128 struct vnode *vp)
1129 {
1130 register vnode_pager_t vnode_object;
1131
1132 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
1133 if (vnode_object == VNODE_PAGER_NULL)
1134 return(VNODE_PAGER_NULL);
1135
1136 /*
1137 * The vm_map call takes both named entry ports and raw memory
1138 * objects in the same parameter. We need to make sure that
1139 * vm_map does not see this object as a named entry port. So,
1140 * we reserve the first word in the object for a fake ip_kotype
1141 * setting - that will tell vm_map to use it as a memory object.
1142 */
1143 vnode_object->pager_ops = &vnode_pager_ops;
1144 vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
1145 vnode_object->ref_count = 1;
1146 vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
1147 vnode_object->vnode_handle = vp;
1148
1149 return(vnode_object);
1150 }
1151
1152 /*
1153 * Convert a memory_object_t back to the vnode_pager_t that implements it.
1154 */
1155 vnode_pager_t
1156 vnode_pager_lookup(
1157 memory_object_t name)
1158 {
1159 vnode_pager_t vnode_object;
1160
1161 vnode_object = (vnode_pager_t)name;
1162 assert(vnode_object->pager_ops == &vnode_pager_ops);
1163 return (vnode_object);
1164 }
1165
1166
1167 /*********************** proc_info implementation *************/
1168
1169 #include <sys/bsdtask_info.h>
1170
1171 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
1172
1173
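/*
 * Fill in a proc_regioninfo_internal structure describing the map entry
 * that contains (or is the first entry beyond) the given address in the
 * task's map.  If the entry is backed by a vnode, the vnode address and
 * vid are returned as well.  Returns 1 on success, 0 if no entry was found.
 */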
1174 int
1175 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1176 {
1177
1178 vm_map_t map;
1179 vm_map_offset_t address = (vm_map_offset_t )arg;
1180 vm_map_entry_t tmp_entry;
1181 vm_map_entry_t entry;
1182 vm_map_offset_t start;
1183 vm_region_extended_info_data_t extended;
1184 vm_region_top_info_data_t top;
1185
1186 task_lock(task);
1187 map = task->map;
1188 if (map == VM_MAP_NULL)
1189 {
1190 task_unlock(task);
1191 return(0);
1192 }
1193 vm_map_reference(map);
1194 task_unlock(task);
1195
1196 vm_map_lock_read(map);
1197
1198 start = address;
1199 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1200 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1201 vm_map_unlock_read(map);
1202 vm_map_deallocate(map);
1203 return(0);
1204 }
1205 } else {
1206 entry = tmp_entry;
1207 }
1208
1209 start = entry->vme_start;
1210
1211 pinfo->pri_offset = entry->offset;
1212 pinfo->pri_protection = entry->protection;
1213 pinfo->pri_max_protection = entry->max_protection;
1214 pinfo->pri_inheritance = entry->inheritance;
1215 pinfo->pri_behavior = entry->behavior;
1216 pinfo->pri_user_wired_count = entry->user_wired_count;
1217 pinfo->pri_user_tag = entry->alias;
1218
1219 if (entry->is_sub_map) {
1220 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1221 } else {
1222 if (entry->is_shared)
1223 pinfo->pri_flags |= PROC_REGION_SHARED;
1224 }
1225
1226
1227 extended.protection = entry->protection;
1228 extended.user_tag = entry->alias;
1229 extended.pages_resident = 0;
1230 extended.pages_swapped_out = 0;
1231 extended.pages_shared_now_private = 0;
1232 extended.pages_dirtied = 0;
1233 extended.external_pager = 0;
1234 extended.shadow_depth = 0;
1235
1236 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
1237
1238 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1239 extended.share_mode = SM_PRIVATE;
1240
1241 top.private_pages_resident = 0;
1242 top.shared_pages_resident = 0;
1243 vm_map_region_top_walk(entry, &top);
1244
1245
1246 pinfo->pri_pages_resident = extended.pages_resident;
1247 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1248 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1249 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1250 pinfo->pri_ref_count = extended.ref_count;
1251 pinfo->pri_shadow_depth = extended.shadow_depth;
1252 pinfo->pri_share_mode = extended.share_mode;
1253
1254 pinfo->pri_private_pages_resident = top.private_pages_resident;
1255 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1256 pinfo->pri_obj_id = top.obj_id;
1257
1258 pinfo->pri_address = (uint64_t)start;
1259 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1260 pinfo->pri_depth = 0;
1261
1262 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1263 *vnodeaddr = (uintptr_t)0;
1264
1265 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1266 vm_map_unlock_read(map);
1267 vm_map_deallocate(map);
1268 return(1);
1269 }
1270 }
1271
1272 vm_map_unlock_read(map);
1273 vm_map_deallocate(map);
1274 return(1);
1275 }
1276
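/*
 * Walk to the bottom of the map entry's shadow chain and, if the
 * terminal object is backed by the vnode pager, return the vnode
 * address and vid.  Returns 1 on success, 0 otherwise.
 */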
1277 static int
1278 fill_vnodeinfoforaddr(
1279 vm_map_entry_t entry,
1280 uintptr_t * vnodeaddr,
1281 uint32_t * vid)
1282 {
1283 vm_object_t top_object, object;
1284 memory_object_t memory_object;
1285 memory_object_pager_ops_t pager_ops;
1286 kern_return_t kr;
1287 int shadow_depth;
1288
1289
1290 if (entry->is_sub_map) {
1291 return(0);
1292 } else {
1293 /*
1294 * The last object in the shadow chain has the
1295 * relevant pager information.
1296 */
1297 top_object = entry->object.vm_object;
1298 if (top_object == VM_OBJECT_NULL) {
1299 object = VM_OBJECT_NULL;
1300 shadow_depth = 0;
1301 } else {
1302 vm_object_lock(top_object);
1303 for (object = top_object, shadow_depth = 0;
1304 object->shadow != VM_OBJECT_NULL;
1305 object = object->shadow, shadow_depth++) {
1306 vm_object_lock(object->shadow);
1307 vm_object_unlock(object);
1308 }
1309 }
1310 }
1311
1312 if (object == VM_OBJECT_NULL) {
1313 return(0);
1314 } else if (object->internal) {
1315 vm_object_unlock(object);
1316 return(0);
1317 } else if (! object->pager_ready ||
1318 object->terminating ||
1319 ! object->alive) {
1320 vm_object_unlock(object);
1321 return(0);
1322 } else {
1323 memory_object = object->pager;
1324 pager_ops = memory_object->mo_pager_ops;
1325 if (pager_ops == &vnode_pager_ops) {
1326 kr = vnode_pager_get_object_vnode(
1327 memory_object,
1328 vnodeaddr, vid);
1329 if (kr != KERN_SUCCESS) {
1330 vm_object_unlock(object);
1331 return(0);
1332 }
1333 } else {
1334 vm_object_unlock(object);
1335 return(0);
1336 }
1337 }
1338 vm_object_unlock(object);
1339 return(1);
1340 }
1341
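/*
 * Return the vnode (and its vid) backing the given memory object.
 */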
1342 kern_return_t
1343 vnode_pager_get_object_vnode (
1344 memory_object_t mem_obj,
1345 uintptr_t * vnodeaddr,
1346 uint32_t * vid)
1347 {
1348 vnode_pager_t vnode_object;
1349
1350 vnode_object = vnode_pager_lookup(mem_obj);
1351 if (vnode_object->vnode_handle) {
1352 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1353 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1354
1355 return(KERN_SUCCESS);
1356 }
1357
1358 return(KERN_FAILURE);
1359 }
1360
1361
1362 /*
1363 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1364 * object locked; otherwise return NULL with nothing locked.
1365 */
1366
1367 vm_object_t
1368 find_vnode_object(
1369 vm_map_entry_t entry
1370 )
1371 {
1372 vm_object_t top_object, object;
1373 memory_object_t memory_object;
1374 memory_object_pager_ops_t pager_ops;
1375
1376 if (!entry->is_sub_map) {
1377
1378 /*
1379 * The last object in the shadow chain has the
1380 * relevant pager information.
1381 */
1382
1383 top_object = entry->object.vm_object;
1384
1385 if (top_object) {
1386 vm_object_lock(top_object);
1387
1388 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1389 vm_object_lock(object->shadow);
1390 vm_object_unlock(object);
1391 }
1392
1393 if (object && !object->internal && object->pager_ready && !object->terminating &&
1394 object->alive) {
1395 memory_object = object->pager;
1396 pager_ops = memory_object->mo_pager_ops;
1397
1398 /*
1399 * If this object points to the vnode_pager_ops, then we found what we're
1400 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1401 * vnode and so we fall through to the bottom and return NULL.
1402 */
1403
1404 if (pager_ops == &vnode_pager_ops)
1405 return object; /* we return with the object locked */
1406 }
1407
1408 vm_object_unlock(object);
1409 }
1410
1411 }
1412
1413 return(VM_OBJECT_NULL);
1414 }