apple/xnu (xnu-1504.9.26): osfmk/vm/bsd_vm.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/errno.h>
30
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
39 #include <mach/upl.h>
40 #include <mach/thread_act.h>
41
42 #include <kern/assert.h>
43 #include <kern/host.h>
44 #include <kern/thread.h>
45
46 #include <ipc/ipc_port.h>
47 #include <ipc/ipc_space.h>
48
49 #include <default_pager/default_pager_types.h>
50 #include <default_pager/default_pager_object_server.h>
51
52 #include <vm/vm_map.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/memory_object.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_protos.h>
57 #include <vm/vm_purgeable_internal.h>
58
59
60 /* BSD VM COMPONENT INTERFACES */
61 int
62 get_map_nentries(
63 vm_map_t);
64
65 vm_offset_t
66 get_map_start(
67 vm_map_t);
68
69 vm_offset_t
70 get_map_end(
71 vm_map_t);
72
73 /*
74  * Return the number of entries in the given VM map.
75  */
76 int
77 get_map_nentries(
78 vm_map_t map)
79 {
80 return(map->hdr.nentries);
81 }
82
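/*
 * The following two routines return the start address of a map's first
 * entry and the end address of its last entry as mach_vm_offset_t values.
 */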
83 mach_vm_offset_t
84 mach_get_vm_start(vm_map_t map)
85 {
86 return( vm_map_first_entry(map)->vme_start);
87 }
88
89 mach_vm_offset_t
90 mach_get_vm_end(vm_map_t map)
91 {
92 return( vm_map_last_entry(map)->vme_end);
93 }
94
95 /*
96  * Legacy routines to get the start and end addresses of a vm_map_t.
97  * They return the values as vm_offset_t, so they are only accurate
98  * for maps whose address size matches the kernel map's; wider
99  * addresses are truncated by CAST_DOWN.
100  */
101 vm_offset_t
102 get_vm_start(
103 vm_map_t map)
104 {
105 return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
106 }
107
108 vm_offset_t
109 get_vm_end(
110 vm_map_t map)
111 {
112 return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
113 }
114
115 /*
116 * BSD VNODE PAGER
117 */
118
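/*
 * The memory_object_pager_ops vtable through which the VM system
 * invokes the vnode pager.
 */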
119 const struct memory_object_pager_ops vnode_pager_ops = {
120 vnode_pager_reference,
121 vnode_pager_deallocate,
122 vnode_pager_init,
123 vnode_pager_terminate,
124 vnode_pager_data_request,
125 vnode_pager_data_return,
126 vnode_pager_data_initialize,
127 vnode_pager_data_unlock,
128 vnode_pager_synchronize,
129 vnode_pager_map,
130 vnode_pager_last_unmap,
131 "vnode pager"
132 };
133
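/*
 * Per-object state for a vnode-backed memory object.
 */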
134 typedef struct vnode_pager {
135 struct ipc_object_header pager_header; /* fake ip_kotype() */
136 memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */
137 unsigned int ref_count; /* reference count */
138 memory_object_control_t control_handle; /* mem object control handle */
139 struct vnode *vnode_handle; /* vnode handle */
140 } *vnode_pager_t;
141
142 #define pager_ikot pager_header.io_bits
143
144 ipc_port_t
145 trigger_name_to_port( /* forward */
146 mach_port_t);
147
148 kern_return_t
149 vnode_pager_cluster_read( /* forward */
150 vnode_pager_t,
151 vm_object_offset_t,
152 vm_object_offset_t,
153 uint32_t,
154 vm_size_t);
155
156 void
157 vnode_pager_cluster_write( /* forward */
158 vnode_pager_t,
159 vm_object_offset_t,
160 vm_size_t,
161 vm_object_offset_t *,
162 int *,
163 int);
164
165
166 vnode_pager_t
167 vnode_object_create( /* forward */
168 struct vnode *);
169
170 vnode_pager_t
171 vnode_pager_lookup( /* forward */
172 memory_object_t);
173
174 zone_t vnode_pager_zone;
175
176
177 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
178
179 /* TODO: Should be set dynamically by vnode_pager_init() */
180 #define CLUSTER_SHIFT 1
181
182 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
183 #define MAX_VNODE 10000
184
185
186 #if DEBUG
187 int pagerdebug=0;
188
189 #define PAGER_ALL 0xffffffff
190 #define PAGER_INIT 0x00000001
191 #define PAGER_PAGEIN 0x00000002
192
193 #define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
194 #else
195 #define PAGER_DEBUG(LEVEL, A)
196 #endif
197
198 extern int proc_resetpcontrol(int);
199
200 #if DEVELOPMENT || DEBUG
201 extern unsigned long vm_cs_validated_resets;
202 #endif
203
204 /*
205 * Routine: mach_macx_triggers
206 * Function:
207  * Syscall interface to set the callbacks for the default pager's
208  * low and high water marks.
209 */
210 int
211 mach_macx_triggers(
212 struct macx_triggers_args *args)
213 {
214 int hi_water = args->hi_water;
215 int low_water = args->low_water;
216 int flags = args->flags;
217 mach_port_t trigger_name = args->alert_port;
218 kern_return_t kr;
219 memory_object_default_t default_pager;
220 ipc_port_t trigger_port;
221
222 default_pager = MEMORY_OBJECT_DEFAULT_NULL;
223 kr = host_default_memory_manager(host_priv_self(),
224 &default_pager, 0);
225 if(kr != KERN_SUCCESS) {
226 return EINVAL;
227 }
228
229 if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
230 ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
231 /* can't have it both ways */
232 return EINVAL;
233 }
234
235 if (default_pager_init_flag == 0) {
236 start_def_pager(NULL);
237 default_pager_init_flag = 1;
238 }
239
240 if (flags & SWAP_ENCRYPT_ON) {
241 /* ENCRYPTED SWAP: tell default_pager to encrypt */
242 default_pager_triggers(default_pager,
243 0, 0,
244 SWAP_ENCRYPT_ON,
245 IP_NULL);
246 } else if (flags & SWAP_ENCRYPT_OFF) {
247 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
248 default_pager_triggers(default_pager,
249 0, 0,
250 SWAP_ENCRYPT_OFF,
251 IP_NULL);
252 }
253
254 if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
255 /*
256 * Time to switch to the emergency segment.
257 */
258 return default_pager_triggers(default_pager,
259 0, 0,
260 USE_EMERGENCY_SWAP_FILE_FIRST,
261 IP_NULL);
262 }
263
264 if (flags & SWAP_FILE_CREATION_ERROR) {
265 /*
266 * For some reason, the dynamic pager failed to create a swap file.
267 */
268 trigger_port = trigger_name_to_port(trigger_name);
269 if(trigger_port == NULL) {
270 return EINVAL;
271 }
272 /* trigger_port is locked and active */
273 ipc_port_make_send_locked(trigger_port);
274 /* now unlocked */
275 default_pager_triggers(default_pager,
276 0, 0,
277 SWAP_FILE_CREATION_ERROR,
278 trigger_port);
279 }
280
281 if (flags & HI_WAT_ALERT) {
282 trigger_port = trigger_name_to_port(trigger_name);
283 if(trigger_port == NULL) {
284 return EINVAL;
285 }
286 /* trigger_port is locked and active */
287 ipc_port_make_send_locked(trigger_port);
288 /* now unlocked */
289 default_pager_triggers(default_pager,
290 hi_water, low_water,
291 HI_WAT_ALERT, trigger_port);
292 }
293
294 if (flags & LO_WAT_ALERT) {
295 trigger_port = trigger_name_to_port(trigger_name);
296 if(trigger_port == NULL) {
297 return EINVAL;
298 }
299 /* trigger_port is locked and active */
300 ipc_port_make_send_locked(trigger_port);
301 /* now unlocked */
302 default_pager_triggers(default_pager,
303 hi_water, low_water,
304 LO_WAT_ALERT, trigger_port);
305 }
306
307
308 if (flags & PROC_RESUME) {
309
310 /*
311 * For this call, hi_water is used to pass in the pid of the process we want to resume
312 * or unthrottle. This is of course restricted to the superuser (checked inside of
313 * proc_resetpcontrol).
314 */
315
316 return proc_resetpcontrol(hi_water);
317 }
318
319 /*
320  * Set thread scheduling priority and policy for the current thread;
321  * it is assumed, for the time being, that the thread setting the alert
322  * is the same one that will be servicing it.
323 *
324 * XXX This does not belong in the kernel XXX
325 */
326 if (flags & HI_WAT_ALERT) {
327 thread_precedence_policy_data_t pre;
328 thread_extended_policy_data_t ext;
329
330 ext.timeshare = FALSE;
331 pre.importance = INT32_MAX;
332
333 thread_policy_set(current_thread(),
334 THREAD_EXTENDED_POLICY,
335 (thread_policy_t)&ext,
336 THREAD_EXTENDED_POLICY_COUNT);
337
338 thread_policy_set(current_thread(),
339 THREAD_PRECEDENCE_POLICY,
340 (thread_policy_t)&pre,
341 THREAD_PRECEDENCE_POLICY_COUNT);
342
343 current_thread()->options |= TH_OPT_VMPRIV;
344 }
345
346 if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
347 return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
348 }
349
350 return 0;
351 }
352
353 /*
354  * Translate a trigger port name in the caller's IPC space into an ipc_port_t; returns NULL if it cannot be translated.
355  */
356 ipc_port_t
357 trigger_name_to_port(
358 mach_port_t trigger_name)
359 {
360 ipc_port_t trigger_port;
361 ipc_space_t space;
362
363 if (trigger_name == 0)
364 return (NULL);
365
366 space = current_space();
367 if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
368 &trigger_port) != KERN_SUCCESS)
369 return (NULL);
370 return trigger_port;
371 }
372
373
374 extern int uiomove64(addr64_t, int, void *);
375 #define MAX_RUN 32
376
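/*
 * Copy data between the resident pages of a memory object and a uio,
 * working on runs of up to MAX_RUN pages at a time.  Returns 0 on success
 * or when the caller must fall back to the regular path; otherwise returns
 * the error from uiomove64().
 */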
377 int
378 memory_object_control_uiomove(
379 memory_object_control_t control,
380 memory_object_offset_t offset,
381 void * uio,
382 int start_offset,
383 int io_requested,
384 int mark_dirty,
385 int take_reference)
386 {
387 vm_object_t object;
388 vm_page_t dst_page;
389 int xsize;
390 int retval = 0;
391 int cur_run;
392 int cur_needed;
393 int i;
394 int orig_offset;
395 vm_page_t page_run[MAX_RUN];
396
397 object = memory_object_control_to_vm_object(control);
398 if (object == VM_OBJECT_NULL) {
399 return (0);
400 }
401 assert(!object->internal);
402
403 vm_object_lock(object);
404
405 if (mark_dirty && object->copy != VM_OBJECT_NULL) {
406 /*
407 * We can't modify the pages without honoring
408 * copy-on-write obligations first, so fall off
409 * this optimized path and fall back to the regular
410 * path.
411 */
412 vm_object_unlock(object);
413 return 0;
414 }
415 orig_offset = start_offset;
416
417 while (io_requested && retval == 0) {
418
419 cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
420
421 if (cur_needed > MAX_RUN)
422 cur_needed = MAX_RUN;
423
424 for (cur_run = 0; cur_run < cur_needed; ) {
425
426 if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
427 break;
428
429 /*
430 * if we're in this routine, we are inside a filesystem's
431 * locking model, so we don't ever want to wait for pages that have
432 * list_req_pending == TRUE since it means that the
433 * page is a candidate for some type of I/O operation,
434 * but that it has not yet been gathered into a UPL...
435 * this implies that it is still outside the domain
436 * of the filesystem and that whoever is responsible for
437 * grabbing it into a UPL may be stuck behind the filesystem
438 * lock this thread owns, or trying to take a lock exclusively
439 * and waiting for the readers to drain from a rw lock...
440 * if we block in those cases, we will deadlock
441 */
442 if (dst_page->list_req_pending) {
443
444 if (dst_page->absent) {
445 /*
446 * this is the list_req_pending | absent | busy case
447 * which originates from vm_fault_page... we want
448 * to fall out of the fast path and go back
449 * to the caller which will gather this page
450 * into a UPL and issue the I/O if no one
451 * else beats us to it
452 */
453 break;
454 }
455 if (dst_page->pageout || dst_page->cleaning) {
456 /*
457 * this is the list_req_pending | pageout | busy case
458 * or the list_req_pending | cleaning case...
459 * which originate from the pageout_scan and
460 * msync worlds for the pageout case and the hibernate
461 * pre-cleaning world for the cleaning case...
462 * we need to reset the state of this page to indicate
463 * it should stay in the cache marked dirty... nothing else we
464 * can do at this point... we can't block on it, we can't busy
465 * it and we can't clean it from this routine.
466 */
467 vm_page_lockspin_queues();
468
469 vm_pageout_queue_steal(dst_page, TRUE);
470 vm_page_deactivate(dst_page);
471
472 vm_page_unlock_queues();
473 }
474 /*
475 * this is the list_req_pending | cleaning case...
476 * we can go ahead and deal with this page since
477  * it's OK for us to mark this page busy... if a UPL
478 * tries to gather this page, it will block until the
479 * busy is cleared, thus allowing us safe use of the page
480 * when we're done with it, we will clear busy and wake
481 * up anyone waiting on it, thus allowing the UPL creation
482 * to finish
483 */
484
485 } else if (dst_page->busy || dst_page->cleaning) {
486 /*
487 * someone else is playing with the page... if we've
488 * already collected pages into this run, go ahead
489  * and process them now, since we can't block on this
490  * page while holding other pages in the BUSY state...
491  * otherwise we will wait for this page
492 */
493 if (cur_run)
494 break;
495 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
496 continue;
497 }
498
499 /*
500 * this routine is only called when copying
501 * to/from real files... no need to consider
502 * encrypted swap pages
503 */
504 assert(!dst_page->encrypted);
505
506 if (mark_dirty) {
507 dst_page->dirty = TRUE;
508 if (dst_page->cs_validated &&
509 !dst_page->cs_tainted) {
510 /*
511 * CODE SIGNING:
512 * We're modifying a code-signed
513 * page: force revalidate
514 */
515 dst_page->cs_validated = FALSE;
516 #if DEVELOPMENT || DEBUG
517 vm_cs_validated_resets++;
518 #endif
519 pmap_disconnect(dst_page->phys_page);
520 }
521 }
522 dst_page->busy = TRUE;
523
524 page_run[cur_run++] = dst_page;
525
526 offset += PAGE_SIZE_64;
527 }
528 if (cur_run == 0)
529 /*
530 * we hit a 'hole' in the cache or
531 * a page we don't want to try to handle,
532 * so bail at this point
533 * we'll unlock the object below
534 */
535 break;
536 vm_object_unlock(object);
537
538 for (i = 0; i < cur_run; i++) {
539
540 dst_page = page_run[i];
541
542 if ((xsize = PAGE_SIZE - start_offset) > io_requested)
543 xsize = io_requested;
544
545 if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
546 break;
547
548 io_requested -= xsize;
549 start_offset = 0;
550 }
551 vm_object_lock(object);
552
553 /*
554 * if we have more than 1 page to work on
555 * in the current run, or the original request
556 * started at offset 0 of the page, or we're
557 * processing multiple batches, we will move
558 * the pages to the tail of the inactive queue
559 * to implement an LRU for read/write accesses
560 *
561 * the check for orig_offset == 0 is there to
562 * mitigate the cost of small (< page_size) requests
563 * to the same page (this way we only move it once)
564 */
565 if (take_reference && (cur_run > 1 || orig_offset == 0)) {
566
567 vm_page_lockspin_queues();
568
569 for (i = 0; i < cur_run; i++)
570 vm_page_lru(page_run[i]);
571
572 vm_page_unlock_queues();
573 }
574 for (i = 0; i < cur_run; i++) {
575 dst_page = page_run[i];
576
577 /*
578 * someone is explicitly referencing this page...
579 * update clustered and speculative state
580 *
581 */
582 VM_PAGE_CONSUME_CLUSTERED(dst_page);
583
584 PAGE_WAKEUP_DONE(dst_page);
585 }
586 orig_offset = 0;
587 }
588 vm_object_unlock(object);
589
590 return (retval);
591 }
592
593
594 /*
595  * One-time initialization: create the zone for vnode_pager structures and bootstrap the other file-backed pagers.
596  */
597 void
598 vnode_pager_bootstrap(void)
599 {
600 register vm_size_t size;
601
602 size = (vm_size_t) sizeof(struct vnode_pager);
603 vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
604 PAGE_SIZE, "vnode pager structures");
605 zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
606
607 #if CONFIG_CODE_DECRYPTION
608 apple_protect_pager_bootstrap();
609 #endif /* CONFIG_CODE_DECRYPTION */
610 swapfile_pager_bootstrap();
611 return;
612 }
613
614 /*
615  * Create and return a memory object (vnode pager) for the given vnode.
616  */
617 memory_object_t
618 vnode_pager_setup(
619 struct vnode *vp,
620 __unused memory_object_t pager)
621 {
622 vnode_pager_t vnode_object;
623
624 vnode_object = vnode_object_create(vp);
625 if (vnode_object == VNODE_PAGER_NULL)
626 panic("vnode_pager_setup: vnode_object_create() failed");
627 return((memory_object_t)vnode_object);
628 }
629
630 /*
631  * Attach the memory object control to this pager and set the default object attributes.
632  */
633 kern_return_t
634 vnode_pager_init(memory_object_t mem_obj,
635 memory_object_control_t control,
636 #if !DEBUG
637 __unused
638 #endif
639 memory_object_cluster_size_t pg_size)
640 {
641 vnode_pager_t vnode_object;
642 kern_return_t kr;
643 memory_object_attr_info_data_t attributes;
644
645
646 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
647
648 if (control == MEMORY_OBJECT_CONTROL_NULL)
649 return KERN_INVALID_ARGUMENT;
650
651 vnode_object = vnode_pager_lookup(mem_obj);
652
653 memory_object_control_reference(control);
654
655 vnode_object->control_handle = control;
656
657 attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
658 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
659 attributes.cluster_size = (1 << (PAGE_SHIFT));
660 attributes.may_cache_object = TRUE;
661 attributes.temporary = TRUE;
662
663 kr = memory_object_change_attributes(
664 control,
665 MEMORY_OBJECT_ATTRIBUTE_INFO,
666 (memory_object_info_t) &attributes,
667 MEMORY_OBJECT_ATTR_INFO_COUNT);
668 if (kr != KERN_SUCCESS)
669 panic("vnode_pager_init: memory_object_change_attributes() failed");
670
671 return(KERN_SUCCESS);
672 }
673
674 /*
675  * data_return: push the specified range of dirty pages back to the vnode.
676  */
677 kern_return_t
678 vnode_pager_data_return(
679 memory_object_t mem_obj,
680 memory_object_offset_t offset,
681 memory_object_cluster_size_t data_cnt,
682 memory_object_offset_t *resid_offset,
683 int *io_error,
684 __unused boolean_t dirty,
685 __unused boolean_t kernel_copy,
686 int upl_flags)
687 {
688 register vnode_pager_t vnode_object;
689
690 vnode_object = vnode_pager_lookup(mem_obj);
691
692 vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
693
694 return KERN_SUCCESS;
695 }
696
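/*
 * data_initialize is not supported for vnode-backed objects.
 */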
697 kern_return_t
698 vnode_pager_data_initialize(
699 __unused memory_object_t mem_obj,
700 __unused memory_object_offset_t offset,
701 __unused memory_object_cluster_size_t data_cnt)
702 {
703 panic("vnode_pager_data_initialize");
704 return KERN_FAILURE;
705 }
706
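/*
 * data_unlock is not supported for vnode-backed objects.
 */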
707 kern_return_t
708 vnode_pager_data_unlock(
709 __unused memory_object_t mem_obj,
710 __unused memory_object_offset_t offset,
711 __unused memory_object_size_t size,
712 __unused vm_prot_t desired_access)
713 {
714 return KERN_FAILURE;
715 }
716
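/*
 * Report whether the underlying vnode is still in use.
 */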
717 kern_return_t
718 vnode_pager_get_isinuse(
719 memory_object_t mem_obj,
720 uint32_t *isinuse)
721 {
722 vnode_pager_t vnode_object;
723
724 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
725 *isinuse = 1;
726 return KERN_INVALID_ARGUMENT;
727 }
728
729 vnode_object = vnode_pager_lookup(mem_obj);
730
731 *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
732 return KERN_SUCCESS;
733 }
734
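/*
 * Query the hard-throttle I/O limit for the underlying vnode.
 */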
735 kern_return_t
736 vnode_pager_check_hard_throttle(
737 memory_object_t mem_obj,
738 uint32_t *limit,
739 uint32_t hard_throttle)
740 {
741 vnode_pager_t vnode_object;
742
743 if (mem_obj->mo_pager_ops != &vnode_pager_ops)
744 return KERN_INVALID_ARGUMENT;
745
746 vnode_object = vnode_pager_lookup(mem_obj);
747
748 (void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle);
749 return KERN_SUCCESS;
750 }
751
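/*
 * Return the current size of the underlying file.
 */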
752 kern_return_t
753 vnode_pager_get_object_size(
754 memory_object_t mem_obj,
755 memory_object_offset_t *length)
756 {
757 vnode_pager_t vnode_object;
758
759 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
760 *length = 0;
761 return KERN_INVALID_ARGUMENT;
762 }
763
764 vnode_object = vnode_pager_lookup(mem_obj);
765
766 *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
767 return KERN_SUCCESS;
768 }
769
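/*
 * Return the pathname of the underlying vnode, if it can be obtained.
 */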
770 kern_return_t
771 vnode_pager_get_object_pathname(
772 memory_object_t mem_obj,
773 char *pathname,
774 vm_size_t *length_p)
775 {
776 vnode_pager_t vnode_object;
777
778 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
779 return KERN_INVALID_ARGUMENT;
780 }
781
782 vnode_object = vnode_pager_lookup(mem_obj);
783
784 return vnode_pager_get_pathname(vnode_object->vnode_handle,
785 pathname,
786 length_p);
787 }
788
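/*
 * Return the name of the underlying vnode.
 */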
789 kern_return_t
790 vnode_pager_get_object_filename(
791 memory_object_t mem_obj,
792 const char **filename)
793 {
794 vnode_pager_t vnode_object;
795
796 if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
797 return KERN_INVALID_ARGUMENT;
798 }
799
800 vnode_object = vnode_pager_lookup(mem_obj);
801
802 return vnode_pager_get_filename(vnode_object->vnode_handle,
803 filename);
804 }
805
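/*
 * Return the code-signing blobs attached to the underlying vnode.
 */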
806 kern_return_t
807 vnode_pager_get_object_cs_blobs(
808 memory_object_t mem_obj,
809 void **blobs)
810 {
811 vnode_pager_t vnode_object;
812
813 if (mem_obj == MEMORY_OBJECT_NULL ||
814 mem_obj->mo_pager_ops != &vnode_pager_ops) {
815 return KERN_INVALID_ARGUMENT;
816 }
817
818 vnode_object = vnode_pager_lookup(mem_obj);
819
820 return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
821 blobs);
822 }
823
824 /*
825  * data_request: page in data for a fault, clustering around the faulting offset.
826  */
827 kern_return_t
828 vnode_pager_data_request(
829 memory_object_t mem_obj,
830 memory_object_offset_t offset,
831 __unused memory_object_cluster_size_t length,
832 __unused vm_prot_t desired_access,
833 memory_object_fault_info_t fault_info)
834 {
835 vnode_pager_t vnode_object;
836 memory_object_offset_t base_offset;
837 vm_size_t size;
838 uint32_t io_streaming = 0;
839
840 vnode_object = vnode_pager_lookup(mem_obj);
841
842 size = MAX_UPL_TRANSFER * PAGE_SIZE;
843 base_offset = offset;
844
845 if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
846 size = PAGE_SIZE;
847
848 assert(offset >= base_offset &&
849 offset < base_offset + size);
850
851 return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
852 }
853
854 /*
855  * Take an additional reference on the vnode pager.
856  */
857 void
858 vnode_pager_reference(
859 memory_object_t mem_obj)
860 {
861 register vnode_pager_t vnode_object;
862 unsigned int new_ref_count;
863
864 vnode_object = vnode_pager_lookup(mem_obj);
865 new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
866 assert(new_ref_count > 1);
867 }
868
869 /*
870  * Drop a reference; on the last one, release the vnode and free the pager.
871  */
872 void
873 vnode_pager_deallocate(
874 memory_object_t mem_obj)
875 {
876 register vnode_pager_t vnode_object;
877
878 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
879
880 vnode_object = vnode_pager_lookup(mem_obj);
881
882 if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
883 if (vnode_object->vnode_handle != NULL) {
884 vnode_pager_vrele(vnode_object->vnode_handle);
885 }
886 zfree(vnode_pager_zone, vnode_object);
887 }
888 return;
889 }
890
891 /*
892  * Terminate the memory object; nothing to do here for the vnode pager.
893  */
894 kern_return_t
895 vnode_pager_terminate(
896 #if !DEBUG
897 __unused
898 #endif
899 memory_object_t mem_obj)
900 {
901 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
902
903 return(KERN_SUCCESS);
904 }
905
906 /*
907  * Acknowledge a synchronize request via memory_object_synchronize_completed().
908  */
909 kern_return_t
910 vnode_pager_synchronize(
911 memory_object_t mem_obj,
912 memory_object_offset_t offset,
913 memory_object_size_t length,
914 __unused vm_sync_t sync_flags)
915 {
916 register vnode_pager_t vnode_object;
917
918 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
919
920 vnode_object = vnode_pager_lookup(mem_obj);
921
922 memory_object_synchronize_completed(vnode_object->control_handle, offset, length);
923
924 return (KERN_SUCCESS);
925 }
926
927 /*
928  * The object is being mapped; notify the UBC layer via ubc_map().
929  */
930 kern_return_t
931 vnode_pager_map(
932 memory_object_t mem_obj,
933 vm_prot_t prot)
934 {
935 vnode_pager_t vnode_object;
936 int ret;
937 kern_return_t kr;
938
939 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
940
941 vnode_object = vnode_pager_lookup(mem_obj);
942
943 ret = ubc_map(vnode_object->vnode_handle, prot);
944
945 if (ret != 0) {
946 kr = KERN_FAILURE;
947 } else {
948 kr = KERN_SUCCESS;
949 }
950
951 return kr;
952 }
953
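/*
 * The last mapping of the object has gone away; notify the UBC layer
 * via ubc_unmap().
 */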
954 kern_return_t
955 vnode_pager_last_unmap(
956 memory_object_t mem_obj)
957 {
958 register vnode_pager_t vnode_object;
959
960 PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
961
962 vnode_object = vnode_pager_lookup(mem_obj);
963
964 ubc_unmap(vnode_object->vnode_handle);
965 return KERN_SUCCESS;
966 }
967
968
969
970 /*
971  * Push a cluster of dirty pages to the vnode, for either the msync or the pageout path.
972  */
973 void
974 vnode_pager_cluster_write(
975 vnode_pager_t vnode_object,
976 vm_object_offset_t offset,
977 vm_size_t cnt,
978 vm_object_offset_t * resid_offset,
979 int * io_error,
980 int upl_flags)
981 {
982 vm_size_t size;
983 int errno;
984
985 if (upl_flags & UPL_MSYNC) {
986
987 upl_flags |= UPL_VNODE_PAGER;
988
989 if ( (upl_flags & UPL_IOSYNC) && io_error)
990 upl_flags |= UPL_KEEPCACHED;
991
992 while (cnt) {
993 size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */
994
995 assert((upl_size_t) size == size);
996 vnode_pageout(vnode_object->vnode_handle,
997 NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
998
999 if ( (upl_flags & UPL_KEEPCACHED) ) {
1000 if ( (*io_error = errno) )
1001 break;
1002 }
1003 cnt -= size;
1004 offset += size;
1005 }
1006 if (resid_offset)
1007 *resid_offset = offset;
1008
1009 } else {
1010 vm_object_offset_t vnode_size;
1011 vm_object_offset_t base_offset;
1012
1013 /*
1014 * this is the pageout path
1015 */
1016 vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
1017
1018 if (vnode_size > (offset + PAGE_SIZE)) {
1019 /*
1020 * preset the maximum size of the cluster
1021 * and put us on a nice cluster boundary...
1022  * and then clip the size to ensure we
1023 * don't request past the end of the underlying file
1024 */
1025 size = PAGE_SIZE * MAX_UPL_TRANSFER;
1026 base_offset = offset & ~((signed)(size - 1));
1027
1028 if ((base_offset + size) > vnode_size)
1029 size = round_page(((vm_size_t)(vnode_size - base_offset)));
1030 } else {
1031 /*
1032 * we've been requested to page out a page beyond the current
1033 * end of the 'file'... don't try to cluster in this case...
1034 * we still need to send this page through because it might
1035 * be marked precious and the underlying filesystem may need
1036 * to do something with it (besides page it out)...
1037 */
1038 base_offset = offset;
1039 size = PAGE_SIZE;
1040 }
1041 assert((upl_size_t) size == size);
1042 vnode_pageout(vnode_object->vnode_handle,
1043 NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL);
1044 }
1045 }
1046
1047
1048 /*
1049  * Read a cluster of pages from the vnode into the object, via vnode_pagein().
1050  */
1051 kern_return_t
1052 vnode_pager_cluster_read(
1053 vnode_pager_t vnode_object,
1054 vm_object_offset_t base_offset,
1055 vm_object_offset_t offset,
1056 uint32_t io_streaming,
1057 vm_size_t cnt)
1058 {
1059 int local_error = 0;
1060 int kret;
1061 int flags = 0;
1062
1063 assert(! (cnt & PAGE_MASK));
1064
1065 if (io_streaming)
1066 flags |= UPL_IOSTREAMING;
1067
1068 assert((upl_size_t) cnt == cnt);
1069 kret = vnode_pagein(vnode_object->vnode_handle,
1070 (upl_t) NULL,
1071 (upl_offset_t) (offset - base_offset),
1072 base_offset,
1073 (upl_size_t) cnt,
1074 flags,
1075 &local_error);
1076 /*
1077  * if (kret == PAGER_ABSENT) ...
1078  * We need to work out the definitions here: 1 corresponds to PAGER_ABSENT
1079  * as defined in bsd/vm/vm_pager.h.  However, we should not include that
1080  * file here; that would be a layering violation.
1081  */
1082 if (kret == 1) {
1083 int uplflags;
1084 upl_t upl = NULL;
1085 unsigned int count = 0;
1086 kern_return_t kr;
1087
1088 uplflags = (UPL_NO_SYNC |
1089 UPL_CLEAN_IN_PLACE |
1090 UPL_SET_INTERNAL);
1091 count = 0;
1092 assert((upl_size_t) cnt == cnt);
1093 kr = memory_object_upl_request(vnode_object->control_handle,
1094 base_offset, (upl_size_t) cnt,
1095 &upl, NULL, &count, uplflags);
1096 if (kr == KERN_SUCCESS) {
1097 upl_abort(upl, 0);
1098 upl_deallocate(upl);
1099 } else {
1100 /*
1101 * We couldn't gather the page list, probably
1102 * because the memory object doesn't have a link
1103 * to a VM object anymore (forced unmount, for
1104 * example). Just return an error to the vm_fault()
1105 * path and let it handle it.
1106 */
1107 }
1108
1109 return KERN_FAILURE;
1110 }
1111
1112 return KERN_SUCCESS;
1113
1114 }
1115
1116
1117 /*
1118  * Release cached memory objects that are backed by the vnode pager.
1119  */
1120 void
1121 vnode_pager_release_from_cache(
1122 int *cnt)
1123 {
1124 memory_object_free_from_cache(
1125 &realhost, &vnode_pager_ops, cnt);
1126 }
1127
1128 /*
1129  * Allocate and initialize a vnode_pager structure for the given vnode.
1130  */
1131 vnode_pager_t
1132 vnode_object_create(
1133 struct vnode *vp)
1134 {
1135 register vnode_pager_t vnode_object;
1136
1137 vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
1138 if (vnode_object == VNODE_PAGER_NULL)
1139 return(VNODE_PAGER_NULL);
1140
1141 /*
1142 * The vm_map call takes both named entry ports and raw memory
1143 * objects in the same parameter. We need to make sure that
1144 * vm_map does not see this object as a named entry port. So,
1145 * we reserve the first word in the object for a fake ip_kotype
1146 * setting - that will tell vm_map to use it as a memory object.
1147 */
1148 vnode_object->pager_ops = &vnode_pager_ops;
1149 vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
1150 vnode_object->ref_count = 1;
1151 vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
1152 vnode_object->vnode_handle = vp;
1153
1154 return(vnode_object);
1155 }
1156
1157 /*
1158  * Convert a memory_object_t back into the vnode_pager_t that it really is.
1159  */
1160 vnode_pager_t
1161 vnode_pager_lookup(
1162 memory_object_t name)
1163 {
1164 vnode_pager_t vnode_object;
1165
1166 vnode_object = (vnode_pager_t)name;
1167 assert(vnode_object->pager_ops == &vnode_pager_ops);
1168 return (vnode_object);
1169 }
1170
1171
1172 /*********************** proc_info implementation *************/
1173
1174 #include <sys/bsdtask_info.h>
1175
1176 static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
1177
1178
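/*
 * Fill a proc_regioninfo_internal structure for the region that contains
 * (or is the first one after) the given address in the task's map.  If the
 * region is backed by a vnode, also return its address and vid.
 * Returns 1 if a region was found, 0 otherwise.
 */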
1179 int
1180 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
1181 {
1182
1183 vm_map_t map;
1184 vm_map_offset_t address = (vm_map_offset_t )arg;
1185 vm_map_entry_t tmp_entry;
1186 vm_map_entry_t entry;
1187 vm_map_offset_t start;
1188 vm_region_extended_info_data_t extended;
1189 vm_region_top_info_data_t top;
1190
1191 task_lock(task);
1192 map = task->map;
1193 if (map == VM_MAP_NULL)
1194 {
1195 task_unlock(task);
1196 return(0);
1197 }
1198 vm_map_reference(map);
1199 task_unlock(task);
1200
1201 vm_map_lock_read(map);
1202
1203 start = address;
1204 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1205 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1206 vm_map_unlock_read(map);
1207 vm_map_deallocate(map);
1208 return(0);
1209 }
1210 } else {
1211 entry = tmp_entry;
1212 }
1213
1214 start = entry->vme_start;
1215
1216 pinfo->pri_offset = entry->offset;
1217 pinfo->pri_protection = entry->protection;
1218 pinfo->pri_max_protection = entry->max_protection;
1219 pinfo->pri_inheritance = entry->inheritance;
1220 pinfo->pri_behavior = entry->behavior;
1221 pinfo->pri_user_wired_count = entry->user_wired_count;
1222 pinfo->pri_user_tag = entry->alias;
1223
1224 if (entry->is_sub_map) {
1225 pinfo->pri_flags |= PROC_REGION_SUBMAP;
1226 } else {
1227 if (entry->is_shared)
1228 pinfo->pri_flags |= PROC_REGION_SHARED;
1229 }
1230
1231
1232 extended.protection = entry->protection;
1233 extended.user_tag = entry->alias;
1234 extended.pages_resident = 0;
1235 extended.pages_swapped_out = 0;
1236 extended.pages_shared_now_private = 0;
1237 extended.pages_dirtied = 0;
1238 extended.external_pager = 0;
1239 extended.shadow_depth = 0;
1240
1241 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
1242
1243 if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1244 extended.share_mode = SM_PRIVATE;
1245
1246 top.private_pages_resident = 0;
1247 top.shared_pages_resident = 0;
1248 vm_map_region_top_walk(entry, &top);
1249
1250
1251 pinfo->pri_pages_resident = extended.pages_resident;
1252 pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1253 pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1254 pinfo->pri_pages_dirtied = extended.pages_dirtied;
1255 pinfo->pri_ref_count = extended.ref_count;
1256 pinfo->pri_shadow_depth = extended.shadow_depth;
1257 pinfo->pri_share_mode = extended.share_mode;
1258
1259 pinfo->pri_private_pages_resident = top.private_pages_resident;
1260 pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1261 pinfo->pri_obj_id = top.obj_id;
1262
1263 pinfo->pri_address = (uint64_t)start;
1264 pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1265 pinfo->pri_depth = 0;
1266
1267 if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1268 *vnodeaddr = (uintptr_t)0;
1269
1270 if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1271 vm_map_unlock_read(map);
1272 vm_map_deallocate(map);
1273 return(1);
1274 }
1275 }
1276
1277 vm_map_unlock_read(map);
1278 vm_map_deallocate(map);
1279 return(1);
1280 }
1281
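/*
 * Walk to the bottom of the entry's shadow chain and, if the backing object
 * is paged by the vnode pager, return the vnode's address and vid.
 * Returns 1 on success, 0 otherwise.
 */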
1282 static int
1283 fill_vnodeinfoforaddr(
1284 vm_map_entry_t entry,
1285 uintptr_t * vnodeaddr,
1286 uint32_t * vid)
1287 {
1288 vm_object_t top_object, object;
1289 memory_object_t memory_object;
1290 memory_object_pager_ops_t pager_ops;
1291 kern_return_t kr;
1292 int shadow_depth;
1293
1294
1295 if (entry->is_sub_map) {
1296 return(0);
1297 } else {
1298 /*
1299 * The last object in the shadow chain has the
1300 * relevant pager information.
1301 */
1302 top_object = entry->object.vm_object;
1303 if (top_object == VM_OBJECT_NULL) {
1304 object = VM_OBJECT_NULL;
1305 shadow_depth = 0;
1306 } else {
1307 vm_object_lock(top_object);
1308 for (object = top_object, shadow_depth = 0;
1309 object->shadow != VM_OBJECT_NULL;
1310 object = object->shadow, shadow_depth++) {
1311 vm_object_lock(object->shadow);
1312 vm_object_unlock(object);
1313 }
1314 }
1315 }
1316
1317 if (object == VM_OBJECT_NULL) {
1318 return(0);
1319 } else if (object->internal) {
1320 vm_object_unlock(object);
1321 return(0);
1322 } else if (! object->pager_ready ||
1323 object->terminating ||
1324 ! object->alive) {
1325 vm_object_unlock(object);
1326 return(0);
1327 } else {
1328 memory_object = object->pager;
1329 pager_ops = memory_object->mo_pager_ops;
1330 if (pager_ops == &vnode_pager_ops) {
1331 kr = vnode_pager_get_object_vnode(
1332 memory_object,
1333 vnodeaddr, vid);
1334 if (kr != KERN_SUCCESS) {
1335 vm_object_unlock(object);
1336 return(0);
1337 }
1338 } else {
1339 vm_object_unlock(object);
1340 return(0);
1341 }
1342 }
1343 vm_object_unlock(object);
1344 return(1);
1345 }
1346
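/*
 * Return the vnode (and its vid) backing the given memory object.
 */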
1347 kern_return_t
1348 vnode_pager_get_object_vnode (
1349 memory_object_t mem_obj,
1350 uintptr_t * vnodeaddr,
1351 uint32_t * vid)
1352 {
1353 vnode_pager_t vnode_object;
1354
1355 vnode_object = vnode_pager_lookup(mem_obj);
1356 if (vnode_object->vnode_handle) {
1357 *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1358 *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1359
1360 return(KERN_SUCCESS);
1361 }
1362
1363 return(KERN_FAILURE);
1364 }
1365
1366
1367 /*
1368 * Find the underlying vnode object for the given vm_map_entry. If found, return with the
1369  * object locked; otherwise return VM_OBJECT_NULL with nothing locked.
1370 */
1371
1372 vm_object_t
1373 find_vnode_object(
1374 vm_map_entry_t entry
1375 )
1376 {
1377 vm_object_t top_object, object;
1378 memory_object_t memory_object;
1379 memory_object_pager_ops_t pager_ops;
1380
1381 if (!entry->is_sub_map) {
1382
1383 /*
1384 * The last object in the shadow chain has the
1385 * relevant pager information.
1386 */
1387
1388 top_object = entry->object.vm_object;
1389
1390 if (top_object) {
1391 vm_object_lock(top_object);
1392
1393 for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1394 vm_object_lock(object->shadow);
1395 vm_object_unlock(object);
1396 }
1397
1398 if (object && !object->internal && object->pager_ready && !object->terminating &&
1399 object->alive) {
1400 memory_object = object->pager;
1401 pager_ops = memory_object->mo_pager_ops;
1402
1403 /*
1404 * If this object points to the vnode_pager_ops, then we found what we're
1405 * looking for. Otherwise, this vm_map_entry doesn't have an underlying
1406  * vnode, so we fall through to the bottom and return VM_OBJECT_NULL.
1407 */
1408
1409 if (pager_ops == &vnode_pager_ops)
1410 return object; /* we return with the object locked */
1411 }
1412
1413 vm_object_unlock(object);
1414 }
1415
1416 }
1417
1418 return(VM_OBJECT_NULL);
1419 }