/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 * Return the number of entries in the given map.
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return(vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return(vm_map_last_entry(map)->vme_end);
}

/*
 * Legacy routines to get the start and end of a vm_map_t. They
 * return the addresses as vm_offset_t, so they should only be
 * called on maps that are the same size as the kernel map for
 * the results to be accurate.
 */
vm_offset_t
get_vm_start(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	"vnode pager"
};

typedef struct vnode_pager {
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops */
	unsigned int		pager_ikot;	/* JMM: fake ip_kotype() */
	unsigned int		ref_count;	/* reference count */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle */
} *vnode_pager_t;


ipc_port_t
trigger_name_to_port(		/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t);

void
vnode_pager_cluster_write(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(		/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(		/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL	0xffffffff
#define PAGER_INIT	0x00000001
#define PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

/*
 * Routine:	macx_triggers
 * Function:
 *	Syscall interface to set the callbacks for the low and
 *	high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t	trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if ((flags & SWAP_ENCRYPT_ON) &&
	    (flags & SWAP_ENCRYPT_OFF)) {
		/* can't have it both ways */
		return EINVAL;
	}

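	/*
	 * Lazily start the default pager the first time triggers are
	 * configured, if it is not already running.
	 */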
	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}

	/*
	 * Set the scheduling priority and policy for the current thread;
	 * for the time being, it is assumed that the thread setting the
	 * alert is the same one that will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	{
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);
	}

	current_thread()->options |= TH_OPT_VMPRIV;

	return 0;
}

/*
 * Translate a trigger port name in the current task's IPC space into a
 * port pointer. The name must denote a receive right; the port is
 * returned locked and active, or NULL on failure.
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if (ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
				       &trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define MAX_RUN	32

unsigned long vm_cs_tainted_forces = 0;

int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void			*uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	boolean_t		make_lru = FALSE;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

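	/*
	 * Each pass gathers up to MAX_RUN resident pages (marking them
	 * busy) while holding the object lock, drops the lock to copy
	 * the data with uiomove64(), then retakes the lock to update
	 * page state and wake up waiters.
	 */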
	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now, since we can't block on this
				 * page while holding other pages in the BUSY state;
				 * otherwise, wait for the page
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				dst_page->dirty = TRUE;
				if (dst_page->cs_validated) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: assume that it is now tainted.
					 */
					dst_page->cs_tainted = TRUE;
					vm_cs_tainted_forces++;
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache, so bail at
			 * this point... we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

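			/*
			 * phys_page is a physical page number; shifting it
			 * left by 12 (the page shift) and adding start_offset
			 * yields the 64-bit physical address for the copy.
			 */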
			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();
			make_lru = TRUE;
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			if (make_lru == TRUE)
				vm_page_lru(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		if (make_lru == TRUE) {
			vm_page_unlock_queues();
			make_lru = FALSE;
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}


/*
 * Set up the zone used for vnode pager structures (and bootstrap the
 * apple-protect pager when code decryption is configured).
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif /* CONFIG_CODE_DECRYPTION */
	return;
}

/*
 * Create and return a vnode pager memory object for the given vnode.
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 * Attach the memory object control handle to the pager and set the
 * object's caching attributes.
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		 memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 vm_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 * Write back (return) data for the object via the cluster-write path.
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_pathname(
	memory_object_t	mem_obj,
	char		*pathname,
	vm_size_t	*length_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_pathname(vnode_object->vnode_handle,
					pathname,
					length_p);
}

kern_return_t
vnode_pager_get_object_filename(
	memory_object_t	mem_obj,
	const char	**filename)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_filename(vnode_object->vnode_handle,
					filename);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

/*
 * Page in data for the object: compute a cluster around the faulting
 * offset and issue the read.
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused vm_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	register vnode_pager_t	vnode_object;
	vm_size_t		size;
#if MACH_ASSERT
	memory_object_offset_t	original_offset = offset;
#endif /* MACH_ASSERT */

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER * PAGE_SIZE;

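	/*
	 * Let memory_object_cluster_size() adjust the offset and size for
	 * clustered paging; if that fails, fall back to a single page.
	 */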
	if (memory_object_cluster_size(vnode_object->control_handle, &offset, &size, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(original_offset >= offset &&
	       original_offset < offset + size);

	return vnode_pager_cluster_read(vnode_object, offset, size);
}

/*
 * Take an additional reference on the pager.
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 * Release a reference; on the last release, drop the vnode and free
 * the pager structure.
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 * Nothing to do at termination time for a vnode pager.
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 * Signal completion of a synchronize request for the given range.
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 * Notify the UBC layer when the object is mapped and when the last
 * mapping goes away.
 */
kern_return_t
vnode_pager_map(
	memory_object_t	mem_obj,
	vm_prot_t	prot)
{
	vnode_pager_t	vnode_object;
	int		ret;
	kern_return_t	kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}


/*
 * Write a range of the object's data back to the underlying vnode.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	upl_t		upl = NULL;
	int		request_flags;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

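		/*
		 * msync path: push the range out in chunks of at most
		 * MAX_UPL_TRANSFER pages, creating a UPL for each chunk
		 * and handing it to vnode_pageout().
		 */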
		while (cnt) {
			kern_return_t	kr;

			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
					UPL_SET_INTERNAL | UPL_SET_LITE;

			kr = memory_object_upl_request(vnode_object->control_handle,
						       offset, size, &upl, NULL, NULL, request_flags);
			if (kr != KERN_SUCCESS)
				panic("vnode_pager_cluster_write: upl request failed\n");

			vnode_pageout(vnode_object->vnode_handle,
				      upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;
		vm_object_t		object;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
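			/*
			 * size is PAGE_SIZE * MAX_UPL_TRANSFER; assuming that
			 * is a power of two, masking with ~(size - 1) rounds
			 * offset down to a cluster boundary.
			 */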
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		object = memory_object_control_to_vm_object(vnode_object->control_handle);

		if (object == VM_OBJECT_NULL)
			panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");

		request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_SET_INTERNAL | UPL_SET_LITE;

		vm_object_upl_request(object, base_offset, size,
				      &upl, NULL, NULL, request_flags);
		if (upl == NULL)
			panic("vnode_pager_cluster_write: upl request failed\n");

		vnode_pageout(vnode_object->vnode_handle,
			      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
	}
}


/*
 * Read a page-aligned range of the object in from its vnode.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt)
{
	int	local_error = 0;
	int	kret;

	assert(! (cnt & PAGE_MASK));

	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (vm_offset_t) NULL,
			    offset,
			    cnt,
			    0,
			    &local_error);
	/*
	 * The check below should really be "if (kret == PAGER_ABSENT)":
	 * 1 corresponds to PAGER_ABSENT as defined in bsd/vm/vm_pager.h,
	 * but including that header here would be a layering violation.
	 */
	if (kret == 1) {
		int		uplflags;
		upl_t		upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		kr = memory_object_upl_request(vnode_object->control_handle,
					       offset, cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example). Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 * Release cached memory objects that belong to the vnode pager.
 */
void
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 * Allocate and initialize a vnode pager structure for the given vnode.
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter. We need to make sure that
	 * vm_map does not see this object as a named entry port. So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 * Convert a memory_object_t back to the vnode pager that implements it.
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid);


int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t	map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	vm_map_offset_t	start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

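	/*
	 * Find the map entry that contains the given address, or the
	 * next entry after it if the address falls in a hole.
	 */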
	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uint32_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t	entry,
	uint32_t	*vnodeaddr,
	uint32_t	*vid)
{
	vm_object_t	top_object, object;
	memory_object_t	memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
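			/*
			 * Walk to the bottom of the shadow chain, locking
			 * each object hand-over-hand so only the current
			 * one stays locked.
			 */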
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t	mem_obj,
	uint32_t	*vnodeaddr,
	uint32_t	*vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uint32_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}