/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * Legacy routines that return the start and end of a vm_map_t in
 * vm_offset_t format.  Because of the CAST_DOWN, they should only be
 * called on maps that are the same address size as the kernel map
 * for accurate results.
 */
vm_offset_t
get_vm_start(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_unmap,
	"vnode pager"
};
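
/*
 * The ops vector doubles as the pager's identity: routines later in this
 * file (vnode_pager_get_object_size(), fill_vnodeinfoforaddr(), ...) only
 * trust the vnode_pager layout after checking that a memory object's
 * mo_pager_ops points at &vnode_pager_ops.
 */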

typedef struct vnode_pager {
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops */
	unsigned int		pager_ikot;	/* JMM: fake ip_kotype() */
	unsigned int		ref_count;	/* reference count */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle */
} *vnode_pager_t;


ipc_port_t
trigger_name_to_port(		/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t);

void
vnode_pager_cluster_write(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(		/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(		/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define MAX_VNODE	10000


#if DEBUG
int pagerdebug=0;

#define PAGER_ALL		0xffffffff
#define PAGER_INIT		0x00000001
#define PAGER_PAGEIN		0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

/*
 *	Routine:	macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t default_pager;
	ipc_port_t	trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if ((flags & SWAP_ENCRYPT_ON) &&
	    (flags & SWAP_ENCRYPT_OFF)) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one that will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	{
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);
	}

	current_thread()->options |= TH_OPT_VMPRIV;

	return 0;
}
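
/*
 * Illustrative sketch (not part of the original file): how a user-space
 * swap management daemon might use the macx_triggers() trap above to
 * register a high-water alert.  The user-level prototype spelled out
 * below and the header that exports HI_WAT_ALERT are assumptions made
 * for the example; the kernel side is exactly the routine above, which
 * translates the port name into the caller's receive right, fabricates
 * a send right from it, and hands that to the default pager through
 * default_pager_triggers().
 *
 *	#include <mach/mach.h>
 *
 *	// assumed user-level prototype for the trap handled above
 *	extern int macx_triggers(int hi_water, int low_water, int flags,
 *				 mach_port_t alert_port);
 *
 *	static int
 *	register_hi_water_alert(int hi_water, int low_water)
 *	{
 *		mach_port_t alert_port;
 *
 *		// allocate the receive right that trigger_name_to_port()
 *		// will translate; the kernel makes the send right itself
 *		if (mach_port_allocate(mach_task_self(),
 *		    MACH_PORT_RIGHT_RECEIVE, &alert_port) != KERN_SUCCESS)
 *			return (-1);
 *
 *		// the daemon would then listen on alert_port for the
 *		// default pager's high-water notification
 *		return macx_triggers(hi_water, low_water, HI_WAT_ALERT,
 *				     alert_port);
 *	}
 */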

/*
 *
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t trigger_name)
{
	ipc_port_t trigger_port;
	ipc_space_t space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if(ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
				      &trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32

unsigned long vm_cs_tainted_forces = 0;

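/*
 * memory_object_control_uiomove:
 *
 * Copy up to 'io_requested' bytes between a uio and the resident pages
 * of a (non-internal) memory object, starting 'start_offset' bytes into
 * the page at 'offset'.  Pages are collected in runs of up to MAX_RUN
 * and marked busy so the object lock can be dropped around the
 * uiomove64() copies.  If 'mark_dirty' is set the pages are dirtied and
 * code-signed pages are flagged as tainted; if 'take_reference' is set
 * the pages are moved to the tail of the inactive queue to approximate
 * LRU behavior.  Returns 0 on success or the error from uiomove64();
 * a hole in the cache simply ends the copy early.
 */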
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void			*uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	boolean_t		make_lru = FALSE;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now; we can't block on this page
				 * while holding other pages in the BUSY state.
				 * Otherwise, just wait for the page.
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				dst_page->dirty = TRUE;
				if (dst_page->cs_validated) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: assume that it is now tainted.
					 */
					dst_page->cs_tainted = TRUE;
					vm_cs_tainted_forces++;
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache
			 * we bail at this point
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();
			make_lru = TRUE;
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			if (make_lru == TRUE)
				vm_page_lru(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		if (make_lru == TRUE) {
			vm_page_unlock_queues();
			make_lru = FALSE;
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}

/*
 * One-time setup: create the zone that vnode pager structures are
 * allocated from (and bootstrap the apple protect pager on i386).
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
#ifdef __i386__
	apple_protect_pager_bootstrap();
#endif /* __i386__ */
	return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		 memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 vm_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_pathname(
	memory_object_t		mem_obj,
	char			*pathname,
	vm_size_t		*length_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_pathname(vnode_object->vnode_handle,
					pathname,
					length_p);
}

kern_return_t
vnode_pager_get_object_filename(
	memory_object_t	mem_obj,
	const char	**filename)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_filename(vnode_object->vnode_handle,
					filename);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

/*
 *
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused vm_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	register vnode_pager_t	vnode_object;
	vm_size_t		size;
#if MACH_ASSERT
	memory_object_offset_t	original_offset = offset;
#endif /* MACH_ASSERT */

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER * PAGE_SIZE;

	if (memory_object_cluster_size(vnode_object->control_handle, &offset, &size, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(original_offset >= offset &&
	       original_offset < offset + size);

	return vnode_pager_cluster_read(vnode_object, offset, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

/*
 * vnode_pager_cluster_write: push a range of dirty pages for this vnode
 * back to the filesystem, either on behalf of an msync (UPL_MSYNC) or
 * via the pageout path.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	upl_t		upl = NULL;
	int		request_flags;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			kern_return_t	kr;

			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
					UPL_SET_INTERNAL | UPL_SET_LITE;

			kr = memory_object_upl_request(vnode_object->control_handle,
						       offset, size, &upl, NULL, NULL, request_flags);
			if (kr != KERN_SUCCESS)
				panic("vnode_pager_cluster_write: upl request failed\n");

			vnode_pageout(vnode_object->vnode_handle,
				      upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;
		vm_object_t		object;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		object = memory_object_control_to_vm_object(vnode_object->control_handle);

		if (object == VM_OBJECT_NULL)
			panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");

		request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_SET_INTERNAL | UPL_SET_LITE;

		vm_object_upl_request(object, base_offset, size,
				      &upl, NULL, NULL, request_flags);
		if (upl == NULL)
			panic("vnode_pager_cluster_write: upl request failed\n");

		vnode_pageout(vnode_object->vnode_handle,
			      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
	}
}

/*
 * vnode_pager_cluster_read: page in the given (page-aligned) range
 * from the vnode.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;

	assert(! (cnt & PAGE_MASK));

	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (vm_offset_t) NULL,
			    offset,
			    cnt,
			    0,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here: 1 corresponds to PAGER_ABSENT as
	defined in bsd/vm/vm_pager.h.  However, we should not include that
	file here; it would be a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		kr = memory_object_upl_request(vnode_object->control_handle,
					       offset, cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example). Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid);

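/*
 * fill_procregioninfo:
 *
 * Fill in 'pinfo' for the map entry containing (or, if none, the entry
 * immediately following) the address 'arg' in the given task's map.
 * For non-submap entries backed by a vnode pager, also report the
 * vnode's address and vid through 'vnodeaddr' and 'vid'.  Returns 1 if
 * a region was found, 0 otherwise.
 */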
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uint32_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t	entry,
	uint32_t	*vnodeaddr,
	uint32_t	*vid)
{
	vm_object_t	top_object, object;
	memory_object_t	memory_object;
	memory_object_pager_ops_t	pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t	mem_obj,
	uint32_t	*vnodeaddr,
	uint32_t	*vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uint32_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}
