/* [apple/xnu.git] osfmk/vm/bsd_vm.c (xnu-1228.0.2) */

/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * Legacy routines to get the start and end for a vm_map_t.  They
 * return them in the vm_offset_t format.  So, they should only be
 * called on maps that are the same size as the kernel map for
 * accurate results.
 */
vm_offset_t
get_vm_start(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}

/*
 * BSD VNODE PAGER
 */

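/*
 * Pager operations vector for the vnode pager.  The initializer below is
 * positional; the entries are expected to line up with the function-pointer
 * fields of struct memory_object_pager_ops (reference, deallocate, init,
 * terminate, data_request, data_return, data_initialize, data_unlock,
 * synchronize, unmap, and the pager name) as declared in
 * <mach/memory_object_types.h>.
 */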
const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_unmap,
	"vnode pager"
};

typedef struct vnode_pager {
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops */
	unsigned int		pager_ikot;	/* JMM: fake ip_kotype() */
	unsigned int		ref_count;	/* reference count */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle */
} *vnode_pager_t;


ipc_port_t
trigger_name_to_port(		/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t);

void
vnode_pager_cluster_write(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(		/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(		/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000


#if DEBUG
int pagerdebug=0;

#define PAGER_ALL	0xffffffff
#define PAGER_INIT	0x00000001
#define PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

/*
 *	Routine:	macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					&default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if ((flags & SWAP_ENCRYPT_ON) &&
	    (flags & SWAP_ENCRYPT_OFF)) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	{
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);
	}

	current_thread()->options |= TH_OPT_VMPRIV;

	return 0;
}

/*
 *
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if(ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
						&trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32

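/*
 * memory_object_control_uiomove:
 *
 * Copy up to io_requested bytes between the caller's uio and the pages
 * already resident in the VM object backing 'control', starting at
 * 'offset' plus 'start_offset' bytes into the first page.  Pages are
 * gathered in runs of at most MAX_RUN, marked busy (and dirty when
 * mark_dirty is set) while the object lock is dropped around the
 * uiomove64() of each page, and then released.  The copy bails out as
 * soon as it hits a page that is not resident; callers are expected to
 * handle any remainder through their normal cluster I/O path.
 */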
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	boolean_t		make_lru = FALSE;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty)
				dst_page->dirty = TRUE;
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache
			 * we bail at this point
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();
			make_lru = TRUE;
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			if (make_lru == TRUE)
				vm_page_lru(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		if (make_lru == TRUE) {
			vm_page_unlock_queues();
			make_lru = FALSE;
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}


/*
 *
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				PAGE_SIZE, "vnode pager structures");
#ifdef __i386__
	apple_protect_pager_bootstrap();
#endif	/* __i386__ */
	return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 vm_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

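/*
 * The vnode_pager_get_object_* routines below answer queries about the
 * file backing a memory object.  Each one first checks that the pager
 * really is a vnode pager (mem_obj->mo_pager_ops == &vnode_pager_ops)
 * before handing the request to the corresponding vnode/UBC-layer helper
 * (vnode_pager_get_filesize, _get_pathname, _get_filename, _get_cs_blobs).
 */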
kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_pathname(
	memory_object_t		mem_obj,
	char			*pathname,
	vm_size_t		*length_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_pathname(vnode_object->vnode_handle,
					pathname,
					length_p);
}

kern_return_t
vnode_pager_get_object_filename(
	memory_object_t	mem_obj,
	const char	**filename)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_filename(vnode_object->vnode_handle,
					filename);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

/*
 *
 */
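/*
 * vnode_pager_data_request:
 *
 * Page-in entry point.  The requested offset is widened to a cluster:
 * the size starts at MAX_UPL_TRANSFER * PAGE_SIZE and is then adjusted
 * (offset realigned, size trimmed) by memory_object_cluster_size() using
 * the fault information; if that call fails, a single page is read.  The
 * resulting cluster is handed to vnode_pager_cluster_read().
 */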
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused vm_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	register vnode_pager_t	vnode_object;
	vm_size_t		size;
#if MACH_ASSERT
	memory_object_offset_t	original_offset = offset;
#endif	/* MACH_ASSERT */

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER * PAGE_SIZE;

	if (memory_object_cluster_size(vnode_object->control_handle, &offset, &size, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(original_offset >= offset &&
	       original_offset < offset + size);

	return vnode_pager_cluster_read(vnode_object, offset, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}


/*
 *
 */
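/*
 * vnode_pager_cluster_write:
 *
 * Push dirty pages of the object back to the file.  When called on behalf
 * of msync() (UPL_MSYNC set in upl_flags) the requested range is walked in
 * MAX_UPL_TRANSFER-sized chunks, a UPL of the dirty pages is built with
 * memory_object_upl_request(), and each chunk is written via
 * vnode_pageout(); I/O errors are reported through *io_error when the
 * caller passed UPL_IOSYNC along with an io_error pointer.  Otherwise
 * (the pageout path) a single cluster around 'offset' is sized against
 * the current file size and written out in one vnode_pageout() call.
 */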
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	upl_t		upl = NULL;
	int		request_flags;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			kern_return_t	kr;

			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
					UPL_SET_INTERNAL | UPL_SET_LITE;

			kr = memory_object_upl_request(vnode_object->control_handle,
						       offset, size, &upl, NULL, NULL, request_flags);
			if (kr != KERN_SUCCESS)
				panic("vnode_pager_cluster_write: upl request failed\n");

			vnode_pageout(vnode_object->vnode_handle,
				      upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;
		vm_object_t		object;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		object = memory_object_control_to_vm_object(vnode_object->control_handle);

		if (object == VM_OBJECT_NULL)
			panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");

		request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_SET_INTERNAL | UPL_SET_LITE;

		vm_object_upl_request(object, base_offset, size,
				      &upl, NULL, NULL, request_flags);
		if (upl == NULL)
			panic("vnode_pager_cluster_write: upl request failed\n");

		vnode_pageout(vnode_object->vnode_handle,
			      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
	}
}


/*
 *
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;

	assert(! (cnt & PAGE_MASK));

	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (vm_offset_t) NULL,
			    offset,
			    cnt,
			    0,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here, 1 corresponds to PAGER_ABSENT
	defined in bsd/vm/vm_pager.h.  However, we should not be including
	that file here; it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		kr = memory_object_upl_request(vnode_object->control_handle,
					       offset, cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
		int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	 name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid);


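/*
 * fill_procregioninfo:
 *
 * Fill in a proc_regioninfo_internal record for the VM map entry of 'task'
 * that contains (or is the first entry above) the address passed in 'arg'.
 * Protection, inheritance, wiring, sharing and resident/swapped page counts
 * are gathered with vm_map_region_walk()/vm_map_region_top_walk().  If the
 * region is file backed and 'vnodeaddr' is non-NULL, the backing vnode's
 * address and vid are returned as well.  Returns 1 if a region was found,
 * 0 otherwise; presumably called on behalf of the BSD proc_info code.
 */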
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t map = task->map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;


	if (map == VM_MAP_NULL)
		return(0);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uint32_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	return(1);
}

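/*
 * fill_vnodeinfoforaddr:
 *
 * Resolve the vnode backing a map entry, if any.  The shadow chain of the
 * entry's VM object is walked down to the bottom object (the one with the
 * relevant pager); if that object is alive, pager-ready and externally
 * backed by the vnode pager, the vnode address and vid are returned via
 * vnode_pager_get_object_vnode().  Returns 1 on success, 0 otherwise.
 */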
static int
fill_vnodeinfoforaddr(
	vm_map_entry_t			entry,
	uint32_t * vnodeaddr,
	uint32_t * vid)
{
	vm_object_t	top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

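/*
 * vnode_pager_get_object_vnode:
 *
 * Return the vnode handle (and its vid) held by a vnode pager, or
 * KERN_FAILURE if the pager no longer has a vnode attached.
 */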
kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t		mem_obj,
	uint32_t * vnodeaddr,
	uint32_t * vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle)  {
		*vnodeaddr = (uint32_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}
