/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
    vm_map_t);

vm_offset_t
get_map_start(
    vm_map_t);

vm_offset_t
get_map_end(
    vm_map_t);

/*
 *
 */
int
get_map_nentries(
    vm_map_t map)
{
    return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
    return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
    return( vm_map_last_entry(map)->vme_end);
}

/*
 * Legacy routines that return the start and end of a vm_map_t in
 * vm_offset_t format.  Since vm_offset_t may be narrower than the map's
 * addresses, they should only be called on maps that are the same size
 * as the kernel map for accurate results.
 */
vm_offset_t
get_vm_start(
    vm_map_t map)
{
    return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
    vm_map_t map)
{
    return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}
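
/*
 * Worked example of the truncation hazard (illustrative, assuming a
 * 32-bit vm_offset_t): a 64-bit vme_end of 0x100001000 would be
 * CAST_DOWN to 0x00001000, silently losing the high bits; hence the
 * warning above about only using these on kernel-sized maps.
 */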

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
    vnode_pager_reference,
    vnode_pager_deallocate,
    vnode_pager_init,
    vnode_pager_terminate,
    vnode_pager_data_request,
    vnode_pager_data_return,
    vnode_pager_data_initialize,
    vnode_pager_data_unlock,
    vnode_pager_synchronize,
    vnode_pager_unmap,
    "vnode pager"
};

typedef struct vnode_pager {
    memory_object_pager_ops_t pager_ops;      /* == &vnode_pager_ops */
    unsigned int              pager_ikot;     /* JMM: fake ip_kotype() */
    unsigned int              ref_count;      /* reference count */
    memory_object_control_t   control_handle; /* mem object control handle */
    struct vnode              *vnode_handle;  /* vnode handle */
} *vnode_pager_t;


ipc_port_t
trigger_name_to_port(        /* forward */
    mach_port_t);

kern_return_t
vnode_pager_cluster_read(    /* forward */
    vnode_pager_t,
    vm_object_offset_t,
    vm_size_t);

void
vnode_pager_cluster_write(   /* forward */
    vnode_pager_t,
    vm_object_offset_t,
    vm_size_t,
    vm_object_offset_t *,
    int *,
    int);


vnode_pager_t
vnode_object_create(         /* forward */
    struct vnode *);

vnode_pager_t
vnode_pager_lookup(          /* forward */
    memory_object_t);

zone_t vnode_pager_zone;


#define VNODE_PAGER_NULL    ((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT   1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define MAX_VNODE   10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL       0xffffffff
#define PAGER_INIT      0x00000001
#define PAGER_PAGEIN    0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
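
/*
 * Usage sketch (illustrative): on a DEBUG kernel, setting pagerdebug
 * (e.g. from the kernel debugger) enables the matching trace level and
 * the macro expands to a printf, as in the calls later in this file:
 *
 *    PAGER_DEBUG(PAGER_PAGEIN, ("data_request: %p\n", mem_obj));
 */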

/*
 * Routine:	macx_triggers
 * Function:
 *	Syscall interface to set the callbacks for low and
 *	high water marks.
 */
int
macx_triggers(
    struct macx_triggers_args *args)
{
    int hi_water = args->hi_water;
    int low_water = args->low_water;
    int flags = args->flags;
    mach_port_t trigger_name = args->alert_port;
    kern_return_t kr;
    memory_object_default_t default_pager;
    ipc_port_t trigger_port;

    default_pager = MEMORY_OBJECT_DEFAULT_NULL;
    kr = host_default_memory_manager(host_priv_self(),
                                     &default_pager, 0);
    if (kr != KERN_SUCCESS) {
        return EINVAL;
    }

    if ((flags & SWAP_ENCRYPT_ON) &&
        (flags & SWAP_ENCRYPT_OFF)) {
        /* can't have it both ways */
        return EINVAL;
    }

    if (default_pager_init_flag == 0) {
        start_def_pager(NULL);
        default_pager_init_flag = 1;
    }

    if (flags & SWAP_ENCRYPT_ON) {
        /* ENCRYPTED SWAP: tell default_pager to encrypt */
        default_pager_triggers(default_pager,
                               0, 0,
                               SWAP_ENCRYPT_ON,
                               IP_NULL);
    } else if (flags & SWAP_ENCRYPT_OFF) {
        /* ENCRYPTED SWAP: tell default_pager not to encrypt */
        default_pager_triggers(default_pager,
                               0, 0,
                               SWAP_ENCRYPT_OFF,
                               IP_NULL);
    }

    if (flags & HI_WAT_ALERT) {
        trigger_port = trigger_name_to_port(trigger_name);
        if (trigger_port == NULL) {
            return EINVAL;
        }
        /* trigger_port is locked and active */
        ipc_port_make_send_locked(trigger_port);
        /* now unlocked */
        default_pager_triggers(default_pager,
                               hi_water, low_water,
                               HI_WAT_ALERT, trigger_port);
    }

    if (flags & LO_WAT_ALERT) {
        trigger_port = trigger_name_to_port(trigger_name);
        if (trigger_port == NULL) {
            return EINVAL;
        }
        /* trigger_port is locked and active */
        ipc_port_make_send_locked(trigger_port);
        /* and now it's unlocked */
        default_pager_triggers(default_pager,
                               hi_water, low_water,
                               LO_WAT_ALERT, trigger_port);
    }

    /*
     * Set the scheduling priority and policy for the current thread;
     * it is assumed for the time being that the thread setting the alert
     * is the same one that will be servicing it.
     *
     * XXX This does not belong in the kernel XXX
     */
    {
        thread_precedence_policy_data_t pre;
        thread_extended_policy_data_t   ext;

        ext.timeshare = FALSE;
        pre.importance = INT32_MAX;

        thread_policy_set(current_thread(),
                          THREAD_EXTENDED_POLICY,
                          (thread_policy_t)&ext,
                          THREAD_EXTENDED_POLICY_COUNT);

        thread_policy_set(current_thread(),
                          THREAD_PRECEDENCE_POLICY,
                          (thread_policy_t)&pre,
                          THREAD_PRECEDENCE_POLICY_COUNT);
    }

    current_thread()->options |= TH_OPT_VMPRIV;

    return 0;
}
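
/*
 * Illustrative user-space sketch (hypothetical caller, not part of this
 * file): a privileged swap-management daemon such as dynamic_pager would
 * reach this handler through the macx_triggers() trap, roughly:
 *
 *    mach_port_t alert_port;
 *
 *    mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
 *                       &alert_port);
 *    if (macx_triggers(hi_water_bytes, low_water_bytes,
 *                      HI_WAT_ALERT | LO_WAT_ALERT, alert_port) != 0)
 *        err(1, "macx_triggers");
 *
 * hi_water_bytes/low_water_bytes are placeholder values chosen by the
 * daemon; the default pager then sends a notification on alert_port
 * when backing-store usage crosses either mark.
 */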

/*
 *
 */
ipc_port_t
trigger_name_to_port(
    mach_port_t trigger_name)
{
    ipc_port_t trigger_port;
    ipc_space_t space;

    if (trigger_name == 0)
        return (NULL);

    space = current_space();
    if (ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
                                   &trigger_port) != KERN_SUCCESS)
        return (NULL);
    return trigger_port;
}


extern int uiomove64(addr64_t, int, void *);
#define MAX_RUN 32

unsigned long vm_cs_tainted_forces = 0;

int
memory_object_control_uiomove(
    memory_object_control_t control,
    memory_object_offset_t  offset,
    void                    *uio,
    int                     start_offset,
    int                     io_requested,
    int                     mark_dirty,
    int                     take_reference)
{
    vm_object_t object;
    vm_page_t   dst_page;
    int         xsize;
    int         retval = 0;
    int         cur_run;
    int         cur_needed;
    int         i;
    int         orig_offset;
    boolean_t   make_lru = FALSE;
    vm_page_t   page_run[MAX_RUN];

    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL) {
        return (0);
    }
    assert(!object->internal);

    vm_object_lock(object);

    if (mark_dirty && object->copy != VM_OBJECT_NULL) {
        /*
         * We can't modify the pages without honoring
         * copy-on-write obligations first, so fall off
         * this optimized path and fall back to the regular
         * path.
         */
        vm_object_unlock(object);
        return 0;
    }
    orig_offset = start_offset;

    while (io_requested && retval == 0) {

        cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;
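        /*
         * Worked example (illustrative): with PAGE_SIZE 4096, a
         * start_offset of 512 and io_requested of 8192 round up to
         * (512 + 8192 + 4095) / 4096 = 3 pages for this run.
         */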

        if (cur_needed > MAX_RUN)
            cur_needed = MAX_RUN;

        for (cur_run = 0; cur_run < cur_needed; ) {

            if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
                break;
            /*
             * Sync up on getting the busy bit
             */
            if ((dst_page->busy || dst_page->cleaning)) {
                /*
                 * someone else is playing with the page... if we've
                 * already collected pages into this run, go ahead
                 * and process them now; we can't block on this
                 * page while holding other pages in the BUSY state.
                 * otherwise, wait for the page to come available.
                 */
                if (cur_run)
                    break;
                PAGE_SLEEP(object, dst_page, THREAD_UNINT);
                continue;
            }
            /*
             * this routine is only called when copying
             * to/from real files... no need to consider
             * encrypted swap pages
             */
            assert(!dst_page->encrypted);

            if (mark_dirty) {
                dst_page->dirty = TRUE;
                if (dst_page->cs_validated) {
                    /*
                     * CODE SIGNING:
                     * We're modifying a code-signed
                     * page: assume that it is now tainted.
                     */
                    dst_page->cs_tainted = TRUE;
                    vm_cs_tainted_forces++;
                }
            }
            dst_page->busy = TRUE;

            page_run[cur_run++] = dst_page;

            offset += PAGE_SIZE_64;
        }
        if (cur_run == 0)
            /*
             * we hit a 'hole' in the cache
             * we bail at this point
             * we'll unlock the object below
             */
            break;
        vm_object_unlock(object);

        for (i = 0; i < cur_run; i++) {

            dst_page = page_run[i];

            if ((xsize = PAGE_SIZE - start_offset) > io_requested)
                xsize = io_requested;

            if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
                break;

            io_requested -= xsize;
            start_offset = 0;
        }
        vm_object_lock(object);

        /*
         * if we have more than 1 page to work on
         * in the current run, or the original request
         * started at offset 0 of the page, or we're
         * processing multiple batches, we will move
         * the pages to the tail of the inactive queue
         * to implement an LRU for read/write accesses
         *
         * the check for orig_offset == 0 is there to
         * mitigate the cost of small (< page_size) requests
         * to the same page (this way we only move it once)
         */
        if (take_reference && (cur_run > 1 || orig_offset == 0)) {
            vm_page_lockspin_queues();
            make_lru = TRUE;
        }
        for (i = 0; i < cur_run; i++) {
            dst_page = page_run[i];

            /*
             * someone is explicitly referencing this page...
             * update clustered and speculative state
             *
             */
            VM_PAGE_CONSUME_CLUSTERED(dst_page);

            if (make_lru == TRUE)
                vm_page_lru(dst_page);

            PAGE_WAKEUP_DONE(dst_page);
        }
        if (make_lru == TRUE) {
            vm_page_unlock_queues();
            make_lru = FALSE;
        }
        orig_offset = 0;
    }
    vm_object_unlock(object);

    return (retval);
}
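
/*
 * Illustrative sketch (hypothetical, not from this file) of how a
 * file-read fast path might use the routine above, assuming the caller
 * holds a uio for a read starting f_offset bytes into the file and a
 * control handle obtained via ubc_getobject():
 *
 *    control = ubc_getobject(vp, UBC_FLAGS_NONE);
 *    retval = memory_object_control_uiomove(control,
 *                   f_offset & ~PAGE_MASK_64,     (page-aligned base)
 *                   uio,
 *                   (int)(f_offset & PAGE_MASK),  (offset within page)
 *                   io_size,
 *                   0,                            (read: don't mark dirty)
 *                   1);                           (take_reference for LRU)
 *
 * A zero return with bytes still outstanding means the run hit a hole
 * in the cache and the caller must fall back to the paging path.
 */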


/*
 *
 */
void
vnode_pager_bootstrap(void)
{
    register vm_size_t size;

    size = (vm_size_t) sizeof(struct vnode_pager);
    vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
                             PAGE_SIZE, "vnode pager structures");
#ifdef __i386__
    apple_protect_pager_bootstrap();
#endif /* __i386__ */
    return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
    struct vnode *vp,
    __unused memory_object_t pager)
{
    vnode_pager_t vnode_object;

    vnode_object = vnode_object_create(vp);
    if (vnode_object == VNODE_PAGER_NULL)
        panic("vnode_pager_setup: vnode_object_create() failed");
    return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
#if !DEBUG
    __unused
#endif
    vm_size_t pg_size)
{
    vnode_pager_t vnode_object;
    kern_return_t kr;
    memory_object_attr_info_data_t attributes;


    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));

    if (control == MEMORY_OBJECT_CONTROL_NULL)
        return KERN_INVALID_ARGUMENT;

    vnode_object = vnode_pager_lookup(mem_obj);

    memory_object_control_reference(control);

    vnode_object->control_handle = control;

    attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
    /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
    attributes.cluster_size = (1 << (PAGE_SHIFT));
    attributes.may_cache_object = TRUE;
    attributes.temporary = TRUE;

    kr = memory_object_change_attributes(
                    control,
                    MEMORY_OBJECT_ATTRIBUTE_INFO,
                    (memory_object_info_t) &attributes,
                    MEMORY_OBJECT_ATTR_INFO_COUNT);
    if (kr != KERN_SUCCESS)
        panic("vnode_pager_init: memory_object_change_attributes() failed");

    return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
    memory_object_t         mem_obj,
    memory_object_offset_t  offset,
    vm_size_t               data_cnt,
    memory_object_offset_t  *resid_offset,
    int                     *io_error,
    __unused boolean_t      dirty,
    __unused boolean_t      kernel_copy,
    int                     upl_flags)
{
    register vnode_pager_t vnode_object;

    vnode_object = vnode_pager_lookup(mem_obj);

    vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
    __unused memory_object_t        mem_obj,
    __unused memory_object_offset_t offset,
    __unused vm_size_t              data_cnt)
{
    panic("vnode_pager_data_initialize");
    return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
    __unused memory_object_t        mem_obj,
    __unused memory_object_offset_t offset,
    __unused vm_size_t              size,
    __unused vm_prot_t              desired_access)
{
    return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_object_size(
    memory_object_t         mem_obj,
    memory_object_offset_t  *length)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        *length = 0;
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    *length = vnode_pager_get_filesize(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_pathname(
    memory_object_t mem_obj,
    char            *pathname,
    vm_size_t       *length_p)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    return vnode_pager_get_pathname(vnode_object->vnode_handle,
                                    pathname,
                                    length_p);
}

kern_return_t
vnode_pager_get_object_filename(
    memory_object_t mem_obj,
    const char      **filename)
{
    vnode_pager_t vnode_object;

    if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    return vnode_pager_get_filename(vnode_object->vnode_handle,
                                    filename);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
    memory_object_t mem_obj,
    void            **blobs)
{
    vnode_pager_t vnode_object;

    if (mem_obj == MEMORY_OBJECT_NULL ||
        mem_obj->mo_pager_ops != &vnode_pager_ops) {
        return KERN_INVALID_ARGUMENT;
    }

    vnode_object = vnode_pager_lookup(mem_obj);

    return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
                                    blobs);
}

/*
 *
 */
kern_return_t
vnode_pager_data_request(
    memory_object_t             mem_obj,
    memory_object_offset_t      offset,
    __unused vm_size_t          length,
    __unused vm_prot_t          desired_access,
    memory_object_fault_info_t  fault_info)
{
    register vnode_pager_t vnode_object;
    vm_size_t size;
#if MACH_ASSERT
    memory_object_offset_t original_offset = offset;
#endif /* MACH_ASSERT */

    vnode_object = vnode_pager_lookup(mem_obj);

    size = MAX_UPL_TRANSFER * PAGE_SIZE;

    if (memory_object_cluster_size(vnode_object->control_handle, &offset, &size, fault_info) != KERN_SUCCESS)
        size = PAGE_SIZE;

    assert(original_offset >= offset &&
           original_offset < offset + size);

    return vnode_pager_cluster_read(vnode_object, offset, size);
}

/*
 *
 */
void
vnode_pager_reference(
    memory_object_t mem_obj)
{
    register vnode_pager_t vnode_object;
    unsigned int new_ref_count;

    vnode_object = vnode_pager_lookup(mem_obj);
    new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
    assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
    memory_object_t mem_obj)
{
    register vnode_pager_t vnode_object;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

    vnode_object = vnode_pager_lookup(mem_obj);

    if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
        if (vnode_object->vnode_handle != NULL) {
            vnode_pager_vrele(vnode_object->vnode_handle);
        }
        zfree(vnode_pager_zone, vnode_object);
    }
    return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
    __unused
#endif
    memory_object_t mem_obj)
{
    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

    return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
    memory_object_t         mem_obj,
    memory_object_offset_t  offset,
    vm_size_t               length,
    __unused vm_sync_t      sync_flags)
{
    register vnode_pager_t vnode_object;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

    vnode_object = vnode_pager_lookup(mem_obj);

    memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

    return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_unmap(
    memory_object_t mem_obj)
{
    register vnode_pager_t vnode_object;

    PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %p\n", mem_obj));

    vnode_object = vnode_pager_lookup(mem_obj);

    ubc_unmap(vnode_object->vnode_handle);
    return KERN_SUCCESS;
}


/*
 *
 */
void
vnode_pager_cluster_write(
    vnode_pager_t       vnode_object,
    vm_object_offset_t  offset,
    vm_size_t           cnt,
    vm_object_offset_t  *resid_offset,
    int                 *io_error,
    int                 upl_flags)
{
    vm_size_t size;
    upl_t     upl = NULL;
    int       request_flags;
    int       errno;

    if (upl_flags & UPL_MSYNC) {

        upl_flags |= UPL_VNODE_PAGER;

        if ( (upl_flags & UPL_IOSYNC) && io_error)
            upl_flags |= UPL_KEEPCACHED;

        while (cnt) {
            kern_return_t kr;

            size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

            request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
                            UPL_SET_INTERNAL | UPL_SET_LITE;

            kr = memory_object_upl_request(vnode_object->control_handle,
                                           offset, size, &upl, NULL, NULL, request_flags);
            if (kr != KERN_SUCCESS)
                panic("vnode_pager_cluster_write: upl request failed\n");

            vnode_pageout(vnode_object->vnode_handle,
                          upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

            if ( (upl_flags & UPL_KEEPCACHED) ) {
                if ( (*io_error = errno) )
                    break;
            }
            cnt -= size;
            offset += size;
        }
        if (resid_offset)
            *resid_offset = offset;

    } else {
        vm_object_offset_t  vnode_size;
        vm_object_offset_t  base_offset;
        vm_object_t         object;

        /*
         * this is the pageout path
         */
        vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

        if (vnode_size > (offset + PAGE_SIZE)) {
            /*
             * preset the maximum size of the cluster
             * and put us on a nice cluster boundary...
             * then clip the size to ensure we
             * don't request past the end of the underlying file
             */
            size = PAGE_SIZE * MAX_UPL_TRANSFER;
            base_offset = offset & ~((signed)(size - 1));
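            /*
             * Worked example (illustrative, assuming 4K pages and a
             * MAX_UPL_TRANSFER of 256): size is 1MB, so an offset of
             * 0x123456 masks down to a base_offset of 0x100000.
             */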

            if ((base_offset + size) > vnode_size)
                size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
        } else {
            /*
             * we've been requested to page out a page beyond the current
             * end of the 'file'... don't try to cluster in this case...
             * we still need to send this page through because it might
             * be marked precious and the underlying filesystem may need
             * to do something with it (besides page it out)...
             */
            base_offset = offset;
            size = PAGE_SIZE;
        }
        object = memory_object_control_to_vm_object(vnode_object->control_handle);

        if (object == VM_OBJECT_NULL)
            panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");

        request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
                        UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
                        UPL_SET_INTERNAL | UPL_SET_LITE;

        vm_object_upl_request(object, base_offset, size,
                              &upl, NULL, NULL, request_flags);
        if (upl == NULL)
            panic("vnode_pager_cluster_write: upl request failed\n");

        vnode_pageout(vnode_object->vnode_handle,
                      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
    }
}


/*
 *
 */
kern_return_t
vnode_pager_cluster_read(
    vnode_pager_t       vnode_object,
    vm_object_offset_t  offset,
    vm_size_t           cnt)
{
    int local_error = 0;
    int kret;

    assert(! (cnt & PAGE_MASK));

    kret = vnode_pagein(vnode_object->vnode_handle,
                        (upl_t) NULL,
                        (vm_offset_t) NULL,
                        offset,
                        cnt,
                        0,
                        &local_error);
    /*
     * A return of 1 below corresponds to PAGER_ABSENT as defined in
     * bsd/vm/vm_pager.h.  The definitions still need to be worked out:
     * we should not include that file here, since doing so would be a
     * layering violation, so the value is tested literally for now.
     */
    if (kret == 1) {
        int             uplflags;
        upl_t           upl = NULL;
        unsigned int    count = 0;
        kern_return_t   kr;

        uplflags = (UPL_NO_SYNC |
                    UPL_CLEAN_IN_PLACE |
                    UPL_SET_INTERNAL);
        count = 0;
        kr = memory_object_upl_request(vnode_object->control_handle,
                                       offset, cnt,
                                       &upl, NULL, &count, uplflags);
        if (kr == KERN_SUCCESS) {
            upl_abort(upl, 0);
            upl_deallocate(upl);
        } else {
            /*
             * We couldn't gather the page list, probably
             * because the memory object doesn't have a link
             * to a VM object anymore (forced unmount, for
             * example). Just return an error to the vm_fault()
             * path and let it handle it.
             */
        }

        return KERN_FAILURE;
    }

    return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
    int *cnt)
{
    memory_object_free_from_cache(
            &realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
    struct vnode *vp)
{
    register vnode_pager_t vnode_object;

    vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
    if (vnode_object == VNODE_PAGER_NULL)
        return(VNODE_PAGER_NULL);

    /*
     * The vm_map call takes both named entry ports and raw memory
     * objects in the same parameter. We need to make sure that
     * vm_map does not see this object as a named entry port. So,
     * we reserve the second word in the object for a fake ip_kotype
     * setting - that will tell vm_map to use it as a memory object.
     */
    vnode_object->pager_ops = &vnode_pager_ops;
    vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
    vnode_object->ref_count = 1;
    vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
    vnode_object->vnode_handle = vp;

    return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
    memory_object_t name)
{
    vnode_pager_t vnode_object;

    vnode_object = (vnode_pager_t)name;
    assert(vnode_object->pager_ops == &vnode_pager_ops);
    return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid);


int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid)
{

    vm_map_t map;
    vm_map_offset_t address = (vm_map_offset_t)arg;
    vm_map_entry_t tmp_entry;
    vm_map_entry_t entry;
    vm_map_offset_t start;
    vm_region_extended_info_data_t extended;
    vm_region_top_info_data_t top;

    task_lock(task);
    map = task->map;
    if (map == VM_MAP_NULL)
    {
        task_unlock(task);
        return(0);
    }
    vm_map_reference(map);
    task_unlock(task);

    vm_map_lock_read(map);

    start = address;
    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
        if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
            vm_map_unlock_read(map);
            vm_map_deallocate(map);
            return(0);
        }
    } else {
        entry = tmp_entry;
    }

    start = entry->vme_start;

    pinfo->pri_offset = entry->offset;
    pinfo->pri_protection = entry->protection;
    pinfo->pri_max_protection = entry->max_protection;
    pinfo->pri_inheritance = entry->inheritance;
    pinfo->pri_behavior = entry->behavior;
    pinfo->pri_user_wired_count = entry->user_wired_count;
    pinfo->pri_user_tag = entry->alias;

    if (entry->is_sub_map) {
        pinfo->pri_flags |= PROC_REGION_SUBMAP;
    } else {
        if (entry->is_shared)
            pinfo->pri_flags |= PROC_REGION_SHARED;
    }


    extended.protection = entry->protection;
    extended.user_tag = entry->alias;
    extended.pages_resident = 0;
    extended.pages_swapped_out = 0;
    extended.pages_shared_now_private = 0;
    extended.pages_dirtied = 0;
    extended.external_pager = 0;
    extended.shadow_depth = 0;

    vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
        extended.share_mode = SM_PRIVATE;

    top.private_pages_resident = 0;
    top.shared_pages_resident = 0;
    vm_map_region_top_walk(entry, &top);


    pinfo->pri_pages_resident = extended.pages_resident;
    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
    pinfo->pri_pages_dirtied = extended.pages_dirtied;
    pinfo->pri_ref_count = extended.ref_count;
    pinfo->pri_shadow_depth = extended.shadow_depth;
    pinfo->pri_share_mode = extended.share_mode;

    pinfo->pri_private_pages_resident = top.private_pages_resident;
    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
    pinfo->pri_obj_id = top.obj_id;

    pinfo->pri_address = (uint64_t)start;
    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
    pinfo->pri_depth = 0;

    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
        *vnodeaddr = (uint32_t)0;

        if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
            vm_map_unlock_read(map);
            vm_map_deallocate(map);
            return(1);
        }
    }

    vm_map_unlock_read(map);
    vm_map_deallocate(map);
    return(1);
}

static int
fill_vnodeinfoforaddr(
    vm_map_entry_t  entry,
    uint32_t        *vnodeaddr,
    uint32_t        *vid)
{
    vm_object_t top_object, object;
    memory_object_t memory_object;
    memory_object_pager_ops_t pager_ops;
    kern_return_t kr;
    int shadow_depth;


    if (entry->is_sub_map) {
        return(0);
    } else {
        /*
         * The last object in the shadow chain has the
         * relevant pager information.
         */
        top_object = entry->object.vm_object;
        if (top_object == VM_OBJECT_NULL) {
            object = VM_OBJECT_NULL;
            shadow_depth = 0;
        } else {
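            /*
             * Walk to the bottom of the shadow chain with
             * hand-over-hand locking: lock each shadow before
             * unlocking the object above it, so the chain can't
             * be torn down while we traverse it.
             */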
            vm_object_lock(top_object);
            for (object = top_object, shadow_depth = 0;
                 object->shadow != VM_OBJECT_NULL;
                 object = object->shadow, shadow_depth++) {
                vm_object_lock(object->shadow);
                vm_object_unlock(object);
            }
        }
    }

    if (object == VM_OBJECT_NULL) {
        return(0);
    } else if (object->internal) {
        vm_object_unlock(object);
        return(0);
    } else if (! object->pager_ready ||
               object->terminating ||
               ! object->alive) {
        vm_object_unlock(object);
        return(0);
    } else {
        memory_object = object->pager;
        pager_ops = memory_object->mo_pager_ops;
        if (pager_ops == &vnode_pager_ops) {
            kr = vnode_pager_get_object_vnode(
                    memory_object,
                    vnodeaddr, vid);
            if (kr != KERN_SUCCESS) {
                vm_object_unlock(object);
                return(0);
            }
        } else {
            vm_object_unlock(object);
            return(0);
        }
    }
    vm_object_unlock(object);
    return(1);
}

kern_return_t
vnode_pager_get_object_vnode(
    memory_object_t mem_obj,
    uint32_t        *vnodeaddr,
    uint32_t        *vid)
{
    vnode_pager_t vnode_object;

    vnode_object = vnode_pager_lookup(mem_obj);
    if (vnode_object->vnode_handle) {
        *vnodeaddr = (uint32_t)vnode_object->vnode_handle;
        *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

        return(KERN_SUCCESS);
    }

    return(KERN_FAILURE);
}