]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vnode_pager.c
c994f396ba5f821ae13b6c5af64c62f73de40145
[apple/xnu.git] / bsd / vm / vnode_pager.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Mach Operating System
24 * Copyright (c) 1987 Carnegie-Mellon University
25 * All rights reserved. The CMU software License Agreement specifies
26 * the terms and conditions for use and redistribution.
27 */
28 /*
29 * File: vnode_pager.c
30 *
31 * "Swap" pager that pages to/from vnodes. Also
32 * handles demand paging from files.
33 *
34 */
35
36 #include <mach/boolean.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/user.h>
40 #include <sys/proc.h>
41 #include <sys/kauth.h>
42 #include <sys/buf.h>
43 #include <sys/uio.h>
44 #include <sys/vnode_internal.h>
45 #include <sys/namei.h>
46 #include <sys/mount_internal.h> /* needs internal due to fhandle_t */
47 #include <sys/ubc_internal.h>
48 #include <sys/lock.h>
49
50 #include <mach/mach_types.h>
51 #include <mach/memory_object_types.h>
52
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55 #include <kern/zalloc.h>
56 #include <kern/kalloc.h>
57 #include <libkern/libkern.h>
58
59 #include <vm/vnode_pager.h>
60 #include <vm/vm_pageout.h>
61
62 #include <kern/assert.h>
63 #include <sys/kdebug.h>
64 #include <machine/spl.h>
65
66 #include <nfs/rpcv2.h>
67 #include <nfs/nfsproto.h>
68 #include <nfs/nfs.h>
69
70 #include <vm/vm_protos.h>
71
/*
 * Pager activity counters — statistics/debugging only; read via debugger
 * or kernel tooling, never consulted for control flow in this file.
 */
unsigned int vp_pagein=0;	/* pageins issued through vnode_pagein() */
unsigned int vp_pgodirty=0;	/* dirty pages pushed to VNOP_PAGEOUT by vnode_pageout() */
unsigned int vp_pgoclean=0;	/* clean/precious pages invalidated by vnode_pageout() */
unsigned int dp_pgouts=0;	/* Default pager pageouts */
unsigned int dp_pgins=0;	/* Default pager pageins */
77
78 vm_object_offset_t
79 vnode_pager_get_filesize(struct vnode *vp)
80 {
81
82 return (vm_object_offset_t) ubc_getsize(vp);
83 }
84
85 kern_return_t
86 vnode_pager_get_pathname(
87 struct vnode *vp,
88 char *pathname,
89 vm_size_t *length_p)
90 {
91 int error, len;
92
93 len = (int) *length_p;
94 error = vn_getpath(vp, pathname, &len);
95 if (error != 0) {
96 return KERN_FAILURE;
97 }
98 *length_p = (vm_size_t) len;
99 return KERN_SUCCESS;
100 }
101
102 kern_return_t
103 vnode_pager_get_filename(
104 struct vnode *vp,
105 char **filename)
106 {
107 *filename = vp->v_name;
108 return KERN_SUCCESS;
109 }
110
/*
 * vnode_pageout
 *
 * Push 'size' bytes of the UPL 'upl', starting at 'upl_offset' and
 * corresponding to file offset 'f_offset', out to the file backing 'vp'.
 *
 * Two distinct paths:
 *   - UPL_VNODE_PAGER clear: the request comes from the default pager,
 *     which has already computed the dirty ranges; VNOP_PAGEOUT is called
 *     once for the whole request.
 *   - UPL_VNODE_PAGER set: pageout/msync of a regular file; the UPL may
 *     interleave absent, clean and dirty pages, so we walk it, invalidate
 *     clean/precious pages, and issue VNOP_PAGEOUT only for runs of
 *     consecutive dirty pages.
 *
 * Unless the caller passed UPL_NOCOMMIT, this routine commits or aborts
 * every page of the UPL it examines.
 *
 * Returns PAGER_SUCCESS or PAGER_ERROR; if errorp is non-NULL it receives
 * the first errno-style error encountered (0 if none).
 */
pager_return_t
vnode_pageout(struct vnode *vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct proc	*p = current_proc();
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		error_ret = 0;	/* first error seen; reported via *errorp */
	daddr64_t	blkno;
	int		isize;		/* bytes of the request still to process */
	int		pg_index;
	int		base_index;	/* page index of the start of the request */
	int		offset;		/* current byte offset within the UPL */
	upl_page_info_t *pl;
	struct vfs_context context;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	isize = (int)size;

	if (isize <= 0) {
		result = PAGER_ERROR;
		error_ret = EINVAL;
		goto out;
	}
	UBCINFOCHECK("vnode_pageout", vp);

	if (UBCINVALID(vp)) {
		/* vnode is not (or no longer) UBC-backed; nothing we can do */
		result = PAGER_ERROR;
		error_ret = EINVAL;

		if (upl && !(flags & UPL_NOCOMMIT))
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
		goto out;
	}
	if ( !(flags & UPL_VNODE_PAGER)) {
		/*
		 * This is a pageout from the default pager,
		 * just go ahead and call vnop_pageout since
		 * it has already sorted out the dirty ranges
		 */
		dp_pgouts++;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      size, 1, 0, 0, 0);

		if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
					       (size_t)size, flags, &context)) )
			result = PAGER_ERROR;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      size, 1, 0, 0, 0);

		goto out;
	}
	/*
	 * we come here for pageouts to 'real' files and
	 * for msyncs... the upl may not contain any
	 * dirty pages.. it's our responsibility to sort
	 * through it and find the 'runs' of dirty pages
	 * to call VNOP_PAGEOUT on...
	 */
	pl = ubc_upl_pageinfo(upl);

	if (ubc_getsize(vp) == 0) {
		/*
		 * if the file has been effectively deleted, then
		 * we need to go through the UPL and invalidate any
		 * buffer headers we might have that reference any
		 * of it's pages
		 */
		for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)f_offset);
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)f_offset);
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				/* abort just this page; remember the first failure */
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			/* advance the file offset in step with the UPL offset */
			f_offset += PAGE_SIZE;
		}
		goto out;
	}
	/*
	 * Ignore any non-present pages at the end of the
	 * UPL so that we aren't looking at a upl that
	 * may already have been freed by the preceeding
	 * aborts/completions.
	 */
	base_index = upl_offset / PAGE_SIZE;

	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == base_index) {
			/*
			 * no pages were returned, so release
			 * our hold on the upl and leave
			 */
			if ( !(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);

			goto out;
		}
	}
	/* trim the request to end at the last present page */
	isize = (pg_index + 1) * PAGE_SIZE;

	offset = upl_offset;
	pg_index = base_index;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_DIRTY, so it's possible
			 * to get back empty slots in the UPL
			 * just skip over them
			 */
			offset += PAGE_SIZE;
			isize -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		if ( !upl_dirty_page(pl, pg_index)) {
			/*
			 * if the page is not dirty and reached here it is
			 * marked precious or it is due to invalidation in
			 * memory_object_lock request as part of truncation
			 * We also get here from vm_object_terminate()
			 * So all you need to do in these
			 * cases is to invalidate incore buffer if it is there
			 * Note we must not sleep here if the buffer is busy - that is
			 * a lock inversion which causes deadlock.
			 */
			vp_pgoclean++;

#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)(f_offset + offset));
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset));
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			offset += PAGE_SIZE;
			isize -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		vp_pgodirty++;

		/*
		 * found a dirty page: extend the run over all consecutive
		 * dirty pages so VNOP_PAGEOUT gets one contiguous request
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_dirty_page(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      xsize, (int)(f_offset + offset), 0, 0, 0);

		if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset,
					   (off_t)(f_offset + offset), xsize,
					   flags, &context)) ) {
			if (error_ret == 0)
				error_ret = error;
			result = PAGER_ERROR;
		}
		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      xsize, 0, 0, 0, 0);

		offset += xsize;
		isize -= xsize;
		pg_index += num_of_pages;
	}
out:
	if (errorp)
		*errorp = error_ret;

	return (result);
}
331
332
333 void IOSleep(int);
334
/*
 * vnode_pagein
 *
 * Bring pages of the file backing 'vp' into memory.  The request covers
 * 'size' bytes at file offset 'f_offset'; if the caller supplied no UPL,
 * one is created here (UPL_RET_ONLY_ABSENT | UPL_SET_LITE) and this
 * routine then owns its commit/abort regardless of UPL_NOCOMMIT.
 *
 * The UPL is walked to skip absent slots and already-valid pages
 * (which are aborted back to the cache unchanged) and VNOP_PAGEIN is
 * issued only for runs of present-but-invalid pages.
 *
 * Returns PAGER_SUCCESS, PAGER_ERROR, or PAGER_ABSENT (UPL creation
 * found nothing absent to read).
 *
 * NOTE(review): on exit *errorp receives 'result' while 'error' is the
 * return value — the reverse of vnode_pageout()'s convention.  Both carry
 * PAGER_* values here (not errnos), so callers appear to see consistent
 * success/failure either way — confirm before relying on *errorp.
 */
pager_return_t
vnode_pagein(
	struct vnode		*vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct proc	*p = current_proc();
	struct uthread	*ut;
	upl_page_info_t *pl;
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		pages_in_upl;
	int		start_pg;
	int		last_pg;
	int		first_pg;
	int		xsize;
	int		abort_needed = 1;	/* set to 0 once any page is dispatched */


	UBCINFOCHECK("vnode_pagein", vp);

	if (UBCINVALID(vp)) {
		/* vnode is not UBC-backed; fail the whole request */
		result = PAGER_ERROR;
		error = PAGER_ERROR;
		if (upl && !(flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
		}
		goto out;
	}
	if (upl == (upl_t)NULL) {
		if (size > (MAX_UPL_TRANSFER * PAGE_SIZE)) {
			result = PAGER_ERROR;
			error = PAGER_ERROR;
			goto out;
		}
		/* build a UPL covering only the absent pages of the range */
		ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_RET_ONLY_ABSENT | UPL_SET_LITE);

		if (upl == (upl_t)NULL) {
			result = PAGER_ABSENT;
			error = PAGER_ABSENT;
			goto out;
		}
		upl_offset = 0;
		/*
		 * if we get here, we've created the upl and
		 * are responsible for commiting/aborting it
		 * regardless of what the caller has passed in
		 */
		flags &= ~UPL_NOCOMMIT;

		vp_pagein++;
	} else {
		pl = ubc_upl_pageinfo(upl);

		dp_pgins++;
	}
	pages_in_upl = size / PAGE_SIZE;
	first_pg = upl_offset / PAGE_SIZE;

	/*
	 * before we start marching forward, we must make sure we end on
	 * a present page, otherwise we will be working with a freed
	 * upl
	 */
	for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
		if (upl_page_present(pl, last_pg))
			break;
	}
	pages_in_upl = last_pg + 1;

	for (last_pg = first_pg; last_pg < pages_in_upl;) {
		/*
		 * scan the upl looking for the next
		 * page that is present.... if all of the
		 * pages are absent, we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg == pages_in_upl)
			break;

		/*
		 * if we get here, we've sitting on a page
		 * that is present... we want to skip over
		 * any range of 'valid' pages... if this takes
		 * us to the end of the request, than we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (!upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			/*
			 * we've found a range of valid pages
			 * if we've got COMMIT responsibility
			 * commit this range of pages back to the
			 * cache unchanged
			 */
			xsize = (last_pg - start_pg) * PAGE_SIZE;

			if (!(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);

			abort_needed = 0;
		}
		if (last_pg == pages_in_upl)
			break;

		if (!upl_page_present(pl, last_pg))
			/*
			 * if we found a range of valid pages
			 * terminated by a non-present page
			 * than start over
			 */
			continue;

		/*
		 * scan from the found invalid page looking for a valid
		 * or non-present page before the end of the upl is reached, if we
		 * find one, then it will be the last page of the request to
		 * 'cluster_io'
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			int xoff;
			struct vfs_context context;

			context.vc_proc = p;
			context.vc_ucred = kauth_cred_get();
			xsize = (last_pg - start_pg) * PAGE_SIZE;
			xoff  = start_pg * PAGE_SIZE;

			/* hand the invalid run to the filesystem to read in */
			if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff,
					       (off_t)f_offset + xoff,
					       xsize, flags, &context)) ) {
				result = PAGER_ERROR;
				error  = PAGER_ERROR;

			}
			abort_needed = 0;
		}
	}
	if (!(flags & UPL_NOCOMMIT) && abort_needed)
		/* nothing was dispatched: release our hold on the entire range */
		ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
out:
	if (errorp)
		*errorp = result;

	ut = get_bsdthread_info(current_thread());

	if (ut->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(ut->uu_lowpri_delay);
		ut->uu_lowpri_delay = 0;
	}
	return (error);
}
507
508 void
509 vnode_pager_shutdown(void)
510 {
511 int i;
512 vnode_t vp;
513
514 for(i = 0; i < MAX_BACKING_STORE; i++) {
515 vp = (vnode_t)(bs_port_table[i]).vp;
516 if (vp) {
517 (bs_port_table[i]).vp = 0;
518
519 /* get rid of macx_swapon() reference */
520 vnode_rele(vp);
521 }
522 }
523 }
524
525
526 void *
527 upl_get_internal_page_list(upl_t upl)
528 {
529 return(UPL_GET_INTERNAL_PAGE_LIST(upl));
530
531 }