/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 *	File:	vnode_pager.c
 *
 *	"Swap" pager that pages to/from vnodes.  Also
 *	handles demand paging from files.
 *
 */
35
36#include <mach/boolean.h>
37#include <sys/param.h>
38#include <sys/systm.h>
91447636 39#include <sys/user.h>
1c79356b 40#include <sys/proc.h>
91447636 41#include <sys/kauth.h>
1c79356b
A
42#include <sys/buf.h>
43#include <sys/uio.h>
91447636 44#include <sys/vnode_internal.h>
1c79356b 45#include <sys/namei.h>
91447636
A
46#include <sys/mount_internal.h> /* needs internal due to fhandle_t */
47#include <sys/ubc_internal.h>
1c79356b
A
48#include <sys/lock.h>
49
50#include <mach/mach_types.h>
51#include <mach/memory_object_types.h>
52
53#include <vm/vm_map.h>
54#include <vm/vm_kern.h>
1c79356b
A
55#include <kern/zalloc.h>
56#include <kern/kalloc.h>
57#include <libkern/libkern.h>
58
59#include <vm/vnode_pager.h>
60#include <vm/vm_pageout.h>
61
62#include <kern/assert.h>
9bccf70c 63#include <sys/kdebug.h>
91447636
A
64#include <machine/spl.h>
65
66#include <nfs/rpcv2.h>
67#include <nfs/nfsproto.h>
68#include <nfs/nfs.h>
69
70#include <vm/vm_protos.h>
1c79356b
A
71
/*
 * Pager statistics counters (observability only; updated without
 * synchronization — NOTE(review): increments are not atomic, so counts
 * are approximate under concurrency; confirm that is acceptable).
 */
unsigned int vp_pagein=0;	/* pageins where vnode_pagein() created its own UPL */
unsigned int vp_pgodirty=0;	/* dirty pages pushed to disk by vnode_pageout() */
unsigned int vp_pgoclean=0;	/* clean/precious pages invalidated by vnode_pageout() */
unsigned int dp_pgouts=0;	/* Default pager pageouts */
unsigned int dp_pgins=0;	/* Default pager pageins */
77
0b4e3aa0
A
78vm_object_offset_t
79vnode_pager_get_filesize(struct vnode *vp)
80{
0b4e3aa0
A
81
82 return (vm_object_offset_t) ubc_getsize(vp);
0b4e3aa0
A
83}
84
1c79356b
A
/*
 * vnode_pageout
 *
 * Write back (page out) the pages described by 'upl' to the file
 * backing 'vp'.
 *
 * Parameters:
 *	vp		vnode backing the pages
 *	upl		universal page list covering the request
 *	upl_offset	byte offset into the UPL where this request starts
 *	f_offset	byte offset into the file corresponding to upl_offset
 *	size		number of bytes to page out
 *	flags		UPL_* control flags (UPL_NOCOMMIT, UPL_VNODE_PAGER, ...)
 *	errorp		optional out-parameter; receives the first errno
 *			encountered (0 on success)
 *
 * Returns: PAGER_SUCCESS, or PAGER_ERROR if any range failed.
 */
pager_return_t
vnode_pageout(struct vnode *vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct proc	*p = current_proc();
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		error_ret = 0;	/* first errno seen; reported via *errorp */
	daddr64_t	blkno;
	int		isize;
	int		pg_index;
	int		base_index;
	int		offset;
	upl_page_info_t	*pl;
	struct vfs_context context;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	isize = (int)size;

	if (isize <= 0) {
		result    = PAGER_ERROR;
		error_ret = EINVAL;
		goto out;
	}
	UBCINFOCHECK("vnode_pageout", vp);

	if (UBCINVALID(vp)) {
		result    = PAGER_ERROR;
		error_ret = EINVAL;

		/* caller retains commit responsibility when UPL_NOCOMMIT is set */
		if (upl && !(flags & UPL_NOCOMMIT))
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
		goto out;
	}
	if ( !(flags & UPL_VNODE_PAGER)) {
		/*
		 * This is a pageout from the default pager,
		 * just go ahead and call vnop_pageout since
		 * it has already sorted out the dirty ranges
		 */
		dp_pgouts++;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      size, 1, 0, 0, 0);

		if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
					       (size_t)size, flags, &context)) )
			result = PAGER_ERROR;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      size, 1, 0, 0, 0);

		goto out;
	}
	/*
	 * we come here for pageouts to 'real' files and
	 * for msyncs...  the upl may not contain any
	 * dirty pages.. it's our responsibility to sort
	 * through it and find the 'runs' of dirty pages
	 * to call VNOP_PAGEOUT on...
	 */
	pl = ubc_upl_pageinfo(upl);

	if (ubc_getsize(vp) == 0) {
		/*
		 * if the file has been effectively deleted, then
		 * we need to go through the UPL and invalidate any
		 * buffer headers we might have that reference any
		 * of it's pages
		 */
		for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)f_offset);
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)f_offset);
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				/* remember only the FIRST error for the caller */
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			/* advance the file offset in step with the UPL offset */
			f_offset += PAGE_SIZE;
		}
		goto out;
	}
	/*
	 * Ignore any non-present pages at the end of the
	 * UPL so that we aren't looking at a upl that
	 * may already have been freed by the preceeding
	 * aborts/completions.
	 */
	base_index = upl_offset / PAGE_SIZE;

	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == base_index) {
			/*
			 * no pages were returned, so release
			 * our hold on the upl and leave
			 */
			if ( !(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);

			goto out;
		}
	}
	/* trim the request to end at the last present page */
	isize = (pg_index + 1) * PAGE_SIZE;

	offset = upl_offset;
	pg_index = base_index;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_DIRTY, so it's possible
			 * to get back empty slots in the UPL
			 * just skip over them
			 */
			offset += PAGE_SIZE;
			isize  -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		if ( !upl_dirty_page(pl, pg_index)) {
			/*
			 * if the page is not dirty and reached here it is
			 * marked precious or it is due to invalidation in
			 * memory_object_lock request as part of truncation
			 * We also get here from vm_object_terminate()
			 * So all you need to do in these
			 * cases is to invalidate incore buffer if it is there
			 * Note we must not sleep here if the buffer is busy - that is
			 * a lock inversion which causes deadlock.
			 */
			vp_pgoclean++;

#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)(f_offset + offset));
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset));
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			offset += PAGE_SIZE;
			isize  -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		vp_pgodirty++;

		/*
		 * coalesce the run of consecutive dirty pages starting at
		 * pg_index so VNOP_PAGEOUT is issued once per run
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_dirty_page(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      xsize, (int)(f_offset + offset), 0, 0, 0);

		if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset,
					   (off_t)(f_offset + offset), xsize,
					   flags, &context)) ) {
			if (error_ret == 0)
				error_ret = error;
			result = PAGER_ERROR;
		}
		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      xsize, 0, 0, 0, 0);

		offset += xsize;
		isize  -= xsize;
		pg_index += num_of_pages;
	}
out:
	if (errorp)
		*errorp = error_ret;

	return (result);
}
305
306
91447636
A
307void IOSleep(int);
308
1c79356b
A
/*
 * vnode_pagein
 *
 * Read (page in) file data for the given range into memory.  If the
 * caller passes a NULL upl, one is created here (UPL_RET_ONLY_ABSENT)
 * and this routine takes over commit/abort responsibility for it.
 *
 * Parameters:
 *	vp		vnode backing the pages
 *	upl		universal page list, or NULL to have one created
 *	upl_offset	byte offset into the UPL where this request starts
 *	f_offset	byte offset into the file for the request
 *	size		number of bytes to page in
 *	flags		UPL_* control flags (UPL_NOCOMMIT, ...)
 *	errorp		optional out-parameter
 *
 * Returns: PAGER_SUCCESS, PAGER_ABSENT, or PAGER_ERROR.
 *
 * NOTE(review): on exit *errorp is assigned 'result' (a pager_return_t),
 * not an errno — this differs from vnode_pageout(), which stores an
 * errno; confirm callers expect that asymmetry.
 */
pager_return_t
vnode_pagein(
	struct vnode 		*vp,
	upl_t        		 upl,
	vm_offset_t  		 upl_offset,
	vm_object_offset_t	 f_offset,
	vm_size_t     		 size,
	int           		 flags,
	int           		*errorp)
{
	struct proc	*p = current_proc();
	struct uthread	*ut;
	upl_page_info_t	*pl;
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		pages_in_upl;
	int		start_pg;
	int		last_pg;
	int		first_pg;
	int		xsize;
	int		abort_needed = 1;	/* cleared once any range is committed/issued */


	UBCINFOCHECK("vnode_pagein", vp);

	if (UBCINVALID(vp)) {
		result = PAGER_ERROR;
		error  = PAGER_ERROR;
		if (upl && !(flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
		}
		goto out;
	}
	if (upl == (upl_t)NULL) {
		/* we must build the UPL ourselves; bound the request size */
		if (size > (MAX_UPL_TRANSFER * PAGE_SIZE)) {
			result = PAGER_ERROR;
			error  = PAGER_ERROR;
			goto out;
		}
		ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_RET_ONLY_ABSENT | UPL_SET_LITE);

		if (upl == (upl_t)NULL) {
			result = PAGER_ABSENT;
			error  = PAGER_ABSENT;
			goto out;
		}
		upl_offset = 0;
		/*
		 * if we get here, we've created the upl and
		 * are responsible for commiting/aborting it
		 * regardless of what the caller has passed in
		 */
		flags &= ~UPL_NOCOMMIT;

		vp_pagein++;
	} else {
		pl = ubc_upl_pageinfo(upl);

		dp_pgins++;
	}
	pages_in_upl = size / PAGE_SIZE;
	first_pg     = upl_offset / PAGE_SIZE;

	/*
	 * before we start marching forward, we must make sure we end on
	 * a present page, otherwise we will be working with a freed
	 * upl
	 */
	for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
		if (upl_page_present(pl, last_pg))
			break;
	}
	pages_in_upl = last_pg + 1;

	for (last_pg = first_pg; last_pg < pages_in_upl;) {
		/*
		 * scan the upl looking for the next
		 * page that is present.... if all of the
		 * pages are absent, we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg == pages_in_upl)
			break;

		/*
		 * if we get here, we've sitting on a page
		 * that is present... we want to skip over
		 * any range of 'valid' pages... if this takes
		 * us to the end of the request, than we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (!upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			/*
			 * we've found a range of valid pages
			 * if we've got COMMIT responsibility
			 * commit this range of pages back to the
			 * cache unchanged
			 */
			xsize = (last_pg - start_pg) * PAGE_SIZE;

			if (!(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);

			abort_needed = 0;
		}
		if (last_pg == pages_in_upl)
			break;

		if (!upl_page_present(pl, last_pg))
			/*
			 * if we found a range of valid pages
			 * terminated by a non-present page
			 * than start over
			 */
			continue;

		/*
		 * scan from the found invalid page looking for a valid
		 * or non-present page before the end of the upl is reached, if we
		 * find one, then it will be the last page of the request to
		 * 'cluster_io'
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			int xoff;
			struct vfs_context context;

			context.vc_proc = p;
			context.vc_ucred = kauth_cred_get();
			xsize = (last_pg - start_pg) * PAGE_SIZE;
			xoff  = start_pg * PAGE_SIZE;

			if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff,
						  (off_t)f_offset + xoff,
						  xsize, flags, &context)) ) {
				result = PAGER_ERROR;
				error  = PAGER_ERROR;

			}
			abort_needed = 0;
		}
	}
	/* nothing was committed or issued above: release the whole range */
	if (!(flags & UPL_NOCOMMIT) && abort_needed)
		ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
out:
	if (errorp)
		*errorp = result;

	ut = get_bsdthread_info(current_thread());

	if (ut->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(ut->uu_lowpri_delay);
		ut->uu_lowpri_delay = 0;
	}
	return (error);
}
481
482void
91447636 483vnode_pager_shutdown(void)
1c79356b
A
484{
485 int i;
91447636 486 vnode_t vp;
1c79356b
A
487
488 for(i = 0; i < MAX_BACKING_STORE; i++) {
91447636 489 vp = (vnode_t)(bs_port_table[i]).vp;
1c79356b
A
490 if (vp) {
491 (bs_port_table[i]).vp = 0;
1c79356b 492
91447636
A
493 /* get rid of macx_swapon() reference */
494 vnode_rele(vp);
1c79356b
A
495 }
496 }
497}
498
0b4e3aa0
A
499
500void *
1c79356b
A
501upl_get_internal_page_list(upl_t upl)
502{
0b4e3aa0 503 return(UPL_GET_INTERNAL_PAGE_LIST(upl));
1c79356b
A
504
505}