]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vnode_pager.c
0aa340f045b3923746c62b5bd095162f148fbc7d
[apple/xnu.git] / bsd / vm / vnode_pager.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * Mach Operating System
25 * Copyright (c) 1987 Carnegie-Mellon University
26 * All rights reserved. The CMU software License Agreement specifies
27 * the terms and conditions for use and redistribution.
28 */
29 /*
30 * File: vnode_pager.c
31 *
32 * "Swap" pager that pages to/from vnodes. Also
33 * handles demand paging from files.
34 *
35 */
36
37 #include <mach/boolean.h>
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/user.h>
41 #include <sys/proc.h>
42 #include <sys/kauth.h>
43 #include <sys/buf.h>
44 #include <sys/uio.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/namei.h>
47 #include <sys/mount_internal.h> /* needs internal due to fhandle_t */
48 #include <sys/ubc_internal.h>
49 #include <sys/lock.h>
50
51 #include <mach/mach_types.h>
52 #include <mach/memory_object_types.h>
53
54 #include <vm/vm_map.h>
55 #include <vm/vm_kern.h>
56 #include <kern/zalloc.h>
57 #include <kern/kalloc.h>
58 #include <libkern/libkern.h>
59
60 #include <vm/vnode_pager.h>
61 #include <vm/vm_pageout.h>
62
63 #include <kern/assert.h>
64 #include <sys/kdebug.h>
65 #include <machine/spl.h>
66
67 #include <nfs/rpcv2.h>
68 #include <nfs/nfsproto.h>
69 #include <nfs/nfs.h>
70
71 #include <vm/vm_protos.h>
72
/*
 * Paging statistics counters, bumped by vnode_pagein() and vnode_pageout().
 */
unsigned int vp_pagein   = 0;	/* pageins where vnode_pagein() created its own UPL */
unsigned int vp_pgodirty = 0;	/* runs of dirty pages pushed by vnode_pageout() */
unsigned int vp_pgoclean = 0;	/* clean pages invalidated by vnode_pageout() */
unsigned int dp_pgouts   = 0;	/* Default pager pageouts */
unsigned int dp_pgins    = 0;	/* Default pager pageins */
78
79 vm_object_offset_t
80 vnode_pager_get_filesize(struct vnode *vp)
81 {
82
83 return (vm_object_offset_t) ubc_getsize(vp);
84 }
85
86 pager_return_t
87 vnode_pageout(struct vnode *vp,
88 upl_t upl,
89 vm_offset_t upl_offset,
90 vm_object_offset_t f_offset,
91 vm_size_t size,
92 int flags,
93 int *errorp)
94 {
95 struct proc *p = current_proc();
96 int result = PAGER_SUCCESS;
97 int error = 0;
98 int error_ret = 0;
99 daddr64_t blkno;
100 int isize;
101 int pg_index;
102 int base_index;
103 int offset;
104 upl_page_info_t *pl;
105 struct vfs_context context;
106
107 context.vc_proc = p;
108 context.vc_ucred = kauth_cred_get();
109
110 isize = (int)size;
111
112 if (isize <= 0) {
113 result = PAGER_ERROR;
114 error_ret = EINVAL;
115 goto out;
116 }
117 UBCINFOCHECK("vnode_pageout", vp);
118
119 if (UBCINVALID(vp)) {
120 result = PAGER_ERROR;
121 error_ret = EINVAL;
122
123 if (upl && !(flags & UPL_NOCOMMIT))
124 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
125 goto out;
126 }
127 if ( !(flags & UPL_VNODE_PAGER)) {
128 /*
129 * This is a pageout from the default pager,
130 * just go ahead and call vnop_pageout since
131 * it has already sorted out the dirty ranges
132 */
133 dp_pgouts++;
134
135 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
136 size, 1, 0, 0, 0);
137
138 if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
139 (size_t)size, flags, &context)) )
140 result = PAGER_ERROR;
141
142 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
143 size, 1, 0, 0, 0);
144
145 goto out;
146 }
147 /*
148 * we come here for pageouts to 'real' files and
149 * for msyncs... the upl may not contain any
150 * dirty pages.. it's our responsibility to sort
151 * through it and find the 'runs' of dirty pages
152 * to call VNOP_PAGEOUT on...
153 */
154 pl = ubc_upl_pageinfo(upl);
155
156 if (ubc_getsize(vp) == 0) {
157 /*
158 * if the file has been effectively deleted, then
159 * we need to go through the UPL and invalidate any
160 * buffer headers we might have that reference any
161 * of it's pages
162 */
163 for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
164 #if NFSCLIENT
165 if (vp->v_tag == VT_NFS)
166 /* check with nfs if page is OK to drop */
167 error = nfs_buf_page_inval(vp, (off_t)f_offset);
168 else
169 #endif
170 {
171 blkno = ubc_offtoblk(vp, (off_t)f_offset);
172 error = buf_invalblkno(vp, blkno, 0);
173 }
174 if (error) {
175 if ( !(flags & UPL_NOCOMMIT))
176 ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
177 if (error_ret == 0)
178 error_ret = error;
179 result = PAGER_ERROR;
180
181 } else if ( !(flags & UPL_NOCOMMIT)) {
182 ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
183 }
184 f_offset += PAGE_SIZE;
185 }
186 goto out;
187 }
188 /*
189 * Ignore any non-present pages at the end of the
190 * UPL so that we aren't looking at a upl that
191 * may already have been freed by the preceeding
192 * aborts/completions.
193 */
194 base_index = upl_offset / PAGE_SIZE;
195
196 for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
197 if (upl_page_present(pl, --pg_index))
198 break;
199 if (pg_index == base_index) {
200 /*
201 * no pages were returned, so release
202 * our hold on the upl and leave
203 */
204 if ( !(flags & UPL_NOCOMMIT))
205 ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);
206
207 goto out;
208 }
209 }
210 isize = (pg_index + 1) * PAGE_SIZE;
211
212 offset = upl_offset;
213 pg_index = base_index;
214
215 while (isize) {
216 int xsize;
217 int num_of_pages;
218
219 if ( !upl_page_present(pl, pg_index)) {
220 /*
221 * we asked for RET_ONLY_DIRTY, so it's possible
222 * to get back empty slots in the UPL
223 * just skip over them
224 */
225 offset += PAGE_SIZE;
226 isize -= PAGE_SIZE;
227 pg_index++;
228
229 continue;
230 }
231 if ( !upl_dirty_page(pl, pg_index)) {
232 /*
233 * if the page is not dirty and reached here it is
234 * marked precious or it is due to invalidation in
235 * memory_object_lock request as part of truncation
236 * We also get here from vm_object_terminate()
237 * So all you need to do in these
238 * cases is to invalidate incore buffer if it is there
239 * Note we must not sleep here if the buffer is busy - that is
240 * a lock inversion which causes deadlock.
241 */
242 vp_pgoclean++;
243
244 #if NFSCLIENT
245 if (vp->v_tag == VT_NFS)
246 /* check with nfs if page is OK to drop */
247 error = nfs_buf_page_inval(vp, (off_t)(f_offset + offset));
248 else
249 #endif
250 {
251 blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset));
252 error = buf_invalblkno(vp, blkno, 0);
253 }
254 if (error) {
255 if ( !(flags & UPL_NOCOMMIT))
256 ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
257 if (error_ret == 0)
258 error_ret = error;
259 result = PAGER_ERROR;
260
261 } else if ( !(flags & UPL_NOCOMMIT)) {
262 ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
263 }
264 offset += PAGE_SIZE;
265 isize -= PAGE_SIZE;
266 pg_index++;
267
268 continue;
269 }
270 vp_pgodirty++;
271
272 num_of_pages = 1;
273 xsize = isize - PAGE_SIZE;
274
275 while (xsize) {
276 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
277 break;
278 num_of_pages++;
279 xsize -= PAGE_SIZE;
280 }
281 xsize = num_of_pages * PAGE_SIZE;
282
283 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
284 xsize, (int)(f_offset + offset), 0, 0, 0);
285
286 if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset,
287 (off_t)(f_offset + offset), xsize,
288 flags, &context)) ) {
289 if (error_ret == 0)
290 error_ret = error;
291 result = PAGER_ERROR;
292 }
293 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
294 xsize, 0, 0, 0, 0);
295
296 offset += xsize;
297 isize -= xsize;
298 pg_index += num_of_pages;
299 }
300 out:
301 if (errorp)
302 *errorp = error_ret;
303
304 return (result);
305 }
306
307
308 void IOSleep(int);
309
310 pager_return_t
311 vnode_pagein(
312 struct vnode *vp,
313 upl_t upl,
314 vm_offset_t upl_offset,
315 vm_object_offset_t f_offset,
316 vm_size_t size,
317 int flags,
318 int *errorp)
319 {
320 struct proc *p = current_proc();
321 struct uthread *ut;
322 upl_page_info_t *pl;
323 int result = PAGER_SUCCESS;
324 int error = 0;
325 int pages_in_upl;
326 int start_pg;
327 int last_pg;
328 int first_pg;
329 int xsize;
330 int abort_needed = 1;
331
332
333 UBCINFOCHECK("vnode_pagein", vp);
334
335 if (UBCINVALID(vp)) {
336 result = PAGER_ERROR;
337 error = PAGER_ERROR;
338 if (upl && !(flags & UPL_NOCOMMIT)) {
339 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
340 }
341 goto out;
342 }
343 if (upl == (upl_t)NULL) {
344 if (size > (MAX_UPL_TRANSFER * PAGE_SIZE)) {
345 result = PAGER_ERROR;
346 error = PAGER_ERROR;
347 goto out;
348 }
349 ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_RET_ONLY_ABSENT | UPL_SET_LITE);
350
351 if (upl == (upl_t)NULL) {
352 result = PAGER_ABSENT;
353 error = PAGER_ABSENT;
354 goto out;
355 }
356 upl_offset = 0;
357 /*
358 * if we get here, we've created the upl and
359 * are responsible for commiting/aborting it
360 * regardless of what the caller has passed in
361 */
362 flags &= ~UPL_NOCOMMIT;
363
364 vp_pagein++;
365 } else {
366 pl = ubc_upl_pageinfo(upl);
367
368 dp_pgins++;
369 }
370 pages_in_upl = size / PAGE_SIZE;
371 first_pg = upl_offset / PAGE_SIZE;
372
373 /*
374 * before we start marching forward, we must make sure we end on
375 * a present page, otherwise we will be working with a freed
376 * upl
377 */
378 for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
379 if (upl_page_present(pl, last_pg))
380 break;
381 }
382 pages_in_upl = last_pg + 1;
383
384 for (last_pg = first_pg; last_pg < pages_in_upl;) {
385 /*
386 * scan the upl looking for the next
387 * page that is present.... if all of the
388 * pages are absent, we're done
389 */
390 for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
391 if (upl_page_present(pl, last_pg))
392 break;
393 }
394 if (last_pg == pages_in_upl)
395 break;
396
397 /*
398 * if we get here, we've sitting on a page
399 * that is present... we want to skip over
400 * any range of 'valid' pages... if this takes
401 * us to the end of the request, than we're done
402 */
403 for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
404 if (!upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
405 break;
406 }
407 if (last_pg > start_pg) {
408 /*
409 * we've found a range of valid pages
410 * if we've got COMMIT responsibility
411 * commit this range of pages back to the
412 * cache unchanged
413 */
414 xsize = (last_pg - start_pg) * PAGE_SIZE;
415
416 if (!(flags & UPL_NOCOMMIT))
417 ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);
418
419 abort_needed = 0;
420 }
421 if (last_pg == pages_in_upl)
422 break;
423
424 if (!upl_page_present(pl, last_pg))
425 /*
426 * if we found a range of valid pages
427 * terminated by a non-present page
428 * than start over
429 */
430 continue;
431
432 /*
433 * scan from the found invalid page looking for a valid
434 * or non-present page before the end of the upl is reached, if we
435 * find one, then it will be the last page of the request to
436 * 'cluster_io'
437 */
438 for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
439 if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
440 break;
441 }
442 if (last_pg > start_pg) {
443 int xoff;
444 struct vfs_context context;
445
446 context.vc_proc = p;
447 context.vc_ucred = kauth_cred_get();
448 xsize = (last_pg - start_pg) * PAGE_SIZE;
449 xoff = start_pg * PAGE_SIZE;
450
451 if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff,
452 (off_t)f_offset + xoff,
453 xsize, flags, &context)) ) {
454 result = PAGER_ERROR;
455 error = PAGER_ERROR;
456
457 }
458 abort_needed = 0;
459 }
460 }
461 if (!(flags & UPL_NOCOMMIT) && abort_needed)
462 ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
463 out:
464 if (errorp)
465 *errorp = result;
466
467 ut = get_bsdthread_info(current_thread());
468
469 if (ut->uu_lowpri_delay) {
470 /*
471 * task is marked as a low priority I/O type
472 * and the I/O we issued while in this system call
473 * collided with normal I/O operations... we'll
474 * delay in order to mitigate the impact of this
475 * task on the normal operation of the system
476 */
477 IOSleep(ut->uu_lowpri_delay);
478 ut->uu_lowpri_delay = 0;
479 }
480 return (error);
481 }
482
483 void
484 vnode_pager_shutdown(void)
485 {
486 int i;
487 vnode_t vp;
488
489 for(i = 0; i < MAX_BACKING_STORE; i++) {
490 vp = (vnode_t)(bs_port_table[i]).vp;
491 if (vp) {
492 (bs_port_table[i]).vp = 0;
493
494 /* get rid of macx_swapon() reference */
495 vnode_rele(vp);
496 }
497 }
498 }
499
500
501 void *
502 upl_get_internal_page_list(upl_t upl)
503 {
504 return(UPL_GET_INTERNAL_PAGE_LIST(upl));
505
506 }