]> git.saurik.com Git - apple/xnu.git/blob - bsd/vm/vnode_pager.c
c994f396ba5f821ae13b6c5af64c62f73de40145
[apple/xnu.git] / bsd / vm / vnode_pager.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * Mach Operating System
24 * Copyright (c) 1987 Carnegie-Mellon University
25 * All rights reserved. The CMU software License Agreement specifies
26 * the terms and conditions for use and redistribution.
27 */
28 /*
29 * File: vnode_pager.c
30 *
31 * "Swap" pager that pages to/from vnodes. Also
32 * handles demand paging from files.
33 *
34 */
35
36 #include <mach/boolean.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/user.h>
40 #include <sys/proc.h>
41 #include <sys/kauth.h>
42 #include <sys/buf.h>
43 #include <sys/uio.h>
44 #include <sys/vnode_internal.h>
45 #include <sys/namei.h>
46 #include <sys/mount_internal.h> /* needs internal due to fhandle_t */
47 #include <sys/ubc_internal.h>
48 #include <sys/lock.h>
49
50 #include <mach/mach_types.h>
51 #include <mach/memory_object_types.h>
52
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55 #include <kern/zalloc.h>
56 #include <kern/kalloc.h>
57 #include <libkern/libkern.h>
58
59 #include <vm/vnode_pager.h>
60 #include <vm/vm_pageout.h>
61
62 #include <kern/assert.h>
63 #include <sys/kdebug.h>
64 #include <machine/spl.h>
65
66 #include <nfs/rpcv2.h>
67 #include <nfs/nfsproto.h>
68 #include <nfs/nfs.h>
69
70 #include <vm/vm_protos.h>
71
/*
 * Pager activity counters — statistics/debugging only; read via debugger
 * or kernel tooling, never consulted for control flow in this file.
 */
unsigned int vp_pagein=0;	/* pageins issued through vnode_pagein() */
unsigned int vp_pgodirty=0;	/* dirty pages pushed to VNOP_PAGEOUT by vnode_pageout() */
unsigned int vp_pgoclean=0;	/* clean/precious pages invalidated by vnode_pageout() */
unsigned int dp_pgouts=0;	/* Default pager pageouts */
unsigned int dp_pgins=0;	/* Default pager pageins */
77
78 vm_object_offset_t
79 vnode_pager_get_filesize(struct vnode *vp)
80 {
81
82 return (vm_object_offset_t) ubc_getsize(vp);
83 }
84
85 kern_return_t
86 vnode_pager_get_pathname(
87 struct vnode *vp,
88 char *pathname,
89 vm_size_t *length_p)
90 {
91 int error, len;
92
93 len = (int) *length_p;
94 error = vn_getpath(vp, pathname, &len);
95 if (error != 0) {
96 return KERN_FAILURE;
97 }
98 *length_p = (vm_size_t) len;
99 return KERN_SUCCESS;
100 }
101
102 kern_return_t
103 vnode_pager_get_filename(
104 struct vnode *vp,
105 char **filename)
106 {
107 *filename = vp->v_name;
108 return KERN_SUCCESS;
109 }
110
/*
 * vnode_pageout
 *
 * Push 'size' bytes of the UPL 'upl', starting at 'upl_offset' and
 * corresponding to file offset 'f_offset', out to the file backing 'vp'.
 *
 * Two distinct paths:
 *   - UPL_VNODE_PAGER clear: the request comes from the default pager,
 *     which has already computed the dirty ranges; VNOP_PAGEOUT is called
 *     once for the whole request.
 *   - UPL_VNODE_PAGER set: pageout/msync of a regular file; the UPL may
 *     interleave absent, clean and dirty pages, so we walk it, invalidate
 *     clean/precious pages, and issue VNOP_PAGEOUT only for runs of
 *     consecutive dirty pages.
 *
 * Unless the caller passed UPL_NOCOMMIT, this routine commits or aborts
 * every page of the UPL it examines.
 *
 * Returns PAGER_SUCCESS or PAGER_ERROR; if errorp is non-NULL it receives
 * the first errno-style error encountered (0 if none).
 */
pager_return_t
vnode_pageout(struct vnode *vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct proc	*p = current_proc();
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		error_ret = 0;	/* first error seen; reported via *errorp */
	daddr64_t	blkno;
	int		isize;		/* bytes of the request still to process */
	int		pg_index;
	int		base_index;	/* page index of the start of the request */
	int		offset;		/* current byte offset within the UPL */
	upl_page_info_t *pl;
	struct vfs_context context;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	isize = (int)size;

	if (isize <= 0) {
		result = PAGER_ERROR;
		error_ret = EINVAL;
		goto out;
	}
	UBCINFOCHECK("vnode_pageout", vp);

	if (UBCINVALID(vp)) {
		/* vnode is not (or no longer) UBC-backed; nothing we can do */
		result = PAGER_ERROR;
		error_ret = EINVAL;

		if (upl && !(flags & UPL_NOCOMMIT))
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
		goto out;
	}
	if ( !(flags & UPL_VNODE_PAGER)) {
		/*
		 * This is a pageout from the default pager,
		 * just go ahead and call vnop_pageout since
		 * it has already sorted out the dirty ranges
		 */
		dp_pgouts++;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      size, 1, 0, 0, 0);

		if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
					       (size_t)size, flags, &context)) )
			result = PAGER_ERROR;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      size, 1, 0, 0, 0);

		goto out;
	}
	/*
	 * we come here for pageouts to 'real' files and
	 * for msyncs... the upl may not contain any
	 * dirty pages.. it's our responsibility to sort
	 * through it and find the 'runs' of dirty pages
	 * to call VNOP_PAGEOUT on...
	 */
	pl = ubc_upl_pageinfo(upl);

	if (ubc_getsize(vp) == 0) {
		/*
		 * if the file has been effectively deleted, then
		 * we need to go through the UPL and invalidate any
		 * buffer headers we might have that reference any
		 * of it's pages
		 */
		for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)f_offset);
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)f_offset);
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				/* abort just this page; remember the first failure */
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			/* advance the file offset in step with the UPL offset */
			f_offset += PAGE_SIZE;
		}
		goto out;
	}
	/*
	 * Ignore any non-present pages at the end of the
	 * UPL so that we aren't looking at a upl that
	 * may already have been freed by the preceeding
	 * aborts/completions.
	 */
	base_index = upl_offset / PAGE_SIZE;

	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == base_index) {
			/*
			 * no pages were returned, so release
			 * our hold on the upl and leave
			 */
			if ( !(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);

			goto out;
		}
	}
	/* trim the request to end at the last present page */
	isize = (pg_index + 1) * PAGE_SIZE;

	offset = upl_offset;
	pg_index = base_index;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_DIRTY, so it's possible
			 * to get back empty slots in the UPL
			 * just skip over them
			 */
			offset += PAGE_SIZE;
			isize -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		if ( !upl_dirty_page(pl, pg_index)) {
			/*
			 * if the page is not dirty and reached here it is
			 * marked precious or it is due to invalidation in
			 * memory_object_lock request as part of truncation
			 * We also get here from vm_object_terminate()
			 * So all you need to do in these
			 * cases is to invalidate incore buffer if it is there
			 * Note we must not sleep here if the buffer is busy - that is
			 * a lock inversion which causes deadlock.
			 */
			vp_pgoclean++;

#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)(f_offset + offset));
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset));
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			offset += PAGE_SIZE;
			isize -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		vp_pgodirty++;

		/*
		 * found a dirty page: extend the run over all consecutive
		 * dirty pages so VNOP_PAGEOUT gets one contiguous request
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_dirty_page(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      xsize, (int)(f_offset + offset), 0, 0, 0);

		if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset,
					   (off_t)(f_offset + offset), xsize,
					   flags, &context)) ) {
			if (error_ret == 0)
				error_ret = error;
			result = PAGER_ERROR;
		}
		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      xsize, 0, 0, 0, 0);

		offset += xsize;
		isize -= xsize;
		pg_index += num_of_pages;
	}
out:
	if (errorp)
		*errorp = error_ret;

	return (result);
}
331
332
333 void IOSleep(int);
334
/*
 * vnode_pagein
 *
 * Bring pages of the file backing 'vp' into memory.  The request covers
 * 'size' bytes at file offset 'f_offset'; if the caller supplied no UPL,
 * one is created here (UPL_RET_ONLY_ABSENT | UPL_SET_LITE) and this
 * routine then owns its commit/abort regardless of UPL_NOCOMMIT.
 *
 * The UPL is walked to skip absent slots and already-valid pages
 * (which are aborted back to the cache unchanged) and VNOP_PAGEIN is
 * issued only for runs of present-but-invalid pages.
 *
 * Returns PAGER_SUCCESS, PAGER_ERROR, or PAGER_ABSENT (UPL creation
 * found nothing absent to read).
 *
 * NOTE(review): on exit *errorp receives 'result' while 'error' is the
 * return value — the reverse of vnode_pageout()'s convention.  Both carry
 * PAGER_* values here (not errnos), so callers appear to see consistent
 * success/failure either way — confirm before relying on *errorp.
 */
pager_return_t
vnode_pagein(
	struct vnode		*vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct proc	*p = current_proc();
	struct uthread	*ut;
	upl_page_info_t *pl;
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		pages_in_upl;
	int		start_pg;
	int		last_pg;
	int		first_pg;
	int		xsize;
	int		abort_needed = 1;	/* set to 0 once any page is dispatched */


	UBCINFOCHECK("vnode_pagein", vp);

	if (UBCINVALID(vp)) {
		/* vnode is not UBC-backed; fail the whole request */
		result = PAGER_ERROR;
		error = PAGER_ERROR;
		if (upl && !(flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
		}
		goto out;
	}
	if (upl == (upl_t)NULL) {
		if (size > (MAX_UPL_TRANSFER * PAGE_SIZE)) {
			result = PAGER_ERROR;
			error = PAGER_ERROR;
			goto out;
		}
		/* build a UPL covering only the absent pages of the range */
		ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_RET_ONLY_ABSENT | UPL_SET_LITE);

		if (upl == (upl_t)NULL) {
			result = PAGER_ABSENT;
			error = PAGER_ABSENT;
			goto out;
		}
		upl_offset = 0;
		/*
		 * if we get here, we've created the upl and
		 * are responsible for commiting/aborting it
		 * regardless of what the caller has passed in
		 */
		flags &= ~UPL_NOCOMMIT;

		vp_pagein++;
	} else {
		pl = ubc_upl_pageinfo(upl);

		dp_pgins++;
	}
	pages_in_upl = size / PAGE_SIZE;
	first_pg = upl_offset / PAGE_SIZE;

	/*
	 * before we start marching forward, we must make sure we end on
	 * a present page, otherwise we will be working with a freed
	 * upl
	 */
	for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
		if (upl_page_present(pl, last_pg))
			break;
	}
	pages_in_upl = last_pg + 1;

	for (last_pg = first_pg; last_pg < pages_in_upl;) {
		/*
		 * scan the upl looking for the next
		 * page that is present.... if all of the
		 * pages are absent, we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg == pages_in_upl)
			break;

		/*
		 * if we get here, we've sitting on a page
		 * that is present... we want to skip over
		 * any range of 'valid' pages... if this takes
		 * us to the end of the request, than we're done
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (!upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			/*
			 * we've found a range of valid pages
			 * if we've got COMMIT responsibility
			 * commit this range of pages back to the
			 * cache unchanged
			 */
			xsize = (last_pg - start_pg) * PAGE_SIZE;

			if (!(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);

			abort_needed = 0;
		}
		if (last_pg == pages_in_upl)
			break;

		if (!upl_page_present(pl, last_pg))
			/*
			 * if we found a range of valid pages
			 * terminated by a non-present page
			 * than start over
			 */
			continue;

		/*
		 * scan from the found invalid page looking for a valid
		 * or non-present page before the end of the upl is reached, if we
		 * find one, then it will be the last page of the request to
		 * 'cluster_io'
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			int xoff;
			struct vfs_context context;

			context.vc_proc = p;
			context.vc_ucred = kauth_cred_get();
			xsize = (last_pg - start_pg) * PAGE_SIZE;
			xoff  = start_pg * PAGE_SIZE;

			/* hand the invalid run to the filesystem to read in */
			if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff,
					       (off_t)f_offset + xoff,
					       xsize, flags, &context)) ) {
				result = PAGER_ERROR;
				error  = PAGER_ERROR;

			}
			abort_needed = 0;
		}
	}
	if (!(flags & UPL_NOCOMMIT) && abort_needed)
		/* nothing was dispatched: release our hold on the entire range */
		ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
out:
	if (errorp)
		*errorp = result;

	ut = get_bsdthread_info(current_thread());

	if (ut->uu_lowpri_delay) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		IOSleep(ut->uu_lowpri_delay);
		ut->uu_lowpri_delay = 0;
	}
	return (error);
}
507
508 void
509 vnode_pager_shutdown(void)
510 {
511 int i;
512 vnode_t vp;
513
514 for(i = 0; i < MAX_BACKING_STORE; i++) {
515 vp = (vnode_t)(bs_port_table[i]).vp;
516 if (vp) {
517 (bs_port_table[i]).vp = 0;
518
519 /* get rid of macx_swapon() reference */
520 vnode_rele(vp);
521 }
522 }
523 }
524
525
526 void *
527 upl_get_internal_page_list(upl_t upl)
528 {
529 return(UPL_GET_INTERNAL_PAGE_LIST(upl));
530
531 }