xnu-7195.101.1.tar.gz

[apple/xnu.git] / bsd / vm / vm_compressor_backing_file.c
diff --git a/bsd/vm/vm_compressor_backing_file.c b/bsd/vm/vm_compressor_backing_file.c

index 7ec5873dbc65a598ff8c745f2ce5da291f52b5fe..5f4fb755ef0fba4eb9018f08871613cb9bd0d012 100644 (file)
--- a/bsd/vm/vm_compressor_backing_file.c
+++ b/bsd/vm/vm_compressor_backing_file.c
@@ -1,8 +1,8 @@
  /*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
   * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
   * unlawful or unlicensed copies of an Apple operating system, or to
   * circumvent, violate, or enable the circumvention or violation of, any
   * terms of an Apple operating system software license agreement.
- * 
+ *
   * Please obtain a copy of the License at
   * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
   * The Original Code and all software distributed under the License are
   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   * Please see the License for the specific language governing rights and
   * limitations under the License.
- * 
+ *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  
@@ -39,28 +39,44 @@
  #include <sys/disk.h>
  #include <vm/vm_protos.h>
  #include <vm/vm_pageout.h>
-#include <hfs/hfs.h>
+#include <sys/content_protection.h>
  
  void vm_swapfile_open(const char *path, vnode_t *vp);
  void vm_swapfile_close(uint64_t path, vnode_t vp);
  int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin);
  uint64_t vm_swapfile_get_blksize(vnode_t vp);
  uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
-int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);
+int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *);
  int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);
  
+#if CONFIG_FREEZE
+int vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget);
+#endif /* CONFIG_FREEZE */
+
  
  void
  vm_swapfile_open(const char *path, vnode_t *vp)
  {
         int error = 0;
-       vfs_context_t   ctx = vfs_context_current();
+       vfs_context_t   ctx = vfs_context_kernel();
  
         if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
                 printf("Failed to open swap file %d\n", error);
                 *vp = NULL;
                 return;
-       }       
+       }
+
+       /*
+        * If MNTK_NOSWAP is set in the mount's mnt_kern_flag, opening the swap file should fail.
+        * To avoid a race on the mount we only make this check after creating the
+        * vnode.
+        */
+       if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
+               vnode_put(*vp);
+               vm_swapfile_close((uint64_t)path, *vp);
+               *vp = NULL;
+               return;
+       }
  
         vnode_put(*vp);
  }
@@ -68,13 +84,13 @@ vm_swapfile_open(const char *path, vnode_t *vp)
  uint64_t
  vm_swapfile_get_blksize(vnode_t vp)
  {
-       return ((uint64_t)vfs_devblocksize(vnode_mount(vp)));
+       return (uint64_t)vfs_devblocksize(vnode_mount(vp));
  }
  
  uint64_t
  vm_swapfile_get_transfer_size(vnode_t vp)
  {
-       return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize);
+       return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
  }
  
  int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
@@ -82,53 +98,35 @@ int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
  void
  vm_swapfile_close(uint64_t path_addr, vnode_t vp)
  {
-       vfs_context_t context = vfs_context_current();
+       vfs_context_t context = vfs_context_kernel();
         int error;
  
         vnode_getwithref(vp);
         vnode_close(vp, 0, context);
-       
+
         error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
             UIO_SYSSPACE, 0);
  
  #if DEVELOPMENT || DEBUG
-       if (error)
+       if (error) {
                 printf("%s : unlink of %s failed with error %d", __FUNCTION__,
                     (char *)path_addr, error);
+       }
  #endif
  }
  
  int
  vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
  {
-       int             error = 0;
-       uint64_t        file_size = 0;
-       vfs_context_t   ctx = NULL;
+       int             error = 0;
+       uint64_t        file_size = 0;
+       vfs_context_t   ctx = NULL;
+#if CONFIG_FREEZE
+       struct vnode_attr va;
+#endif /* CONFIG_FREEZE */
  
+       ctx = vfs_context_kernel();
  
-       ctx = vfs_context_current();
-
-#if CONFIG_PROTECT
-       {
-#if 0  // <rdar://11771612>
-
-               if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
-                       if(config_protect_bug) {
-                               printf("swap protection class set failed with %d\n", error);
-                       } else {
-                               panic("swap protection class set failed with %d\n", error);
-                       }
-               }
-#endif
-               /* initialize content protection keys manually */
-               if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
-                       printf("Content Protection key failure on swap: %d\n", error);
-                       vnode_put(vp);
-                       vp = NULL;
-                       goto done;
-               }
-       }
-#endif
         error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
  
         if (error) {
@@ -141,17 +139,14 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
         if (error) {
                 printf("vnode_size (new file) for swap file failed: %d\n", error);
                 goto done;
-       }       
+       }
         assert(file_size == *size);
-       
-       if (pin != NULL && *pin != FALSE) {
  
-               assert(vnode_tag(vp) == VT_HFS);
-
-               error = hfs_pin_vnode(VTOHFS(vp), vp, HFS_PIN_IT | HFS_DATALESS_PIN, NULL, ctx);
+       if (pin != NULL && *pin != FALSE) {
+               error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
  
                 if (error) {
-                       printf("hfs_pin_vnode for swap files failed: %d\n", error);
+                       printf("pin for swap files failed: %d,  file_size = %lld\n", error, file_size);
                         /* this is not fatal, carry on with files wherever they landed */
                         *pin = FALSE;
                         error = 0;
@@ -161,6 +156,18 @@ vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
         vnode_lock_spin(vp);
         SET(vp->v_flag, VSWAP);
         vnode_unlock(vp);
+
+#if CONFIG_FREEZE
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
+       error = VNOP_SETATTR(vp, &va, ctx);
+
+       if (error) {
+               printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
+               goto done;
+       }
+#endif /* CONFIG_FREEZE */
+
  done:
         return error;
  }
@@ -173,48 +180,51 @@ vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
         vfs_context_t ctx;
  
         ctx = vfs_context_kernel();
-               
+
         error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
-               UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+           UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
  
-       return (error);
+       return error;
  }
  
  
  
  int
-vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
+vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
  {
         int error = 0;
-       uint64_t io_size = npages * PAGE_SIZE_64;
+       upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
  #if 1
-       kern_return_t   kr = KERN_SUCCESS;
-       upl_t           upl = NULL;
-       unsigned int    count = 0;
+       kern_return_t   kr = KERN_SUCCESS;
+       upl_t           upl = NULL;
+       unsigned int    count = 0;
         upl_control_flags_t upl_create_flags = 0;
-       int             upl_control_flags = 0;
-       upl_size_t      upl_size = 0;
+       int             upl_control_flags = 0;
+       upl_size_t      upl_size = 0;
+
+       upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
  
-       upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE
-                       | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);
+       if (upl_iodone == NULL) {
+               upl_control_flags = UPL_IOSYNC;
+       }
  
  #if ENCRYPTED_SWAP
-       upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
-#else
-       upl_control_flags = UPL_IOSYNC;
+       upl_control_flags |= UPL_PAGING_ENCRYPTED;
  #endif
+
         if ((flags & SWAP_READ) == FALSE) {
                 upl_create_flags |= UPL_COPYOUT_FROM;
         }
- 
+
         upl_size = io_size;
         kr = vm_map_create_upl( kernel_map,
-                               start,
-                               &upl_size,
-                               &upl,
-                               NULL,
-                               &count,
-                               &upl_create_flags);
+           start,
+           &upl_size,
+           &upl,
+           NULL,
+           &count,
+           &upl_create_flags,
+           VM_KERN_MEMORY_OSFMK);
  
         if (kr != KERN_SUCCESS || (upl_size != io_size)) {
                 panic("vm_map_create_upl failed with %d\n", kr);
@@ -222,44 +232,46 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag
  
         if (flags & SWAP_READ) {
                 vnode_pagein(vp,
-                             upl,
-                             0,
-                             offset,
-                             io_size,
-                             upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
-                             &error);
+                   upl,
+                   0,
+                   offset,
+                   io_size,
+                   upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
+                   &error);
                 if (error) {
  #if DEBUG
-                       printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
+                       printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
  #else /* DEBUG */
                         printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
  #endif /* DEBUG */
                 }
-       
         } else {
+               upl_set_iodone(upl, upl_iodone);
+
                 vnode_pageout(vp,
-                             upl,
-                             0,
-                             offset,
-                             io_size,
-                             upl_control_flags,
-                             &error);
+                   upl,
+                   0,
+                   offset,
+                   io_size,
+                   upl_control_flags,
+                   &error);
                 if (error) {
  #if DEBUG
-                       printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
+                       printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
  #else /* DEBUG */
                         printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
  #endif /* DEBUG */
                 }
         }
+
         return error;
  
  #else /* 1 */
         vfs_context_t ctx;
         ctx = vfs_context_kernel();
-               
+
         error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
-               UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+           UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
  
         if (error) {
                 printf("vn_rdwr: Swap I/O failed with %d\n", error);
@@ -269,28 +281,31 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag
  }
  
  
-#define MAX_BATCH_TO_TRIM      256
+#define MAX_BATCH_TO_TRIM       256
  
-#define ROUTE_ONLY             0x10            /* if corestorage is present, tell it to just pass */
+#define ROUTE_ONLY              0x10            /* if corestorage is present, tell it to just pass */
                                                  /* the DKIOUNMAP command through w/o acting on it */
                                                  /* this is used by the compressed swap system to reclaim empty space */
  
  
-u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only)
+u_int32_t
+vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
  {
-       int             error = 0;
-       int             trim_index = 0;
-       u_int32_t       blocksize = 0;
-       struct vnode    *devvp;
-       dk_extent_t     *extents;
-       dk_unmap_t      unmap;
-       _dk_cs_unmap_t  cs_unmap;
-
-       if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
-               return (ENOTSUP);
+       int             error = 0;
+       int             trim_index = 0;
+       u_int32_t       blocksize = 0;
+       struct vnode    *devvp;
+       dk_extent_t     *extents;
+       dk_unmap_t      unmap;
+       _dk_cs_unmap_t  cs_unmap;
+
+       if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
+               return ENOTSUP;
+       }
  
-       if (tl == NULL)
-               return (0);
+       if (tl == NULL) {
+               return 0;
+       }
  
         /*
          * Get the underlying device vnode and physical block size
@@ -298,31 +313,33 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_onl
         devvp = vp->v_mount->mnt_devvp;
         blocksize = vp->v_mount->mnt_devblocksize;
  
-       extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
+       extents = kheap_alloc(KHEAP_TEMP,
+           sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);
  
         if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
-               memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
+               memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
                 cs_unmap.extents = extents;
  
-               if (route_only == TRUE)
+               if (route_only == TRUE) {
                         cs_unmap.options = ROUTE_ONLY;
+               }
         } else {
-               memset (&unmap, 0, sizeof(dk_unmap_t));
+               memset(&unmap, 0, sizeof(dk_unmap_t));
                 unmap.extents = extents;
         }
  
         while (tl) {
-               daddr64_t       io_blockno;     /* Block number corresponding to the start of the extent */
-               size_t          io_bytecount;   /* Number of bytes in current extent for the specified range */
-               size_t          trimmed;
-               size_t          remaining_length;
-               off_t           current_offset; 
+               daddr64_t       io_blockno;     /* Block number corresponding to the start of the extent */
+               size_t          io_bytecount;   /* Number of bytes in current extent for the specified range */
+               size_t          trimmed;
+               size_t          remaining_length;
+               off_t           current_offset;
  
                 current_offset = tl->tl_offset;
                 remaining_length = tl->tl_length;
                 trimmed = 0;
-               
-               /* 
+
+               /*
                  * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
                  * extent from the blockmap call.  Keep looping/going until we are sure we've hit
                  * the whole range or if we encounter an error.
@@ -330,24 +347,23 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_onl
                 while (trimmed < tl->tl_length) {
                         /*
                          * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
-                        * specified offset.  It returns blocks in contiguous chunks, so if the logical range is 
+                        * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
                          * broken into multiple extents, it must be called multiple times, increasing the offset
                          * in each call to ensure that the entire range is covered.
                          */
-                       error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, 
-                                              &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);
+                       error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
+                           &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);
  
                         if (error) {
                                 goto trim_exit;
                         }
+                       if (io_blockno != -1) {
+                               extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
+                               extents[trim_index].length = io_bytecount;
  
-                       extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
-                       extents[trim_index].length = io_bytecount;
-
-                       trim_index++;
-
+                               trim_index++;
+                       }
                         if (trim_index == MAX_BATCH_TO_TRIM) {
-
                                 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
                                         cs_unmap.extentsCount = trim_index;
                                         error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
@@ -376,7 +392,23 @@ u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_onl
                 }
         }
  trim_exit:
-       kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
+       kheap_free(KHEAP_TEMP, extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
  
         return error;
  }
+
+#if CONFIG_FREEZE
+int
+vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
+{
+       vnode_t         devvp = NULL;
+       vfs_context_t   ctx = vfs_context_kernel();
+       errno_t         err = 0;
+
+       devvp = vp->v_mount->mnt_devvp;
+
+       err = VNOP_IOCTL(devvp, DKIOCGETMAXSWAPWRITE, (caddr_t)freeze_daily_budget, 0, ctx);
+
+       return err;
+}
+#endif /* CONFIG_FREEZE */