/* bsd/vm/vm_compressor_backing_file.c (xnu-3789.51.2) */

/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <stdint.h>
#include <sys/fcntl.h>
#include <sys/vnode_internal.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/buf_internal.h>
#include <kern/debug.h>
#include <kern/kalloc.h>
#include <sys/cprotect.h>
#include <sys/disk.h>
#include <vm/vm_protos.h>
#include <vm/vm_pageout.h>

void vm_swapfile_open(const char *path, vnode_t *vp);
void vm_swapfile_close(uint64_t path, vnode_t vp);
int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin);
uint64_t vm_swapfile_get_blksize(vnode_t vp);
uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);
int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);

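/*
 * Illustrative sketch only: one plausible call sequence for the helpers above,
 * as they might be driven by the swap-file management code (e.g.
 * vm_compressor_backing_store.c).  The path, size and function name below are
 * hypothetical, and the block is not compiled.
 */
#if 0
static void
swapfile_usage_sketch(void)
{
	vnode_t		vp = NULL;
	uint64_t	size = 128ULL * 1024 * 1024;		/* hypothetical 128MB swap file */
	boolean_t	pin = FALSE;
	const char	*path = "/private/var/vm/swapfile0";	/* hypothetical path */

	vm_swapfile_open(path, &vp);
	if (vp == NULL)
		return;

	if (vm_swapfile_preallocate(vp, &size, &pin)) {
		/* could not reserve the space: close and unlink the file again */
		vm_swapfile_close((uint64_t)path, vp);
		return;
	}
	/* ... hand page-aligned segments to vm_swapfile_io(), then clean up with vm_swapfile_close() ... */
}
#endif
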
void
vm_swapfile_open(const char *path, vnode_t *vp)
{
	int error = 0;
	vfs_context_t ctx = vfs_context_current();

	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
		printf("Failed to open swap file %d\n", error);
		*vp = NULL;
		return;
	}

	/*
	 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
	 * To avoid a race on the mount we only make this check after creating the
	 * vnode.
	 */
	if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
		vnode_put(*vp);
		vm_swapfile_close((uint64_t)path, *vp);
		*vp = NULL;
		return;
	}

	vnode_put(*vp);
}

uint64_t
vm_swapfile_get_blksize(vnode_t vp)
{
	return ((uint64_t)vfs_devblocksize(vnode_mount(vp)));
}

uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)
{
	return ((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize);
}

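/*
 * unlink1() is the in-kernel unlink primitive (bsd/vfs/vfs_syscalls.c).
 * vm_swapfile_close() uses it to remove the swap file after closing the
 * vnode: path_addr is a kernel pointer to the path string, carried as a
 * uint64_t so it can be handed to unlink1() via CAST_USER_ADDR_T with
 * UIO_SYSSPACE.
 */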
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	vfs_context_t context = vfs_context_current();
	int error;

	vnode_getwithref(vp);
	vnode_close(vp, 0, context);

	error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
	    UIO_SYSSPACE, 0);

#if DEVELOPMENT || DEBUG
	if (error)
		printf("%s : unlink of %s failed with error %d\n", __FUNCTION__,
		    (char *)path_addr, error);
#endif
}

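/*
 * vm_swapfile_preallocate() grows the freshly created swap file to *size
 * bytes (IO_NOZEROFILL, so no zero-fill I/O) and verifies the resulting
 * size.  When *pin is requested it issues FIOPINSWAP, which asks the
 * filesystem to pin the file's blocks (on tiered/Fusion-style volumes this
 * keeps swap on the faster media).  A pin failure is non-fatal: *pin is
 * cleared and the file is used wherever it landed.
 */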
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;

	ctx = vfs_context_current();

	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);

	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	error = vnode_size(vp, (off_t *)&file_size, ctx);

	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}
	assert(file_size == *size);

	if (pin != NULL && *pin != FALSE) {
		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);

		if (error) {
			printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}

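/*
 * vm_record_file_write(): a plain synchronous kernel-context write of 'size'
 * bytes at 'offset'.  It appears to back the compressor's optional
 * compressed-data recording facility rather than the swap path itself.
 */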
int
vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
{
	int error = 0;
	vfs_context_t ctx;

	ctx = vfs_context_kernel();

	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
	    UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *)0, vfs_context_proc(ctx));

	return (error);
}

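/*
 * vm_swapfile_io(): read or write 'npages' pages between the swap file (at
 * byte 'offset') and the wired kernel buffer at virtual address 'start'.
 * A UPL is built over the kernel_map range and handed to vnode_pagein() /
 * vnode_pageout(), so the transfer runs synchronously through the cluster
 * layer (UPL_IOSYNC).  The "#else" arm below is an older vn_rdwr()-based
 * path that is compiled out by the "#if 1".
 */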
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
{
	int error = 0;
	uint64_t io_size = npages * PAGE_SIZE_64;
#if 1
	kern_return_t	kr = KERN_SUCCESS;
	upl_t		upl = NULL;
	unsigned int	count = 0;
	upl_control_flags_t upl_create_flags = 0;
	int		upl_control_flags = 0;
	upl_size_t	upl_size = 0;

	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE
			| UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);

#if ENCRYPTED_SWAP
	upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
#else
	upl_control_flags = UPL_IOSYNC;
#endif
	if ((flags & SWAP_READ) == FALSE) {
		upl_create_flags |= UPL_COPYOUT_FROM;
	}

	upl_size = io_size;
	kr = vm_map_create_upl(kernel_map,
			       start,
			       &upl_size,
			       &upl,
			       NULL,
			       &count,
			       &upl_create_flags);

	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
		panic("vm_map_create_upl failed with %d\n", kr);
	}

	if (flags & SWAP_READ) {
		vnode_pagein(vp,
			     upl,
			     0,
			     offset,
			     io_size,
			     upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
			     &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
		}

	} else {
		vnode_pageout(vp,
			      upl,
			      0,
			      offset,
			      io_size,
			      upl_control_flags,
			      &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
	}
	return error;

#else /* 1 */
	vfs_context_t ctx;
	ctx = vfs_context_kernel();

	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
	    UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *)0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}

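/*
 * Illustrative sketch only: a hypothetical swap-out of 'npages' wired kernel
 * pages starting at kernel virtual address 'addr' to byte offset 'f_offset'
 * of an open swap vnode 'swap_vp'.  SWAP_READ is used above; SWAP_WRITE is
 * assumed to be its write counterpart from the same header.  Not compiled.
 */
#if 0
	if (vm_swapfile_io(swap_vp, f_offset, addr, npages, SWAP_WRITE))
		printf("swap-out of %d pages at offset 0x%llx failed\n", npages, f_offset);
#endif
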
#define MAX_BATCH_TO_TRIM	256

#define ROUTE_ONLY		0x10	/* if corestorage is present, tell it to just pass */
					/* the DKIOCUNMAP command through w/o acting on it */
					/* this is used by the compressed swap system to reclaim empty space */

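/*
 * vnode_trim_list(): walk a linked list of file-relative byte ranges, map
 * each range to device extents with VNOP_BLOCKMAP(), and issue the unmap
 * (TRIM) requests to the underlying device vnode in batches of up to
 * MAX_BATCH_TO_TRIM extents.  On CoreStorage-backed volumes the
 * _DKIOCCSUNMAP variant is used, optionally with ROUTE_ONLY so the request
 * is passed straight through to the physical device.
 */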
u_int32_t
vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
{
	int		error = 0;
	int		trim_index = 0;
	u_int32_t	blocksize = 0;
	struct vnode	*devvp;
	dk_extent_t	*extents;
	dk_unmap_t	unmap;
	_dk_cs_unmap_t	cs_unmap;

	if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
		return (ENOTSUP);

	if (tl == NULL)
		return (0);

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
		memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
		cs_unmap.extents = extents;

		if (route_only == TRUE)
			cs_unmap.options = ROUTE_ONLY;
	} else {
		memset(&unmap, 0, sizeof(dk_unmap_t));
		unmap.extents = extents;
	}

	while (tl) {
		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
		size_t		trimmed;
		size_t		remaining_length;
		off_t		current_offset;

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;

		/*
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
		 * the whole range or if we encounter an error.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
			    &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);

			if (error) {
				goto trim_exit;
			}

			extents[trim_index].offset = (uint64_t)io_blockno * (u_int64_t)blocksize;
			extents[trim_index].length = io_bytecount;

			trim_index++;

			if (trim_index == MAX_BATCH_TO_TRIM) {

				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
					cs_unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
				} else {
					unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
				}
				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}
	if (trim_index) {
		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
			cs_unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
		} else {
			unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
		}
	}
trim_exit:
	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
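
/*
 * Illustrative sketch only: freeing two byte ranges of a swap file whose
 * vnode is 'swap_vp' (hypothetical).  Offsets and lengths are file-relative;
 * vnode_trim_list() maps them to device extents and batches the unmap
 * requests.  Not compiled.
 */
#if 0
	struct trim_list head, tail;

	head.tl_offset = 0;
	head.tl_length = 64 * 1024;
	head.tl_next = &tail;

	tail.tl_offset = 1024 * 1024;
	tail.tl_length = 128 * 1024;
	tail.tl_next = NULL;

	(void) vnode_trim_list(swap_vp, &head, TRUE);
#endif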