]> git.saurik.com Git - apple/xnu.git/blame - bsd/vm/vm_compressor_backing_file.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / vm / vm_compressor_backing_file.c
CommitLineData
/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28
29#include <stdint.h>
30#include <sys/fcntl.h>
31#include <sys/vnode_internal.h>
32#include <sys/vnode.h>
33#include <sys/kauth.h>
34#include <sys/mount_internal.h>
35#include <sys/buf_internal.h>
36#include <kern/debug.h>
37#include <kern/kalloc.h>
38#include <sys/cprotect.h>
39#include <sys/disk.h>
40#include <vm/vm_protos.h>
41#include <vm/vm_pageout.h>
d9a64523 42#include <sys/content_protection.h>
39236c6e
A
43
44void vm_swapfile_open(const char *path, vnode_t *vp);
45void vm_swapfile_close(uint64_t path, vnode_t vp);
3e170ce0 46int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin);
39236c6e
A
47uint64_t vm_swapfile_get_blksize(vnode_t vp);
48uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
d9a64523 49int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *);
3e170ce0
A
50int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size);
51
d9a64523
A
52#if CONFIG_FREEZE
53int vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget);
54#endif /* CONFIG_FREEZE */
55
39236c6e
A
56
57void
58vm_swapfile_open(const char *path, vnode_t *vp)
59{
60 int error = 0;
0a7de745 61 vfs_context_t ctx = vfs_context_kernel();
39236c6e 62
fe8ab488 63 if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
39236c6e
A
64 printf("Failed to open swap file %d\n", error);
65 *vp = NULL;
66 return;
0a7de745 67 }
39236c6e 68
813fb2f6
A
69 /*
70 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
71 * To avoid a race on the mount we only make this check after creating the
72 * vnode.
73 */
74 if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) {
75 vnode_put(*vp);
76 vm_swapfile_close((uint64_t)path, *vp);
77 *vp = NULL;
78 return;
79 }
80
39236c6e
A
81 vnode_put(*vp);
82}
83
84uint64_t
85vm_swapfile_get_blksize(vnode_t vp)
86{
0a7de745 87 return (uint64_t)vfs_devblocksize(vnode_mount(vp));
39236c6e
A
88}
89
90uint64_t
91vm_swapfile_get_transfer_size(vnode_t vp)
92{
0a7de745 93 return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;
39236c6e
A
94}
95
c18c124e 96int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
39236c6e
A
97
98void
99vm_swapfile_close(uint64_t path_addr, vnode_t vp)
100{
00867663 101 vfs_context_t context = vfs_context_kernel();
c18c124e 102 int error;
39236c6e
A
103
104 vnode_getwithref(vp);
105 vnode_close(vp, 0, context);
0a7de745 106
c18c124e
A
107 error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
108 UIO_SYSSPACE, 0);
39236c6e 109
c18c124e 110#if DEVELOPMENT || DEBUG
0a7de745 111 if (error) {
c18c124e
A
112 printf("%s : unlink of %s failed with error %d", __FUNCTION__,
113 (char *)path_addr, error);
0a7de745 114 }
c18c124e 115#endif
39236c6e
A
116}
117
118int
3e170ce0 119vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
39236c6e 120{
0a7de745
A
121 int error = 0;
122 uint64_t file_size = 0;
123 vfs_context_t ctx = NULL;
d9a64523
A
124#if CONFIG_FREEZE
125 struct vnode_attr va;
126#endif /* CONFIG_FREEZE */
39236c6e 127
00867663 128 ctx = vfs_context_kernel();
39236c6e 129
fe8ab488 130 error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
39236c6e 131
fe8ab488
A
132 if (error) {
133 printf("vnode_setsize for swap files failed: %d\n", error);
39236c6e 134 goto done;
fe8ab488 135 }
39236c6e 136
fe8ab488 137 error = vnode_size(vp, (off_t*) &file_size, ctx);
39236c6e 138
fe8ab488
A
139 if (error) {
140 printf("vnode_size (new file) for swap file failed: %d\n", error);
3e170ce0 141 goto done;
0a7de745 142 }
fe8ab488 143 assert(file_size == *size);
0a7de745 144
3e170ce0 145 if (pin != NULL && *pin != FALSE) {
39037602 146 error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
3e170ce0
A
147
148 if (error) {
39037602 149 printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size);
3e170ce0
A
150 /* this is not fatal, carry on with files wherever they landed */
151 *pin = FALSE;
152 error = 0;
153 }
154 }
155
39236c6e
A
156 vnode_lock_spin(vp);
157 SET(vp->v_flag, VSWAP);
158 vnode_unlock(vp);
d9a64523
A
159
160#if CONFIG_FREEZE
161 VATTR_INIT(&va);
162 VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C);
163 error = VNOP_SETATTR(vp, &va, ctx);
164
165 if (error) {
166 printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error);
167 goto done;
168 }
169#endif /* CONFIG_FREEZE */
170
39236c6e
A
171done:
172 return error;
173}
174
3e170ce0
A
175
176int
177vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
178{
179 int error = 0;
180 vfs_context_t ctx;
181
182 ctx = vfs_context_kernel();
0a7de745 183
3e170ce0 184 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
0a7de745 185 UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3e170ce0 186
0a7de745 187 return error;
3e170ce0
A
188}
189
190
191
39236c6e 192int
d9a64523 193vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone)
39236c6e
A
194{
195 int error = 0;
f427ee49 196 upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64);
39236c6e 197#if 1
0a7de745
A
198 kern_return_t kr = KERN_SUCCESS;
199 upl_t upl = NULL;
200 unsigned int count = 0;
3e170ce0 201 upl_control_flags_t upl_create_flags = 0;
0a7de745
A
202 int upl_control_flags = 0;
203 upl_size_t upl_size = 0;
39236c6e 204
5ba3f43e 205 upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
39236c6e 206
0a7de745
A
207 if (upl_iodone == NULL) {
208 upl_control_flags = UPL_IOSYNC;
209 }
d9a64523 210
fe8ab488 211#if ENCRYPTED_SWAP
d9a64523 212 upl_control_flags |= UPL_PAGING_ENCRYPTED;
fe8ab488 213#endif
d9a64523 214
39236c6e
A
215 if ((flags & SWAP_READ) == FALSE) {
216 upl_create_flags |= UPL_COPYOUT_FROM;
217 }
0a7de745 218
39236c6e
A
219 upl_size = io_size;
220 kr = vm_map_create_upl( kernel_map,
0a7de745
A
221 start,
222 &upl_size,
223 &upl,
224 NULL,
225 &count,
226 &upl_create_flags,
227 VM_KERN_MEMORY_OSFMK);
39236c6e
A
228
229 if (kr != KERN_SUCCESS || (upl_size != io_size)) {
230 panic("vm_map_create_upl failed with %d\n", kr);
231 }
232
233 if (flags & SWAP_READ) {
234 vnode_pagein(vp,
0a7de745
A
235 upl,
236 0,
237 offset,
238 io_size,
239 upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
240 &error);
39236c6e
A
241 if (error) {
242#if DEBUG
f427ee49 243 printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
39236c6e
A
244#else /* DEBUG */
245 printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
246#endif /* DEBUG */
247 }
39236c6e 248 } else {
0a7de745 249 upl_set_iodone(upl, upl_iodone);
d9a64523 250
39236c6e 251 vnode_pageout(vp,
0a7de745
A
252 upl,
253 0,
254 offset,
255 io_size,
256 upl_control_flags,
257 &error);
39236c6e
A
258 if (error) {
259#if DEBUG
f427ee49 260 printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size);
39236c6e
A
261#else /* DEBUG */
262 printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
263#endif /* DEBUG */
264 }
265 }
f427ee49 266
39236c6e
A
267 return error;
268
269#else /* 1 */
270 vfs_context_t ctx;
271 ctx = vfs_context_kernel();
0a7de745 272
39236c6e 273 error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
0a7de745 274 UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
39236c6e
A
275
276 if (error) {
277 printf("vn_rdwr: Swap I/O failed with %d\n", error);
278 }
279 return error;
280#endif /* 1 */
281}
282
283
0a7de745 284#define MAX_BATCH_TO_TRIM 256
39236c6e 285
0a7de745 286#define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */
fe8ab488
A
287 /* the DKIOUNMAP command through w/o acting on it */
288 /* this is used by the compressed swap system to reclaim empty space */
289
290
0a7de745
A
291u_int32_t
292vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
39236c6e 293{
0a7de745
A
294 int error = 0;
295 int trim_index = 0;
296 u_int32_t blocksize = 0;
297 struct vnode *devvp;
298 dk_extent_t *extents;
299 dk_unmap_t unmap;
300 _dk_cs_unmap_t cs_unmap;
301
302 if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) {
303 return ENOTSUP;
304 }
39236c6e 305
0a7de745
A
306 if (tl == NULL) {
307 return 0;
308 }
39236c6e
A
309
310 /*
311 * Get the underlying device vnode and physical block size
312 */
313 devvp = vp->v_mount->mnt_devvp;
314 blocksize = vp->v_mount->mnt_devblocksize;
315
f427ee49
A
316 extents = kheap_alloc(KHEAP_TEMP,
317 sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK);
39236c6e 318
fe8ab488 319 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
0a7de745 320 memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
fe8ab488
A
321 cs_unmap.extents = extents;
322
0a7de745 323 if (route_only == TRUE) {
fe8ab488 324 cs_unmap.options = ROUTE_ONLY;
0a7de745 325 }
fe8ab488 326 } else {
0a7de745 327 memset(&unmap, 0, sizeof(dk_unmap_t));
fe8ab488
A
328 unmap.extents = extents;
329 }
39236c6e
A
330
331 while (tl) {
0a7de745
A
332 daddr64_t io_blockno; /* Block number corresponding to the start of the extent */
333 size_t io_bytecount; /* Number of bytes in current extent for the specified range */
334 size_t trimmed;
335 size_t remaining_length;
336 off_t current_offset;
39236c6e
A
337
338 current_offset = tl->tl_offset;
339 remaining_length = tl->tl_length;
340 trimmed = 0;
0a7de745
A
341
342 /*
39236c6e
A
343 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
344 * extent from the blockmap call. Keep looping/going until we are sure we've hit
345 * the whole range or if we encounter an error.
346 */
347 while (trimmed < tl->tl_length) {
348 /*
349 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
0a7de745 350 * specified offset. It returns blocks in contiguous chunks, so if the logical range is
39236c6e
A
351 * broken into multiple extents, it must be called multiple times, increasing the offset
352 * in each call to ensure that the entire range is covered.
353 */
0a7de745
A
354 error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
355 &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL);
39236c6e
A
356
357 if (error) {
358 goto trim_exit;
359 }
5ba3f43e 360 if (io_blockno != -1) {
0a7de745 361 extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
5ba3f43e 362 extents[trim_index].length = io_bytecount;
39236c6e 363
5ba3f43e
A
364 trim_index++;
365 }
39236c6e 366 if (trim_index == MAX_BATCH_TO_TRIM) {
fe8ab488
A
367 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
368 cs_unmap.extentsCount = trim_index;
369 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
370 } else {
371 unmap.extentsCount = trim_index;
372 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
373 }
39236c6e
A
374 if (error) {
375 goto trim_exit;
376 }
377 trim_index = 0;
378 }
379 trimmed += io_bytecount;
380 current_offset += io_bytecount;
381 remaining_length -= io_bytecount;
382 }
383 tl = tl->tl_next;
384 }
385 if (trim_index) {
fe8ab488
A
386 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
387 cs_unmap.extentsCount = trim_index;
388 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
389 } else {
390 unmap.extentsCount = trim_index;
391 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
392 }
39236c6e
A
393 }
394trim_exit:
f427ee49 395 kheap_free(KHEAP_TEMP, extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
39236c6e
A
396
397 return error;
398}
d9a64523
A
399
#if CONFIG_FREEZE
/*
 * Ask the device backing the mount of `vp` for its DKIOCGETMAXSWAPWRITE
 * value; the ioctl writes its answer into `*freeze_daily_budget`.
 *
 * Returns the ioctl's result (0 on success).
 */
int
vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
{
	vfs_context_t   kernel_ctx = vfs_context_kernel();
	vnode_t         swap_devvp = vp->v_mount->mnt_devvp;
	errno_t         rc;

	rc = VNOP_IOCTL(swap_devvp, DKIOCGETMAXSWAPWRITE,
	    (caddr_t)freeze_daily_budget, 0, kernel_ctx);

	return rc;
}
#endif /* CONFIG_FREEZE */