]>
Commit | Line | Data |
---|---|---|
39236c6e | 1 | /* |
813fb2f6 | 2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
39236c6e A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
0a7de745 | 5 | * |
39236c6e A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
39236c6e A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
39236c6e A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
39236c6e A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ | |
28 | ||
29 | #include <stdint.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/vnode_internal.h> | |
32 | #include <sys/vnode.h> | |
33 | #include <sys/kauth.h> | |
34 | #include <sys/mount_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | #include <kern/debug.h> | |
37 | #include <kern/kalloc.h> | |
38 | #include <sys/cprotect.h> | |
39 | #include <sys/disk.h> | |
40 | #include <vm/vm_protos.h> | |
41 | #include <vm/vm_pageout.h> | |
d9a64523 | 42 | #include <sys/content_protection.h> |
39236c6e A |
43 | |
44 | void vm_swapfile_open(const char *path, vnode_t *vp); | |
45 | void vm_swapfile_close(uint64_t path, vnode_t vp); | |
3e170ce0 | 46 | int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin); |
39236c6e A |
47 | uint64_t vm_swapfile_get_blksize(vnode_t vp); |
48 | uint64_t vm_swapfile_get_transfer_size(vnode_t vp); | |
d9a64523 | 49 | int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *); |
3e170ce0 A |
50 | int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size); |
51 | ||
d9a64523 A |
52 | #if CONFIG_FREEZE |
53 | int vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget); | |
54 | #endif /* CONFIG_FREEZE */ | |
55 | ||
39236c6e A |
56 | |
57 | void | |
58 | vm_swapfile_open(const char *path, vnode_t *vp) | |
59 | { | |
60 | int error = 0; | |
0a7de745 | 61 | vfs_context_t ctx = vfs_context_kernel(); |
39236c6e | 62 | |
fe8ab488 | 63 | if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { |
39236c6e A |
64 | printf("Failed to open swap file %d\n", error); |
65 | *vp = NULL; | |
66 | return; | |
0a7de745 | 67 | } |
39236c6e | 68 | |
813fb2f6 A |
69 | /* |
70 | * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail. | |
71 | * To avoid a race on the mount we only make this check after creating the | |
72 | * vnode. | |
73 | */ | |
74 | if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) { | |
75 | vnode_put(*vp); | |
76 | vm_swapfile_close((uint64_t)path, *vp); | |
77 | *vp = NULL; | |
78 | return; | |
79 | } | |
80 | ||
39236c6e A |
81 | vnode_put(*vp); |
82 | } | |
83 | ||
84 | uint64_t | |
85 | vm_swapfile_get_blksize(vnode_t vp) | |
86 | { | |
0a7de745 | 87 | return (uint64_t)vfs_devblocksize(vnode_mount(vp)); |
39236c6e A |
88 | } |
89 | ||
90 | uint64_t | |
91 | vm_swapfile_get_transfer_size(vnode_t vp) | |
92 | { | |
0a7de745 | 93 | return (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize; |
39236c6e A |
94 | } |
95 | ||
c18c124e | 96 | int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int); |
39236c6e A |
97 | |
98 | void | |
99 | vm_swapfile_close(uint64_t path_addr, vnode_t vp) | |
100 | { | |
00867663 | 101 | vfs_context_t context = vfs_context_kernel(); |
c18c124e | 102 | int error; |
39236c6e A |
103 | |
104 | vnode_getwithref(vp); | |
105 | vnode_close(vp, 0, context); | |
0a7de745 | 106 | |
c18c124e A |
107 | error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr), |
108 | UIO_SYSSPACE, 0); | |
39236c6e | 109 | |
c18c124e | 110 | #if DEVELOPMENT || DEBUG |
0a7de745 | 111 | if (error) { |
c18c124e A |
112 | printf("%s : unlink of %s failed with error %d", __FUNCTION__, |
113 | (char *)path_addr, error); | |
0a7de745 | 114 | } |
c18c124e | 115 | #endif |
39236c6e A |
116 | } |
117 | ||
118 | int | |
3e170ce0 | 119 | vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin) |
39236c6e | 120 | { |
0a7de745 A |
121 | int error = 0; |
122 | uint64_t file_size = 0; | |
123 | vfs_context_t ctx = NULL; | |
d9a64523 A |
124 | #if CONFIG_FREEZE |
125 | struct vnode_attr va; | |
126 | #endif /* CONFIG_FREEZE */ | |
39236c6e | 127 | |
00867663 | 128 | ctx = vfs_context_kernel(); |
39236c6e | 129 | |
fe8ab488 | 130 | error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); |
39236c6e | 131 | |
fe8ab488 A |
132 | if (error) { |
133 | printf("vnode_setsize for swap files failed: %d\n", error); | |
39236c6e | 134 | goto done; |
fe8ab488 | 135 | } |
39236c6e | 136 | |
fe8ab488 | 137 | error = vnode_size(vp, (off_t*) &file_size, ctx); |
39236c6e | 138 | |
fe8ab488 A |
139 | if (error) { |
140 | printf("vnode_size (new file) for swap file failed: %d\n", error); | |
3e170ce0 | 141 | goto done; |
0a7de745 | 142 | } |
fe8ab488 | 143 | assert(file_size == *size); |
0a7de745 | 144 | |
3e170ce0 | 145 | if (pin != NULL && *pin != FALSE) { |
39037602 | 146 | error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx); |
3e170ce0 A |
147 | |
148 | if (error) { | |
39037602 | 149 | printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size); |
3e170ce0 A |
150 | /* this is not fatal, carry on with files wherever they landed */ |
151 | *pin = FALSE; | |
152 | error = 0; | |
153 | } | |
154 | } | |
155 | ||
39236c6e A |
156 | vnode_lock_spin(vp); |
157 | SET(vp->v_flag, VSWAP); | |
158 | vnode_unlock(vp); | |
d9a64523 A |
159 | |
160 | #if CONFIG_FREEZE | |
161 | VATTR_INIT(&va); | |
162 | VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_C); | |
163 | error = VNOP_SETATTR(vp, &va, ctx); | |
164 | ||
165 | if (error) { | |
166 | printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error); | |
167 | goto done; | |
168 | } | |
169 | #endif /* CONFIG_FREEZE */ | |
170 | ||
39236c6e A |
171 | done: |
172 | return error; | |
173 | } | |
174 | ||
3e170ce0 A |
175 | |
176 | int | |
177 | vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size) | |
178 | { | |
179 | int error = 0; | |
180 | vfs_context_t ctx; | |
181 | ||
182 | ctx = vfs_context_kernel(); | |
0a7de745 | 183 | |
3e170ce0 | 184 | error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset, |
0a7de745 | 185 | UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); |
3e170ce0 | 186 | |
0a7de745 | 187 | return error; |
3e170ce0 A |
188 | } |
189 | ||
190 | ||
191 | ||
39236c6e | 192 | int |
d9a64523 | 193 | vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags, void *upl_iodone) |
39236c6e A |
194 | { |
195 | int error = 0; | |
f427ee49 | 196 | upl_size_t io_size = (upl_size_t) (npages * PAGE_SIZE_64); |
39236c6e | 197 | #if 1 |
0a7de745 A |
198 | kern_return_t kr = KERN_SUCCESS; |
199 | upl_t upl = NULL; | |
200 | unsigned int count = 0; | |
3e170ce0 | 201 | upl_control_flags_t upl_create_flags = 0; |
0a7de745 A |
202 | int upl_control_flags = 0; |
203 | upl_size_t upl_size = 0; | |
39236c6e | 204 | |
5ba3f43e | 205 | upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; |
39236c6e | 206 | |
0a7de745 A |
207 | if (upl_iodone == NULL) { |
208 | upl_control_flags = UPL_IOSYNC; | |
209 | } | |
d9a64523 | 210 | |
fe8ab488 | 211 | #if ENCRYPTED_SWAP |
d9a64523 | 212 | upl_control_flags |= UPL_PAGING_ENCRYPTED; |
fe8ab488 | 213 | #endif |
d9a64523 | 214 | |
39236c6e A |
215 | if ((flags & SWAP_READ) == FALSE) { |
216 | upl_create_flags |= UPL_COPYOUT_FROM; | |
217 | } | |
0a7de745 | 218 | |
39236c6e A |
219 | upl_size = io_size; |
220 | kr = vm_map_create_upl( kernel_map, | |
0a7de745 A |
221 | start, |
222 | &upl_size, | |
223 | &upl, | |
224 | NULL, | |
225 | &count, | |
226 | &upl_create_flags, | |
227 | VM_KERN_MEMORY_OSFMK); | |
39236c6e A |
228 | |
229 | if (kr != KERN_SUCCESS || (upl_size != io_size)) { | |
230 | panic("vm_map_create_upl failed with %d\n", kr); | |
231 | } | |
232 | ||
233 | if (flags & SWAP_READ) { | |
234 | vnode_pagein(vp, | |
0a7de745 A |
235 | upl, |
236 | 0, | |
237 | offset, | |
238 | io_size, | |
239 | upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, | |
240 | &error); | |
39236c6e A |
241 | if (error) { |
242 | #if DEBUG | |
f427ee49 | 243 | printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size); |
39236c6e A |
244 | #else /* DEBUG */ |
245 | printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); | |
246 | #endif /* DEBUG */ | |
247 | } | |
39236c6e | 248 | } else { |
0a7de745 | 249 | upl_set_iodone(upl, upl_iodone); |
d9a64523 | 250 | |
39236c6e | 251 | vnode_pageout(vp, |
0a7de745 A |
252 | upl, |
253 | 0, | |
254 | offset, | |
255 | io_size, | |
256 | upl_control_flags, | |
257 | &error); | |
39236c6e A |
258 | if (error) { |
259 | #if DEBUG | |
f427ee49 | 260 | printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%u)\n", error, vp, offset, io_size); |
39236c6e A |
261 | #else /* DEBUG */ |
262 | printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); | |
263 | #endif /* DEBUG */ | |
264 | } | |
265 | } | |
f427ee49 | 266 | |
39236c6e A |
267 | return error; |
268 | ||
269 | #else /* 1 */ | |
270 | vfs_context_t ctx; | |
271 | ctx = vfs_context_kernel(); | |
0a7de745 | 272 | |
39236c6e | 273 | error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, |
0a7de745 | 274 | UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); |
39236c6e A |
275 | |
276 | if (error) { | |
277 | printf("vn_rdwr: Swap I/O failed with %d\n", error); | |
278 | } | |
279 | return error; | |
280 | #endif /* 1 */ | |
281 | } | |
282 | ||
283 | ||
0a7de745 | 284 | #define MAX_BATCH_TO_TRIM 256 |
39236c6e | 285 | |
0a7de745 | 286 | #define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */ |
fe8ab488 A |
287 | /* the DKIOUNMAP command through w/o acting on it */ |
288 | /* this is used by the compressed swap system to reclaim empty space */ | |
289 | ||
290 | ||
0a7de745 A |
291 | u_int32_t |
292 | vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only) | |
39236c6e | 293 | { |
0a7de745 A |
294 | int error = 0; |
295 | int trim_index = 0; | |
296 | u_int32_t blocksize = 0; | |
297 | struct vnode *devvp; | |
298 | dk_extent_t *extents; | |
299 | dk_unmap_t unmap; | |
300 | _dk_cs_unmap_t cs_unmap; | |
301 | ||
302 | if (!(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) { | |
303 | return ENOTSUP; | |
304 | } | |
39236c6e | 305 | |
0a7de745 A |
306 | if (tl == NULL) { |
307 | return 0; | |
308 | } | |
39236c6e A |
309 | |
310 | /* | |
311 | * Get the underlying device vnode and physical block size | |
312 | */ | |
313 | devvp = vp->v_mount->mnt_devvp; | |
314 | blocksize = vp->v_mount->mnt_devblocksize; | |
315 | ||
f427ee49 A |
316 | extents = kheap_alloc(KHEAP_TEMP, |
317 | sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM, Z_WAITOK); | |
39236c6e | 318 | |
fe8ab488 | 319 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
0a7de745 | 320 | memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); |
fe8ab488 A |
321 | cs_unmap.extents = extents; |
322 | ||
0a7de745 | 323 | if (route_only == TRUE) { |
fe8ab488 | 324 | cs_unmap.options = ROUTE_ONLY; |
0a7de745 | 325 | } |
fe8ab488 | 326 | } else { |
0a7de745 | 327 | memset(&unmap, 0, sizeof(dk_unmap_t)); |
fe8ab488 A |
328 | unmap.extents = extents; |
329 | } | |
39236c6e A |
330 | |
331 | while (tl) { | |
0a7de745 A |
332 | daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ |
333 | size_t io_bytecount; /* Number of bytes in current extent for the specified range */ | |
334 | size_t trimmed; | |
335 | size_t remaining_length; | |
336 | off_t current_offset; | |
39236c6e A |
337 | |
338 | current_offset = tl->tl_offset; | |
339 | remaining_length = tl->tl_length; | |
340 | trimmed = 0; | |
0a7de745 A |
341 | |
342 | /* | |
39236c6e A |
343 | * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single |
344 | * extent from the blockmap call. Keep looping/going until we are sure we've hit | |
345 | * the whole range or if we encounter an error. | |
346 | */ | |
347 | while (trimmed < tl->tl_length) { | |
348 | /* | |
349 | * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the | |
0a7de745 | 350 | * specified offset. It returns blocks in contiguous chunks, so if the logical range is |
39236c6e A |
351 | * broken into multiple extents, it must be called multiple times, increasing the offset |
352 | * in each call to ensure that the entire range is covered. | |
353 | */ | |
0a7de745 A |
354 | error = VNOP_BLOCKMAP(vp, current_offset, remaining_length, |
355 | &io_blockno, &io_bytecount, NULL, VNODE_READ | VNODE_BLOCKMAP_NO_TRACK, NULL); | |
39236c6e A |
356 | |
357 | if (error) { | |
358 | goto trim_exit; | |
359 | } | |
5ba3f43e | 360 | if (io_blockno != -1) { |
0a7de745 | 361 | extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; |
5ba3f43e | 362 | extents[trim_index].length = io_bytecount; |
39236c6e | 363 | |
5ba3f43e A |
364 | trim_index++; |
365 | } | |
39236c6e | 366 | if (trim_index == MAX_BATCH_TO_TRIM) { |
fe8ab488 A |
367 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
368 | cs_unmap.extentsCount = trim_index; | |
369 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
370 | } else { | |
371 | unmap.extentsCount = trim_index; | |
372 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
373 | } | |
39236c6e A |
374 | if (error) { |
375 | goto trim_exit; | |
376 | } | |
377 | trim_index = 0; | |
378 | } | |
379 | trimmed += io_bytecount; | |
380 | current_offset += io_bytecount; | |
381 | remaining_length -= io_bytecount; | |
382 | } | |
383 | tl = tl->tl_next; | |
384 | } | |
385 | if (trim_index) { | |
fe8ab488 A |
386 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
387 | cs_unmap.extentsCount = trim_index; | |
388 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
389 | } else { | |
390 | unmap.extentsCount = trim_index; | |
391 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
392 | } | |
39236c6e A |
393 | } |
394 | trim_exit: | |
f427ee49 | 395 | kheap_free(KHEAP_TEMP, extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); |
39236c6e A |
396 | |
397 | return error; | |
398 | } | |
d9a64523 A |
399 | |
#if CONFIG_FREEZE
/*
 * Query the device backing the mount of `vp` with DKIOCGETMAXSWAPWRITE,
 * letting the ioctl fill in *freeze_daily_budget.
 *
 * Returns the VNOP_IOCTL() result.
 */
int
vm_swap_vol_get_budget(vnode_t vp, uint64_t *freeze_daily_budget)
{
	vfs_context_t   kernel_ctx = vfs_context_kernel();
	vnode_t         backing_dev = vp->v_mount->mnt_devvp;

	return VNOP_IOCTL(backing_dev, DKIOCGETMAXSWAPWRITE, (caddr_t)freeze_daily_budget, 0, kernel_ctx);
}
#endif /* CONFIG_FREEZE */