]>
Commit | Line | Data |
---|---|---|
39236c6e | 1 | /* |
813fb2f6 | 2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
39236c6e A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <stdint.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/vnode_internal.h> | |
32 | #include <sys/vnode.h> | |
33 | #include <sys/kauth.h> | |
34 | #include <sys/mount_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | #include <kern/debug.h> | |
37 | #include <kern/kalloc.h> | |
38 | #include <sys/cprotect.h> | |
39 | #include <sys/disk.h> | |
40 | #include <vm/vm_protos.h> | |
41 | #include <vm/vm_pageout.h> | |
42 | ||
43 | void vm_swapfile_open(const char *path, vnode_t *vp); | |
44 | void vm_swapfile_close(uint64_t path, vnode_t vp); | |
3e170ce0 | 45 | int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin); |
39236c6e A |
46 | uint64_t vm_swapfile_get_blksize(vnode_t vp); |
47 | uint64_t vm_swapfile_get_transfer_size(vnode_t vp); | |
48 | int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); | |
3e170ce0 A |
49 | int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size); |
50 | ||
39236c6e A |
51 | |
52 | void | |
53 | vm_swapfile_open(const char *path, vnode_t *vp) | |
54 | { | |
55 | int error = 0; | |
56 | vfs_context_t ctx = vfs_context_current(); | |
57 | ||
fe8ab488 | 58 | if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { |
39236c6e A |
59 | printf("Failed to open swap file %d\n", error); |
60 | *vp = NULL; | |
61 | return; | |
62 | } | |
63 | ||
813fb2f6 A |
64 | /* |
65 | * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail. | |
66 | * To avoid a race on the mount we only make this check after creating the | |
67 | * vnode. | |
68 | */ | |
69 | if ((*vp)->v_mount->mnt_kern_flag & MNTK_NOSWAP) { | |
70 | vnode_put(*vp); | |
71 | vm_swapfile_close((uint64_t)path, *vp); | |
72 | *vp = NULL; | |
73 | return; | |
74 | } | |
75 | ||
39236c6e A |
76 | vnode_put(*vp); |
77 | } | |
78 | ||
79 | uint64_t | |
80 | vm_swapfile_get_blksize(vnode_t vp) | |
81 | { | |
82 | return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); | |
83 | } | |
84 | ||
85 | uint64_t | |
86 | vm_swapfile_get_transfer_size(vnode_t vp) | |
87 | { | |
88 | return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); | |
89 | } | |
90 | ||
c18c124e | 91 | int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int); |
39236c6e A |
92 | |
93 | void | |
94 | vm_swapfile_close(uint64_t path_addr, vnode_t vp) | |
95 | { | |
39236c6e | 96 | vfs_context_t context = vfs_context_current(); |
c18c124e | 97 | int error; |
39236c6e A |
98 | |
99 | vnode_getwithref(vp); | |
100 | vnode_close(vp, 0, context); | |
101 | ||
c18c124e A |
102 | error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr), |
103 | UIO_SYSSPACE, 0); | |
39236c6e | 104 | |
c18c124e A |
105 | #if DEVELOPMENT || DEBUG |
106 | if (error) | |
107 | printf("%s : unlink of %s failed with error %d", __FUNCTION__, | |
108 | (char *)path_addr, error); | |
109 | #endif | |
39236c6e A |
110 | } |
111 | ||
112 | int | |
3e170ce0 | 113 | vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin) |
39236c6e A |
114 | { |
115 | int error = 0; | |
116 | uint64_t file_size = 0; | |
117 | vfs_context_t ctx = NULL; | |
118 | ||
119 | ||
120 | ctx = vfs_context_current(); | |
121 | ||
fe8ab488 | 122 | error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); |
39236c6e | 123 | |
fe8ab488 A |
124 | if (error) { |
125 | printf("vnode_setsize for swap files failed: %d\n", error); | |
39236c6e | 126 | goto done; |
fe8ab488 | 127 | } |
39236c6e | 128 | |
fe8ab488 | 129 | error = vnode_size(vp, (off_t*) &file_size, ctx); |
39236c6e | 130 | |
fe8ab488 A |
131 | if (error) { |
132 | printf("vnode_size (new file) for swap file failed: %d\n", error); | |
3e170ce0 | 133 | goto done; |
fe8ab488 | 134 | } |
fe8ab488 A |
135 | assert(file_size == *size); |
136 | ||
3e170ce0 | 137 | if (pin != NULL && *pin != FALSE) { |
39037602 | 138 | error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx); |
3e170ce0 A |
139 | |
140 | if (error) { | |
39037602 | 141 | printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size); |
3e170ce0 A |
142 | /* this is not fatal, carry on with files wherever they landed */ |
143 | *pin = FALSE; | |
144 | error = 0; | |
145 | } | |
146 | } | |
147 | ||
39236c6e A |
148 | vnode_lock_spin(vp); |
149 | SET(vp->v_flag, VSWAP); | |
150 | vnode_unlock(vp); | |
151 | done: | |
152 | return error; | |
153 | } | |
154 | ||
3e170ce0 A |
155 | |
156 | int | |
157 | vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size) | |
158 | { | |
159 | int error = 0; | |
160 | vfs_context_t ctx; | |
161 | ||
162 | ctx = vfs_context_kernel(); | |
163 | ||
164 | error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset, | |
165 | UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
166 | ||
167 | return (error); | |
168 | } | |
169 | ||
170 | ||
171 | ||
39236c6e A |
172 | int |
173 | vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) | |
174 | { | |
175 | int error = 0; | |
176 | uint64_t io_size = npages * PAGE_SIZE_64; | |
177 | #if 1 | |
178 | kern_return_t kr = KERN_SUCCESS; | |
179 | upl_t upl = NULL; | |
180 | unsigned int count = 0; | |
3e170ce0 A |
181 | upl_control_flags_t upl_create_flags = 0; |
182 | int upl_control_flags = 0; | |
39236c6e A |
183 | upl_size_t upl_size = 0; |
184 | ||
3e170ce0 A |
185 | upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE |
186 | | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK); | |
39236c6e | 187 | |
fe8ab488 A |
188 | #if ENCRYPTED_SWAP |
189 | upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; | |
190 | #else | |
191 | upl_control_flags = UPL_IOSYNC; | |
192 | #endif | |
39236c6e A |
193 | if ((flags & SWAP_READ) == FALSE) { |
194 | upl_create_flags |= UPL_COPYOUT_FROM; | |
195 | } | |
196 | ||
197 | upl_size = io_size; | |
198 | kr = vm_map_create_upl( kernel_map, | |
199 | start, | |
200 | &upl_size, | |
201 | &upl, | |
202 | NULL, | |
203 | &count, | |
204 | &upl_create_flags); | |
205 | ||
206 | if (kr != KERN_SUCCESS || (upl_size != io_size)) { | |
207 | panic("vm_map_create_upl failed with %d\n", kr); | |
208 | } | |
209 | ||
210 | if (flags & SWAP_READ) { | |
211 | vnode_pagein(vp, | |
212 | upl, | |
213 | 0, | |
214 | offset, | |
215 | io_size, | |
216 | upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, | |
217 | &error); | |
218 | if (error) { | |
219 | #if DEBUG | |
220 | printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
221 | #else /* DEBUG */ | |
222 | printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); | |
223 | #endif /* DEBUG */ | |
224 | } | |
225 | ||
226 | } else { | |
227 | vnode_pageout(vp, | |
228 | upl, | |
229 | 0, | |
230 | offset, | |
231 | io_size, | |
232 | upl_control_flags, | |
233 | &error); | |
234 | if (error) { | |
235 | #if DEBUG | |
236 | printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
237 | #else /* DEBUG */ | |
238 | printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); | |
239 | #endif /* DEBUG */ | |
240 | } | |
241 | } | |
242 | return error; | |
243 | ||
244 | #else /* 1 */ | |
245 | vfs_context_t ctx; | |
246 | ctx = vfs_context_kernel(); | |
247 | ||
248 | error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, | |
249 | UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
250 | ||
251 | if (error) { | |
252 | printf("vn_rdwr: Swap I/O failed with %d\n", error); | |
253 | } | |
254 | return error; | |
255 | #endif /* 1 */ | |
256 | } | |
257 | ||
258 | ||
259 | #define MAX_BATCH_TO_TRIM 256 | |
260 | ||
fe8ab488 A |
261 | #define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */ |
262 | /* the DKIOUNMAP command through w/o acting on it */ | |
263 | /* this is used by the compressed swap system to reclaim empty space */ | |
264 | ||
265 | ||
266 | u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only) | |
39236c6e A |
267 | { |
268 | int error = 0; | |
269 | int trim_index = 0; | |
270 | u_int32_t blocksize = 0; | |
271 | struct vnode *devvp; | |
272 | dk_extent_t *extents; | |
273 | dk_unmap_t unmap; | |
fe8ab488 | 274 | _dk_cs_unmap_t cs_unmap; |
39236c6e A |
275 | |
276 | if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) | |
277 | return (ENOTSUP); | |
278 | ||
279 | if (tl == NULL) | |
280 | return (0); | |
281 | ||
282 | /* | |
283 | * Get the underlying device vnode and physical block size | |
284 | */ | |
285 | devvp = vp->v_mount->mnt_devvp; | |
286 | blocksize = vp->v_mount->mnt_devblocksize; | |
287 | ||
288 | extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
289 | ||
fe8ab488 A |
290 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
291 | memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); | |
292 | cs_unmap.extents = extents; | |
293 | ||
294 | if (route_only == TRUE) | |
295 | cs_unmap.options = ROUTE_ONLY; | |
296 | } else { | |
297 | memset (&unmap, 0, sizeof(dk_unmap_t)); | |
298 | unmap.extents = extents; | |
299 | } | |
39236c6e A |
300 | |
301 | while (tl) { | |
302 | daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ | |
303 | size_t io_bytecount; /* Number of bytes in current extent for the specified range */ | |
304 | size_t trimmed; | |
305 | size_t remaining_length; | |
306 | off_t current_offset; | |
307 | ||
308 | current_offset = tl->tl_offset; | |
309 | remaining_length = tl->tl_length; | |
310 | trimmed = 0; | |
311 | ||
312 | /* | |
313 | * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single | |
314 | * extent from the blockmap call. Keep looping/going until we are sure we've hit | |
315 | * the whole range or if we encounter an error. | |
316 | */ | |
317 | while (trimmed < tl->tl_length) { | |
318 | /* | |
319 | * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the | |
320 | * specified offset. It returns blocks in contiguous chunks, so if the logical range is | |
321 | * broken into multiple extents, it must be called multiple times, increasing the offset | |
322 | * in each call to ensure that the entire range is covered. | |
323 | */ | |
324 | error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, | |
325 | &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); | |
326 | ||
327 | if (error) { | |
328 | goto trim_exit; | |
329 | } | |
330 | ||
331 | extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; | |
332 | extents[trim_index].length = io_bytecount; | |
333 | ||
334 | trim_index++; | |
335 | ||
336 | if (trim_index == MAX_BATCH_TO_TRIM) { | |
337 | ||
fe8ab488 A |
338 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
339 | cs_unmap.extentsCount = trim_index; | |
340 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
341 | } else { | |
342 | unmap.extentsCount = trim_index; | |
343 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
344 | } | |
39236c6e A |
345 | if (error) { |
346 | goto trim_exit; | |
347 | } | |
348 | trim_index = 0; | |
349 | } | |
350 | trimmed += io_bytecount; | |
351 | current_offset += io_bytecount; | |
352 | remaining_length -= io_bytecount; | |
353 | } | |
354 | tl = tl->tl_next; | |
355 | } | |
356 | if (trim_index) { | |
fe8ab488 A |
357 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
358 | cs_unmap.extentsCount = trim_index; | |
359 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
360 | } else { | |
361 | unmap.extentsCount = trim_index; | |
362 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
363 | } | |
39236c6e A |
364 | } |
365 | trim_exit: | |
366 | kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
367 | ||
368 | return error; | |
369 | } |