]>
Commit | Line | Data |
---|---|---|
39236c6e A |
1 | /* |
2 | * Copyright (c) 2000-2013 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <stdint.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/vnode_internal.h> | |
32 | #include <sys/vnode.h> | |
33 | #include <sys/kauth.h> | |
34 | #include <sys/mount_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | #include <kern/debug.h> | |
37 | #include <kern/kalloc.h> | |
38 | #include <sys/cprotect.h> | |
39 | #include <sys/disk.h> | |
40 | #include <vm/vm_protos.h> | |
41 | #include <vm/vm_pageout.h> | |
42 | ||
43 | void vm_swapfile_open(const char *path, vnode_t *vp); | |
44 | void vm_swapfile_close(uint64_t path, vnode_t vp); | |
45 | int vm_swapfile_preallocate(vnode_t vp, uint64_t *size); | |
46 | uint64_t vm_swapfile_get_blksize(vnode_t vp); | |
47 | uint64_t vm_swapfile_get_transfer_size(vnode_t vp); | |
48 | int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); | |
49 | ||
50 | void | |
51 | vm_swapfile_open(const char *path, vnode_t *vp) | |
52 | { | |
53 | int error = 0; | |
54 | vfs_context_t ctx = vfs_context_current(); | |
55 | ||
fe8ab488 | 56 | if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { |
39236c6e A |
57 | printf("Failed to open swap file %d\n", error); |
58 | *vp = NULL; | |
59 | return; | |
60 | } | |
61 | ||
62 | vnode_put(*vp); | |
63 | } | |
64 | ||
65 | uint64_t | |
66 | vm_swapfile_get_blksize(vnode_t vp) | |
67 | { | |
68 | return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); | |
69 | } | |
70 | ||
71 | uint64_t | |
72 | vm_swapfile_get_transfer_size(vnode_t vp) | |
73 | { | |
74 | return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); | |
75 | } | |
76 | ||
77 | int unlink1(vfs_context_t, struct nameidata *, int); | |
78 | ||
79 | void | |
80 | vm_swapfile_close(uint64_t path_addr, vnode_t vp) | |
81 | { | |
82 | struct nameidata nd; | |
83 | vfs_context_t context = vfs_context_current(); | |
84 | int error = 0; | |
85 | ||
86 | vnode_getwithref(vp); | |
87 | vnode_close(vp, 0, context); | |
88 | ||
89 | NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE, | |
90 | path_addr, context); | |
91 | ||
92 | error = unlink1(context, &nd, 0); | |
93 | } | |
94 | ||
95 | int | |
96 | vm_swapfile_preallocate(vnode_t vp, uint64_t *size) | |
97 | { | |
98 | int error = 0; | |
99 | uint64_t file_size = 0; | |
100 | vfs_context_t ctx = NULL; | |
101 | ||
102 | ||
103 | ctx = vfs_context_current(); | |
104 | ||
105 | #if CONFIG_PROTECT | |
106 | { | |
107 | #if 0 // <rdar://11771612> | |
108 | ||
109 | if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) { | |
110 | if(config_protect_bug) { | |
111 | printf("swap protection class set failed with %d\n", error); | |
112 | } else { | |
113 | panic("swap protection class set failed with %d\n", error); | |
114 | } | |
115 | } | |
116 | #endif | |
117 | /* initialize content protection keys manually */ | |
118 | if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { | |
119 | printf("Content Protection key failure on swap: %d\n", error); | |
120 | vnode_put(vp); | |
121 | vp = NULL; | |
122 | goto done; | |
123 | } | |
124 | } | |
125 | #endif | |
126 | ||
fe8ab488 | 127 | error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); |
39236c6e | 128 | |
fe8ab488 A |
129 | if (error) { |
130 | printf("vnode_setsize for swap files failed: %d\n", error); | |
39236c6e | 131 | goto done; |
fe8ab488 | 132 | } |
39236c6e | 133 | |
fe8ab488 | 134 | error = vnode_size(vp, (off_t*) &file_size, ctx); |
39236c6e | 135 | |
fe8ab488 A |
136 | if (error) { |
137 | printf("vnode_size (new file) for swap file failed: %d\n", error); | |
138 | } | |
39236c6e | 139 | |
fe8ab488 A |
140 | assert(file_size == *size); |
141 | ||
39236c6e A |
142 | vnode_lock_spin(vp); |
143 | SET(vp->v_flag, VSWAP); | |
144 | vnode_unlock(vp); | |
145 | done: | |
146 | return error; | |
147 | } | |
148 | ||
149 | int | |
150 | vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) | |
151 | { | |
152 | int error = 0; | |
153 | uint64_t io_size = npages * PAGE_SIZE_64; | |
154 | #if 1 | |
155 | kern_return_t kr = KERN_SUCCESS; | |
156 | upl_t upl = NULL; | |
157 | unsigned int count = 0; | |
158 | int upl_create_flags = 0, upl_control_flags = 0; | |
159 | upl_size_t upl_size = 0; | |
160 | ||
161 | upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; | |
39236c6e | 162 | |
fe8ab488 A |
163 | #if ENCRYPTED_SWAP |
164 | upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; | |
165 | #else | |
166 | upl_control_flags = UPL_IOSYNC; | |
167 | #endif | |
39236c6e A |
168 | if ((flags & SWAP_READ) == FALSE) { |
169 | upl_create_flags |= UPL_COPYOUT_FROM; | |
170 | } | |
171 | ||
172 | upl_size = io_size; | |
173 | kr = vm_map_create_upl( kernel_map, | |
174 | start, | |
175 | &upl_size, | |
176 | &upl, | |
177 | NULL, | |
178 | &count, | |
179 | &upl_create_flags); | |
180 | ||
181 | if (kr != KERN_SUCCESS || (upl_size != io_size)) { | |
182 | panic("vm_map_create_upl failed with %d\n", kr); | |
183 | } | |
184 | ||
185 | if (flags & SWAP_READ) { | |
186 | vnode_pagein(vp, | |
187 | upl, | |
188 | 0, | |
189 | offset, | |
190 | io_size, | |
191 | upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, | |
192 | &error); | |
193 | if (error) { | |
194 | #if DEBUG | |
195 | printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
196 | #else /* DEBUG */ | |
197 | printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); | |
198 | #endif /* DEBUG */ | |
199 | } | |
200 | ||
201 | } else { | |
202 | vnode_pageout(vp, | |
203 | upl, | |
204 | 0, | |
205 | offset, | |
206 | io_size, | |
207 | upl_control_flags, | |
208 | &error); | |
209 | if (error) { | |
210 | #if DEBUG | |
211 | printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
212 | #else /* DEBUG */ | |
213 | printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); | |
214 | #endif /* DEBUG */ | |
215 | } | |
216 | } | |
217 | return error; | |
218 | ||
219 | #else /* 1 */ | |
220 | vfs_context_t ctx; | |
221 | ctx = vfs_context_kernel(); | |
222 | ||
223 | error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, | |
224 | UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
225 | ||
226 | if (error) { | |
227 | printf("vn_rdwr: Swap I/O failed with %d\n", error); | |
228 | } | |
229 | return error; | |
230 | #endif /* 1 */ | |
231 | } | |
232 | ||
233 | ||
/*
 * Number of extents gathered into one batch; vnode_trim_list() issues an
 * unmap ioctl each time this many extents have accumulated.
 */
#define MAX_BATCH_TO_TRIM	256

/*
 * If corestorage is present, tell it to just pass the DKIOUNMAP command
 * through without acting on it. This is used by the compressed swap system
 * to reclaim empty space.
 */
#define ROUTE_ONLY	0x10
239 | ||
240 | ||
241 | u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only) | |
39236c6e A |
242 | { |
243 | int error = 0; | |
244 | int trim_index = 0; | |
245 | u_int32_t blocksize = 0; | |
246 | struct vnode *devvp; | |
247 | dk_extent_t *extents; | |
248 | dk_unmap_t unmap; | |
fe8ab488 | 249 | _dk_cs_unmap_t cs_unmap; |
39236c6e A |
250 | |
251 | if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) | |
252 | return (ENOTSUP); | |
253 | ||
254 | if (tl == NULL) | |
255 | return (0); | |
256 | ||
257 | /* | |
258 | * Get the underlying device vnode and physical block size | |
259 | */ | |
260 | devvp = vp->v_mount->mnt_devvp; | |
261 | blocksize = vp->v_mount->mnt_devblocksize; | |
262 | ||
263 | extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
264 | ||
fe8ab488 A |
265 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
266 | memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); | |
267 | cs_unmap.extents = extents; | |
268 | ||
269 | if (route_only == TRUE) | |
270 | cs_unmap.options = ROUTE_ONLY; | |
271 | } else { | |
272 | memset (&unmap, 0, sizeof(dk_unmap_t)); | |
273 | unmap.extents = extents; | |
274 | } | |
39236c6e A |
275 | |
276 | while (tl) { | |
277 | daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ | |
278 | size_t io_bytecount; /* Number of bytes in current extent for the specified range */ | |
279 | size_t trimmed; | |
280 | size_t remaining_length; | |
281 | off_t current_offset; | |
282 | ||
283 | current_offset = tl->tl_offset; | |
284 | remaining_length = tl->tl_length; | |
285 | trimmed = 0; | |
286 | ||
287 | /* | |
288 | * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single | |
289 | * extent from the blockmap call. Keep looping/going until we are sure we've hit | |
290 | * the whole range or if we encounter an error. | |
291 | */ | |
292 | while (trimmed < tl->tl_length) { | |
293 | /* | |
294 | * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the | |
295 | * specified offset. It returns blocks in contiguous chunks, so if the logical range is | |
296 | * broken into multiple extents, it must be called multiple times, increasing the offset | |
297 | * in each call to ensure that the entire range is covered. | |
298 | */ | |
299 | error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, | |
300 | &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); | |
301 | ||
302 | if (error) { | |
303 | goto trim_exit; | |
304 | } | |
305 | ||
306 | extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; | |
307 | extents[trim_index].length = io_bytecount; | |
308 | ||
309 | trim_index++; | |
310 | ||
311 | if (trim_index == MAX_BATCH_TO_TRIM) { | |
312 | ||
fe8ab488 A |
313 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
314 | cs_unmap.extentsCount = trim_index; | |
315 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
316 | } else { | |
317 | unmap.extentsCount = trim_index; | |
318 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
319 | } | |
39236c6e A |
320 | if (error) { |
321 | goto trim_exit; | |
322 | } | |
323 | trim_index = 0; | |
324 | } | |
325 | trimmed += io_bytecount; | |
326 | current_offset += io_bytecount; | |
327 | remaining_length -= io_bytecount; | |
328 | } | |
329 | tl = tl->tl_next; | |
330 | } | |
331 | if (trim_index) { | |
fe8ab488 A |
332 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
333 | cs_unmap.extentsCount = trim_index; | |
334 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
335 | } else { | |
336 | unmap.extentsCount = trim_index; | |
337 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
338 | } | |
39236c6e A |
339 | } |
340 | trim_exit: | |
341 | kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
342 | ||
343 | return error; | |
344 | } |