]>
Commit | Line | Data |
---|---|---|
39236c6e A |
1 | /* |
2 | * Copyright (c) 2000-2013 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <stdint.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/vnode_internal.h> | |
32 | #include <sys/vnode.h> | |
33 | #include <sys/kauth.h> | |
34 | #include <sys/mount_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | #include <kern/debug.h> | |
37 | #include <kern/kalloc.h> | |
38 | #include <sys/cprotect.h> | |
39 | #include <sys/disk.h> | |
40 | #include <vm/vm_protos.h> | |
41 | #include <vm/vm_pageout.h> | |
42 | ||
43 | void vm_swapfile_open(const char *path, vnode_t *vp); | |
44 | void vm_swapfile_close(uint64_t path, vnode_t vp); | |
45 | int vm_swapfile_preallocate(vnode_t vp, uint64_t *size); | |
46 | uint64_t vm_swapfile_get_blksize(vnode_t vp); | |
47 | uint64_t vm_swapfile_get_transfer_size(vnode_t vp); | |
48 | int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); | |
49 | ||
50 | void | |
51 | vm_swapfile_open(const char *path, vnode_t *vp) | |
52 | { | |
53 | int error = 0; | |
54 | vfs_context_t ctx = vfs_context_current(); | |
55 | ||
56 | if ((error = vnode_open(path, (O_CREAT | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { | |
57 | printf("Failed to open swap file %d\n", error); | |
58 | *vp = NULL; | |
59 | return; | |
60 | } | |
61 | ||
62 | vnode_put(*vp); | |
63 | } | |
64 | ||
65 | uint64_t | |
66 | vm_swapfile_get_blksize(vnode_t vp) | |
67 | { | |
68 | return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); | |
69 | } | |
70 | ||
71 | uint64_t | |
72 | vm_swapfile_get_transfer_size(vnode_t vp) | |
73 | { | |
74 | return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); | |
75 | } | |
76 | ||
77 | int unlink1(vfs_context_t, struct nameidata *, int); | |
78 | ||
79 | void | |
80 | vm_swapfile_close(uint64_t path_addr, vnode_t vp) | |
81 | { | |
82 | struct nameidata nd; | |
83 | vfs_context_t context = vfs_context_current(); | |
84 | int error = 0; | |
85 | ||
86 | vnode_getwithref(vp); | |
87 | vnode_close(vp, 0, context); | |
88 | ||
89 | NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE, | |
90 | path_addr, context); | |
91 | ||
92 | error = unlink1(context, &nd, 0); | |
93 | } | |
94 | ||
95 | int | |
96 | vm_swapfile_preallocate(vnode_t vp, uint64_t *size) | |
97 | { | |
98 | int error = 0; | |
99 | uint64_t file_size = 0; | |
100 | vfs_context_t ctx = NULL; | |
101 | ||
102 | ||
103 | ctx = vfs_context_current(); | |
104 | ||
105 | #if CONFIG_PROTECT | |
106 | { | |
107 | #if 0 // <rdar://11771612> | |
108 | ||
109 | if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) { | |
110 | if(config_protect_bug) { | |
111 | printf("swap protection class set failed with %d\n", error); | |
112 | } else { | |
113 | panic("swap protection class set failed with %d\n", error); | |
114 | } | |
115 | } | |
116 | #endif | |
117 | /* initialize content protection keys manually */ | |
118 | if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { | |
119 | printf("Content Protection key failure on swap: %d\n", error); | |
120 | vnode_put(vp); | |
121 | vp = NULL; | |
122 | goto done; | |
123 | } | |
124 | } | |
125 | #endif | |
126 | ||
127 | /* | |
128 | * This check exists because dynamic_pager creates the 1st swapfile, | |
129 | * swapfile0, for us from user-space in a supported manner (with IO_NOZEROFILL etc). | |
130 | * | |
131 | * If dynamic_pager, in the future, discontinues creating that file, | |
132 | * then we need to change this check to a panic / assert or return an error. | |
133 | * That's because we can't be sure if the file has been created correctly. | |
134 | */ | |
135 | ||
136 | if ((error = vnode_size(vp, (off_t*) &file_size, ctx)) != 0) { | |
137 | ||
138 | printf("vnode_size (existing files) for swap files failed: %d\n", error); | |
139 | goto done; | |
140 | } else { | |
141 | ||
142 | if (file_size == 0) { | |
143 | ||
144 | error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); | |
145 | ||
146 | if (error) { | |
147 | printf("vnode_setsize for swap files failed: %d\n", error); | |
148 | goto done; | |
149 | } | |
150 | } else { | |
151 | ||
152 | *size = file_size; | |
153 | } | |
154 | } | |
155 | ||
156 | vnode_lock_spin(vp); | |
157 | SET(vp->v_flag, VSWAP); | |
158 | vnode_unlock(vp); | |
159 | done: | |
160 | return error; | |
161 | } | |
162 | ||
163 | int | |
164 | vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) | |
165 | { | |
166 | int error = 0; | |
167 | uint64_t io_size = npages * PAGE_SIZE_64; | |
168 | #if 1 | |
169 | kern_return_t kr = KERN_SUCCESS; | |
170 | upl_t upl = NULL; | |
171 | unsigned int count = 0; | |
172 | int upl_create_flags = 0, upl_control_flags = 0; | |
173 | upl_size_t upl_size = 0; | |
174 | ||
175 | upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; | |
15129b1c | 176 | upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; |
39236c6e A |
177 | |
178 | if ((flags & SWAP_READ) == FALSE) { | |
179 | upl_create_flags |= UPL_COPYOUT_FROM; | |
180 | } | |
181 | ||
182 | upl_size = io_size; | |
183 | kr = vm_map_create_upl( kernel_map, | |
184 | start, | |
185 | &upl_size, | |
186 | &upl, | |
187 | NULL, | |
188 | &count, | |
189 | &upl_create_flags); | |
190 | ||
191 | if (kr != KERN_SUCCESS || (upl_size != io_size)) { | |
192 | panic("vm_map_create_upl failed with %d\n", kr); | |
193 | } | |
194 | ||
195 | if (flags & SWAP_READ) { | |
196 | vnode_pagein(vp, | |
197 | upl, | |
198 | 0, | |
199 | offset, | |
200 | io_size, | |
201 | upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, | |
202 | &error); | |
203 | if (error) { | |
204 | #if DEBUG | |
205 | printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
206 | #else /* DEBUG */ | |
207 | printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); | |
208 | #endif /* DEBUG */ | |
209 | } | |
210 | ||
211 | } else { | |
212 | vnode_pageout(vp, | |
213 | upl, | |
214 | 0, | |
215 | offset, | |
216 | io_size, | |
217 | upl_control_flags, | |
218 | &error); | |
219 | if (error) { | |
220 | #if DEBUG | |
221 | printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
222 | #else /* DEBUG */ | |
223 | printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); | |
224 | #endif /* DEBUG */ | |
225 | } | |
226 | } | |
227 | return error; | |
228 | ||
229 | #else /* 1 */ | |
230 | vfs_context_t ctx; | |
231 | ctx = vfs_context_kernel(); | |
232 | ||
233 | error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, | |
234 | UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
235 | ||
236 | if (error) { | |
237 | printf("vn_rdwr: Swap I/O failed with %d\n", error); | |
238 | } | |
239 | return error; | |
240 | #endif /* 1 */ | |
241 | } | |
242 | ||
243 | ||
244 | #define MAX_BATCH_TO_TRIM 256 | |
245 | ||
246 | u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) | |
247 | { | |
248 | int error = 0; | |
249 | int trim_index = 0; | |
250 | u_int32_t blocksize = 0; | |
251 | struct vnode *devvp; | |
252 | dk_extent_t *extents; | |
253 | dk_unmap_t unmap; | |
254 | ||
255 | if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) | |
256 | return (ENOTSUP); | |
257 | ||
258 | if (tl == NULL) | |
259 | return (0); | |
260 | ||
261 | /* | |
262 | * Get the underlying device vnode and physical block size | |
263 | */ | |
264 | devvp = vp->v_mount->mnt_devvp; | |
265 | blocksize = vp->v_mount->mnt_devblocksize; | |
266 | ||
267 | extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
268 | ||
269 | memset (&unmap, 0, sizeof(dk_unmap_t)); | |
270 | unmap.extents = extents; | |
271 | ||
272 | while (tl) { | |
273 | daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ | |
274 | size_t io_bytecount; /* Number of bytes in current extent for the specified range */ | |
275 | size_t trimmed; | |
276 | size_t remaining_length; | |
277 | off_t current_offset; | |
278 | ||
279 | current_offset = tl->tl_offset; | |
280 | remaining_length = tl->tl_length; | |
281 | trimmed = 0; | |
282 | ||
283 | /* | |
284 | * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single | |
285 | * extent from the blockmap call. Keep looping/going until we are sure we've hit | |
286 | * the whole range or if we encounter an error. | |
287 | */ | |
288 | while (trimmed < tl->tl_length) { | |
289 | /* | |
290 | * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the | |
291 | * specified offset. It returns blocks in contiguous chunks, so if the logical range is | |
292 | * broken into multiple extents, it must be called multiple times, increasing the offset | |
293 | * in each call to ensure that the entire range is covered. | |
294 | */ | |
295 | error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, | |
296 | &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); | |
297 | ||
298 | if (error) { | |
299 | goto trim_exit; | |
300 | } | |
301 | ||
302 | extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; | |
303 | extents[trim_index].length = io_bytecount; | |
304 | ||
305 | trim_index++; | |
306 | ||
307 | if (trim_index == MAX_BATCH_TO_TRIM) { | |
308 | ||
309 | unmap.extentsCount = trim_index; | |
310 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
311 | ||
312 | if (error) { | |
313 | goto trim_exit; | |
314 | } | |
315 | trim_index = 0; | |
316 | } | |
317 | trimmed += io_bytecount; | |
318 | current_offset += io_bytecount; | |
319 | remaining_length -= io_bytecount; | |
320 | } | |
321 | tl = tl->tl_next; | |
322 | } | |
323 | if (trim_index) { | |
324 | ||
325 | unmap.extentsCount = trim_index; | |
326 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
327 | } | |
328 | trim_exit: | |
329 | kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
330 | ||
331 | return error; | |
332 | } |