]>
Commit | Line | Data |
---|---|---|
39236c6e A |
1 | /* |
2 | * Copyright (c) 2000-2013 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <stdint.h> | |
30 | #include <sys/fcntl.h> | |
31 | #include <sys/vnode_internal.h> | |
32 | #include <sys/vnode.h> | |
33 | #include <sys/kauth.h> | |
34 | #include <sys/mount_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | #include <kern/debug.h> | |
37 | #include <kern/kalloc.h> | |
38 | #include <sys/cprotect.h> | |
39 | #include <sys/disk.h> | |
40 | #include <vm/vm_protos.h> | |
41 | #include <vm/vm_pageout.h> | |
42 | ||
43 | void vm_swapfile_open(const char *path, vnode_t *vp); | |
44 | void vm_swapfile_close(uint64_t path, vnode_t vp); | |
3e170ce0 | 45 | int vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin); |
39236c6e A |
46 | uint64_t vm_swapfile_get_blksize(vnode_t vp); |
47 | uint64_t vm_swapfile_get_transfer_size(vnode_t vp); | |
48 | int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); | |
3e170ce0 A |
49 | int vm_record_file_write(struct vnode *vp, uint64_t offset, char *buf, int size); |
50 | ||
39236c6e A |
51 | |
52 | void | |
53 | vm_swapfile_open(const char *path, vnode_t *vp) | |
54 | { | |
55 | int error = 0; | |
56 | vfs_context_t ctx = vfs_context_current(); | |
57 | ||
fe8ab488 | 58 | if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { |
39236c6e A |
59 | printf("Failed to open swap file %d\n", error); |
60 | *vp = NULL; | |
61 | return; | |
62 | } | |
63 | ||
64 | vnode_put(*vp); | |
65 | } | |
66 | ||
67 | uint64_t | |
68 | vm_swapfile_get_blksize(vnode_t vp) | |
69 | { | |
70 | return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); | |
71 | } | |
72 | ||
73 | uint64_t | |
74 | vm_swapfile_get_transfer_size(vnode_t vp) | |
75 | { | |
76 | return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); | |
77 | } | |
78 | ||
c18c124e | 79 | int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int); |
39236c6e A |
80 | |
81 | void | |
82 | vm_swapfile_close(uint64_t path_addr, vnode_t vp) | |
83 | { | |
39236c6e | 84 | vfs_context_t context = vfs_context_current(); |
c18c124e | 85 | int error; |
39236c6e A |
86 | |
87 | vnode_getwithref(vp); | |
88 | vnode_close(vp, 0, context); | |
89 | ||
c18c124e A |
90 | error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr), |
91 | UIO_SYSSPACE, 0); | |
39236c6e | 92 | |
c18c124e A |
93 | #if DEVELOPMENT || DEBUG |
94 | if (error) | |
95 | printf("%s : unlink of %s failed with error %d", __FUNCTION__, | |
96 | (char *)path_addr, error); | |
97 | #endif | |
39236c6e A |
98 | } |
99 | ||
100 | int | |
3e170ce0 | 101 | vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin) |
39236c6e A |
102 | { |
103 | int error = 0; | |
104 | uint64_t file_size = 0; | |
105 | vfs_context_t ctx = NULL; | |
106 | ||
107 | ||
108 | ctx = vfs_context_current(); | |
109 | ||
fe8ab488 | 110 | error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); |
39236c6e | 111 | |
fe8ab488 A |
112 | if (error) { |
113 | printf("vnode_setsize for swap files failed: %d\n", error); | |
39236c6e | 114 | goto done; |
fe8ab488 | 115 | } |
39236c6e | 116 | |
fe8ab488 | 117 | error = vnode_size(vp, (off_t*) &file_size, ctx); |
39236c6e | 118 | |
fe8ab488 A |
119 | if (error) { |
120 | printf("vnode_size (new file) for swap file failed: %d\n", error); | |
3e170ce0 | 121 | goto done; |
fe8ab488 | 122 | } |
fe8ab488 A |
123 | assert(file_size == *size); |
124 | ||
3e170ce0 | 125 | if (pin != NULL && *pin != FALSE) { |
39037602 | 126 | error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx); |
3e170ce0 A |
127 | |
128 | if (error) { | |
39037602 | 129 | printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size); |
3e170ce0 A |
130 | /* this is not fatal, carry on with files wherever they landed */ |
131 | *pin = FALSE; | |
132 | error = 0; | |
133 | } | |
134 | } | |
135 | ||
39236c6e A |
136 | vnode_lock_spin(vp); |
137 | SET(vp->v_flag, VSWAP); | |
138 | vnode_unlock(vp); | |
139 | done: | |
140 | return error; | |
141 | } | |
142 | ||
3e170ce0 A |
143 | |
144 | int | |
145 | vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size) | |
146 | { | |
147 | int error = 0; | |
148 | vfs_context_t ctx; | |
149 | ||
150 | ctx = vfs_context_kernel(); | |
151 | ||
152 | error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset, | |
153 | UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
154 | ||
155 | return (error); | |
156 | } | |
157 | ||
158 | ||
159 | ||
39236c6e A |
160 | int |
161 | vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) | |
162 | { | |
163 | int error = 0; | |
164 | uint64_t io_size = npages * PAGE_SIZE_64; | |
165 | #if 1 | |
166 | kern_return_t kr = KERN_SUCCESS; | |
167 | upl_t upl = NULL; | |
168 | unsigned int count = 0; | |
3e170ce0 A |
169 | upl_control_flags_t upl_create_flags = 0; |
170 | int upl_control_flags = 0; | |
39236c6e A |
171 | upl_size_t upl_size = 0; |
172 | ||
3e170ce0 A |
173 | upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE |
174 | | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK); | |
39236c6e | 175 | |
fe8ab488 A |
176 | #if ENCRYPTED_SWAP |
177 | upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; | |
178 | #else | |
179 | upl_control_flags = UPL_IOSYNC; | |
180 | #endif | |
39236c6e A |
181 | if ((flags & SWAP_READ) == FALSE) { |
182 | upl_create_flags |= UPL_COPYOUT_FROM; | |
183 | } | |
184 | ||
185 | upl_size = io_size; | |
186 | kr = vm_map_create_upl( kernel_map, | |
187 | start, | |
188 | &upl_size, | |
189 | &upl, | |
190 | NULL, | |
191 | &count, | |
192 | &upl_create_flags); | |
193 | ||
194 | if (kr != KERN_SUCCESS || (upl_size != io_size)) { | |
195 | panic("vm_map_create_upl failed with %d\n", kr); | |
196 | } | |
197 | ||
198 | if (flags & SWAP_READ) { | |
199 | vnode_pagein(vp, | |
200 | upl, | |
201 | 0, | |
202 | offset, | |
203 | io_size, | |
204 | upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, | |
205 | &error); | |
206 | if (error) { | |
207 | #if DEBUG | |
208 | printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
209 | #else /* DEBUG */ | |
210 | printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); | |
211 | #endif /* DEBUG */ | |
212 | } | |
213 | ||
214 | } else { | |
215 | vnode_pageout(vp, | |
216 | upl, | |
217 | 0, | |
218 | offset, | |
219 | io_size, | |
220 | upl_control_flags, | |
221 | &error); | |
222 | if (error) { | |
223 | #if DEBUG | |
224 | printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); | |
225 | #else /* DEBUG */ | |
226 | printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); | |
227 | #endif /* DEBUG */ | |
228 | } | |
229 | } | |
230 | return error; | |
231 | ||
232 | #else /* 1 */ | |
233 | vfs_context_t ctx; | |
234 | ctx = vfs_context_kernel(); | |
235 | ||
236 | error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, | |
237 | UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); | |
238 | ||
239 | if (error) { | |
240 | printf("vn_rdwr: Swap I/O failed with %d\n", error); | |
241 | } | |
242 | return error; | |
243 | #endif /* 1 */ | |
244 | } | |
245 | ||
246 | ||
247 | #define MAX_BATCH_TO_TRIM 256 | |
248 | ||
fe8ab488 A |
249 | #define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */ |
250 | /* the DKIOUNMAP command through w/o acting on it */ | |
251 | /* this is used by the compressed swap system to reclaim empty space */ | |
252 | ||
253 | ||
254 | u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only) | |
39236c6e A |
255 | { |
256 | int error = 0; | |
257 | int trim_index = 0; | |
258 | u_int32_t blocksize = 0; | |
259 | struct vnode *devvp; | |
260 | dk_extent_t *extents; | |
261 | dk_unmap_t unmap; | |
fe8ab488 | 262 | _dk_cs_unmap_t cs_unmap; |
39236c6e A |
263 | |
264 | if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) | |
265 | return (ENOTSUP); | |
266 | ||
267 | if (tl == NULL) | |
268 | return (0); | |
269 | ||
270 | /* | |
271 | * Get the underlying device vnode and physical block size | |
272 | */ | |
273 | devvp = vp->v_mount->mnt_devvp; | |
274 | blocksize = vp->v_mount->mnt_devblocksize; | |
275 | ||
276 | extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
277 | ||
fe8ab488 A |
278 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
279 | memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); | |
280 | cs_unmap.extents = extents; | |
281 | ||
282 | if (route_only == TRUE) | |
283 | cs_unmap.options = ROUTE_ONLY; | |
284 | } else { | |
285 | memset (&unmap, 0, sizeof(dk_unmap_t)); | |
286 | unmap.extents = extents; | |
287 | } | |
39236c6e A |
288 | |
289 | while (tl) { | |
290 | daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ | |
291 | size_t io_bytecount; /* Number of bytes in current extent for the specified range */ | |
292 | size_t trimmed; | |
293 | size_t remaining_length; | |
294 | off_t current_offset; | |
295 | ||
296 | current_offset = tl->tl_offset; | |
297 | remaining_length = tl->tl_length; | |
298 | trimmed = 0; | |
299 | ||
300 | /* | |
301 | * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single | |
302 | * extent from the blockmap call. Keep looping/going until we are sure we've hit | |
303 | * the whole range or if we encounter an error. | |
304 | */ | |
305 | while (trimmed < tl->tl_length) { | |
306 | /* | |
307 | * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the | |
308 | * specified offset. It returns blocks in contiguous chunks, so if the logical range is | |
309 | * broken into multiple extents, it must be called multiple times, increasing the offset | |
310 | * in each call to ensure that the entire range is covered. | |
311 | */ | |
312 | error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, | |
313 | &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); | |
314 | ||
315 | if (error) { | |
316 | goto trim_exit; | |
317 | } | |
318 | ||
319 | extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; | |
320 | extents[trim_index].length = io_bytecount; | |
321 | ||
322 | trim_index++; | |
323 | ||
324 | if (trim_index == MAX_BATCH_TO_TRIM) { | |
325 | ||
fe8ab488 A |
326 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
327 | cs_unmap.extentsCount = trim_index; | |
328 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
329 | } else { | |
330 | unmap.extentsCount = trim_index; | |
331 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
332 | } | |
39236c6e A |
333 | if (error) { |
334 | goto trim_exit; | |
335 | } | |
336 | trim_index = 0; | |
337 | } | |
338 | trimmed += io_bytecount; | |
339 | current_offset += io_bytecount; | |
340 | remaining_length -= io_bytecount; | |
341 | } | |
342 | tl = tl->tl_next; | |
343 | } | |
344 | if (trim_index) { | |
fe8ab488 A |
345 | if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { |
346 | cs_unmap.extentsCount = trim_index; | |
347 | error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); | |
348 | } else { | |
349 | unmap.extentsCount = trim_index; | |
350 | error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
351 | } | |
39236c6e A |
352 | } |
353 | trim_exit: | |
354 | kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); | |
355 | ||
356 | return error; | |
357 | } |