2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 #include <sys/fcntl.h>
31 #include <sys/vnode_internal.h>
32 #include <sys/vnode.h>
33 #include <sys/kauth.h>
34 #include <sys/mount_internal.h>
35 #include <sys/buf_internal.h>
36 #include <kern/debug.h>
37 #include <kern/kalloc.h>
38 #include <sys/cprotect.h>
40 #include <vm/vm_protos.h>
41 #include <vm/vm_pageout.h>
42 #include <sys/content_protection.h>
/*
 * Forward declarations for the swap-file helpers that are defined further
 * down in this file.
 */
44 void vm_swapfile_open(const char *path
, vnode_t
*vp
);
/* The path is carried as a 64-bit integer here rather than as a pointer. */
45 void vm_swapfile_close(uint64_t path
, vnode_t vp
);
46 int vm_swapfile_preallocate(vnode_t vp
, uint64_t *size
, boolean_t
*pin
);
47 uint64_t vm_swapfile_get_blksize(vnode_t vp
);
48 uint64_t vm_swapfile_get_transfer_size(vnode_t vp
);
/* The last (unnamed) parameter is named upl_iodone in the definition. */
49 int vm_swapfile_io(vnode_t vp
, uint64_t offset
, uint64_t start
, int npages
, int flags
, void *);
50 int vm_record_file_write(struct vnode
*vp
, uint64_t offset
, char *buf
, int size
);
/*
 * NOTE(review): the #endif below indicates this declaration is guarded by
 * CONFIG_FREEZE; the matching #if line is not present in this extract.
 */
53 int vm_swap_vol_get_budget(vnode_t vp
, uint64_t *freeze_daily_budget
);
54 #endif /* CONFIG_FREEZE */
/*
 * vm_swapfile_open
 *
 * Opens the swap file at path with vnode_open() under the kernel VFS
 * context, creating and truncating it (O_CREAT | O_TRUNC) for read/write
 * with owner read/write permission bits. vp is handed to vnode_open() and
 * *vp is dereferenced afterwards as the opened vnode.
 *
 * NOTE(review): this extract is missing several original lines (return
 * type, braces, the error declaration and the statements that end each
 * failure path), so the cleanup done on failure cannot be read from here.
 */
58 vm_swapfile_open(const char *path
, vnode_t
*vp
)
61 vfs_context_t ctx
= vfs_context_kernel();
/* A non-zero result from vnode_open() is logged as an open failure. */
63 if ((error
= vnode_open(path
, (O_CREAT
| O_TRUNC
| FREAD
| FWRITE
), S_IRUSR
| S_IWUSR
, 0, vp
, ctx
))) {
64 printf("Failed to open swap file %d\n", error
);
/*
 * NOTE(review): the original comment text below names MNT_IOFLAGS_NOSWAP,
 * while the test that follows checks MNTK_NOSWAP in mnt_kern_flag. Its
 * opening, closing and final line are missing from this extract.
 */
70 * If MNT_IOFLAGS_NOSWAP is set, opening the swap file should fail.
71 * To avoid a race on the mount we only make this check after creating the
74 if ((*vp
)->v_mount
->mnt_kern_flag
& MNTK_NOSWAP
) {
/*
 * vm_swapfile_close() takes the path as a uint64_t, hence the cast of the
 * pointer; it closes the vnode and unlinks the file that was just created.
 */
76 vm_swapfile_close((uint64_t)path
, *vp
);
/*
 * vm_swapfile_get_blksize
 *
 * Returns, widened to uint64_t, whatever vfs_devblocksize() reports for
 * the mount obtained from vp via vnode_mount().
 */
85 vm_swapfile_get_blksize(vnode_t vp
)
87 return (uint64_t)vfs_devblocksize(vnode_mount(vp
));
/*
 * vm_swapfile_get_transfer_size
 *
 * Returns the f_iosize field of the vfsstat structure of the mount that
 * vp belongs to, cast to uint64_t. The mount is reached directly through
 * vp->v_mount rather than through an accessor.
 */
91 vm_swapfile_get_transfer_size(vnode_t vp
)
93 return (uint64_t)vp
->v_mount
->mnt_vfsstat
.f_iosize
;
/*
 * Local prototype for unlink1(), which vm_swapfile_close() calls to remove
 * the swap file. No definition of unlink1() appears in the visible part of
 * this file.
 */
96 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int);
/*
 * vm_swapfile_close
 *
 * Closes the swap-file vnode and then unlinks the file by path, both under
 * the kernel VFS context.
 *
 * path_addr: address of the path string carried as a 64-bit integer; it is
 *            converted with CAST_USER_ADDR_T for unlink1() and cast back
 *            to (char *) for the diagnostic message.
 * vp:        vnode of the swap file to close.
 *
 * NOTE(review): some original lines (return type, braces, the error
 * declaration, the remaining unlink1() arguments and the guard around the
 * printf) are missing from this extract.
 */
99 vm_swapfile_close(uint64_t path_addr
, vnode_t vp
)
101 vfs_context_t context
= vfs_context_kernel();
/*
 * NOTE(review): the result of vnode_getwithref() is discarded in the
 * visible lines, so a failure to take the reference would go unnoticed
 * before vnode_close() is called.
 */
104 vnode_getwithref(vp
);
105 vnode_close(vp
, 0, context
);
/* NULLVP is passed as the vnode argument; the file is named by path only. */
107 error
= unlink1(context
, NULLVP
, CAST_USER_ADDR_T(path_addr
),
/* The unlink failure message is compiled in only for DEVELOPMENT or DEBUG. */
110 #if DEVELOPMENT || DEBUG
/* NOTE(review): this format string has no trailing newline. */
112 printf("%s : unlink of %s failed with error %d", __FUNCTION__
,
113 (char *)path_addr
, error
);
/*
 * vm_swapfile_preallocate
 *
 * Sizes a freshly opened swap file and marks its vnode as swap. Visible
 * steps, in order:
 *   1. vnode_setsize() to *size with IO_NOZEROFILL.
 *   2. vnode_size() to read the size back, asserted equal to *size.
 *   3. If pin is non-NULL and *pin is not FALSE, a FIOPINSWAP ioctl.
 *   4. VSWAP is set in vp->v_flag.
 *   5. A setattr of data protection class PROTECTION_CLASS_C.
 *
 * NOTE(review): the return type, braces, error declaration, the checks
 * that guard each printf, and the #if lines matching the two #endif
 * markers are missing from this extract. How each failure path exits, and
 * whether *size or *pin are written back, cannot be read from here.
 */
119 vm_swapfile_preallocate(vnode_t vp
, uint64_t *size
, boolean_t
*pin
)
122 uint64_t file_size
= 0;
123 vfs_context_t ctx
= NULL
;
/* NOTE(review): va appears to be declared only under CONFIG_FREEZE (see the #endif). */
125 struct vnode_attr va
;
126 #endif /* CONFIG_FREEZE */
128 ctx
= vfs_context_kernel();
/* IO_NOZEROFILL is the flag passed to vnode_setsize() when growing the file. */
130 error
= vnode_setsize(vp
, *size
, IO_NOZEROFILL
, ctx
);
133 printf("vnode_setsize for swap files failed: %d\n", error
);
/*
 * Read the size back. NOTE(review): file_size is a uint64_t whose address
 * is cast to (off_t *) for this call.
 */
137 error
= vnode_size(vp
, (off_t
*) &file_size
, ctx
);
140 printf("vnode_size (new file) for swap file failed: %d\n", error
);
/* Sanity check that the file now has exactly the requested size. */
143 assert(file_size
== *size
);
/* Pinning is optional: it is attempted only when the caller asked for it. */
145 if (pin
!= NULL
&& *pin
!= FALSE
) {
146 error
= VNOP_IOCTL(vp
, FIOPINSWAP
, NULL
, 0, ctx
);
/* NOTE(review): file_size is unsigned 64-bit but is printed with %lld. */
149 printf("pin for swap files failed: %d, file_size = %lld\n", error
, file_size
);
150 /* this is not fatal, carry on with files wherever they landed */
/* Flag the vnode as a swap file. */
157 SET(vp
->v_flag
, VSWAP
);
/* Request data protection class C for the file via VNOP_SETATTR(). */
162 VATTR_SET(&va
, va_dataprotect_class
, PROTECTION_CLASS_C
);
163 error
= VNOP_SETATTR(vp
, &va
, ctx
);
166 printf("setattr PROTECTION_CLASS_C for swap file failed: %d\n", error
);
169 #endif /* CONFIG_FREEZE */
/*
 * vm_record_file_write
 *
 * Writes size bytes from the kernel buffer buf to vp at byte position
 * offset with vn_rdwr(UIO_WRITE, ...), using the credential and process of
 * the kernel VFS context.
 *
 * NOTE(review): the return type, braces, local declarations and the return
 * statement are missing from this extract.
 */
177 vm_record_file_write(vnode_t vp
, uint64_t offset
, char *buf
, int size
)
182 ctx
= vfs_context_kernel();
/*
 * UIO_SYSSPACE marks buf as a kernel-space address. A null (int *) is
 * passed in the position that follows the credential, so nothing is
 * reported back through that argument.
 */
184 error
= vn_rdwr(UIO_WRITE
, vp
, (caddr_t
)buf
, size
, offset
,
185 UIO_SYSSPACE
, IO_NODELOCKED
, vfs_context_ucred(ctx
), (int *) 0, vfs_context_proc(ctx
));
/*
 * vm_swapfile_io
 *
 * Performs swap I/O of npages pages between the swap file vp at file
 * position offset and the kernel memory at address start. The direction is
 * a read when SWAP_READ is set in flags and a write otherwise. When
 * upl_iodone is NULL the I/O is flagged UPL_IOSYNC; otherwise upl_iodone is
 * registered on the UPL with upl_set_iodone().
 *
 * NOTE(review): a large share of the original lines are missing from this
 * extract (return type, braces, several declarations, most arguments of
 * vm_map_create_upl(), the vnode_pagein()/vnode_pageout() calls themselves
 * and all preprocessor lines). The comments below describe only what the
 * remaining lines show.
 */
193 vm_swapfile_io(vnode_t vp
, uint64_t offset
, uint64_t start
, int npages
, int flags
, void *upl_iodone
)
/* Total transfer size in bytes. */
196 uint64_t io_size
= npages
* PAGE_SIZE_64
;
198 kern_return_t kr
= KERN_SUCCESS
;
200 unsigned int count
= 0;
201 upl_control_flags_t upl_create_flags
= 0;
202 int upl_control_flags
= 0;
203 upl_size_t upl_size
= 0;
205 upl_create_flags
= UPL_SET_INTERNAL
| UPL_SET_LITE
;
/* No completion callback supplied: request synchronous I/O. */
207 if (upl_iodone
== NULL
) {
208 upl_control_flags
= UPL_IOSYNC
;
/* NOTE(review): whatever condition guards this line is not visible here. */
212 upl_control_flags
|= UPL_PAGING_ENCRYPTED
;
/* True when the SWAP_READ bit is clear, i.e. on the write path. */
215 if ((flags
& SWAP_READ
) == FALSE
) {
216 upl_create_flags
|= UPL_COPYOUT_FROM
;
/* Build a UPL against kernel_map, tagged VM_KERN_MEMORY_OSFMK. */
220 kr
= vm_map_create_upl( kernel_map
,
227 VM_KERN_MEMORY_OSFMK
);
/*
 * A failed call, or a UPL whose size differs from io_size, is treated as
 * fatal. NOTE(review): the panic message prints only kr, so a pure size
 * mismatch would be reported with a success code.
 */
229 if (kr
!= KERN_SUCCESS
|| (upl_size
!= io_size
)) {
230 panic("vm_map_create_upl failed with %d\n", kr
);
/* Read path: the messages below refer to vnode_pagein(). */
233 if (flags
& SWAP_READ
) {
239 upl_control_flags
| UPL_IGNORE_VALID_PAGE_CHECK
,
/*
 * NOTE(review): two variants of the failure message follow (verbose and
 * terse); the lines that select between them are not visible.
 */
243 printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error
, vp
, offset
, io_size
);
245 printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error
);
/* Write path: attach the completion callback before the pageout. */
249 upl_set_iodone(upl
, upl_iodone
);
260 printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error
, vp
, offset
, io_size
);
262 printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error
);
/*
 * A vn_rdwr() based implementation of the same transfer. NOTE(review): the
 * preprocessor lines that choose between this and the UPL path above are
 * not visible in this extract.
 */
270 ctx
= vfs_context_kernel();
272 error
= vn_rdwr((flags
& SWAP_READ
) ? UIO_READ
: UIO_WRITE
, vp
, (caddr_t
)start
, io_size
, offset
,
273 UIO_SYSSPACE
, IO_SYNC
| IO_NODELOCKED
| IO_UNIT
| IO_NOCACHE
| IO_SWAP_DISPATCH
, vfs_context_ucred(ctx
), (int *) 0, vfs_context_proc(ctx
));
276 printf("vn_rdwr: Swap I/O failed with %d\n", error
);
/*
 * Number of dk_extent_t entries in the extent array that vnode_trim_list()
 * allocates; reaching this count triggers an unmap ioctl there.
 */
283 #define MAX_BATCH_TO_TRIM 256
/* Value stored in cs_unmap.options by vnode_trim_list() when route_only is TRUE. */
285 #define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */
286 /* the DKIOUNMAP command through w/o acting on it */
287 /* this is used by the compressed swap system to reclaim empty space */
/*
 * vnode_trim_list
 *
 * Issues unmap (trim) requests to the device underlying vp for the byte
 * range described by tl (tl_offset, tl_length).
 *
 * Visible flow:
 *   - Tests MNT_IOFLAGS_UNMAP_SUPPORTED on the mount (the body of that
 *     test is not visible in this extract).
 *   - Reads the device vnode and device block size from the mount.
 *   - Allocates an array of MAX_BATCH_TO_TRIM dk_extent_t entries.
 *   - Points either cs_unmap (when MNT_IOFLAGS_CSUNMAP_SUPPORTED is set)
 *     or unmap at that array; with route_only TRUE the cs_unmap options
 *     are set to ROUTE_ONLY.
 *   - Repeatedly calls VNOP_BLOCKMAP() until the trimmed byte count reaches
 *     tl_length. Each mapping whose block number is not -1 is stored as a
 *     device byte offset (block number times block size) plus a length.
 *   - When trim_index equals MAX_BATCH_TO_TRIM the array is submitted with
 *     _DKIOCCSUNMAP or DKIOCUNMAP on the device vnode.
 *   - A second submission with extentsCount = trim_index appears after the
 *     mapping loop, and the extent array is released with kfree().
 *
 * NOTE(review): many original lines are missing from this extract (return
 * type, braces, declarations of error/devvp/unmap/trimmed/trim_index, any
 * outer loop over list entries, error checks, index updates and the return
 * statement), so the exact control flow cannot be confirmed from here.
 */
291 vnode_trim_list(vnode_t vp
, struct trim_list
*tl
, boolean_t route_only
)
295 u_int32_t blocksize
= 0;
297 dk_extent_t
*extents
;
299 _dk_cs_unmap_t cs_unmap
;
/* Entered when the mount does not advertise unmap support. */
301 if (!(vp
->v_mount
->mnt_ioflags
& MNT_IOFLAGS_UNMAP_SUPPORTED
)) {
310 * Get the underlying device vnode and physical block size
312 devvp
= vp
->v_mount
->mnt_devvp
;
313 blocksize
= vp
->v_mount
->mnt_devblocksize
;
/*
 * NOTE(review): no NULL check on the kalloc() result is visible before
 * extents is stored into the unmap structures and indexed below.
 */
315 extents
= kalloc(sizeof(dk_extent_t
) * MAX_BATCH_TO_TRIM
);
/* Choose which request structure will carry the extent array. */
317 if (vp
->v_mount
->mnt_ioflags
& MNT_IOFLAGS_CSUNMAP_SUPPORTED
) {
318 memset(&cs_unmap
, 0, sizeof(_dk_cs_unmap_t
));
319 cs_unmap
.extents
= extents
;
321 if (route_only
== TRUE
) {
322 cs_unmap
.options
= ROUTE_ONLY
;
325 memset(&unmap
, 0, sizeof(dk_unmap_t
));
326 unmap
.extents
= extents
;
330 daddr64_t io_blockno
; /* Block number corresponding to the start of the extent */
331 size_t io_bytecount
; /* Number of bytes in current extent for the specified range */
333 size_t remaining_length
;
334 off_t current_offset
;
/* Start the walk at the beginning of the requested range. */
336 current_offset
= tl
->tl_offset
;
337 remaining_length
= tl
->tl_length
;
341 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
342 * extent from the blockmap call. Keep looping/going until we are sure we've hit
343 * the whole range or if we encounter an error.
345 while (trimmed
< tl
->tl_length
) {
347 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
348 * specified offset. It returns blocks in contiguous chunks, so if the logical range is
349 * broken into multiple extents, it must be called multiple times, increasing the offset
350 * in each call to ensure that the entire range is covered.
352 error
= VNOP_BLOCKMAP(vp
, current_offset
, remaining_length
,
353 &io_blockno
, &io_bytecount
, NULL
, VNODE_READ
| VNODE_BLOCKMAP_NO_TRACK
, NULL
);
358 if (io_blockno
!= -1) {
359 extents
[trim_index
].offset
= (uint64_t) io_blockno
* (u_int64_t
) blocksize
;
360 extents
[trim_index
].length
= io_bytecount
;
364 if (trim_index
== MAX_BATCH_TO_TRIM
) {
365 if (vp
->v_mount
->mnt_ioflags
& MNT_IOFLAGS_CSUNMAP_SUPPORTED
) {
366 cs_unmap
.extentsCount
= trim_index
;
367 error
= VNOP_IOCTL(devvp
, _DKIOCCSUNMAP
, (caddr_t
)&cs_unmap
, 0, vfs_context_kernel());
369 unmap
.extentsCount
= trim_index
;
370 error
= VNOP_IOCTL(devvp
, DKIOCUNMAP
, (caddr_t
)&unmap
, 0, vfs_context_kernel());
377 trimmed
+= io_bytecount
;
378 current_offset
+= io_bytecount
;
379 remaining_length
-= io_bytecount
;
384 if (vp
->v_mount
->mnt_ioflags
& MNT_IOFLAGS_CSUNMAP_SUPPORTED
) {
385 cs_unmap
.extentsCount
= trim_index
;
386 error
= VNOP_IOCTL(devvp
, _DKIOCCSUNMAP
, (caddr_t
)&cs_unmap
, 0, vfs_context_kernel());
388 unmap
.extentsCount
= trim_index
;
389 error
= VNOP_IOCTL(devvp
, DKIOCUNMAP
, (caddr_t
)&unmap
, 0, vfs_context_kernel());
393 kfree(extents
, sizeof(dk_extent_t
) * MAX_BATCH_TO_TRIM
);
/*
 * vm_swap_vol_get_budget
 *
 * Sends the DKIOCGETMAXSWAPWRITE ioctl to the device vnode of the mount
 * that vp belongs to, passing freeze_daily_budget as the ioctl data
 * pointer and using the kernel VFS context.
 *
 * NOTE(review): presumably the driver stores the budget through
 * freeze_daily_budget; confirm against the ioctl definition. The return
 * type, braces, the err declaration, the return statement and the #if
 * matching the trailing #endif are missing from this extract.
 */
400 vm_swap_vol_get_budget(vnode_t vp
, uint64_t *freeze_daily_budget
)
402 vnode_t devvp
= NULL
;
403 vfs_context_t ctx
= vfs_context_kernel();
/* The ioctl is addressed to the backing device, not to the swap file vnode. */
406 devvp
= vp
->v_mount
->mnt_devvp
;
408 err
= VNOP_IOCTL(devvp
, DKIOCGETMAXSWAPWRITE
, (caddr_t
)freeze_daily_budget
, 0, ctx
);
412 #endif /* CONFIG_FREEZE */