[apple/xnu.git] / bsd / vm / vm_compressor_backing_file.c

/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <stdint.h>
#include <sys/fcntl.h>
#include <sys/vnode_internal.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/mount_internal.h>
#include <sys/buf_internal.h>
#include <kern/debug.h>
#include <kern/kalloc.h>
#include <sys/cprotect.h>
#include <sys/disk.h>
#include <vm/vm_protos.h>
#include <vm/vm_pageout.h>

/*
 * Forward declarations for the swap-file helpers defined in this file.
 */

/* Open (creating if needed) the swap file at 'path'; *vp is set to NULL on failure. */
void vm_swapfile_open(const char *path, vnode_t *vp);
/* Close 'vp' and unlink the file whose kernel-space path string lives at address 'path'. */
void vm_swapfile_close(uint64_t path, vnode_t vp);
/* Size an empty swap file to *size, or report an existing file's size through *size. */
int vm_swapfile_preallocate(vnode_t vp, uint64_t *size);
/* Device block size of the mount that 'vp' lives on. */
uint64_t vm_swapfile_get_blksize(vnode_t vp);
/* f_iosize recorded in the vfsstat of the mount that 'vp' lives on. */
uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
/* Page 'npages' pages between kernel_map address 'start' and file offset 'offset'; SWAP_READ in 'flags' selects a read. */
int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);

/*
 * Open the swap file named by 'path' for reading and writing, creating it
 * with owner read/write permission if it does not exist yet.
 *
 * On success *vp holds the opened vnode and vnode_put() has been called on
 * it once.  On failure the error is logged and *vp is set to NULL.
 */
void
vm_swapfile_open(const char *path, vnode_t *vp)
{
	vfs_context_t	context = vfs_context_current();
	int		open_err;

	open_err = vnode_open(path, (O_CREAT | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, context);

	if (open_err == 0) {
		/*
		 * NOTE(review): presumably this drops the iocount returned by
		 * vnode_open() while the open reference keeps the vnode alive
		 * -- confirm against the vnode KPI.
		 */
		vnode_put(*vp);
		return;
	}

	printf("Failed to open swap file %d\n", open_err);
	*vp = NULL;
}

/*
 * Return the device block size of the mount that backs 'vp', widened to
 * 64 bits.
 */
uint64_t
vm_swapfile_get_blksize(vnode_t vp)
{
	uint64_t	blksize;

	blksize = (uint64_t)vfs_devblocksize(vnode_mount(vp));

	return blksize;
}

/*
 * Return the f_iosize value stored in the vfsstat of the mount that backs
 * 'vp', widened to 64 bits.
 */
uint64_t
vm_swapfile_get_transfer_size(vnode_t vp)
{
	uint64_t	iosize;

	iosize = (uint64_t)vp->v_mount->mnt_vfsstat.f_iosize;

	return iosize;
}

int unlink1(vfs_context_t, struct nameidata *, int);

/*
 * Close a swap file and remove it from the file system.
 *
 * path_addr: address of the NUL-terminated path string; it is handed to
 *            NDINIT() as a UIO_SYSSPACE (kernel-space) path.
 * vp:        vnode of the swap file to close.
 *
 * The function returns nothing, so a failure to unlink the file is reported
 * through printf() rather than silently discarded.
 */
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	struct nameidata nd;
	vfs_context_t context = vfs_context_current();
	int error = 0;

	/*
	 * Take a reference for vnode_close() to consume.
	 * NOTE(review): the result of vnode_getwithref() is not checked here;
	 * confirm that it cannot fail for a vnode we still hold open.
	 */
	vnode_getwithref(vp);
	vnode_close(vp, 0, context);
	
	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE,
	       path_addr, context);

	error = unlink1(context, &nd, 0);
	if (error) {
		/* Leave a trace: otherwise a stale swap file stays on disk unnoticed. */
		printf("vm_swapfile_close: unlink of swap file failed with %d\n", error);
	}
}

/*
 * Prepare 'vp' for use as a swap file.
 *
 * If the file is currently empty it is grown to *size bytes with
 * IO_NOZEROFILL.  If it already has a non-zero size, that size is written
 * back through *size instead.  On success the vnode is tagged with VSWAP.
 *
 * Returns 0 on success or the error reported by the failing call.
 */
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
{
	vfs_context_t	context = vfs_context_current();
	uint64_t	existing_size = 0;
	int		error = 0;

#if CONFIG_PROTECT
	{
#if 0	// <rdar://11771612>

		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
			if(config_protect_bug) {
				printf("swap protection class set failed with %d\n", error);
			} else {
				panic("swap protection class set failed with %d\n", error);
			}
		}
#endif
		/* initialize content protection keys manually */
		error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0);
		if (error != 0) {
			printf("Content Protection key failure on swap: %d\n", error);
			/*
			 * NOTE(review): vm_swapfile_open() already called
			 * vnode_put() on this vnode, and the assignment below
			 * only clears the local copy of 'vp' -- confirm that
			 * the caller expects this extra vnode_put().
			 */
			vnode_put(vp);
			vp = NULL;
			return error;
		}
	}
#endif

	/*
	 * dynamic_pager creates the first swapfile (swapfile0) from user
	 * space in a supported manner (with IO_NOZEROFILL etc), so a file
	 * that already has a size is accepted as-is.
	 *
	 * If dynamic_pager ever stops creating that file, this should become
	 * a panic / assert or an error return, because we could no longer be
	 * sure the file was created correctly.
	 */
	error = vnode_size(vp, (off_t *) &existing_size, context);
	if (error != 0) {
		printf("vnode_size (existing files) for swap files failed: %d\n", error);
		return error;
	}

	if (existing_size != 0) {
		/* Pre-existing file: tell the caller how big it really is. */
		*size = existing_size;
	} else {
		error = vnode_setsize(vp, *size, IO_NOZEROFILL, context);
		if (error) {
			printf("vnode_setsize for swap files failed: %d\n", error);
			return error;
		}
	}

	/* Mark the vnode as a swap file. */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	return error;
}

/*
 * Perform synchronous swap I/O between a kernel_map address range and the
 * swap file.
 *
 * vp:     swap file vnode.
 * offset: byte offset within the swap file.
 * start:  address in kernel_map of the first page to transfer.
 * npages: number of pages to transfer.
 * flags:  SWAP_READ selects a page-in; otherwise the pages are paged out.
 *
 * Returns the error reported by vnode_pagein()/vnode_pageout() (0 on
 * success).  Panics if the UPL covering the range cannot be created with
 * exactly the requested size.
 */
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
{
	int error = 0;
	uint64_t io_size = npages * PAGE_SIZE_64;
#if 1
	upl_t		upl = NULL;
	upl_size_t	upl_size = 0;
	unsigned int	count = 0;
	int		upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
	int		upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
	int		is_read = ((flags & SWAP_READ) != 0);
	kern_return_t	kr = KERN_SUCCESS;

	if (!is_read) {
		/* Writing to the swap file: the UPL is a copy-out source. */
		upl_create_flags |= UPL_COPYOUT_FROM;
	}

	upl_size = io_size;
	kr = vm_map_create_upl(kernel_map, start, &upl_size, &upl,
			       NULL, &count, &upl_create_flags);

	/* Anything other than a successful, full-sized UPL is fatal. */
	if (!(kr == KERN_SUCCESS && upl_size == io_size)) {
		panic("vm_map_create_upl failed with %d\n", kr);
	}

	if (!is_read) {
		vnode_pageout(vp, upl, 0, offset, io_size,
			      upl_control_flags, &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
		return error;
	}

	vnode_pagein(vp, upl, 0, offset, io_size,
		     upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, &error);
	if (error) {
#if DEBUG
		printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
		printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
	}
	return error;

#else /* 1 */
	/* Alternative vn_rdwr()-based path; compiled out. */
	vfs_context_t ctx;
	ctx = vfs_context_kernel();
		
	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
		UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}


#define MAX_BATCH_TO_TRIM	256

/*
 * Send DKIOCUNMAP (trim) requests to the device underneath 'vp' for every
 * byte range described by the linked list 'tl'.
 *
 * Each list entry (tl_offset, tl_length) is a logical range within the
 * file.  The range is translated to physical extents with VNOP_BLOCKMAP,
 * and the extents are submitted to the device in batches of at most
 * MAX_BATCH_TO_TRIM.
 *
 * Returns 0 on success (including a NULL list), ENOTSUP if the mount does
 * not advertise unmap support, ENOMEM if the extent buffer cannot be
 * allocated, EIO if the block map makes no forward progress, or the error
 * returned by VNOP_BLOCKMAP / VNOP_IOCTL.  Extents that were batched but
 * not yet submitted when an error occurs are dropped.
 */
u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl)
{
	int		error = 0;
	int		trim_index = 0;
	u_int32_t	blocksize = 0;
	struct vnode	*devvp;
	dk_extent_t	*extents;
	dk_unmap_t	unmap;

	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
		return (ENOTSUP);

	if (tl == NULL)
		return (0);

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
	if (extents == NULL)
		return (ENOMEM);

	memset (&unmap, 0, sizeof(dk_unmap_t));
	unmap.extents = extents;

	while (tl) {
		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
		size_t		trimmed;
		size_t		remaining_length;
		off_t		current_offset; 

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;
		
		/* 
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
		 * the whole range or if we encounter an error.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is 
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, 
					       &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);

			if (error) {
				goto trim_exit;
			}

			/*
			 * A successful call that maps zero bytes would never advance
			 * 'trimmed' and would spin here forever; treat it as an I/O error.
			 */
			if (io_bytecount == 0) {
				error = EIO;
				goto trim_exit;
			}

			/*
			 * A block number of -1 denotes a range with no physical backing
			 * (a hole).  There is nothing to unmap for it, and scaling -1 by
			 * the block size would hand the device a bogus offset, so only
			 * record extents that are really mapped.
			 */
			if (io_blockno != -1) {
				extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
				extents[trim_index].length = io_bytecount;

				trim_index++;
			}

			/* Batch is full: push it to the device and start a new one. */
			if (trim_index == MAX_BATCH_TO_TRIM) {

				unmap.extentsCount = trim_index;
				error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}

	/* Submit whatever is left in the final, partially filled batch. */
	if (trim_index) {

		unmap.extentsCount = trim_index;
		error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
	}
trim_exit:
	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
Commit	Line	Data
39236c6e A	1	/*
	2	* Copyright (c) 2000-2013 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28
	29	#include <stdint.h>
	30	#include <sys/fcntl.h>
	31	#include <sys/vnode_internal.h>
	32	#include <sys/vnode.h>
	33	#include <sys/kauth.h>
	34	#include <sys/mount_internal.h>
	35	#include <sys/buf_internal.h>
	36	#include <kern/debug.h>
	37	#include <kern/kalloc.h>
	38	#include <sys/cprotect.h>
	39	#include <sys/disk.h>
	40	#include <vm/vm_protos.h>
	41	#include <vm/vm_pageout.h>
	42
	43	void vm_swapfile_open(const char path, vnode_t vp);
	44	void vm_swapfile_close(uint64_t path, vnode_t vp);
	45	int vm_swapfile_preallocate(vnode_t vp, uint64_t *size);
	46	uint64_t vm_swapfile_get_blksize(vnode_t vp);
	47	uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
	48	int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);
	49
	50	void
	51	vm_swapfile_open(const char path, vnode_t vp)
	52	{
	53	int error = 0;
	54	vfs_context_t ctx = vfs_context_current();
	55
	56	if ((error = vnode_open(path, (O_CREAT \| FREAD \| FWRITE), S_IRUSR \| S_IWUSR, 0, vp, ctx))) {
	57	printf("Failed to open swap file %d\n", error);
	58	*vp = NULL;
	59	return;
	60	}
	61
	62	vnode_put(*vp);
	63	}
	64
65	uint64_t
66	vm_swapfile_get_blksize(vnode_t vp)
67	{
68	return ((uint64_t)vfs_devblocksize(vnode_mount(vp)));
69	}
70
71	uint64_t
72	vm_swapfile_get_transfer_size(vnode_t vp)
73	{
74	return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize);
75	}
76
77	int unlink1(vfs_context_t, struct nameidata *, int);
78
79	void
80	vm_swapfile_close(uint64_t path_addr, vnode_t vp)
81	{
82	struct nameidata nd;
83	vfs_context_t context = vfs_context_current();
84	int error = 0;
85
86	vnode_getwithref(vp);
87	vnode_close(vp, 0, context);
88
89	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE,
90	path_addr, context);
91
92	error = unlink1(context, &nd, 0);
93	}
94
95	int
96	vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
97	{
98	int error = 0;
99	uint64_t file_size = 0;
100	vfs_context_t ctx = NULL;
101
102
103	ctx = vfs_context_current();
104
105	#if CONFIG_PROTECT
106	{
107	#if 0 // <rdar://11771612>
108
109	if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
110	if(config_protect_bug) {
111	printf("swap protection class set failed with %d\n", error);
112	} else {
113	panic("swap protection class set failed with %d\n", error);
114	}
115	}
116	#endif
117	/* initialize content protection keys manually */
118	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
119	printf("Content Protection key failure on swap: %d\n", error);
120	vnode_put(vp);
121	vp = NULL;
122	goto done;
123	}
124	}
125	#endif
126
127	/*
128	* This check exists because dynamic_pager creates the 1st swapfile,
129	* swapfile0, for us from user-space in a supported manner (with IO_NOZEROFILL etc).
130	*
131	* If dynamic_pager, in the future, discontinues creating that file,
132	* then we need to change this check to a panic / assert or return an error.
133	* That's because we can't be sure if the file has been created correctly.
134	*/
135
136	if ((error = vnode_size(vp, (off_t*) &file_size, ctx)) != 0) {
137
138	printf("vnode_size (existing files) for swap files failed: %d\n", error);
139	goto done;
140	} else {
141
142	if (file_size == 0) {
143
144	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
145
146	if (error) {
147	printf("vnode_setsize for swap files failed: %d\n", error);
148	goto done;
149	}
150	} else {
151
152	*size = file_size;
153	}
154	}
155
156	vnode_lock_spin(vp);
157	SET(vp->v_flag, VSWAP);
158	vnode_unlock(vp);
159	done:
160	return error;
161	}
162
163	int
164	vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
165	{
166	int error = 0;
167	uint64_t io_size = npages * PAGE_SIZE_64;
168	#if 1
169	kern_return_t kr = KERN_SUCCESS;
170	upl_t upl = NULL;
171	unsigned int count = 0;
172	int upl_create_flags = 0, upl_control_flags = 0;
173	upl_size_t upl_size = 0;
174
175	upl_create_flags = UPL_SET_INTERNAL \| UPL_SET_LITE;
15129b1c	176	upl_control_flags = UPL_IOSYNC \| UPL_PAGING_ENCRYPTED;
39236c6e A	177
	178	if ((flags & SWAP_READ) == FALSE) {
	179	upl_create_flags \|= UPL_COPYOUT_FROM;
	180	}
	181
	182	upl_size = io_size;
	183	kr = vm_map_create_upl( kernel_map,
	184	start,
	185	&upl_size,
	186	&upl,
	187	NULL,
	188	&count,
	189	&upl_create_flags);
	190
	191	if (kr != KERN_SUCCESS \|\| (upl_size != io_size)) {
	192	panic("vm_map_create_upl failed with %d\n", kr);
	193	}
	194
	195	if (flags & SWAP_READ) {
	196	vnode_pagein(vp,
	197	upl,
	198	0,
	199	offset,
	200	io_size,
	201	upl_control_flags \| UPL_IGNORE_VALID_PAGE_CHECK,
	202	&error);
	203	if (error) {
	204	#if DEBUG
	205	printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
	206	#else /* DEBUG */
	207	printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
	208	#endif /* DEBUG */
	209	}
	210
	211	} else {
	212	vnode_pageout(vp,
	213	upl,
	214	0,
	215	offset,
	216	io_size,
	217	upl_control_flags,
	218	&error);
	219	if (error) {
	220	#if DEBUG
	221	printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
	222	#else /* DEBUG */
	223	printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
	224	#endif /* DEBUG */
	225	}
	226	}
	227	return error;
	228
	229	#else /* 1 */
	230	vfs_context_t ctx;
	231	ctx = vfs_context_kernel();
	232
	233	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
	234	UIO_SYSSPACE, IO_SYNC \| IO_NODELOCKED \| IO_UNIT \| IO_NOCACHE \| IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
	235
	236	if (error) {
	237	printf("vn_rdwr: Swap I/O failed with %d\n", error);
	238	}
	239	return error;
	240	#endif /* 1 */
241	}
242
243
244	#define MAX_BATCH_TO_TRIM 256
245
246	u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl)
247	{
248	int error = 0;
249	int trim_index = 0;
250	u_int32_t blocksize = 0;
251	struct vnode *devvp;
252	dk_extent_t *extents;
253	dk_unmap_t unmap;
254
255	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
256	return (ENOTSUP);
257
258	if (tl == NULL)
259	return (0);
260
261	/*
262	* Get the underlying device vnode and physical block size
263	*/
264	devvp = vp->v_mount->mnt_devvp;
265	blocksize = vp->v_mount->mnt_devblocksize;
266
267	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
268
269	memset (&unmap, 0, sizeof(dk_unmap_t));
270	unmap.extents = extents;
271
272	while (tl) {
273	daddr64_t io_blockno; /* Block number corresponding to the start of the extent */
274	size_t io_bytecount; /* Number of bytes in current extent for the specified range */
275	size_t trimmed;
276	size_t remaining_length;
277	off_t current_offset;
278
279	current_offset = tl->tl_offset;
280	remaining_length = tl->tl_length;
281	trimmed = 0;
282
283	/*
284	* We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
285	* extent from the blockmap call. Keep looping/going until we are sure we've hit
286	* the whole range or if we encounter an error.
287	*/
288	while (trimmed < tl->tl_length) {
289	/*
290	* VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
291	* specified offset. It returns blocks in contiguous chunks, so if the logical range is
292	* broken into multiple extents, it must be called multiple times, increasing the offset
293	* in each call to ensure that the entire range is covered.
294	*/
295	error = VNOP_BLOCKMAP (vp, current_offset, remaining_length,
296	&io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);
297
298	if (error) {
299	goto trim_exit;
300	}
301
302	extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
303	extents[trim_index].length = io_bytecount;
304
305	trim_index++;
306
307	if (trim_index == MAX_BATCH_TO_TRIM) {
308
309	unmap.extentsCount = trim_index;
310	error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
311
312	if (error) {
313	goto trim_exit;
314	}
315	trim_index = 0;
316	}
317	trimmed += io_bytecount;
318	current_offset += io_bytecount;
319	remaining_length -= io_bytecount;
320	}
321	tl = tl->tl_next;
322	}
323	if (trim_index) {
324
325	unmap.extentsCount = trim_index;
326	error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
327	}
328	trim_exit:
329	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
330
331	return error;
332	}