git.saurik.com Git - apple/xnu.git/blame

Commit	Line	Data
1c79356b	1	/*
f427ee49	2	* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
5d5c5d0d	3	*
2d21ac55	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745	5	*
2d21ac55 A	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
0a7de745	14	*
2d21ac55 A	15	* Please obtain a copy of the License at
2d21ac55 A	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745	17	*
2d21ac55 A	18	* The Original Code and all software distributed under the License are
2d21ac55 A	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 A	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
8f6c56a5 A	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 A	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
0a7de745	25	*
2d21ac55	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b	27	*/
0a7de745	28	/*
1c79356b A	29	* Mach Operating System
	30	* Copyright (c) 1987 Carnegie-Mellon University
	31	* All rights reserved. The CMU software License Agreement specifies
	32	* the terms and conditions for use and redistribution.
	33	*/
	34	/*
	35	* File: vnode_pager.c
	36	*
	37	* "Swap" pager that pages to/from vnodes. Also
	38	* handles demand paging from files.
	39	*
	40	*/
	41
	42	#include <mach/boolean.h>
	43	#include <sys/param.h>
	44	#include <sys/systm.h>
91447636	45	#include <sys/user.h>
1c79356b	46	#include <sys/proc.h>
91447636	47	#include <sys/kauth.h>
1c79356b A	48	#include <sys/buf.h>
1c79356b A	49	#include <sys/uio.h>
91447636	50	#include <sys/vnode_internal.h>
1c79356b	51	#include <sys/namei.h>
0a7de745	52	#include <sys/mount_internal.h> /* needs internal due to fhandle_t */
91447636	53	#include <sys/ubc_internal.h>
1c79356b	54	#include <sys/lock.h>
0a7de745	55	#include <sys/disk.h> /* For DKIOC calls */
1c79356b A	56
	57	#include <mach/mach_types.h>
	58	#include <mach/memory_object_types.h>
b0d623f7 A	59	#include <mach/vm_map.h>
	60	#include <mach/mach_vm.h>
	61	#include <mach/upl.h>
2d21ac55	62	#include <mach/sdt.h>
1c79356b A	63
	64	#include <vm/vm_map.h>
	65	#include <vm/vm_kern.h>
1c79356b	66	#include <kern/zalloc.h>
1c79356b A	67	#include <libkern/libkern.h>
	68
	69	#include <vm/vnode_pager.h>
	70	#include <vm/vm_pageout.h>
	71
	72	#include <kern/assert.h>
9bccf70c	73	#include <sys/kdebug.h>
ea3f0419	74	#include <nfs/nfs_conf.h>
91447636 A	75	#include <nfs/rpcv2.h>
	76	#include <nfs/nfsproto.h>
	77	#include <nfs/nfs.h>
	78
	79	#include <vm/vm_protos.h>
1c79356b	80
5ba3f43e	81	#include <vfs/vfs_disk_conditioner.h>
b0d623f7	82
6d2010ae	83	void
f427ee49	84	vnode_pager_throttle(void)
6d2010ae A	85	{
	86	struct uthread *ut;
	87
	88	ut = get_bsdthread_info(current_thread());
	89
0a7de745	90	if (ut->uu_lowpri_window) {
39236c6e	91	throttle_lowpri_io(1);
0a7de745	92	}
6d2010ae A	93	}
6d2010ae A	94
6d2010ae A	95	boolean_t
	96	vnode_pager_isSSD(vnode_t vp)
	97	{
5ba3f43e	98	return disk_conditioner_mount_is_ssd(vp->v_mount);
6d2010ae A	99	}
6d2010ae A	100
#if CONFIG_IOSCHED
/*
 * vnode_pager_issue_reprioritize_io:
 *	Build a single-extent DKIOCSETTIER request and send it to 'devvp'.
 *
 *	devvp    - vnode the ioctls are issued against
 *	blkno    - block number; multiplied by the block size reported by
 *	           DKIOCGETBLOCKSIZE to form the extent's byte offset
 *	len      - stored as the extent length
 *	priority - truncated to uint8_t and stored as the requested tier
 *
 *	If the block size cannot be obtained the function returns without
 *	issuing DKIOCSETTIER.  The result of DKIOCSETTIER itself is not
 *	reported to the caller.
 */
void
vnode_pager_issue_reprioritize_io(struct vnode *devvp, uint64_t blkno, uint32_t len, int priority)
{
	u_int32_t       dev_blocksize = 0;
	dk_extent_t     tier_extent;
	dk_set_tier_t   tier_request;

	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&dev_blocksize, 0, vfs_context_kernel()) != 0) {
		return;
	}

	memset(&tier_extent, 0, sizeof(dk_extent_t));
	memset(&tier_request, 0, sizeof(dk_set_tier_t));

	tier_extent.offset = blkno * (u_int64_t) dev_blocksize;
	tier_extent.length = len;

	tier_request.extents = &tier_extent;
	tier_request.extentsCount = 1;
	tier_request.tier = (uint8_t)priority;

	/* the outcome of the tier change is intentionally not propagated */
	(void) VNOP_IOCTL(devvp, DKIOCSETTIER, (caddr_t)&tier_request, 0, vfs_context_kernel());
}
#endif
6d2010ae	129
d9a64523 A	130	void
d9a64523 A	131	vnode_pager_was_dirtied(
0a7de745 A	132	struct vnode *vp,
	133	vm_object_offset_t s_offset,
	134	vm_object_offset_t e_offset)
d9a64523	135	{
0a7de745	136	cluster_update_state(vp, s_offset, e_offset, TRUE);
d9a64523 A	137	}
d9a64523 A	138
b0d623f7 A	139	uint32_t
	140	vnode_pager_isinuse(struct vnode *vp)
	141	{
0a7de745 A	142	if (vp->v_usecount > vp->v_kusecount) {
	143	return 1;
	144	}
	145	return 0;
b0d623f7 A	146	}
	147
	148	uint32_t
39236c6e	149	vnode_pager_return_throttle_io_limit(struct vnode vp, uint32_t limit)
b0d623f7	150	{
0a7de745	151	return cluster_throttle_io_limit(vp, limit);
b0d623f7	152	}
1c79356b	153
0b4e3aa0 A	154	vm_object_offset_t
	155	vnode_pager_get_filesize(struct vnode *vp)
	156	{
0b4e3aa0	157	return (vm_object_offset_t) ubc_getsize(vp);
0b4e3aa0 A	158	}
0b4e3aa0 A	159
/*
 * Defined outside this file.  The pointer declarators match the way
 * vnode_pager_get_name() calls it: a vnode pointer, a NULL leaf name,
 * a caller-supplied path buffer with its length, and a pointer that
 * receives the "path was truncated" indication.
 */
extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
15129b1c A	161
0c530ab8	162	kern_return_t
15129b1c	163	vnode_pager_get_name(
0a7de745 A	164	struct vnode *vp,
	165	char *pathname,
	166	vm_size_t pathname_len,
	167	char *filename,
	168	vm_size_t filename_len,
	169	boolean_t *truncated_path_p)
0c530ab8	170	{
15129b1c A	171	*truncated_path_p = FALSE;
	172	if (pathname != NULL) {
	173	/* get the path name */
	174	safe_getpath(vp, NULL,
0a7de745 A	175	pathname, (int) pathname_len,
0a7de745 A	176	truncated_path_p);
15129b1c A	177	}
	178	if ((pathname == NULL \|\| *truncated_path_p) &&
	179	filename != NULL) {
	180	/* get the file name */
	181	const char *name;
	182
	183	name = vnode_getname_printable(vp);
	184	strlcpy(filename, name, (size_t) filename_len);
	185	vnode_putname_printable(name);
0c530ab8	186	}
0c530ab8 A	187	return KERN_SUCCESS;
	188	}
	189
	190	kern_return_t
15129b1c	191	vnode_pager_get_mtime(
0a7de745 A	192	struct vnode *vp,
	193	struct timespec *current_mtime,
	194	struct timespec *cs_mtime)
0c530ab8	195	{
15129b1c A	196	vnode_mtime(vp, current_mtime, vfs_context_current());
	197	if (cs_mtime != NULL) {
	198	ubc_get_cs_mtime(vp, cs_mtime);
	199	}
0c530ab8 A	200	return KERN_SUCCESS;
	201	}
	202
2d21ac55 A	203	kern_return_t
2d21ac55 A	204	vnode_pager_get_cs_blobs(
0a7de745 A	205	struct vnode *vp,
0a7de745 A	206	void **blobs)
2d21ac55 A	207	{
	208	*blobs = ubc_get_cs_blobs(vp);
	209	return KERN_SUCCESS;
	210	}
	211
0a7de745	212	/*
6d2010ae A	213	* vnode_trim:
	214	* Used to call the DKIOCUNMAP ioctl on the underlying disk device for the specified vnode.
	215	* Trims the region at offset bytes into the file, for length bytes.
	216	*
	217	* Care must be taken to ensure that the vnode is sufficiently reference counted at the time this
	218	* function is called; no iocounts or usecounts are taken on the vnode.
	219	* This function is non-idempotent in error cases; We cannot un-discard the blocks if only some of them
	220	* are successfully discarded.
	221	*/
0a7de745 A	222	u_int32_t
	223	vnode_trim(
	224	struct vnode *vp,
	225	off_t offset,
	226	size_t length)
6d2010ae	227	{
0a7de745 A	228	daddr64_t io_blockno; /* Block number corresponding to the start of the extent */
0a7de745 A	229	size_t io_bytecount; /* Number of bytes in current extent for the specified range */
6d2010ae	230	size_t trimmed = 0;
0a7de745	231	off_t current_offset = offset;
6d2010ae A	232	size_t remaining_length = length;
	233	int error = 0;
	234	u_int32_t blocksize = 0;
	235	struct vnode *devvp;
	236	dk_extent_t extent;
	237	dk_unmap_t unmap;
	238
	239
	240	/* Get the underlying device vnode */
	241	devvp = vp->v_mount->mnt_devvp;
	242
	243	/* Figure out the underlying device block size */
	244	error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blocksize, 0, vfs_context_kernel());
	245	if (error) {
	246	goto trim_exit;
	247	}
	248
0a7de745	249	/*
6d2010ae A	250	* We may not get the entire range from offset -> offset+length in a single
	251	* extent from the blockmap call. Keep looping/going until we are sure we've hit
	252	* the whole range or if we encounter an error.
	253	*/
	254	while (trimmed < length) {
	255	/*
	256	* VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
0a7de745	257	* specified offset. It returns blocks in contiguous chunks, so if the logical range is
6d2010ae A	258	* broken into multiple extents, it must be called multiple times, increasing the offset
	259	* in each call to ensure that the entire range is covered.
	260	*/
0a7de745 A	261	error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
0a7de745 A	262	&io_blockno, &io_bytecount, NULL, VNODE_READ \| VNODE_BLOCKMAP_NO_TRACK, NULL);
6d2010ae A	263
	264	if (error) {
	265	goto trim_exit;
	266	}
0a7de745	267	/*
6d2010ae A	268	* We have a contiguous run. Prepare & issue the ioctl for the device.
	269	* the DKIOCUNMAP ioctl takes offset in bytes from the start of the device.
	270	*/
0a7de745 A	271	memset(&extent, 0, sizeof(dk_extent_t));
0a7de745 A	272	memset(&unmap, 0, sizeof(dk_unmap_t));
6d2010ae A	273	extent.offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
	274	extent.length = io_bytecount;
	275	unmap.extents = &extent;
	276	unmap.extentsCount = 1;
	277	error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
	278
	279	if (error) {
	280	goto trim_exit;
	281	}
	282	remaining_length = remaining_length - io_bytecount;
	283	trimmed = trimmed + io_bytecount;
	284	current_offset = current_offset + io_bytecount;
	285	}
	286	trim_exit:
	287
	288	return error;
6d2010ae A	289	}
6d2010ae A	290
1c79356b A	291	pager_return_t
1c79356b A	292	vnode_pageout(struct vnode *vp,
0a7de745 A	293	upl_t upl,
	294	upl_offset_t upl_offset,
	295	vm_object_offset_t f_offset,
	296	upl_size_t size,
	297	int flags,
	298	int *errorp)
1c79356b	299	{
0a7de745 A	300	int result = PAGER_SUCCESS;
	301	int error = 0;
	302	int error_ret = 0;
91447636 A	303	daddr64_t blkno;
91447636 A	304	int isize;
1c79356b	305	int pg_index;
91447636	306	int base_index;
b0d623f7	307	upl_offset_t offset;
1c79356b	308	upl_page_info_t *pl;
0a7de745	309	vfs_context_t ctx = vfs_context_current(); /* pager context */
1c79356b	310
1c79356b A	311	isize = (int)size;
1c79356b A	312
f427ee49 A	313	/*
	314	* This call is non-blocking and does not ever fail but it can
	315	* only be made when there is other explicit synchronization
	316	* with reclaiming of the vnode which, in this path, is provided
	317	* by the paging in progress counter.
	318	*
	319	* In addition, this may also be entered via explicit ubc_msync
	320	* calls or vm_swapfile_io where the existing iocount provides
	321	* the necessary synchronization. Ideally we would not take an
	322	* additional iocount here in the cases where an explcit iocount
	323	* has already been taken but this call doesn't cause a deadlock
	324	* as other forms of vnode_get* might if this thread has already
	325	* taken an iocount.
	326	*/
	327	error = vnode_getalways_from_pager(vp);
	328	if (error != 0) {
	329	/* This can't happen */
	330	panic("vnode_getalways returned %d for vp %p", error, vp);
	331	}
	332
9bccf70c	333	if (isize <= 0) {
0a7de745	334	result = PAGER_ERROR;
91447636	335	error_ret = EINVAL;
9bccf70c A	336	goto out;
9bccf70c A	337	}
1c79356b	338
2d21ac55	339	if (UBCINFOEXISTS(vp) == 0) {
91447636 A	340	result = PAGER_ERROR;
91447636 A	341	error_ret = EINVAL;
9bccf70c	342
0a7de745 A	343	if (upl && !(flags & UPL_NOCOMMIT)) {
	344	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
	345	}
1c79356b A	346	goto out;
1c79356b A	347	}
0a7de745	348	if (!(flags & UPL_VNODE_PAGER)) {
1c79356b	349	/*
91447636 A	350	* This is a pageout from the default pager,
	351	* just go ahead and call vnop_pageout since
	352	* it has already sorted out the dirty ranges
1c79356b	353	*/
316670eb	354	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
0a7de745 A	355	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_START,
0a7de745 A	356	size, 1, 0, 0, 0);
9bccf70c	357
0a7de745 A	358	if ((error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
0a7de745 A	359	(size_t)size, flags, ctx))) {
91447636	360	result = PAGER_ERROR;
0a7de745	361	}
9bccf70c	362
316670eb	363	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
0a7de745 A	364	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_END,
0a7de745 A	365	size, 1, 0, 0, 0);
9bccf70c	366
1c79356b A	367	goto out;
1c79356b A	368	}
b0d623f7	369	if (upl == NULL) {
0a7de745	370	int request_flags;
b0d623f7 A	371
	372	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEOUTV2) {
	373	/*
	374	* filesystem has requested the new form of VNOP_PAGEOUT for file
	375	* backed objects... we will not grab the UPL befofe calling VNOP_PAGEOUT...
	376	* it is the fileystem's responsibility to grab the range we're denoting
	377	* via 'f_offset' and 'size' into a UPL... this allows the filesystem to first
	378	* take any locks it needs, before effectively locking the pages into a UPL...
	379	*/
0a7de745 A	380	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	381	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_START,
	382	size, (int)f_offset, 0, 0, 0);
b0d623f7	383
0a7de745 A	384	if ((error_ret = VNOP_PAGEOUT(vp, NULL, upl_offset, (off_t)f_offset,
0a7de745 A	385	size, flags, ctx))) {
b0d623f7 A	386	result = PAGER_ERROR;
b0d623f7 A	387	}
316670eb	388	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
0a7de745 A	389	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_END,
0a7de745 A	390	size, 0, 0, 0, 0);
b0d623f7 A	391
	392	goto out;
	393	}
0a7de745	394	if (flags & UPL_MSYNC) {
b0d623f7	395	request_flags = UPL_UBC_MSYNC \| UPL_RET_ONLY_DIRTY;
0a7de745	396	} else {
b0d623f7	397	request_flags = UPL_UBC_PAGEOUT \| UPL_RET_ONLY_DIRTY;
0a7de745 A	398	}
	399
	400	if (ubc_create_upl_kernel(vp, f_offset, size, &upl, &pl, request_flags, VM_KERN_MEMORY_FILE) != KERN_SUCCESS) {
b0d623f7 A	401	result = PAGER_ERROR;
	402	error_ret = EINVAL;
	403	goto out;
	404	}
	405	upl_offset = 0;
0a7de745	406	} else {
b0d623f7	407	pl = ubc_upl_pageinfo(upl);
0a7de745	408	}
b0d623f7	409
fe8ab488 A	410	/*
fe8ab488 A	411	* Ignore any non-present pages at the end of the
0a7de745	412	* UPL so that we aren't looking at a upl that
fe8ab488 A	413	* may already have been freed by the preceeding
	414	* aborts/completions.
	415	*/
	416	base_index = upl_offset / PAGE_SIZE;
	417
	418	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
0a7de745 A	419	if (upl_page_present(pl, --pg_index)) {
	420	break;
	421	}
fe8ab488	422	if (pg_index == base_index) {
0a7de745	423	/*
fe8ab488 A	424	* no pages were returned, so release
	425	* our hold on the upl and leave
	426	*/
0a7de745 A	427	if (!(flags & UPL_NOCOMMIT)) {
	428	ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);
	429	}
fe8ab488 A	430
	431	goto out;
	432	}
	433	}
	434	isize = ((pg_index + 1) - base_index) * PAGE_SIZE;
	435
9bccf70c	436	/*
91447636 A	437	* we come here for pageouts to 'real' files and
	438	* for msyncs... the upl may not contain any
	439	* dirty pages.. it's our responsibility to sort
	440	* through it and find the 'runs' of dirty pages
	441	* to call VNOP_PAGEOUT on...
9bccf70c	442	*/
fe8ab488	443
fa4905b1	444	if (ubc_getsize(vp) == 0) {
0a7de745	445	/*
91447636 A	446	* if the file has been effectively deleted, then
	447	* we need to go through the UPL and invalidate any
	448	* buffer headers we might have that reference any
	449	* of it's pages
	450	*/
	451	for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
ea3f0419	452	#if CONFIG_NFS_CLIENT
0a7de745	453	if (vp->v_tag == VT_NFS) {
91447636 A	454	/* check with nfs if page is OK to drop */
91447636 A	455	error = nfs_buf_page_inval(vp, (off_t)f_offset);
0a7de745	456	} else
ea3f0419	457	#endif /* CONFIG_NFS_CLIENT */
91447636	458	{
0a7de745 A	459	blkno = ubc_offtoblk(vp, (off_t)f_offset);
0a7de745 A	460	error = buf_invalblkno(vp, blkno, 0);
91447636 A	461	}
91447636 A	462	if (error) {
0a7de745 A	463	if (!(flags & UPL_NOCOMMIT)) {
	464	ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
	465	}
	466	if (error_ret == 0) {
	467	error_ret = error;
	468	}
91447636	469	result = PAGER_ERROR;
0a7de745 A	470	} else if (!(flags & UPL_NOCOMMIT)) {
0a7de745 A	471	ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
fa4905b1	472	}
91447636	473	f_offset += PAGE_SIZE;
1c79356b	474	}
1c79356b A	475	goto out;
1c79356b A	476	}
91447636 A	477
	478	offset = upl_offset;
	479	pg_index = base_index;
1c79356b A	480
	481	while (isize) {
	482	int xsize;
	483	int num_of_pages;
	484
0a7de745 A	485	if (!upl_page_present(pl, pg_index)) {
0a7de745 A	486	/*
91447636 A	487	* we asked for RET_ONLY_DIRTY, so it's possible
	488	* to get back empty slots in the UPL
	489	* just skip over them
	490	*/
0a7de745	491	f_offset += PAGE_SIZE;
2d21ac55 A	492	offset += PAGE_SIZE;
2d21ac55 A	493	isize -= PAGE_SIZE;
1c79356b A	494	pg_index++;
	495
	496	continue;
	497	}
0a7de745	498	if (!upl_dirty_page(pl, pg_index)) {
1c79356b A	499	/*
	500	* if the page is not dirty and reached here it is
	501	* marked precious or it is due to invalidation in
	502	* memory_object_lock request as part of truncation
	503	* We also get here from vm_object_terminate()
	504	* So all you need to do in these
	505	* cases is to invalidate incore buffer if it is there
91447636	506	* Note we must not sleep here if the buffer is busy - that is
fa4905b1	507	* a lock inversion which causes deadlock.
1c79356b	508	*/
ea3f0419	509	#if CONFIG_NFS_CLIENT
0a7de745	510	if (vp->v_tag == VT_NFS) {
55e303ae	511	/* check with nfs if page is OK to drop */
2d21ac55	512	error = nfs_buf_page_inval(vp, (off_t)f_offset);
0a7de745	513	} else
ea3f0419	514	#endif /* CONFIG_NFS_CLIENT */
91447636	515	{
0a7de745 A	516	blkno = ubc_offtoblk(vp, (off_t)f_offset);
0a7de745 A	517	error = buf_invalblkno(vp, blkno, 0);
91447636 A	518	}
91447636 A	519	if (error) {
0a7de745 A	520	if (!(flags & UPL_NOCOMMIT)) {
	521	ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
	522	}
	523	if (error_ret == 0) {
	524	error_ret = error;
	525	}
91447636	526	result = PAGER_ERROR;
0a7de745 A	527	} else if (!(flags & UPL_NOCOMMIT)) {
0a7de745 A	528	ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
91447636	529	}
0a7de745	530	f_offset += PAGE_SIZE;
2d21ac55 A	531	offset += PAGE_SIZE;
2d21ac55 A	532	isize -= PAGE_SIZE;
1c79356b A	533	pg_index++;
	534
	535	continue;
	536	}
1c79356b A	537	num_of_pages = 1;
	538	xsize = isize - PAGE_SIZE;
	539
	540	while (xsize) {
0a7de745	541	if (!upl_dirty_page(pl, pg_index + num_of_pages)) {
1c79356b	542	break;
0a7de745	543	}
1c79356b A	544	num_of_pages++;
	545	xsize -= PAGE_SIZE;
	546	}
	547	xsize = num_of_pages * PAGE_SIZE;
	548
316670eb	549	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
0a7de745 A	550	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_START,
0a7de745 A	551	xsize, (int)f_offset, 0, 0, 0);
9bccf70c	552
0a7de745 A	553	if ((error = VNOP_PAGEOUT(vp, upl, offset, (off_t)f_offset,
	554	xsize, flags, ctx))) {
	555	if (error_ret == 0) {
	556	error_ret = error;
	557	}
91447636 A	558	result = PAGER_ERROR;
91447636 A	559	}
316670eb	560	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
0a7de745 A	561	(MACHDBG_CODE(DBG_MACH_VM, 1)) \| DBG_FUNC_END,
0a7de745 A	562	xsize, 0, 0, 0, 0);
9bccf70c	563
0a7de745	564	f_offset += xsize;
2d21ac55 A	565	offset += xsize;
2d21ac55 A	566	isize -= xsize;
1c79356b A	567	pg_index += num_of_pages;
	568	}
	569	out:
f427ee49 A	570	vnode_put_from_pager(vp);
f427ee49 A	571
0a7de745	572	if (errorp) {
91447636	573	*errorp = error_ret;
0a7de745	574	}
1c79356b	575
0a7de745	576	return result;
1c79356b A	577	}
	578
	579
	580	pager_return_t
	581	vnode_pagein(
0a7de745 A	582	struct vnode *vp,
	583	upl_t upl,
	584	upl_offset_t upl_offset,
	585	vm_object_offset_t f_offset,
	586	upl_size_t size,
	587	int flags,
	588	int *errorp)
1c79356b	589	{
0a7de745 A	590	upl_page_info_t *pl;
	591	int result = PAGER_SUCCESS;
	592	int error = 0;
	593	int pages_in_upl;
	594	int start_pg;
	595	int last_pg;
9bccf70c	596	int first_pg;
0a7de745 A	597	int xsize;
	598	int must_commit = 1;
	599	int ignore_valid_page_check = 0;
1c79356b	600
0a7de745 A	601	if (flags & UPL_NOCOMMIT) {
	602	must_commit = 0;
	603	}
1c79356b	604
0a7de745	605	if (flags & UPL_IGNORE_VALID_PAGE_CHECK) {
39236c6e	606	ignore_valid_page_check = 1;
0a7de745	607	}
39236c6e	608
f427ee49 A	609	/*
	610	* This call is non-blocking and does not ever fail but it can
	611	* only be made when there is other explicit synchronization
	612	* with reclaiming of the vnode which, in this path, is provided
	613	* by the paging in progress counter.
	614	*
	615	* In addition, this may also be entered via vm_swapfile_io
	616	* where the existing iocount provides the necessary synchronization.
	617	* Ideally we would not take an additional iocount here in the cases
	618	* where an explcit iocount has already been taken but this call
	619	* doesn't cause a deadlock as other forms of vnode_get* might if
	620	* this thread has already taken an iocount.
	621	*/
	622	error = vnode_getalways_from_pager(vp);
	623	if (error != 0) {
	624	/* This can't happen */
	625	panic("vnode_getalways returned %d for vp %p", error, vp);
	626	}
	627
2d21ac55	628	if (UBCINFOEXISTS(vp) == 0) {
1c79356b A	629	result = PAGER_ERROR;
1c79356b A	630	error = PAGER_ERROR;
2d21ac55	631
0a7de745	632	if (upl && must_commit) {
9bccf70c	633	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_ERROR);
0a7de745	634	}
2d21ac55	635
1c79356b A	636	goto out;
1c79356b A	637	}
9bccf70c	638	if (upl == (upl_t)NULL) {
b0d623f7	639	flags &= ~UPL_NOCOMMIT;
2d21ac55	640
0a7de745 A	641	if (size > MAX_UPL_SIZE_BYTES) {
0a7de745 A	642	result = PAGER_ERROR;
9bccf70c A	643	error = PAGER_ERROR;
	644	goto out;
	645	}
b0d623f7 A	646	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEINV2) {
	647	/*
	648	* filesystem has requested the new form of VNOP_PAGEIN for file
	649	* backed objects... we will not grab the UPL befofe calling VNOP_PAGEIN...
	650	* it is the fileystem's responsibility to grab the range we're denoting
	651	* via 'f_offset' and 'size' into a UPL... this allows the filesystem to first
	652	* take any locks it needs, before effectively locking the pages into a UPL...
	653	* so we pass a NULL into the filesystem instead of a UPL pointer... the 'upl_offset'
	654	* is used to identify the "must have" page in the extent... the filesystem is free
0a7de745	655	* to clip the extent to better fit the underlying FS blocksize if it desires as
b0d623f7 A	656	* long as it continues to include the "must have" page... 'f_offset' + 'upl_offset'
	657	* identifies that page
	658	*/
0a7de745 A	659	if ((error = VNOP_PAGEIN(vp, NULL, upl_offset, (off_t)f_offset,
0a7de745 A	660	size, flags, vfs_context_current()))) {
cb323159	661	set_thread_pagein_error(current_thread(), error);
b0d623f7 A	662	result = PAGER_ERROR;
	663	error = PAGER_ERROR;
	664	}
	665	goto out;
	666	}
0a7de745	667	ubc_create_upl_kernel(vp, f_offset, size, &upl, &pl, UPL_UBC_PAGEIN \| UPL_RET_ONLY_ABSENT, VM_KERN_MEMORY_FILE);
1c79356b	668
9bccf70c	669	if (upl == (upl_t)NULL) {
0a7de745	670	result = PAGER_ABSENT;
9bccf70c A	671	error = PAGER_ABSENT;
9bccf70c A	672	goto out;
1c79356b	673	}
316670eb A	674	ubc_upl_range_needed(upl, upl_offset / PAGE_SIZE, 1);
316670eb A	675
9bccf70c	676	upl_offset = 0;
2d21ac55	677	first_pg = 0;
0a7de745	678
9bccf70c A	679	/*
	680	* if we get here, we've created the upl and
	681	* are responsible for commiting/aborting it
	682	* regardless of what the caller has passed in
	683	*/
2d21ac55	684	must_commit = 1;
1c79356b	685	} else {
0a7de745	686	pl = ubc_upl_pageinfo(upl);
2d21ac55	687	first_pg = upl_offset / PAGE_SIZE;
9bccf70c A	688	}
9bccf70c A	689	pages_in_upl = size / PAGE_SIZE;
2d21ac55	690	DTRACE_VM2(pgpgin, int, pages_in_upl, (uint64_t *), NULL);
9bccf70c A	691
9bccf70c A	692	/*
0a7de745	693	* before we start marching forward, we must make sure we end on
9bccf70c	694	* a present page, otherwise we will be working with a freed
0a7de745	695	* upl
9bccf70c A	696	*/
9bccf70c A	697	for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
0a7de745	698	if (upl_page_present(pl, last_pg)) {
9bccf70c	699	break;
0a7de745	700	}
2d21ac55	701	if (last_pg == first_pg) {
0a7de745	702	/*
2d21ac55 A	703	* empty UPL, no pages are present
2d21ac55 A	704	*/
0a7de745 A	705	if (must_commit) {
	706	ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
	707	}
2d21ac55 A	708	goto out;
2d21ac55 A	709	}
9bccf70c A	710	}
9bccf70c A	711	pages_in_upl = last_pg + 1;
2d21ac55	712	last_pg = first_pg;
9bccf70c	713
2d21ac55	714	while (last_pg < pages_in_upl) {
0a7de745	715	/*
2d21ac55	716	* skip over missing pages...
9bccf70c	717	*/
0a7de745 A	718	for (; last_pg < pages_in_upl; last_pg++) {
	719	if (upl_page_present(pl, last_pg)) {
	720	break;
	721	}
9bccf70c	722	}
39236c6e A	723
	724	if (ignore_valid_page_check == 1) {
	725	start_pg = last_pg;
	726	} else {
0a7de745	727	/*
39236c6e A	728	* skip over 'valid' pages... we don't want to issue I/O for these
39236c6e A	729	*/
0a7de745 A	730	for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
	731	if (!upl_valid_page(pl, last_pg)) {
	732	break;
	733	}
39236c6e	734	}
9bccf70c	735	}
39236c6e	736
9bccf70c	737	if (last_pg > start_pg) {
0a7de745	738	/*
9bccf70c A	739	* we've found a range of valid pages
	740	* if we've got COMMIT responsibility
	741	* commit this range of pages back to the
	742	* cache unchanged
	743	*/
0a7de745	744	xsize = (last_pg - start_pg) * PAGE_SIZE;
1c79356b	745
0a7de745 A	746	if (must_commit) {
	747	ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);
	748	}
9bccf70c	749	}
0a7de745 A	750	if (last_pg == pages_in_upl) {
0a7de745 A	751	/*
2d21ac55	752	* we're done... all pages that were present
0a7de745	753	* have either had I/O issued on them or
2d21ac55 A	754	* were aborted unchanged...
2d21ac55 A	755	*/
0a7de745 A	756	break;
0a7de745 A	757	}
9bccf70c	758
2d21ac55	759	if (!upl_page_present(pl, last_pg)) {
0a7de745 A	760	/*
0a7de745 A	761	* we found a range of valid pages
2d21ac55 A	762	* terminated by a missing page...
2d21ac55 A	763	* bump index to the next page and continue on
9bccf70c	764	*/
0a7de745 A	765	last_pg++;
0a7de745 A	766	continue;
2d21ac55	767	}
9bccf70c A	768	/*
	769	* scan from the found invalid page looking for a valid
	770	* or non-present page before the end of the upl is reached, if we
	771	* find one, then it will be the last page of the request to
	772	* 'cluster_io'
	773	*/
	774	for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
0a7de745 A	775	if ((!ignore_valid_page_check && upl_valid_page(pl, last_pg)) \|\| !upl_page_present(pl, last_pg)) {
	776	break;
	777	}
9bccf70c A	778	}
9bccf70c A	779	if (last_pg > start_pg) {
0a7de745 A	780	int xoff;
0a7de745 A	781	xsize = (last_pg - start_pg) * PAGE_SIZE;
9bccf70c A	782	xoff = start_pg * PAGE_SIZE;
9bccf70c A	783
0a7de745 A	784	if ((error = VNOP_PAGEIN(vp, upl, (upl_offset_t) xoff,
	785	(off_t)f_offset + xoff,
	786	xsize, flags, vfs_context_current()))) {
	787	/*
b0d623f7	788	* Usually this UPL will be aborted/committed by the lower cluster layer.
6d2010ae A	789	*
6d2010ae A	790	* a) In the case of decmpfs, however, we may return an error (EAGAIN) to avoid
0a7de745	791	* a deadlock with another thread already inflating the file.
6d2010ae A	792	*
	793	* b) In the case of content protection, EPERM is a valid error and we should respect it.
	794	*
	795	* In those cases, we must take care of our UPL at this layer itself.
b0d623f7 A	796	*/
b0d623f7 A	797	if (must_commit) {
0a7de745 A	798	if (error == EAGAIN) {
0a7de745 A	799	ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_RESTART);
b0d623f7	800	}
0a7de745 A	801	if (error == EPERM) {
0a7de745 A	802	ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY \| UPL_ABORT_ERROR);
6d2010ae	803	}
b0d623f7	804	}
cb323159	805	set_thread_pagein_error(current_thread(), error);
0b4e3aa0 A	806	result = PAGER_ERROR;
	807	error = PAGER_ERROR;
	808	}
1c79356b	809	}
0a7de745	810	}
1c79356b	811	out:
f427ee49 A	812	vnode_put_from_pager(vp);
f427ee49 A	813
0a7de745	814	if (errorp) {
fa4905b1	815	*errorp = result;
0a7de745	816	}
1c79356b	817
0a7de745	818	return error;
1c79356b A	819	}
1c79356b A	820
0b4e3aa0	821	void *
1c79356b A	822	upl_get_internal_page_list(upl_t upl)
1c79356b A	823	{
0a7de745	824	return UPL_GET_INTERNAL_PAGE_LIST(upl);
1c79356b	825	}