/*
 * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Author:	Umesh Vaishampayan [umeshv@apple.com]
 *		05-Aug-1999	umeshv	Created.
 *
 *	Functions related to Unified Buffer cache.
 *
 *	Caller of UBC functions MUST have a valid reference on the vnode.
 *
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/ubc_internal.h>
#include <sys/ucred.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_control.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h> /* last */
#if DIAGNOSTIC
#define assert(cond)    \
    ((void) ((cond) ? 0 : panic("%s:%d (%s)", __FILE__, __LINE__, # cond)))
#else
#include <kern/assert.h>
#endif /* DIAGNOSTIC */
int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
int ubc_umcallback(vnode_t, void *);
int ubc_isinuse_locked(vnode_t, int, int);
int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);

struct zone	*ubc_info_zone;
/*
 *  Initialization of the zone for Unified Buffer Cache.
 */
__private_extern__ void
ubc_init()
{
	vm_size_t	i;

	i = (vm_size_t) sizeof (struct ubc_info);
	/* XXX  the number of elements should be tied in to maxvnodes */
	ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
}
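
/*
 * Informational note (not original to this file): zinit() takes
 * (element_size, max_bytes, alloc_bytes, name), so the call above sizes
 * the zone for at most 10000 ubc_info elements and grows it in
 * 8192-byte allocations.  An equivalent spelled-out sketch:
 *
 *	ubc_info_zone = zinit(sizeof (struct ubc_info),
 *	                      10000 * sizeof (struct ubc_info),
 *	                      8192, "ubc_info zone");
 */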
/*
 *	Initialize a ubc_info structure for a vnode.
 */
int
ubc_info_init(struct vnode *vp)
{
	return(ubc_info_init_internal(vp, 0, 0));
}

int
ubc_info_init_withsize(struct vnode *vp, off_t filesize)
{
	return(ubc_info_init_internal(vp, 1, filesize));
}
int
ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize)
{
	register struct ubc_info	*uip;
	void *				pager;
	struct proc *p = current_proc();
	int error = 0;
	kern_return_t kret;
	memory_object_control_t control;

	uip = vp->v_ubcinfo;

	if (uip == UBC_INFO_NULL) {

		uip = (struct ubc_info *) zalloc(ubc_info_zone);
		bzero((char *)uip, sizeof(struct ubc_info));

		uip->ui_vnode = vp;
		uip->ui_flags = UI_INITED;
		uip->ui_ucred = NOCRED;
	}
#if DIAGNOSTIC
	else
		Debugger("ubc_info_init: already");
#endif /* DIAGNOSTIC */

	assert(uip->ui_flags != UI_NONE);
	assert(uip->ui_vnode == vp);

	/* now set this ubc_info in the vnode */
	vp->v_ubcinfo = uip;

	pager = (void *)vnode_pager_setup(vp, uip->ui_pager);

	SET(uip->ui_flags, UI_HASPAGER);
	uip->ui_pager = pager;

	/*
	 * Note: We can not use VNOP_GETATTR() to get accurate
	 * value of ui_size. Thanks to NFS.
	 * nfs_getattr() can call vinvalbuf() and in this case
	 * ubc_info is not set up to deal with that.
	 */

	/*
	 * create a vnode - vm_object association
	 * memory_object_create_named() creates a "named" reference on the
	 * memory object we hold this reference as long as the vnode is
	 * "alive."  Since memory_object_create_named() took its own reference
	 * on the vnode pager we passed it, we can drop the reference
	 * vnode_pager_setup() returned here.
	 */
	kret = memory_object_create_named(pager,
		(memory_object_size_t)uip->ui_size, &control);
	vnode_pager_deallocate(pager);
	if (kret != KERN_SUCCESS)
		panic("ubc_info_init: memory_object_create_named returned %d", kret);

	uip->ui_control = control;	/* cache the value of the mo control */
	SET(uip->ui_flags, UI_HASOBJREF);	/* with a named reference */

	/* create a pager reference on the vnode */
	error = vnode_pager_vget(vp);
	if (error)
		panic("ubc_info_init: vnode_pager_vget error = %d", error);

	if (withfsize == 0) {
		struct vfs_context context;
		/* initialize the size */
		context.vc_proc = p;
		context.vc_ucred = kauth_cred_get();
		error = vnode_size(vp, &uip->ui_size, &context);
		if (error)
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
	vp->v_lflag |= VNAMED_UBC;

	return (error);
}
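
/*
 * Usage sketch (illustrative, not part of the original file): a caller
 * that knows the file size up front would initialize the ubc_info with
 * that size rather than paying for a vnode_size() lookup; "new_vp" and
 * "known_size" are hypothetical names.
 *
 *	if (UBCINFOMISSING(new_vp) || UBCINFORECLAIMED(new_vp))
 *		(void) ubc_info_init_withsize(new_vp, known_size);
 *	else
 *		(void) ubc_info_init(new_vp);
 */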
/* Free the ubc_info */
static void
ubc_info_free(struct ubc_info *uip)
{
	kauth_cred_t credp;

	credp = uip->ui_ucred;
	if (credp != NOCRED) {
		uip->ui_ucred = NOCRED;
		kauth_cred_rele(credp);
	}

	if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
		memory_object_control_deallocate(uip->ui_control);

	cluster_release(uip);

	zfree(ubc_info_zone, (vm_offset_t)uip);
	return;
}

void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
}
/*
 * Communicate with VM the size change of the file
 * returns 1 on success, 0 on failure
 */
int
ubc_setsize(struct vnode *vp, off_t nsize)
{
	off_t osize;	/* ui_size before change */
	off_t lastpg, olastpgend, lastoff;
	struct ubc_info *uip;
	memory_object_control_t control;
	kern_return_t kret;

	if (nsize < (off_t)0)
		return (0);

	if (!UBCINFOEXISTS(vp))
		return (0);

	uip = vp->v_ubcinfo;
	osize = uip->ui_size;	/* call ubc_getsize() ??? */
	/* Update the size before flushing the VM */
	uip->ui_size = nsize;

	if (nsize >= osize)	/* Nothing more to do */
		return (1);	/* return success */

	/*
	 * When the file shrinks, invalidate the pages beyond the
	 * new size. Also get rid of garbage beyond nsize on the
	 * last page. The ui_size already has the nsize. This
	 * ensures that the pageout would not write beyond the new
	 * end of the file.
	 */

	lastpg = trunc_page_64(nsize);
	olastpgend = round_page_64(osize);
	control = uip->ui_control;
	lastoff = (nsize & PAGE_MASK_64);

	/*
	 * If length is multiple of page size, we should not flush
	 * invalidating is sufficient
	 */
	if (!lastoff) {
		/* invalidate last page and old contents beyond nsize */
		kret = memory_object_lock_request(control,
				(memory_object_offset_t)lastpg,
				(memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
				MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
				VM_PROT_NO_CHANGE);
		if (kret != KERN_SUCCESS)
			printf("ubc_setsize: invalidate failed (error = %d)\n", kret);

		return ((kret == KERN_SUCCESS) ? 1 : 0);
	}

	/* flush the last page */
	kret = memory_object_lock_request(control,
			(memory_object_offset_t)lastpg,
			PAGE_SIZE_64, NULL, NULL,
			MEMORY_OBJECT_RETURN_DIRTY, FALSE,
			VM_PROT_NO_CHANGE);

	if (kret == KERN_SUCCESS) {
		/* invalidate last page and old contents beyond nsize */
		kret = memory_object_lock_request(control,
				(memory_object_offset_t)lastpg,
				(memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
				MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
				VM_PROT_NO_CHANGE);
		if (kret != KERN_SUCCESS)
			printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
	} else
		printf("ubc_setsize: flush failed (error = %d)\n", kret);

	return ((kret == KERN_SUCCESS) ? 1 : 0);
}
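
/*
 * Usage sketch (illustrative, not part of the original file): a
 * filesystem truncate path informs the VM after shrinking the file;
 * "my_vp" and "new_size" are hypothetical names.
 *
 *	if (UBCINFOEXISTS(my_vp) && !ubc_setsize(my_vp, new_size))
 *		printf("truncate: ubc_setsize failed\n");
 */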
/*
 * Get the size of the file
 */
off_t
ubc_getsize(struct vnode *vp)
{
	/* people depend on the side effect of this working this way
	 * as they call this for directory
	 */
	if (!UBCINFOEXISTS(vp))
		return ((off_t)0);
	return (vp->v_ubcinfo->ui_size);
}
/*
 * call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes
 * for this mount point.
 * returns 1 on success, 0 on failure
 */
__private_extern__ int
ubc_umount(struct mount *mp)
{
	vnode_iterate(mp, 0, ubc_umcallback, 0);
	return(0);
}

int
ubc_umcallback(vnode_t vp, __unused void * args)
{

	if (UBCINFOEXISTS(vp)) {
		(void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
	}
	return (VNODE_RETURNED);
}
/* Get the credentials */
kauth_cred_t
ubc_getcred(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_ucred);

	return (NOCRED);
}
int
ubc_setthreadcred(struct vnode *vp, struct proc *p, thread_t thread)
{
	struct ubc_info *uip;
	kauth_cred_t credp;
	struct uthread  *uthread = get_bsdthread_info(thread);

	if (!UBCINFOEXISTS(vp))
		return (1);

	vnode_lock(vp);

	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;

	if (credp == NOCRED) {
		/* use per-thread cred, if assumed identity, else proc cred */
		if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
			uip->ui_ucred = kauth_cred_proc_ref(p);
		} else {
			uip->ui_ucred = uthread->uu_ucred;
			kauth_cred_ref(uip->ui_ucred);
		}
	}
	vnode_unlock(vp);

	return (0);
}
/*
 * Set the credentials
 * existing credentials are not changed
 * returns 1 on success and 0 on failure
 */
int
ubc_setcred(struct vnode *vp, struct proc *p)
{
	struct ubc_info *uip;
	kauth_cred_t credp;

	if ( !UBCINFOEXISTS(vp))
		return (0);

	vnode_lock(vp);

	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;

	if (credp == NOCRED) {
		uip->ui_ucred = kauth_cred_proc_ref(p);
	}
	vnode_unlock(vp);

	return (1);
}
__private_extern__ memory_object_t
ubc_getpager(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_pager);

	return (0);
}
/*
 * Get the memory object associated with this vnode
 * If the vnode was reactivated, memory object would not exist.
 * Unless "do not reactivate" was specified, look it up using the pager.
 * If hold was requested, create an object reference if one does not
 * exist already.
 */
memory_object_control_t
ubc_getobject(struct vnode *vp, __unused int flags)
{
	if (UBCINFOEXISTS(vp))
		return((vp->v_ubcinfo->ui_control));

	return (0);
}
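
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * that need the Mach control port guard against a missing object, as
 * the page/range operations later in this file do; "my_vp" is a
 * hypothetical name.
 *
 *	memory_object_control_t control;
 *
 *	control = ubc_getobject(my_vp, UBC_FLAGS_NONE);
 *	if (control == MEMORY_OBJECT_CONTROL_NULL)
 *		return KERN_INVALID_ARGUMENT;
 */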
off_t
ubc_blktooff(vnode_t vp, daddr64_t blkno)
{
	off_t file_offset;
	int error;

	if (UBCINVALID(vp))
		return ((off_t)-1);

	error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
	if (error)
		file_offset = -1;

	return (file_offset);
}

daddr64_t
ubc_offtoblk(vnode_t vp, off_t offset)
{
	daddr64_t blkno;
	int error = 0;

	if (UBCINVALID(vp))
		return ((daddr64_t)-1);

	error = VNOP_OFFTOBLK(vp, offset, &blkno);
	if (error)
		blkno = -1;

	return (blkno);
}
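
/*
 * Worked example (illustrative, not part of the original file): on a
 * filesystem with a hypothetical 4096-byte logical block size, the two
 * conversions reduce to simple arithmetic:
 *
 *	ubc_blktooff(vp, 3)    == 3 * 4096    == 12288
 *	ubc_offtoblk(vp, 8191) == 8191 / 4096 == 1
 *
 * Both return -1 when the vnode is not UBC-valid or the VNOP fails.
 */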
int
ubc_pages_resident(vnode_t vp)
{
	kern_return_t		kret;
	boolean_t		has_pages_resident;

	if ( !UBCINFOEXISTS(vp))
		return (0);

	kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);

	if (kret != KERN_SUCCESS)
		return (0);

	if (has_pages_resident == TRUE)
		return (1);

	return (0);
}
/*
 * This interface will eventually be deprecated
 *
 * clean and/or invalidate a range in the memory object that backs this
 * vnode. The start offset is truncated to the page boundary and the
 * size is adjusted to include the last page in the range.
 *
 * returns 1 for success, 0 for failure
 */
int
ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
{
	return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
}
/*
 * clean and/or invalidate a range in the memory object that backs this
 * vnode. The start offset is truncated to the page boundary and the
 * size is adjusted to include the last page in the range.
 *
 * returns an errno (0 on success)
 */
errno_t
ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
{
	int retval;
	int io_errno = 0;

	if (resid_off)
		*resid_off = beg_off;

	retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);

	if (retval == 0 && io_errno == 0)
		return (EINVAL);
	return (io_errno);
}
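
/*
 * Usage sketch (illustrative, not part of the original file): an
 * fsync-style caller pushes every dirty page of the file and waits for
 * the I/O to complete; "my_vp" is a hypothetical name.
 *
 *	int error;
 *
 *	error = ubc_msync(my_vp, (off_t)0, ubc_getsize(my_vp),
 *	                  NULL, UBC_PUSHDIRTY | UBC_SYNC);
 *	if (error)
 *		printf("ubc_msync failed: %d\n", error);
 */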
/*
 * clean and/or invalidate a range in the memory object that backs this
 * vnode. The start offset is truncated to the page boundary and the
 * size is adjusted to include the last page in the range.
 */
int
ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
{
	memory_object_size_t	tsize;
	kern_return_t		kret;
	int request_flags = 0;
	int flush_flags   = MEMORY_OBJECT_RETURN_NONE;

	if ( !UBCINFOEXISTS(vp))
		return (0);
	if (end_off <= beg_off)
		return (0);
	if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
		return (0);

	if (flags & UBC_INVALIDATE)
		/*
		 * discard the resident pages
		 */
		request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);

	if (flags & UBC_SYNC)
		/*
		 * wait for all the I/O to complete before returning
		 */
		request_flags |= MEMORY_OBJECT_IO_SYNC;

	if (flags & UBC_PUSHDIRTY)
		/*
		 * we only return the dirty pages in the range
		 */
		flush_flags = MEMORY_OBJECT_RETURN_DIRTY;

	if (flags & UBC_PUSHALL)
		/*
		 * then return all the interesting pages in the range (both dirty and precious)
		 */
		flush_flags = MEMORY_OBJECT_RETURN_ALL;

	beg_off = trunc_page_64(beg_off);
	end_off = round_page_64(end_off);
	tsize   = (memory_object_size_t)end_off - beg_off;

	/* flush and/or invalidate pages in the range requested */
	kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
					  beg_off, tsize, resid_off, io_errno,
					  flush_flags, request_flags, VM_PROT_NO_CHANGE);

	return ((kret == KERN_SUCCESS) ? 1 : 0);
}
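
/*
 * Flag mapping summary (informational, derived from the code above):
 *
 *	UBC_INVALIDATE -> MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE
 *	UBC_SYNC       -> MEMORY_OBJECT_IO_SYNC  (wait for I/O completion)
 *	UBC_PUSHDIRTY  -> MEMORY_OBJECT_RETURN_DIRTY
 *	UBC_PUSHALL    -> MEMORY_OBJECT_RETURN_ALL  (dirty and precious pages)
 */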
/*
 * The vnode is mapped explicitly, mark it so.
 */
__private_extern__ int
ubc_map(vnode_t vp, int flags)
{
	struct ubc_info *uip;
	int error = 0;
	int need_ref = 0;
	struct vfs_context context;

	if (vnode_getwithref(vp))
		return (0);

	if (UBCINFOEXISTS(vp)) {
		context.vc_proc = current_proc();
		context.vc_ucred = kauth_cred_get();

		error = VNOP_MMAP(vp, flags, &context);

		if (error == 0) {
			vnode_lock(vp);

			uip = vp->v_ubcinfo;
			if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
				need_ref = 1;
			SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));

			vnode_unlock(vp);

			if (need_ref)
				vnode_ref(vp);
		}
	}
	vnode_put(vp);

	return (error);
}
/*
 * destroy the named reference for a given vnode
 */
__private_extern__ int
ubc_destroy_named(struct vnode *vp)
{
	memory_object_control_t control;
	struct ubc_info *uip;
	kern_return_t kret;

	/*
	 * We may already have had the object terminated
	 * and the ubcinfo released as a side effect of
	 * some earlier processing.  If so, pretend we did
	 * it, because it probably was a result of our
	 * efforts.
	 */
	if (!UBCINFOEXISTS(vp))
		return (1);

	uip = vp->v_ubcinfo;

	/*
	 * Terminate the memory object.
	 * memory_object_destroy() will result in
	 * vnode_pager_no_senders().
	 * That will release the pager reference
	 * and the vnode will move to the free list.
	 */
	control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (control != MEMORY_OBJECT_CONTROL_NULL) {

		/*
		 * XXXXX - should we hold the vnode lock here?
		 */
		if (ISSET(vp->v_flag, VTERMINATE))
			panic("ubc_destroy_named: already terminating");
		SET(vp->v_flag, VTERMINATE);

		kret = memory_object_destroy(control, 0);
		if (kret != KERN_SUCCESS)
			return (0);

		/*
		 * memory_object_destroy() is asynchronous
		 * with respect to vnode_pager_no_senders().
		 * wait for vnode_pager_no_senders() to clear
		 * VTERMINATE
		 */
		vnode_lock(vp);
		while (ISSET(vp->v_lflag, VNAMED_UBC)) {
			(void)msleep((caddr_t)&vp->v_lflag, &vp->v_lock,
				     PINOD, "ubc_destroy_named", 0);
		}
		vnode_unlock(vp);
	}
	return (1);
}
/*
 * Find out whether a vnode is in use by UBC
 * Returns 1 if file is in use by UBC, 0 if not
 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	if ( !UBCINFOEXISTS(vp))
		return (0);
	return(ubc_isinuse_locked(vp, busycount, 0));
}

int
ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
{
	int retval = 0;

	if (!locked)
		vnode_lock(vp);

	if ((vp->v_usecount - vp->v_kusecount) > busycount)
		retval = 1;

	if (!locked)
		vnode_unlock(vp);
	return (retval);
}
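
/*
 * Informational note (derived from the code above): a vnode counts as
 * "in use" when its use count minus kernel-internal references exceeds
 * busycount.  A caller holding one reference of its own would pass
 * busycount == 1; "my_vp" is a hypothetical name.
 *
 *	if (ubc_isinuse(my_vp, 1))
 *		return (EBUSY);
 */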
/*
 * MUST only be called by the VM
 */
__private_extern__ void
ubc_unmap(struct vnode *vp)
{
	struct vfs_context context;
	struct ubc_info *uip;
	int	need_rele = 0;

	if (vnode_getwithref(vp))
		return;

	if (UBCINFOEXISTS(vp)) {
		vnode_lock(vp);

		uip = vp->v_ubcinfo;
		if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
			CLR(uip->ui_flags, UI_ISMAPPED);
			need_rele = 1;
		}
		vnode_unlock(vp);

		if (need_rele) {
			context.vc_proc = current_proc();
			context.vc_ucred = kauth_cred_get();
			(void)VNOP_MNOMAP(vp, &context);

			vnode_rele(vp);
		}
	}
	/*
	 * the drop of the vnode ref will cleanup
	 */
	vnode_put(vp);
}
kern_return_t
ubc_page_op(
	struct vnode	*vp,
	off_t		 f_offset,
	int		 ops,
	ppnum_t		*phys_entryp,
	int		*flagsp)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_page_op(control,
				      (memory_object_offset_t)f_offset,
				      ops,
				      phys_entryp,
				      flagsp));
}
__private_extern__ kern_return_t
ubc_page_op_with_control(
	memory_object_control_t	 control,
	off_t			 f_offset,
	int			 ops,
	ppnum_t			*phys_entryp,
	int			*flagsp)
{
	return (memory_object_page_op(control,
				      (memory_object_offset_t)f_offset,
				      ops,
				      phys_entryp,
				      flagsp));
}
kern_return_t
ubc_range_op(
	struct vnode	*vp,
	off_t		 f_offset_beg,
	off_t		 f_offset_end,
	int		 ops,
	int		*range)
{
	memory_object_control_t		control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_range_op(control,
				       (memory_object_offset_t)f_offset_beg,
				       (memory_object_offset_t)f_offset_end,
				       ops,
				       range));
}
kern_return_t
ubc_create_upl(
	struct vnode	*vp,
	off_t		 f_offset,
	long		 bufsize,
	upl_t		*uplp,
	upl_page_info_t	**plp,
	int		 uplflags)
{
	memory_object_control_t		control;
	int				count;
	int				ubcflags;
	kern_return_t			kr;

	if (bufsize & 0xfff)
		return KERN_INVALID_ARGUMENT;

	if (uplflags & UPL_FOR_PAGEOUT) {
		uplflags &= ~UPL_FOR_PAGEOUT;
		ubcflags  =  UBC_FOR_PAGEOUT;
	} else
		ubcflags = UBC_FLAGS_NONE;

	control = ubc_getobject(vp, ubcflags);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	if (uplflags & UPL_WILL_BE_DUMPED) {
		uplflags &= ~UPL_WILL_BE_DUMPED;
		uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
	} else
		uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
	count = 0;

	kr = memory_object_upl_request(control, f_offset, bufsize,
				       uplp, NULL, &count, uplflags);
	if (plp != NULL)
		*plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
	return kr;
}
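
/*
 * Usage sketch (illustrative, not part of the original file): the usual
 * UPL life cycle around a page-sized transfer; "my_vp" and "my_off" are
 * hypothetical names and error handling is elided.
 *
 *	upl_t		 upl;
 *	upl_page_info_t	*pl;
 *	vm_offset_t	 kaddr;
 *
 *	ubc_create_upl(my_vp, my_off, PAGE_SIZE, &upl, &pl, 0);
 *	ubc_upl_map(upl, &kaddr);
 *	... access the pages through kaddr ...
 *	ubc_upl_unmap(upl);
 *	ubc_upl_commit(upl);	/- or ubc_upl_abort(upl, ...) on failure
 */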
kern_return_t
ubc_upl_map(
	upl_t		upl,
	vm_offset_t	*dst_addr)
{
	return (vm_upl_map(kernel_map, upl, dst_addr));
}

kern_return_t
ubc_upl_unmap(
	upl_t	upl)
{
	return(vm_upl_unmap(kernel_map, upl));
}
kern_return_t
ubc_upl_commit(
	upl_t	upl)
{
	upl_page_info_t	*pl;
	kern_return_t	kr;

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	kr = upl_commit(upl, pl, MAX_UPL_TRANSFER);
	upl_deallocate(upl);
	return kr;
}
kern_return_t
ubc_upl_commit_range(
	upl_t		upl,
	vm_offset_t	offset,
	vm_size_t	size,
	int		flags)
{
	upl_page_info_t	*pl;
	boolean_t	empty;
	kern_return_t	kr;

	if (flags & UPL_COMMIT_FREE_ON_EMPTY)
		flags |= UPL_COMMIT_NOTIFY_EMPTY;

	pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

	kr = upl_commit_range(upl, offset, size, flags,
			      pl, MAX_UPL_TRANSFER, &empty);

	if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}
kern_return_t
ubc_upl_abort_range(
	upl_t		upl,
	vm_offset_t	offset,
	vm_size_t	size,
	int		abort_flags)
{
	kern_return_t	kr;
	boolean_t	empty = FALSE;

	if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
		abort_flags |= UPL_ABORT_NOTIFY_EMPTY;

	kr = upl_abort_range(upl, offset, size, abort_flags, &empty);

	if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
		upl_deallocate(upl);

	return kr;
}

kern_return_t
ubc_upl_abort(
	upl_t		upl,
	int		abort_type)
{
	kern_return_t	kr;

	kr = upl_abort(upl, abort_type);
	upl_deallocate(upl);
	return kr;
}
upl_page_info_t *
ubc_upl_pageinfo(
	upl_t	upl)
{
	return (UPL_GET_INTERNAL_PAGE_LIST(upl));
}
/************* UBC APIS **************/

int
UBCINFOMISSING(struct vnode * vp)
{
	return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_INFO_NULL));
}

int
UBCINFORECLAIMED(struct vnode * vp)
{
	return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_INFO_NULL));
}

int
UBCINFOEXISTS(struct vnode * vp)
{
	return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
}

int
UBCISVALID(struct vnode * vp)
{
	return((vp) && ((vp)->v_type == VREG) && !((vp)->v_flag & VSYSTEM));
}

int
UBCINVALID(struct vnode * vp)
{
	return(((vp) == NULL) || ((vp) && ((vp)->v_type != VREG))
		|| ((vp) && ((vp)->v_flag & VSYSTEM)));
}

int
UBCINFOCHECK(const char * fun, struct vnode * vp)
{
	if ((vp) && ((vp)->v_type == VREG) &&
	    ((vp)->v_ubcinfo == UBC_INFO_NULL)) {
		panic("%s: lost ubc_info", (fun));
		return(1);
	} else
		return(0);
}
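
/*
 * Informational note (derived from the predicates above): only regular
 * files (VREG) not marked VSYSTEM participate in the unified buffer
 * cache, so a defensive caller can gate UBC operations; "my_vp" is a
 * hypothetical name.
 *
 *	if (UBCISVALID(my_vp) && UBCINFOEXISTS(my_vp))
 *		(void) ubc_msync(my_vp, (off_t)0, ubc_getsize(my_vp),
 *		                 NULL, UBC_PUSHDIRTY);
 */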