2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1994 Jan-Simon Pendry
32 * The Regents of the University of California. All rights reserved.
34 * This code is derived from software contributed to Berkeley by
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/proc_internal.h>
77 #include <sys/kauth.h>
79 #include <sys/kernel.h>
80 #include <sys/vnode_internal.h>
81 #include <sys/namei.h>
82 #include <sys/malloc.h>
83 #include <sys/file_internal.h>
84 #include <sys/filedesc.h>
85 #include <sys/queue.h>
86 #include <sys/mount_internal.h>
89 #include <sys/uio_internal.h>
90 #include <miscfs/union/union.h>
92 #include <sys/kdebug.h>
94 #include <security/mac_framework.h>
/*
 * Forward declaration: union_vn_close() is defined near the end of this
 * file and finishes by returning VNOP_CLOSE(vp, fmode, context).
 */
98 static int union_vn_close(struct vnode
*vp
, int fmode
, vfs_context_t ctx
);
100 /* must be power of two, otherwise change UNION_HASH() */
103 /* unsigned int ... */
/*
 * Map an (upper, lower) vnode pointer pair to a bucket index in unhead[].
 * The sum of the two pointer values is shifted right by 8 and masked with
 * NHASH-1, so NHASH must be a power of two.
 * Both arguments are fully parenthesized so that an expression argument is
 * cast as a whole rather than only its first operand.
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
/*
 * Hash table of cached union_nodes. Buckets are selected with
 * UNION_HASH(uppervp, lowervp), linked through un_cache, and initialized
 * with LIST_INIT() in union_init().
 */
107 static LIST_HEAD(unhead
, union_node
) unhead
[NHASH
];
/*
 * One flag word per hash bucket, zeroed in union_init().
 * union_list_lock() sets UNVP_LOCKED, or, when the bucket is already
 * locked, sets UNVP_WANT and sleeps on the entry's address.
 * union_list_unlock() clears UNVP_LOCKED and wakes a waiter if UNVP_WANT
 * was set.
 */
108 static int unvplock
[NHASH
];
/*
 * Lock group, lock-group attributes, lock attributes and mutex used by
 * unionfs; all four are allocated in union_init(). union_mtxp is the
 * mutex passed to msleep() when waiting for a hash-bucket lock or for a
 * union_node that has UN_LOCKED set.
 */
110 static lck_grp_t
* union_lck_grp
;
111 static lck_grp_attr_t
* union_lck_grp_attr
;
112 static lck_attr_t
* union_lck_attr
;
113 static lck_mtx_t
* union_mtxp
;
/*
 * Forward declarations for file-local helpers defined later in this file.
 * union_dircheck() is installed as the union_dircheckp hook by
 * union_init(); union_newlower() and union_newupper() are thin wrappers
 * around union_updatevp().
 */
115 static int union_dircheck(struct vnode
**, struct fileproc
*, vfs_context_t ctx
);
116 static void union_newlower(struct union_node
*, struct vnode
*);
117 static void union_newupper(struct union_node
*, struct vnode
*);
121 union_init(__unused
struct vfsconf
*vfsp
)
125 union_lck_grp_attr
= lck_grp_attr_alloc_init();
127 lck_grp_attr_setstat(union_lck_grp_attr
);
129 union_lck_grp
= lck_grp_alloc_init("union", union_lck_grp_attr
);
130 union_lck_attr
= lck_attr_alloc_init();
132 lck_attr_setdebug(union_lck_attr
);
134 union_mtxp
= lck_mtx_alloc_init(union_lck_grp
, union_lck_attr
);
136 for (i
= 0; i
< NHASH
; i
++)
137 LIST_INIT(&unhead
[i
]);
138 bzero((caddr_t
) unvplock
, sizeof(unvplock
));
139 /* add the hook for getdirentries */
140 union_dircheckp
= union_dircheck
;
148 lck_mtx_lock(union_mtxp
);
154 lck_mtx_unlock(union_mtxp
);
159 union_list_lock(int ix
)
162 if (unvplock
[ix
] & UNVP_LOCKED
) {
163 unvplock
[ix
] |= UNVP_WANT
;
164 msleep((caddr_t
) &unvplock
[ix
], union_mtxp
, PINOD
, "union_list_lock", NULL
);
168 unvplock
[ix
] |= UNVP_LOCKED
;
174 union_list_unlock(int ix
)
177 unvplock
[ix
] &= ~UNVP_LOCKED
;
179 if (unvplock
[ix
] & UNVP_WANT
) {
180 unvplock
[ix
] &= ~UNVP_WANT
;
181 wakeup((caddr_t
) &unvplock
[ix
]);
188 * The uppervp, if not NULL, must be referenced and not locked by us
189 * The lowervp, if not NULL, must be referenced.
191 * If uppervp and lowervp match pointers already installed, then
192 * nothing happens. The passed vp's (when matching) are not adjusted.
194 * This routine may only be called by union_newupper() and
198 /* always called with union lock held */
200 union_updatevp(struct union_node
*un
, struct vnode
*uppervp
,
201 struct vnode
*lowervp
)
203 int ohash
= UNION_HASH(un
->un_uppervp
, un
->un_lowervp
);
204 int nhash
= UNION_HASH(uppervp
, lowervp
);
205 int docache
= (lowervp
!= NULLVP
|| uppervp
!= NULLVP
);
212 * Ensure locking is ordered from lower to higher
213 * to avoid deadlocks.
223 if (lhash
!= uhash
) {
224 while (union_list_lock(lhash
))
228 while (union_list_lock(uhash
))
231 if (ohash
!= nhash
|| !docache
) {
232 if (un
->un_flags
& UN_CACHED
) {
233 un
->un_flags
&= ~UN_CACHED
;
234 LIST_REMOVE(un
, un_cache
);
239 union_list_unlock(ohash
);
241 if (un
->un_lowervp
!= lowervp
) {
242 freevp
= freedirvp
= NULLVP
;
243 freepath
= (caddr_t
)0;
244 if (un
->un_lowervp
) {
245 freevp
= un
->un_lowervp
;
246 un
->un_lowervp
= lowervp
;
248 freepath
= un
->un_path
;
252 freedirvp
= un
->un_dirvp
;
253 un
->un_dirvp
= NULLVP
;
259 vnode_put(freedirvp
);
261 _FREE(un
->un_path
, M_TEMP
);
264 un
->un_lowervp
= lowervp
;
265 if (lowervp
!= NULLVP
)
266 un
->un_lowervid
= vnode_vid(lowervp
);
267 un
->un_lowersz
= VNOVAL
;
270 if (un
->un_uppervp
!= uppervp
) {
272 if (un
->un_uppervp
) {
273 freevp
= un
->un_uppervp
;
275 un
->un_uppervp
= uppervp
;
276 if (uppervp
!= NULLVP
)
277 un
->un_uppervid
= vnode_vid(uppervp
);
278 un
->un_uppersz
= VNOVAL
;
285 if (docache
&& (ohash
!= nhash
)) {
286 LIST_INSERT_HEAD(&unhead
[nhash
], un
, un_cache
);
287 un
->un_flags
|= UN_CACHED
;
290 union_list_unlock(nhash
);
294 * Set a new lowervp. The passed lowervp must be referenced and will be
295 * stored in the vp in a referenced state.
297 /* always called with union lock held */
300 union_newlower(un
, lowervp
)
301 struct union_node
*un
;
302 struct vnode
*lowervp
;
304 union_updatevp(un
, un
->un_uppervp
, lowervp
);
308 * Set a new uppervp. The passed uppervp must be locked and will be
309 * stored in the vp in a locked state. The caller should not unlock
313 /* always called with union lock held */
315 union_newupper(un
, uppervp
)
316 struct union_node
*un
;
317 struct vnode
*uppervp
;
319 union_updatevp(un
, uppervp
, un
->un_lowervp
);
323 * Keep track of size changes in the underlying vnodes.
324 * If the size changes, then callback to the vm layer
325 * giving priority to the upper layer size.
327 /* always called with union lock held */
329 union_newsize(vp
, uppersz
, lowersz
)
331 off_t uppersz
, lowersz
;
333 struct union_node
*un
;
336 /* only interested in regular files */
337 if (vp
->v_type
!= VREG
)
343 if ((uppersz
!= VNOVAL
) && (un
->un_uppersz
!= uppersz
)) {
344 un
->un_uppersz
= uppersz
;
349 if ((lowersz
!= VNOVAL
) && (un
->un_lowersz
!= lowersz
)) {
350 un
->un_lowersz
= lowersz
;
356 #ifdef UNION_DIAGNOSTIC
357 printf("union: %s size now %ld\n",
358 uppersz
!= VNOVAL
? "upper" : "lower", (long) sz
);
367 * union_allocvp: allocate a union_node and associate it with a
368 * parent union_node and one or two vnodes.
370 * vpp Holds the returned vnode locked and referenced if no
373 * mp Holds the mount point. mp may or may not be busied.
374 * allocvp() makes no changes to mp.
376 * dvp Holds the parent union_node to the one we wish to create.
377 * XXX may only be used to traverse an uncopied lowervp-based
380 * dvp may or may not be locked. allocvp() makes no changes
383 * upperdvp Holds the parent vnode to uppervp, generally used along
384 * with path component information to create a shadow of
385 * lowervp when uppervp does not exist.
387 * upperdvp is referenced but unlocked on entry, and will be
388 * dereferenced on return.
390 * uppervp Holds the new uppervp vnode to be stored in the
391 * union_node we are allocating. uppervp is referenced but
392 * not locked, and will be dereferenced on return.
394 * lowervp Holds the new lowervp vnode to be stored in the
395 * union_node we are allocating. lowervp is referenced but
396 * not locked, and will be dereferenced on return.
398 * cnp Holds path component information to be coupled with
399 * lowervp and upperdvp to allow unionfs to create an uppervp
400 * later on. Only used if lowervp is valid. The contents
401 * of cnp is only valid for the duration of the call.
403 * docache Determine whether this node should be entered in the
404 * cache or whether it should be destroyed as soon as possible.
406 * All union_nodes are maintained on a singly-linked
407 * list. New nodes are only allocated when they cannot
408 * be found on this list. Entries on the list are
409 * removed when the vfs reclaim entry is called.
411 * A single lock is kept for the entire list. This is
412 * needed because the getnewvnode() function can block
413 * waiting for a vnode to become free, in which case there
414 * may be more than one process trying to get the same
415 * vnode. This lock is only taken if we are going to
416 * call getnewvnode(), since the kernel itself is single-threaded.
418 * If an entry is found on the list, then call vget() to
419 * take a reference. This is done because there may be
420 * zero references to it and so it needs to be removed from
421 * the vnode free list.
424 /* always called with union lock held */
427 union_allocvp(struct vnode
**vpp
,
431 struct componentname
*cnp
,
432 struct vnode
*uppervp
,
433 struct vnode
*lowervp
,
437 struct union_node
*un
= NULL
;
438 struct union_node
*unp
;
439 struct vnode
*xlowervp
= NULLVP
;
440 struct union_mount
*um
= MOUNTTOUNIONMOUNT(mp
);
441 int hash
= 0; /* protected by docache */
444 struct vnode_fsparam vfsp
;
447 if (uppervp
== NULLVP
&& lowervp
== NULLVP
)
448 panic("union: unidentifiable allocation");
451 * if both upper and lower vp are provided and are of different types,
452 * consider lowervp as NULL
454 if (uppervp
&& lowervp
&& (uppervp
->v_type
!= lowervp
->v_type
)) {
459 /* detect the root vnode (and aliases) */
461 if ((uppervp
== um
->um_uppervp
) &&
462 ((lowervp
== NULLVP
) || lowervp
== um
->um_lowervp
)) {
463 if (lowervp
== NULLVP
) {
464 lowervp
= um
->um_lowervp
;
465 if (lowervp
!= NULLVP
) {
477 } else for (try = 0; try < 3; try++) {
480 if (lowervp
== NULLVP
)
482 hash
= UNION_HASH(uppervp
, lowervp
);
486 if (uppervp
== NULLVP
)
488 hash
= UNION_HASH(uppervp
, NULLVP
);
492 if (lowervp
== NULLVP
)
494 /* Not sure how this path gets exercised ? */
495 hash
= UNION_HASH(NULLVP
, lowervp
);
499 while (union_list_lock(hash
))
502 for (un
= unhead
[hash
].lh_first
; un
!= 0;
503 un
= un
->un_cache
.le_next
) {
504 if ((un
->un_lowervp
== lowervp
||
505 un
->un_lowervp
== NULLVP
) &&
506 (un
->un_uppervp
== uppervp
||
507 un
->un_uppervp
== NULLVP
) &&
508 (un
->un_mount
== mp
)) {
513 union_list_unlock(hash
);
521 * Obtain a lock on the union_node.
522 * uppervp is locked, though un->un_uppervp
523 * may not be. this doesn't break the locking
524 * hierarchy since in the case that un->un_uppervp
525 * is not yet locked it will be vnode_put'd and replaced
529 if (un
->un_flags
& UN_LOCKED
) {
530 un
->un_flags
|= UN_WANT
;
531 msleep((caddr_t
) &un
->un_flags
, union_mtxp
, PINOD
, "union node locked", 0);
534 un
->un_flags
|= UN_LOCKED
;
537 if (UNIONTOV(un
) == NULLVP
)
538 panic("null vnode in union node\n");
539 if (vnode_get(UNIONTOV(un
))) {
541 un
->un_flags
&= ~UN_LOCKED
;
542 if ((un
->un_flags
& UN_WANT
) == UN_WANT
) {
543 un
->un_flags
&= ~UN_LOCKED
;
544 wakeup(&un
->un_flags
);
551 * At this point, the union_node is locked,
552 * un->un_uppervp may not be locked, and uppervp
557 * Save information about the upper layer.
559 if (uppervp
!= un
->un_uppervp
) {
560 union_newupper(un
, uppervp
);
561 } else if (uppervp
) {
568 * Save information about the lower layer.
569 * This needs to keep track of pathname
570 * and directory information which union_vn_create
573 if (lowervp
!= un
->un_lowervp
) {
574 union_newlower(un
, lowervp
);
575 if (cnp
&& (lowervp
!= NULLVP
)) {
576 un
->un_hash
= cnp
->cn_hash
;
578 MALLOC(un
->un_path
, caddr_t
, cnp
->cn_namelen
+1,
580 bcopy(cnp
->cn_nameptr
, un
->un_path
,
584 un
->un_path
[cnp
->cn_namelen
] = '\0';
587 } else if (lowervp
) {
593 un
->un_flags
&= ~UN_LOCKED
;
594 if ((un
->un_flags
& UN_WANT
) == UN_WANT
) {
595 un
->un_flags
&= ~UN_WANT
;
596 wakeup(&un
->un_flags
);
603 * otherwise lock the vp list while we call getnewvnode
604 * since that can block.
606 hash
= UNION_HASH(uppervp
, lowervp
);
608 if (union_list_lock(hash
))
613 MALLOC(unp
, void *, sizeof(struct union_node
), M_TEMP
, M_WAITOK
);
616 bzero(unp
, sizeof(struct union_node
));
618 un
->un_uppervp
= uppervp
;
619 if (uppervp
!= NULLVP
)
620 un
->un_uppervid
= vnode_vid(uppervp
);
621 un
->un_uppersz
= VNOVAL
;
622 un
->un_lowervp
= lowervp
;
623 if (lowervp
!= NULLVP
)
624 un
->un_lowervid
= vnode_vid(lowervp
);
625 un
->un_lowersz
= VNOVAL
;
632 un
->un_flags
= UN_LOCKED
;
634 if (UNION_FAULTIN(um
))
635 un
->un_flags
|= UN_FAULTFS
;
639 /* Insert with lock held */
640 LIST_INSERT_HEAD(&unhead
[hash
], un
, un_cache
);
641 un
->un_flags
|= UN_CACHED
;
642 union_list_unlock(hash
);
648 vtype
= uppervp
->v_type
;
650 vtype
= lowervp
->v_type
;
652 bzero(&vfsp
, sizeof(struct vnode_fsparam
));
654 vfsp
.vnfs_vtype
= vtype
;
655 vfsp
.vnfs_str
= "unionfs";
656 vfsp
.vnfs_dvp
= undvp
;
657 vfsp
.vnfs_fsnode
= unp
;
659 vfsp
.vnfs_vops
= union_vnodeop_p
;
661 vfsp
.vnfs_filesize
= 0;
662 vfsp
.vnfs_flags
= VNFS_NOCACHE
| VNFS_CANTCACHE
;
663 vfsp
.vnfs_marksystem
= 0;
664 vfsp
.vnfs_markroot
= markroot
;
666 error
= vnode_create(VNCREATE_FLAVOR
, VCREATESIZE
, &vfsp
, vpp
);
668 /* XXXXX Is this right ???? XXXXXXX */
676 if (un
->un_flags
& UN_CACHED
) {
677 un
->un_flags
&= ~UN_CACHED
;
678 LIST_REMOVE(un
, un_cache
);
681 union_list_unlock(hash
);
688 if (cnp
&& (lowervp
!= NULLVP
)) {
689 un
->un_hash
= cnp
->cn_hash
;
690 un
->un_path
= _MALLOC(cnp
->cn_namelen
+1, M_TEMP
, M_WAITOK
);
691 bcopy(cnp
->cn_nameptr
, un
->un_path
, cnp
->cn_namelen
);
692 un
->un_path
[cnp
->cn_namelen
] = '\0';
706 vnode_settag(*vpp
, VT_UNION
);
708 if (un
->un_vnode
->v_type
== VDIR
) {
709 if (un
->un_uppervp
== NULLVP
) {
710 panic("faulting fs and no upper vp for dir?");
716 un
->un_flags
&= ~UN_LOCKED
;
717 if ((un
->un_flags
& UN_WANT
) == UN_WANT
) {
718 un
->un_flags
&= ~UN_WANT
;
719 wakeup(&un
->un_flags
);
726 /* always called with union lock held */
728 union_freevp(struct vnode
*vp
)
730 struct union_node
*un
= VTOUNION(vp
);
732 if (un
->un_flags
& UN_CACHED
) {
733 un
->un_flags
&= ~UN_CACHED
;
734 LIST_REMOVE(un
, un_cache
);
738 if (un
->un_pvp
!= NULLVP
)
739 vnode_put(un
->un_pvp
);
740 if (un
->un_uppervp
!= NULLVP
)
741 vnode_put(un
->un_uppervp
);
742 if (un
->un_lowervp
!= NULLVP
)
743 vnode_put(un
->un_lowervp
);
744 if (un
->un_dirvp
!= NULLVP
)
745 vnode_put(un
->un_dirvp
);
747 _FREE(un
->un_path
, M_TEMP
);
749 FREE(vp
->v_data
, M_TEMP
);
757 * copyfile. copy the vnode (fvp) to the vnode (tvp)
758 * using a sequence of reads and writes. both (fvp)
759 * and (tvp) are locked on entry and exit.
761 /* called with no union lock held */
763 union_copyfile(struct vnode
*fvp
, struct vnode
*tvp
, vfs_context_t context
)
772 * allocate a buffer of size MAXPHYSIO.
773 * loop doing reads and writes, keeping track
774 * of the current uio offset.
775 * give up at the first sign of trouble.
779 #if 1 /* LP64todo - can't use new segment flags until the drivers are ready */
780 uio
.uio_segflg
= UIO_SYSSPACE
;
782 uio
.uio_segflg
= UIO_SYSSPACE32
;
786 bufp
= _MALLOC(MAXPHYSIO
, M_TEMP
, M_WAITOK
);
788 /* ugly loop follows... */
790 off_t offset
= uio
.uio_offset
;
792 uio
.uio_iovs
.iov32p
= &iov
;
794 iov
.iov_base
= (uintptr_t)bufp
;
795 iov
.iov_len
= MAXPHYSIO
;
796 uio_setresid(&uio
, iov
.iov_len
);
797 uio
.uio_rw
= UIO_READ
;
798 error
= VNOP_READ(fvp
, &uio
, 0, context
);
801 uio
.uio_iovs
.iov32p
= &iov
;
803 iov
.iov_base
= (uintptr_t)bufp
;
804 iov
.iov_len
= MAXPHYSIO
- uio_resid(&uio
);
805 uio
.uio_offset
= offset
;
806 uio
.uio_rw
= UIO_WRITE
;
807 uio_setresid(&uio
, iov
.iov_len
);
809 if (uio_resid(&uio
) == 0)
813 error
= VNOP_WRITE(tvp
, &uio
, 0, context
);
814 } while ((uio_resid(&uio
) > 0) && (error
== 0));
817 } while (error
== 0);
824 * (un) is assumed to be locked on entry and remains
827 /* always called with union lock held */
829 union_copyup(struct union_node
*un
, int docopy
, vfs_context_t context
)
832 struct vnode
*lvp
, *uvp
;
833 struct vnode_attr vattr
;
837 lvp
= un
->un_lowervp
;
841 if (UNNODE_FAULTIN(un
)) {
842 /* Need to inherit exec mode in faulting fs */
844 VATTR_WANTED(&vattr
, va_flags
);
845 if (vnode_getattr(lvp
, &vattr
, context
) == 0 )
846 cmode
= vattr
.va_mode
;
849 error
= union_vn_create(&uvp
, un
, cmode
, context
);
852 if (error
== EEXIST
) {
854 union_newupper(un
, uvp
);
862 /* at this point, uppervp is locked */
863 union_newupper(un
, uvp
);
869 * XX - should not ignore errors
872 error
= VNOP_OPEN(lvp
, FREAD
, context
);
874 error
= union_copyfile(lvp
, uvp
, context
);
875 (void) VNOP_CLOSE(lvp
, FREAD
, context
);
877 #ifdef UNION_DIAGNOSTIC
879 uprintf("union: copied up %s\n", un
->un_path
);
883 union_vn_close(uvp
, FWRITE
, context
);
886 * Subsequent IOs will go to the top layer, so
887 * call close on the lower vnode and open on the
888 * upper vnode to ensure that the filesystem keeps
889 * its reference counts right. This doesn't do
890 * the right thing with (cred) and (FREAD) though.
891 * Ignoring error returns is not right, either.
894 /* No need to hold the lock as the union node should be locked for this(it is in faultin mode) */
898 for (i
= 0; i
< un
->un_openl
; i
++) {
899 (void) VNOP_CLOSE(lvp
, FREAD
, context
);
900 (void) VNOP_OPEN(uvp
, FREAD
, context
);
913 union_faultin_copyup(struct vnode
**vpp
, vnode_t udvp
, vnode_t lvp
, struct componentname
* cnp
, vfs_context_t context
)
917 struct vnode_attr vattr
;
918 struct vnode_attr
*vap
;
920 int fmode
= FFLAGS(O_WRONLY
|O_CREAT
|O_TRUNC
|O_EXCL
);
921 struct proc
* p
= vfs_context_proc(context
);
922 struct componentname cn
;
927 VATTR_WANTED(vap
, va_flags
);
928 if (vnode_getattr(lvp
, vap
, context
) == 0 )
929 cmode
= vattr
.va_mode
;
934 if (cmode
== (mode_t
)0)
935 cmode
= UN_FILEMODE
& ~p
->p_fd
->fd_cmask
;
937 cmode
= cmode
& ~p
->p_fd
->fd_cmask
;
941 * Build a new componentname structure (for the same
942 * reasons outlined in union_mkshadow()).
943 * The difference here is that the file is owned by
944 * the current user, rather than by the person who
945 * did the mount, since the current user needs to be
946 * able to write the file (that's why it is being
947 * copied in the first place).
949 bzero(&cn
, sizeof(struct componentname
));
951 cn
.cn_namelen
= cnp
->cn_namelen
;
952 cn
.cn_pnbuf
= (caddr_t
) _MALLOC_ZONE(cn
.cn_namelen
+1,
954 cn
.cn_pnlen
= cn
.cn_namelen
+1;
955 bcopy(cnp
->cn_nameptr
, cn
.cn_pnbuf
, cn
.cn_namelen
+1);
956 cn
.cn_nameiop
= CREATE
;
957 cn
.cn_flags
= (HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
|UNIONCREATED
);
958 cn
.cn_context
= context
;
959 cn
.cn_nameptr
= cn
.cn_pnbuf
;
964 * Pass dvp unlocked and referenced on call to relookup().
966 * If an error occurs, dvp will be returned unlocked and dereferenced.
968 if ((error
= relookup(udvp
, &uvp
, &cn
)) != 0) {
973 * If no error occurs, dvp will be returned locked with the reference
974 * left as before, and vpp will be returned referenced and locked.
983 * Good - there was no race to create the file
984 * so go ahead and create it. The permissions
985 * on the file will be 0666 modified by the
986 * current user's umask. Access to the file, while
987 * it is unioned, will require access to the top *and*
988 * bottom files. Access when not unioned will simply
989 * require access to the top-level file.
991 * TODO: confirm choice of access permissions.
992 * decide on authorisation behaviour
996 VATTR_SET(vap
, va_type
, VREG
);
997 VATTR_SET(vap
, va_mode
, cmode
);
999 cn
.cn_flags
|= (UNIONCREATED
);
1000 if ((error
= vn_create(udvp
, &uvp
, &cn
, vap
, 0, context
)) != 0) {
1005 if ((error
= VNOP_OPEN(uvp
, fmode
, context
)) != 0) {
1006 vn_clearunionwait(uvp
, 0);
1012 error
= vnode_ref_ext(uvp
, fmode
);
1014 vn_clearunionwait(uvp
, 0);
1015 VNOP_CLOSE(uvp
, fmode
, context
);
1023 * XX - should not ignore errors
1026 error
= VNOP_OPEN(lvp
, FREAD
, context
);
1028 error
= union_copyfile(lvp
, uvp
, context
);
1029 (void) VNOP_CLOSE(lvp
, FREAD
, context
);
1032 VNOP_CLOSE(uvp
, fmode
, context
);
1033 vnode_rele_ext(uvp
, fmode
, 0);
1034 vn_clearunionwait(uvp
, 0);
1038 if ((cn
.cn_flags
& HASBUF
) == HASBUF
) {
1039 FREE_ZONE(cn
.cn_pnbuf
, cn
.cn_pnlen
, M_NAMEI
);
1040 cn
.cn_flags
&= ~HASBUF
;
1049 * dvp should be locked on entry and will be locked on return. No
1050 * net change in the ref count will occur.
1052 * If an error is returned, *vpp will be invalid, otherwise it
1053 * will hold a locked, referenced vnode. If *vpp == dvp then
1054 * remember that only one exclusive lock is held.
1057 /* No union lock held for this call */
1061 struct union_mount
*um
,
1062 #else /* !XXX_HELP_ME */
1063 __unused
struct union_mount
*um
,
1064 #endif /* !XXX_HELP_ME */
1067 struct componentname
*cnp
,
1068 struct componentname
*cn
,
1075 * A new componentname structure must be faked up because
1076 * there is no way to know where the upper level cnp came
1077 * from or what it is being used for. This must duplicate
1078 * some of the work done by NDINIT, some of the work done
1079 * by namei, some of the work done by lookup and some of
1080 * the work done by vnop_lookup when given a CREATE flag.
1081 * Conclusion: Horrible.
1083 cn
->cn_namelen
= pathlen
;
1084 cn
->cn_pnbuf
= _MALLOC_ZONE(cn
->cn_namelen
+1, M_NAMEI
, M_WAITOK
);
1085 cn
->cn_pnlen
= cn
->cn_namelen
+1;
1086 bcopy(path
, cn
->cn_pnbuf
, cn
->cn_namelen
);
1087 cn
->cn_pnbuf
[cn
->cn_namelen
] = '\0';
1089 cn
->cn_nameiop
= CREATE
;
1090 cn
->cn_flags
= (HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
);
1092 cn
->cn_proc
= cnp
->cn_proc
;
1093 if (um
->um_op
== UNMNT_ABOVE
)
1094 cn
->cn_cred
= cnp
->cn_cred
;
1096 cn
->cn_cred
= um
->um_cred
;
1098 cn
->cn_context
= cnp
->cn_context
; /* XXX !UNMNT_ABOVE case ??? */
1099 cn
->cn_nameptr
= cn
->cn_pnbuf
;
1101 cn
->cn_consume
= cnp
->cn_consume
;
1104 error
= relookup(dvp
, vpp
, cn
);
1111 * Create a shadow directory in the upper layer.
1112 * The new vnode is returned locked.
1114 * (um) points to the union mount structure for access to the
1115 * mounting process's credentials.
1116 * (dvp) is the directory in which to create the shadow directory,
1117 * It is locked (but not ref'd) on entry and return.
1118 * (cnp) is the component name to be created.
1119 * (vpp) is the returned newly created shadow directory, which
1120 * is returned locked and ref'd
1122 /* No union lock held for this call */
1124 union_mkshadow(um
, dvp
, cnp
, vpp
)
1125 struct union_mount
*um
;
1127 struct componentname
*cnp
;
1131 struct vnode_attr va
;
1132 struct componentname cn
;
1134 bzero(&cn
, sizeof(struct componentname
));
1137 error
= union_relookup(um
, dvp
, vpp
, cnp
, &cn
,
1138 cnp
->cn_nameptr
, cnp
->cn_namelen
);
1148 * Policy: when creating the shadow directory in the
1149 * upper layer, create it owned by the user who did
1150 * the mount, group from parent directory, and mode
1151 * 777 modified by umask (ie mostly identical to the
1152 * mkdir syscall). (jsp, kb)
1156 VATTR_SET(&va
, va_type
, VDIR
);
1157 VATTR_SET(&va
, va_mode
, um
->um_cmode
);
1159 error
= vn_create(dvp
, vpp
, &cn
, &va
, 0, cnp
->cn_context
);
1161 if ((cn
.cn_flags
& HASBUF
) == HASBUF
) {
1162 FREE_ZONE(cn
.cn_pnbuf
, cn
.cn_pnlen
, M_NAMEI
);
1163 cn
.cn_flags
&= ~HASBUF
;
1169 * Create a whiteout entry in the upper layer.
1171 * (um) points to the union mount structure for access to the
1172 * mounting process's credentials.
1173 * (dvp) is the directory in which to create the whiteout.
1174 * it is locked on entry and exit.
1175 * (cnp) is the componentname to be created.
1177 /* No union lock held for this call */
1179 union_mkwhiteout(um
, dvp
, cnp
, path
)
1180 struct union_mount
*um
;
1182 struct componentname
*cnp
;
1187 struct componentname cn
;
1189 bzero(&cn
, sizeof(struct componentname
));
1191 error
= union_relookup(um
, dvp
, &wvp
, cnp
, &cn
, path
, strlen(path
));
1200 error
= VNOP_WHITEOUT(dvp
, &cn
, CREATE
, cnp
->cn_context
);
1203 if ((cn
.cn_flags
& HASBUF
) == HASBUF
) {
1204 FREE_ZONE(cn
.cn_pnbuf
, cn
.cn_pnlen
, M_NAMEI
);
1205 cn
.cn_flags
&= ~HASBUF
;
1212 * union_vn_create: creates and opens a new shadow file
1213 * on the upper union layer. This function is similar
1214 * in spirit to calling vn_open() but it avoids calling namei().
1215 * The problem with calling namei() is that a) it locks too many
1216 * things, and b) it doesn't start at the "right" directory,
1217 * whereas relookup() is told where to start.
1219 * On entry, the vnode associated with un is locked. It remains locked
1222 * If no error occurs, *vpp contains a locked referenced vnode for your
1223 * use. If an error occurs *vpp is undefined.
1225 /* called with no union lock held */
1227 union_vn_create(struct vnode
**vpp
, struct union_node
*un
, mode_t cmode
, vfs_context_t context
)
1230 struct vnode_attr vat
;
1231 struct vnode_attr
*vap
= &vat
;
1232 int fmode
= FFLAGS(O_WRONLY
|O_CREAT
|O_TRUNC
|O_EXCL
);
1234 struct proc
* p
= vfs_context_proc(context
);
1235 struct componentname cn
;
1237 bzero(&cn
, sizeof(struct componentname
));
1240 if (cmode
== (mode_t
)0)
1241 cmode
= UN_FILEMODE
& ~p
->p_fd
->fd_cmask
;
1243 cmode
= cmode
& ~p
->p_fd
->fd_cmask
;
1247 * Build a new componentname structure (for the same
1248 * reasons outlined in union_mkshadow()).
1249 * The difference here is that the file is owned by
1250 * the current user, rather than by the person who
1251 * did the mount, since the current user needs to be
1252 * able to write the file (that's why it is being
1253 * copied in the first place).
1255 cn
.cn_namelen
= strlen(un
->un_path
);
1256 cn
.cn_pnbuf
= (caddr_t
) _MALLOC_ZONE(cn
.cn_namelen
+1,
1258 cn
.cn_pnlen
= cn
.cn_namelen
+1;
1259 bcopy(un
->un_path
, cn
.cn_pnbuf
, cn
.cn_namelen
+1);
1260 cn
.cn_nameiop
= CREATE
;
1261 if (UNNODE_FAULTIN(un
))
1262 cn
.cn_flags
= (HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
|UNIONCREATED
);
1264 cn
.cn_flags
= (HASBUF
|SAVENAME
|SAVESTART
|ISLASTCN
);
1265 cn
.cn_context
= context
;
1266 cn
.cn_nameptr
= cn
.cn_pnbuf
;
1267 cn
.cn_hash
= un
->un_hash
;
1271 * Pass dvp unlocked and referenced on call to relookup().
1273 * If an error occurs, dvp will be returned unlocked and dereferenced.
1275 vnode_get(un
->un_dirvp
);
1276 if ((error
= relookup(un
->un_dirvp
, &vp
, &cn
)) != 0) {
1277 vnode_put(un
->un_dirvp
);
1280 vnode_put(un
->un_dirvp
);
1283 * If no error occurs, dvp will be returned locked with the reference
1284 * left as before, and vpp will be returned referenced and locked.
1293 * Good - there was no race to create the file
1294 * so go ahead and create it. The permissions
1295 * on the file will be 0666 modified by the
1296 * current user's umask. Access to the file, while
1297 * it is unioned, will require access to the top *and*
1298 * bottom files. Access when not unioned will simply
1299 * require access to the top-level file.
1301 * TODO: confirm choice of access permissions.
1302 * decide on authorisation behaviour
1306 VATTR_SET(vap
, va_type
, VREG
);
1307 VATTR_SET(vap
, va_mode
, cmode
);
1309 if ((error
= vn_create(un
->un_dirvp
, &vp
, &cn
, vap
, 0, context
)) != 0) {
1313 if ((error
= VNOP_OPEN(vp
, fmode
, context
)) != 0) {
1319 if (++vp
->v_writecount
<= 0)
1320 panic("union: v_writecount");
1326 if ((cn
.cn_flags
& HASBUF
) == HASBUF
) {
1327 FREE_ZONE(cn
.cn_pnbuf
, cn
.cn_pnlen
, M_NAMEI
);
1328 cn
.cn_flags
&= ~HASBUF
;
1333 /* called with no union lock held */
1335 union_vn_close(struct vnode
*vp
, int fmode
, vfs_context_t context
)
1338 if (fmode
& FWRITE
) {
1343 return (VNOP_CLOSE(vp
, fmode
, context
));
1347 * union_removed_upper:
1349 * An upper-only file/directory has been removed; un-cache it so
1350 * that unionfs vnode gets reclaimed and the last uppervp reference
1353 * Called with union_node unlocked.
1355 /* always called with union lock held */
1357 union_removed_upper(un
)
1358 struct union_node
*un
;
1360 union_newupper(un
, NULLVP
);
1361 if (un
->un_flags
& UN_CACHED
) {
1362 un
->un_flags
&= ~UN_CACHED
;
1363 LIST_REMOVE(un
, un_cache
);
1373 struct union_node
*un
= VTOUNION(vp
);
1375 if ((un
->un_lowervp
!= NULLVP
) &&
1376 (vp
->v_type
== un
->un_lowervp
->v_type
)) {
1377 if (vnode_get(un
->un_lowervp
) == 0)
1378 return (un
->un_lowervp
);
1386 * Determine whether a whiteout is needed
1387 * during a remove/rmdir operation.
1389 /* called with no union lock held */
1391 union_dowhiteout(struct union_node
*un
, vfs_context_t ctx
)
1393 struct vnode_attr va
;
1395 if (UNNODE_FAULTIN(un
))
1398 if ((un
->un_lowervp
!= NULLVP
) )
1402 VATTR_WANTED(&va
, va_flags
);
1403 if (vnode_getattr(un
->un_uppervp
, &va
, ctx
) == 0 &&
1404 (va
.va_flags
& OPAQUE
))
1410 /* called with no union lock held */
1412 union_dircache_r(struct vnode
*vp
, struct vnode
***vppp
, int *cntp
)
1414 struct union_node
*un
;
1416 if (vp
->v_op
!= union_vnodeop_p
) {
1421 panic("union: dircache table too small");
1430 if (un
->un_uppervp
!= NULLVP
)
1431 union_dircache_r(un
->un_uppervp
, vppp
, cntp
);
1432 if (un
->un_lowervp
!= NULLVP
)
1433 union_dircache_r(un
->un_lowervp
, vppp
, cntp
);
1436 /* called with no union lock held */
1438 union_dircache(struct vnode
*vp
, __unused vfs_context_t context
)
1441 struct vnode
*nvp
, *lvp
;
1443 struct vnode
**dircache
, **newdircache
;
1444 struct union_node
*un
;
1454 dircache
= un
->un_dircache
;
1455 if (dircache
== 0) {
1458 union_dircache_r(vp
, 0, &count
);
1461 /* too bad; we need Union now! */
1463 panic("MAC Framework doesn't support unionfs (yet)\n");
1467 dircache
= (struct vnode
**)
1468 _MALLOC(count
* sizeof(struct vnode
*),
1470 newdircache
= dircache
;
1473 union_dircache_r(vp
, &vpp
, &count
);
1480 if (*vpp
++ == un
->un_uppervp
)
1482 } while (*vpp
!= NULLVP
);
1487 if (lvp
== NULLVP
) {
1494 error
= union_allocvp(&nvp
, vp
->v_mount
, NULLVP
, NULLVP
, 0, lvp
, NULLVP
, 0);
1501 un
->un_dircache
= 0;
1504 if ((alloced
!= 0) && (un
->un_dircache
!= 0)) {
1506 for (vpp
= newdircache
; *vpp
!= NULLVP
; vpp
++)
1508 _FREE(newdircache
, M_TEMP
);
1516 un
->un_dircache
= dircache
;
1517 un
->un_flags
|= UN_DIRENVN
;
1525 * If we allocated a new dircache and couldn't attach
1526 * it to a new vp, free the resources we allocated.
1529 for (vpp
= newdircache
; *vpp
!= NULLVP
; vpp
++)
1531 _FREE(newdircache
, M_TEMP
);
1537 * Module glue to remove #ifdef UNION from vfs_syscalls.c
1539 /* Called with no union lock, the union_dircache takes locks when necessary */
1541 union_dircheck(struct vnode
**vpp
, struct fileproc
*fp
, vfs_context_t ctx
)
1546 if (vp
->v_op
== union_vnodeop_p
) {
1549 lvp
= union_dircache(vp
, ctx
);
1550 if (lvp
!= NULLVP
) {
1551 struct vnode_attr va
;
1553 * If the directory is opaque,
1554 * then don't show lower entries
1557 VATTR_WANTED(&va
, va_flags
);
1558 error
= vnode_getattr(vp
, &va
, ctx
);
1559 if (va
.va_flags
& OPAQUE
) {
1565 if (lvp
!= NULLVP
) {
1567 error
= mac_vnode_check_open(ctx
, lvp
, FREAD
);
1573 error
= VNOP_OPEN(lvp
, FREAD
, ctx
);
1579 fp
->f_fglob
->fg_data
= (caddr_t
) lvp
;
1580 fp
->f_fglob
->fg_offset
= 0;
1582 error
= VNOP_CLOSE(vp
, FREAD
, ctx
);
1589 return -1; /* goto unionread */
1595 /* called from inactive with union lock held */
1597 union_dircache_free(struct union_node
*un
)
1601 vpp
= un
->un_dircache
;
1602 un
->un_dircache
= NULL
;
1605 for (; *vpp
!= NULLVP
; vpp
++)
1607 _FREE(un
->un_dircache
, M_TEMP
);