/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount_internal.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/uio_internal.h>
#include <miscfs/union/union.h>

#if DIAGNOSTIC
#include <sys/proc.h>
#endif

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

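/*
 * Initialize the union filesystem globals: the NHASH cache chains
 * used to find existing union_nodes and their per-chain lock words.
 */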
int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t) unvplock, sizeof(unvplock));

	return (0);
}

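/*
 * Lock one of the hash chains.  Returns 1 if the caller had to sleep
 * waiting for the lock (and must re-evaluate its state), 0 once the
 * lock is held; hence the "while (union_list_lock(h)) continue;"
 * retry loops at the call sites.
 */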
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANT;
		sleep((caddr_t) &unvplock[ix], PINOD);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

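/*
 * Drop the lock on a hash chain and wake up anyone who blocked in
 * union_list_lock() waiting for it.
 */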
static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANT) {
		unvplock[ix] &= ~UN_WANT;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

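/*
 * Replace the upper and/or lower vnode of a union_node, dropping the
 * references on the vnodes being replaced and rehashing the node onto
 * the cache chain that matches its new (uppervp, lowervp) pair.
 */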
void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vnode_put(un->un_lowervp);
			if (un->un_path) {
				_FREE(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vnode_put(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vnode_put(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

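/*
 * Convenience wrappers around union_updatevp() that replace only the
 * lower or only the upper vnode of a union_node.
 */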
void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %ld\n",
			uppersz != VNOVAL ? "upper" : "lower", (long) sz);
#endif
		ubc_setsize(vp, sz);
	}
}

/*
 * allocate a union_node/vnode pair. the vnode is
 * referenced and locked. the new vnode is returned
 * via (vpp). (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time. (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped. either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vnode_put'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list. new nodes are only allocated when they cannot
 * be found on this list. entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list. this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode. this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vnode_get() to
 * take a reference. this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un;
	struct union_node **pp;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash;
	int markroot;
	int try;
	struct union_node *unp;
	struct vnode_fsparam vfsp;
	enum vtype vtype;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	markroot = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				vnode_get(lowervp);
		}
		markroot = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
				un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vnode_get(UNIONTOV(un))) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be. this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vnode_put'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked. Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#if DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (current_proc() && un->un_pid != current_proc()->p_pid &&
				    un->un_pid > -1 && current_proc()->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vnode_put(UNIONTOV(un));
				un->un_flags |= UN_WANT;
				sleep((caddr_t) &un->un_flags, PINOD);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#if DIAGNOSTIC
			if (current_proc())
				un->un_pid = current_proc()->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vnode_put(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				vnode_get(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vnode_put(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

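	/*
	 * No existing union_node was found (or caching is disabled):
	 * allocate a new one and create its vnode via vnode_create(),
	 * marking it VROOT if it maps the root of the union mount.
	 */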
	MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);

	if (uppervp)
		vtype = uppervp->v_type;
	else
		vtype = lowervp->v_type;
	//bzero(&vfsp, sizeof(struct vnode_fsparam));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "unionfs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = unp;
	vfsp.vnfs_cnp = cnp;
	vfsp.vnfs_vops = union_vnodeop_p;
	vfsp.vnfs_rdev = 0;
	vfsp.vnfs_filesize = 0;
	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
	vfsp.vnfs_marksystem = 0;
	vfsp.vnfs_markroot = markroot;

	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
	if (error) {
		FREE(unp, M_TEMP);
		if (uppervp) {
			vnode_put(uppervp);
		}
		if (lowervp)
			vnode_put(lowervp);

		goto out;
	}

	(*vpp)->v_tag = VT_UNION;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		vnode_get(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#if DIAGNOSTIC
	if (current_proc())
		un->un_pid = current_proc()->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		vnode_get(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vnode_put(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

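/*
 * Release everything a union_node holds: its cache entry, the
 * references on its parent, upper, lower and directory vnodes, and
 * the saved pathname.  Called from the vfs reclaim path.
 */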
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vnode_put(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vnode_put(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vnode_put(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vnode_put(un->un_dirvp);
	if (un->un_path)
		_FREE(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile. copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes. both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
		struct proc *p)
{
	char *bufp;
	struct uio uio;
	struct iovec_32 iov;
	struct vfs_context context;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXPHYSIO.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	context.vc_proc = p;
	context.vc_ucred = cred;

#if 1	/* LP64todo - can't use new segment flags until the drivers are ready */
	uio.uio_segflg = UIO_SYSSPACE;
#else
	uio.uio_segflg = UIO_SYSSPACE32;
#endif
	uio.uio_offset = 0;

	bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iovs.iov32p = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = (uintptr_t)bufp;
		iov.iov_len = MAXPHYSIO;
		uio_setresid(&uio, iov.iov_len);
		uio.uio_rw = UIO_READ;
		error = VNOP_READ(fvp, &uio, 0, &context);

		if (error == 0) {
			uio.uio_iovs.iov32p = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = (uintptr_t)bufp;
			iov.iov_len = MAXPHYSIO - uio_resid(&uio);
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio_setresid(&uio, iov.iov_len);

			if (uio_resid(&uio) == 0)
				break;

			do {
				error = VNOP_WRITE(tvp, &uio, 0, &context);
			} while ((uio_resid(&uio) > 0) && (error == 0));
		}

	} while (error == 0);

	_FREE(bufp, M_TEMP);
	return (error);
}

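/*
 * Copy the lower layer file of (un) up to a newly created shadow
 * file in the upper layer, so that subsequent IO goes to the top.
 */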
/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
		struct proc *p)
{
	int error;
	struct vnode *lvp, *uvp;
	struct vfs_context context;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	context.vc_proc = p;
	context.vc_ucred = cred;

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from vnop_close
		 */
		error = VNOP_OPEN(lvp, FREAD, &context);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VNOP_CLOSE(lvp, FREAD, &context);
		}
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	un->un_flags &= ~UN_ULOCK;
	union_vn_close(uvp, FWRITE, cred, p);
	un->un_flags |= UN_ULOCK;

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right. This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VNOP_CLOSE(lvp, FREAD, &context);
			(void) VNOP_OPEN(uvp, FREAD, &context);
		}
		un->un_openl = 0;
	}

	return (error);

}

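/*
 * Common helper for union_mkshadow() and union_mkwhiteout():
 * fabricate a componentname for (path) and relookup() it in (dvp)
 * with CREATE intent, returning the result in (vpp).
 */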
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for. This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by vnop_lookup when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
	cn->cn_pnlen = cn->cn_namelen+1;
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
#ifdef XXX_HELP_ME
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
#endif
	cn->cn_context = cnp->cn_context;	/* XXX !UNMNT_ABOVE case ??? */
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	vnode_get(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vnode_put(dvp);

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vnode_attr va;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		vnode_put(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall). (jsp, kb)
	 */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VDIR);
	VATTR_SET(&va, va_mode, um->um_cmode);

	error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct vnode *wvp;
	struct componentname cn;

	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		return (error);
	}
	if (wvp) {
		vnode_put(dvp);
		vnode_put(wvp);
		return (EEXIST);
	}

	error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);

	vnode_put(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer. this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	kauth_cred_t cred = p->p_ucred;
	struct vnode_attr vat;
	struct vnode_attr *vap = &vat;
	struct vfs_context context;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
	char *cp;
	struct componentname cn;

	*vpp = NULLVP;

	context.vc_proc = p;
	context.vc_ucred = p->p_ucred;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
			M_NAMEI, M_WAITOK);
	cn.cn_pnlen = cn.cn_namelen+1;
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_context = &context;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

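	/*
	 * Look the name up again in the upper directory with CREATE
	 * intent.  Note: the early error return below does not drop
	 * the iocount taken on un_dirvp here.
	 */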
	vnode_get(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)))
		return (error);
	vnode_put(un->un_dirvp);

	if (vp) {
		vnode_put(un->un_dirvp);
		vnode_put(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it. The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask. Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files. Access when not unioned will simply
	 * require access to the top-level file.
	 *
	 * TODO: confirm choice of access permissions.
	 *       decide on authorisation behaviour
	 */

	VATTR_INIT(vap);
	VATTR_SET(vap, va_type, VREG);
	VATTR_SET(vap, va_mode, cmode);

	if ((error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, &context)))
		return (error);

	if ((error = VNOP_OPEN(vp, fmode, &context))) {
		vnode_put(vp);
		return (error);
	}

	vnode_lock(vp);
	if (++vp->v_writecount <= 0)
		panic("union: v_writecount");
	vnode_unlock(vp);
	*vpp = vp;
	return (0);
}

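/*
 * Close a shadow file opened by union_vn_create(), dropping (for
 * FWRITE opens) the v_writecount taken at create/open time before
 * calling VNOP_CLOSE().
 */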
int
union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred,
		struct proc *p)
{
	struct vfs_context context;

	context.vc_proc = p;
	context.vc_ucred = cred;

	if (fmode & FWRITE) {
		vnode_lock(vp);
		--vp->v_writecount;
		vnode_unlock(vp);
	}
	return (VNOP_CLOSE(vp, fmode, &context));
}

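/*
 * The upper layer object has been removed: detach it from the
 * union_node and take the node out of the cache.
 */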
void
union_removed_upper(un)
	struct union_node *un;
{
	struct proc *p = current_proc();	/* XXX */

	union_newupper(un, NULLVP);
	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vnode_get(un->un_lowervp) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(struct union_node *un, vfs_context_t ctx)
{
	struct vnode_attr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_flags);
	if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

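/*
 * Recursive helper for union_dircache(): with (vppp) nil it just
 * counts the non-union vnodes backing (vp); otherwise it takes a
 * reference on each one and appends it to the table at *(vppp).
 */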
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			vnode_get(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

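/*
 * Build (or step through) the table of layer vnodes backing a union
 * directory and return a fresh union vnode stacked on the next layer,
 * so that each layer's directory can be read in turn.
 */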
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int count;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		count = 0;
		union_dircache_r(vp, 0, &count);
		count++;
		dircache = (struct vnode **)
				_MALLOC(count * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &count);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vnode_get(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (error)
		goto out;

	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	return (nvp);
}