]> git.saurik.com Git - apple/xnu.git/blame - bsd/miscfs/union/union_subr.c
xnu-792.17.14.tar.gz
[apple/xnu.git] / bsd / miscfs / union / union_subr.c
CommitLineData
1c79356b 1/*
5d5c5d0d
A
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
8f6c56a5 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
8f6c56a5
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
8ad349bb 24 * limitations under the License.
8f6c56a5
A
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1994 Jan-Simon Pendry
31 * Copyright (c) 1994
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * Jan-Simon Pendry.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
66 */
67
68#include <sys/param.h>
69#include <sys/systm.h>
91447636
A
70#include <sys/proc_internal.h>
71#include <sys/kauth.h>
1c79356b
A
72#include <sys/time.h>
73#include <sys/kernel.h>
91447636 74#include <sys/vnode_internal.h>
1c79356b
A
75#include <sys/namei.h>
76#include <sys/malloc.h>
77#include <sys/file.h>
78#include <sys/filedesc.h>
79#include <sys/queue.h>
91447636 80#include <sys/mount_internal.h>
1c79356b
A
81#include <sys/stat.h>
82#include <sys/ubc.h>
91447636 83#include <sys/uio_internal.h>
1c79356b
A
84#include <miscfs/union/union.h>
85
86#if DIAGNOSTIC
87#include <sys/proc.h>
88#endif
89
/*
 * Number of hash chains in the union_node cache.
 * Must be a power of two, because UNION_HASH() reduces the hash with a
 * bit mask of (NHASH-1); otherwise change UNION_HASH().
 */
#define NHASH 32

/*
 * Map an (upper, lower) vnode pointer pair to a cache chain index:
 * both values are converted to unsigned long and added, the low 8 bits
 * of the sum are discarded and the result is masked into [0, NHASH).
 * Each argument is fully parenthesized so that an expression such as
 * "cond ? a : b" may be passed for either of them.
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
96
97static LIST_HEAD(unhead, union_node) unhead[NHASH];
98static int unvplock[NHASH];
99
100int
101union_init()
102{
103 int i;
104
105 for (i = 0; i < NHASH; i++)
106 LIST_INIT(&unhead[i]);
107 bzero((caddr_t) unvplock, sizeof(unvplock));
108}
109
110static int
111union_list_lock(ix)
112 int ix;
113{
114
115 if (unvplock[ix] & UN_LOCKED) {
116 unvplock[ix] |= UN_WANT;
117 sleep((caddr_t) &unvplock[ix], PINOD);
118 return (1);
119 }
120
121 unvplock[ix] |= UN_LOCKED;
122
123 return (0);
124}
125
126static void
127union_list_unlock(ix)
128 int ix;
129{
130
131 unvplock[ix] &= ~UN_LOCKED;
132
133 if (unvplock[ix] & UN_WANT) {
134 unvplock[ix] &= ~UN_WANT;
135 wakeup((caddr_t) &unvplock[ix]);
136 }
137}
138
139void
140union_updatevp(un, uppervp, lowervp)
141 struct union_node *un;
142 struct vnode *uppervp;
143 struct vnode *lowervp;
144{
145 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
146 int nhash = UNION_HASH(uppervp, lowervp);
147 int docache = (lowervp != NULLVP || uppervp != NULLVP);
91447636 148 int lhash, uhash;
1c79356b
A
149
150 /*
151 * Ensure locking is ordered from lower to higher
152 * to avoid deadlocks.
153 */
154 if (nhash < ohash) {
155 lhash = nhash;
156 uhash = ohash;
157 } else {
158 lhash = ohash;
159 uhash = nhash;
160 }
161
162 if (lhash != uhash)
163 while (union_list_lock(lhash))
164 continue;
165
166 while (union_list_lock(uhash))
167 continue;
168
169 if (ohash != nhash || !docache) {
170 if (un->un_flags & UN_CACHED) {
171 un->un_flags &= ~UN_CACHED;
172 LIST_REMOVE(un, un_cache);
173 }
174 }
175
176 if (ohash != nhash)
177 union_list_unlock(ohash);
178
179 if (un->un_lowervp != lowervp) {
180 if (un->un_lowervp) {
91447636 181 vnode_put(un->un_lowervp);
1c79356b
A
182 if (un->un_path) {
183 _FREE(un->un_path, M_TEMP);
184 un->un_path = 0;
185 }
186 if (un->un_dirvp) {
91447636 187 vnode_put(un->un_dirvp);
1c79356b
A
188 un->un_dirvp = NULLVP;
189 }
190 }
191 un->un_lowervp = lowervp;
192 un->un_lowersz = VNOVAL;
193 }
194
195 if (un->un_uppervp != uppervp) {
196 if (un->un_uppervp)
91447636 197 vnode_put(un->un_uppervp);
1c79356b
A
198
199 un->un_uppervp = uppervp;
200 un->un_uppersz = VNOVAL;
201 }
202
203 if (docache && (ohash != nhash)) {
204 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
205 un->un_flags |= UN_CACHED;
206 }
207
208 union_list_unlock(nhash);
209}
210
211void
212union_newlower(un, lowervp)
213 struct union_node *un;
214 struct vnode *lowervp;
215{
216
217 union_updatevp(un, un->un_uppervp, lowervp);
218}
219
220void
221union_newupper(un, uppervp)
222 struct union_node *un;
223 struct vnode *uppervp;
224{
225
226 union_updatevp(un, uppervp, un->un_lowervp);
227}
228
229/*
230 * Keep track of size changes in the underlying vnodes.
231 * If the size changes, then callback to the vm layer
232 * giving priority to the upper layer size.
233 */
234void
235union_newsize(vp, uppersz, lowersz)
236 struct vnode *vp;
237 off_t uppersz, lowersz;
238{
239 struct union_node *un;
240 off_t sz;
241
242 /* only interested in regular files */
243 if (vp->v_type != VREG)
244 return;
245
246 un = VTOUNION(vp);
247 sz = VNOVAL;
248
249 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
250 un->un_uppersz = uppersz;
251 if (sz == VNOVAL)
252 sz = un->un_uppersz;
253 }
254
255 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
256 un->un_lowersz = lowersz;
257 if (sz == VNOVAL)
258 sz = un->un_lowersz;
259 }
260
261 if (sz != VNOVAL) {
262#ifdef UNION_DIAGNOSTIC
263 printf("union: %s size now %ld\n",
264 uppersz != VNOVAL ? "upper" : "lower", (long) sz);
265#endif
91447636 266 ubc_setsize(vp, sz);
1c79356b
A
267 }
268}
269
270/*
271 * allocate a union_node/vnode pair. the vnode is
272 * referenced and locked. the new vnode is returned
273 * via (vpp). (mp) is the mountpoint of the union filesystem,
274 * (dvp) is the parent directory where the upper layer object
275 * should exist (but doesn't) and (cnp) is the componentname
276 * information which is partially copied to allow the upper
277 * layer object to be created at a later time. (uppervp)
278 * and (lowervp) reference the upper and lower layer objects
279 * being mapped. either, but not both, can be nil.
280 * if supplied, (uppervp) is locked.
281 * the reference is either maintained in the new union_node
91447636 282 * object which is allocated, or they are vnode_put'd.
1c79356b
A
283 *
284 * all union_nodes are maintained on a singly-linked
285 * list. new nodes are only allocated when they cannot
286 * be found on this list. entries on the list are
287 * removed when the vfs reclaim entry is called.
288 *
289 * a single lock is kept for the entire list. this is
290 * needed because the getnewvnode() function can block
291 * waiting for a vnode to become free, in which case there
292 * may be more than one process trying to get the same
293 * vnode. this lock is only taken if we are going to
294 * call getnewvnode, since the kernel itself is single-threaded.
295 *
91447636 296 * if an entry is found on the list, then call vnode_get() to
1c79356b
A
297 * take a reference. this is done because there may be
298 * zero references to it and so it needs to removed from
299 * the vnode free list.
300 */
301int
302union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
303 struct vnode **vpp;
304 struct mount *mp;
305 struct vnode *undvp; /* parent union vnode */
306 struct vnode *dvp; /* may be null */
307 struct componentname *cnp; /* may be null */
308 struct vnode *uppervp; /* may be null */
309 struct vnode *lowervp; /* may be null */
310 int docache;
311{
312 int error;
313 struct union_node *un;
314 struct union_node **pp;
315 struct vnode *xlowervp = NULLVP;
316 struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
317 int hash;
91447636 318 int markroot;
1c79356b
A
319 int try;
320 struct union_node *unp;
91447636
A
321 struct vnode_fsparam vfsp;
322 enum vtype vtype;
1c79356b
A
323
324 if (uppervp == NULLVP && lowervp == NULLVP)
325 panic("union: unidentifiable allocation");
326
327 if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
328 xlowervp = lowervp;
329 lowervp = NULLVP;
330 }
331
332 /* detect the root vnode (and aliases) */
91447636 333 markroot = 0;
1c79356b
A
334 if ((uppervp == um->um_uppervp) &&
335 ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
336 if (lowervp == NULLVP) {
337 lowervp = um->um_lowervp;
338 if (lowervp != NULLVP)
91447636 339 vnode_get(lowervp);
1c79356b 340 }
91447636 341 markroot = VROOT;
1c79356b
A
342 }
343
344loop:
345 if (!docache) {
346 un = 0;
347 } else for (try = 0; try < 3; try++) {
348 switch (try) {
349 case 0:
350 if (lowervp == NULLVP)
351 continue;
352 hash = UNION_HASH(uppervp, lowervp);
353 break;
354
355 case 1:
356 if (uppervp == NULLVP)
357 continue;
358 hash = UNION_HASH(uppervp, NULLVP);
359 break;
360
361 case 2:
362 if (lowervp == NULLVP)
363 continue;
364 hash = UNION_HASH(NULLVP, lowervp);
365 break;
366 }
367
368 while (union_list_lock(hash))
369 continue;
370
371 for (un = unhead[hash].lh_first; un != 0;
372 un = un->un_cache.le_next) {
373 if ((un->un_lowervp == lowervp ||
374 un->un_lowervp == NULLVP) &&
375 (un->un_uppervp == uppervp ||
376 un->un_uppervp == NULLVP) &&
377 (UNIONTOV(un)->v_mount == mp)) {
91447636 378 if (vnode_get(UNIONTOV(un))) {
1c79356b
A
379 union_list_unlock(hash);
380 goto loop;
381 }
382 break;
383 }
384 }
385
386 union_list_unlock(hash);
387
388 if (un)
389 break;
390 }
391
392 if (un) {
393 /*
394 * Obtain a lock on the union_node.
395 * uppervp is locked, though un->un_uppervp
396 * may not be. this doesn't break the locking
397 * hierarchy since in the case that un->un_uppervp
91447636 398 * is not yet locked it will be vnode_put'd and replaced
1c79356b
A
399 * with uppervp.
400 */
401
402 if ((dvp != NULLVP) && (uppervp == dvp)) {
403 /*
404 * Access ``.'', so (un) will already
405 * be locked. Since this process has
406 * the lock on (uppervp) no other
407 * process can hold the lock on (un).
408 */
409#if DIAGNOSTIC
410 if ((un->un_flags & UN_LOCKED) == 0)
411 panic("union: . not locked");
412 else if (current_proc() && un->un_pid != current_proc()->p_pid &&
413 un->un_pid > -1 && current_proc()->p_pid > -1)
414 panic("union: allocvp not lock owner");
415#endif
416 } else {
417 if (un->un_flags & UN_LOCKED) {
91447636 418 vnode_put(UNIONTOV(un));
1c79356b
A
419 un->un_flags |= UN_WANT;
420 sleep((caddr_t) &un->un_flags, PINOD);
421 goto loop;
422 }
423 un->un_flags |= UN_LOCKED;
424
425#if DIAGNOSTIC
426 if (current_proc())
427 un->un_pid = current_proc()->p_pid;
428 else
429 un->un_pid = -1;
430#endif
431 }
432
433 /*
434 * At this point, the union_node is locked,
435 * un->un_uppervp may not be locked, and uppervp
436 * is locked or nil.
437 */
438
439 /*
440 * Save information about the upper layer.
441 */
442 if (uppervp != un->un_uppervp) {
443 union_newupper(un, uppervp);
444 } else if (uppervp) {
91447636 445 vnode_put(uppervp);
1c79356b
A
446 }
447
448 if (un->un_uppervp) {
449 un->un_flags |= UN_ULOCK;
450 un->un_flags &= ~UN_KLOCK;
451 }
452
453 /*
454 * Save information about the lower layer.
455 * This needs to keep track of pathname
456 * and directory information which union_vn_create
457 * might need.
458 */
459 if (lowervp != un->un_lowervp) {
460 union_newlower(un, lowervp);
461 if (cnp && (lowervp != NULLVP)) {
462 un->un_hash = cnp->cn_hash;
463 MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
464 M_TEMP, M_WAITOK);
465 bcopy(cnp->cn_nameptr, un->un_path,
466 cnp->cn_namelen);
467 un->un_path[cnp->cn_namelen] = '\0';
91447636 468 vnode_get(dvp);
1c79356b
A
469 un->un_dirvp = dvp;
470 }
471 } else if (lowervp) {
91447636 472 vnode_put(lowervp);
1c79356b
A
473 }
474 *vpp = UNIONTOV(un);
475 return (0);
476 }
477
478 if (docache) {
479 /*
480 * otherwise lock the vp list while we call getnewvnode
481 * since that can block.
482 */
483 hash = UNION_HASH(uppervp, lowervp);
484
485 if (union_list_lock(hash))
486 goto loop;
487 }
488
489 MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);
91447636
A
490
491 if (uppervp)
492 vtype = uppervp->v_type;
493 else
494 vtype = lowervp->v_type;
495 //bzero(&vfsp, sizeof(struct vnode_fsparam));
496 vfsp.vnfs_mp = mp;
497 vfsp.vnfs_vtype = vtype;
498 vfsp.vnfs_str = "unionfs";
499 vfsp.vnfs_dvp = dvp;
500 vfsp.vnfs_fsnode = unp;
501 vfsp.vnfs_cnp = cnp;
502 vfsp.vnfs_vops = union_vnodeop_p;
503 vfsp.vnfs_rdev = 0;
504 vfsp.vnfs_filesize = 0;
505 vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
506 vfsp.vnfs_marksystem = 0;
507 vfsp.vnfs_markroot = markroot;
508
509 error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
1c79356b
A
510 if (error) {
511 FREE(unp, M_TEMP);
512 if (uppervp) {
91447636 513 vnode_put(uppervp);
1c79356b
A
514 }
515 if (lowervp)
91447636 516 vnode_put(lowervp);
1c79356b
A
517
518 goto out;
519 }
520
91447636 521 (*vpp)->v_tag = VT_UNION;
1c79356b
A
522 un = VTOUNION(*vpp);
523 un->un_vnode = *vpp;
524 un->un_uppervp = uppervp;
525 un->un_uppersz = VNOVAL;
526 un->un_lowervp = lowervp;
527 un->un_lowersz = VNOVAL;
528 un->un_pvp = undvp;
529 if (undvp != NULLVP)
91447636 530 vnode_get(undvp);
1c79356b
A
531 un->un_dircache = 0;
532 un->un_openl = 0;
533 un->un_flags = UN_LOCKED;
534 if (un->un_uppervp)
535 un->un_flags |= UN_ULOCK;
536#if DIAGNOSTIC
537 if (current_proc())
538 un->un_pid = current_proc()->p_pid;
539 else
540 un->un_pid = -1;
541#endif
542 if (cnp && (lowervp != NULLVP)) {
543 un->un_hash = cnp->cn_hash;
544 un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
545 bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
546 un->un_path[cnp->cn_namelen] = '\0';
91447636 547 vnode_get(dvp);
1c79356b
A
548 un->un_dirvp = dvp;
549 } else {
550 un->un_hash = 0;
551 un->un_path = 0;
552 un->un_dirvp = 0;
553 }
554
555 if (docache) {
556 LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
557 un->un_flags |= UN_CACHED;
558 }
559
560 if (xlowervp)
91447636 561 vnode_put(xlowervp);
1c79356b
A
562
563out:
564 if (docache)
565 union_list_unlock(hash);
566
567 return (error);
568}
569
570int
571union_freevp(vp)
572 struct vnode *vp;
573{
574 struct union_node *un = VTOUNION(vp);
575
576 if (un->un_flags & UN_CACHED) {
577 un->un_flags &= ~UN_CACHED;
578 LIST_REMOVE(un, un_cache);
579 }
580
581 if (un->un_pvp != NULLVP)
91447636 582 vnode_put(un->un_pvp);
1c79356b 583 if (un->un_uppervp != NULLVP)
91447636 584 vnode_put(un->un_uppervp);
1c79356b 585 if (un->un_lowervp != NULLVP)
91447636 586 vnode_put(un->un_lowervp);
1c79356b 587 if (un->un_dirvp != NULLVP)
91447636 588 vnode_put(un->un_dirvp);
1c79356b
A
589 if (un->un_path)
590 _FREE(un->un_path, M_TEMP);
591
592 FREE(vp->v_data, M_TEMP);
593 vp->v_data = 0;
594
595 return (0);
596}
597
598/*
599 * copyfile. copy the vnode (fvp) to the vnode (tvp)
600 * using a sequence of reads and writes. both (fvp)
601 * and (tvp) are locked on entry and exit.
602 */
603int
91447636
A
604union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
605 struct proc *p)
1c79356b 606{
91447636 607 char *bufp;
1c79356b 608 struct uio uio;
91447636
A
609 struct iovec_32 iov;
610 struct vfs_context context;
1c79356b
A
611 int error = 0;
612
613 /*
614 * strategy:
615 * allocate a buffer of size MAXPHYSIO.
616 * loop doing reads and writes, keeping track
617 * of the current uio offset.
618 * give up at the first sign of trouble.
619 */
620
91447636
A
621 context.vc_proc = p;
622 context.vc_ucred = cred;
623
624#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */
1c79356b 625 uio.uio_segflg = UIO_SYSSPACE;
91447636
A
626#else
627 uio.uio_segflg = UIO_SYSSPACE32;
628#endif
1c79356b
A
629 uio.uio_offset = 0;
630
91447636 631 bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);
1c79356b
A
632
633 /* ugly loop follows... */
634 do {
635 off_t offset = uio.uio_offset;
636
91447636 637 uio.uio_iovs.iov32p = &iov;
1c79356b 638 uio.uio_iovcnt = 1;
91447636 639 iov.iov_base = (uintptr_t)bufp;
1c79356b 640 iov.iov_len = MAXPHYSIO;
91447636 641 uio_setresid(&uio, iov.iov_len);
1c79356b 642 uio.uio_rw = UIO_READ;
91447636 643 error = VNOP_READ(fvp, &uio, 0, &context);
1c79356b
A
644
645 if (error == 0) {
91447636 646 uio.uio_iovs.iov32p = &iov;
1c79356b 647 uio.uio_iovcnt = 1;
91447636
A
648 iov.iov_base = (uintptr_t)bufp;
649 iov.iov_len = MAXPHYSIO - uio_resid(&uio);
1c79356b
A
650 uio.uio_offset = offset;
651 uio.uio_rw = UIO_WRITE;
91447636 652 uio_setresid(&uio, iov.iov_len);
1c79356b 653
91447636 654 if (uio_resid(&uio) == 0)
1c79356b
A
655 break;
656
657 do {
91447636
A
658 error = VNOP_WRITE(tvp, &uio, 0, &context);
659 } while ((uio_resid(&uio) > 0) && (error == 0));
1c79356b
A
660 }
661
662 } while (error == 0);
663
91447636 664 _FREE(bufp, M_TEMP);
1c79356b
A
665 return (error);
666}
667
668/*
669 * (un) is assumed to be locked on entry and remains
670 * locked on exit.
671 */
672int
91447636
A
673union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
674 struct proc *p)
1c79356b
A
675{
676 int error;
677 struct vnode *lvp, *uvp;
91447636 678 struct vfs_context context;
1c79356b
A
679
680 error = union_vn_create(&uvp, un, p);
681 if (error)
682 return (error);
683
91447636
A
684 context.vc_proc = p;
685 context.vc_ucred = cred;
686
1c79356b
A
687 /* at this point, uppervp is locked */
688 union_newupper(un, uvp);
689 un->un_flags |= UN_ULOCK;
690
691 lvp = un->un_lowervp;
692
693 if (docopy) {
694 /*
695 * XX - should not ignore errors
91447636 696 * from vnop_close
1c79356b 697 */
91447636 698 error = VNOP_OPEN(lvp, FREAD, &context);
1c79356b
A
699 if (error == 0) {
700 error = union_copyfile(lvp, uvp, cred, p);
91447636 701 (void) VNOP_CLOSE(lvp, FREAD, &context);
1c79356b
A
702 }
703#ifdef UNION_DIAGNOSTIC
704 if (error == 0)
705 uprintf("union: copied up %s\n", un->un_path);
706#endif
707
708 }
709 un->un_flags &= ~UN_ULOCK;
1c79356b 710 union_vn_close(uvp, FWRITE, cred, p);
1c79356b
A
711 un->un_flags |= UN_ULOCK;
712
713 /*
714 * Subsequent IOs will go to the top layer, so
715 * call close on the lower vnode and open on the
716 * upper vnode to ensure that the filesystem keeps
717 * its references counts right. This doesn't do
718 * the right thing with (cred) and (FREAD) though.
719 * Ignoring error returns is not right, either.
720 */
721 if (error == 0) {
722 int i;
723
724 for (i = 0; i < un->un_openl; i++) {
91447636
A
725 (void) VNOP_CLOSE(lvp, FREAD, &context);
726 (void) VNOP_OPEN(uvp, FREAD, &context);
1c79356b
A
727 }
728 un->un_openl = 0;
729 }
730
731 return (error);
732
733}
734
735static int
736union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
737 struct union_mount *um;
738 struct vnode *dvp;
739 struct vnode **vpp;
740 struct componentname *cnp;
741 struct componentname *cn;
742 char *path;
743 int pathlen;
744{
745 int error;
746
747 /*
748 * A new componentname structure must be faked up because
749 * there is no way to know where the upper level cnp came
750 * from or what it is being used for. This must duplicate
751 * some of the work done by NDINIT, some of the work done
752 * by namei, some of the work done by lookup and some of
91447636 753 * the work done by vnop_lookup when given a CREATE flag.
1c79356b 754 * Conclusion: Horrible.
1c79356b
A
755 */
756 cn->cn_namelen = pathlen;
757 cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
758 cn->cn_pnlen = cn->cn_namelen+1;
759 bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
760 cn->cn_pnbuf[cn->cn_namelen] = '\0';
761
762 cn->cn_nameiop = CREATE;
763 cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
91447636 764#ifdef XXX_HELP_ME
1c79356b
A
765 cn->cn_proc = cnp->cn_proc;
766 if (um->um_op == UNMNT_ABOVE)
767 cn->cn_cred = cnp->cn_cred;
768 else
769 cn->cn_cred = um->um_cred;
91447636
A
770#endif
771 cn->cn_context = cnp->cn_context; /* XXX !UNMNT_ABOVE case ??? */
1c79356b
A
772 cn->cn_nameptr = cn->cn_pnbuf;
773 cn->cn_hash = cnp->cn_hash;
774 cn->cn_consume = cnp->cn_consume;
775
91447636 776 vnode_get(dvp);
1c79356b
A
777 error = relookup(dvp, vpp, cn);
778 if (!error)
91447636 779 vnode_put(dvp);
1c79356b
A
780
781 return (error);
782}
783
784/*
785 * Create a shadow directory in the upper layer.
786 * The new vnode is returned locked.
787 *
788 * (um) points to the union mount structure for access to the
789 * the mounting process's credentials.
790 * (dvp) is the directory in which to create the shadow directory.
791 * it is unlocked on entry and exit.
792 * (cnp) is the componentname to be created.
793 * (vpp) is the returned newly created shadow directory, which
794 * is returned locked.
795 */
796int
797union_mkshadow(um, dvp, cnp, vpp)
798 struct union_mount *um;
799 struct vnode *dvp;
800 struct componentname *cnp;
801 struct vnode **vpp;
802{
803 int error;
91447636 804 struct vnode_attr va;
1c79356b
A
805 struct componentname cn;
806
807 error = union_relookup(um, dvp, vpp, cnp, &cn,
808 cnp->cn_nameptr, cnp->cn_namelen);
809 if (error)
810 return (error);
811
812 if (*vpp) {
91447636 813 vnode_put(*vpp);
1c79356b
A
814 *vpp = NULLVP;
815 return (EEXIST);
816 }
817
818 /*
819 * policy: when creating the shadow directory in the
820 * upper layer, create it owned by the user who did
821 * the mount, group from parent directory, and mode
822 * 777 modified by umask (ie mostly identical to the
823 * mkdir syscall). (jsp, kb)
824 */
91447636
A
825 VATTR_INIT(&va);
826 VATTR_SET(&va, va_type, VDIR);
827 VATTR_SET(&va, va_mode, um->um_cmode);
1c79356b 828
91447636 829 error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
1c79356b
A
830 return (error);
831}
832
833/*
834 * Create a whiteout entry in the upper layer.
835 *
836 * (um) points to the union mount structure for access to the
837 * the mounting process's credentials.
838 * (dvp) is the directory in which to create the whiteout.
839 * it is locked on entry and exit.
840 * (cnp) is the componentname to be created.
841 */
842int
843union_mkwhiteout(um, dvp, cnp, path)
844 struct union_mount *um;
845 struct vnode *dvp;
846 struct componentname *cnp;
847 char *path;
848{
849 int error;
1c79356b
A
850 struct vnode *wvp;
851 struct componentname cn;
852
1c79356b
A
853 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
854 if (error) {
1c79356b
A
855 return (error);
856 }
1c79356b 857 if (wvp) {
91447636
A
858 vnode_put(dvp);
859 vnode_put(wvp);
1c79356b
A
860 return (EEXIST);
861 }
862
91447636 863 error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);
1c79356b 864
91447636 865 vnode_put(dvp);
1c79356b
A
866
867 return (error);
868}
869
870/*
871 * union_vn_create: creates and opens a new shadow file
872 * on the upper union layer. this function is similar
873 * in spirit to calling vn_open but it avoids calling namei().
874 * the problem with calling namei is that a) it locks too many
875 * things, and b) it doesn't start at the "right" directory,
876 * whereas relookup is told where to start.
877 */
878int
879union_vn_create(vpp, un, p)
880 struct vnode **vpp;
881 struct union_node *un;
882 struct proc *p;
883{
884 struct vnode *vp;
91447636
A
885 kauth_cred_t cred = p->p_ucred;
886 struct vnode_attr vat;
887 struct vnode_attr *vap = &vat;
888 struct vfs_context context;
1c79356b
A
889 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
890 int error;
891 int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
892 char *cp;
893 struct componentname cn;
894
895 *vpp = NULLVP;
896
91447636
A
897 context.vc_proc = p;
898 context.vc_ucred = p->p_ucred;
899
1c79356b
A
900 /*
901 * Build a new componentname structure (for the same
902 * reasons outlines in union_mkshadow).
903 * The difference here is that the file is owned by
904 * the current user, rather than by the person who
905 * did the mount, since the current user needs to be
906 * able to write the file (that's why it is being
907 * copied in the first place).
908 */
909 cn.cn_namelen = strlen(un->un_path);
910 cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
911 M_NAMEI, M_WAITOK);
912 cn.cn_pnlen = cn.cn_namelen+1;
913 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
914 cn.cn_nameiop = CREATE;
915 cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
91447636 916 cn.cn_context = &context;
1c79356b
A
917 cn.cn_nameptr = cn.cn_pnbuf;
918 cn.cn_hash = un->un_hash;
919 cn.cn_consume = 0;
920
91447636 921 vnode_get(un->un_dirvp);
1c79356b
A
922 if (error = relookup(un->un_dirvp, &vp, &cn))
923 return (error);
91447636 924 vnode_put(un->un_dirvp);
1c79356b
A
925
926 if (vp) {
91447636
A
927 vnode_put(un->un_dirvp);
928 vnode_put(vp);
1c79356b
A
929 return (EEXIST);
930 }
931
932 /*
933 * Good - there was no race to create the file
934 * so go ahead and create it. The permissions
935 * on the file will be 0666 modified by the
936 * current user's umask. Access to the file, while
937 * it is unioned, will require access to the top *and*
938 * bottom files. Access when not unioned will simply
939 * require access to the top-level file.
91447636 940 *
1c79356b 941 * TODO: confirm choice of access permissions.
91447636 942 * decide on authorisation behaviour
1c79356b 943 */
91447636
A
944
945 VATTR_INIT(vap);
946 VATTR_SET(vap, va_type, VREG);
947 VATTR_SET(vap, va_mode, cmode);
948
949 if (error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, &context))
1c79356b
A
950 return (error);
951
91447636
A
952 if (error = VNOP_OPEN(vp, fmode, &context)) {
953 vnode_put(vp);
1c79356b
A
954 return (error);
955 }
956
91447636 957 vnode_lock(vp);
1c79356b
A
958 if (++vp->v_writecount <= 0)
959 panic("union: v_writecount");
91447636 960 vnode_unlock(vp);
1c79356b
A
961 *vpp = vp;
962 return (0);
963}
964
965int
91447636
A
966union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred,
967 struct proc *p)
1c79356b 968{
91447636
A
969 struct vfs_context context;
970
971 context.vc_proc = p;
972 context.vc_ucred = cred;
1c79356b 973
91447636
A
974 if (fmode & FWRITE) {
975 vnode_lock(vp);
1c79356b 976 --vp->v_writecount;
91447636
A
977 vnode_unlock(vp);
978 }
979 return (VNOP_CLOSE(vp, fmode, &context));
1c79356b
A
980}
981
982void
983union_removed_upper(un)
984 struct union_node *un;
985{
986 struct proc *p = current_proc(); /* XXX */
987
988 union_newupper(un, NULLVP);
989 if (un->un_flags & UN_CACHED) {
990 un->un_flags &= ~UN_CACHED;
991 LIST_REMOVE(un, un_cache);
992 }
993
994 if (un->un_flags & UN_ULOCK) {
995 un->un_flags &= ~UN_ULOCK;
1c79356b
A
996 }
997}
998
#if 0
/*
 * (compiled out)
 * Return the lower layer vnode of (vp) with a reference taken via
 * vnode_get(), or NULLVP when there is no lower vnode, its type
 * differs from that of (vp), or the reference could not be taken.
 */
struct vnode *
union_lowervp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_lowervp == NULLVP)
		return (NULLVP);
	if (vp->v_type != un->un_lowervp->v_type)
		return (NULLVP);
	if (vnode_get(un->un_lowervp) != 0)
		return (NULLVP);

	return (un->un_lowervp);
}
#endif
1015
1016/*
1017 * determine whether a whiteout is needed
1018 * during a remove/rmdir operation.
1019 */
1020int
91447636 1021union_dowhiteout(struct union_node *un, vfs_context_t ctx)
1c79356b 1022{
91447636 1023 struct vnode_attr va;
1c79356b
A
1024
1025 if (un->un_lowervp != NULLVP)
1026 return (1);
1027
91447636
A
1028 VATTR_INIT(&va);
1029 VATTR_WANTED(&va, va_flags);
1030 if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
1c79356b
A
1031 (va.va_flags & OPAQUE))
1032 return (1);
1033
1034 return (0);
1035}
1036
1037static void
1038union_dircache_r(vp, vppp, cntp)
1039 struct vnode *vp;
1040 struct vnode ***vppp;
1041 int *cntp;
1042{
1043 struct union_node *un;
1044
1045 if (vp->v_op != union_vnodeop_p) {
1046 if (vppp) {
91447636 1047 vnode_get(vp);
1c79356b
A
1048 *(*vppp)++ = vp;
1049 if (--(*cntp) == 0)
1050 panic("union: dircache table too small");
1051 } else {
1052 (*cntp)++;
1053 }
1054
1055 return;
1056 }
1057
1058 un = VTOUNION(vp);
1059 if (un->un_uppervp != NULLVP)
1060 union_dircache_r(un->un_uppervp, vppp, cntp);
1061 if (un->un_lowervp != NULLVP)
1062 union_dircache_r(un->un_lowervp, vppp, cntp);
1063}
1064
1065struct vnode *
1066union_dircache(vp, p)
1067 struct vnode *vp;
1068 struct proc *p;
1069{
91447636 1070 int count;
1c79356b
A
1071 struct vnode *nvp;
1072 struct vnode **vpp;
1073 struct vnode **dircache;
1074 struct union_node *un;
1075 int error;
1076
1c79356b
A
1077 dircache = VTOUNION(vp)->un_dircache;
1078
1079 nvp = NULLVP;
1080
1081 if (dircache == 0) {
91447636
A
1082 count = 0;
1083 union_dircache_r(vp, 0, &count);
1084 count++;
1c79356b 1085 dircache = (struct vnode **)
91447636 1086 _MALLOC(count * sizeof(struct vnode *),
1c79356b
A
1087 M_TEMP, M_WAITOK);
1088 vpp = dircache;
91447636 1089 union_dircache_r(vp, &vpp, &count);
1c79356b
A
1090 *vpp = NULLVP;
1091 vpp = dircache + 1;
1092 } else {
1093 vpp = dircache;
1094 do {
1095 if (*vpp++ == VTOUNION(vp)->un_uppervp)
1096 break;
1097 } while (*vpp != NULLVP);
1098 }
1099
1100 if (*vpp == NULLVP)
1101 goto out;
1102
91447636 1103 vnode_get(*vpp);
1c79356b
A
1104 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
1105 if (error)
1106 goto out;
1107
1108 VTOUNION(vp)->un_dircache = 0;
1109 un = VTOUNION(nvp);
1110 un->un_dircache = dircache;
1111
1112out:
1c79356b
A
1113 return (nvp);
1114}