]> git.saurik.com Git - apple/xnu.git/blob - bsd/miscfs/union/union_subr.c
xnu-792.tar.gz
[apple/xnu.git] / bsd / miscfs / union / union_subr.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1994 Jan-Simon Pendry
25 * Copyright (c) 1994
26 * The Regents of the University of California. All rights reserved.
27 *
28 * This code is derived from software contributed to Berkeley by
29 * Jan-Simon Pendry.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. All advertising materials mentioning features or use of this software
40 * must display the following acknowledgement:
41 * This product includes software developed by the University of
42 * California, Berkeley and its contributors.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
60 */
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/proc_internal.h>
65 #include <sys/kauth.h>
66 #include <sys/time.h>
67 #include <sys/kernel.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/namei.h>
70 #include <sys/malloc.h>
71 #include <sys/file.h>
72 #include <sys/filedesc.h>
73 #include <sys/queue.h>
74 #include <sys/mount_internal.h>
75 #include <sys/stat.h>
76 #include <sys/ubc.h>
77 #include <sys/uio_internal.h>
78 #include <miscfs/union/union.h>
79
80 #if DIAGNOSTIC
81 #include <sys/proc.h>
82 #endif
83
84 /* must be power of two, otherwise change UNION_HASH() */
85 #define NHASH 32
86
87 /* unsigned int ... */
88 #define UNION_HASH(u, l) \
89 (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
90
91 static LIST_HEAD(unhead, union_node) unhead[NHASH];
92 static int unvplock[NHASH];
93
94 int
95 union_init()
96 {
97 int i;
98
99 for (i = 0; i < NHASH; i++)
100 LIST_INIT(&unhead[i]);
101 bzero((caddr_t) unvplock, sizeof(unvplock));
102 }
103
104 static int
105 union_list_lock(ix)
106 int ix;
107 {
108
109 if (unvplock[ix] & UN_LOCKED) {
110 unvplock[ix] |= UN_WANT;
111 sleep((caddr_t) &unvplock[ix], PINOD);
112 return (1);
113 }
114
115 unvplock[ix] |= UN_LOCKED;
116
117 return (0);
118 }
119
120 static void
121 union_list_unlock(ix)
122 int ix;
123 {
124
125 unvplock[ix] &= ~UN_LOCKED;
126
127 if (unvplock[ix] & UN_WANT) {
128 unvplock[ix] &= ~UN_WANT;
129 wakeup((caddr_t) &unvplock[ix]);
130 }
131 }
132
/*
 * Re-point a union node at a new (uppervp, lowervp) pair.  Moves the
 * node between hash chains when the pair's hash changes, and drops the
 * references the node held on any vnodes being replaced.  The node
 * falls out of the cache entirely when both new vnodes are nil.
 */
void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	/* Leaving the old chain, or dropping out of the cache altogether. */
	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	/*
	 * Replace the lower vnode.  The saved pathname and directory
	 * reference (used by union_vn_create) described the old lower
	 * object, so release them along with it.
	 */
	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vnode_put(un->un_lowervp);
			if (un->un_path) {
				_FREE(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vnode_put(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;	/* cached size now unknown */
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vnode_put(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;	/* cached size now unknown */
	}

	/* Re-insert on the new chain when the node is still cacheable. */
	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}
204
205 void
206 union_newlower(un, lowervp)
207 struct union_node *un;
208 struct vnode *lowervp;
209 {
210
211 union_updatevp(un, un->un_uppervp, lowervp);
212 }
213
214 void
215 union_newupper(un, uppervp)
216 struct union_node *un;
217 struct vnode *uppervp;
218 {
219
220 union_updatevp(un, uppervp, un->un_lowervp);
221 }
222
223 /*
224 * Keep track of size changes in the underlying vnodes.
225 * If the size changes, then callback to the vm layer
226 * giving priority to the upper layer size.
227 */
228 void
229 union_newsize(vp, uppersz, lowersz)
230 struct vnode *vp;
231 off_t uppersz, lowersz;
232 {
233 struct union_node *un;
234 off_t sz;
235
236 /* only interested in regular files */
237 if (vp->v_type != VREG)
238 return;
239
240 un = VTOUNION(vp);
241 sz = VNOVAL;
242
243 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
244 un->un_uppersz = uppersz;
245 if (sz == VNOVAL)
246 sz = un->un_uppersz;
247 }
248
249 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
250 un->un_lowersz = lowersz;
251 if (sz == VNOVAL)
252 sz = un->un_lowersz;
253 }
254
255 if (sz != VNOVAL) {
256 #ifdef UNION_DIAGNOSTIC
257 printf("union: %s size now %ld\n",
258 uppersz != VNOVAL ? "upper" : "lower", (long) sz);
259 #endif
260 ubc_setsize(vp, sz);
261 }
262 }
263
264 /*
265 * allocate a union_node/vnode pair. the vnode is
266 * referenced and locked. the new vnode is returned
267 * via (vpp). (mp) is the mountpoint of the union filesystem,
268 * (dvp) is the parent directory where the upper layer object
269 * should exist (but doesn't) and (cnp) is the componentname
270 * information which is partially copied to allow the upper
271 * layer object to be created at a later time. (uppervp)
272 * and (lowervp) reference the upper and lower layer objects
273 * being mapped. either, but not both, can be nil.
274 * if supplied, (uppervp) is locked.
275 * the reference is either maintained in the new union_node
276 * object which is allocated, or they are vnode_put'd.
277 *
278 * all union_nodes are maintained on a singly-linked
279 * list. new nodes are only allocated when they cannot
280 * be found on this list. entries on the list are
281 * removed when the vfs reclaim entry is called.
282 *
283 * a single lock is kept for the entire list. this is
284 * needed because the getnewvnode() function can block
285 * waiting for a vnode to become free, in which case there
286 * may be more than one process trying to get the same
287 * vnode. this lock is only taken if we are going to
288 * call getnewvnode, since the kernel itself is single-threaded.
289 *
290 * if an entry is found on the list, then call vnode_get() to
291 * take a reference. this is done because there may be
292 * zero references to it and so it needs to removed from
293 * the vnode free list.
294 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un;
	struct union_node **pp;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash;
	int markroot;
	int try;
	struct union_node *unp;
	struct vnode_fsparam vfsp;
	enum vtype vtype;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * If the two layers disagree on type, ignore the lower layer
	 * but remember it in xlowervp so its reference can be released
	 * at the end.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	markroot = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				vnode_get(lowervp);
		}
		markroot = VROOT;
	}

loop:
	/*
	 * Look for an existing cached node.  Three probes are tried:
	 * the exact (upper, lower) pair, then upper-only, then
	 * lower-only, so partially-constructed nodes can be matched.
	 */
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
			un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			    un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			    un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				/* A failed vnode_get means the vnode is in
				 * transition: restart the whole lookup. */
				if (vnode_get(UNIONTOV(un))) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vnode_put'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#if DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (current_proc() && un->un_pid != current_proc()->p_pid &&
				    un->un_pid > -1 && current_proc()->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				/* Node busy: drop our hold, wait for the
				 * owner's wakeup, and retry from scratch. */
				vnode_put(UNIONTOV(un));
				un->un_flags |= UN_WANT;
				sleep((caddr_t) &un->un_flags, PINOD);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#if DIAGNOSTIC
			if (current_proc())
				un->un_pid = current_proc()->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			/* Node already holds a reference; drop the caller's. */
			vnode_put(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				vnode_get(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			/* Node already holds a reference; drop the caller's. */
			vnode_put(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);

	if (uppervp)
		vtype = uppervp->v_type;
	else
		vtype = lowervp->v_type;
	//bzero(&vfsp, sizeof(struct vnode_fsparam));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "unionfs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = unp;
	vfsp.vnfs_cnp = cnp;
	vfsp.vnfs_vops = union_vnodeop_p;
	vfsp.vnfs_rdev = 0;
	vfsp.vnfs_filesize = 0;
	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
	vfsp.vnfs_marksystem = 0;
	vfsp.vnfs_markroot = markroot;

	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
	if (error) {
		/* Creation failed: release everything we were handed. */
		FREE(unp, M_TEMP);
		if (uppervp) {
			vnode_put(uppervp);
		}
		if (lowervp)
			vnode_put(lowervp);

		goto out;
	}

	/* New node: the vnode inherits the references to both layers. */
	(*vpp)->v_tag = VT_UNION;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		vnode_get(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#if DIAGNOSTIC
	if (current_proc())
		un->un_pid = current_proc()->p_pid;
	else
		un->un_pid = -1;
#endif
	/* Remember name/directory info so a copy-up can create the file. */
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		vnode_get(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	/* Drop the type-mismatched lower vnode remembered earlier. */
	if (xlowervp)
		vnode_put(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
563
564 int
565 union_freevp(vp)
566 struct vnode *vp;
567 {
568 struct union_node *un = VTOUNION(vp);
569
570 if (un->un_flags & UN_CACHED) {
571 un->un_flags &= ~UN_CACHED;
572 LIST_REMOVE(un, un_cache);
573 }
574
575 if (un->un_pvp != NULLVP)
576 vnode_put(un->un_pvp);
577 if (un->un_uppervp != NULLVP)
578 vnode_put(un->un_uppervp);
579 if (un->un_lowervp != NULLVP)
580 vnode_put(un->un_lowervp);
581 if (un->un_dirvp != NULLVP)
582 vnode_put(un->un_dirvp);
583 if (un->un_path)
584 _FREE(un->un_path, M_TEMP);
585
586 FREE(vp->v_data, M_TEMP);
587 vp->v_data = 0;
588
589 return (0);
590 }
591
/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * Returns 0 on success or the first VNOP_READ/VNOP_WRITE error.
 */
int
union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
	struct proc *p)
{
	char *bufp;
	struct uio uio;
	struct iovec_32 iov;
	struct vfs_context context;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXPHYSIO.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	context.vc_proc = p;
	context.vc_ucred = cred;

#if 1	/* LP64todo - can't use new segment flags until the drivers are ready */
	uio.uio_segflg = UIO_SYSSPACE;
#else
	uio.uio_segflg = UIO_SYSSPACE32;
#endif
	uio.uio_offset = 0;

	bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		/* Remember where this chunk started; the read advances
		 * uio_offset and we must rewind before writing. */
		off_t offset = uio.uio_offset;

		/* The iovec must be re-initialized each pass: the uio
		 * machinery consumes it during the transfer. */
		uio.uio_iovs.iov32p = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = (uintptr_t)bufp;
		iov.iov_len = MAXPHYSIO;
		uio_setresid(&uio, iov.iov_len);
		uio.uio_rw = UIO_READ;
		error = VNOP_READ(fvp, &uio, 0, &context);

		if (error == 0) {
			/* Write back exactly the bytes the read produced:
			 * MAXPHYSIO minus whatever was left unread. */
			uio.uio_iovs.iov32p = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = (uintptr_t)bufp;
			iov.iov_len = MAXPHYSIO - uio_resid(&uio);
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio_setresid(&uio, iov.iov_len);

			/* Zero-length read means EOF: copy complete. */
			if (uio_resid(&uio) == 0)
				break;

			do {
				error = VNOP_WRITE(tvp, &uio, 0, &context);
			} while ((uio_resid(&uio) > 0) && (error == 0));
		}

	} while (error == 0);

	_FREE(bufp, M_TEMP);
	return (error);
}
661
/*
 * Copy a lower-layer object up to a freshly created shadow file in the
 * upper layer, so that subsequent writes go to the writable layer.
 *
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 *
 * When (docopy) is set the lower file's contents are copied into the
 * new upper file; otherwise only the empty shadow file is created.
 */
int
union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
	struct proc *p)
{
	int error;
	struct vnode *lvp, *uvp;
	struct vfs_context context;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	context.vc_proc = p;
	context.vc_ucred = cred;

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from vnop_close
		 */
		error = VNOP_OPEN(lvp, FREAD, &context);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VNOP_CLOSE(lvp, FREAD, &context);
		}
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	/* Drop the upper-lock flag around the close, then restore it. */
	un->un_flags &= ~UN_ULOCK;
	union_vn_close(uvp, FWRITE, cred, p);
	un->un_flags |= UN_ULOCK;

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VNOP_CLOSE(lvp, FREAD, &context);
			(void) VNOP_OPEN(uvp, FREAD, &context);
		}
		un->un_openl = 0;
	}

	return (error);

}
728
/*
 * Fake up a componentname for (path) and re-run the lookup in (dvp)
 * with CREATE intent, on behalf of union_mkshadow/union_mkwhiteout.
 * On success *vpp holds the looked-up vnode (or NULL if the name does
 * not yet exist) and the dvp reference taken here has been dropped.
 */
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by vnop_lookup when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
	cn->cn_pnlen = cn->cn_namelen+1;
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
#ifdef XXX_HELP_ME
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
#endif
	cn->cn_context = cnp->cn_context;	/* XXX !UNMNT_ABOVE case ??? */
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	vnode_get(dvp);
	error = relookup(dvp, vpp, cn);
	/*
	 * NOTE(review): on relookup() failure the reference taken by the
	 * vnode_get() above -- and apparently cn_pnbuf -- are never
	 * released here; verify against relookup()'s reference/buffer
	 * semantics (HASBUF) before calling this a leak.
	 */
	if (!error)
		vnode_put(dvp);

	return (error);
}
777
778 /*
779 * Create a shadow directory in the upper layer.
780 * The new vnode is returned locked.
781 *
782 * (um) points to the union mount structure for access to the
783 * the mounting process's credentials.
784 * (dvp) is the directory in which to create the shadow directory.
785 * it is unlocked on entry and exit.
786 * (cnp) is the componentname to be created.
787 * (vpp) is the returned newly created shadow directory, which
788 * is returned locked.
789 */
790 int
791 union_mkshadow(um, dvp, cnp, vpp)
792 struct union_mount *um;
793 struct vnode *dvp;
794 struct componentname *cnp;
795 struct vnode **vpp;
796 {
797 int error;
798 struct vnode_attr va;
799 struct componentname cn;
800
801 error = union_relookup(um, dvp, vpp, cnp, &cn,
802 cnp->cn_nameptr, cnp->cn_namelen);
803 if (error)
804 return (error);
805
806 if (*vpp) {
807 vnode_put(*vpp);
808 *vpp = NULLVP;
809 return (EEXIST);
810 }
811
812 /*
813 * policy: when creating the shadow directory in the
814 * upper layer, create it owned by the user who did
815 * the mount, group from parent directory, and mode
816 * 777 modified by umask (ie mostly identical to the
817 * mkdir syscall). (jsp, kb)
818 */
819 VATTR_INIT(&va);
820 VATTR_SET(&va, va_type, VDIR);
821 VATTR_SET(&va, va_mode, um->um_cmode);
822
823 error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
824 return (error);
825 }
826
827 /*
828 * Create a whiteout entry in the upper layer.
829 *
830 * (um) points to the union mount structure for access to the
831 * the mounting process's credentials.
832 * (dvp) is the directory in which to create the whiteout.
833 * it is locked on entry and exit.
834 * (cnp) is the componentname to be created.
835 */
836 int
837 union_mkwhiteout(um, dvp, cnp, path)
838 struct union_mount *um;
839 struct vnode *dvp;
840 struct componentname *cnp;
841 char *path;
842 {
843 int error;
844 struct vnode *wvp;
845 struct componentname cn;
846
847 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
848 if (error) {
849 return (error);
850 }
851 if (wvp) {
852 vnode_put(dvp);
853 vnode_put(wvp);
854 return (EEXIST);
855 }
856
857 error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);
858
859 vnode_put(dvp);
860
861 return (error);
862 }
863
864 /*
865 * union_vn_create: creates and opens a new shadow file
866 * on the upper union layer. this function is similar
867 * in spirit to calling vn_open but it avoids calling namei().
868 * the problem with calling namei is that a) it locks too many
869 * things, and b) it doesn't start at the "right" directory,
870 * whereas relookup is told where to start.
871 */
872 int
873 union_vn_create(vpp, un, p)
874 struct vnode **vpp;
875 struct union_node *un;
876 struct proc *p;
877 {
878 struct vnode *vp;
879 kauth_cred_t cred = p->p_ucred;
880 struct vnode_attr vat;
881 struct vnode_attr *vap = &vat;
882 struct vfs_context context;
883 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
884 int error;
885 int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
886 char *cp;
887 struct componentname cn;
888
889 *vpp = NULLVP;
890
891 context.vc_proc = p;
892 context.vc_ucred = p->p_ucred;
893
894 /*
895 * Build a new componentname structure (for the same
896 * reasons outlines in union_mkshadow).
897 * The difference here is that the file is owned by
898 * the current user, rather than by the person who
899 * did the mount, since the current user needs to be
900 * able to write the file (that's why it is being
901 * copied in the first place).
902 */
903 cn.cn_namelen = strlen(un->un_path);
904 cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
905 M_NAMEI, M_WAITOK);
906 cn.cn_pnlen = cn.cn_namelen+1;
907 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
908 cn.cn_nameiop = CREATE;
909 cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
910 cn.cn_context = &context;
911 cn.cn_nameptr = cn.cn_pnbuf;
912 cn.cn_hash = un->un_hash;
913 cn.cn_consume = 0;
914
915 vnode_get(un->un_dirvp);
916 if (error = relookup(un->un_dirvp, &vp, &cn))
917 return (error);
918 vnode_put(un->un_dirvp);
919
920 if (vp) {
921 vnode_put(un->un_dirvp);
922 vnode_put(vp);
923 return (EEXIST);
924 }
925
926 /*
927 * Good - there was no race to create the file
928 * so go ahead and create it. The permissions
929 * on the file will be 0666 modified by the
930 * current user's umask. Access to the file, while
931 * it is unioned, will require access to the top *and*
932 * bottom files. Access when not unioned will simply
933 * require access to the top-level file.
934 *
935 * TODO: confirm choice of access permissions.
936 * decide on authorisation behaviour
937 */
938
939 VATTR_INIT(vap);
940 VATTR_SET(vap, va_type, VREG);
941 VATTR_SET(vap, va_mode, cmode);
942
943 if (error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, &context))
944 return (error);
945
946 if (error = VNOP_OPEN(vp, fmode, &context)) {
947 vnode_put(vp);
948 return (error);
949 }
950
951 vnode_lock(vp);
952 if (++vp->v_writecount <= 0)
953 panic("union: v_writecount");
954 vnode_unlock(vp);
955 *vpp = vp;
956 return (0);
957 }
958
959 int
960 union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred,
961 struct proc *p)
962 {
963 struct vfs_context context;
964
965 context.vc_proc = p;
966 context.vc_ucred = cred;
967
968 if (fmode & FWRITE) {
969 vnode_lock(vp);
970 --vp->v_writecount;
971 vnode_unlock(vp);
972 }
973 return (VNOP_CLOSE(vp, fmode, &context));
974 }
975
976 void
977 union_removed_upper(un)
978 struct union_node *un;
979 {
980 struct proc *p = current_proc(); /* XXX */
981
982 union_newupper(un, NULLVP);
983 if (un->un_flags & UN_CACHED) {
984 un->un_flags &= ~UN_CACHED;
985 LIST_REMOVE(un, un_cache);
986 }
987
988 if (un->un_flags & UN_ULOCK) {
989 un->un_flags &= ~UN_ULOCK;
990 }
991 }
992
993 #if 0
994 struct vnode *
995 union_lowervp(vp)
996 struct vnode *vp;
997 {
998 struct union_node *un = VTOUNION(vp);
999
1000 if ((un->un_lowervp != NULLVP) &&
1001 (vp->v_type == un->un_lowervp->v_type)) {
1002 if (vnode_get(un->un_lowervp) == 0)
1003 return (un->un_lowervp);
1004 }
1005
1006 return (NULLVP);
1007 }
1008 #endif
1009
1010 /*
1011 * determine whether a whiteout is needed
1012 * during a remove/rmdir operation.
1013 */
1014 int
1015 union_dowhiteout(struct union_node *un, vfs_context_t ctx)
1016 {
1017 struct vnode_attr va;
1018
1019 if (un->un_lowervp != NULLVP)
1020 return (1);
1021
1022 VATTR_INIT(&va);
1023 VATTR_WANTED(&va, va_flags);
1024 if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
1025 (va.va_flags & OPAQUE))
1026 return (1);
1027
1028 return (0);
1029 }
1030
1031 static void
1032 union_dircache_r(vp, vppp, cntp)
1033 struct vnode *vp;
1034 struct vnode ***vppp;
1035 int *cntp;
1036 {
1037 struct union_node *un;
1038
1039 if (vp->v_op != union_vnodeop_p) {
1040 if (vppp) {
1041 vnode_get(vp);
1042 *(*vppp)++ = vp;
1043 if (--(*cntp) == 0)
1044 panic("union: dircache table too small");
1045 } else {
1046 (*cntp)++;
1047 }
1048
1049 return;
1050 }
1051
1052 un = VTOUNION(vp);
1053 if (un->un_uppervp != NULLVP)
1054 union_dircache_r(un->un_uppervp, vppp, cntp);
1055 if (un->un_lowervp != NULLVP)
1056 union_dircache_r(un->un_lowervp, vppp, cntp);
1057 }
1058
/*
 * Build (or advance through) the directory cache for readdir on a
 * union directory: a NULL-terminated table of referenced vnodes for
 * every layer under (vp).  On the first call the table is built; on
 * subsequent calls the entry after the current upper vnode is used.
 * Returns a new union vnode wrapping the next layer, or NULLVP when
 * the layers are exhausted or allocation fails.
 */
struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int count;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		/* First pass: size the table, then fill it. */
		count = 0;
		union_dircache_r(vp, 0, &count);
		count++;	/* one extra slot for the NULLVP terminator */
		dircache = (struct vnode **)
				_MALLOC(count * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &count);
		*vpp = NULLVP;
		vpp = dircache + 1;	/* start with the second entry */
	} else {
		/* Resume: advance past the current upper vnode. */
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/* Wrap the next layer's vnode in a fresh (uncached) union vnode. */
	vnode_get(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (error)
		goto out;

	/* Hand ownership of the dircache table to the new node. */
	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	return (nvp);
}