1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/uio.h>
48 #include <sys/vfs_context.h>
49 #include <sys/disk.h>
50 #include <sys/sysctl.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
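/*
 * Illustration (not from the original source): can_cluster() is true only
 * when "size" is a whole number of 4 KB pages and no more than half of
 * MAXPHYSIO.  Assuming, purely for the sake of the numbers, that MAXPHYSIO
 * were 128 KB: can_cluster(4096) and can_cluster(65536) would hold, while
 * can_cluster(6144) fails (not a 4 KB multiple) and can_cluster(131072)
 * fails (larger than MAXPHYSIO/2).
 */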
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 int flush_cache_on_write = 0;
88 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
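/*
 * Hedged illustration (not compiled): the SYSCTL_INT above publishes this
 * knob as "kern.flush_cache_on_write".  A user-space sketch of flipping it
 * with sysctlbyname(3) might look like the following; the helper name and
 * error handling are assumptions for illustration only.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_flush_cache_on_write(int enable)
{
	/* oldp/oldlenp are NULL: we only set the new value, we don't read it back */
	return sysctlbyname("kern.flush_cache_on_write",
	                    NULL, NULL, &enable, sizeof(enable));
}
#endif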
89
90
91 /*****************************************************************************
92 *
93 * I/O Operations on vnodes
94 *
95 *****************************************************************************/
96 int hfs_vnop_read(struct vnop_read_args *);
97 int hfs_vnop_write(struct vnop_write_args *);
98 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
99 int hfs_vnop_select(struct vnop_select_args *);
100 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
101 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
102 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
103 int hfs_vnop_strategy(struct vnop_strategy_args *);
104 int hfs_vnop_allocate(struct vnop_allocate_args *);
105 int hfs_vnop_pagein(struct vnop_pagein_args *);
106 int hfs_vnop_pageout(struct vnop_pageout_args *);
107 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
108
109
110 /*
111 * Read data from a file.
112 */
113 int
114 hfs_vnop_read(struct vnop_read_args *ap)
115 {
116 uio_t uio = ap->a_uio;
117 struct vnode *vp = ap->a_vp;
118 struct cnode *cp;
119 struct filefork *fp;
120 struct hfsmount *hfsmp;
121 off_t filesize;
122 off_t filebytes;
123 off_t start_resid = uio_resid(uio);
124 off_t offset = uio_offset(uio);
125 int retval = 0;
126
127
128 /* Preflight checks */
129 if (!vnode_isreg(vp)) {
130 /* can only read regular files */
131 if (vnode_isdir(vp))
132 return (EISDIR);
133 else
134 return (EPERM);
135 }
136 if (start_resid == 0)
137 return (0); /* Nothing left to do */
138 if (offset < 0)
139 		return (EINVAL);	/* can't read from a negative offset */
140
141 cp = VTOC(vp);
142 fp = VTOF(vp);
143 hfsmp = VTOHFS(vp);
144
145 /* Protect against a size change. */
146 hfs_lock_truncate(cp, 0);
147
148 filesize = fp->ff_size;
149 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
150 if (offset > filesize) {
151 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
152 (offset > (off_t)MAXHFSFILESIZE)) {
153 retval = EFBIG;
154 }
155 goto exit;
156 }
157
158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
159 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
160
161 retval = cluster_read(vp, uio, filesize, 0);
162
163 cp->c_touch_acctime = TRUE;
164
165 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
166 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
167
168 /*
169 	 * Keep track of blocks read
170 */
171 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
172 int took_cnode_lock = 0;
173 off_t bytesread;
174
175 bytesread = start_resid - uio_resid(uio);
176
177 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
178 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
179 hfs_lock(cp, HFS_FORCE_LOCK);
180 took_cnode_lock = 1;
181 }
182 /*
183 * If this file hasn't been seen since the start of
184 * the current sampling period then start over.
185 */
186 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
187 struct timeval tv;
188
189 fp->ff_bytesread = bytesread;
190 microtime(&tv);
191 cp->c_atime = tv.tv_sec;
192 } else {
193 fp->ff_bytesread += bytesread;
194 }
195 if (took_cnode_lock)
196 hfs_unlock(cp);
197 }
198 exit:
199 hfs_unlock_truncate(cp);
200 return (retval);
201 }
202
203 /*
204 * Write data to a file.
205 */
206 int
207 hfs_vnop_write(struct vnop_write_args *ap)
208 {
209 uio_t uio = ap->a_uio;
210 struct vnode *vp = ap->a_vp;
211 struct cnode *cp;
212 struct filefork *fp;
213 struct hfsmount *hfsmp;
214 kauth_cred_t cred = NULL;
215 off_t origFileSize;
216 off_t writelimit;
217 off_t bytesToAdd;
218 off_t actualBytesAdded;
219 off_t filebytes;
220 off_t offset;
221 size_t resid;
222 int eflags;
223 int ioflag = ap->a_ioflag;
224 int retval = 0;
225 int lockflags;
226 int cnode_locked = 0;
227
228 // LP64todo - fix this! uio_resid may be 64-bit value
229 resid = uio_resid(uio);
230 offset = uio_offset(uio);
231
232 if (offset < 0)
233 return (EINVAL);
234 if (resid == 0)
235 return (E_NONE);
236 if (!vnode_isreg(vp))
237 return (EPERM); /* Can only write regular files */
238
239 /* Protect against a size change. */
240 hfs_lock_truncate(VTOC(vp), TRUE);
241
242 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
243 hfs_unlock_truncate(VTOC(vp));
244 return (retval);
245 }
246 cnode_locked = 1;
247 cp = VTOC(vp);
248 fp = VTOF(vp);
249 hfsmp = VTOHFS(vp);
250 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
251
252 if (ioflag & IO_APPEND) {
253 uio_setoffset(uio, fp->ff_size);
254 offset = fp->ff_size;
255 }
256 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
257 retval = EPERM;
258 goto exit;
259 }
260
261 origFileSize = fp->ff_size;
262 eflags = kEFDeferMask; /* defer file block allocations */
263
264 #ifdef HFS_SPARSE_DEV
265 /*
266 * When the underlying device is sparse and space
267 * is low (< 8MB), stop doing delayed allocations
268 * and begin doing synchronous I/O.
269 */
270 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
271 (hfs_freeblks(hfsmp, 0) < 2048)) {
272 eflags &= ~kEFDeferMask;
273 ioflag |= IO_SYNC;
274 }
275 #endif /* HFS_SPARSE_DEV */
276
277 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
278 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
279
280 /* Now test if we need to extend the file */
281 /* Doing so will adjust the filebytes for us */
282
283 writelimit = offset + resid;
284 if (writelimit <= filebytes)
285 goto sizeok;
286
287 cred = vfs_context_ucred(ap->a_context);
288 #if QUOTA
289 bytesToAdd = writelimit - filebytes;
290 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
291 cred, 0);
292 if (retval)
293 goto exit;
294 #endif /* QUOTA */
295
296 if (hfs_start_transaction(hfsmp) != 0) {
297 retval = EINVAL;
298 goto exit;
299 }
300
301 while (writelimit > filebytes) {
302 bytesToAdd = writelimit - filebytes;
303 if (cred && suser(cred, NULL) != 0)
304 eflags |= kEFReserveMask;
305
306 /* Protect extents b-tree and allocation bitmap */
307 lockflags = SFL_BITMAP;
308 if (overflow_extents(fp))
309 lockflags |= SFL_EXTENTS;
310 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
311
312 /* Files that are changing size are not hot file candidates. */
313 if (hfsmp->hfc_stage == HFC_RECORDING) {
314 fp->ff_bytesread = 0;
315 }
316 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
317 0, eflags, &actualBytesAdded));
318
319 hfs_systemfile_unlock(hfsmp, lockflags);
320
321 if ((actualBytesAdded == 0) && (retval == E_NONE))
322 retval = ENOSPC;
323 if (retval != E_NONE)
324 break;
325 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
327 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
328 }
329 (void) hfs_update(vp, TRUE);
330 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
331 (void) hfs_end_transaction(hfsmp);
332
333 sizeok:
334 if (retval == E_NONE) {
335 off_t filesize;
336 off_t zero_off;
337 off_t tail_off;
338 off_t inval_start;
339 off_t inval_end;
340 off_t io_start;
341 int lflag;
342 struct rl_entry *invalid_range;
343
344 if (writelimit > fp->ff_size)
345 filesize = writelimit;
346 else
347 filesize = fp->ff_size;
348
349 lflag = (ioflag & IO_SYNC);
350
351 if (offset <= fp->ff_size) {
352 zero_off = offset & ~PAGE_MASK_64;
353
354 			/* Check whether the area between zero_off and the start
355 			   of the transfer is invalid and should be zero-filled
356 as part of the transfer:
357 */
358 if (offset > zero_off) {
359 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
360 lflag |= IO_HEADZEROFILL;
361 }
362 } else {
363 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
364
365 /* The bytes between fp->ff_size and uio->uio_offset must never be
366 read without being zeroed. The current last block is filled with zeroes
367 			   if it holds valid data; in all cases we merely do a little bookkeeping
368 			   to track the area from the end of the current last page to the start of
369 			   the area actually written.  For the same reason, only the bytes up to the
370 			   start of the page where this write will start are invalidated; any remainder
371 			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.
372 			   
373 			   Note that inval_start, the start of the page after the current EOF,
374 			   may be past the start of the write, in which case the zeroing
375 			   will be handled by the cluster_write of the actual data.
376 */
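			/* Worked illustration (assuming 4 KB pages; the numbers are not
			   from the original source): suppose fp->ff_size is 0x1800 and
			   the write starts at offset 0x5200.  Then inval_start is 0x2000
			   (the first page boundary at or past EOF), inval_end is 0x5000
			   (the page containing the write start), and zero_off is 0x1800.
			   If the EOF page is valid, cluster_write() first zero-fills
			   0x1800-0x2000, the range 0x2000-0x4FFF is added to the invalid
			   list, and the remaining gap 0x5000-0x5200 is covered by
			   IO_HEADZEROFILL on the actual write. */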
377 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
378 inval_end = offset & ~PAGE_MASK_64;
379 zero_off = fp->ff_size;
380
381 if ((fp->ff_size & PAGE_MASK_64) &&
382 (rl_scan(&fp->ff_invalidranges,
383 eof_page_base,
384 fp->ff_size - 1,
385 &invalid_range) != RL_NOOVERLAP)) {
386 /* The page containing the EOF is not valid, so the
387 entire page must be made inaccessible now. If the write
388 starts on a page beyond the page containing the eof
389 (inval_end > eof_page_base), add the
390 whole page to the range to be invalidated. Otherwise
391 (i.e. if the write starts on the same page), zero-fill
392 the entire page explicitly now:
393 */
394 if (inval_end > eof_page_base) {
395 inval_start = eof_page_base;
396 } else {
397 zero_off = eof_page_base;
398 };
399 };
400
401 if (inval_start < inval_end) {
402 struct timeval tv;
403 /* There's some range of data that's going to be marked invalid */
404
405 if (zero_off < inval_start) {
406 /* The pages between inval_start and inval_end are going to be invalidated,
407 and the actual write will start on a page past inval_end. Now's the last
408 chance to zero-fill the page containing the EOF:
409 */
410 hfs_unlock(cp);
411 cnode_locked = 0;
412 retval = cluster_write(vp, (uio_t) 0,
413 fp->ff_size, inval_start,
414 zero_off, (off_t)0,
415 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
416 hfs_lock(cp, HFS_FORCE_LOCK);
417 cnode_locked = 1;
418 if (retval) goto ioerr_exit;
419 offset = uio_offset(uio);
420 };
421
422 /* Mark the remaining area of the newly allocated space as invalid: */
423 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
424 microuptime(&tv);
425 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
426 zero_off = fp->ff_size = inval_end;
427 };
428
429 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
430 };
431
432 /* Check to see whether the area between the end of the write and the end of
433 the page it falls in is invalid and should be zero-filled as part of the transfer:
434 */
435 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
436 if (tail_off > filesize) tail_off = filesize;
437 if (tail_off > writelimit) {
438 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
439 lflag |= IO_TAILZEROFILL;
440 };
441 };
442
443 /*
444 * if the write starts beyond the current EOF (possibly advanced in the
445 * zeroing of the last block, above), then we'll zero fill from the current EOF
446 * to where the write begins:
447 *
448 * NOTE: If (and ONLY if) the portion of the file about to be written is
449 * before the current EOF it might be marked as invalid now and must be
450 * made readable (removed from the invalid ranges) before cluster_write
451 * tries to write it:
452 */
453 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
454 if (io_start < fp->ff_size) {
455 off_t io_end;
456
457 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
458 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
459 };
460
461 hfs_unlock(cp);
462 cnode_locked = 0;
463 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
464 tail_off, lflag | IO_NOZERODIRTY);
465 offset = uio_offset(uio);
466 if (offset > fp->ff_size) {
467 fp->ff_size = offset;
468
469 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
470 /* Files that are changing size are not hot file candidates. */
471 if (hfsmp->hfc_stage == HFC_RECORDING)
472 fp->ff_bytesread = 0;
473 }
474 if (resid > uio_resid(uio)) {
475 cp->c_touch_chgtime = TRUE;
476 cp->c_touch_modtime = TRUE;
477 }
478 }
479
480 // XXXdbg - testing for vivek and paul lambert
481 {
482 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
483 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
484 }
485 }
486 HFS_KNOTE(vp, NOTE_WRITE);
487
488 ioerr_exit:
489 /*
490 * If we successfully wrote any data, and we are not the superuser
491 	 * If we successfully wrote any data, and we are not the superuser,
492 * tampering.
493 */
494 if (cp->c_mode & (S_ISUID | S_ISGID)) {
495 cred = vfs_context_ucred(ap->a_context);
496 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
497 if (!cnode_locked) {
498 hfs_lock(cp, HFS_FORCE_LOCK);
499 cnode_locked = 1;
500 }
501 cp->c_mode &= ~(S_ISUID | S_ISGID);
502 }
503 }
504 if (retval) {
505 if (ioflag & IO_UNIT) {
506 if (!cnode_locked) {
507 hfs_lock(cp, HFS_FORCE_LOCK);
508 cnode_locked = 1;
509 }
510 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
511 0, ap->a_context);
512 			// LP64todo - fix this!  resid needs to be user_ssize_t
513 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
514 uio_setresid(uio, resid);
515 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
516 }
517 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
518 if (!cnode_locked) {
519 hfs_lock(cp, HFS_FORCE_LOCK);
520 cnode_locked = 1;
521 }
522 retval = hfs_update(vp, TRUE);
523 }
524 /* Updating vcbWrCnt doesn't need to be atomic. */
525 hfsmp->vcbWrCnt++;
526
527 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
528 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
529 exit:
530 if (cnode_locked)
531 hfs_unlock(cp);
532 hfs_unlock_truncate(cp);
533 return (retval);
534 }
535
536 /* support for the "bulk-access" fcntl */
537
538 #define CACHE_ELEMS 64
539 #define CACHE_LEVELS 16
540 #define PARENT_IDS_FLAG 0x100
541
542 /* from hfs_attrlist.c */
543 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
544 mode_t obj_mode, struct mount *mp,
545 kauth_cred_t cred, struct proc *p);
546
547 /* from vfs/vfs_fsevents.c */
548 extern char *get_pathbuff(void);
549 extern void release_pathbuff(char *buff);
550
551 struct access_cache {
552 int numcached;
553 int cachehits; /* these two for statistics gathering */
554 int lookups;
555 unsigned int *acache;
556 Boolean *haveaccess;
557 };
558
559 struct access_t {
560 uid_t uid; /* IN: effective user id */
561 short flags; /* IN: access requested (i.e. R_OK) */
562 short num_groups; /* IN: number of groups user belongs to */
563 int num_files; /* IN: number of files to process */
564 int *file_ids; /* IN: array of file ids */
565 gid_t *groups; /* IN: array of groups */
566 short *access; /* OUT: access info for each file (0 for 'has access') */
567 };
568
569 struct user_access_t {
570 uid_t uid; /* IN: effective user id */
571 short flags; /* IN: access requested (i.e. R_OK) */
572 short num_groups; /* IN: number of groups user belongs to */
573 int num_files; /* IN: number of files to process */
574 user_addr_t file_ids; /* IN: array of file ids */
575 user_addr_t groups; /* IN: array of groups */
576 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
577 };
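/*
 * Hedged illustration (not compiled into the kernel): a minimal user-space
 * sketch of driving the bulk-access interface described by the structures
 * above through fsctl(2).  The request constant mirrors the HFSIOC_BULKACCESS
 * definition that appears further down in this file; the header choices,
 * helper name, and error handling are assumptions for illustration only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioccom.h>
#include <sys/fsctl.h>		/* fsctl(2) prototype (assumed location) */
#include <unistd.h>		/* R_OK */
#include <string.h>

#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)

static int
bulk_check_read_access(const char *volpath, uid_t uid,
                       gid_t *groups, short ngroups,
                       int *file_ids, int nfiles, short *results)
{
	struct access_t args;

	memset(&args, 0, sizeof(args));
	args.uid        = uid;
	args.flags      = R_OK;		/* "can this uid read these files?" */
	args.num_groups = ngroups;
	args.num_files  = nfiles;	/* the handler rejects more than 256 per call */
	args.file_ids   = file_ids;	/* catalog node ids to test */
	args.groups     = groups;
	args.access     = results;	/* one short per file; 0 means access granted */

	return fsctl(volpath, HFSIOC_BULKACCESS, &args, 0);
}
#endif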
578
579 /*
580 * Perform a binary search for the given parent_id. Return value is
581 * found/not found boolean, and indexp will be the index of the item
582 * or the index at which to insert the item if it's not found.
583 */
584 static int
585 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
586 {
587 unsigned int lo, hi;
588 int index, matches = 0;
589
590 if (cache->numcached == 0) {
591 *indexp = 0;
592 return 0; // table is empty, so insert at index=0 and report no match
593 }
594
595 if (cache->numcached > CACHE_ELEMS) {
596 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
597 cache->numcached, CACHE_ELEMS);*/
598 cache->numcached = CACHE_ELEMS;
599 }
600
601 lo = 0;
602 hi = cache->numcached - 1;
603 index = -1;
604
605 /* perform binary search for parent_id */
606 do {
607 unsigned int mid = (hi - lo)/2 + lo;
608 unsigned int this_id = cache->acache[mid];
609
610 if (parent_id == this_id) {
611 index = mid;
612 break;
613 }
614
615 if (parent_id < this_id) {
616 hi = mid;
617 continue;
618 }
619
620 if (parent_id > this_id) {
621 lo = mid + 1;
622 continue;
623 }
624 } while(lo < hi);
625
626 /* check if lo and hi converged on the match */
627 if (parent_id == cache->acache[hi]) {
628 index = hi;
629 }
630
631 /* if no existing entry found, find index for new one */
632 if (index == -1) {
633 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
634 matches = 0;
635 } else {
636 matches = 1;
637 }
638
639 *indexp = index;
640 return matches;
641 }
642
643 /*
644 * Add a node to the access_cache at the given index (or do a lookup first
645 * to find the index if -1 is passed in). We currently do a replace rather
646 * than an insert if the cache is full.
647 */
648 static void
649 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
650 {
651 int lookup_index = -1;
652
653 /* need to do a lookup first if -1 passed for index */
654 if (index == -1) {
655 if (lookup_bucket(cache, &lookup_index, nodeID)) {
656 if (cache->haveaccess[lookup_index] != access) {
657 /* change access info for existing entry... should never happen */
658 cache->haveaccess[lookup_index] = access;
659 }
660
661 /* mission accomplished */
662 return;
663 } else {
664 index = lookup_index;
665 }
666
667 }
668
669 /* if the cache is full, do a replace rather than an insert */
670 if (cache->numcached >= CACHE_ELEMS) {
671 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
672 cache->numcached = CACHE_ELEMS-1;
673
674 if (index > cache->numcached) {
675 // printf("index %d pinned to %d\n", index, cache->numcached);
676 index = cache->numcached;
677 }
678 } else if (index >= 0 && index < cache->numcached) {
679 /* only do bcopy if we're inserting */
680 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
681 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
682 }
683
684 cache->acache[index] = nodeID;
685 cache->haveaccess[index] = access;
686 cache->numcached++;
687 }
688
689
690 struct cinfo {
691 uid_t uid;
692 gid_t gid;
693 mode_t mode;
694 cnid_t parentcnid;
695 };
696
697 static int
698 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
699 {
700 struct cinfo *cip = (struct cinfo *)arg;
701
702 cip->uid = attrp->ca_uid;
703 cip->gid = attrp->ca_gid;
704 cip->mode = attrp->ca_mode;
705 cip->parentcnid = descp->cd_parentcnid;
706
707 return (0);
708 }
709
710 /*
711 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
712 * isn't incore, then go to the catalog.
713 */
714 static int
715 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
716 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
717 {
718 int error = 0;
719
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid == skip_cp->c_cnid) {
722 cnattrp->ca_uid = skip_cp->c_uid;
723 cnattrp->ca_gid = skip_cp->c_gid;
724 cnattrp->ca_mode = skip_cp->c_mode;
725 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
726 } else {
727 struct cinfo c_info;
728
729 	/* otherwise, check the cnode hash in case the file/dir is incore */
730 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
731 cnattrp->ca_uid = c_info.uid;
732 cnattrp->ca_gid = c_info.gid;
733 cnattrp->ca_mode = c_info.mode;
734 keyp->hfsPlus.parentID = c_info.parentcnid;
735 } else {
736 int lockflags;
737
738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
739
740 /* lookup this cnid in the catalog */
741 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
742
743 hfs_systemfile_unlock(hfsmp, lockflags);
744
745 cache->lookups++;
746 }
747 }
748
749 return (error);
750 }
751
752 /*
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
755 */
756 static int
757 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
758 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
759 {
760 int myErr = 0;
761 int myResult;
762 HFSCatalogNodeID thisNodeID;
763 unsigned long myPerms;
764 struct cat_attr cnattr;
765 int cache_index = -1;
766 CatalogKey catkey;
767
768 int i = 0, ids_to_cache = 0;
769 int parent_ids[CACHE_LEVELS];
770
771 /* root always has access */
772 if (!suser(myp_ucred, NULL)) {
773 return (1);
774 }
775
776 thisNodeID = nodeID;
777 while (thisNodeID >= kRootDirID) {
778 myResult = 0; /* default to "no access" */
779
780 /* check the cache before resorting to hitting the catalog */
781
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
784
785 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
786 cache->cachehits++;
787 myResult = cache->haveaccess[cache_index];
788 goto ExitThisRoutine;
789 }
790
791 /* remember which parents we want to cache */
792 if (ids_to_cache < CACHE_LEVELS) {
793 parent_ids[ids_to_cache] = thisNodeID;
794 ids_to_cache++;
795 }
796
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
799 if (myErr) {
800 goto ExitThisRoutine; /* no access */
801 }
802
803 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
804 cnattr.ca_mode, hfsmp->hfs_mp,
805 myp_ucred, theProcPtr);
806
807 if ( (myPerms & X_OK) == 0 ) {
808 myResult = 0;
809 goto ExitThisRoutine; /* no access */
810 }
811
812 /* up the hierarchy we go */
813 thisNodeID = catkey.hfsPlus.parentID;
814 }
815
816 /* if here, we have access to this node */
817 myResult = 1;
818
819 ExitThisRoutine:
820 if (myErr) {
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
822 myResult = 0;
823 }
824 *err = myErr;
825
826 /* cache the parent directory(ies) */
827 for (i = 0; i < ids_to_cache; i++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache, -1, parent_ids[i], myResult);
831 }
832
833 return (myResult);
834 }
835 /* end "bulk-access" support */
836
837
838
839 /*
840 * Callback for use with freeze ioctl.
841 */
842 static int
843 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
844 {
845 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
846
847 return 0;
848 }
849
850 /*
851 * Control filesystem operating characteristics.
852 */
853 int
854 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
855 vnode_t a_vp;
856 int a_command;
857 caddr_t a_data;
858 int a_fflag;
859 vfs_context_t a_context;
860 } */ *ap)
861 {
862 struct vnode * vp = ap->a_vp;
863 struct hfsmount *hfsmp = VTOHFS(vp);
864 vfs_context_t context = ap->a_context;
865 kauth_cred_t cred = vfs_context_ucred(context);
866 proc_t p = vfs_context_proc(context);
867 struct vfsstatfs *vfsp;
868 boolean_t is64bit;
869
870 is64bit = proc_is64bit(p);
871
872 switch (ap->a_command) {
873
874 case HFS_RESIZE_PROGRESS: {
875
876 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
877 if (suser(cred, NULL) &&
878 kauth_cred_getuid(cred) != vfsp->f_owner) {
879 return (EACCES); /* must be owner of file system */
880 }
881 if (!vnode_isvroot(vp)) {
882 return (EINVAL);
883 }
884 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
885 }
886 case HFS_RESIZE_VOLUME: {
887 u_int64_t newsize;
888 u_int64_t cursize;
889
890 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
891 if (suser(cred, NULL) &&
892 kauth_cred_getuid(cred) != vfsp->f_owner) {
893 return (EACCES); /* must be owner of file system */
894 }
895 if (!vnode_isvroot(vp)) {
896 return (EINVAL);
897 }
898 newsize = *(u_int64_t *)ap->a_data;
899 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
900
901 if (newsize > cursize) {
902 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
903 } else if (newsize < cursize) {
904 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
905 } else {
906 return (0);
907 }
908 }
909 case HFS_CHANGE_NEXT_ALLOCATION: {
910 u_int32_t location;
911
912 if (vnode_vfsisrdonly(vp)) {
913 return (EROFS);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 if (!vnode_isvroot(vp)) {
921 return (EINVAL);
922 }
923 location = *(u_int32_t *)ap->a_data;
924 if (location > hfsmp->totalBlocks - 1) {
925 return (EINVAL);
926 }
927 /* Return previous value. */
928 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
929 HFS_MOUNT_LOCK(hfsmp, TRUE);
930 hfsmp->nextAllocation = location;
931 hfsmp->vcbFlags |= 0xFF00;
932 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
933 return (0);
934 }
935
936 #ifdef HFS_SPARSE_DEV
937 case HFS_SETBACKINGSTOREINFO: {
938 struct vnode * bsfs_rootvp;
939 struct vnode * di_vp;
940 struct hfs_backingstoreinfo *bsdata;
941 int error = 0;
942
943 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
944 return (EALREADY);
945 }
946 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
947 if (suser(cred, NULL) &&
948 kauth_cred_getuid(cred) != vfsp->f_owner) {
949 return (EACCES); /* must be owner of file system */
950 }
951 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
952 if (bsdata == NULL) {
953 return (EINVAL);
954 }
955 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
956 return (error);
957 }
958 if ((error = vnode_getwithref(di_vp))) {
959 file_drop(bsdata->backingfd);
960 return(error);
961 }
962
963 if (vnode_mount(vp) == vnode_mount(di_vp)) {
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (EINVAL);
967 }
968
969 /*
970 * Obtain the backing fs root vnode and keep a reference
971 * on it. This reference will be dropped in hfs_unmount.
972 */
973 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
974 if (error) {
975 (void)vnode_put(di_vp);
976 file_drop(bsdata->backingfd);
977 return (error);
978 }
979 vnode_ref(bsfs_rootvp);
980 vnode_put(bsfs_rootvp);
981
982 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
983 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
984 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
985 hfsmp->hfs_sparsebandblks *= 4;
986
987 (void)vnode_put(di_vp);
988 file_drop(bsdata->backingfd);
989 return (0);
990 }
991 case HFS_CLRBACKINGSTOREINFO: {
992 struct vnode * tmpvp;
993
994 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
995 if (suser(cred, NULL) &&
996 kauth_cred_getuid(cred) != vfsp->f_owner) {
997 return (EACCES); /* must be owner of file system */
998 }
999 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1000 hfsmp->hfs_backingfs_rootvp) {
1001
1002 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1003 tmpvp = hfsmp->hfs_backingfs_rootvp;
1004 hfsmp->hfs_backingfs_rootvp = NULLVP;
1005 hfsmp->hfs_sparsebandblks = 0;
1006 vnode_rele(tmpvp);
1007 }
1008 return (0);
1009 }
1010 #endif /* HFS_SPARSE_DEV */
1011
1012 case F_FREEZE_FS: {
1013 struct mount *mp;
1014 task_t task;
1015
1016 if (!is_suser())
1017 return (EACCES);
1018
1019 mp = vnode_mount(vp);
1020 hfsmp = VFSTOHFS(mp);
1021
1022 if (!(hfsmp->jnl))
1023 return (ENOTSUP);
1024
1025 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1026
1027 task = current_task();
1028 task_working_set_disable(task);
1029
1030 // flush things before we get started to try and prevent
1031 // dirty data from being paged out while we're frozen.
1032 // note: can't do this after taking the lock as it will
1033 // deadlock against ourselves.
1034 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1035 hfs_global_exclusive_lock_acquire(hfsmp);
1036 journal_flush(hfsmp->jnl);
1037
1038 // don't need to iterate on all vnodes, we just need to
1039 // wait for writes to the system files and the device vnode
1040 if (HFSTOVCB(hfsmp)->extentsRefNum)
1041 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1042 if (HFSTOVCB(hfsmp)->catalogRefNum)
1043 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1044 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1045 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1046 if (hfsmp->hfs_attribute_vp)
1047 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1048 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1049
1050 hfsmp->hfs_freezing_proc = current_proc();
1051
1052 return (0);
1053 }
1054
1055 case F_THAW_FS: {
1056 if (!is_suser())
1057 return (EACCES);
1058
1059 // if we're not the one who froze the fs then we
1060 // can't thaw it.
1061 if (hfsmp->hfs_freezing_proc != current_proc()) {
1062 return EPERM;
1063 }
1064
1065 // NOTE: if you add code here, also go check the
1066 // code that "thaws" the fs in hfs_vnop_close()
1067 //
1068 hfsmp->hfs_freezing_proc = NULL;
1069 hfs_global_exclusive_lock_release(hfsmp);
1070 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1071
1072 return (0);
1073 }
1074
1075 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1076 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1077
1078 case HFS_BULKACCESS_FSCTL:
1079 case HFS_BULKACCESS: {
1080 /*
1081 	 * NOTE: on entry, the vnode is locked.  In case this vnode
1082 	 * happens to be in our list of file_ids, we note it so we
1083 	 * avoid calling hfs_chashget_nowait() on that id, as that
1084 	 * will cause a "locking against myself" panic.
1085 */
1086 Boolean check_leaf = true;
1087
1088 struct user_access_t *user_access_structp;
1089 struct user_access_t tmp_user_access_t;
1090 struct access_cache cache;
1091
1092 int error = 0, i;
1093
1094 dev_t dev = VTOC(vp)->c_dev;
1095
1096 short flags;
1097 struct ucred myucred;
1098 int num_files;
1099 int *file_ids = NULL;
1100 short *access = NULL;
1101
1102 cnid_t cnid;
1103 cnid_t prevParent_cnid = 0;
1104 unsigned long myPerms;
1105 short myaccess = 0;
1106 struct cat_attr cnattr;
1107 CatalogKey catkey;
1108 struct cnode *skip_cp = VTOC(vp);
1109 struct vfs_context my_context;
1110
1111 /* set up front for common exit code */
1112 my_context.vc_ucred = NOCRED;
1113
1114 /* first, return error if not run as root */
1115 if (cred->cr_ruid != 0) {
1116 return EPERM;
1117 }
1118
1119 /* initialize the local cache and buffers */
1120 cache.numcached = 0;
1121 cache.cachehits = 0;
1122 cache.lookups = 0;
1123
1124 file_ids = (int *) get_pathbuff();
1125 access = (short *) get_pathbuff();
1126 cache.acache = (int *) get_pathbuff();
1127 cache.haveaccess = (Boolean *) get_pathbuff();
1128
1129 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1130 release_pathbuff((char *) file_ids);
1131 release_pathbuff((char *) access);
1132 release_pathbuff((char *) cache.acache);
1133 release_pathbuff((char *) cache.haveaccess);
1134
1135 return ENOMEM;
1136 }
1137
1138 /* struct copyin done during dispatch... need to copy file_id array separately */
1139 if (ap->a_data == NULL) {
1140 error = EINVAL;
1141 goto err_exit_bulk_access;
1142 }
1143
1144 if (is64bit) {
1145 user_access_structp = (struct user_access_t *)ap->a_data;
1146 }
1147 else {
1148 struct access_t * accessp = (struct access_t *)ap->a_data;
1149 tmp_user_access_t.uid = accessp->uid;
1150 tmp_user_access_t.flags = accessp->flags;
1151 tmp_user_access_t.num_groups = accessp->num_groups;
1152 tmp_user_access_t.num_files = accessp->num_files;
1153 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1154 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1155 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1156 user_access_structp = &tmp_user_access_t;
1157 }
1158
1159 num_files = user_access_structp->num_files;
1160 if (num_files < 1) {
1161 goto err_exit_bulk_access;
1162 }
1163 if (num_files > 256) {
1164 error = EINVAL;
1165 goto err_exit_bulk_access;
1166 }
1167
1168 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1169 num_files * sizeof(int)))) {
1170 goto err_exit_bulk_access;
1171 }
1172
1173 /* fill in the ucred structure */
1174 flags = user_access_structp->flags;
1175 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1176 flags = R_OK;
1177 }
1178
1179 /* check if we've been passed leaf node ids or parent ids */
1180 if (flags & PARENT_IDS_FLAG) {
1181 check_leaf = false;
1182 }
1183
1184 /*
1185 * Create a templated credential; this credential may *NOT*
1186 * be used unless instantiated with a kauth_cred_create();
1187 	 * there must be a corresponding kauth_cred_unref() when it
1188 * is no longer in use (i.e. before it goes out of scope).
1189 */
1190 memset(&myucred, 0, sizeof(myucred));
1191 myucred.cr_ref = 1;
1192 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1193 myucred.cr_ngroups = user_access_structp->num_groups;
1194 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1195 myucred.cr_ngroups = 0;
1196 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1197 myucred.cr_ngroups * sizeof(gid_t)))) {
1198 goto err_exit_bulk_access;
1199 }
1200 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1201 myucred.cr_gmuid = myucred.cr_uid;
1202
1203 my_context.vc_proc = p;
1204 my_context.vc_ucred = kauth_cred_create(&myucred);
1205
1206 /* Check access to each file_id passed in */
1207 for (i = 0; i < num_files; i++) {
1208 #if 0
1209 cnid = (cnid_t) file_ids[i];
1210
1211 /* root always has access */
1212 if (!suser(my_context.vc_ucred, NULL)) {
1213 access[i] = 0;
1214 continue;
1215 }
1216
1217 if (check_leaf) {
1218
1219 /* do the lookup (checks the cnode hash, then the catalog) */
1220 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1221 if (error) {
1222 access[i] = (short) error;
1223 continue;
1224 }
1225
1226 /* before calling CheckAccess(), check the target file for read access */
1227 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1228 cnattr.ca_mode, hfsmp->hfs_mp, my_context.vc_ucred, p );
1229
1230
1231 /* fail fast if no access */
1232 if ((myPerms & flags) == 0) {
1233 access[i] = EACCES;
1234 continue;
1235 }
1236 } else {
1237 /* we were passed an array of parent ids */
1238 catkey.hfsPlus.parentID = cnid;
1239 }
1240
1241 /* if the last guy had the same parent and had access, we're done */
1242 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1243 cache.cachehits++;
1244 access[i] = 0;
1245 continue;
1246 }
1247
1248 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1249 skip_cp, p, my_context.vc_ucred, dev);
1250
1251 if ( myaccess ) {
1252 access[i] = 0; // have access.. no errors to report
1253 } else {
1254 access[i] = (error != 0 ? (short) error : EACCES);
1255 }
1256
1257 prevParent_cnid = catkey.hfsPlus.parentID;
1258 #else
1259 int myErr;
1260
1261 cnid = (cnid_t)file_ids[i];
1262
1263 while (cnid >= kRootDirID) {
1264 /* get the vnode for this cnid */
1265 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1266 if ( myErr ) {
1267 access[i] = EACCES;
1268 break;
1269 }
1270
1271 cnid = VTOC(vp)->c_parentcnid;
1272
1273 hfs_unlock(VTOC(vp));
1274 if (vnode_vtype(vp) == VDIR) {
1275 /*
1276 * XXX This code assumes that none of the
1277 * XXX callbacks from vnode_authorize() will
1278 * XXX take a persistent ref on the context
1279 * XXX credential, which is a bad assumption.
1280 */
1281 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1282 } else {
1283 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1284 }
1285 vnode_put(vp);
1286 access[i] = myErr;
1287 if (myErr) {
1288 break;
1289 }
1290 }
1291 #endif
1292 }
1293
1294 /* copyout the access array */
1295 if ((error = copyout((caddr_t)access, user_access_structp->access,
1296 num_files * sizeof (short)))) {
1297 goto err_exit_bulk_access;
1298 }
1299
1300 err_exit_bulk_access:
1301
1302 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1303
1304 release_pathbuff((char *) cache.acache);
1305 release_pathbuff((char *) cache.haveaccess);
1306 release_pathbuff((char *) file_ids);
1307 release_pathbuff((char *) access);
1308 /* clean up local context, if needed */
1309 if (IS_VALID_CRED(my_context.vc_ucred))
1310 kauth_cred_unref(&my_context.vc_ucred);
1311
1312 return (error);
1313 } /* HFS_BULKACCESS */
1314
1315 case HFS_SETACLSTATE: {
1316 int state;
1317
1318 if (ap->a_data == NULL) {
1319 return (EINVAL);
1320 }
1321
1322 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1323 state = *(int *)ap->a_data;
1324
1325 // super-user can enable or disable acl's on a volume.
1326 // the volume owner can only enable acl's
1327 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1328 return (EPERM);
1329 }
1330 if (state == 0 || state == 1)
1331 return hfs_setextendedsecurity(hfsmp, state);
1332 else
1333 return (EINVAL);
1334 }
1335
1336 case F_FULLFSYNC: {
1337 int error;
1338
1339 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1340 if (error == 0) {
1341 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1342 hfs_unlock(VTOC(vp));
1343 }
1344
1345 return error;
1346 }
1347
1348 case F_CHKCLEAN: {
1349 register struct cnode *cp;
1350 int error;
1351
1352 if (!vnode_isreg(vp))
1353 return EINVAL;
1354
1355 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1356 if (error == 0) {
1357 cp = VTOC(vp);
1358 /*
1359 * used by regression test to determine if
1360 * all the dirty pages (via write) have been cleaned
1361 			 * after a call to 'fsync'.
1362 */
1363 error = is_file_clean(vp, VTOF(vp)->ff_size);
1364 hfs_unlock(cp);
1365 }
1366 return (error);
1367 }
1368
1369 case F_RDADVISE: {
1370 register struct radvisory *ra;
1371 struct filefork *fp;
1372 int error;
1373
1374 if (!vnode_isreg(vp))
1375 return EINVAL;
1376
1377 ra = (struct radvisory *)(ap->a_data);
1378 fp = VTOF(vp);
1379
1380 /* Protect against a size change. */
1381 hfs_lock_truncate(VTOC(vp), TRUE);
1382
1383 if (ra->ra_offset >= fp->ff_size) {
1384 error = EFBIG;
1385 } else {
1386 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1387 }
1388
1389 hfs_unlock_truncate(VTOC(vp));
1390 return (error);
1391 }
1392
1393 case F_READBOOTSTRAP:
1394 case F_WRITEBOOTSTRAP:
1395 {
1396 struct vnode *devvp = NULL;
1397 user_fbootstraptransfer_t *user_bootstrapp;
1398 int devBlockSize;
1399 int error;
1400 uio_t auio;
1401 daddr64_t blockNumber;
1402 u_long blockOffset;
1403 u_long xfersize;
1404 struct buf *bp;
1405 user_fbootstraptransfer_t user_bootstrap;
1406
1407 if (!vnode_isvroot(vp))
1408 return (EINVAL);
1409 	    /* LP64 - when the caller is a 64-bit process we are passed a pointer
1410 	     * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1411 	     * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
1412 */
1413 if (is64bit) {
1414 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1415 }
1416 else {
1417 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1418 user_bootstrapp = &user_bootstrap;
1419 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1420 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1421 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1422 }
1423 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1424 return EINVAL;
1425
1426 devvp = VTOHFS(vp)->hfs_devvp;
1427 auio = uio_create(1, user_bootstrapp->fbt_offset,
1428 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1429 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1430 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1431
1432 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1433
1434 while (uio_resid(auio) > 0) {
1435 blockNumber = uio_offset(auio) / devBlockSize;
1436 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1437 if (error) {
1438 if (bp) buf_brelse(bp);
1439 uio_free(auio);
1440 return error;
1441 };
1442
1443 blockOffset = uio_offset(auio) % devBlockSize;
1444 xfersize = devBlockSize - blockOffset;
1445 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1446 if (error) {
1447 buf_brelse(bp);
1448 uio_free(auio);
1449 return error;
1450 };
1451 if (uio_rw(auio) == UIO_WRITE) {
1452 error = VNOP_BWRITE(bp);
1453 if (error) {
1454 uio_free(auio);
1455 return error;
1456 }
1457 } else {
1458 buf_brelse(bp);
1459 };
1460 };
1461 uio_free(auio);
1462 };
1463 return 0;
1464
1465 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1466 {
1467 if (is64bit) {
1468 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1469 }
1470 else {
1471 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1472 }
1473 return 0;
1474 }
1475
1476 case HFS_GET_MOUNT_TIME:
1477 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1478 break;
1479
1480 case HFS_GET_LAST_MTIME:
1481 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1482 break;
1483
1484 case HFS_SET_BOOT_INFO:
1485 if (!vnode_isvroot(vp))
1486 return(EINVAL);
1487 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1488 return(EACCES); /* must be superuser or owner of filesystem */
1489 HFS_MOUNT_LOCK(hfsmp, TRUE);
1490 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1491 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1492 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1493 break;
1494
1495 case HFS_GET_BOOT_INFO:
1496 if (!vnode_isvroot(vp))
1497 return(EINVAL);
1498 HFS_MOUNT_LOCK(hfsmp, TRUE);
1499 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1500 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1501 break;
1502
1503 default:
1504 return (ENOTTY);
1505 }
1506
1507 /* Should never get here */
1508 return 0;
1509 }
1510
1511 /*
1512 * select
1513 */
1514 int
1515 hfs_vnop_select(__unused struct vnop_select_args *ap)
1516 /*
1517 struct vnop_select_args {
1518 vnode_t a_vp;
1519 int a_which;
1520 int a_fflags;
1521 void *a_wql;
1522 vfs_context_t a_context;
1523 };
1524 */
1525 {
1526 /*
1527 * We should really check to see if I/O is possible.
1528 */
1529 return (1);
1530 }
1531
1532 /*
1533  * Converts a logical block number to a physical block, and optionally returns
1534  * the number of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
1535  * The physical block number is based on the device block size; currently it's 512.
1536  * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1537 */
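/*
 * Worked illustration (numbers assumed, not from the original source): with a
 * 4 KB logical block size and 512-byte device blocks, logical block 10 sits at
 * byte offset 10 * 4096 = 40960, and MapFileBlockC() returns the 512-byte
 * device block backing that offset.  If 32 KB is contiguously allocated from
 * there (bytesContAvail == 32768), the run reported through *runp is
 * 32768/4096 - 1 = 7 logical blocks remaining after this one.
 */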
1538 int
1539 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1540 {
1541 struct cnode *cp = VTOC(vp);
1542 struct filefork *fp = VTOF(vp);
1543 struct hfsmount *hfsmp = VTOHFS(vp);
1544 int retval = E_NONE;
1545 daddr_t logBlockSize;
1546 size_t bytesContAvail = 0;
1547 off_t blockposition;
1548 int lockExtBtree;
1549 int lockflags = 0;
1550
1551 /*
1552 * Check for underlying vnode requests and ensure that logical
1553 * to physical mapping is requested.
1554 */
1555 if (vpp != NULL)
1556 *vpp = cp->c_devvp;
1557 if (bnp == NULL)
1558 return (0);
1559
1560 logBlockSize = GetLogicalBlockSize(vp);
1561 blockposition = (off_t)bn * (off_t)logBlockSize;
1562
1563 lockExtBtree = overflow_extents(fp);
1564
1565 if (lockExtBtree)
1566 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1567
1568 retval = MacToVFSError(
1569 MapFileBlockC (HFSTOVCB(hfsmp),
1570 (FCB*)fp,
1571 MAXPHYSIO,
1572 blockposition,
1573 bnp,
1574 &bytesContAvail));
1575
1576 if (lockExtBtree)
1577 hfs_systemfile_unlock(hfsmp, lockflags);
1578
1579 if (retval == E_NONE) {
1580 /* Figure out how many read ahead blocks there are */
1581 if (runp != NULL) {
1582 if (can_cluster(logBlockSize)) {
1583 /* Make sure this result never goes negative: */
1584 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1585 } else {
1586 *runp = 0;
1587 }
1588 }
1589 }
1590 return (retval);
1591 }
1592
1593 /*
1594 * Convert logical block number to file offset.
1595 */
1596 int
1597 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1598 /*
1599 struct vnop_blktooff_args {
1600 vnode_t a_vp;
1601 daddr64_t a_lblkno;
1602 off_t *a_offset;
1603 };
1604 */
1605 {
1606 if (ap->a_vp == NULL)
1607 return (EINVAL);
1608 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1609
1610 return(0);
1611 }
1612
1613 /*
1614 * Convert file offset to logical block number.
1615 */
1616 int
1617 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1618 /*
1619 struct vnop_offtoblk_args {
1620 vnode_t a_vp;
1621 off_t a_offset;
1622 daddr64_t *a_lblkno;
1623 };
1624 */
1625 {
1626 if (ap->a_vp == NULL)
1627 return (EINVAL);
1628 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1629
1630 return(0);
1631 }
1632
1633 /*
1634 * Map file offset to physical block number.
1635 *
1636 * System file cnodes are expected to be locked (shared or exclusive).
1637 */
1638 int
1639 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1640 /*
1641 struct vnop_blockmap_args {
1642 vnode_t a_vp;
1643 off_t a_foffset;
1644 size_t a_size;
1645 daddr64_t *a_bpn;
1646 size_t *a_run;
1647 void *a_poff;
1648 int a_flags;
1649 vfs_context_t a_context;
1650 };
1651 */
1652 {
1653 struct vnode *vp = ap->a_vp;
1654 struct cnode *cp;
1655 struct filefork *fp;
1656 struct hfsmount *hfsmp;
1657 size_t bytesContAvail = 0;
1658 int retval = E_NONE;
1659 int syslocks = 0;
1660 int lockflags = 0;
1661 struct rl_entry *invalid_range;
1662 enum rl_overlaptype overlaptype;
1663 int started_tr = 0;
1664 int tooklock = 0;
1665
1666 /* Do not allow blockmap operation on a directory */
1667 if (vnode_isdir(vp)) {
1668 return (ENOTSUP);
1669 }
1670
1671 /*
1672 * Check for underlying vnode requests and ensure that logical
1673 * to physical mapping is requested.
1674 */
1675 if (ap->a_bpn == NULL)
1676 return (0);
1677
1678 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1679 if (VTOC(vp)->c_lockowner != current_thread()) {
1680 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1681 tooklock = 1;
1682 } else {
1683 cp = VTOC(vp);
1684 panic("blockmap: %s cnode lock already held!\n",
1685 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1686 }
1687 }
1688 hfsmp = VTOHFS(vp);
1689 cp = VTOC(vp);
1690 fp = VTOF(vp);
1691
1692 retry:
1693 if (fp->ff_unallocblocks) {
1694 if (hfs_start_transaction(hfsmp) != 0) {
1695 retval = EINVAL;
1696 goto exit;
1697 } else {
1698 started_tr = 1;
1699 }
1700 syslocks = SFL_EXTENTS | SFL_BITMAP;
1701
1702 } else if (overflow_extents(fp)) {
1703 syslocks = SFL_EXTENTS;
1704 }
1705
1706 if (syslocks)
1707 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1708
1709 /*
1710 * Check for any delayed allocations.
1711 */
1712 if (fp->ff_unallocblocks) {
1713 SInt64 actbytes;
1714 u_int32_t loanedBlocks;
1715
1716 //
1717 // Make sure we have a transaction. It's possible
1718 // that we came in and fp->ff_unallocblocks was zero
1719 // but during the time we blocked acquiring the extents
1720 // btree, ff_unallocblocks became non-zero and so we
1721 // will need to start a transaction.
1722 //
1723 if (started_tr == 0) {
1724 if (syslocks) {
1725 hfs_systemfile_unlock(hfsmp, lockflags);
1726 syslocks = 0;
1727 }
1728 goto retry;
1729 }
1730
1731 /*
1732 		 * Note: ExtendFileC will release any blocks on loan and
1733 		 * acquire real blocks.  So we ask to extend by zero bytes
1734 * since ExtendFileC will account for the virtual blocks.
1735 */
1736
1737 loanedBlocks = fp->ff_unallocblocks;
1738 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1739 kEFAllMask | kEFNoClumpMask, &actbytes);
1740
1741 if (retval) {
1742 fp->ff_unallocblocks = loanedBlocks;
1743 cp->c_blocks += loanedBlocks;
1744 fp->ff_blocks += loanedBlocks;
1745
1746 HFS_MOUNT_LOCK(hfsmp, TRUE);
1747 hfsmp->loanedBlocks += loanedBlocks;
1748 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1749 }
1750
1751 if (retval) {
1752 hfs_systemfile_unlock(hfsmp, lockflags);
1753 cp->c_flag |= C_MODIFIED;
1754 if (started_tr) {
1755 (void) hfs_update(vp, TRUE);
1756 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1757
1758 hfs_end_transaction(hfsmp);
1759 }
1760 goto exit;
1761 }
1762 }
1763
1764 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1765 ap->a_bpn, &bytesContAvail);
1766 if (syslocks) {
1767 hfs_systemfile_unlock(hfsmp, lockflags);
1768 syslocks = 0;
1769 }
1770
1771 if (started_tr) {
1772 (void) hfs_update(vp, TRUE);
1773 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1774 hfs_end_transaction(hfsmp);
1775 started_tr = 0;
1776 }
1777 if (retval) {
1778 goto exit;
1779 }
1780
1781 /* Adjust the mapping information for invalid file ranges: */
1782 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1783 ap->a_foffset + (off_t)bytesContAvail - 1,
1784 &invalid_range);
1785 if (overlaptype != RL_NOOVERLAP) {
1786 switch(overlaptype) {
1787 case RL_MATCHINGOVERLAP:
1788 case RL_OVERLAPCONTAINSRANGE:
1789 case RL_OVERLAPSTARTSBEFORE:
1790 /* There's no valid block for this byte offset: */
1791 *ap->a_bpn = (daddr64_t)-1;
1792 /* There's no point limiting the amount to be returned
1793 * if the invalid range that was hit extends all the way
1794 			 * to the EOF (i.e. there are no valid bytes between the
1795 * end of this range and the file's EOF):
1796 */
1797 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1798 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1799 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1800 }
1801 break;
1802
1803 case RL_OVERLAPISCONTAINED:
1804 case RL_OVERLAPENDSAFTER:
1805 /* The range of interest hits an invalid block before the end: */
1806 if (invalid_range->rl_start == ap->a_foffset) {
1807 /* There's actually no valid information to be had starting here: */
1808 *ap->a_bpn = (daddr64_t)-1;
1809 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1810 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1811 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1812 }
1813 } else {
1814 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1815 }
1816 break;
1817
1818 case RL_NOOVERLAP:
1819 break;
1820 } /* end switch */
1821 if (bytesContAvail > ap->a_size)
1822 bytesContAvail = ap->a_size;
1823 }
1824 if (ap->a_run)
1825 *ap->a_run = bytesContAvail;
1826
1827 if (ap->a_poff)
1828 *(int *)ap->a_poff = 0;
1829 exit:
1830 if (tooklock)
1831 hfs_unlock(cp);
1832
1833 return (MacToVFSError(retval));
1834 }
1835
1836
1837 /*
1838 * prepare and issue the I/O
1839 * buf_strategy knows how to deal
1840 * with requests that require
1841 * fragmented I/Os
1842 */
1843 int
1844 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1845 {
1846 buf_t bp = ap->a_bp;
1847 vnode_t vp = buf_vnode(bp);
1848 struct cnode *cp = VTOC(vp);
1849
1850 return (buf_strategy(cp->c_devvp, ap));
1851 }
1852
1853
1854 static int
1855 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1856 {
1857 register struct cnode *cp = VTOC(vp);
1858 struct filefork *fp = VTOF(vp);
1859 	struct proc *p = vfs_context_proc(context);
1860 kauth_cred_t cred = vfs_context_ucred(context);
1861 int retval;
1862 off_t bytesToAdd;
1863 off_t actualBytesAdded;
1864 off_t filebytes;
1865 u_int64_t old_filesize;
1866 u_long fileblocks;
1867 int blksize;
1868 struct hfsmount *hfsmp;
1869 int lockflags;
1870
1871 blksize = VTOVCB(vp)->blockSize;
1872 fileblocks = fp->ff_blocks;
1873 filebytes = (off_t)fileblocks * (off_t)blksize;
1874 old_filesize = fp->ff_size;
1875
1876 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1877 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1878
1879 if (length < 0)
1880 return (EINVAL);
1881
1882 /* This should only happen with a corrupt filesystem */
1883 if ((off_t)fp->ff_size < 0)
1884 return (EINVAL);
1885
1886 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1887 return (EFBIG);
1888
1889 hfsmp = VTOHFS(vp);
1890
1891 retval = E_NONE;
1892
1893 /* Files that are changing size are not hot file candidates. */
1894 if (hfsmp->hfc_stage == HFC_RECORDING) {
1895 fp->ff_bytesread = 0;
1896 }
1897
1898 /*
1899 * We cannot just check if fp->ff_size == length (as an optimization)
1900 * since there may be extra physical blocks that also need truncation.
1901 */
1902 #if QUOTA
1903 if ((retval = hfs_getinoquota(cp)))
1904 return(retval);
1905 #endif /* QUOTA */
1906
1907 /*
1908 * Lengthen the size of the file. We must ensure that the
1909 * last byte of the file is allocated. Since the smallest
1910 * value of ff_size is 0, length will be at least 1.
1911 */
1912 if (length > (off_t)fp->ff_size) {
1913 #if QUOTA
1914 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1915 cred, 0);
1916 if (retval)
1917 goto Err_Exit;
1918 #endif /* QUOTA */
1919 /*
1920 * If we don't have enough physical space then
1921 * we need to extend the physical size.
1922 */
1923 if (length > filebytes) {
1924 int eflags;
1925 u_long blockHint = 0;
1926
1927 /* All or nothing and don't round up to clumpsize. */
1928 eflags = kEFAllMask | kEFNoClumpMask;
1929
1930 if (cred && suser(cred, NULL) != 0)
1931 eflags |= kEFReserveMask; /* keep a reserve */
1932
1933 /*
1934 * Allocate Journal and Quota files in metadata zone.
1935 */
1936 if (filebytes == 0 &&
1937 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1938 hfs_virtualmetafile(cp)) {
1939 eflags |= kEFMetadataMask;
1940 blockHint = hfsmp->hfs_metazone_start;
1941 }
1942 if (hfs_start_transaction(hfsmp) != 0) {
1943 retval = EINVAL;
1944 goto Err_Exit;
1945 }
1946
1947 /* Protect extents b-tree and allocation bitmap */
1948 lockflags = SFL_BITMAP;
1949 if (overflow_extents(fp))
1950 lockflags |= SFL_EXTENTS;
1951 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1952
1953 while ((length > filebytes) && (retval == E_NONE)) {
1954 bytesToAdd = length - filebytes;
1955 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1956 (FCB*)fp,
1957 bytesToAdd,
1958 blockHint,
1959 eflags,
1960 &actualBytesAdded));
1961
1962 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1963 if (actualBytesAdded == 0 && retval == E_NONE) {
1964 if (length > filebytes)
1965 length = filebytes;
1966 break;
1967 }
1968 } /* endwhile */
1969
1970 hfs_systemfile_unlock(hfsmp, lockflags);
1971
1972 if (hfsmp->jnl) {
1973 (void) hfs_update(vp, TRUE);
1974 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1975 }
1976
1977 hfs_end_transaction(hfsmp);
1978
1979 if (retval)
1980 goto Err_Exit;
1981
1982 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1983 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1984 }
1985
1986 if (!(flags & IO_NOZEROFILL)) {
1987 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1988 struct rl_entry *invalid_range;
1989 off_t zero_limit;
1990
1991 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1992 if (length < zero_limit) zero_limit = length;
1993
1994 if (length > (off_t)fp->ff_size) {
1995 struct timeval tv;
1996
1997 /* Extending the file: time to fill out the current last page w. zeroes? */
1998 if ((fp->ff_size & PAGE_MASK_64) &&
1999 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2000 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2001
2002 /* There's some valid data at the start of the (current) last page
2003 of the file, so zero out the remainder of that page to ensure the
2004 entire page contains valid data. Since there is no invalid range
2005 possible past the (current) eof, there's no need to remove anything
2006 from the invalid range list before calling cluster_write(): */
2007 hfs_unlock(cp);
2008 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2009 fp->ff_size, (off_t)0,
2010 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2011 hfs_lock(cp, HFS_FORCE_LOCK);
2012 if (retval) goto Err_Exit;
2013
2014 /* Merely invalidate the remaining area, if necessary: */
2015 if (length > zero_limit) {
2016 microuptime(&tv);
2017 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2018 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2019 }
2020 } else {
2021 /* The page containing the (current) eof is invalid: just add the
2022 remainder of the page to the invalid list, along with the area
2023 being newly allocated:
2024 */
2025 microuptime(&tv);
2026 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2027 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2028 }
2029 }
2030 } else {
2031 panic("hfs_truncate: invoked on non-UBC object?!");
2032 }
2033 }
2034 cp->c_touch_modtime = TRUE;
2035 fp->ff_size = length;
2036
2037 /* Nested transactions will do their own ubc_setsize. */
2038 if (!skipsetsize) {
2039 /*
2040 * ubc_setsize can cause a pagein here
2041 * so we need to drop cnode lock.
2042 */
2043 hfs_unlock(cp);
2044 ubc_setsize(vp, length);
2045 hfs_lock(cp, HFS_FORCE_LOCK);
2046 }
2047
2048 } else { /* Shorten the size of the file */
2049
2050 if ((off_t)fp->ff_size > length) {
2051 /*
2052 * Any buffers that are past the truncation point need to be
2053 * invalidated (to maintain buffer cache consistency).
2054 */
2055
2056 /* Nested transactions will do their own ubc_setsize. */
2057 if (!skipsetsize) {
2058 /*
2059 * ubc_setsize can cause a pageout here
2060 * so we need to drop cnode lock.
2061 */
2062 hfs_unlock(cp);
2063 ubc_setsize(vp, length);
2064 hfs_lock(cp, HFS_FORCE_LOCK);
2065 }
2066
2067 /* Any space previously marked as invalid is now irrelevant: */
2068 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2069 }
2070
2071 /*
2072 * Account for any unmapped blocks. Note that the new
2073 * file length can still end up with unmapped blocks.
2074 */
2075 if (fp->ff_unallocblocks > 0) {
2076 u_int32_t finalblks;
2077 u_int32_t loanedBlocks;
2078
2079 HFS_MOUNT_LOCK(hfsmp, TRUE);
2080
2081 loanedBlocks = fp->ff_unallocblocks;
2082 cp->c_blocks -= loanedBlocks;
2083 fp->ff_blocks -= loanedBlocks;
2084 fp->ff_unallocblocks = 0;
2085
2086 hfsmp->loanedBlocks -= loanedBlocks;
2087
2088 finalblks = (length + blksize - 1) / blksize;
2089 if (finalblks > fp->ff_blocks) {
2090 /* calculate required unmapped blocks */
2091 loanedBlocks = finalblks - fp->ff_blocks;
2092 hfsmp->loanedBlocks += loanedBlocks;
2093
2094 fp->ff_unallocblocks = loanedBlocks;
2095 cp->c_blocks += loanedBlocks;
2096 fp->ff_blocks += loanedBlocks;
2097 }
2098 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2099 }
2100
2101 /*
2102 * For a TBE process the deallocation of the file blocks is
2103 * delayed until the file is closed. And hfs_close calls
2104 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2105 * isn't set, we make sure this isn't a TBE process.
2106 */
2107 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2108 #if QUOTA
2109 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2110 #endif /* QUOTA */
2111 if (hfs_start_transaction(hfsmp) != 0) {
2112 retval = EINVAL;
2113 goto Err_Exit;
2114 }
2115
2116 if (fp->ff_unallocblocks == 0) {
2117 /* Protect extents b-tree and allocation bitmap */
2118 lockflags = SFL_BITMAP;
2119 if (overflow_extents(fp))
2120 lockflags |= SFL_EXTENTS;
2121 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2122
2123 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2124 (FCB*)fp, length, false));
2125
2126 hfs_systemfile_unlock(hfsmp, lockflags);
2127 }
2128 if (hfsmp->jnl) {
2129 if (retval == 0) {
2130 fp->ff_size = length;
2131 }
2132 (void) hfs_update(vp, TRUE);
2133 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2134 }
2135
2136 hfs_end_transaction(hfsmp);
2137
2138 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2139 if (retval)
2140 goto Err_Exit;
2141 #if QUOTA
2142 /* These are bytesreleased */
2143 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2144 #endif /* QUOTA */
2145 }
2146 /* Only set update flag if the logical length changes */
2147 if (old_filesize != length)
2148 cp->c_touch_modtime = TRUE;
2149 fp->ff_size = length;
2150 }
2151 cp->c_touch_chgtime = TRUE;
2152 retval = hfs_update(vp, MNT_WAIT);
2153 if (retval) {
2154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2155 -1, -1, -1, retval, 0);
2156 }
2157
2158 Err_Exit:
2159
2160 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2161 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2162
2163 return (retval);
2164 }
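
/*
 * A minimal user-space sketch of the page-rounding arithmetic used by the
 * zero-fill path in do_hfs_truncate() above: round the old EOF up to the
 * next page boundary to find how far cluster_write() should zero, but never
 * past the new logical length.  EX_PAGE_SIZE assumes a 4 KiB page and the
 * ex_* names are hypothetical, not HFS routines.
 */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)

/* Model of the zero_limit computation. */
static uint64_t
ex_zero_limit(uint64_t old_eof, uint64_t new_length)
{
	uint64_t limit = (old_eof + EX_PAGE_SIZE - 1) & ~EX_PAGE_MASK;

	/* Never zero past the new logical length. */
	return (new_length < limit) ? new_length : limit;
}

static void
ex_zero_limit_demo(void)
{
	/* Old EOF mid-page: zero out to the end of that page (8192)... */
	assert(ex_zero_limit(5000, 100000) == 8192);
	/* ...unless the new length stops sooner. */
	assert(ex_zero_limit(5000, 6000) == 6000);
	/* Old EOF already page aligned: nothing extra to zero. */
	assert(ex_zero_limit(8192, 100000) == 8192);
}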
2165
2166
2167
2168 /*
2169 * Truncate a cnode to at most length size, freeing (or adding) the
2170 * disk blocks.
2171 */
2172 __private_extern__
2173 int
2174 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2175 vfs_context_t context)
2176 {
2177 struct filefork *fp = VTOF(vp);
2178 off_t filebytes;
2179 u_long fileblocks;
2180 int blksize, error = 0;
2181 struct cnode *cp = VTOC(vp);
2182
2183 if (vnode_isdir(vp))
2184 return (EISDIR); /* cannot truncate an HFS directory! */
2185
2186 blksize = VTOVCB(vp)->blockSize;
2187 fileblocks = fp->ff_blocks;
2188 filebytes = (off_t)fileblocks * (off_t)blksize;
2189
2190 // have to loop truncating or growing files that are
2191 // really big because otherwise transactions can get
2192 // enormous and consume too many kernel resources.
2193
2194 if (length < filebytes) {
2195 while (filebytes > length) {
2196 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2197 filebytes -= HFS_BIGFILE_SIZE;
2198 } else {
2199 filebytes = length;
2200 }
2201 cp->c_flag |= C_FORCEUPDATE;
2202 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2203 if (error)
2204 break;
2205 }
2206 } else if (length > filebytes) {
2207 while (filebytes < length) {
2208 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2209 filebytes += HFS_BIGFILE_SIZE;
2210 } else {
2211 filebytes = length;
2212 }
2213 cp->c_flag |= C_FORCEUPDATE;
2214 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2215 if (error)
2216 break;
2217 }
2218 } else /* Same logical size */ {
2219
2220 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2221 }
2222 /* Files that are changing size are not hot file candidates. */
2223 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2224 fp->ff_bytesread = 0;
2225 }
2226
2227 return (error);
2228 }
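
/*
 * A condensed user-space sketch of the stepping that hfs_truncate() does
 * above: move filebytes toward the target length one bounded chunk at a
 * time so no single transaction covers an enormous extent change.  In the
 * real code the chunking only happens when the fork has overflow extents,
 * and a matching size still gets one do_hfs_truncate() call; EX_BIG_STEP is
 * a placeholder for HFS_BIGFILE_SIZE and the ex_* names are hypothetical.
 */
#include <stdint.h>

#define EX_BIG_STEP	(32ULL * 1024 * 1024)	/* placeholder chunk size */

/* Stand-in for one bounded do_hfs_truncate() call. */
static int
ex_truncate_step(uint64_t target)
{
	(void)target;
	return 0;
}

/* Shrink or grow 'filebytes' toward 'length' one chunk at a time. */
static int
ex_chunked_truncate(uint64_t filebytes, uint64_t length)
{
	int error = 0;

	while (filebytes != length && error == 0) {
		if (filebytes > length && filebytes - length > EX_BIG_STEP)
			filebytes -= EX_BIG_STEP;
		else if (filebytes < length && length - filebytes > EX_BIG_STEP)
			filebytes += EX_BIG_STEP;
		else
			filebytes = length;
		error = ex_truncate_step(filebytes);
	}
	return error;
}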
2229
2230
2231
2232 /*
2233 * Preallocate file storage space.
2234 */
2235 int
2236 hfs_vnop_allocate(struct vnop_allocate_args /* {
2237 vnode_t a_vp;
2238 off_t a_length;
2239 u_int32_t a_flags;
2240 off_t *a_bytesallocated;
2241 off_t a_offset;
2242 vfs_context_t a_context;
2243 } */ *ap)
2244 {
2245 struct vnode *vp = ap->a_vp;
2246 struct cnode *cp;
2247 struct filefork *fp;
2248 ExtendedVCB *vcb;
2249 off_t length = ap->a_length;
2250 off_t startingPEOF;
2251 off_t moreBytesRequested;
2252 off_t actualBytesAdded;
2253 off_t filebytes;
2254 u_long fileblocks;
2255 int retval, retval2;
2256 UInt32 blockHint;
2257 UInt32 extendFlags; /* For call to ExtendFileC */
2258 struct hfsmount *hfsmp;
2259 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2260 int lockflags;
2261
2262 *(ap->a_bytesallocated) = 0;
2263
2264 if (!vnode_isreg(vp))
2265 return (EISDIR);
2266 if (length < (off_t)0)
2267 return (EINVAL);
2268
2269 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2270 return (retval);
2271 cp = VTOC(vp);
2272 fp = VTOF(vp);
2273 hfsmp = VTOHFS(vp);
2274 vcb = VTOVCB(vp);
2275
2276 fileblocks = fp->ff_blocks;
2277 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2278
2279 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2280 retval = EINVAL;
2281 goto Err_Exit;
2282 }
2283
2284 /* Fill in the flags word for the call to Extend the file */
2285
2286 extendFlags = kEFNoClumpMask;
2287 if (ap->a_flags & ALLOCATECONTIG)
2288 extendFlags |= kEFContigMask;
2289 if (ap->a_flags & ALLOCATEALL)
2290 extendFlags |= kEFAllMask;
2291 if (cred && suser(cred, NULL) != 0)
2292 extendFlags |= kEFReserveMask;
2293
2294 retval = E_NONE;
2295 blockHint = 0;
2296 startingPEOF = filebytes;
2297
2298 if (ap->a_flags & ALLOCATEFROMPEOF)
2299 length += filebytes;
2300 else if (ap->a_flags & ALLOCATEFROMVOL)
2301 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2302
2303 /* If no changes are necessary, then we're done */
2304 if (filebytes == length)
2305 goto Std_Exit;
2306
2307 /*
2308 * Lengthen the size of the file. We must ensure that the
2309 * last byte of the file is allocated. Since the smallest
2310 * value of filebytes is 0, length will be at least 1.
2311 */
2312 if (length > filebytes) {
2313 moreBytesRequested = length - filebytes;
2314
2315 #if QUOTA
2316 retval = hfs_chkdq(cp,
2317 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2318 cred, 0);
2319 if (retval)
2320 goto Err_Exit;
2321
2322 #endif /* QUOTA */
2323 /*
2324 * Metadata zone checks.
2325 */
2326 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2327 /*
2328 * Allocate Journal and Quota files in metadata zone.
2329 */
2330 if (hfs_virtualmetafile(cp)) {
2331 extendFlags |= kEFMetadataMask;
2332 blockHint = hfsmp->hfs_metazone_start;
2333 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2334 (blockHint <= hfsmp->hfs_metazone_end)) {
2335 /*
2336 * Move blockHint outside metadata zone.
2337 */
2338 blockHint = hfsmp->hfs_metazone_end + 1;
2339 }
2340 }
2341
2342 if (hfs_start_transaction(hfsmp) != 0) {
2343 retval = EINVAL;
2344 goto Err_Exit;
2345 }
2346
2347 /* Protect extents b-tree and allocation bitmap */
2348 lockflags = SFL_BITMAP;
2349 if (overflow_extents(fp))
2350 lockflags |= SFL_EXTENTS;
2351 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2352
2353 retval = MacToVFSError(ExtendFileC(vcb,
2354 (FCB*)fp,
2355 moreBytesRequested,
2356 blockHint,
2357 extendFlags,
2358 &actualBytesAdded));
2359
2360 *(ap->a_bytesallocated) = actualBytesAdded;
2361 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363 hfs_systemfile_unlock(hfsmp, lockflags);
2364
2365 if (hfsmp->jnl) {
2366 (void) hfs_update(vp, TRUE);
2367 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2368 }
2369
2370 hfs_end_transaction(hfsmp);
2371
2372 /*
2373 * if we get an error and no changes were made then exit
2374 * otherwise we must do the hfs_update to reflect the changes
2375 */
2376 if (retval && (startingPEOF == filebytes))
2377 goto Err_Exit;
2378
2379 /*
2380 * Adjust actualBytesAdded to be allocation block aligned, not
2381 * clump size aligned.
2382 * NOTE: What we report here does not match what is actually
2383 * allocated until the file is closed, when we truncate the file
2384 * to allocation block size.
2385 */
2386 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2387 *(ap->a_bytesallocated) =
2388 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2389
2390 } else { /* Shorten the size of the file */
2391
2392 if (fp->ff_size > length) {
2393 /*
2394 * Any buffers that are past the truncation point need to be
2395 * invalidated (to maintain buffer cache consistency).
2396 */
2397 }
2398
2399 if (hfs_start_transaction(hfsmp) != 0) {
2400 retval = EINVAL;
2401 goto Err_Exit;
2402 }
2403
2404 /* Protect extents b-tree and allocation bitmap */
2405 lockflags = SFL_BITMAP;
2406 if (overflow_extents(fp))
2407 lockflags |= SFL_EXTENTS;
2408 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2409
2410 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2411
2412 hfs_systemfile_unlock(hfsmp, lockflags);
2413
2414 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2415
2416 if (hfsmp->jnl) {
2417 (void) hfs_update(vp, TRUE);
2418 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2419 }
2420
2421 hfs_end_transaction(hfsmp);
2422
2423
2424 /*
2425 * if we get an error and no changes were made then exit
2426 * otherwise we must do the hfs_update to reflect the changes
2427 */
2428 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2429 #if QUOTA
2430 /* These are bytesreleased */
2431 (void) hfs_chkdq(cp, (int64_t)-(startingPEOF - filebytes), NOCRED, 0);
2432 #endif /* QUOTA */
2433
2434 if (fp->ff_size > filebytes) {
2435 fp->ff_size = filebytes;
2436
2437 hfs_unlock(cp);
2438 ubc_setsize(vp, fp->ff_size);
2439 hfs_lock(cp, HFS_FORCE_LOCK);
2440 }
2441 }
2442
2443 Std_Exit:
2444 cp->c_touch_chgtime = TRUE;
2445 cp->c_touch_modtime = TRUE;
2446 retval2 = hfs_update(vp, MNT_WAIT);
2447
2448 if (retval == 0)
2449 retval = retval2;
2450 Err_Exit:
2451 hfs_unlock(cp);
2452 return (retval);
2453 }
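
/*
 * A small sketch of the reporting rule used by hfs_vnop_allocate() above:
 * when ExtendFileC() rounds a request up to the clump size, report only the
 * allocation-block aligned amount, since the file is trimmed back to block
 * alignment when it is closed.  The ex_* names and EX_ROUNDUP are
 * hypothetical helpers, not HFS definitions.
 */
#include <assert.h>
#include <stdint.h>

#define EX_ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

/* Bytes to report back to the caller for a grant of 'granted' bytes. */
static uint64_t
ex_bytes_to_report(uint64_t requested, uint64_t granted, uint64_t blocksize)
{
	if (granted != 0 && requested < granted)
		return EX_ROUNDUP(requested, blocksize);
	return granted;
}

static void
ex_report_demo(void)
{
	/* 10,000 bytes asked; clump pushed the grant to 65,536; 4 KiB blocks. */
	assert(ex_bytes_to_report(10000, 65536, 4096) == 12288);
	/* Grant matched the request exactly: report it as-is. */
	assert(ex_bytes_to_report(8192, 8192, 4096) == 8192);
}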
2454
2455
2456 /*
2457 * Pagein for HFS filesystem
2458 */
2459 int
2460 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2461 /*
2462 struct vnop_pagein_args {
2463 vnode_t a_vp,
2464 upl_t a_pl,
2465 vm_offset_t a_pl_offset,
2466 off_t a_f_offset,
2467 size_t a_size,
2468 int a_flags
2469 vfs_context_t a_context;
2470 };
2471 */
2472 {
2473 vnode_t vp = ap->a_vp;
2474 int error;
2475
2476 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2477 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2478 /*
2479 * Keep track of blocks read.
2480 */
2481 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2482 struct cnode *cp;
2483 struct filefork *fp;
2484 int bytesread;
2485 int took_cnode_lock = 0;
2486
2487 cp = VTOC(vp);
2488 fp = VTOF(vp);
2489
2490 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2491 bytesread = fp->ff_size;
2492 else
2493 bytesread = ap->a_size;
2494
2495 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2496 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2497 hfs_lock(cp, HFS_FORCE_LOCK);
2498 took_cnode_lock = 1;
2499 }
2500 /*
2501 * If this file hasn't been seen since the start of
2502 * the current sampling period then start over.
2503 */
2504 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2505 struct timeval tv;
2506
2507 fp->ff_bytesread = bytesread;
2508 microtime(&tv);
2509 cp->c_atime = tv.tv_sec;
2510 } else {
2511 fp->ff_bytesread += bytesread;
2512 }
2513 cp->c_touch_acctime = TRUE;
2514 if (took_cnode_lock)
2515 hfs_unlock(cp);
2516 }
2517 return (error);
2518 }
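
/*
 * A minimal sketch of the hot-file accounting decision made in
 * hfs_vnop_pagein() above: restart the per-file read counter when the file
 * was last touched before the current sampling period began, otherwise just
 * accumulate (the conditional cnode locking is elided).  The ex_* types and
 * names are hypothetical.
 */
#include <stdint.h>

struct ex_hotfile_stats {
	uint64_t bytesread;	/* models fp->ff_bytesread */
	int64_t  atime;		/* models cp->c_atime */
};

static void
ex_account_pagein(struct ex_hotfile_stats *st, uint64_t bytesread,
                  int64_t now, int64_t period_start)
{
	if (st->atime < period_start) {
		/* First read seen this sampling period: start over. */
		st->bytesread = bytesread;
		st->atime = now;
	} else {
		st->bytesread += bytesread;
	}
}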
2519
2520 /*
2521 * Pageout for HFS filesystem.
2522 */
2523 int
2524 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2525 /*
2526 struct vnop_pageout_args {
2527 vnode_t a_vp,
2528 upl_t a_pl,
2529 vm_offset_t a_pl_offset,
2530 off_t a_f_offset,
2531 size_t a_size,
2532 int a_flags
2533 vfs_context_t a_context;
2534 };
2535 */
2536 {
2537 vnode_t vp = ap->a_vp;
2538 struct cnode *cp;
2539 struct filefork *fp;
2540 int retval;
2541 off_t end_of_range;
2542 off_t filesize;
2543
2544 cp = VTOC(vp);
2545 if (cp->c_lockowner == current_thread()) {
2546 panic("pageout: %s cnode lock already held!\n",
2547 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2548 }
2549 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2550 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2551 ubc_upl_abort_range(ap->a_pl,
2552 ap->a_pl_offset,
2553 ap->a_size,
2554 UPL_ABORT_FREE_ON_EMPTY);
2555 }
2556 return (retval);
2557 }
2558 fp = VTOF(vp);
2559
2560 filesize = fp->ff_size;
2561 end_of_range = ap->a_f_offset + ap->a_size - 1;
2562
2563 if (end_of_range >= filesize) {
2564 end_of_range = (off_t)(filesize - 1);
2565 }
2566 if (ap->a_f_offset < filesize) {
2567 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2568 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2569 }
2570 hfs_unlock(cp);
2571
2572 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2573 ap->a_size, filesize, ap->a_flags);
2574
2575 /*
2576 * If data was written, and setuid or setgid bits are set and
2577 * this process is not the superuser then clear the setuid and
2578 * setgid bits as a precaution against tampering.
2579 */
2580 if ((retval == 0) &&
2581 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2582 (vfs_context_suser(ap->a_context) != 0)) {
2583 hfs_lock(cp, HFS_FORCE_LOCK);
2584 cp->c_mode &= ~(S_ISUID | S_ISGID);
2585 cp->c_touch_chgtime = TRUE;
2586 hfs_unlock(cp);
2587 }
2588 return (retval);
2589 }
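
/*
 * The same setuid/setgid precaution applied by hfs_vnop_pageout() above,
 * shown in isolation: after a successful write by a non-superuser, drop
 * both bits from the mode.  Uses the standard <sys/stat.h> bit definitions;
 * the ex_* names are hypothetical.
 */
#include <assert.h>
#include <sys/stat.h>

/* Mode a non-privileged writer should leave behind after a write. */
static mode_t
ex_mode_after_write(mode_t mode, int writer_is_superuser)
{
	if (!writer_is_superuser && (mode & (S_ISUID | S_ISGID)))
		mode &= ~(S_ISUID | S_ISGID);
	return mode;
}

static void
ex_mode_demo(void)
{
	assert(ex_mode_after_write(S_ISUID | 0755, 0) == 0755);
	assert(ex_mode_after_write(S_ISUID | 0755, 1) == (S_ISUID | 0755));
}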
2590
2591 /*
2592 * Intercept B-Tree node writes to unswap them if necessary.
2593 */
2594 int
2595 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2596 {
2597 int retval = 0;
2598 register struct buf *bp = ap->a_bp;
2599 register struct vnode *vp = buf_vnode(bp);
2600 BlockDescriptor block;
2601
2602 /* Trap B-Tree writes */
2603 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2604 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2605 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2606 (vp == VTOHFS(vp)->hfc_filevp)) {
2607
2608 /*
2609 * Swap and validate the node if it is in native byte order.
2610 * This is always true on big endian, so we always validate
2611 * before writing here. On little endian, the node typically has
2612 * been swapped and validated when it was written to the journal,
2613 * so we won't do anything here.
2614 */
2615 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2616 /* Prepare the block pointer */
2617 block.blockHeader = bp;
2618 block.buffer = (char *)buf_dataptr(bp);
2619 block.blockNum = buf_lblkno(bp);
2620 /* not found in cache ==> came from disk */
2621 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2622 block.blockSize = buf_count(bp);
2623
2624 /* Endian un-swap B-Tree node */
2625 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2626 if (retval)
2627 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2628 }
2629 }
2630
2631 /* This buffer shouldn't be locked anymore but if it is clear it */
2632 if ((buf_flags(bp) & B_LOCKED)) {
2633 // XXXdbg
2634 if (VTOHFS(vp)->jnl) {
2635 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2636 }
2637 buf_clearflags(bp, B_LOCKED);
2638 }
2639 retval = vn_bwrite (ap);
2640
2641 return (retval);
2642 }
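
/*
 * A standalone sketch of the byte-order probe used by hfs_vnop_bwrite()
 * above: the last two bytes of a B-tree node hold the offset of record 0,
 * which is always 14 (sizeof(BTNodeDescriptor)), so reading that field as a
 * host-order value reveals whether the node in memory is still in host byte
 * order (0x000e) or already in big-endian disk order.  The ex_* name is
 * hypothetical.
 */
#include <stdint.h>
#include <string.h>

/* Returns non-zero if the node buffer appears to be in host byte order. */
static int
ex_node_is_host_order(const void *node, size_t node_size)
{
	uint16_t last;

	memcpy(&last, (const char *)node + node_size - sizeof(last), sizeof(last));
	return (last == 0x000e);
}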
2643
2644 /*
2645 * Relocate a file to a new location on disk
2646 * cnode must be locked on entry
2647 *
2648 * Relocation occurs by cloning the file's data from its
2649 * current set of blocks to a new set of blocks. During
2650 * the relocation all of the blocks (old and new) are
2651 * owned by the file.
2652 *
2653 * -----------------
2654 * |///////////////|
2655 * -----------------
2656 * 0 N (file offset)
2657 *
2658 * ----------------- -----------------
2659 * |///////////////| | | STEP 1 (acquire new blocks)
2660 * ----------------- -----------------
2661 * 0 N N+1 2N
2662 *
2663 * ----------------- -----------------
2664 * |///////////////| |///////////////| STEP 2 (clone data)
2665 * ----------------- -----------------
2666 * 0 N N+1 2N
2667 *
2668 * -----------------
2669 * |///////////////| STEP 3 (head truncate blocks)
2670 * -----------------
2671 * 0 N
2672 *
2673 * During steps 2 and 3 page-outs to file offsets less
2674 * than or equal to N are suspended.
2675 *
2676 * During step 3 page-ins to the file get suspended.
2677 */
2678 __private_extern__
2679 int
2680 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2681 struct proc *p)
2682 {
2683 struct cnode *cp;
2684 struct filefork *fp;
2685 struct hfsmount *hfsmp;
2686 u_int32_t headblks;
2687 u_int32_t datablks;
2688 u_int32_t blksize;
2689 u_int32_t growsize;
2690 u_int32_t nextallocsave;
2691 daddr64_t sector_a, sector_b;
2692 int disabled_caching = 0;
2693 int eflags;
2694 off_t newbytes;
2695 int retval;
2696 int lockflags = 0;
2697 int took_trunc_lock = 0;
2698 int started_tr = 0;
2699 enum vtype vnodetype;
2700
2701 vnodetype = vnode_vtype(vp);
2702 if (vnodetype != VREG && vnodetype != VLNK) {
2703 return (EPERM);
2704 }
2705
2706 hfsmp = VTOHFS(vp);
2707 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2708 return (ENOSPC);
2709 }
2710
2711 cp = VTOC(vp);
2712 fp = VTOF(vp);
2713 if (fp->ff_unallocblocks)
2714 return (EINVAL);
2715 blksize = hfsmp->blockSize;
2716 if (blockHint == 0)
2717 blockHint = hfsmp->nextAllocation;
2718
2719 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2720 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2721 return (EFBIG);
2722 }
2723
2724 //
2725 // We do not believe that this call to hfs_fsync() is
2726 // necessary and it causes a journal transaction
2727 // deadlock so we are removing it.
2728 //
2729 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2730 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2731 // if (retval)
2732 // return (retval);
2733 //}
2734
2735 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2736 hfs_unlock(cp);
2737 hfs_lock_truncate(cp, TRUE);
2738 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2739 hfs_unlock_truncate(cp);
2740 return (retval);
2741 }
2742 took_trunc_lock = 1;
2743 }
2744 headblks = fp->ff_blocks;
2745 datablks = howmany(fp->ff_size, blksize);
2746 growsize = datablks * blksize;
2747 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2748 if (blockHint >= hfsmp->hfs_metazone_start &&
2749 blockHint <= hfsmp->hfs_metazone_end)
2750 eflags |= kEFMetadataMask;
2751
2752 if (hfs_start_transaction(hfsmp) != 0) {
2753 if (took_trunc_lock)
2754 hfs_unlock_truncate(cp);
2755 return (EINVAL);
2756 }
2757 started_tr = 1;
2758 /*
2759 * Protect the extents b-tree and the allocation bitmap
2760 * during MapFileBlockC and ExtendFileC operations.
2761 */
2762 lockflags = SFL_BITMAP;
2763 if (overflow_extents(fp))
2764 lockflags |= SFL_EXTENTS;
2765 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2766
2767 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2768 if (retval) {
2769 retval = MacToVFSError(retval);
2770 goto out;
2771 }
2772
2773 /*
2774 * STEP 1 - acquire new allocation blocks.
2775 */
2776 if (!vnode_isnocache(vp)) {
2777 vnode_setnocache(vp);
2778 disabled_caching = 1;
2779
2780 }
2781 nextallocsave = hfsmp->nextAllocation;
2782 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2783 if (eflags & kEFMetadataMask) {
2784 HFS_MOUNT_LOCK(hfsmp, TRUE);
2785 hfsmp->nextAllocation = nextallocsave;
2786 hfsmp->vcbFlags |= 0xFF00;
2787 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2788 }
2789
2790 retval = MacToVFSError(retval);
2791 if (retval == 0) {
2792 cp->c_flag |= C_MODIFIED;
2793 if (newbytes < growsize) {
2794 retval = ENOSPC;
2795 goto restore;
2796 } else if (fp->ff_blocks < (headblks + datablks)) {
2797 printf("hfs_relocate: allocation failed");
2798 retval = ENOSPC;
2799 goto restore;
2800 }
2801
2802 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2803 if (retval) {
2804 retval = MacToVFSError(retval);
2805 } else if ((sector_a + 1) == sector_b) {
2806 retval = ENOSPC;
2807 goto restore;
2808 } else if ((eflags & kEFMetadataMask) &&
2809 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2810 hfsmp->hfs_metazone_end)) {
2811 printf("hfs_relocate: didn't move into metadata zone\n");
2812 retval = ENOSPC;
2813 goto restore;
2814 }
2815 }
2816 /* Done with system locks and journal for now. */
2817 hfs_systemfile_unlock(hfsmp, lockflags);
2818 lockflags = 0;
2819 hfs_end_transaction(hfsmp);
2820 started_tr = 0;
2821
2822 if (retval) {
2823 /*
2824 * Check to see if failure is due to excessive fragmentation.
2825 */
2826 if ((retval == ENOSPC) &&
2827 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2828 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2829 }
2830 goto out;
2831 }
2832 /*
2833 * STEP 2 - clone file data into the new allocation blocks.
2834 */
2835
2836 if (vnodetype == VLNK)
2837 retval = hfs_clonelink(vp, blksize, cred, p);
2838 else if (vnode_issystem(vp))
2839 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2840 else
2841 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2842
2843 /* Start transaction for step 3 or for a restore. */
2844 if (hfs_start_transaction(hfsmp) != 0) {
2845 retval = EINVAL;
2846 goto out;
2847 }
2848 started_tr = 1;
2849 if (retval)
2850 goto restore;
2851
2852 /*
2853 * STEP 3 - switch to cloned data and remove old blocks.
2854 */
2855 lockflags = SFL_BITMAP;
2856 if (overflow_extents(fp))
2857 lockflags |= SFL_EXTENTS;
2858 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2859
2860 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2861
2862 hfs_systemfile_unlock(hfsmp, lockflags);
2863 lockflags = 0;
2864 if (retval)
2865 goto restore;
2866 out:
2867 if (took_trunc_lock)
2868 hfs_unlock_truncate(cp);
2869
2870 if (lockflags) {
2871 hfs_systemfile_unlock(hfsmp, lockflags);
2872 lockflags = 0;
2873 }
2874
2875 /* Push cnode's new extent data to disk. */
2876 if (retval == 0) {
2877 (void) hfs_update(vp, MNT_WAIT);
2878 }
2879
2880 if (hfsmp->jnl) {
2881 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2882 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2883 else
2884 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2885 }
2886 exit:
2887 if (disabled_caching) {
2888 vnode_clearnocache(vp);
2889 }
2890 if (started_tr)
2891 hfs_end_transaction(hfsmp);
2892
2893 return (retval);
2894
2895 restore:
2896 if (fp->ff_blocks == headblks)
2897 goto exit;
2898 /*
2899 * Give back any newly allocated space.
2900 */
2901 if (lockflags == 0) {
2902 lockflags = SFL_BITMAP;
2903 if (overflow_extents(fp))
2904 lockflags |= SFL_EXTENTS;
2905 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2906 }
2907
2908 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2909
2910 hfs_systemfile_unlock(hfsmp, lockflags);
2911 lockflags = 0;
2912
2913 if (took_trunc_lock)
2914 hfs_unlock_truncate(cp);
2915 goto exit;
2916 }
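
/*
 * A condensed outline of the control flow that hfs_relocate() implements
 * above: three bounded steps, with the newly acquired blocks given back on
 * any failure after step 1.  All ex_* helpers are hypothetical stand-ins,
 * not real HFS routines, and each simply returns success here.
 */
static int ex_grow_file_contiguously(void)   { return 0; }	/* STEP 1 */
static int ex_copy_old_blocks_to_new(void)   { return 0; }	/* STEP 2 */
static int ex_head_truncate_old_blocks(void) { return 0; }	/* STEP 3 */
static int ex_release_new_blocks(void)       { return 0; }	/* restore */

static int
ex_relocate_outline(void)
{
	int error;

	/* STEP 1: acquire a new, contiguous set of blocks past the EOF. */
	if ((error = ex_grow_file_contiguously()) != 0)
		return error;

	/* STEP 2: clone the file data into the new blocks. */
	if ((error = ex_copy_old_blocks_to_new()) != 0) {
		(void) ex_release_new_blocks();
		return error;
	}

	/* STEP 3: head-truncate away the original blocks. */
	if ((error = ex_head_truncate_old_blocks()) != 0) {
		(void) ex_release_new_blocks();
		return error;
	}
	return 0;
}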
2917
2918
2919 /*
2920 * Clone a symlink.
2921 *
2922 */
2923 static int
2924 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2925 {
2926 struct buf *head_bp = NULL;
2927 struct buf *tail_bp = NULL;
2928 int error;
2929
2930
2931 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2932 if (error)
2933 goto out;
2934
2935 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2936 if (tail_bp == NULL) {
2937 error = EIO;
2938 goto out;
2939 }
2940 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2941 error = (int)buf_bwrite(tail_bp);
2942 out:
2943 if (head_bp) {
2944 buf_markinvalid(head_bp);
2945 buf_brelse(head_bp);
2946 }
2947 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2948
2949 return (error);
2950 }
2951
2952 /*
2953 * Clone a file's data within the file.
2954 *
2955 */
2956 static int
2957 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2958 {
2959 caddr_t bufp;
2960 size_t writebase;
2961 size_t bufsize;
2962 size_t copysize;
2963 size_t iosize;
2964 off_t filesize;
2965 size_t offset;
2966 uio_t auio;
2967 int error = 0;
2968
2969 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2970 writebase = blkstart * blksize;
2971 copysize = blkcnt * blksize;
2972 iosize = bufsize = MIN(copysize, 128 * 1024);
2973 offset = 0;
2974
2975 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2976 return (ENOMEM);
2977 }
2978 hfs_unlock(VTOC(vp));
2979
2980 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2981
2982 while (offset < copysize) {
2983 iosize = MIN(copysize - offset, iosize);
2984
2985 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2986 uio_addiov(auio, (uintptr_t)bufp, iosize);
2987
2988 error = cluster_read(vp, auio, copysize, 0);
2989 if (error) {
2990 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2991 break;
2992 }
2993 if (uio_resid(auio) != 0) {
2994 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2995 error = EIO;
2996 break;
2997 }
2998
2999 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3000 uio_addiov(auio, (uintptr_t)bufp, iosize);
3001
3002 error = cluster_write(vp, auio, filesize + offset,
3003 filesize + offset + iosize,
3004 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3005 if (error) {
3006 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3007 break;
3008 }
3009 if (uio_resid(auio) != 0) {
3010 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3011 error = EIO;
3012 break;
3013 }
3014 offset += iosize;
3015 }
3016 uio_free(auio);
3017
3018 /*
3019 * No need to call ubc_sync_range or hfs_invalbuf
3020 * since the file was copied using IO_NOCACHE.
3021 */
3022
3023 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3024
3025 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3026 return (error);
3027 }
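
/*
 * A user-space sketch of the copy loop shape used by hfs_clonefile() above:
 * read up to 128 KiB at 'offset', write it back at 'writebase + offset',
 * repeat until 'copysize' bytes have moved.  pread(2)/pwrite(2) stand in
 * for cluster_read()/cluster_write(); the ex_* names are hypothetical.
 */
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

#define EX_BOUNCE_SIZE	(128 * 1024)

/* Copy 'copysize' bytes within one file, from offset 0 to 'writebase'. */
static int
ex_clone_within_fd(int fd, off_t writebase, off_t copysize)
{
	char *buf = malloc(EX_BOUNCE_SIZE);
	off_t offset = 0;
	int error = 0;

	if (buf == NULL)
		return -1;
	while (offset < copysize && error == 0) {
		ssize_t want = EX_BOUNCE_SIZE;
		ssize_t got;

		if ((off_t)want > copysize - offset)
			want = (ssize_t)(copysize - offset);
		got = pread(fd, buf, (size_t)want, offset);
		if (got != want ||
		    pwrite(fd, buf, (size_t)got, writebase + offset) != got)
			error = -1;
		else
			offset += got;
	}
	free(buf);
	return error;
}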
3028
3029 /*
3030 * Clone a system (metadata) file.
3031 *
3032 */
3033 static int
3034 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3035 kauth_cred_t cred, struct proc *p)
3036 {
3037 caddr_t bufp;
3038 char * offset;
3039 size_t bufsize;
3040 size_t iosize;
3041 struct buf *bp = NULL;
3042 daddr64_t blkno;
3043 daddr64_t blk;
3044 daddr64_t start_blk;
3045 daddr64_t last_blk;
3046 int breadcnt;
3047 int i;
3048 int error = 0;
3049
3050
3051 iosize = GetLogicalBlockSize(vp);
3052 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3053 breadcnt = bufsize / iosize;
3054
3055 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3056 return (ENOMEM);
3057 }
3058 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3059 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3060 blkno = 0;
3061
3062 while (blkno < last_blk) {
3063 /*
3064 * Read up to a megabyte
3065 */
3066 offset = bufp;
3067 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3068 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3069 if (error) {
3070 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3071 goto out;
3072 }
3073 if (buf_count(bp) != iosize) {
3074 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3075 goto out;
3076 }
3077 bcopy((char *)buf_dataptr(bp), offset, iosize);
3078
3079 buf_markinvalid(bp);
3080 buf_brelse(bp);
3081 bp = NULL;
3082
3083 offset += iosize;
3084 }
3085
3086 /*
3087 * Write up to a megabyte
3088 */
3089 offset = bufp;
3090 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3091 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3092 if (bp == NULL) {
3093 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3094 error = EIO;
3095 goto out;
3096 }
3097 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3098 error = (int)buf_bwrite(bp);
3099 bp = NULL;
3100 if (error)
3101 goto out;
3102 offset += iosize;
3103 }
3104 }
3105 out:
3106 if (bp) {
3107 buf_brelse(bp);
3108 }
3109
3110 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3111
3112 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3113
3114 return (error);
3115 }
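
/*
 * The staging-buffer sizing used by hfs_clonesysfile() above, shown in
 * isolation: cap the buffer at 1 MiB, then round it down to a whole number
 * of logical blocks so every read/write pass moves an exact block count.
 * Assumes iosize is a power of two; the ex_* names are hypothetical.
 */
#include <assert.h>
#include <stddef.h>

static size_t
ex_staging_bufsize(size_t total_bytes, size_t iosize)
{
	size_t cap = 1024 * 1024;
	size_t bufsize = (total_bytes < cap) ? total_bytes : cap;

	return bufsize & ~(iosize - 1);	/* round down to a block multiple */
}

static void
ex_staging_demo(void)
{
	assert(ex_staging_bufsize(10 * 1024 * 1024, 4096) == 1024 * 1024);
	assert(ex_staging_bufsize(10000, 4096) == 8192);
}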