1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
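/*
 * can_cluster(size): cluster I/O is only used when the (logical block) size
 * is a multiple of 4K and no larger than half of MAXPHYSIO.
 */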
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
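/*
 * (The 2048-block threshold above corresponds to 8 MB only with the common
 * 4 KB allocation block size; the check itself is expressed in allocation
 * blocks.)
 */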
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start
343 of the transfer is invalid and should be zero-filled
344 as part of the transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data, but in all cases we merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start are invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
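/*
 * A worked example (assuming 4K pages): with fp->ff_size = 5000 and a write
 * starting at offset = 20000, inval_start = 8192, inval_end = 16384 and
 * zero_off = 5000.  If the current EOF page (bytes 4096-8191) holds valid
 * data, the explicit cluster_write below zero-fills bytes 5000-8191, pages
 * 8192-16383 are marked invalid, and bytes 16384-19999 are zero-filled later
 * via IO_HEADZEROFILL.  If the EOF page is already invalid, inval_start is
 * instead pulled back to eof_page_base = 4096 and the whole range 4096-16383
 * is simply marked invalid.
 */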
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
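/*
 * A rough, hypothetical sketch of how a root-run user-space tool might
 * drive the bulk-access fsctl (the exact header and request names visible
 * to user space are assumptions, not guarantees):
 *
 *	int file_ids[3] = { 123, 456, 789 };
 *	gid_t groups[2] = { 20, 80 };
 *	short results[3];
 *	struct access_t args;
 *
 *	args.uid = 501;			(effective uid to evaluate access as)
 *	args.flags = R_OK;		(access being requested)
 *	args.num_groups = 2;		args.groups = groups;
 *	args.num_files = 3;		args.file_ids = file_ids;
 *	args.access = results;
 *	fsctl("/Volumes/MyVol", HFS_BULKACCESS, &args, 0);
 *
 * On return, results[i] is 0 if access would be granted to file_ids[i] and
 * an errno value (e.g. EACCES) otherwise; the handler below additionally
 * requires the caller's real uid to be 0 and num_files to be 1..256.
 */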
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
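/*
 * For example, if acache currently holds { 12, 57, 300 }:
 *   lookup_bucket(cache, &idx, 57)  returns 1 with idx = 1 (found), while
 *   lookup_bucket(cache, &idx, 100) returns 0 with idx = 2, the position
 *   at which 100 would be inserted to keep the array sorted.
 */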
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
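/*
 * E.g. when asked about a directory several levels deep, this walks
 * dir -> parent -> ... -> root, stops early on a cache hit or on the first
 * component that lacks X_OK for the caller, and then records the verdict
 * for each directory visited (up to CACHE_LEVELS of them) in the cache.
 */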
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
995
996 task = current_task();
997 task_working_set_disable(task);
998
999 // flush things before we get started to try and prevent
1000 // dirty data from being paged out while we're frozen.
1001 // note: can't do this after taking the lock as it will
1002 // deadlock against ourselves.
1003 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1004 hfs_global_exclusive_lock_acquire(hfsmp);
1005 journal_flush(hfsmp->jnl);
1006
1007 // don't need to iterate on all vnodes, we just need to
1008 // wait for writes to the system files and the device vnode
1009 if (HFSTOVCB(hfsmp)->extentsRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->catalogRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1013 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1014 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1015 if (hfsmp->hfs_attribute_vp)
1016 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1017 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1018
1019 hfsmp->hfs_freezing_proc = current_proc();
1020
1021 return (0);
1022 }
1023
1024 case F_THAW_FS: {
1025 if (!is_suser())
1026 return (EACCES);
1027
1028 // if we're not the one who froze the fs then we
1029 // can't thaw it.
1030 if (hfsmp->hfs_freezing_proc != current_proc()) {
1031 return EPERM;
1032 }
1033
1034 // NOTE: if you add code here, also go check the
1035 // code that "thaws" the fs in hfs_vnop_close()
1036 //
1037 hfsmp->hfs_freezing_proc = NULL;
1038 hfs_global_exclusive_lock_release(hfsmp);
1039 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1040
1041 return (0);
1042 }
1043
1044 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1045 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1046
1047 case HFS_BULKACCESS_FSCTL:
1048 case HFS_BULKACCESS: {
1049 /*
1050 * NOTE: on entry, the vnode is locked. In case this vnode
1051 * happens to be in our list of file_ids, we note it and
1052 * avoid calling hfs_chashget_nowait() on that id, as that
1053 * will cause a "locking against myself" panic.
1054 */
1055 Boolean check_leaf = true;
1056
1057 struct user_access_t *user_access_structp;
1058 struct user_access_t tmp_user_access_t;
1059 struct access_cache cache;
1060
1061 int error = 0, i;
1062
1063 dev_t dev = VTOC(vp)->c_dev;
1064
1065 short flags;
1066 struct ucred myucred; /* XXX ILLEGAL */
1067 int num_files;
1068 int *file_ids = NULL;
1069 short *access = NULL;
1070
1071 cnid_t cnid;
1072 cnid_t prevParent_cnid = 0;
1073 unsigned long myPerms;
1074 short myaccess = 0;
1075 struct cat_attr cnattr;
1076 CatalogKey catkey;
1077 struct cnode *skip_cp = VTOC(vp);
1078 struct vfs_context my_context;
1079
1080 /* first, return error if not run as root */
1081 if (cred->cr_ruid != 0) {
1082 return EPERM;
1083 }
1084
1085 /* initialize the local cache and buffers */
1086 cache.numcached = 0;
1087 cache.cachehits = 0;
1088 cache.lookups = 0;
1089
1090 file_ids = (int *) get_pathbuff();
1091 access = (short *) get_pathbuff();
1092 cache.acache = (int *) get_pathbuff();
1093 cache.haveaccess = (Boolean *) get_pathbuff();
1094
1095 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1096 release_pathbuff((char *) file_ids);
1097 release_pathbuff((char *) access);
1098 release_pathbuff((char *) cache.acache);
1099 release_pathbuff((char *) cache.haveaccess);
1100
1101 return ENOMEM;
1102 }
1103
1104 /* struct copyin done during dispatch... need to copy file_id array separately */
1105 if (ap->a_data == NULL) {
1106 error = EINVAL;
1107 goto err_exit_bulk_access;
1108 }
1109
1110 if (is64bit) {
1111 user_access_structp = (struct user_access_t *)ap->a_data;
1112 }
1113 else {
1114 struct access_t * accessp = (struct access_t *)ap->a_data;
1115 tmp_user_access_t.uid = accessp->uid;
1116 tmp_user_access_t.flags = accessp->flags;
1117 tmp_user_access_t.num_groups = accessp->num_groups;
1118 tmp_user_access_t.num_files = accessp->num_files;
1119 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1120 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1121 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1122 user_access_structp = &tmp_user_access_t;
1123 }
1124
1125 num_files = user_access_structp->num_files;
1126 if (num_files < 1) {
1127 goto err_exit_bulk_access;
1128 }
1129 if (num_files > 256) {
1130 error = EINVAL;
1131 goto err_exit_bulk_access;
1132 }
1133
1134 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1135 num_files * sizeof(int)))) {
1136 goto err_exit_bulk_access;
1137 }
1138
1139 /* fill in the ucred structure */
1140 flags = user_access_structp->flags;
1141 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1142 flags = R_OK;
1143 }
1144
1145 /* check if we've been passed leaf node ids or parent ids */
1146 if (flags & PARENT_IDS_FLAG) {
1147 check_leaf = false;
1148 }
1149
1150 memset(&myucred, 0, sizeof(myucred));
1151 myucred.cr_ref = 1;
1152 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1153 myucred.cr_ngroups = user_access_structp->num_groups;
1154 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1155 myucred.cr_ngroups = 0;
1156 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1157 myucred.cr_ngroups * sizeof(gid_t)))) {
1158 goto err_exit_bulk_access;
1159 }
1160 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1161
1162 my_context.vc_proc = p;
1163 my_context.vc_ucred = &myucred;
1164
1165 /* Check access to each file_id passed in */
1166 for (i = 0; i < num_files; i++) {
1167 #if 0
1168 cnid = (cnid_t) file_ids[i];
1169
1170 /* root always has access */
1171 if (!suser(&myucred, NULL)) {
1172 access[i] = 0;
1173 continue;
1174 }
1175
1176 if (check_leaf) {
1177
1178 /* do the lookup (checks the cnode hash, then the catalog) */
1179 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1180 if (error) {
1181 access[i] = (short) error;
1182 continue;
1183 }
1184
1185 /* before calling CheckAccess(), check the target file for read access */
1186 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1187 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1188
1189
1190 /* fail fast if no access */
1191 if ((myPerms & flags) == 0) {
1192 access[i] = EACCES;
1193 continue;
1194 }
1195 } else {
1196 /* we were passed an array of parent ids */
1197 catkey.hfsPlus.parentID = cnid;
1198 }
1199
1200 /* if the last guy had the same parent and had access, we're done */
1201 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1202 cache.cachehits++;
1203 access[i] = 0;
1204 continue;
1205 }
1206
1207 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1208 skip_cp, p, &myucred, dev);
1209
1210 if ( myaccess ) {
1211 access[i] = 0; // have access.. no errors to report
1212 } else {
1213 access[i] = (error != 0 ? (short) error : EACCES);
1214 }
1215
1216 prevParent_cnid = catkey.hfsPlus.parentID;
1217 #else
1218 int myErr;
1219
1220 cnid = (cnid_t)file_ids[i];
1221
1222 while (cnid >= kRootDirID) {
1223 /* get the vnode for this cnid */
1224 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1225 if ( myErr ) {
1226 access[i] = EACCES;
1227 break;
1228 }
1229
1230 cnid = VTOC(vp)->c_parentcnid;
1231
1232 hfs_unlock(VTOC(vp));
1233 if (vnode_vtype(vp) == VDIR) {
1234 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1235 } else {
1236 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1237 }
1238 vnode_put(vp);
1239 access[i] = myErr;
1240 if (myErr) {
1241 break;
1242 }
1243 }
1244 #endif
1245 }
1246
1247 /* copyout the access array */
1248 if ((error = copyout((caddr_t)access, user_access_structp->access,
1249 num_files * sizeof (short)))) {
1250 goto err_exit_bulk_access;
1251 }
1252
1253 err_exit_bulk_access:
1254
1255 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1256
1257 release_pathbuff((char *) cache.acache);
1258 release_pathbuff((char *) cache.haveaccess);
1259 release_pathbuff((char *) file_ids);
1260 release_pathbuff((char *) access);
1261
1262 return (error);
1263 } /* HFS_BULKACCESS */
1264
1265 case HFS_SETACLSTATE: {
1266 int state;
1267
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271
1272 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1273 state = *(int *)ap->a_data;
1274
1275 // super-user can enable or disable acl's on a volume.
1276 // the volume owner can only enable acl's
1277 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1278 return (EPERM);
1279 }
1280 if (state == 0 || state == 1)
1281 return hfs_setextendedsecurity(hfsmp, state);
1282 else
1283 return (EINVAL);
1284 }
1285
1286 case F_FULLFSYNC: {
1287 int error;
1288
1289 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1290 if (error == 0) {
1291 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1292 hfs_unlock(VTOC(vp));
1293 }
1294
1295 return error;
1296 }
1297
1298 case F_CHKCLEAN: {
1299 register struct cnode *cp;
1300 int error;
1301
1302 if (!vnode_isreg(vp))
1303 return EINVAL;
1304
1305 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1306 if (error == 0) {
1307 cp = VTOC(vp);
1308 /*
1309 * used by regression test to determine if
1310 * all the dirty pages (via write) have been cleaned
1311 * after a call to 'fsync'.
1312 */
1313 error = is_file_clean(vp, VTOF(vp)->ff_size);
1314 hfs_unlock(cp);
1315 }
1316 return (error);
1317 }
1318
1319 case F_RDADVISE: {
1320 register struct radvisory *ra;
1321 struct filefork *fp;
1322 int error;
1323
1324 if (!vnode_isreg(vp))
1325 return EINVAL;
1326
1327 ra = (struct radvisory *)(ap->a_data);
1328 fp = VTOF(vp);
1329
1330 /* Protect against a size change. */
1331 hfs_lock_truncate(VTOC(vp), TRUE);
1332
1333 if (ra->ra_offset >= fp->ff_size) {
1334 error = EFBIG;
1335 } else {
1336 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1337 }
1338
1339 hfs_unlock_truncate(VTOC(vp));
1340 return (error);
1341 }
1342
1343 case F_READBOOTSTRAP:
1344 case F_WRITEBOOTSTRAP:
1345 {
1346 struct vnode *devvp = NULL;
1347 user_fbootstraptransfer_t *user_bootstrapp;
1348 int devBlockSize;
1349 int error;
1350 uio_t auio;
1351 daddr64_t blockNumber;
1352 u_long blockOffset;
1353 u_long xfersize;
1354 struct buf *bp;
1355 user_fbootstraptransfer_t user_bootstrap;
1356
1357 if (!vnode_isvroot(vp))
1358 return (EINVAL);
1359 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1360 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1361 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
1362 */
1363 if (is64bit) {
1364 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1365 }
1366 else {
1367 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1368 user_bootstrapp = &user_bootstrap;
1369 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1370 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1371 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1372 }
1373 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1374 return EINVAL;
1375
1376 devvp = VTOHFS(vp)->hfs_devvp;
1377 auio = uio_create(1, user_bootstrapp->fbt_offset,
1378 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1379 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1380 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1381
1382 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1383
1384 while (uio_resid(auio) > 0) {
1385 blockNumber = uio_offset(auio) / devBlockSize;
1386 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1387 if (error) {
1388 if (bp) buf_brelse(bp);
1389 uio_free(auio);
1390 return error;
1391 };
1392
1393 blockOffset = uio_offset(auio) % devBlockSize;
1394 xfersize = devBlockSize - blockOffset;
1395 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1396 if (error) {
1397 buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401 if (uio_rw(auio) == UIO_WRITE) {
1402 error = VNOP_BWRITE(bp);
1403 if (error) {
1404 uio_free(auio);
1405 return error;
1406 }
1407 } else {
1408 buf_brelse(bp);
1409 };
1410 };
1411 uio_free(auio);
1412 };
1413 return 0;
1414
1415 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1416 {
1417 if (is64bit) {
1418 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1419 }
1420 else {
1421 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1422 }
1423 return 0;
1424 }
1425
1426 case HFS_GET_MOUNT_TIME:
1427 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1428 break;
1429
1430 case HFS_GET_LAST_MTIME:
1431 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1432 break;
1433
1434 case HFS_SET_BOOT_INFO:
1435 if (!vnode_isvroot(vp))
1436 return(EINVAL);
1437 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1438 return(EACCES); /* must be superuser or owner of filesystem */
1439 HFS_MOUNT_LOCK(hfsmp, TRUE);
1440 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1441 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1442 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1443 break;
1444
1445 case HFS_GET_BOOT_INFO:
1446 if (!vnode_isvroot(vp))
1447 return(EINVAL);
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 break;
1452
1453 default:
1454 return (ENOTTY);
1455 }
1456
1457 /* Should never get here */
1458 return 0;
1459 }
1460
1461 /*
1462 * select
1463 */
1464 int
1465 hfs_vnop_select(__unused struct vnop_select_args *ap)
1466 /*
1467 struct vnop_select_args {
1468 vnode_t a_vp;
1469 int a_which;
1470 int a_fflags;
1471 void *a_wql;
1472 vfs_context_t a_context;
1473 };
1474 */
1475 {
1476 /*
1477 * We should really check to see if I/O is possible.
1478 */
1479 return (1);
1480 }
1481
1482 /*
1483 * Converts a logical block number to a physical block, and optionally returns
1484 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1485 * The physical block number is based on the device block size, currently it's 512.
1486 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1487 */
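/*
 * Example: with a logical block size of 4096, a request for logical block
 * bn = 10 maps to byte offset 40960.  If MapFileBlockC() reports that 32768
 * contiguous bytes are available from that point, *runp is set to
 * 32768/4096 - 1 = 7 remaining read-ahead blocks (since can_cluster() is
 * true for a 4K logical block size).
 */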
1488 int
1489 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1490 {
1491 struct cnode *cp = VTOC(vp);
1492 struct filefork *fp = VTOF(vp);
1493 struct hfsmount *hfsmp = VTOHFS(vp);
1494 int retval = E_NONE;
1495 daddr_t logBlockSize;
1496 size_t bytesContAvail = 0;
1497 off_t blockposition;
1498 int lockExtBtree;
1499 int lockflags = 0;
1500
1501 /*
1502 * Check for underlying vnode requests and ensure that logical
1503 * to physical mapping is requested.
1504 */
1505 if (vpp != NULL)
1506 *vpp = cp->c_devvp;
1507 if (bnp == NULL)
1508 return (0);
1509
1510 logBlockSize = GetLogicalBlockSize(vp);
1511 blockposition = (off_t)bn * (off_t)logBlockSize;
1512
1513 lockExtBtree = overflow_extents(fp);
1514
1515 if (lockExtBtree)
1516 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1517
1518 retval = MacToVFSError(
1519 MapFileBlockC (HFSTOVCB(hfsmp),
1520 (FCB*)fp,
1521 MAXPHYSIO,
1522 blockposition,
1523 bnp,
1524 &bytesContAvail));
1525
1526 if (lockExtBtree)
1527 hfs_systemfile_unlock(hfsmp, lockflags);
1528
1529 if (retval == E_NONE) {
1530 /* Figure out how many read ahead blocks there are */
1531 if (runp != NULL) {
1532 if (can_cluster(logBlockSize)) {
1533 /* Make sure this result never goes negative: */
1534 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1535 } else {
1536 *runp = 0;
1537 }
1538 }
1539 }
1540 return (retval);
1541 }
1542
1543 /*
1544 * Convert logical block number to file offset.
1545 */
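/* E.g. with a 4 KB logical block size, logical block 3 maps to file offset 12288. */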
1546 int
1547 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1548 /*
1549 struct vnop_blktooff_args {
1550 vnode_t a_vp;
1551 daddr64_t a_lblkno;
1552 off_t *a_offset;
1553 };
1554 */
1555 {
1556 if (ap->a_vp == NULL)
1557 return (EINVAL);
1558 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1559
1560 return(0);
1561 }
1562
1563 /*
1564 * Convert file offset to logical block number.
1565 */
1566 int
1567 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1568 /*
1569 struct vnop_offtoblk_args {
1570 vnode_t a_vp;
1571 off_t a_offset;
1572 daddr64_t *a_lblkno;
1573 };
1574 */
1575 {
1576 if (ap->a_vp == NULL)
1577 return (EINVAL);
1578 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1579
1580 return(0);
1581 }
1582
1583 /*
1584 * Map file offset to physical block number.
1585 *
1586 * System file cnodes are expected to be locked (shared or exclusive).
1587 */
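/*
 * On success *a_bpn holds the starting physical (device) block, or -1 when
 * the requested range begins in an invalid (not-yet-zeroed) region, and
 * *a_run (if supplied) returns the number of contiguous bytes mapped, at
 * most a_size.  Delayed allocations are converted to real blocks here first.
 */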
1588 int
1589 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1590 /*
1591 struct vnop_blockmap_args {
1592 vnode_t a_vp;
1593 off_t a_foffset;
1594 size_t a_size;
1595 daddr64_t *a_bpn;
1596 size_t *a_run;
1597 void *a_poff;
1598 int a_flags;
1599 vfs_context_t a_context;
1600 };
1601 */
1602 {
1603 struct vnode *vp = ap->a_vp;
1604 struct cnode *cp;
1605 struct filefork *fp;
1606 struct hfsmount *hfsmp;
1607 size_t bytesContAvail = 0;
1608 int retval = E_NONE;
1609 int syslocks = 0;
1610 int lockflags = 0;
1611 struct rl_entry *invalid_range;
1612 enum rl_overlaptype overlaptype;
1613 int started_tr = 0;
1614 int tooklock = 0;
1615
1616 /* Do not allow blockmap operation on a directory */
1617 if (vnode_isdir(vp)) {
1618 return (ENOTSUP);
1619 }
1620
1621 /*
1622 * Check for underlying vnode requests and ensure that logical
1623 * to physical mapping is requested.
1624 */
1625 if (ap->a_bpn == NULL)
1626 return (0);
1627
1628 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1629 if (VTOC(vp)->c_lockowner != current_thread()) {
1630 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1631 tooklock = 1;
1632 } else {
1633 cp = VTOC(vp);
1634 panic("blockmap: %s cnode lock already held!\n",
1635 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1636 }
1637 }
1638 hfsmp = VTOHFS(vp);
1639 cp = VTOC(vp);
1640 fp = VTOF(vp);
1641
1642 retry:
1643 if (fp->ff_unallocblocks) {
1644 if (hfs_start_transaction(hfsmp) != 0) {
1645 retval = EINVAL;
1646 goto exit;
1647 } else {
1648 started_tr = 1;
1649 }
1650 syslocks = SFL_EXTENTS | SFL_BITMAP;
1651
1652 } else if (overflow_extents(fp)) {
1653 syslocks = SFL_EXTENTS;
1654 }
1655
1656 if (syslocks)
1657 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1658
1659 /*
1660 * Check for any delayed allocations.
1661 */
1662 if (fp->ff_unallocblocks) {
1663 SInt64 actbytes;
1664 u_int32_t loanedBlocks;
1665
1666 //
1667 // Make sure we have a transaction. It's possible
1668 // that we came in and fp->ff_unallocblocks was zero
1669 // but during the time we blocked acquiring the extents
1670 // btree, ff_unallocblocks became non-zero and so we
1671 // will need to start a transaction.
1672 //
1673 if (started_tr == 0) {
1674 if (syslocks) {
1675 hfs_systemfile_unlock(hfsmp, lockflags);
1676 syslocks = 0;
1677 }
1678 goto retry;
1679 }
1680
1681 /*
1682 * Note: ExtendFileC will release any blocks on loan and
1683 * acquire real blocks. So we ask to extend by zero bytes
1684 * since ExtendFileC will account for the virtual blocks.
1685 */
1686
1687 loanedBlocks = fp->ff_unallocblocks;
1688 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1689 kEFAllMask | kEFNoClumpMask, &actbytes);
1690
1691 if (retval) {
1692 fp->ff_unallocblocks = loanedBlocks;
1693 cp->c_blocks += loanedBlocks;
1694 fp->ff_blocks += loanedBlocks;
1695
1696 HFS_MOUNT_LOCK(hfsmp, TRUE);
1697 hfsmp->loanedBlocks += loanedBlocks;
1698 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1699 }
1700
1701 if (retval) {
1702 hfs_systemfile_unlock(hfsmp, lockflags);
1703 cp->c_flag |= C_MODIFIED;
1704 if (started_tr) {
1705 (void) hfs_update(vp, TRUE);
1706 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1707
1708 hfs_end_transaction(hfsmp);
1709 }
1710 goto exit;
1711 }
1712 }
1713
1714 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1715 ap->a_bpn, &bytesContAvail);
1716 if (syslocks) {
1717 hfs_systemfile_unlock(hfsmp, lockflags);
1718 syslocks = 0;
1719 }
1720
1721 if (started_tr) {
1722 (void) hfs_update(vp, TRUE);
1723 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1724 hfs_end_transaction(hfsmp);
1725 started_tr = 0;
1726 }
1727 if (retval) {
1728 goto exit;
1729 }
1730
1731 /* Adjust the mapping information for invalid file ranges: */
1732 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1733 ap->a_foffset + (off_t)bytesContAvail - 1,
1734 &invalid_range);
1735 if (overlaptype != RL_NOOVERLAP) {
1736 switch(overlaptype) {
1737 case RL_MATCHINGOVERLAP:
1738 case RL_OVERLAPCONTAINSRANGE:
1739 case RL_OVERLAPSTARTSBEFORE:
1740 /* There's no valid block for this byte offset: */
1741 *ap->a_bpn = (daddr64_t)-1;
1742 /* There's no point limiting the amount to be returned
1743 * if the invalid range that was hit extends all the way
1744 * to the EOF (i.e. there are no valid bytes between the
1745 * end of this range and the file's EOF):
1746 */
1747 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1748 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1749 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1750 }
1751 break;
1752
1753 case RL_OVERLAPISCONTAINED:
1754 case RL_OVERLAPENDSAFTER:
1755 /* The range of interest hits an invalid block before the end: */
1756 if (invalid_range->rl_start == ap->a_foffset) {
1757 /* There's actually no valid information to be had starting here: */
1758 *ap->a_bpn = (daddr64_t)-1;
1759 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1760 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1761 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1762 }
1763 } else {
1764 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1765 }
1766 break;
1767
1768 case RL_NOOVERLAP:
1769 break;
1770 } /* end switch */
1771 if (bytesContAvail > ap->a_size)
1772 bytesContAvail = ap->a_size;
1773 }
1774 if (ap->a_run)
1775 *ap->a_run = bytesContAvail;
1776
1777 if (ap->a_poff)
1778 *(int *)ap->a_poff = 0;
1779 exit:
1780 if (tooklock)
1781 hfs_unlock(cp);
1782
1783 return (MacToVFSError(retval));
1784 }
1785
1786
1787 /*
1788 * prepare and issue the I/O
1789 * buf_strategy knows how to deal
1790 * with requests that require
1791 * fragmented I/Os
1792 */
1793 int
1794 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1795 {
1796 buf_t bp = ap->a_bp;
1797 vnode_t vp = buf_vnode(bp);
1798 struct cnode *cp = VTOC(vp);
1799
1800 return (buf_strategy(cp->c_devvp, ap));
1801 }
1802
1803
1804 static int
1805 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1806 {
1807 register struct cnode *cp = VTOC(vp);
1808 struct filefork *fp = VTOF(vp);
1809 struct proc *p = vfs_context_proc(context);
1810 kauth_cred_t cred = vfs_context_ucred(context);
1811 int retval;
1812 off_t bytesToAdd;
1813 off_t actualBytesAdded;
1814 off_t filebytes;
1815 u_long fileblocks;
1816 int blksize;
1817 struct hfsmount *hfsmp;
1818 int lockflags;
1819
1820 blksize = VTOVCB(vp)->blockSize;
1821 fileblocks = fp->ff_blocks;
1822 filebytes = (off_t)fileblocks * (off_t)blksize;
1823
1824 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1825 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1826
1827 if (length < 0)
1828 return (EINVAL);
1829
1830 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1831 return (EFBIG);
1832
1833 hfsmp = VTOHFS(vp);
1834
1835 retval = E_NONE;
1836
1837 /* Files that are changing size are not hot file candidates. */
1838 if (hfsmp->hfc_stage == HFC_RECORDING) {
1839 fp->ff_bytesread = 0;
1840 }
1841
1842 /*
1843 * We cannot just check if fp->ff_size == length (as an optimization)
1844 * since there may be extra physical blocks that also need truncation.
1845 */
1846 #if QUOTA
1847 if ((retval = hfs_getinoquota(cp)))
1848 return(retval);
1849 #endif /* QUOTA */
1850
1851 /*
1852 * Lengthen the size of the file. We must ensure that the
1853 * last byte of the file is allocated. Since the smallest
1854 * value of ff_size is 0, length will be at least 1.
1855 */
1856 if (length > (off_t)fp->ff_size) {
1857 #if QUOTA
1858 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1859 cred, 0);
1860 if (retval)
1861 goto Err_Exit;
1862 #endif /* QUOTA */
1863 /*
1864 * If we don't have enough physical space then
1865 * we need to extend the physical size.
1866 */
1867 if (length > filebytes) {
1868 int eflags;
1869 u_long blockHint = 0;
1870
1871 /* All or nothing and don't round up to clumpsize. */
1872 eflags = kEFAllMask | kEFNoClumpMask;
1873
1874 if (cred && suser(cred, NULL) != 0)
1875 eflags |= kEFReserveMask; /* keep a reserve */
1876
1877 /*
1878 * Allocate Journal and Quota files in metadata zone.
1879 */
1880 if (filebytes == 0 &&
1881 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1882 hfs_virtualmetafile(cp)) {
1883 eflags |= kEFMetadataMask;
1884 blockHint = hfsmp->hfs_metazone_start;
1885 }
1886 if (hfs_start_transaction(hfsmp) != 0) {
1887 retval = EINVAL;
1888 goto Err_Exit;
1889 }
1890
1891 /* Protect extents b-tree and allocation bitmap */
1892 lockflags = SFL_BITMAP;
1893 if (overflow_extents(fp))
1894 lockflags |= SFL_EXTENTS;
1895 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1896
1897 while ((length > filebytes) && (retval == E_NONE)) {
1898 bytesToAdd = length - filebytes;
1899 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1900 (FCB*)fp,
1901 bytesToAdd,
1902 blockHint,
1903 eflags,
1904 &actualBytesAdded));
1905
1906 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1907 if (actualBytesAdded == 0 && retval == E_NONE) {
1908 if (length > filebytes)
1909 length = filebytes;
1910 break;
1911 }
1912 } /* endwhile */
1913
1914 hfs_systemfile_unlock(hfsmp, lockflags);
1915
1916 if (hfsmp->jnl) {
1917 (void) hfs_update(vp, TRUE);
1918 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1919 }
1920
1921 hfs_end_transaction(hfsmp);
1922
1923 if (retval)
1924 goto Err_Exit;
1925
1926 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1927 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1928 }
1929
1930 if (!(flags & IO_NOZEROFILL)) {
1931 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1932 struct rl_entry *invalid_range;
1933 off_t zero_limit;
1934
1935 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1936 if (length < zero_limit) zero_limit = length;
1937
1938 if (length > (off_t)fp->ff_size) {
1939 struct timeval tv;
1940
1941 /* Extending the file: time to fill out the current last page with zeroes? */
1942 if ((fp->ff_size & PAGE_MASK_64) &&
1943 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1944 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1945
1946 /* There's some valid data at the start of the (current) last page
1947 of the file, so zero out the remainder of that page to ensure the
1948 entire page contains valid data. Since there is no invalid range
1949 possible past the (current) eof, there's no need to remove anything
1950 from the invalid range list before calling cluster_write(): */
1951 hfs_unlock(cp);
1952 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1953 fp->ff_size, (off_t)0,
1954 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1955 hfs_lock(cp, HFS_FORCE_LOCK);
1956 if (retval) goto Err_Exit;
1957
1958 /* Merely invalidate the remaining area, if necessary: */
1959 if (length > zero_limit) {
1960 microuptime(&tv);
1961 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1962 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1963 }
1964 } else {
1965 /* The page containing the (current) eof is invalid: just add the
1966 remainder of the page to the invalid list, along with the area
1967 being newly allocated:
1968 */
1969 microuptime(&tv);
1970 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1971 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1972 };
1973 }
1974 } else {
1975 panic("hfs_truncate: invoked on non-UBC object?!");
1976 };
1977 }
1978 cp->c_touch_modtime = TRUE;
1979 fp->ff_size = length;
1980
1981 /* Nested transactions will do their own ubc_setsize. */
1982 if (!skipsetsize) {
1983 /*
1984 * ubc_setsize can cause a pagein here
1985 * so we need to drop cnode lock.
1986 */
1987 hfs_unlock(cp);
1988 ubc_setsize(vp, length);
1989 hfs_lock(cp, HFS_FORCE_LOCK);
1990 }
1991
1992 } else { /* Shorten the size of the file */
1993
1994 if ((off_t)fp->ff_size > length) {
1995 /*
1996 * Any buffers that are past the truncation point need to be
1997 * invalidated (to maintain buffer cache consistency).
1998 */
1999
2000 /* Nested transactions will do their own ubc_setsize. */
2001 if (!skipsetsize) {
2002 /*
2003 * ubc_setsize can cause a pageout here
2004 * so we need to drop cnode lock.
2005 */
2006 hfs_unlock(cp);
2007 ubc_setsize(vp, length);
2008 hfs_lock(cp, HFS_FORCE_LOCK);
2009 }
2010
2011 /* Any space previously marked as invalid is now irrelevant: */
2012 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2013 }
2014
2015 /*
2016 * Account for any unmapped blocks. Note that the new
2017 * file length can still end up with unmapped blocks.
2018 */
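	/*
	 * ff_unallocblocks are blocks loaned from the volume for delayed
	 * allocation.  Return them all here, then borrow back only as many
	 * as the new length still needs beyond the blocks actually allocated.
	 */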
2019 if (fp->ff_unallocblocks > 0) {
2020 u_int32_t finalblks;
2021 u_int32_t loanedBlocks;
2022
2023 HFS_MOUNT_LOCK(hfsmp, TRUE);
2024
2025 loanedBlocks = fp->ff_unallocblocks;
2026 cp->c_blocks -= loanedBlocks;
2027 fp->ff_blocks -= loanedBlocks;
2028 fp->ff_unallocblocks = 0;
2029
2030 hfsmp->loanedBlocks -= loanedBlocks;
2031
2032 finalblks = (length + blksize - 1) / blksize;
2033 if (finalblks > fp->ff_blocks) {
2034 /* calculate required unmapped blocks */
2035 loanedBlocks = finalblks - fp->ff_blocks;
2036 hfsmp->loanedBlocks += loanedBlocks;
2037
2038 fp->ff_unallocblocks = loanedBlocks;
2039 cp->c_blocks += loanedBlocks;
2040 fp->ff_blocks += loanedBlocks;
2041 }
2042 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2043 }
2044
2045 /*
2046 * For a TBE process the deallocation of the file blocks is
2047 * delayed until the file is closed. And hfs_close calls
2048 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2049 * isn't set, we make sure this isn't a TBE process.
2050 */
2051 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2052 #if QUOTA
2053 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2054 #endif /* QUOTA */
2055 if (hfs_start_transaction(hfsmp) != 0) {
2056 retval = EINVAL;
2057 goto Err_Exit;
2058 }
2059
2060 if (fp->ff_unallocblocks == 0) {
2061 /* Protect extents b-tree and allocation bitmap */
2062 lockflags = SFL_BITMAP;
2063 if (overflow_extents(fp))
2064 lockflags |= SFL_EXTENTS;
2065 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2066
2067 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2068 (FCB*)fp, length, false));
2069
2070 hfs_systemfile_unlock(hfsmp, lockflags);
2071 }
2072 if (hfsmp->jnl) {
2073 (void) hfs_update(vp, TRUE);
2074 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2075 }
2076
2077 hfs_end_transaction(hfsmp);
2078
2079 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2080 if (retval)
2081 goto Err_Exit;
2082 #if QUOTA
2083 /* These are bytesreleased */
2084 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2085 #endif /* QUOTA */
2086 }
2087 /* Only set update flag if the logical length changes */
2088 if ((off_t)fp->ff_size != length)
2089 cp->c_touch_modtime = TRUE;
2090 fp->ff_size = length;
2091 }
2092 cp->c_touch_chgtime = TRUE;
2093 retval = hfs_update(vp, MNT_WAIT);
2094 if (retval) {
2095 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2096 -1, -1, -1, retval, 0);
2097 }
2098
2099 Err_Exit:
2100
2101 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2102 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2103
2104 return (retval);
2105 }
2106
2107
2108
2109 /*
2110 * Truncate a cnode to at most length size, freeing (or adding) the
2111 * disk blocks.
2112 */
2113 __private_extern__
2114 int
2115 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2116 vfs_context_t context)
2117 {
2118 struct filefork *fp = VTOF(vp);
2119 off_t filebytes;
2120 u_long fileblocks;
2121 int blksize, error = 0;
2122 struct cnode *cp = VTOC(vp);
2123
2124 if (vnode_isdir(vp))
2125 return (EISDIR); /* cannot truncate an HFS directory! */
2126
2127 blksize = VTOVCB(vp)->blockSize;
2128 fileblocks = fp->ff_blocks;
2129 filebytes = (off_t)fileblocks * (off_t)blksize;
2130
2131 // have to loop truncating or growing files that are
2132 // really big because otherwise transactions can get
2133 // enormous and consume too many kernel resources.
2134
2135 if (length < filebytes) {
2136 while (filebytes > length) {
2137 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2138 filebytes -= HFS_BIGFILE_SIZE;
2139 } else {
2140 filebytes = length;
2141 }
2142 cp->c_flag |= C_FORCEUPDATE;
2143 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2144 if (error)
2145 break;
2146 }
2147 } else if (length > filebytes) {
2148 while (filebytes < length) {
2149 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2150 filebytes += HFS_BIGFILE_SIZE;
2151 } else {
2152 filebytes = length;
2153 }
2154 cp->c_flag |= C_FORCEUPDATE;
2155 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2156 if (error)
2157 break;
2158 }
2159 } else /* Same logical size */ {
2160
2161 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2162 }
2163 /* Files that are changing size are not hot file candidates. */
2164 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2165 fp->ff_bytesread = 0;
2166 }
2167
2168 return (error);
2169 }
2170
2171
2172
2173 /*
2174 * Preallocate file storage space.
2175 */
2176 int
2177 hfs_vnop_allocate(struct vnop_allocate_args /* {
2178 vnode_t a_vp;
2179 off_t a_length;
2180 u_int32_t a_flags;
2181 off_t *a_bytesallocated;
2182 off_t a_offset;
2183 vfs_context_t a_context;
2184 } */ *ap)
2185 {
2186 struct vnode *vp = ap->a_vp;
2187 struct cnode *cp;
2188 struct filefork *fp;
2189 ExtendedVCB *vcb;
2190 off_t length = ap->a_length;
2191 off_t startingPEOF;
2192 off_t moreBytesRequested;
2193 off_t actualBytesAdded;
2194 off_t filebytes;
2195 u_long fileblocks;
2196 int retval, retval2;
2197 UInt32 blockHint;
2198 UInt32 extendFlags; /* For call to ExtendFileC */
2199 struct hfsmount *hfsmp;
2200 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2201 int lockflags;
2202
2203 *(ap->a_bytesallocated) = 0;
2204
2205 if (!vnode_isreg(vp))
2206 return (EISDIR);
2207 if (length < (off_t)0)
2208 return (EINVAL);
2209
2210 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2211 return (retval);
2212 cp = VTOC(vp);
2213 fp = VTOF(vp);
2214 hfsmp = VTOHFS(vp);
2215 vcb = VTOVCB(vp);
2216
2217 fileblocks = fp->ff_blocks;
2218 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2219
2220 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2221 retval = EINVAL;
2222 goto Err_Exit;
2223 }
2224
2225 /* Fill in the flags word for the call to Extend the file */
2226
2227 extendFlags = kEFNoClumpMask;
2228 if (ap->a_flags & ALLOCATECONTIG)
2229 extendFlags |= kEFContigMask;
2230 if (ap->a_flags & ALLOCATEALL)
2231 extendFlags |= kEFAllMask;
2232 if (cred && suser(cred, NULL) != 0)
2233 extendFlags |= kEFReserveMask;
2234
2235 retval = E_NONE;
2236 blockHint = 0;
2237 startingPEOF = filebytes;
2238
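	/*
	 * ALLOCATEFROMPEOF means a_length is relative to the current
	 * physical EOF; ALLOCATEFROMVOL means a_offset is a volume position
	 * used only to derive an allocation block hint.
	 */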
2239 if (ap->a_flags & ALLOCATEFROMPEOF)
2240 length += filebytes;
2241 else if (ap->a_flags & ALLOCATEFROMVOL)
2242 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2243
2244 /* If no changes are necessary, then we're done */
2245 if (filebytes == length)
2246 goto Std_Exit;
2247
2248 /*
2249 * Lengthen the size of the file. We must ensure that the
2250 * last byte of the file is allocated. Since the smallest
2251 * value of filebytes is 0, length will be at least 1.
2252 */
2253 if (length > filebytes) {
2254 moreBytesRequested = length - filebytes;
2255
2256 #if QUOTA
2257 retval = hfs_chkdq(cp,
2258 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2259 cred, 0);
2260 if (retval)
2261 goto Err_Exit;
2262
2263 #endif /* QUOTA */
2264 /*
2265 * Metadata zone checks.
2266 */
2267 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2268 /*
2269 * Allocate Journal and Quota files in metadata zone.
2270 */
2271 if (hfs_virtualmetafile(cp)) {
2272 extendFlags |= kEFMetadataMask;
2273 blockHint = hfsmp->hfs_metazone_start;
2274 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2275 (blockHint <= hfsmp->hfs_metazone_end)) {
2276 /*
2277 * Move blockHint outside metadata zone.
2278 */
2279 blockHint = hfsmp->hfs_metazone_end + 1;
2280 }
2281 }
2282
2283 if (hfs_start_transaction(hfsmp) != 0) {
2284 retval = EINVAL;
2285 goto Err_Exit;
2286 }
2287
2288 /* Protect extents b-tree and allocation bitmap */
2289 lockflags = SFL_BITMAP;
2290 if (overflow_extents(fp))
2291 lockflags |= SFL_EXTENTS;
2292 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2293
2294 retval = MacToVFSError(ExtendFileC(vcb,
2295 (FCB*)fp,
2296 moreBytesRequested,
2297 blockHint,
2298 extendFlags,
2299 &actualBytesAdded));
2300
2301 *(ap->a_bytesallocated) = actualBytesAdded;
2302 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2303
2304 hfs_systemfile_unlock(hfsmp, lockflags);
2305
2306 if (hfsmp->jnl) {
2307 (void) hfs_update(vp, TRUE);
2308 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2309 }
2310
2311 hfs_end_transaction(hfsmp);
2312
2313 /*
2314 * If we get an error and no changes were made, then exit;
2315 * otherwise we must do the hfs_update to reflect the changes.
2316 */
2317 if (retval && (startingPEOF == filebytes))
2318 goto Err_Exit;
2319
2320 /*
2321 * Adjust actualBytesAdded to be allocation block aligned, not
2322 * clump size aligned.
2323 * NOTE: What we report here does not match what is actually
2324 * allocated until the file is closed, when we truncate the file
2325 * to allocation block size.
2326 */
2327 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2328 *(ap->a_bytesallocated) =
2329 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2330
2331 } else { /* Shorten the size of the file */
2332
2333 if (fp->ff_size > length) {
2334 /*
2335 * Any buffers that are past the truncation point need to be
2336 * invalidated (to maintain buffer cache consistency).
2337 */
2338 }
2339
2340 if (hfs_start_transaction(hfsmp) != 0) {
2341 retval = EINVAL;
2342 goto Err_Exit;
2343 }
2344
2345 /* Protect extents b-tree and allocation bitmap */
2346 lockflags = SFL_BITMAP;
2347 if (overflow_extents(fp))
2348 lockflags |= SFL_EXTENTS;
2349 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2350
2351 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2352
2353 hfs_systemfile_unlock(hfsmp, lockflags);
2354
2355 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2356
2357 if (hfsmp->jnl) {
2358 (void) hfs_update(vp, TRUE);
2359 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2360 }
2361
2362 hfs_end_transaction(hfsmp);
2363
2364
2365 /*
2366 * If we get an error and no changes were made, then exit;
2367 * otherwise we must do the hfs_update to reflect the changes.
2368 */
2369 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2370 #if QUOTA
2371 /* These are bytesreleased */
2372 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2373 #endif /* QUOTA */
2374
2375 if (fp->ff_size > filebytes) {
2376 fp->ff_size = filebytes;
2377
2378 hfs_unlock(cp);
2379 ubc_setsize(vp, fp->ff_size);
2380 hfs_lock(cp, HFS_FORCE_LOCK);
2381 }
2382 }
2383
2384 Std_Exit:
2385 cp->c_touch_chgtime = TRUE;
2386 cp->c_touch_modtime = TRUE;
2387 retval2 = hfs_update(vp, MNT_WAIT);
2388
2389 if (retval == 0)
2390 retval = retval2;
2391 Err_Exit:
2392 hfs_unlock(cp);
2393 return (retval);
2394 }
2395
2396
2397 /*
2398 * Pagein for HFS filesystem
2399 */
2400 int
2401 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2402 /*
2403 struct vnop_pagein_args {
2404 vnode_t a_vp,
2405 upl_t a_pl,
2406 vm_offset_t a_pl_offset,
2407 off_t a_f_offset,
2408 size_t a_size,
2409 int a_flags
2410 vfs_context_t a_context;
2411 };
2412 */
2413 {
2414 vnode_t vp = ap->a_vp;
2415 int error;
2416
2417 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2418 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2419 /*
2420 * Keep track of blocks read.
2421 */
2422 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2423 struct cnode *cp;
2424 struct filefork *fp;
2425 int bytesread;
2426 int took_cnode_lock = 0;
2427
2428 cp = VTOC(vp);
2429 fp = VTOF(vp);
2430
2431 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2432 bytesread = fp->ff_size;
2433 else
2434 bytesread = ap->a_size;
2435
2436 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2437 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2438 hfs_lock(cp, HFS_FORCE_LOCK);
2439 took_cnode_lock = 1;
2440 }
2441 /*
2442 * If this file hasn't been seen since the start of
2443 * the current sampling period then start over.
2444 */
2445 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2446 struct timeval tv;
2447
2448 fp->ff_bytesread = bytesread;
2449 microtime(&tv);
2450 cp->c_atime = tv.tv_sec;
2451 } else {
2452 fp->ff_bytesread += bytesread;
2453 }
2454 cp->c_touch_acctime = TRUE;
2455 if (took_cnode_lock)
2456 hfs_unlock(cp);
2457 }
2458 return (error);
2459 }
2460
2461 /*
2462 * Pageout for HFS filesystem.
2463 */
2464 int
2465 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2466 /*
2467 struct vnop_pageout_args {
2468 vnode_t a_vp,
2469 upl_t a_pl,
2470 vm_offset_t a_pl_offset,
2471 off_t a_f_offset,
2472 size_t a_size,
2473 int a_flags
2474 vfs_context_t a_context;
2475 };
2476 */
2477 {
2478 vnode_t vp = ap->a_vp;
2479 struct cnode *cp;
2480 struct filefork *fp;
2481 int retval;
2482 off_t end_of_range;
2483 off_t filesize;
2484
2485 cp = VTOC(vp);
2486 if (cp->c_lockowner == current_thread()) {
2487 panic("pageout: %s cnode lock already held!\n",
2488 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2489 }
2490 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2491 return (retval);
2492 }
2493 fp = VTOF(vp);
2494
2495 filesize = fp->ff_size;
2496 end_of_range = ap->a_f_offset + ap->a_size - 1;
2497
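	/*
	 * The pages being pushed out contain valid data, so clamp the
	 * range to the current EOF and drop it from the invalid-range
	 * list before handing the pageout to the cluster layer.
	 */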
2498 if (end_of_range >= filesize) {
2499 end_of_range = (off_t)(filesize - 1);
2500 }
2501 if (ap->a_f_offset < filesize) {
2502 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2503 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2504 }
2505 hfs_unlock(cp);
2506
2507 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2508 ap->a_size, filesize, ap->a_flags);
2509
2510 /*
2511 * If data was written, and setuid or setgid bits are set and
2512 * this process is not the superuser then clear the setuid and
2513 * setgid bits as a precaution against tampering.
2514 */
2515 if ((retval == 0) &&
2516 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2517 (vfs_context_suser(ap->a_context) != 0)) {
2518 hfs_lock(cp, HFS_FORCE_LOCK);
2519 cp->c_mode &= ~(S_ISUID | S_ISGID);
2520 cp->c_touch_chgtime = TRUE;
2521 hfs_unlock(cp);
2522 }
2523 return (retval);
2524 }
2525
2526 /*
2527 * Intercept B-Tree node writes to unswap them if necessary.
2528 */
2529 int
2530 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2531 {
2532 int retval = 0;
2533 register struct buf *bp = ap->a_bp;
2534 register struct vnode *vp = buf_vnode(bp);
2535 BlockDescriptor block;
2536
2537 /* Trap B-Tree writes */
2538 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2539 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2540 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2541
2542 /*
2543 * Swap and validate the node if it is in native byte order.
2544 * This is always true on big endian, so we always validate
2545 * before writing here. On little endian, the node typically has
2546 * been swapped and validated when it was written to the journal,
2547 * so we won't do anything here.
2548 */
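		/*
		 * The last two bytes of a B-tree node hold the offset of
		 * record 0, which is always sizeof(BTNodeDescriptor)
		 * (0x000e).  If that value reads correctly in host byte
		 * order, the node has not been swapped yet.
		 */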
2549 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2550 /* Prepare the block pointer */
2551 block.blockHeader = bp;
2552 block.buffer = (char *)buf_dataptr(bp);
2553 block.blockNum = buf_lblkno(bp);
2554 /* not found in cache ==> came from disk */
2555 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2556 block.blockSize = buf_count(bp);
2557
2558 /* Endian un-swap B-Tree node */
2559 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2560 if (retval)
2561 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2562 }
2563 }
2564
2565 /* This buffer shouldn't be locked anymore, but if it is, clear it. */
2566 if ((buf_flags(bp) & B_LOCKED)) {
2567 // XXXdbg
2568 if (VTOHFS(vp)->jnl) {
2569 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2570 }
2571 buf_clearflags(bp, B_LOCKED);
2572 }
2573 retval = vn_bwrite (ap);
2574
2575 return (retval);
2576 }
2577
2578 /*
2579 * Relocate a file to a new location on disk
2580 * cnode must be locked on entry
2581 *
2582 * Relocation occurs by cloning the file's data from its
2583 * current set of blocks to a new set of blocks. During
2584 * the relocation all of the blocks (old and new) are
2585 * owned by the file.
2586 *
2587 * -----------------
2588 * |///////////////|
2589 * -----------------
2590 * 0 N (file offset)
2591 *
2592 * ----------------- -----------------
2593 * |///////////////| | | STEP 1 (acquire new blocks)
2594 * ----------------- -----------------
2595 * 0 N N+1 2N
2596 *
2597 * ----------------- -----------------
2598 * |///////////////| |///////////////| STEP 2 (clone data)
2599 * ----------------- -----------------
2600 * 0 N N+1 2N
2601 *
2602 * -----------------
2603 * |///////////////| STEP 3 (head truncate blocks)
2604 * -----------------
2605 * 0 N
2606 *
2607 * During steps 2 and 3 page-outs to file offsets less
2608 * than or equal to N are suspended.
2609 *
2610 * During step 3 page-ins to the file are suspended.
2611 */
2612 __private_extern__
2613 int
2614 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2615 struct proc *p)
2616 {
2617 struct cnode *cp;
2618 struct filefork *fp;
2619 struct hfsmount *hfsmp;
2620 u_int32_t headblks;
2621 u_int32_t datablks;
2622 u_int32_t blksize;
2623 u_int32_t growsize;
2624 u_int32_t nextallocsave;
2625 daddr64_t sector_a, sector_b;
2626 int disabled_caching = 0;
2627 int eflags;
2628 off_t newbytes;
2629 int retval;
2630 int lockflags = 0;
2631 int took_trunc_lock = 0;
2632 int started_tr = 0;
2633 enum vtype vnodetype;
2634
2635 vnodetype = vnode_vtype(vp);
2636 if (vnodetype != VREG && vnodetype != VLNK) {
2637 return (EPERM);
2638 }
2639
2640 hfsmp = VTOHFS(vp);
2641 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2642 return (ENOSPC);
2643 }
2644
2645 cp = VTOC(vp);
2646 fp = VTOF(vp);
2647 if (fp->ff_unallocblocks)
2648 return (EINVAL);
2649 blksize = hfsmp->blockSize;
2650 if (blockHint == 0)
2651 blockHint = hfsmp->nextAllocation;
2652
2653 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2654 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2655 return (EFBIG);
2656 }
2657
2658 //
2659 // We do not believe that this call to hfs_fsync() is
2660 // necessary and it causes a journal transaction
2661 // deadlock so we are removing it.
2662 //
2663 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2664 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2665 // if (retval)
2666 // return (retval);
2667 //}
2668
2669 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2670 hfs_unlock(cp);
2671 hfs_lock_truncate(cp, TRUE);
2672 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2673 hfs_unlock_truncate(cp);
2674 return (retval);
2675 }
2676 took_trunc_lock = 1;
2677 }
2678 headblks = fp->ff_blocks;
2679 datablks = howmany(fp->ff_size, blksize);
2680 growsize = datablks * blksize;
2681 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2682 if (blockHint >= hfsmp->hfs_metazone_start &&
2683 blockHint <= hfsmp->hfs_metazone_end)
2684 eflags |= kEFMetadataMask;
2685
2686 if (hfs_start_transaction(hfsmp) != 0) {
2687 if (took_trunc_lock)
2688 hfs_unlock_truncate(cp);
2689 return (EINVAL);
2690 }
2691 started_tr = 1;
2692 /*
2693 * Protect the extents b-tree and the allocation bitmap
2694 * during MapFileBlockC and ExtendFileC operations.
2695 */
2696 lockflags = SFL_BITMAP;
2697 if (overflow_extents(fp))
2698 lockflags |= SFL_EXTENTS;
2699 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2700
2701 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2702 if (retval) {
2703 retval = MacToVFSError(retval);
2704 goto out;
2705 }
2706
2707 /*
2708 * STEP 1 - acquire new allocation blocks.
2709 */
2710 if (!vnode_isnocache(vp)) {
2711 vnode_setnocache(vp);
2712 disabled_caching = 1;
2713
2714 }
2715 nextallocsave = hfsmp->nextAllocation;
2716 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
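	/*
	 * A metadata-zone allocation advances nextAllocation into the zone;
	 * restore the saved value so ordinary allocations are not steered
	 * there, and mark the VCB dirty so the change is written out with
	 * the volume header.
	 */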
2717 if (eflags & kEFMetadataMask) {
2718 HFS_MOUNT_LOCK(hfsmp, TRUE);
2719 hfsmp->nextAllocation = nextallocsave;
2720 hfsmp->vcbFlags |= 0xFF00;
2721 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2722 }
2723
2724 retval = MacToVFSError(retval);
2725 if (retval == 0) {
2726 cp->c_flag |= C_MODIFIED;
2727 if (newbytes < growsize) {
2728 retval = ENOSPC;
2729 goto restore;
2730 } else if (fp->ff_blocks < (headblks + datablks)) {
2731 printf("hfs_relocate: allocation failed");
2732 retval = ENOSPC;
2733 goto restore;
2734 }
2735
2736 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2737 if (retval) {
2738 retval = MacToVFSError(retval);
2739 } else if ((sector_a + 1) == sector_b) {
2740 retval = ENOSPC;
2741 goto restore;
2742 } else if ((eflags & kEFMetadataMask) &&
2743 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2744 hfsmp->hfs_metazone_end)) {
2745 printf("hfs_relocate: didn't move into metadata zone\n");
2746 retval = ENOSPC;
2747 goto restore;
2748 }
2749 }
2750 /* Done with system locks and journal for now. */
2751 hfs_systemfile_unlock(hfsmp, lockflags);
2752 lockflags = 0;
2753 hfs_end_transaction(hfsmp);
2754 started_tr = 0;
2755
2756 if (retval) {
2757 /*
2758 * Check to see if failure is due to excessive fragmentation.
2759 */
2760 if ((retval == ENOSPC) &&
2761 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2762 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2763 }
2764 goto out;
2765 }
2766 /*
2767 * STEP 2 - clone file data into the new allocation blocks.
2768 */
2769
2770 if (vnodetype == VLNK)
2771 retval = hfs_clonelink(vp, blksize, cred, p);
2772 else if (vnode_issystem(vp))
2773 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2774 else
2775 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2776
2777 /* Start transaction for step 3 or for a restore. */
2778 if (hfs_start_transaction(hfsmp) != 0) {
2779 retval = EINVAL;
2780 goto out;
2781 }
2782 started_tr = 1;
2783 if (retval)
2784 goto restore;
2785
2786 /*
2787 * STEP 3 - switch to cloned data and remove old blocks.
2788 */
2789 lockflags = SFL_BITMAP;
2790 if (overflow_extents(fp))
2791 lockflags |= SFL_EXTENTS;
2792 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2793
2794 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2795
2796 hfs_systemfile_unlock(hfsmp, lockflags);
2797 lockflags = 0;
2798 if (retval)
2799 goto restore;
2800 out:
2801 if (took_trunc_lock)
2802 hfs_unlock_truncate(cp);
2803
2804 if (lockflags) {
2805 hfs_systemfile_unlock(hfsmp, lockflags);
2806 lockflags = 0;
2807 }
2808
2809 // See comment up above about calls to hfs_fsync()
2810 //
2811 //if (retval == 0)
2812 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2813
2814 if (hfsmp->jnl) {
2815 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2816 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2817 else
2818 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2819 }
2820 exit:
2821 if (disabled_caching) {
2822 vnode_clearnocache(vp);
2823 }
2824 if (started_tr)
2825 hfs_end_transaction(hfsmp);
2826
2827 return (retval);
2828
2829 restore:
2830 if (fp->ff_blocks == headblks)
2831 goto exit;
2832 /*
2833 * Give back any newly allocated space.
2834 */
2835 if (lockflags == 0) {
2836 lockflags = SFL_BITMAP;
2837 if (overflow_extents(fp))
2838 lockflags |= SFL_EXTENTS;
2839 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2840 }
2841
2842 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2843
2844 hfs_systemfile_unlock(hfsmp, lockflags);
2845 lockflags = 0;
2846
2847 if (took_trunc_lock)
2848 hfs_unlock_truncate(cp);
2849 goto exit;
2850 }
2851
2852
2853 /*
2854 * Clone a symlink.
2855 *
2856 */
2857 static int
2858 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2859 {
2860 struct buf *head_bp = NULL;
2861 struct buf *tail_bp = NULL;
2862 int error;
2863
2864
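	/*
	 * A symlink's data fits in a single allocation block.  Read the
	 * original block (logical block 0) and copy it into the block that
	 * hfs_relocate just appended (logical block 1); the old buffer is
	 * then invalidated and any remaining dirty buffers are flushed.
	 */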
2865 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2866 if (error)
2867 goto out;
2868
2869 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2870 if (tail_bp == NULL) {
2871 error = EIO;
2872 goto out;
2873 }
2874 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2875 error = (int)buf_bwrite(tail_bp);
2876 out:
2877 if (head_bp) {
2878 buf_markinvalid(head_bp);
2879 buf_brelse(head_bp);
2880 }
2881 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2882
2883 return (error);
2884 }
2885
2886 /*
2887 * Clone a file's data within the file.
2888 *
2889 */
2890 static int
2891 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2892 {
2893 caddr_t bufp;
2894 size_t writebase;
2895 size_t bufsize;
2896 size_t copysize;
2897 size_t iosize;
2898 off_t filesize;
2899 size_t offset;
2900 uio_t auio;
2901 int error = 0;
2902
2903 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2904 writebase = blkstart * blksize;
2905 copysize = blkcnt * blksize;
2906 iosize = bufsize = MIN(copysize, 4096 * 16);
2907 offset = 0;
2908
2909 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2910 return (ENOMEM);
2911 }
2912 hfs_unlock(VTOC(vp));
2913
2914 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2915
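	/*
	 * Copy the fork in bufsize chunks: read from the original blocks
	 * (file offsets 0..copysize) and rewrite the same data starting at
	 * writebase, just past the old blocks, using IO_NOCACHE so no stale
	 * cached copies are left behind.
	 */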
2916 while (offset < copysize) {
2917 iosize = MIN(copysize - offset, iosize);
2918
2919 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2920 uio_addiov(auio, (uintptr_t)bufp, iosize);
2921
2922 error = cluster_read(vp, auio, copysize, 0);
2923 if (error) {
2924 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2925 break;
2926 }
2927 if (uio_resid(auio) != 0) {
2928 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2929 error = EIO;
2930 break;
2931 }
2932
2933 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2934 uio_addiov(auio, (uintptr_t)bufp, iosize);
2935
2936 error = cluster_write(vp, auio, filesize + offset,
2937 filesize + offset + iosize,
2938 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2939 if (error) {
2940 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2941 break;
2942 }
2943 if (uio_resid(auio) != 0) {
2944 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2945 error = EIO;
2946 break;
2947 }
2948 offset += iosize;
2949 }
2950 uio_free(auio);
2951
2952 /*
2953 * No need to call ubc_sync_range or hfs_invalbuf
2954 * since the file was copied using IO_NOCACHE.
2955 */
2956
2957 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2958
2959 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2960 return (error);
2961 }
2962
2963 /*
2964 * Clone a system (metadata) file.
2965 *
2966 */
2967 static int
2968 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2969 kauth_cred_t cred, struct proc *p)
2970 {
2971 caddr_t bufp;
2972 char * offset;
2973 size_t bufsize;
2974 size_t iosize;
2975 struct buf *bp = NULL;
2976 daddr64_t blkno;
2977 daddr64_t blk;
2978 daddr64_t start_blk;
2979 daddr64_t last_blk;
2980 int breadcnt;
2981 int i;
2982 int error = 0;
2983
2984
2985 iosize = GetLogicalBlockSize(vp);
2986 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2987 breadcnt = bufsize / iosize;
2988
2989 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2990 return (ENOMEM);
2991 }
2992 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2993 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2994 blkno = 0;
2995
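	/*
	 * Source blocks run from logical block 0 up to last_blk; the cloned
	 * copy begins at start_blk.  Copy through the metadata buffer
	 * cache, up to bufsize bytes per pass.
	 */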
2996 while (blkno < last_blk) {
2997 /*
2998 * Read up to a megabyte
2999 */
3000 offset = bufp;
3001 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3002 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3003 if (error) {
3004 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3005 goto out;
3006 }
3007 if (buf_count(bp) != iosize) {
3008 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3009 goto out;
3010 }
3011 bcopy((char *)buf_dataptr(bp), offset, iosize);
3012
3013 buf_markinvalid(bp);
3014 buf_brelse(bp);
3015 bp = NULL;
3016
3017 offset += iosize;
3018 }
3019
3020 /*
3021 * Write up to a megabyte
3022 */
3023 offset = bufp;
3024 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3025 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3026 if (bp == NULL) {
3027 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3028 error = EIO;
3029 goto out;
3030 }
3031 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3032 error = (int)buf_bwrite(bp);
3033 bp = NULL;
3034 if (error)
3035 goto out;
3036 offset += iosize;
3037 }
3038 }
3039 out:
3040 if (bp) {
3041 buf_brelse(bp);
3042 }
3043
3044 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3045
3046 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3047
3048 return (error);
3049 }