1/*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23/* @(#)hfs_readwrite.c 1.0
24 *
25 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
26 *
27 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
28 *
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/resourcevar.h>
34#include <sys/kernel.h>
35#include <sys/fcntl.h>
36#include <sys/filedesc.h>
37#include <sys/stat.h>
38#include <sys/buf.h>
39#include <sys/proc.h>
40#include <sys/kauth.h>
41#include <sys/vnode.h>
42#include <sys/uio.h>
43#include <sys/vfs_context.h>
44
45#include <miscfs/specfs/specdev.h>
46
47#include <sys/ubc.h>
48#include <vm/vm_pageout.h>
49#include <vm/vm_kern.h>
50
51#include <sys/kdebug.h>
52
53#include "hfs.h"
54#include "hfs_endian.h"
55#include "hfs_fsctl.h"
56#include "hfs_quota.h"
57#include "hfscommon/headers/FileMgrInternal.h"
58#include "hfscommon/headers/BTreesInternal.h"
59#include "hfs_cnode.h"
60#include "hfs_dbg.h"
61
62extern int overflow_extents(struct filefork *fp);
63
64#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
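/*
 * can_cluster(size) is true when size is an exact multiple of 4K and no
 * larger than MAXPHYSIO/2; hfs_bmap() uses it to decide whether a read-ahead
 * run length is worth reporting for that logical block size.
 */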
65
66enum {
67 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
68};
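/*
 * 0x7FFFFFFF bytes (2 GB - 1) is the largest file a plain HFS (non-Plus)
 * volume can represent; it is checked against read offsets and truncate
 * lengths on such volumes (see hfs_vnop_read() and do_hfs_truncate()).
 */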
69
70extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
71
72extern int hfs_setextendedsecurity(struct hfsmount *, int);
73
74
75static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
76static int hfs_clonefile(struct vnode *, int, int, int);
77static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
78
79
80/*****************************************************************************
81*
82* I/O Operations on vnodes
83*
84*****************************************************************************/
85int hfs_vnop_read(struct vnop_read_args *);
86int hfs_vnop_write(struct vnop_write_args *);
87int hfs_vnop_ioctl(struct vnop_ioctl_args *);
88int hfs_vnop_select(struct vnop_select_args *);
89int hfs_vnop_blktooff(struct vnop_blktooff_args *);
90int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
91int hfs_vnop_blockmap(struct vnop_blockmap_args *);
92int hfs_vnop_strategy(struct vnop_strategy_args *);
93int hfs_vnop_allocate(struct vnop_allocate_args *);
94int hfs_vnop_pagein(struct vnop_pagein_args *);
95int hfs_vnop_pageout(struct vnop_pageout_args *);
96int hfs_vnop_bwrite(struct vnop_bwrite_args *);
97
98
99/*
100 * Read data from a file.
101 */
102int
103hfs_vnop_read(struct vnop_read_args *ap)
104{
105 uio_t uio = ap->a_uio;
106 struct vnode *vp = ap->a_vp;
107 struct cnode *cp;
108 struct filefork *fp;
109 struct hfsmount *hfsmp;
110 off_t filesize;
111 off_t filebytes;
112 off_t start_resid = uio_resid(uio);
113 off_t offset = uio_offset(uio);
114 int retval = 0;
115
116
117 /* Preflight checks */
118 if (!vnode_isreg(vp)) {
119 /* can only read regular files */
120 if (vnode_isdir(vp))
121 return (EISDIR);
122 else
123 return (EPERM);
124 }
125 if (start_resid == 0)
126 return (0); /* Nothing left to do */
127 if (offset < 0)
 128 return (EINVAL); /* can't read from a negative offset */
129
130 cp = VTOC(vp);
131 fp = VTOF(vp);
132 hfsmp = VTOHFS(vp);
133
134 /* Protect against a size change. */
135 hfs_lock_truncate(cp, 0);
136
137 filesize = fp->ff_size;
138 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
139 if (offset > filesize) {
140 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
141 (offset > (off_t)MAXHFSFILESIZE)) {
142 retval = EFBIG;
143 }
144 goto exit;
145 }
146
147 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
148 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
149
150 retval = cluster_read(vp, uio, filesize, 0);
151
152 cp->c_touch_acctime = TRUE;
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 /*
 158 * Keep track of blocks read
 
159 */
160 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
161 int took_cnode_lock = 0;
162 off_t bytesread;
163
164 bytesread = start_resid - uio_resid(uio);
165
166 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
167 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
168 hfs_lock(cp, HFS_FORCE_LOCK);
169 took_cnode_lock = 1;
170 }
171 /*
172 * If this file hasn't been seen since the start of
173 * the current sampling period then start over.
174 */
175 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
176 struct timeval tv;
177
178 fp->ff_bytesread = bytesread;
179 microtime(&tv);
180 cp->c_atime = tv.tv_sec;
181 } else {
182 fp->ff_bytesread += bytesread;
183 }
184 if (took_cnode_lock)
185 hfs_unlock(cp);
186 }
187exit:
188 hfs_unlock_truncate(cp);
189 return (retval);
190}
191
192/*
193 * Write data to a file.
194 */
195int
196hfs_vnop_write(struct vnop_write_args *ap)
197{
198 uio_t uio = ap->a_uio;
199 struct vnode *vp = ap->a_vp;
200 struct cnode *cp;
201 struct filefork *fp;
202 struct hfsmount *hfsmp;
203 kauth_cred_t cred = NULL;
204 off_t origFileSize;
205 off_t writelimit;
206 off_t bytesToAdd;
207 off_t actualBytesAdded;
208 off_t filebytes;
209 off_t offset;
210 size_t resid;
211 int eflags;
212 int ioflag = ap->a_ioflag;
213 int retval = 0;
214 int lockflags;
215 int cnode_locked = 0;
216
217 // LP64todo - fix this! uio_resid may be 64-bit value
218 resid = uio_resid(uio);
219 offset = uio_offset(uio);
220
221 if (offset < 0)
222 return (EINVAL);
223 if (resid == 0)
224 return (E_NONE);
225 if (!vnode_isreg(vp))
226 return (EPERM); /* Can only write regular files */
227
228 /* Protect against a size change. */
229 hfs_lock_truncate(VTOC(vp), TRUE);
230
231 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
232 hfs_unlock_truncate(VTOC(vp));
233 return (retval);
234 }
235 cnode_locked = 1;
236 cp = VTOC(vp);
237 fp = VTOF(vp);
238 hfsmp = VTOHFS(vp);
239 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
240
241 if (ioflag & IO_APPEND) {
242 uio_setoffset(uio, fp->ff_size);
243 offset = fp->ff_size;
244 }
245 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
246 retval = EPERM;
247 goto exit;
248 }
249
250 origFileSize = fp->ff_size;
251 eflags = kEFDeferMask; /* defer file block allocations */
252
253#ifdef HFS_SPARSE_DEV
254 /*
255 * When the underlying device is sparse and space
256 * is low (< 8MB), stop doing delayed allocations
257 * and begin doing synchronous I/O.
258 */
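/*
 * A rough check of the 8MB figure, assuming the common 4 KB allocation
 * block size: 2048 blocks * 4096 bytes = 8 MB, which is the threshold the
 * hfs_freeblks() test below enforces.
 */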
259 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
260 (hfs_freeblks(hfsmp, 0) < 2048)) {
261 eflags &= ~kEFDeferMask;
262 ioflag |= IO_SYNC;
263 }
264#endif /* HFS_SPARSE_DEV */
265
266 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
267 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
268
269 /* Now test if we need to extend the file */
270 /* Doing so will adjust the filebytes for us */
271
272 writelimit = offset + resid;
273 if (writelimit <= filebytes)
274 goto sizeok;
275
276 cred = vfs_context_ucred(ap->a_context);
277#if QUOTA
278 bytesToAdd = writelimit - filebytes;
279 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
280 cred, 0);
281 if (retval)
282 goto exit;
283#endif /* QUOTA */
284
285 if (hfs_start_transaction(hfsmp) != 0) {
286 retval = EINVAL;
287 goto exit;
288 }
289
290 while (writelimit > filebytes) {
291 bytesToAdd = writelimit - filebytes;
292 if (cred && suser(cred, NULL) != 0)
293 eflags |= kEFReserveMask;
294
295 /* Protect extents b-tree and allocation bitmap */
296 lockflags = SFL_BITMAP;
297 if (overflow_extents(fp))
298 lockflags |= SFL_EXTENTS;
299 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
300
301 /* Files that are changing size are not hot file candidates. */
302 if (hfsmp->hfc_stage == HFC_RECORDING) {
303 fp->ff_bytesread = 0;
304 }
305 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
306 0, eflags, &actualBytesAdded));
307
308 hfs_systemfile_unlock(hfsmp, lockflags);
309
310 if ((actualBytesAdded == 0) && (retval == E_NONE))
311 retval = ENOSPC;
312 if (retval != E_NONE)
313 break;
314 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
315 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
316 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
317 }
318 (void) hfs_update(vp, TRUE);
319 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
320 (void) hfs_end_transaction(hfsmp);
321
322sizeok:
323 if (retval == E_NONE) {
324 off_t filesize;
325 off_t zero_off;
326 off_t tail_off;
327 off_t inval_start;
328 off_t inval_end;
329 off_t io_start;
330 int lflag;
331 struct rl_entry *invalid_range;
332
333 if (writelimit > fp->ff_size)
334 filesize = writelimit;
335 else
336 filesize = fp->ff_size;
337
338 lflag = (ioflag & IO_SYNC);
339
340 if (offset <= fp->ff_size) {
341 zero_off = offset & ~PAGE_MASK_64;
342
 343 /* Check whether the area between zero_off and the start of the
 344 transfer is invalid and should be zero-filled as part of the
 345 transfer:
346 */
347 if (offset > zero_off) {
348 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
349 lflag |= IO_HEADZEROFILL;
350 }
351 } else {
352 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
353
354 /* The bytes between fp->ff_size and uio->uio_offset must never be
355 read without being zeroed. The current last block is filled with zeroes
 356 if it holds valid data; in all cases we merely do a little bookkeeping
357 to track the area from the end of the current last page to the start of
358 the area actually written. For the same reason only the bytes up to the
 359 start of the page where this write will start are invalidated; any remainder
360 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
361
362 Note that inval_start, the start of the page after the current EOF,
363 may be past the start of the write, in which case the zeroing
 364 will be handled by the cluster_write of the actual data.
365 */
366 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
367 inval_end = offset & ~PAGE_MASK_64;
368 zero_off = fp->ff_size;
369
370 if ((fp->ff_size & PAGE_MASK_64) &&
371 (rl_scan(&fp->ff_invalidranges,
372 eof_page_base,
373 fp->ff_size - 1,
374 &invalid_range) != RL_NOOVERLAP)) {
375 /* The page containing the EOF is not valid, so the
376 entire page must be made inaccessible now. If the write
377 starts on a page beyond the page containing the eof
378 (inval_end > eof_page_base), add the
379 whole page to the range to be invalidated. Otherwise
380 (i.e. if the write starts on the same page), zero-fill
381 the entire page explicitly now:
382 */
383 if (inval_end > eof_page_base) {
384 inval_start = eof_page_base;
385 } else {
386 zero_off = eof_page_base;
387 };
388 };
389
390 if (inval_start < inval_end) {
391 struct timeval tv;
392 /* There's some range of data that's going to be marked invalid */
393
394 if (zero_off < inval_start) {
395 /* The pages between inval_start and inval_end are going to be invalidated,
396 and the actual write will start on a page past inval_end. Now's the last
397 chance to zero-fill the page containing the EOF:
398 */
399 hfs_unlock(cp);
400 cnode_locked = 0;
401 retval = cluster_write(vp, (uio_t) 0,
402 fp->ff_size, inval_start,
403 zero_off, (off_t)0,
404 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
405 hfs_lock(cp, HFS_FORCE_LOCK);
406 cnode_locked = 1;
407 if (retval) goto ioerr_exit;
408 offset = uio_offset(uio);
409 };
410
411 /* Mark the remaining area of the newly allocated space as invalid: */
412 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
413 microuptime(&tv);
414 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
415 zero_off = fp->ff_size = inval_end;
416 };
417
418 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
419 };
420
421 /* Check to see whether the area between the end of the write and the end of
422 the page it falls in is invalid and should be zero-filled as part of the transfer:
423 */
424 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
425 if (tail_off > filesize) tail_off = filesize;
426 if (tail_off > writelimit) {
427 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
428 lflag |= IO_TAILZEROFILL;
429 };
430 };
431
432 /*
433 * if the write starts beyond the current EOF (possibly advanced in the
434 * zeroing of the last block, above), then we'll zero fill from the current EOF
435 * to where the write begins:
436 *
437 * NOTE: If (and ONLY if) the portion of the file about to be written is
438 * before the current EOF it might be marked as invalid now and must be
439 * made readable (removed from the invalid ranges) before cluster_write
440 * tries to write it:
441 */
442 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
443 if (io_start < fp->ff_size) {
444 off_t io_end;
445
446 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
447 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
448 };
449
450 hfs_unlock(cp);
451 cnode_locked = 0;
452 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
453 tail_off, lflag | IO_NOZERODIRTY);
454 offset = uio_offset(uio);
455 if (offset > fp->ff_size) {
456 fp->ff_size = offset;
457
458 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
459 /* Files that are changing size are not hot file candidates. */
460 if (hfsmp->hfc_stage == HFC_RECORDING)
461 fp->ff_bytesread = 0;
462 }
463 if (resid > uio_resid(uio)) {
464 cp->c_touch_chgtime = TRUE;
465 cp->c_touch_modtime = TRUE;
466 }
467 }
468 HFS_KNOTE(vp, NOTE_WRITE);
469
470ioerr_exit:
471 /*
472 * If we successfully wrote any data, and we are not the superuser
473 * we clear the setuid and setgid bits as a precaution against
474 * tampering.
475 */
476 if (cp->c_mode & (S_ISUID | S_ISGID)) {
477 cred = vfs_context_ucred(ap->a_context);
478 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
479 if (!cnode_locked) {
480 hfs_lock(cp, HFS_FORCE_LOCK);
481 cnode_locked = 1;
482 }
483 cp->c_mode &= ~(S_ISUID | S_ISGID);
484 }
485 }
486 if (retval) {
487 if (ioflag & IO_UNIT) {
488 if (!cnode_locked) {
489 hfs_lock(cp, HFS_FORCE_LOCK);
490 cnode_locked = 1;
491 }
492 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
493 0, ap->a_context);
 494 // LP64todo - fix this! resid needs to be user_ssize_t
495 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
496 uio_setresid(uio, resid);
497 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
498 }
499 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
500 if (!cnode_locked) {
501 hfs_lock(cp, HFS_FORCE_LOCK);
502 cnode_locked = 1;
503 }
504 retval = hfs_update(vp, TRUE);
505 }
506 /* Updating vcbWrCnt doesn't need to be atomic. */
507 hfsmp->vcbWrCnt++;
508
509 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
510 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
511exit:
512 if (cnode_locked)
513 hfs_unlock(cp);
514 hfs_unlock_truncate(cp);
515 return (retval);
516}
517
518/* support for the "bulk-access" fcntl */
519
520#define CACHE_ELEMS 64
521#define CACHE_LEVELS 16
522#define PARENT_IDS_FLAG 0x100
523
524/* from hfs_attrlist.c */
525extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
526 mode_t obj_mode, struct mount *mp,
527 kauth_cred_t cred, struct proc *p);
528
529/* from vfs/vfs_fsevents.c */
530extern char *get_pathbuff(void);
531extern void release_pathbuff(char *buff);
532
533struct access_cache {
534 int numcached;
535 int cachehits; /* these two for statistics gathering */
536 int lookups;
537 unsigned int *acache;
538 Boolean *haveaccess;
539};
540
541struct access_t {
542 uid_t uid; /* IN: effective user id */
543 short flags; /* IN: access requested (i.e. R_OK) */
544 short num_groups; /* IN: number of groups user belongs to */
545 int num_files; /* IN: number of files to process */
546 int *file_ids; /* IN: array of file ids */
547 gid_t *groups; /* IN: array of groups */
548 short *access; /* OUT: access info for each file (0 for 'has access') */
549};
550
551struct user_access_t {
552 uid_t uid; /* IN: effective user id */
553 short flags; /* IN: access requested (i.e. R_OK) */
554 short num_groups; /* IN: number of groups user belongs to */
555 int num_files; /* IN: number of files to process */
556 user_addr_t file_ids; /* IN: array of file ids */
557 user_addr_t groups; /* IN: array of groups */
558 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
559};
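/*
 * Rough userspace sketch (not part of this file) of driving the bulk-access
 * check handled by HFS_BULKACCESS below.  The caller must be root; the field
 * names are the ones declared above, while the fsctl(2) wrapper and the
 * HFS_BULKACCESS command macro come from the user-level HFS headers.
 *
 *	struct access_t args;
 *	int ids[2] = { cnid_a, cnid_b };	// hypothetical catalog node ids
 *	short result[2];
 *	gid_t groups[1] = { getgid() };
 *
 *	args.uid = getuid();
 *	args.flags = R_OK;
 *	args.num_groups = 1;
 *	args.groups = groups;
 *	args.num_files = 2;
 *	args.file_ids = ids;
 *	args.access = result;
 *	if (fsctl("/Volumes/MyHFS", HFS_BULKACCESS, &args, 0) == 0)
 *		; // result[i] is 0 when file_ids[i] is reachable, else an errno
 */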
560
561/*
562 * Perform a binary search for the given parent_id. Return value is
563 * found/not found boolean, and indexp will be the index of the item
564 * or the index at which to insert the item if it's not found.
565 */
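/*
 * For example, with acache = { 5, 9, 20 } and numcached == 3: a lookup of
 * parent_id 9 returns 1 with *indexp == 1, while a lookup of parent_id 12
 * returns 0 with *indexp == 2 (the slot where 12 would be inserted).
 */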
566static int
567lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
568{
569 unsigned int lo, hi;
570 int index, matches = 0;
571
572 if (cache->numcached == 0) {
573 *indexp = 0;
574 return 0; // table is empty, so insert at index=0 and report no match
575 }
576
577 if (cache->numcached > CACHE_ELEMS) {
578 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
579 cache->numcached, CACHE_ELEMS);*/
580 cache->numcached = CACHE_ELEMS;
581 }
582
583 lo = 0;
584 hi = cache->numcached - 1;
585 index = -1;
586
587 /* perform binary search for parent_id */
588 do {
589 unsigned int mid = (hi - lo)/2 + lo;
590 unsigned int this_id = cache->acache[mid];
591
592 if (parent_id == this_id) {
593 index = mid;
594 break;
595 }
596
597 if (parent_id < this_id) {
598 hi = mid;
599 continue;
600 }
601
602 if (parent_id > this_id) {
603 lo = mid + 1;
604 continue;
605 }
606 } while(lo < hi);
607
608 /* check if lo and hi converged on the match */
609 if (parent_id == cache->acache[hi]) {
610 index = hi;
611 }
612
613 /* if no existing entry found, find index for new one */
614 if (index == -1) {
615 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
616 matches = 0;
617 } else {
618 matches = 1;
619 }
620
621 *indexp = index;
622 return matches;
623}
624
625/*
626 * Add a node to the access_cache at the given index (or do a lookup first
627 * to find the index if -1 is passed in). We currently do a replace rather
628 * than an insert if the cache is full.
629 */
630static void
631add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
632{
633 int lookup_index = -1;
634
635 /* need to do a lookup first if -1 passed for index */
636 if (index == -1) {
637 if (lookup_bucket(cache, &lookup_index, nodeID)) {
638 if (cache->haveaccess[lookup_index] != access) {
639 /* change access info for existing entry... should never happen */
640 cache->haveaccess[lookup_index] = access;
641 }
642
643 /* mission accomplished */
644 return;
645 } else {
646 index = lookup_index;
647 }
648
649 }
650
651 /* if the cache is full, do a replace rather than an insert */
652 if (cache->numcached >= CACHE_ELEMS) {
653 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
654 cache->numcached = CACHE_ELEMS-1;
655
656 if (index > cache->numcached) {
657 // printf("index %d pinned to %d\n", index, cache->numcached);
658 index = cache->numcached;
659 }
660 } else if (index >= 0 && index < cache->numcached) {
661 /* only do bcopy if we're inserting */
662 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
663 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
664 }
665
666 cache->acache[index] = nodeID;
667 cache->haveaccess[index] = access;
668 cache->numcached++;
669}
670
671
672struct cinfo {
673 uid_t uid;
674 gid_t gid;
675 mode_t mode;
676 cnid_t parentcnid;
677};
678
679static int
680snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
681{
682 struct cinfo *cip = (struct cinfo *)arg;
683
684 cip->uid = attrp->ca_uid;
685 cip->gid = attrp->ca_gid;
686 cip->mode = attrp->ca_mode;
687 cip->parentcnid = descp->cd_parentcnid;
688
689 return (0);
690}
691
692/*
693 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
694 * isn't incore, then go to the catalog.
695 */
696static int
697do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
698 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
699{
700 int error = 0;
701
702 /* if this id matches the one the fsctl was called with, skip the lookup */
703 if (cnid == skip_cp->c_cnid) {
704 cnattrp->ca_uid = skip_cp->c_uid;
705 cnattrp->ca_gid = skip_cp->c_gid;
706 cnattrp->ca_mode = skip_cp->c_mode;
707 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
708 } else {
709 struct cinfo c_info;
710
 711 /* otherwise, check the cnode hash in case the file/dir is incore */
712 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
713 cnattrp->ca_uid = c_info.uid;
714 cnattrp->ca_gid = c_info.gid;
715 cnattrp->ca_mode = c_info.mode;
716 keyp->hfsPlus.parentID = c_info.parentcnid;
717 } else {
718 int lockflags;
719
720 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
721
722 /* lookup this cnid in the catalog */
723 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
724
725 hfs_systemfile_unlock(hfsmp, lockflags);
726
727 cache->lookups++;
728 }
729 }
730
731 return (error);
732}
733
734/*
735 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
736 * up to CACHE_LEVELS as we progress towards the root.
737 */
738static int
739do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
740 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
741{
742 int myErr = 0;
743 int myResult;
744 HFSCatalogNodeID thisNodeID;
745 unsigned long myPerms;
746 struct cat_attr cnattr;
747 int cache_index = -1;
748 CatalogKey catkey;
749
750 int i = 0, ids_to_cache = 0;
751 int parent_ids[CACHE_LEVELS];
752
753 /* root always has access */
754 if (!suser(myp_ucred, NULL)) {
755 return (1);
756 }
757
758 thisNodeID = nodeID;
759 while (thisNodeID >= kRootDirID) {
760 myResult = 0; /* default to "no access" */
761
762 /* check the cache before resorting to hitting the catalog */
763
764 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
765 * to look any further after hitting cached dir */
766
767 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
768 cache->cachehits++;
769 myResult = cache->haveaccess[cache_index];
770 goto ExitThisRoutine;
771 }
772
773 /* remember which parents we want to cache */
774 if (ids_to_cache < CACHE_LEVELS) {
775 parent_ids[ids_to_cache] = thisNodeID;
776 ids_to_cache++;
777 }
778
779 /* do the lookup (checks the cnode hash, then the catalog) */
780 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
781 if (myErr) {
782 goto ExitThisRoutine; /* no access */
783 }
784
785 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
786 cnattr.ca_mode, hfsmp->hfs_mp,
787 myp_ucred, theProcPtr);
788
789 if ( (myPerms & X_OK) == 0 ) {
790 myResult = 0;
791 goto ExitThisRoutine; /* no access */
792 }
793
794 /* up the hierarchy we go */
795 thisNodeID = catkey.hfsPlus.parentID;
796 }
797
798 /* if here, we have access to this node */
799 myResult = 1;
800
801 ExitThisRoutine:
802 if (myErr) {
803 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
804 myResult = 0;
805 }
806 *err = myErr;
807
808 /* cache the parent directory(ies) */
809 for (i = 0; i < ids_to_cache; i++) {
810 /* small optimization: get rid of double-lookup for all these */
811 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
812 add_node(cache, -1, parent_ids[i], myResult);
813 }
814
815 return (myResult);
816}
817/* end "bulk-access" support */
818
819
820
821/*
822 * Callback for use with freeze ioctl.
823 */
824static int
825hfs_freezewrite_callback(struct vnode *vp, void *cargs)
826{
827 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
828
829 return 0;
830}
831
832/*
833 * Control filesystem operating characteristics.
834 */
835int
836hfs_vnop_ioctl( struct vnop_ioctl_args /* {
837 vnode_t a_vp;
838 int a_command;
839 caddr_t a_data;
840 int a_fflag;
841 vfs_context_t a_context;
842 } */ *ap)
843{
844 struct vnode * vp = ap->a_vp;
845 struct hfsmount *hfsmp = VTOHFS(vp);
846 vfs_context_t context = ap->a_context;
847 kauth_cred_t cred = vfs_context_ucred(context);
848 proc_t p = vfs_context_proc(context);
849 struct vfsstatfs *vfsp;
850 boolean_t is64bit;
851
852 is64bit = proc_is64bit(p);
853
854 switch (ap->a_command) {
855
856 case HFS_RESIZE_VOLUME: {
857 u_int64_t newsize;
858 u_int64_t cursize;
859
860 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
861 if (suser(cred, NULL) &&
862 kauth_cred_getuid(cred) != vfsp->f_owner) {
863 return (EACCES); /* must be owner of file system */
864 }
865 if (!vnode_isvroot(vp)) {
866 return (EINVAL);
867 }
868 newsize = *(u_int64_t *)ap->a_data;
869 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
870
871 if (newsize > cursize) {
872 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
873 } else if (newsize < cursize) {
874 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
875 } else {
876 return (0);
877 }
878 }
879 case HFS_CHANGE_NEXT_ALLOCATION: {
880 u_int32_t location;
881
882 if (vnode_vfsisrdonly(vp)) {
883 return (EROFS);
884 }
885 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
886 if (suser(cred, NULL) &&
887 kauth_cred_getuid(cred) != vfsp->f_owner) {
888 return (EACCES); /* must be owner of file system */
889 }
890 if (!vnode_isvroot(vp)) {
891 return (EINVAL);
892 }
893 location = *(u_int32_t *)ap->a_data;
894 if (location > hfsmp->totalBlocks - 1) {
895 return (EINVAL);
896 }
897 /* Return previous value. */
898 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
899 HFS_MOUNT_LOCK(hfsmp, TRUE);
900 hfsmp->nextAllocation = location;
901 hfsmp->vcbFlags |= 0xFF00;
902 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
903 return (0);
904 }
905
906#ifdef HFS_SPARSE_DEV
907 case HFS_SETBACKINGSTOREINFO: {
908 struct vnode * bsfs_rootvp;
909 struct vnode * di_vp;
910 struct hfs_backingstoreinfo *bsdata;
911 int error = 0;
912
913 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
914 return (EALREADY);
915 }
916 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
917 if (suser(cred, NULL) &&
918 kauth_cred_getuid(cred) != vfsp->f_owner) {
919 return (EACCES); /* must be owner of file system */
920 }
921 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
922 if (bsdata == NULL) {
923 return (EINVAL);
924 }
925 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
926 return (error);
927 }
928 if ((error = vnode_getwithref(di_vp))) {
929 file_drop(bsdata->backingfd);
930 return(error);
931 }
932
933 if (vnode_mount(vp) == vnode_mount(di_vp)) {
934 (void)vnode_put(di_vp);
935 file_drop(bsdata->backingfd);
936 return (EINVAL);
937 }
938
939 /*
940 * Obtain the backing fs root vnode and keep a reference
941 * on it. This reference will be dropped in hfs_unmount.
942 */
943 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
944 if (error) {
945 (void)vnode_put(di_vp);
946 file_drop(bsdata->backingfd);
947 return (error);
948 }
949 vnode_ref(bsfs_rootvp);
950 vnode_put(bsfs_rootvp);
951
952 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
953 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
954 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
955 hfsmp->hfs_sparsebandblks *= 4;
956
957 (void)vnode_put(di_vp);
958 file_drop(bsdata->backingfd);
959 return (0);
960 }
961 case HFS_CLRBACKINGSTOREINFO: {
962 struct vnode * tmpvp;
963
964 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
965 if (suser(cred, NULL) &&
966 kauth_cred_getuid(cred) != vfsp->f_owner) {
967 return (EACCES); /* must be owner of file system */
968 }
969 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
970 hfsmp->hfs_backingfs_rootvp) {
971
972 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
973 tmpvp = hfsmp->hfs_backingfs_rootvp;
974 hfsmp->hfs_backingfs_rootvp = NULLVP;
975 hfsmp->hfs_sparsebandblks = 0;
976 vnode_rele(tmpvp);
977 }
978 return (0);
979 }
980#endif /* HFS_SPARSE_DEV */
981
982 case F_FREEZE_FS: {
983 struct mount *mp;
984 task_t task;
985
986 if (!is_suser())
987 return (EACCES);
988
989 mp = vnode_mount(vp);
990 hfsmp = VFSTOHFS(mp);
991
992 if (!(hfsmp->jnl))
993 return (ENOTSUP);
994
995 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
996
997 task = current_task();
998 task_working_set_disable(task);
999
1000 // flush things before we get started to try and prevent
1001 // dirty data from being paged out while we're frozen.
1002 // note: can't do this after taking the lock as it will
1003 // deadlock against ourselves.
1004 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1005 hfs_global_exclusive_lock_acquire(hfsmp);
1006 journal_flush(hfsmp->jnl);
1007
1008 // don't need to iterate on all vnodes, we just need to
1009 // wait for writes to the system files and the device vnode
1010 if (HFSTOVCB(hfsmp)->extentsRefNum)
1011 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1012 if (HFSTOVCB(hfsmp)->catalogRefNum)
1013 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1014 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1015 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1016 if (hfsmp->hfs_attribute_vp)
1017 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1018 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1019
1020 hfsmp->hfs_freezing_proc = current_proc();
1021
1022 return (0);
1023 }
1024
1025 case F_THAW_FS: {
1026 if (!is_suser())
1027 return (EACCES);
1028
1029 // if we're not the one who froze the fs then we
1030 // can't thaw it.
1031 if (hfsmp->hfs_freezing_proc != current_proc()) {
1032 return EPERM;
1033 }
1034
1035 // NOTE: if you add code here, also go check the
1036 // code that "thaws" the fs in hfs_vnop_close()
1037 //
1038 hfsmp->hfs_freezing_proc = NULL;
1039 hfs_global_exclusive_lock_release(hfsmp);
1040 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1041
1042 return (0);
1043 }
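/*
 * Typical use (sketch, from userspace): a backup or snapshot tool running as
 * root opens a file on the journaled volume and issues
 * fcntl(fd, F_FREEZE_FS, 0), does its work, then issues
 * fcntl(fd, F_THAW_FS, 0) from the same process -- the F_THAW_FS handler
 * above rejects any other caller.
 */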
1044
1045#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1046#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1047
1048 case HFS_BULKACCESS_FSCTL:
1049 case HFS_BULKACCESS: {
1050 /*
 1051 * NOTE: on entry, the vnode is locked. In case this vnode
 1052 * happens to be in our list of file_ids, we note it and
 1053 * avoid calling hfs_chashget_nowait() on that id, as that
 1054 * would cause a "locking against myself" panic.
1055 */
1056 Boolean check_leaf = true;
1057
1058 struct user_access_t *user_access_structp;
1059 struct user_access_t tmp_user_access_t;
1060 struct access_cache cache;
1061
1062 int error = 0, i;
1063
1064 dev_t dev = VTOC(vp)->c_dev;
1065
1066 short flags;
1067 struct ucred myucred; /* XXX ILLEGAL */
1068 int num_files;
1069 int *file_ids = NULL;
1070 short *access = NULL;
1071
1072 cnid_t cnid;
1073 cnid_t prevParent_cnid = 0;
1074 unsigned long myPerms;
1075 short myaccess = 0;
1076 struct cat_attr cnattr;
1077 CatalogKey catkey;
1078 struct cnode *skip_cp = VTOC(vp);
1079 struct vfs_context my_context;
1080
1081 /* first, return error if not run as root */
1082 if (cred->cr_ruid != 0) {
1083 return EPERM;
1084 }
1085
1086 /* initialize the local cache and buffers */
1087 cache.numcached = 0;
1088 cache.cachehits = 0;
1089 cache.lookups = 0;
1090
1091 file_ids = (int *) get_pathbuff();
1092 access = (short *) get_pathbuff();
1093 cache.acache = (int *) get_pathbuff();
1094 cache.haveaccess = (Boolean *) get_pathbuff();
1095
1096 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1097 release_pathbuff((char *) file_ids);
1098 release_pathbuff((char *) access);
1099 release_pathbuff((char *) cache.acache);
1100 release_pathbuff((char *) cache.haveaccess);
1101
1102 return ENOMEM;
1103 }
1104
1105 /* struct copyin done during dispatch... need to copy file_id array separately */
1106 if (ap->a_data == NULL) {
1107 error = EINVAL;
1108 goto err_exit_bulk_access;
1109 }
1110
1111 if (is64bit) {
1112 user_access_structp = (struct user_access_t *)ap->a_data;
1113 }
1114 else {
1115 struct access_t * accessp = (struct access_t *)ap->a_data;
1116 tmp_user_access_t.uid = accessp->uid;
1117 tmp_user_access_t.flags = accessp->flags;
1118 tmp_user_access_t.num_groups = accessp->num_groups;
1119 tmp_user_access_t.num_files = accessp->num_files;
1120 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1121 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1122 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1123 user_access_structp = &tmp_user_access_t;
1124 }
1125
1126 num_files = user_access_structp->num_files;
1127 if (num_files < 1) {
1128 goto err_exit_bulk_access;
1129 }
1130 if (num_files > 256) {
1131 error = EINVAL;
1132 goto err_exit_bulk_access;
1133 }
1134
1135 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1136 num_files * sizeof(int)))) {
1137 goto err_exit_bulk_access;
1138 }
1139
1140 /* fill in the ucred structure */
1141 flags = user_access_structp->flags;
1142 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1143 flags = R_OK;
1144 }
1145
1146 /* check if we've been passed leaf node ids or parent ids */
1147 if (flags & PARENT_IDS_FLAG) {
1148 check_leaf = false;
1149 }
1150
1151 memset(&myucred, 0, sizeof(myucred));
1152 myucred.cr_ref = 1;
1153 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1154 myucred.cr_ngroups = user_access_structp->num_groups;
1155 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1156 myucred.cr_ngroups = 0;
1157 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1158 myucred.cr_ngroups * sizeof(gid_t)))) {
1159 goto err_exit_bulk_access;
1160 }
1161 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1162
1163 my_context.vc_proc = p;
1164 my_context.vc_ucred = &myucred;
1165
1166 /* Check access to each file_id passed in */
1167 for (i = 0; i < num_files; i++) {
1168#if 0
1169 cnid = (cnid_t) file_ids[i];
1170
1171 /* root always has access */
1172 if (!suser(&myucred, NULL)) {
1173 access[i] = 0;
1174 continue;
1175 }
1176
1177 if (check_leaf) {
1178
1179 /* do the lookup (checks the cnode hash, then the catalog) */
1180 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1181 if (error) {
1182 access[i] = (short) error;
1183 continue;
1184 }
1185
1186 /* before calling CheckAccess(), check the target file for read access */
1187 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1188 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1189
1190
1191 /* fail fast if no access */
1192 if ((myPerms & flags) == 0) {
1193 access[i] = EACCES;
1194 continue;
1195 }
1196 } else {
1197 /* we were passed an array of parent ids */
1198 catkey.hfsPlus.parentID = cnid;
1199 }
1200
1201 /* if the last guy had the same parent and had access, we're done */
1202 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1203 cache.cachehits++;
1204 access[i] = 0;
1205 continue;
1206 }
1207
1208 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1209 skip_cp, p, &myucred, dev);
1210
1211 if ( myaccess ) {
1212 access[i] = 0; // have access.. no errors to report
1213 } else {
1214 access[i] = (error != 0 ? (short) error : EACCES);
1215 }
1216
1217 prevParent_cnid = catkey.hfsPlus.parentID;
1218#else
1219 int myErr;
1220
1221 cnid = (cnid_t)file_ids[i];
1222
1223 while (cnid >= kRootDirID) {
1224 /* get the vnode for this cnid */
1225 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1226 if ( myErr ) {
1227 access[i] = EACCES;
1228 break;
1229 }
1230
1231 cnid = VTOC(vp)->c_parentcnid;
1232
1233 hfs_unlock(VTOC(vp));
1234 if (vnode_vtype(vp) == VDIR) {
1235 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1236 } else {
1237 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238 }
1239 vnode_put(vp);
1240 access[i] = myErr;
1241 if (myErr) {
1242 break;
1243 }
1244 }
1245#endif
1246 }
1247
1248 /* copyout the access array */
1249 if ((error = copyout((caddr_t)access, user_access_structp->access,
1250 num_files * sizeof (short)))) {
1251 goto err_exit_bulk_access;
1252 }
1253
1254 err_exit_bulk_access:
1255
1256 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257
1258 release_pathbuff((char *) cache.acache);
1259 release_pathbuff((char *) cache.haveaccess);
1260 release_pathbuff((char *) file_ids);
1261 release_pathbuff((char *) access);
1262
1263 return (error);
1264 } /* HFS_BULKACCESS */
1265
1266 case HFS_SETACLSTATE: {
1267 int state;
1268
1269 if (ap->a_data == NULL) {
1270 return (EINVAL);
1271 }
1272
1273 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1274 state = *(int *)ap->a_data;
1275
1276 // super-user can enable or disable acl's on a volume.
1277 // the volume owner can only enable acl's
1278 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1279 return (EPERM);
1280 }
1281 if (state == 0 || state == 1)
1282 return hfs_setextendedsecurity(hfsmp, state);
1283 else
1284 return (EINVAL);
1285 }
1286
1287 case F_FULLFSYNC: {
1288 int error;
1289
1290 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1291 if (error == 0) {
1292 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1293 hfs_unlock(VTOC(vp));
1294 }
1295
1296 return error;
1297 }
1298
1299 case F_CHKCLEAN: {
1300 register struct cnode *cp;
1301 int error;
1302
1303 if (!vnode_isreg(vp))
1304 return EINVAL;
1305
1306 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1307 if (error == 0) {
1308 cp = VTOC(vp);
1309 /*
1310 * used by regression test to determine if
1311 * all the dirty pages (via write) have been cleaned
 1312 * after a call to 'fsync'.
1313 */
1314 error = is_file_clean(vp, VTOF(vp)->ff_size);
1315 hfs_unlock(cp);
1316 }
1317 return (error);
1318 }
1319
1320 case F_RDADVISE: {
1321 register struct radvisory *ra;
1322 struct filefork *fp;
1323 int error;
1324
1325 if (!vnode_isreg(vp))
1326 return EINVAL;
1327
1328 ra = (struct radvisory *)(ap->a_data);
1329 fp = VTOF(vp);
1330
1331 /* Protect against a size change. */
1332 hfs_lock_truncate(VTOC(vp), TRUE);
1333
1334 if (ra->ra_offset >= fp->ff_size) {
1335 error = EFBIG;
1336 } else {
1337 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1338 }
1339
1340 hfs_unlock_truncate(VTOC(vp));
1341 return (error);
1342 }
1343
1344 case F_READBOOTSTRAP:
1345 case F_WRITEBOOTSTRAP:
1346 {
1347 struct vnode *devvp = NULL;
1348 user_fbootstraptransfer_t *user_bootstrapp;
1349 int devBlockSize;
1350 int error;
1351 uio_t auio;
1352 daddr64_t blockNumber;
1353 u_long blockOffset;
1354 u_long xfersize;
1355 struct buf *bp;
1356 user_fbootstraptransfer_t user_bootstrap;
1357
1358 if (!vnode_isvroot(vp))
1359 return (EINVAL);
1360 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1361 * to a user_fbootstraptransfer_t else we get a pointer to a
1362 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1363 */
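/*
 * The bootstrap area is only the first 1024 bytes of the volume (two
 * 512-byte boot blocks); the range check below rejects any transfer that
 * would run past that.
 */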
1364 if (is64bit) {
1365 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1366 }
1367 else {
1368 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1369 user_bootstrapp = &user_bootstrap;
1370 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1371 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1372 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1373 }
1374 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1375 return EINVAL;
1376
1377 devvp = VTOHFS(vp)->hfs_devvp;
1378 auio = uio_create(1, user_bootstrapp->fbt_offset,
1379 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1380 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1381 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1382
1383 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1384
1385 while (uio_resid(auio) > 0) {
1386 blockNumber = uio_offset(auio) / devBlockSize;
1387 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1388 if (error) {
1389 if (bp) buf_brelse(bp);
1390 uio_free(auio);
1391 return error;
1392 };
1393
1394 blockOffset = uio_offset(auio) % devBlockSize;
1395 xfersize = devBlockSize - blockOffset;
1396 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1397 if (error) {
1398 buf_brelse(bp);
1399 uio_free(auio);
1400 return error;
1401 };
1402 if (uio_rw(auio) == UIO_WRITE) {
1403 error = VNOP_BWRITE(bp);
1404 if (error) {
1405 uio_free(auio);
1406 return error;
1407 }
1408 } else {
1409 buf_brelse(bp);
1410 };
1411 };
1412 uio_free(auio);
1413 };
1414 return 0;
1415
1416 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1417 {
1418 if (is64bit) {
1419 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1420 }
1421 else {
1422 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1423 }
1424 return 0;
1425 }
1426
1427 case HFS_GET_MOUNT_TIME:
1428 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1429 break;
1430
1431 case HFS_GET_LAST_MTIME:
1432 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1433 break;
1434
1435 case HFS_SET_BOOT_INFO:
1436 if (!vnode_isvroot(vp))
1437 return(EINVAL);
1438 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1439 return(EACCES); /* must be superuser or owner of filesystem */
1440 HFS_MOUNT_LOCK(hfsmp, TRUE);
1441 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1442 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1444 break;
1445
1446 case HFS_GET_BOOT_INFO:
1447 if (!vnode_isvroot(vp))
1448 return(EINVAL);
1449 HFS_MOUNT_LOCK(hfsmp, TRUE);
1450 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1451 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1452 break;
1453
1454 default:
1455 return (ENOTTY);
1456 }
1457
1458 /* Should never get here */
1459 return 0;
1460}
1461
1462/*
1463 * select
1464 */
1465int
1466hfs_vnop_select(__unused struct vnop_select_args *ap)
1467/*
1468 struct vnop_select_args {
1469 vnode_t a_vp;
1470 int a_which;
1471 int a_fflags;
1472 void *a_wql;
1473 vfs_context_t a_context;
1474 };
1475*/
1476{
1477 /*
1478 * We should really check to see if I/O is possible.
1479 */
1480 return (1);
1481}
1482
1483/*
1484 * Converts a logical block number to a physical block, and optionally returns
1485 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1486 * The physical block number is based on the device block size, currently it's 512.
1487 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
1488 */
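/*
 * For example, with a 4 KB logical block size and MapFileBlockC() reporting
 * 32 KB contiguously available, *runp is set to (32768 / 4096) - 1 = 7:
 * seven more logical blocks follow the one being mapped.
 */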
1489int
1490hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1491{
1492 struct cnode *cp = VTOC(vp);
1493 struct filefork *fp = VTOF(vp);
1494 struct hfsmount *hfsmp = VTOHFS(vp);
1495 int retval = E_NONE;
1496 daddr_t logBlockSize;
1497 size_t bytesContAvail = 0;
1498 off_t blockposition;
1499 int lockExtBtree;
1500 int lockflags = 0;
1501
1502 /*
1503 * Check for underlying vnode requests and ensure that logical
1504 * to physical mapping is requested.
1505 */
1506 if (vpp != NULL)
1507 *vpp = cp->c_devvp;
1508 if (bnp == NULL)
1509 return (0);
1510
1511 logBlockSize = GetLogicalBlockSize(vp);
1512 blockposition = (off_t)bn * (off_t)logBlockSize;
1513
1514 lockExtBtree = overflow_extents(fp);
1515
1516 if (lockExtBtree)
1517 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1518
1519 retval = MacToVFSError(
1520 MapFileBlockC (HFSTOVCB(hfsmp),
1521 (FCB*)fp,
1522 MAXPHYSIO,
1523 blockposition,
1524 bnp,
1525 &bytesContAvail));
1526
1527 if (lockExtBtree)
1528 hfs_systemfile_unlock(hfsmp, lockflags);
1529
1530 if (retval == E_NONE) {
1531 /* Figure out how many read ahead blocks there are */
1532 if (runp != NULL) {
1533 if (can_cluster(logBlockSize)) {
1534 /* Make sure this result never goes negative: */
1535 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1536 } else {
1537 *runp = 0;
1538 }
1539 }
1540 }
1541 return (retval);
1542}
1543
1544/*
1545 * Convert logical block number to file offset.
1546 */
1547int
1548hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1549/*
1550 struct vnop_blktooff_args {
1551 vnode_t a_vp;
1552 daddr64_t a_lblkno;
1553 off_t *a_offset;
1554 };
1555*/
1556{
1557 if (ap->a_vp == NULL)
1558 return (EINVAL);
1559 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1560
1561 return(0);
1562}
1563
1564/*
1565 * Convert file offset to logical block number.
1566 */
1567int
1568hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1569/*
1570 struct vnop_offtoblk_args {
1571 vnode_t a_vp;
1572 off_t a_offset;
1573 daddr64_t *a_lblkno;
1574 };
1575*/
1576{
1577 if (ap->a_vp == NULL)
1578 return (EINVAL);
1579 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1580
1581 return(0);
1582}
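/*
 * The two conversions above are exact inverses only on block boundaries:
 * blktooff(offtoblk(off)) rounds off down to a logical-block boundary, e.g.
 * with a 4 KB logical block size an offset of 10000 maps to block 2, which
 * maps back to offset 8192.
 */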
1583
1584/*
1585 * Map file offset to physical block number.
1586 *
1587 * System file cnodes are expected to be locked (shared or exclusive).
1588 */
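/*
 * When the requested offset falls inside an invalid (not-yet-zeroed) range,
 * *a_bpn is returned as -1 so the caller treats the span as a hole to be
 * zero-filled; *a_run is trimmed to the invalid-range boundary where that
 * still matters (see the rl_scan() handling below).
 */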
1589int
1590hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1591/*
1592 struct vnop_blockmap_args {
1593 vnode_t a_vp;
1594 off_t a_foffset;
1595 size_t a_size;
1596 daddr64_t *a_bpn;
1597 size_t *a_run;
1598 void *a_poff;
1599 int a_flags;
1600 vfs_context_t a_context;
1601 };
1602*/
1603{
1604 struct vnode *vp = ap->a_vp;
1605 struct cnode *cp;
1606 struct filefork *fp;
1607 struct hfsmount *hfsmp;
1608 size_t bytesContAvail = 0;
1609 int retval = E_NONE;
1610 int syslocks = 0;
1611 int lockflags = 0;
1612 struct rl_entry *invalid_range;
1613 enum rl_overlaptype overlaptype;
1614 int started_tr = 0;
1615 int tooklock = 0;
1616
1617 /* Do not allow blockmap operation on a directory */
1618 if (vnode_isdir(vp)) {
1619 return (ENOTSUP);
1620 }
1621
1622 /*
1623 * Check for underlying vnode requests and ensure that logical
1624 * to physical mapping is requested.
1625 */
1626 if (ap->a_bpn == NULL)
1627 return (0);
1628
1629 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1630 if (VTOC(vp)->c_lockowner != current_thread()) {
1631 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1632 tooklock = 1;
1633 } else {
1634 cp = VTOC(vp);
1635 panic("blockmap: %s cnode lock already held!\n",
1636 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1637 }
1638 }
1639 hfsmp = VTOHFS(vp);
1640 cp = VTOC(vp);
1641 fp = VTOF(vp);
1642
1643retry:
1644 if (fp->ff_unallocblocks) {
1645 if (hfs_start_transaction(hfsmp) != 0) {
1646 retval = EINVAL;
1647 goto exit;
1648 } else {
1649 started_tr = 1;
1650 }
1651 syslocks = SFL_EXTENTS | SFL_BITMAP;
1652
1653 } else if (overflow_extents(fp)) {
1654 syslocks = SFL_EXTENTS;
1655 }
1656
1657 if (syslocks)
1658 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1659
1660 /*
1661 * Check for any delayed allocations.
1662 */
1663 if (fp->ff_unallocblocks) {
1664 SInt64 actbytes;
1665 u_int32_t loanedBlocks;
1666
1667 //
1668 // Make sure we have a transaction. It's possible
1669 // that we came in and fp->ff_unallocblocks was zero
1670 // but during the time we blocked acquiring the extents
1671 // btree, ff_unallocblocks became non-zero and so we
1672 // will need to start a transaction.
1673 //
1674 if (started_tr == 0) {
1675 if (syslocks) {
1676 hfs_systemfile_unlock(hfsmp, lockflags);
1677 syslocks = 0;
1678 }
1679 goto retry;
1680 }
1681
1682 /*
 1683 * Note: ExtendFileC will release any blocks on loan and
 1684 * acquire real blocks. So we ask to extend by zero bytes
1685 * since ExtendFileC will account for the virtual blocks.
1686 */
1687
1688 loanedBlocks = fp->ff_unallocblocks;
1689 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1690 kEFAllMask | kEFNoClumpMask, &actbytes);
1691
1692 if (retval) {
1693 fp->ff_unallocblocks = loanedBlocks;
1694 cp->c_blocks += loanedBlocks;
1695 fp->ff_blocks += loanedBlocks;
1696
1697 HFS_MOUNT_LOCK(hfsmp, TRUE);
1698 hfsmp->loanedBlocks += loanedBlocks;
1699 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1700 }
1701
1702 if (retval) {
1703 hfs_systemfile_unlock(hfsmp, lockflags);
1704 cp->c_flag |= C_MODIFIED;
1705 if (started_tr) {
1706 (void) hfs_update(vp, TRUE);
1707 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708
1709 hfs_end_transaction(hfsmp);
1710 }
1711 goto exit;
1712 }
1713 }
1714
1715 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1716 ap->a_bpn, &bytesContAvail);
1717 if (syslocks) {
1718 hfs_systemfile_unlock(hfsmp, lockflags);
1719 syslocks = 0;
1720 }
1721
1722 if (started_tr) {
1723 (void) hfs_update(vp, TRUE);
1724 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1725 hfs_end_transaction(hfsmp);
1726 started_tr = 0;
1727 }
1728 if (retval) {
1729 goto exit;
1730 }
1731
1732 /* Adjust the mapping information for invalid file ranges: */
1733 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1734 ap->a_foffset + (off_t)bytesContAvail - 1,
1735 &invalid_range);
1736 if (overlaptype != RL_NOOVERLAP) {
1737 switch(overlaptype) {
1738 case RL_MATCHINGOVERLAP:
1739 case RL_OVERLAPCONTAINSRANGE:
1740 case RL_OVERLAPSTARTSBEFORE:
1741 /* There's no valid block for this byte offset: */
1742 *ap->a_bpn = (daddr64_t)-1;
1743 /* There's no point limiting the amount to be returned
1744 * if the invalid range that was hit extends all the way
1745 * to the EOF (i.e. there's no valid bytes between the
1746 * end of this range and the file's EOF):
1747 */
1748 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1749 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1750 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1751 }
1752 break;
1753
1754 case RL_OVERLAPISCONTAINED:
1755 case RL_OVERLAPENDSAFTER:
1756 /* The range of interest hits an invalid block before the end: */
1757 if (invalid_range->rl_start == ap->a_foffset) {
1758 /* There's actually no valid information to be had starting here: */
1759 *ap->a_bpn = (daddr64_t)-1;
1760 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763 }
1764 } else {
1765 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1766 }
1767 break;
1768
1769 case RL_NOOVERLAP:
1770 break;
1771 } /* end switch */
1772 if (bytesContAvail > ap->a_size)
1773 bytesContAvail = ap->a_size;
1774 }
1775 if (ap->a_run)
1776 *ap->a_run = bytesContAvail;
1777
1778 if (ap->a_poff)
1779 *(int *)ap->a_poff = 0;
1780exit:
1781 if (tooklock)
1782 hfs_unlock(cp);
1783
1784 return (MacToVFSError(retval));
1785}
1786
1787
1788/*
1789 * prepare and issue the I/O
1790 * buf_strategy knows how to deal
1791 * with requests that require
1792 * fragmented I/Os
1793 */
1794int
1795hfs_vnop_strategy(struct vnop_strategy_args *ap)
1796{
1797 buf_t bp = ap->a_bp;
1798 vnode_t vp = buf_vnode(bp);
1799 struct cnode *cp = VTOC(vp);
1800
1801 return (buf_strategy(cp->c_devvp, ap));
1802}
1803
1804
1805static int
1806do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1807{
1808 register struct cnode *cp = VTOC(vp);
1809 struct filefork *fp = VTOF(vp);
 1810 struct proc *p = vfs_context_proc(context);
1811 kauth_cred_t cred = vfs_context_ucred(context);
1812 int retval;
1813 off_t bytesToAdd;
1814 off_t actualBytesAdded;
1815 off_t filebytes;
1816 u_int64_t old_filesize;
1817 u_long fileblocks;
1818 int blksize;
1819 struct hfsmount *hfsmp;
1820 int lockflags;
1821
1822 blksize = VTOVCB(vp)->blockSize;
1823 fileblocks = fp->ff_blocks;
1824 filebytes = (off_t)fileblocks * (off_t)blksize;
1825 old_filesize = fp->ff_size;
1826
1827 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1828 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1829
1830 if (length < 0)
1831 return (EINVAL);
1832
1833 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1834 return (EFBIG);
1835
1836 hfsmp = VTOHFS(vp);
1837
1838 retval = E_NONE;
1839
1840 /* Files that are changing size are not hot file candidates. */
1841 if (hfsmp->hfc_stage == HFC_RECORDING) {
1842 fp->ff_bytesread = 0;
1843 }
1844
1845 /*
1846 * We cannot just check if fp->ff_size == length (as an optimization)
1847 * since there may be extra physical blocks that also need truncation.
1848 */
1849#if QUOTA
1850 if ((retval = hfs_getinoquota(cp)))
1851 return(retval);
1852#endif /* QUOTA */
1853
1854 /*
1855 * Lengthen the size of the file. We must ensure that the
1856 * last byte of the file is allocated. Since the smallest
1857 * value of ff_size is 0, length will be at least 1.
1858 */
1859 if (length > (off_t)fp->ff_size) {
1860#if QUOTA
1861 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1862 cred, 0);
1863 if (retval)
1864 goto Err_Exit;
1865#endif /* QUOTA */
1866 /*
1867 * If we don't have enough physical space then
1868 * we need to extend the physical size.
1869 */
1870 if (length > filebytes) {
1871 int eflags;
1872 u_long blockHint = 0;
1873
1874 /* All or nothing and don't round up to clumpsize. */
1875 eflags = kEFAllMask | kEFNoClumpMask;
1876
1877 if (cred && suser(cred, NULL) != 0)
1878 eflags |= kEFReserveMask; /* keep a reserve */
1879
1880 /*
1881 * Allocate Journal and Quota files in metadata zone.
1882 */
1883 if (filebytes == 0 &&
1884 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1885 hfs_virtualmetafile(cp)) {
1886 eflags |= kEFMetadataMask;
1887 blockHint = hfsmp->hfs_metazone_start;
1888 }
1889 if (hfs_start_transaction(hfsmp) != 0) {
1890 retval = EINVAL;
1891 goto Err_Exit;
1892 }
1893
1894 /* Protect extents b-tree and allocation bitmap */
1895 lockflags = SFL_BITMAP;
1896 if (overflow_extents(fp))
1897 lockflags |= SFL_EXTENTS;
1898 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1899
1900 while ((length > filebytes) && (retval == E_NONE)) {
1901 bytesToAdd = length - filebytes;
1902 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1903 (FCB*)fp,
1904 bytesToAdd,
1905 blockHint,
1906 eflags,
1907 &actualBytesAdded));
1908
1909 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1910 if (actualBytesAdded == 0 && retval == E_NONE) {
1911 if (length > filebytes)
1912 length = filebytes;
1913 break;
1914 }
1915 } /* endwhile */
1916
1917 hfs_systemfile_unlock(hfsmp, lockflags);
1918
1919 if (hfsmp->jnl) {
1920 (void) hfs_update(vp, TRUE);
1921 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1922 }
1923
1924 hfs_end_transaction(hfsmp);
1925
1926 if (retval)
1927 goto Err_Exit;
1928
1929 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1930 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1931 }
1932
1933 if (!(flags & IO_NOZEROFILL)) {
1934 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1935 struct rl_entry *invalid_range;
1936 off_t zero_limit;
1937
1938 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1939 if (length < zero_limit) zero_limit = length;
1940
1941 if (length > (off_t)fp->ff_size) {
1942 struct timeval tv;
1943
 1944 /* Extending the file: time to fill out the current last page with zeroes? */
1945 if ((fp->ff_size & PAGE_MASK_64) &&
1946 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1947 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1948
1949 /* There's some valid data at the start of the (current) last page
1950 of the file, so zero out the remainder of that page to ensure the
1951 entire page contains valid data. Since there is no invalid range
1952 possible past the (current) eof, there's no need to remove anything
1953 from the invalid range list before calling cluster_write(): */
1954 hfs_unlock(cp);
1955 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1956 fp->ff_size, (off_t)0,
1957 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1958 hfs_lock(cp, HFS_FORCE_LOCK);
1959 if (retval) goto Err_Exit;
1960
1961 /* Merely invalidate the remaining area, if necessary: */
1962 if (length > zero_limit) {
1963 microuptime(&tv);
1964 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1965 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1966 }
1967 } else {
1968 /* The page containing the (current) eof is invalid: just add the
1969 remainder of the page to the invalid list, along with the area
1970 being newly allocated:
1971 */
1972 microuptime(&tv);
1973 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1974 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1975 };
1976 }
1977 } else {
1978 panic("hfs_truncate: invoked on non-UBC object?!");
1979 };
1980 }
1981 cp->c_touch_modtime = TRUE;
1982 fp->ff_size = length;
1983
1984 /* Nested transactions will do their own ubc_setsize. */
1985 if (!skipsetsize) {
1986 /*
1987 * ubc_setsize can cause a pagein here
1988 * so we need to drop cnode lock.
1989 */
1990 hfs_unlock(cp);
1991 ubc_setsize(vp, length);
1992 hfs_lock(cp, HFS_FORCE_LOCK);
1993 }
1994
1995 } else { /* Shorten the size of the file */
1996
1997 if ((off_t)fp->ff_size > length) {
1998 /*
1999 * Any buffers that are past the truncation point need to be
2000 * invalidated (to maintain buffer cache consistency).
2001 */
2002
2003 /* Nested transactions will do their own ubc_setsize. */
2004 if (!skipsetsize) {
2005 /*
2006 * ubc_setsize can cause a pageout here
2007 * so we need to drop cnode lock.
2008 */
2009 hfs_unlock(cp);
2010 ubc_setsize(vp, length);
2011 hfs_lock(cp, HFS_FORCE_LOCK);
2012 }
2013
2014 /* Any space previously marked as invalid is now irrelevant: */
2015 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2016 }
2017
2018 /*
2019 * Account for any unmapped blocks. Note that the new
2020 * file length can still end up with unmapped blocks.
2021 */
2022 if (fp->ff_unallocblocks > 0) {
2023 u_int32_t finalblks;
2024 u_int32_t loanedBlocks;
2025
2026 HFS_MOUNT_LOCK(hfsmp, TRUE);
2027
2028 loanedBlocks = fp->ff_unallocblocks;
2029 cp->c_blocks -= loanedBlocks;
2030 fp->ff_blocks -= loanedBlocks;
2031 fp->ff_unallocblocks = 0;
2032
2033 hfsmp->loanedBlocks -= loanedBlocks;
2034
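			/*
			 * Round the new length up to whole allocation blocks.  If the
			 * shortened file still extends past its mapped blocks, loan
			 * back the difference as unallocated blocks.
			 */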
2035 finalblks = (length + blksize - 1) / blksize;
2036 if (finalblks > fp->ff_blocks) {
2037 /* calculate required unmapped blocks */
2038 loanedBlocks = finalblks - fp->ff_blocks;
2039 hfsmp->loanedBlocks += loanedBlocks;
2040
2041 fp->ff_unallocblocks = loanedBlocks;
2042 cp->c_blocks += loanedBlocks;
2043 fp->ff_blocks += loanedBlocks;
2044 }
2045 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2046 }
2047
2048 /*
2049 * For a TBE process, deallocation of the file blocks is delayed
2050 * until the file is closed, and hfs_close calls truncate with the
2051 * IO_NDELAY flag set. So when IO_NDELAY isn't set, we make sure
2052 * this isn't a TBE process.
2053 */
2054 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2055#if QUOTA
2056 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2057#endif /* QUOTA */
2058 if (hfs_start_transaction(hfsmp) != 0) {
2059 retval = EINVAL;
2060 goto Err_Exit;
2061 }
2062
2063 if (fp->ff_unallocblocks == 0) {
2064 /* Protect extents b-tree and allocation bitmap */
2065 lockflags = SFL_BITMAP;
2066 if (overflow_extents(fp))
2067 lockflags |= SFL_EXTENTS;
2068 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2069
2070 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2071 (FCB*)fp, length, false));
2072
2073 hfs_systemfile_unlock(hfsmp, lockflags);
2074 }
2075 if (hfsmp->jnl) {
2076 if (retval == 0) {
2077 fp->ff_size = length;
2078 }
2079 (void) hfs_update(vp, TRUE);
2080 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2081 }
2082
2083 hfs_end_transaction(hfsmp);
2084
2085 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2086 if (retval)
2087 goto Err_Exit;
2088#if QUOTA
2089 /* These are bytesreleased */
2090 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2091#endif /* QUOTA */
2092 }
2093 /* Only set update flag if the logical length changes */
2094 if (old_filesize != length)
2095 cp->c_touch_modtime = TRUE;
2096 fp->ff_size = length;
2097 }
2098 cp->c_touch_chgtime = TRUE;
2099 retval = hfs_update(vp, MNT_WAIT);
2100 if (retval) {
2101 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2102 -1, -1, -1, retval, 0);
2103 }
2104
2105Err_Exit:
2106
2107 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2108 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2109
2110 return (retval);
2111}
2112
2113
2114
2115/*
2116 * Truncate (or extend) a cnode to the given length, freeing or adding
2117 * disk blocks as needed.
2118 */
2119__private_extern__
2120int
2121hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2122 vfs_context_t context)
2123{
2124 struct filefork *fp = VTOF(vp);
2125 off_t filebytes;
2126 u_long fileblocks;
2127 int blksize, error = 0;
2128 struct cnode *cp = VTOC(vp);
2129
2130 if (vnode_isdir(vp))
2131 return (EISDIR); /* cannot truncate an HFS directory! */
2132
2133 blksize = VTOVCB(vp)->blockSize;
2134 fileblocks = fp->ff_blocks;
2135 filebytes = (off_t)fileblocks * (off_t)blksize;
2136
2137 // have to loop truncating or growing files that are
2138 // really big because otherwise transactions can get
2139 // enormous and consume too many kernel resources.
2140
2141 if (length < filebytes) {
2142 while (filebytes > length) {
2143 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2144 filebytes -= HFS_BIGFILE_SIZE;
2145 } else {
2146 filebytes = length;
2147 }
2148 cp->c_flag |= C_FORCEUPDATE;
2149 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2150 if (error)
2151 break;
2152 }
2153 } else if (length > filebytes) {
2154 while (filebytes < length) {
2155 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2156 filebytes += HFS_BIGFILE_SIZE;
2157 } else {
2158 filebytes = length;
2159 }
2160 cp->c_flag |= C_FORCEUPDATE;
2161 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162 if (error)
2163 break;
2164 }
2165 } else /* Same logical size */ {
2166
2167 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2168 }
2169 /* Files that are changing size are not hot file candidates. */
2170 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2171 fp->ff_bytesread = 0;
2172 }
2173
2174 return (error);
2175}
2176
2177
2178
2179/*
2180 * Preallocate file storage space.
2181 */
2182int
2183hfs_vnop_allocate(struct vnop_allocate_args /* {
2184 vnode_t a_vp;
2185 off_t a_length;
2186 u_int32_t a_flags;
2187 off_t *a_bytesallocated;
2188 off_t a_offset;
2189 vfs_context_t a_context;
2190 } */ *ap)
2191{
2192 struct vnode *vp = ap->a_vp;
2193 struct cnode *cp;
2194 struct filefork *fp;
2195 ExtendedVCB *vcb;
2196 off_t length = ap->a_length;
2197 off_t startingPEOF;
2198 off_t moreBytesRequested;
2199 off_t actualBytesAdded;
2200 off_t filebytes;
2201 u_long fileblocks;
2202 int retval, retval2;
2203 UInt32 blockHint;
2204 UInt32 extendFlags; /* For call to ExtendFileC */
2205 struct hfsmount *hfsmp;
2206 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2207 int lockflags;
2208
2209 *(ap->a_bytesallocated) = 0;
2210
2211 if (!vnode_isreg(vp))
2212 return (EISDIR);
2213 if (length < (off_t)0)
2214 return (EINVAL);
2215
2216 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2217 return (retval);
2218 cp = VTOC(vp);
2219 fp = VTOF(vp);
2220 hfsmp = VTOHFS(vp);
2221 vcb = VTOVCB(vp);
2222
2223 fileblocks = fp->ff_blocks;
2224 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2225
2226 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2227 retval = EINVAL;
2228 goto Err_Exit;
2229 }
2230
2231 /* Fill in the flags word for the call to Extend the file */
2232
2233 extendFlags = kEFNoClumpMask;
2234 if (ap->a_flags & ALLOCATECONTIG)
2235 extendFlags |= kEFContigMask;
2236 if (ap->a_flags & ALLOCATEALL)
2237 extendFlags |= kEFAllMask;
2238 if (cred && suser(cred, NULL) != 0)
2239 extendFlags |= kEFReserveMask;
2240
2241 retval = E_NONE;
2242 blockHint = 0;
2243 startingPEOF = filebytes;
2244
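	/*
	 * ALLOCATEFROMPEOF treats a_length as bytes to add beyond the current
	 * physical EOF, while ALLOCATEFROMVOL uses a_offset as a volume-relative
	 * allocation hint.
	 */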
2245 if (ap->a_flags & ALLOCATEFROMPEOF)
2246 length += filebytes;
2247 else if (ap->a_flags & ALLOCATEFROMVOL)
2248 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2249
2250 /* If no changes are necessary, then we're done */
2251 if (filebytes == length)
2252 goto Std_Exit;
2253
2254 /*
2255 * Lengthen the size of the file. We must ensure that the
2256 * last byte of the file is allocated. Since the smallest
2257 * value of filebytes is 0, length will be at least 1.
2258 */
2259 if (length > filebytes) {
2260 moreBytesRequested = length - filebytes;
2261
2262#if QUOTA
2263 retval = hfs_chkdq(cp,
2264 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2265 cred, 0);
2266 if (retval)
2267 goto Err_Exit;
2268
2269#endif /* QUOTA */
2270 /*
2271 * Metadata zone checks.
2272 */
2273 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2274 /*
2275 * Allocate Journal and Quota files in metadata zone.
2276 */
2277 if (hfs_virtualmetafile(cp)) {
2278 extendFlags |= kEFMetadataMask;
2279 blockHint = hfsmp->hfs_metazone_start;
2280 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2281 (blockHint <= hfsmp->hfs_metazone_end)) {
2282 /*
2283 * Move blockHint outside metadata zone.
2284 */
2285 blockHint = hfsmp->hfs_metazone_end + 1;
2286 }
2287 }
2288
2289 if (hfs_start_transaction(hfsmp) != 0) {
2290 retval = EINVAL;
2291 goto Err_Exit;
2292 }
2293
2294 /* Protect extents b-tree and allocation bitmap */
2295 lockflags = SFL_BITMAP;
2296 if (overflow_extents(fp))
2297 lockflags |= SFL_EXTENTS;
2298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2299
2300 retval = MacToVFSError(ExtendFileC(vcb,
2301 (FCB*)fp,
2302 moreBytesRequested,
2303 blockHint,
2304 extendFlags,
2305 &actualBytesAdded));
2306
2307 *(ap->a_bytesallocated) = actualBytesAdded;
2308 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2309
2310 hfs_systemfile_unlock(hfsmp, lockflags);
2311
2312 if (hfsmp->jnl) {
2313 (void) hfs_update(vp, TRUE);
2314 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2315 }
2316
2317 hfs_end_transaction(hfsmp);
2318
2319 /*
2320 * If we get an error and no changes were made then exit;
2321 * otherwise we must do the hfs_update to reflect the changes.
2322 */
2323 if (retval && (startingPEOF == filebytes))
2324 goto Err_Exit;
2325
2326 /*
2327 * Adjust actualBytesAdded to be allocation block aligned, not
2328 * clump size aligned.
2329 * NOTE: What we report here does not affect reality until the
2330 * file is closed, when we truncate the file to allocation
2331 * block size.
2332 */
2333 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2334 *(ap->a_bytesallocated) =
2335 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2336
2337 } else { /* Shorten the size of the file */
2338
2339 if (fp->ff_size > length) {
2340 /*
2341 * Any buffers that are past the truncation point need to be
2342 * invalidated (to maintain buffer cache consistency).
2343 */
2344 }
2345
2346 if (hfs_start_transaction(hfsmp) != 0) {
2347 retval = EINVAL;
2348 goto Err_Exit;
2349 }
2350
2351 /* Protect extents b-tree and allocation bitmap */
2352 lockflags = SFL_BITMAP;
2353 if (overflow_extents(fp))
2354 lockflags |= SFL_EXTENTS;
2355 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2356
2357 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2358
2359 hfs_systemfile_unlock(hfsmp, lockflags);
2360
2361 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363 if (hfsmp->jnl) {
2364 (void) hfs_update(vp, TRUE);
2365 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2366 }
2367
2368 hfs_end_transaction(hfsmp);
2369
2370
2371 /*
2372 * If we get an error and no changes were made then exit;
2373 * otherwise we must do the hfs_update to reflect the changes.
2374 */
2375 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2376#if QUOTA
2377 /* These are bytesreleased */
2378 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2379#endif /* QUOTA */
2380
2381 if (fp->ff_size > filebytes) {
2382 fp->ff_size = filebytes;
2383
2384 hfs_unlock(cp);
2385 ubc_setsize(vp, fp->ff_size);
2386 hfs_lock(cp, HFS_FORCE_LOCK);
2387 }
2388 }
2389
2390Std_Exit:
2391 cp->c_touch_chgtime = TRUE;
2392 cp->c_touch_modtime = TRUE;
2393 retval2 = hfs_update(vp, MNT_WAIT);
2394
2395 if (retval == 0)
2396 retval = retval2;
2397Err_Exit:
2398 hfs_unlock(cp);
2399 return (retval);
2400}
2401
2402
2403/*
2404 * Pagein for HFS filesystem
2405 */
2406int
2407hfs_vnop_pagein(struct vnop_pagein_args *ap)
2408/*
2409 struct vnop_pagein_args {
2410 vnode_t a_vp,
2411 upl_t a_pl,
2412 vm_offset_t a_pl_offset,
2413 off_t a_f_offset,
2414 size_t a_size,
2415 int a_flags
2416 vfs_context_t a_context;
2417 };
2418*/
2419{
2420 vnode_t vp = ap->a_vp;
2421 int error;
2422
2423 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2424 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2425 /*
2426 * Keep track of blocks read.
2427 */
2428 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2429 struct cnode *cp;
2430 struct filefork *fp;
2431 int bytesread;
2432 int took_cnode_lock = 0;
2433
2434 cp = VTOC(vp);
2435 fp = VTOF(vp);
2436
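		/*
		 * For a small file whose first pagein covers the entire fork,
		 * count only ff_size bytes as read; otherwise count the full
		 * request.
		 */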
2437 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2438 bytesread = fp->ff_size;
2439 else
2440 bytesread = ap->a_size;
2441
2442 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2443 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2444 hfs_lock(cp, HFS_FORCE_LOCK);
2445 took_cnode_lock = 1;
2446 }
2447 /*
2448 * If this file hasn't been seen since the start of
2449 * the current sampling period then start over.
2450 */
2451 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2452 struct timeval tv;
2453
2454 fp->ff_bytesread = bytesread;
2455 microtime(&tv);
2456 cp->c_atime = tv.tv_sec;
2457 } else {
2458 fp->ff_bytesread += bytesread;
2459 }
2460 cp->c_touch_acctime = TRUE;
2461 if (took_cnode_lock)
2462 hfs_unlock(cp);
2463 }
2464 return (error);
2465}
2466
2467/*
2468 * Pageout for HFS filesystem.
2469 */
2470int
2471hfs_vnop_pageout(struct vnop_pageout_args *ap)
2472/*
2473 struct vnop_pageout_args {
2474 vnode_t a_vp,
2475 upl_t a_pl,
2476 vm_offset_t a_pl_offset,
2477 off_t a_f_offset,
2478 size_t a_size,
2479 int a_flags
2480 vfs_context_t a_context;
2481 };
2482*/
2483{
2484 vnode_t vp = ap->a_vp;
2485 struct cnode *cp;
2486 struct filefork *fp;
2487 int retval;
2488 off_t end_of_range;
2489 off_t filesize;
2490
2491 cp = VTOC(vp);
2492 if (cp->c_lockowner == current_thread()) {
2493 panic("pageout: %s cnode lock already held!\n",
2494 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2495 }
2496 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2497 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2498 ubc_upl_abort_range(ap->a_pl,
2499 ap->a_pl_offset,
2500 ap->a_size,
2501 UPL_ABORT_FREE_ON_EMPTY);
2502 }
2503 return (retval);
2504 }
2505 fp = VTOF(vp);
2506
2507 filesize = fp->ff_size;
2508 end_of_range = ap->a_f_offset + ap->a_size - 1;
2509
2510 if (end_of_range >= filesize) {
2511 end_of_range = (off_t)(filesize - 1);
2512 }
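	/*
	 * The pages being written out now contain valid data, so drop them
	 * from the invalid-range list before handing the range to
	 * cluster_pageout.
	 */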
2513 if (ap->a_f_offset < filesize) {
2514 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2515 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2516 }
2517 hfs_unlock(cp);
2518
2519 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2520 ap->a_size, filesize, ap->a_flags);
2521
2522 /*
2523 * If data was written, and setuid or setgid bits are set and
2524 * this process is not the superuser then clear the setuid and
2525 * setgid bits as a precaution against tampering.
2526 */
2527 if ((retval == 0) &&
2528 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2529 (vfs_context_suser(ap->a_context) != 0)) {
2530 hfs_lock(cp, HFS_FORCE_LOCK);
2531 cp->c_mode &= ~(S_ISUID | S_ISGID);
2532 cp->c_touch_chgtime = TRUE;
2533 hfs_unlock(cp);
2534 }
2535 return (retval);
2536}
2537
2538/*
2539 * Intercept B-Tree node writes to unswap them if necessary.
2540 */
2541int
2542hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2543{
2544 int retval = 0;
2545 register struct buf *bp = ap->a_bp;
2546 register struct vnode *vp = buf_vnode(bp);
2547 BlockDescriptor block;
2548
2549 /* Trap B-Tree writes */
2550 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2551 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2552 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2553
2554 /*
2555 * Swap and validate the node if it is in native byte order.
2556 * This is always true on big endian, so we always validate
2557 * before writing here. On little endian, the node typically has
2558 * been swapped and validated when it was written to the journal,
2559 * so we won't do anything here.
2560 */
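		/*
		 * The last two bytes of a B-tree node hold the offset of its first
		 * record, which is always 0x000E (just past the node descriptor);
		 * reading that value in host byte order means the node has not
		 * been swapped yet.
		 */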
2561 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2562 /* Prepare the block pointer */
2563 block.blockHeader = bp;
2564 block.buffer = (char *)buf_dataptr(bp);
2565 block.blockNum = buf_lblkno(bp);
2566 /* not found in cache ==> came from disk */
2567 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2568 block.blockSize = buf_count(bp);
2569
2570 /* Endian un-swap B-Tree node */
2571 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2572 if (retval)
2573 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2574 }
2575 }
2576
2577 /* This buffer shouldn't be locked anymore but if it is clear it */
2578 if ((buf_flags(bp) & B_LOCKED)) {
2579 // XXXdbg
2580 if (VTOHFS(vp)->jnl) {
2581 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2582 }
2583 buf_clearflags(bp, B_LOCKED);
2584 }
2585 retval = vn_bwrite (ap);
2586
2587 return (retval);
2588}
2589
2590/*
2591 * Relocate a file to a new location on disk
2592 * cnode must be locked on entry
2593 *
2594 * Relocation occurs by cloning the file's data from its
2595 * current set of blocks to a new set of blocks. During
2596 * the relocation all of the blocks (old and new) are
2597 * owned by the file.
2598 *
2599 * -----------------
2600 * |///////////////|
2601 * -----------------
2602 * 0 N (file offset)
2603 *
2604 * ----------------- -----------------
2605 * |///////////////| | | STEP 1 (acquire new blocks)
2606 * ----------------- -----------------
2607 * 0 N N+1 2N
2608 *
2609 * ----------------- -----------------
2610 * |///////////////| |///////////////| STEP 2 (clone data)
2611 * ----------------- -----------------
2612 * 0 N N+1 2N
2613 *
2614 * -----------------
2615 * |///////////////| STEP 3 (head truncate blocks)
2616 * -----------------
2617 * 0 N
2618 *
2619 * During steps 2 and 3 page-outs to file offsets less
2620 * than or equal to N are suspended.
2621 *
2622 * During step 3 page-ins to the file get suspended.
2623 */
2624__private_extern__
2625int
2626hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2627 struct proc *p)
2628{
2629 struct cnode *cp;
2630 struct filefork *fp;
2631 struct hfsmount *hfsmp;
2632 u_int32_t headblks;
2633 u_int32_t datablks;
2634 u_int32_t blksize;
2635 u_int32_t growsize;
2636 u_int32_t nextallocsave;
2637 daddr64_t sector_a, sector_b;
2638 int disabled_caching = 0;
2639 int eflags;
2640 off_t newbytes;
2641 int retval;
2642 int lockflags = 0;
2643 int took_trunc_lock = 0;
2644 int started_tr = 0;
2645 enum vtype vnodetype;
2646
2647 vnodetype = vnode_vtype(vp);
2648 if (vnodetype != VREG && vnodetype != VLNK) {
2649 return (EPERM);
2650 }
2651
2652 hfsmp = VTOHFS(vp);
2653 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2654 return (ENOSPC);
2655 }
2656
2657 cp = VTOC(vp);
2658 fp = VTOF(vp);
2659 if (fp->ff_unallocblocks)
2660 return (EINVAL);
2661 blksize = hfsmp->blockSize;
2662 if (blockHint == 0)
2663 blockHint = hfsmp->nextAllocation;
2664
2665 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2666 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2667 return (EFBIG);
2668 }
2669
2670 //
2671 // We do not believe that this call to hfs_fsync() is
2672 // necessary and it causes a journal transaction
2673 // deadlock so we are removing it.
2674 //
2675 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2676 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2677 // if (retval)
2678 // return (retval);
2679 //}
2680
2681 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2682 hfs_unlock(cp);
2683 hfs_lock_truncate(cp, TRUE);
2684 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2685 hfs_unlock_truncate(cp);
2686 return (retval);
2687 }
2688 took_trunc_lock = 1;
2689 }
2690 headblks = fp->ff_blocks;
2691 datablks = howmany(fp->ff_size, blksize);
2692 growsize = datablks * blksize;
2693 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2694 if (blockHint >= hfsmp->hfs_metazone_start &&
2695 blockHint <= hfsmp->hfs_metazone_end)
2696 eflags |= kEFMetadataMask;
2697
2698 if (hfs_start_transaction(hfsmp) != 0) {
2699 if (took_trunc_lock)
2700 hfs_unlock_truncate(cp);
2701 return (EINVAL);
2702 }
2703 started_tr = 1;
2704 /*
2705 * Protect the extents b-tree and the allocation bitmap
2706 * during MapFileBlockC and ExtendFileC operations.
2707 */
2708 lockflags = SFL_BITMAP;
2709 if (overflow_extents(fp))
2710 lockflags |= SFL_EXTENTS;
2711 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2712
2713 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2714 if (retval) {
2715 retval = MacToVFSError(retval);
2716 goto out;
2717 }
2718
2719 /*
2720 * STEP 1 - acquire new allocation blocks.
2721 */
2722 if (!vnode_isnocache(vp)) {
2723 vnode_setnocache(vp);
2724 disabled_caching = 1;
2725
2726 }
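	/*
	 * nextAllocation is saved here and restored after a metadata-zone
	 * allocation so that ordinary allocations do not continue from inside
	 * the metadata zone.
	 */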
2727 nextallocsave = hfsmp->nextAllocation;
2728 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2729 if (eflags & kEFMetadataMask) {
2730 HFS_MOUNT_LOCK(hfsmp, TRUE);
2731 hfsmp->nextAllocation = nextallocsave;
2732 hfsmp->vcbFlags |= 0xFF00;
2733 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2734 }
2735
2736 retval = MacToVFSError(retval);
2737 if (retval == 0) {
2738 cp->c_flag |= C_MODIFIED;
2739 if (newbytes < growsize) {
2740 retval = ENOSPC;
2741 goto restore;
2742 } else if (fp->ff_blocks < (headblks + datablks)) {
2743 printf("hfs_relocate: allocation failed\n");
2744 retval = ENOSPC;
2745 goto restore;
2746 }
2747
2748 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2749 if (retval) {
2750 retval = MacToVFSError(retval);
2751 } else if ((sector_a + 1) == sector_b) {
2752 retval = ENOSPC;
2753 goto restore;
2754 } else if ((eflags & kEFMetadataMask) &&
2755 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2756 hfsmp->hfs_metazone_end)) {
2757 printf("hfs_relocate: didn't move into metadata zone\n");
2758 retval = ENOSPC;
2759 goto restore;
2760 }
2761 }
2762 /* Done with system locks and journal for now. */
2763 hfs_systemfile_unlock(hfsmp, lockflags);
2764 lockflags = 0;
2765 hfs_end_transaction(hfsmp);
2766 started_tr = 0;
2767
2768 if (retval) {
2769 /*
2770 * Check to see if failure is due to excessive fragmentation.
2771 */
2772 if ((retval == ENOSPC) &&
2773 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2774 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2775 }
2776 goto out;
2777 }
2778 /*
2779 * STEP 2 - clone file data into the new allocation blocks.
2780 */
2781
2782 if (vnodetype == VLNK)
2783 retval = hfs_clonelink(vp, blksize, cred, p);
2784 else if (vnode_issystem(vp))
2785 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2786 else
2787 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2788
2789 /* Start transaction for step 3 or for a restore. */
2790 if (hfs_start_transaction(hfsmp) != 0) {
2791 retval = EINVAL;
2792 goto out;
2793 }
2794 started_tr = 1;
2795 if (retval)
2796 goto restore;
2797
2798 /*
2799 * STEP 3 - switch to cloned data and remove old blocks.
2800 */
2801 lockflags = SFL_BITMAP;
2802 if (overflow_extents(fp))
2803 lockflags |= SFL_EXTENTS;
2804 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2805
2806 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2807
2808 hfs_systemfile_unlock(hfsmp, lockflags);
2809 lockflags = 0;
2810 if (retval)
2811 goto restore;
2812out:
2813 if (took_trunc_lock)
2814 hfs_unlock_truncate(cp);
2815
2816 if (lockflags) {
2817 hfs_systemfile_unlock(hfsmp, lockflags);
2818 lockflags = 0;
2819 }
2820
2821 // See comment up above about calls to hfs_fsync()
2822 //
2823 //if (retval == 0)
2824 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2825
2826 if (hfsmp->jnl) {
2827 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2828 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2829 else
2830 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2831 }
2832exit:
2833 if (disabled_caching) {
2834 vnode_clearnocache(vp);
2835 }
2836 if (started_tr)
2837 hfs_end_transaction(hfsmp);
2838
2839 return (retval);
2840
2841restore:
2842 if (fp->ff_blocks == headblks)
2843 goto exit;
2844 /*
2845 * Give back any newly allocated space.
2846 */
2847 if (lockflags == 0) {
2848 lockflags = SFL_BITMAP;
2849 if (overflow_extents(fp))
2850 lockflags |= SFL_EXTENTS;
2851 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2852 }
2853
2854 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2855
2856 hfs_systemfile_unlock(hfsmp, lockflags);
2857 lockflags = 0;
2858
2859 if (took_trunc_lock)
2860 hfs_unlock_truncate(cp);
2861 goto exit;
2862}
2863
2864
2865/*
2866 * Clone a symlink.
2867 *
2868 */
2869static int
2870hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2871{
2872 struct buf *head_bp = NULL;
2873 struct buf *tail_bp = NULL;
2874 int error;
2875
2876
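	/*
	 * A relocated symlink's data fits in a single allocation block:
	 * read logical block 0 (the original copy) and write it into
	 * logical block 1, the block newly appended by hfs_relocate.
	 */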
2877 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2878 if (error)
2879 goto out;
2880
2881 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2882 if (tail_bp == NULL) {
2883 error = EIO;
2884 goto out;
2885 }
2886 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2887 error = (int)buf_bwrite(tail_bp);
2888out:
2889 if (head_bp) {
2890 buf_markinvalid(head_bp);
2891 buf_brelse(head_bp);
2892 }
2893 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2894
2895 return (error);
2896}
2897
2898/*
2899 * Clone a file's data within the file.
2900 *
2901 */
2902static int
2903hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2904{
2905 caddr_t bufp;
2906 size_t writebase;
2907 size_t bufsize;
2908 size_t copysize;
2909 size_t iosize;
2910 off_t filesize;
2911 size_t offset;
2912 uio_t auio;
2913 int error = 0;
2914
2915 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2916 writebase = blkstart * blksize;
2917 copysize = blkcnt * blksize;
2918 iosize = bufsize = MIN(copysize, 4096 * 16);
2919 offset = 0;
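	/*
	 * The source data lives at file offsets [0, copysize); rewrite it
	 * starting at writebase (the first newly allocated block) in
	 * bufsize-sized chunks, bypassing the cache with IO_NOCACHE.
	 */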
2920
2921 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2922 return (ENOMEM);
2923 }
2924 hfs_unlock(VTOC(vp));
2925
2926 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2927
2928 while (offset < copysize) {
2929 iosize = MIN(copysize - offset, iosize);
2930
2931 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2932 uio_addiov(auio, (uintptr_t)bufp, iosize);
2933
2934 error = cluster_read(vp, auio, copysize, 0);
2935 if (error) {
2936 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2937 break;
2938 }
2939 if (uio_resid(auio) != 0) {
2940 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2941 error = EIO;
2942 break;
2943 }
2944
2945 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2946 uio_addiov(auio, (uintptr_t)bufp, iosize);
2947
2948 error = cluster_write(vp, auio, filesize + offset,
2949 filesize + offset + iosize,
2950 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2951 if (error) {
2952 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2953 break;
2954 }
2955 if (uio_resid(auio) != 0) {
2956 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2957 error = EIO;
2958 break;
2959 }
2960 offset += iosize;
2961 }
2962 uio_free(auio);
2963
2964 /*
2965 * No need to call ubc_sync_range or hfs_invalbuf
2966 * since the file was copied using IO_NOCACHE.
2967 */
2968
2969 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2970
2971 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2972 return (error);
2973}
2974
2975/*
2976 * Clone a system (metadata) file.
2977 *
2978 */
2979static int
2980hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2981 kauth_cred_t cred, struct proc *p)
2982{
2983 caddr_t bufp;
2984 char * offset;
2985 size_t bufsize;
2986 size_t iosize;
2987 struct buf *bp = NULL;
2988 daddr64_t blkno;
2989 daddr64_t blk;
2990 daddr64_t start_blk;
2991 daddr64_t last_blk;
2992 int breadcnt;
2993 int i;
2994 int error = 0;
2995
2996
2997 iosize = GetLogicalBlockSize(vp);
2998 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2999 breadcnt = bufsize / iosize;
3000
3001 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3002 return (ENOMEM);
3003 }
3004 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3005 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3006 blkno = 0;
3007
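	/*
	 * Copy the fork in bufsize-sized passes: read up to breadcnt logical
	 * blocks starting at blkno into bufp, then write them back out
	 * starting at start_blk + blkno, just past the original allocation.
	 */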
3008 while (blkno < last_blk) {
3009 /*
3010 * Read up to a megabyte
3011 */
3012 offset = bufp;
3013 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3014 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3015 if (error) {
3016 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3017 goto out;
3018 }
3019 if (buf_count(bp) != iosize) {
3020 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3021 goto out;
3022 }
3023 bcopy((char *)buf_dataptr(bp), offset, iosize);
3024
3025 buf_markinvalid(bp);
3026 buf_brelse(bp);
3027 bp = NULL;
3028
3029 offset += iosize;
3030 }
3031
3032 /*
3033 * Write up to a megabyte
3034 */
3035 offset = bufp;
3036 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3037 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3038 if (bp == NULL) {
3039 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3040 error = EIO;
3041 goto out;
3042 }
3043 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3044 error = (int)buf_bwrite(bp);
3045 bp = NULL;
3046 if (error)
3047 goto out;
3048 offset += iosize;
3049 }
3050 }
3051out:
3052 if (bp) {
3053 buf_brelse(bp);
3054 }
3055
3056 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3057
3058 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3059
3060 return (error);
3061}