1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
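/*
 * Descriptive note (added): can_cluster(size) is true when a logical
 * block size is a multiple of 4K and no larger than half of MAXPHYSIO;
 * hfs_bmap() uses it below to decide whether a read-ahead run length
 * is worth reporting.
 */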
63 #define can_cluster(size) ((((size) & (4096-1)) == 0) && ((size) <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start of the
343 transfer is invalid and should be zero-filled as part of the
344 transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data; in all cases, merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start are invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser,
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
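/*
 * Illustrative sketch only (not compiled as part of this file): a
 * root-privileged userspace tool might drive the bulk-access check
 * through the fsctl(2) entry point roughly as follows.  The volume
 * path, file IDs, uid and gid below are hypothetical; the kernel side
 * matches the request as HFS_BULKACCESS_FSCTL in hfs_vnop_ioctl().
 *
 *     int   ids[2]    = { 22, 37 };      // catalog node IDs to test
 *     short results[2];                  // 0 means access is granted
 *     gid_t groups[1] = { 20 };
 *     struct access_t req = {
 *         .uid = 501, .flags = R_OK,
 *         .num_groups = 1, .num_files = 2,
 *         .file_ids = ids, .groups = groups, .access = results,
 *     };
 *     fsctl("/Volumes/HFSVol", HFSIOC_BULKACCESS, &req, 0);
 */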
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
995
996 task = current_task();
997 task_working_set_disable(task);
998
999 // flush things before we get started to try and prevent
1000 // dirty data from being paged out while we're frozen.
1001 // note: can't do this after taking the lock as it will
1002 // deadlock against ourselves.
1003 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1004 hfs_global_exclusive_lock_acquire(hfsmp);
1005 journal_flush(hfsmp->jnl);
1006
1007 // don't need to iterate on all vnodes, we just need to
1008 // wait for writes to the system files and the device vnode
1009 if (HFSTOVCB(hfsmp)->extentsRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->catalogRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1013 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1014 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1015 if (hfsmp->hfs_attribute_vp)
1016 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1017 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1018
1019 hfsmp->hfs_freezing_proc = current_proc();
1020
1021 return (0);
1022 }
1023
1024 case F_THAW_FS: {
1025 if (!is_suser())
1026 return (EACCES);
1027
1028 // if we're not the one who froze the fs then we
1029 // can't thaw it.
1030 if (hfsmp->hfs_freezing_proc != current_proc()) {
1031 return EPERM;
1032 }
1033
1034 // NOTE: if you add code here, also go check the
1035 // code that "thaws" the fs in hfs_vnop_close()
1036 //
1037 hfsmp->hfs_freezing_proc = NULL;
1038 hfs_global_exclusive_lock_release(hfsmp);
1039 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1040
1041 return (0);
1042 }
1043
1044 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1045 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1046
1047 case HFS_BULKACCESS_FSCTL:
1048 case HFS_BULKACCESS: {
1049 /*
1050 * NOTE: on entry, the vnode is locked. In case this vnode
1051 * happens to be in our list of file_ids, we'll note it and
1052 * avoid calling hfs_chashget_nowait() on that id, as that
1053 * would cause a "locking against myself" panic.
1054 */
1055 Boolean check_leaf = true;
1056
1057 struct user_access_t *user_access_structp;
1058 struct user_access_t tmp_user_access_t;
1059 struct access_cache cache;
1060
1061 int error = 0, i;
1062
1063 dev_t dev = VTOC(vp)->c_dev;
1064
1065 short flags;
1066 struct ucred myucred; /* XXX ILLEGAL */
1067 int num_files;
1068 int *file_ids = NULL;
1069 short *access = NULL;
1070
1071 cnid_t cnid;
1072 cnid_t prevParent_cnid = 0;
1073 unsigned long myPerms;
1074 short myaccess = 0;
1075 struct cat_attr cnattr;
1076 CatalogKey catkey;
1077 struct cnode *skip_cp = VTOC(vp);
1078 struct vfs_context my_context;
1079
1080 /* first, return error if not run as root */
1081 if (cred->cr_ruid != 0) {
1082 return EPERM;
1083 }
1084
1085 /* initialize the local cache and buffers */
1086 cache.numcached = 0;
1087 cache.cachehits = 0;
1088 cache.lookups = 0;
1089
1090 file_ids = (int *) get_pathbuff();
1091 access = (short *) get_pathbuff();
1092 cache.acache = (int *) get_pathbuff();
1093 cache.haveaccess = (Boolean *) get_pathbuff();
1094
1095 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1096 release_pathbuff((char *) file_ids);
1097 release_pathbuff((char *) access);
1098 release_pathbuff((char *) cache.acache);
1099 release_pathbuff((char *) cache.haveaccess);
1100
1101 return ENOMEM;
1102 }
1103
1104 /* struct copyin done during dispatch... need to copy file_id array separately */
1105 if (ap->a_data == NULL) {
1106 error = EINVAL;
1107 goto err_exit_bulk_access;
1108 }
1109
1110 if (is64bit) {
1111 user_access_structp = (struct user_access_t *)ap->a_data;
1112 }
1113 else {
1114 struct access_t * accessp = (struct access_t *)ap->a_data;
1115 tmp_user_access_t.uid = accessp->uid;
1116 tmp_user_access_t.flags = accessp->flags;
1117 tmp_user_access_t.num_groups = accessp->num_groups;
1118 tmp_user_access_t.num_files = accessp->num_files;
1119 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1120 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1121 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1122 user_access_structp = &tmp_user_access_t;
1123 }
1124
1125 num_files = user_access_structp->num_files;
1126 if (num_files < 1) {
1127 goto err_exit_bulk_access;
1128 }
1129 if (num_files > 256) {
1130 error = EINVAL;
1131 goto err_exit_bulk_access;
1132 }
1133
1134 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1135 num_files * sizeof(int)))) {
1136 goto err_exit_bulk_access;
1137 }
1138
1139 /* fill in the ucred structure */
1140 flags = user_access_structp->flags;
1141 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1142 flags = R_OK;
1143 }
1144
1145 /* check if we've been passed leaf node ids or parent ids */
1146 if (flags & PARENT_IDS_FLAG) {
1147 check_leaf = false;
1148 }
1149
1150 memset(&myucred, 0, sizeof(myucred));
1151 myucred.cr_ref = 1;
1152 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1153 myucred.cr_ngroups = user_access_structp->num_groups;
1154 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1155 myucred.cr_ngroups = 0;
1156 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1157 myucred.cr_ngroups * sizeof(gid_t)))) {
1158 goto err_exit_bulk_access;
1159 }
1160 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1161
1162 my_context.vc_proc = p;
1163 my_context.vc_ucred = &myucred;
1164
1165 /* Check access to each file_id passed in */
1166 for (i = 0; i < num_files; i++) {
1167 #if 0
1168 cnid = (cnid_t) file_ids[i];
1169
1170 /* root always has access */
1171 if (!suser(&myucred, NULL)) {
1172 access[i] = 0;
1173 continue;
1174 }
1175
1176 if (check_leaf) {
1177
1178 /* do the lookup (checks the cnode hash, then the catalog) */
1179 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1180 if (error) {
1181 access[i] = (short) error;
1182 continue;
1183 }
1184
1185 /* before calling CheckAccess(), check the target file for read access */
1186 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1187 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1188
1189
1190 /* fail fast if no access */
1191 if ((myPerms & flags) == 0) {
1192 access[i] = EACCES;
1193 continue;
1194 }
1195 } else {
1196 /* we were passed an array of parent ids */
1197 catkey.hfsPlus.parentID = cnid;
1198 }
1199
1200 /* if the last guy had the same parent and had access, we're done */
1201 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1202 cache.cachehits++;
1203 access[i] = 0;
1204 continue;
1205 }
1206
1207 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1208 skip_cp, p, &myucred, dev);
1209
1210 if ( myaccess ) {
1211 access[i] = 0; // have access.. no errors to report
1212 } else {
1213 access[i] = (error != 0 ? (short) error : EACCES);
1214 }
1215
1216 prevParent_cnid = catkey.hfsPlus.parentID;
1217 #else
1218 int myErr;
1219
1220 cnid = (cnid_t)file_ids[i];
1221
1222 while (cnid >= kRootDirID) {
1223 /* get the vnode for this cnid */
1224 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1225 if ( myErr ) {
1226 access[i] = EACCES;
1227 break;
1228 }
1229
1230 cnid = VTOC(vp)->c_parentcnid;
1231
1232 hfs_unlock(VTOC(vp));
1233 if (vnode_vtype(vp) == VDIR) {
1234 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1235 } else {
1236 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1237 }
1238 vnode_put(vp);
1239 access[i] = myErr;
1240 if (myErr) {
1241 break;
1242 }
1243 }
1244 #endif
1245 }
1246
1247 /* copyout the access array */
1248 if ((error = copyout((caddr_t)access, user_access_structp->access,
1249 num_files * sizeof (short)))) {
1250 goto err_exit_bulk_access;
1251 }
1252
1253 err_exit_bulk_access:
1254
1255 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1256
1257 release_pathbuff((char *) cache.acache);
1258 release_pathbuff((char *) cache.haveaccess);
1259 release_pathbuff((char *) file_ids);
1260 release_pathbuff((char *) access);
1261
1262 return (error);
1263 } /* HFS_BULKACCESS */
1264
1265 case HFS_SETACLSTATE: {
1266 int state;
1267
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271
1272 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1273 state = *(int *)ap->a_data;
1274
1275 // super-user can enable or disable acl's on a volume.
1276 // the volume owner can only enable acl's
1277 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1278 return (EPERM);
1279 }
1280 if (state == 0 || state == 1)
1281 return hfs_setextendedsecurity(hfsmp, state);
1282 else
1283 return (EINVAL);
1284 }
1285
1286 case F_FULLFSYNC: {
1287 int error;
1288
1289 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1290 if (error == 0) {
1291 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1292 hfs_unlock(VTOC(vp));
1293 }
1294
1295 return error;
1296 }
1297
1298 case F_CHKCLEAN: {
1299 register struct cnode *cp;
1300 int error;
1301
1302 if (!vnode_isreg(vp))
1303 return EINVAL;
1304
1305 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1306 if (error == 0) {
1307 cp = VTOC(vp);
1308 /*
1309 * used by regression test to determine if
1310 * all the dirty pages (via write) have been cleaned
1311 * after a call to 'fsync'.
1312 */
1313 error = is_file_clean(vp, VTOF(vp)->ff_size);
1314 hfs_unlock(cp);
1315 }
1316 return (error);
1317 }
1318
1319 case F_RDADVISE: {
1320 register struct radvisory *ra;
1321 struct filefork *fp;
1322 int error;
1323
1324 if (!vnode_isreg(vp))
1325 return EINVAL;
1326
1327 ra = (struct radvisory *)(ap->a_data);
1328 fp = VTOF(vp);
1329
1330 /* Protect against a size change. */
1331 hfs_lock_truncate(VTOC(vp), TRUE);
1332
1333 if (ra->ra_offset >= fp->ff_size) {
1334 error = EFBIG;
1335 } else {
1336 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1337 }
1338
1339 hfs_unlock_truncate(VTOC(vp));
1340 return (error);
1341 }
1342
1343 case F_READBOOTSTRAP:
1344 case F_WRITEBOOTSTRAP:
1345 {
1346 struct vnode *devvp = NULL;
1347 user_fbootstraptransfer_t *user_bootstrapp;
1348 int devBlockSize;
1349 int error;
1350 uio_t auio;
1351 daddr64_t blockNumber;
1352 u_long blockOffset;
1353 u_long xfersize;
1354 struct buf *bp;
1355 user_fbootstraptransfer_t user_bootstrap;
1356
1357 if (!vnode_isvroot(vp))
1358 return (EINVAL);
1359 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1360 * to a user_fbootstraptransfer_t else we get a pointer to a
1361 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1362 */
1363 if (is64bit) {
1364 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1365 }
1366 else {
1367 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1368 user_bootstrapp = &user_bootstrap;
1369 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1370 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1371 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1372 }
1373 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1374 return EINVAL;
1375
1376 devvp = VTOHFS(vp)->hfs_devvp;
1377 auio = uio_create(1, user_bootstrapp->fbt_offset,
1378 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1379 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1380 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1381
1382 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1383
1384 while (uio_resid(auio) > 0) {
1385 blockNumber = uio_offset(auio) / devBlockSize;
1386 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1387 if (error) {
1388 if (bp) buf_brelse(bp);
1389 uio_free(auio);
1390 return error;
1391 };
1392
1393 blockOffset = uio_offset(auio) % devBlockSize;
1394 xfersize = devBlockSize - blockOffset;
1395 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1396 if (error) {
1397 buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401 if (uio_rw(auio) == UIO_WRITE) {
1402 error = VNOP_BWRITE(bp);
1403 if (error) {
1404 uio_free(auio);
1405 return error;
1406 }
1407 } else {
1408 buf_brelse(bp);
1409 };
1410 };
1411 uio_free(auio);
1412 };
1413 return 0;
1414
1415 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1416 {
1417 if (is64bit) {
1418 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1419 }
1420 else {
1421 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1422 }
1423 return 0;
1424 }
1425
1426 case HFS_GET_MOUNT_TIME:
1427 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1428 break;
1429
1430 case HFS_GET_LAST_MTIME:
1431 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1432 break;
1433
1434 case HFS_SET_BOOT_INFO:
1435 if (!vnode_isvroot(vp))
1436 return(EINVAL);
1437 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1438 return(EACCES); /* must be superuser or owner of filesystem */
1439 HFS_MOUNT_LOCK(hfsmp, TRUE);
1440 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1441 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1442 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1443 break;
1444
1445 case HFS_GET_BOOT_INFO:
1446 if (!vnode_isvroot(vp))
1447 return(EINVAL);
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 break;
1452
1453 default:
1454 return (ENOTTY);
1455 }
1456
1457 /* Should never get here */
1458 return 0;
1459 }
1460
1461 /*
1462 * select
1463 */
1464 int
1465 hfs_vnop_select(__unused struct vnop_select_args *ap)
1466 /*
1467 struct vnop_select_args {
1468 vnode_t a_vp;
1469 int a_which;
1470 int a_fflags;
1471 void *a_wql;
1472 vfs_context_t a_context;
1473 };
1474 */
1475 {
1476 /*
1477 * We should really check to see if I/O is possible.
1478 */
1479 return (1);
1480 }
1481
1482 /*
1483 * Converts a logical block number to a physical block, and optionally returns
1484 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1485 * The physical block number is based on the device block size, which is currently 512.
1486 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1487 */
1488 int
1489 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1490 {
1491 struct cnode *cp = VTOC(vp);
1492 struct filefork *fp = VTOF(vp);
1493 struct hfsmount *hfsmp = VTOHFS(vp);
1494 int retval = E_NONE;
1495 daddr_t logBlockSize;
1496 size_t bytesContAvail = 0;
1497 off_t blockposition;
1498 int lockExtBtree;
1499 int lockflags = 0;
1500
1501 /*
1502 * Check for underlying vnode requests and ensure that logical
1503 * to physical mapping is requested.
1504 */
1505 if (vpp != NULL)
1506 *vpp = cp->c_devvp;
1507 if (bnp == NULL)
1508 return (0);
1509
1510 logBlockSize = GetLogicalBlockSize(vp);
1511 blockposition = (off_t)bn * (off_t)logBlockSize;
1512
1513 lockExtBtree = overflow_extents(fp);
1514
1515 if (lockExtBtree)
1516 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1517
1518 retval = MacToVFSError(
1519 MapFileBlockC (HFSTOVCB(hfsmp),
1520 (FCB*)fp,
1521 MAXPHYSIO,
1522 blockposition,
1523 bnp,
1524 &bytesContAvail));
1525
1526 if (lockExtBtree)
1527 hfs_systemfile_unlock(hfsmp, lockflags);
1528
1529 if (retval == E_NONE) {
1530 /* Figure out how many read ahead blocks there are */
1531 if (runp != NULL) {
1532 if (can_cluster(logBlockSize)) {
1533 /* Make sure this result never goes negative: */
1534 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1535 } else {
1536 *runp = 0;
1537 }
1538 }
1539 }
1540 return (retval);
1541 }
1542
1543 /*
1544 * Convert logical block number to file offset.
1545 */
1546 int
1547 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1548 /*
1549 struct vnop_blktooff_args {
1550 vnode_t a_vp;
1551 daddr64_t a_lblkno;
1552 off_t *a_offset;
1553 };
1554 */
1555 {
1556 if (ap->a_vp == NULL)
1557 return (EINVAL);
1558 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1559
1560 return(0);
1561 }
1562
1563 /*
1564 * Convert file offset to logical block number.
1565 */
1566 int
1567 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1568 /*
1569 struct vnop_offtoblk_args {
1570 vnode_t a_vp;
1571 off_t a_offset;
1572 daddr64_t *a_lblkno;
1573 };
1574 */
1575 {
1576 if (ap->a_vp == NULL)
1577 return (EINVAL);
1578 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1579
1580 return(0);
1581 }
1582
1583 /*
1584 * Map file offset to physical block number.
1585 *
1586 * System file cnodes are expected to be locked (shared or exclusive).
1587 */
1588 int
1589 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1590 /*
1591 struct vnop_blockmap_args {
1592 vnode_t a_vp;
1593 off_t a_foffset;
1594 size_t a_size;
1595 daddr64_t *a_bpn;
1596 size_t *a_run;
1597 void *a_poff;
1598 int a_flags;
1599 vfs_context_t a_context;
1600 };
1601 */
1602 {
1603 struct vnode *vp = ap->a_vp;
1604 struct cnode *cp;
1605 struct filefork *fp;
1606 struct hfsmount *hfsmp;
1607 size_t bytesContAvail = 0;
1608 int retval = E_NONE;
1609 int syslocks = 0;
1610 int lockflags = 0;
1611 struct rl_entry *invalid_range;
1612 enum rl_overlaptype overlaptype;
1613 int started_tr = 0;
1614 int tooklock = 0;
1615
1616 /* Do not allow blockmap operation on a directory */
1617 if (vnode_isdir(vp)) {
1618 return (ENOTSUP);
1619 }
1620
1621 /*
1622 * Check for underlying vnode requests and ensure that logical
1623 * to physical mapping is requested.
1624 */
1625 if (ap->a_bpn == NULL)
1626 return (0);
1627
1628 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1629 if (VTOC(vp)->c_lockowner != current_thread()) {
1630 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1631 tooklock = 1;
1632 } else {
1633 cp = VTOC(vp);
1634 panic("blockmap: %s cnode lock already held!\n",
1635 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1636 }
1637 }
1638 hfsmp = VTOHFS(vp);
1639 cp = VTOC(vp);
1640 fp = VTOF(vp);
1641
1642 retry:
1643 if (fp->ff_unallocblocks) {
1644 if (hfs_start_transaction(hfsmp) != 0) {
1645 retval = EINVAL;
1646 goto exit;
1647 } else {
1648 started_tr = 1;
1649 }
1650 syslocks = SFL_EXTENTS | SFL_BITMAP;
1651
1652 } else if (overflow_extents(fp)) {
1653 syslocks = SFL_EXTENTS;
1654 }
1655
1656 if (syslocks)
1657 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1658
1659 /*
1660 * Check for any delayed allocations.
1661 */
1662 if (fp->ff_unallocblocks) {
1663 SInt64 actbytes;
1664 u_int32_t loanedBlocks;
1665
1666 //
1667 // Make sure we have a transaction. It's possible
1668 // that we came in and fp->ff_unallocblocks was zero
1669 // but during the time we blocked acquiring the extents
1670 // btree, ff_unallocblocks became non-zero and so we
1671 // will need to start a transaction.
1672 //
1673 if (started_tr == 0) {
1674 if (syslocks) {
1675 hfs_systemfile_unlock(hfsmp, lockflags);
1676 syslocks = 0;
1677 }
1678 goto retry;
1679 }
1680
1681 /*
1682 * Note: ExtendFileC will release any blocks on loan and
1683 * acquire real blocks. So we ask to extend by zero bytes
1684 * since ExtendFileC will account for the virtual blocks.
1685 */
1686
1687 loanedBlocks = fp->ff_unallocblocks;
1688 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1689 kEFAllMask | kEFNoClumpMask, &actbytes);
1690
1691 if (retval) {
1692 fp->ff_unallocblocks = loanedBlocks;
1693 cp->c_blocks += loanedBlocks;
1694 fp->ff_blocks += loanedBlocks;
1695
1696 HFS_MOUNT_LOCK(hfsmp, TRUE);
1697 hfsmp->loanedBlocks += loanedBlocks;
1698 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1699 }
1700
1701 if (retval) {
1702 hfs_systemfile_unlock(hfsmp, lockflags);
1703 cp->c_flag |= C_MODIFIED;
1704 if (started_tr) {
1705 (void) hfs_update(vp, TRUE);
1706 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1707
1708 hfs_end_transaction(hfsmp);
1709 }
1710 goto exit;
1711 }
1712 }
1713
1714 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1715 ap->a_bpn, &bytesContAvail);
1716 if (syslocks) {
1717 hfs_systemfile_unlock(hfsmp, lockflags);
1718 syslocks = 0;
1719 }
1720
1721 if (started_tr) {
1722 (void) hfs_update(vp, TRUE);
1723 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1724 hfs_end_transaction(hfsmp);
1725 started_tr = 0;
1726 }
1727 if (retval) {
1728 goto exit;
1729 }
1730
1731 /* Adjust the mapping information for invalid file ranges: */
1732 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1733 ap->a_foffset + (off_t)bytesContAvail - 1,
1734 &invalid_range);
1735 if (overlaptype != RL_NOOVERLAP) {
1736 switch(overlaptype) {
1737 case RL_MATCHINGOVERLAP:
1738 case RL_OVERLAPCONTAINSRANGE:
1739 case RL_OVERLAPSTARTSBEFORE:
1740 /* There's no valid block for this byte offset: */
1741 *ap->a_bpn = (daddr64_t)-1;
1742 /* There's no point limiting the amount to be returned
1743 * if the invalid range that was hit extends all the way
1744 * to the EOF (i.e. there's no valid bytes between the
1745 * end of this range and the file's EOF):
1746 */
1747 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1748 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1749 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1750 }
1751 break;
1752
1753 case RL_OVERLAPISCONTAINED:
1754 case RL_OVERLAPENDSAFTER:
1755 /* The range of interest hits an invalid block before the end: */
1756 if (invalid_range->rl_start == ap->a_foffset) {
1757 /* There's actually no valid information to be had starting here: */
1758 *ap->a_bpn = (daddr64_t)-1;
1759 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1760 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1761 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1762 }
1763 } else {
1764 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1765 }
1766 break;
1767
1768 case RL_NOOVERLAP:
1769 break;
1770 } /* end switch */
1771 if (bytesContAvail > ap->a_size)
1772 bytesContAvail = ap->a_size;
1773 }
1774 if (ap->a_run)
1775 *ap->a_run = bytesContAvail;
1776
1777 if (ap->a_poff)
1778 *(int *)ap->a_poff = 0;
1779 exit:
1780 if (tooklock)
1781 hfs_unlock(cp);
1782
1783 return (MacToVFSError(retval));
1784 }
1785
1786
1787 /*
1788 * Prepare and issue the I/O.
1789 * buf_strategy() knows how to deal
1790 * with requests that require
1791 * fragmented I/Os.
1792 */
1793 int
1794 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1795 {
1796 buf_t bp = ap->a_bp;
1797 vnode_t vp = buf_vnode(bp);
1798 struct cnode *cp = VTOC(vp);
1799
1800 return (buf_strategy(cp->c_devvp, ap));
1801 }
1802
1803
1804 static int
1805 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1806 {
1807 register struct cnode *cp = VTOC(vp);
1808 struct filefork *fp = VTOF(vp);
1809 struct proc *p = vfs_context_proc(context);
1810 kauth_cred_t cred = vfs_context_ucred(context);
1811 int retval;
1812 off_t bytesToAdd;
1813 off_t actualBytesAdded;
1814 off_t filebytes;
1815 u_int64_t old_filesize;
1816 u_long fileblocks;
1817 int blksize;
1818 struct hfsmount *hfsmp;
1819 int lockflags;
1820
1821 blksize = VTOVCB(vp)->blockSize;
1822 fileblocks = fp->ff_blocks;
1823 filebytes = (off_t)fileblocks * (off_t)blksize;
1824 old_filesize = fp->ff_size;
1825
1826 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1827 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1828
1829 if (length < 0)
1830 return (EINVAL);
1831
1832 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1833 return (EFBIG);
1834
1835 hfsmp = VTOHFS(vp);
1836
1837 retval = E_NONE;
1838
1839 /* Files that are changing size are not hot file candidates. */
1840 if (hfsmp->hfc_stage == HFC_RECORDING) {
1841 fp->ff_bytesread = 0;
1842 }
1843
1844 /*
1845 * We cannot just check if fp->ff_size == length (as an optimization)
1846 * since there may be extra physical blocks that also need truncation.
1847 */
1848 #if QUOTA
1849 if ((retval = hfs_getinoquota(cp)))
1850 return(retval);
1851 #endif /* QUOTA */
1852
1853 /*
1854 * Lengthen the size of the file. We must ensure that the
1855 * last byte of the file is allocated. Since the smallest
1856 * value of ff_size is 0, length will be at least 1.
1857 */
1858 if (length > (off_t)fp->ff_size) {
1859 #if QUOTA
1860 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1861 cred, 0);
1862 if (retval)
1863 goto Err_Exit;
1864 #endif /* QUOTA */
1865 /*
1866 * If we don't have enough physical space then
1867 * we need to extend the physical size.
1868 */
1869 if (length > filebytes) {
1870 int eflags;
1871 u_long blockHint = 0;
1872
1873 /* All or nothing and don't round up to clumpsize. */
1874 eflags = kEFAllMask | kEFNoClumpMask;
1875
1876 if (cred && suser(cred, NULL) != 0)
1877 eflags |= kEFReserveMask; /* keep a reserve */
1878
1879 /*
1880 * Allocate Journal and Quota files in metadata zone.
1881 */
1882 if (filebytes == 0 &&
1883 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1884 hfs_virtualmetafile(cp)) {
1885 eflags |= kEFMetadataMask;
1886 blockHint = hfsmp->hfs_metazone_start;
1887 }
1888 if (hfs_start_transaction(hfsmp) != 0) {
1889 retval = EINVAL;
1890 goto Err_Exit;
1891 }
1892
1893 /* Protect extents b-tree and allocation bitmap */
1894 lockflags = SFL_BITMAP;
1895 if (overflow_extents(fp))
1896 lockflags |= SFL_EXTENTS;
1897 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1898
1899 while ((length > filebytes) && (retval == E_NONE)) {
1900 bytesToAdd = length - filebytes;
1901 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1902 (FCB*)fp,
1903 bytesToAdd,
1904 blockHint,
1905 eflags,
1906 &actualBytesAdded));
1907
1908 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1909 if (actualBytesAdded == 0 && retval == E_NONE) {
1910 if (length > filebytes)
1911 length = filebytes;
1912 break;
1913 }
1914 } /* endwhile */
1915
1916 hfs_systemfile_unlock(hfsmp, lockflags);
1917
1918 if (hfsmp->jnl) {
1919 (void) hfs_update(vp, TRUE);
1920 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1921 }
1922
1923 hfs_end_transaction(hfsmp);
1924
1925 if (retval)
1926 goto Err_Exit;
1927
1928 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1929 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1930 }
1931
1932 if (!(flags & IO_NOZEROFILL)) {
1933 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1934 struct rl_entry *invalid_range;
1935 off_t zero_limit;
1936
1937 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1938 if (length < zero_limit) zero_limit = length;
1939
1940 if (length > (off_t)fp->ff_size) {
1941 struct timeval tv;
1942
1943 /* Extending the file: time to fill out the current last page with zeroes? */
1944 if ((fp->ff_size & PAGE_MASK_64) &&
1945 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1946 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1947
1948 /* There's some valid data at the start of the (current) last page
1949 of the file, so zero out the remainder of that page to ensure the
1950 entire page contains valid data. Since there is no invalid range
1951 possible past the (current) eof, there's no need to remove anything
1952 from the invalid range list before calling cluster_write(): */
1953 hfs_unlock(cp);
1954 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1955 fp->ff_size, (off_t)0,
1956 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1957 hfs_lock(cp, HFS_FORCE_LOCK);
1958 if (retval) goto Err_Exit;
1959
1960 /* Merely invalidate the remaining area, if necessary: */
1961 if (length > zero_limit) {
1962 microuptime(&tv);
1963 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1964 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1965 }
1966 } else {
1967 /* The page containing the (current) eof is invalid: just add the
1968 remainder of the page to the invalid list, along with the area
1969 being newly allocated:
1970 */
1971 microuptime(&tv);
1972 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1973 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1974 };
1975 }
1976 } else {
1977 panic("hfs_truncate: invoked on non-UBC object?!");
1978 };
1979 }
1980 cp->c_touch_modtime = TRUE;
1981 fp->ff_size = length;
1982
1983 /* Nested transactions will do their own ubc_setsize. */
1984 if (!skipsetsize) {
1985 /*
1986 * ubc_setsize can cause a pagein here
1987 * so we need to drop cnode lock.
1988 */
1989 hfs_unlock(cp);
1990 ubc_setsize(vp, length);
1991 hfs_lock(cp, HFS_FORCE_LOCK);
1992 }
1993
1994 } else { /* Shorten the size of the file */
1995
1996 if ((off_t)fp->ff_size > length) {
1997 /*
1998 * Any buffers that are past the truncation point need to be
1999 * invalidated (to maintain buffer cache consistency).
2000 */
2001
2002 /* Nested transactions will do their own ubc_setsize. */
2003 if (!skipsetsize) {
2004 /*
2005 * ubc_setsize can cause a pageout here
2006 * so we need to drop cnode lock.
2007 */
2008 hfs_unlock(cp);
2009 ubc_setsize(vp, length);
2010 hfs_lock(cp, HFS_FORCE_LOCK);
2011 }
2012
2013 /* Any space previously marked as invalid is now irrelevant: */
2014 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2015 }
2016
2017 /*
2018 * Account for any unmapped blocks. Note that the new
2019 * file length can still end up with unmapped blocks.
2020 */
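/*
 * For example: a file holding 2 allocated blocks plus 4 loaned
 * (unallocated) blocks that is truncated to a length needing 4 blocks
 * first returns all 4 loans (ff_blocks drops to 2), then re-borrows
 * 2 loaned blocks so that ff_blocks covers the new length again.
 */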
2021 if (fp->ff_unallocblocks > 0) {
2022 u_int32_t finalblks;
2023 u_int32_t loanedBlocks;
2024
2025 HFS_MOUNT_LOCK(hfsmp, TRUE);
2026
2027 loanedBlocks = fp->ff_unallocblocks;
2028 cp->c_blocks -= loanedBlocks;
2029 fp->ff_blocks -= loanedBlocks;
2030 fp->ff_unallocblocks = 0;
2031
2032 hfsmp->loanedBlocks -= loanedBlocks;
2033
2034 finalblks = (length + blksize - 1) / blksize;
2035 if (finalblks > fp->ff_blocks) {
2036 /* calculate required unmapped blocks */
2037 loanedBlocks = finalblks - fp->ff_blocks;
2038 hfsmp->loanedBlocks += loanedBlocks;
2039
2040 fp->ff_unallocblocks = loanedBlocks;
2041 cp->c_blocks += loanedBlocks;
2042 fp->ff_blocks += loanedBlocks;
2043 }
2044 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2045 }
2046
2047 /*
2048 * For a TBE process the deallocation of the file blocks is
2049 * delayed until the file is closed. And hfs_close calls
2050 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2051 * isn't set, we make sure this isn't a TBE process.
2052 */
2053 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2054 #if QUOTA
2055 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2056 #endif /* QUOTA */
2057 if (hfs_start_transaction(hfsmp) != 0) {
2058 retval = EINVAL;
2059 goto Err_Exit;
2060 }
2061
2062 if (fp->ff_unallocblocks == 0) {
2063 /* Protect extents b-tree and allocation bitmap */
2064 lockflags = SFL_BITMAP;
2065 if (overflow_extents(fp))
2066 lockflags |= SFL_EXTENTS;
2067 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2068
2069 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2070 (FCB*)fp, length, false));
2071
2072 hfs_systemfile_unlock(hfsmp, lockflags);
2073 }
2074 if (hfsmp->jnl) {
2075 if (retval == 0) {
2076 fp->ff_size = length;
2077 }
2078 (void) hfs_update(vp, TRUE);
2079 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2080 }
2081
2082 hfs_end_transaction(hfsmp);
2083
2084 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2085 if (retval)
2086 goto Err_Exit;
2087 #if QUOTA
2088 /* These are bytes released */
2089 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2090 #endif /* QUOTA */
2091 }
2092 /* Only set update flag if the logical length changes */
2093 if (old_filesize != length)
2094 cp->c_touch_modtime = TRUE;
2095 fp->ff_size = length;
2096 }
2097 cp->c_touch_chgtime = TRUE;
2098 retval = hfs_update(vp, MNT_WAIT);
2099 if (retval) {
2100 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2101 -1, -1, -1, retval, 0);
2102 }
2103
2104 Err_Exit:
2105
2106 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2107 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2108
2109 return (retval);
2110 }
2111
2112
2113
2114 /*
2115 * Truncate a cnode to at most length size, freeing (or adding) the
2116 * disk blocks.
2117 */
2118 __private_extern__
2119 int
2120 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2121 vfs_context_t context)
2122 {
2123 struct filefork *fp = VTOF(vp);
2124 off_t filebytes;
2125 u_long fileblocks;
2126 int blksize, error = 0;
2127 struct cnode *cp = VTOC(vp);
2128
2129 if (vnode_isdir(vp))
2130 return (EISDIR); /* cannot truncate an HFS directory! */
2131
2132 blksize = VTOVCB(vp)->blockSize;
2133 fileblocks = fp->ff_blocks;
2134 filebytes = (off_t)fileblocks * (off_t)blksize;
2135
2136 // have to loop truncating or growing files that are
2137 // really big because otherwise transactions can get
2138 // enormous and consume too many kernel resources.
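// For example, shrinking a file that is several times larger than
// HFS_BIGFILE_SIZE walks filebytes down one HFS_BIGFILE_SIZE step per
// do_hfs_truncate() call, so no single transaction has to free more
// than that much space at once.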
2139
2140 if (length < filebytes) {
2141 while (filebytes > length) {
2142 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2143 filebytes -= HFS_BIGFILE_SIZE;
2144 } else {
2145 filebytes = length;
2146 }
2147 cp->c_flag |= C_FORCEUPDATE;
2148 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2149 if (error)
2150 break;
2151 }
2152 } else if (length > filebytes) {
2153 while (filebytes < length) {
2154 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2155 filebytes += HFS_BIGFILE_SIZE;
2156 } else {
2157 filebytes = length;
2158 }
2159 cp->c_flag |= C_FORCEUPDATE;
2160 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2161 if (error)
2162 break;
2163 }
2164 } else /* Same logical size */ {
2165
2166 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2167 }
2168 /* Files that are changing size are not hot file candidates. */
2169 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2170 fp->ff_bytesread = 0;
2171 }
2172
2173 return (error);
2174 }
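#if 0
/*
 * Illustrative sketch (not compiled): how a caller along the lines of
 * a setattr path might shrink a file to newsize.  The lock ordering
 * shown is an assumption based on the lock/unlock pairs used elsewhere
 * in this file.
 */
static int
hfs_truncate_example(struct vnode *vp, off_t newsize, vfs_context_t context)
{
	struct cnode *cp = VTOC(vp);
	int error;

	hfs_lock_truncate(cp, TRUE);		/* serialize with page-ins/page-outs */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		error = hfs_truncate(vp, newsize, 0, 0, context);
		hfs_unlock(cp);
	}
	hfs_unlock_truncate(cp);
	return (error);
}
#endif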
2175
2176
2177
2178 /*
2179 * Preallocate file storage space.
2180 */
2181 int
2182 hfs_vnop_allocate(struct vnop_allocate_args /* {
2183 vnode_t a_vp;
2184 off_t a_length;
2185 u_int32_t a_flags;
2186 off_t *a_bytesallocated;
2187 off_t a_offset;
2188 vfs_context_t a_context;
2189 } */ *ap)
2190 {
2191 struct vnode *vp = ap->a_vp;
2192 struct cnode *cp;
2193 struct filefork *fp;
2194 ExtendedVCB *vcb;
2195 off_t length = ap->a_length;
2196 off_t startingPEOF;
2197 off_t moreBytesRequested;
2198 off_t actualBytesAdded;
2199 off_t filebytes;
2200 u_long fileblocks;
2201 int retval, retval2;
2202 UInt32 blockHint;
2203 UInt32 extendFlags; /* For call to ExtendFileC */
2204 struct hfsmount *hfsmp;
2205 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2206 int lockflags;
2207
2208 *(ap->a_bytesallocated) = 0;
2209
2210 if (!vnode_isreg(vp))
2211 return (EISDIR);
2212 if (length < (off_t)0)
2213 return (EINVAL);
2214
2215 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2216 return (retval);
2217 cp = VTOC(vp);
2218 fp = VTOF(vp);
2219 hfsmp = VTOHFS(vp);
2220 vcb = VTOVCB(vp);
2221
2222 fileblocks = fp->ff_blocks;
2223 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2224
2225 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2226 retval = EINVAL;
2227 goto Err_Exit;
2228 }
2229
2230 /* Fill in the flags word for the call to Extend the file */
2231
2232 extendFlags = kEFNoClumpMask;
2233 if (ap->a_flags & ALLOCATECONTIG)
2234 extendFlags |= kEFContigMask;
2235 if (ap->a_flags & ALLOCATEALL)
2236 extendFlags |= kEFAllMask;
2237 if (cred && suser(cred, NULL) != 0)
2238 extendFlags |= kEFReserveMask;
2239
2240 retval = E_NONE;
2241 blockHint = 0;
2242 startingPEOF = filebytes;
2243
2244 if (ap->a_flags & ALLOCATEFROMPEOF)
2245 length += filebytes;
2246 else if (ap->a_flags & ALLOCATEFROMVOL)
2247 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2248
2249 /* If no changes are necessary, then we're done */
2250 if (filebytes == length)
2251 goto Std_Exit;
2252
2253 /*
2254 * Lengthen the size of the file. We must ensure that the
2255 * last byte of the file is allocated. Since the smallest
2256 * value of filebytes is 0, length will be at least 1.
2257 */
2258 if (length > filebytes) {
2259 moreBytesRequested = length - filebytes;
2260
2261 #if QUOTA
2262 retval = hfs_chkdq(cp,
2263 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2264 cred, 0);
2265 if (retval)
2266 goto Err_Exit;
2267
2268 #endif /* QUOTA */
2269 /*
2270 * Metadata zone checks.
2271 */
2272 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2273 /*
2274 * Allocate Journal and Quota files in metadata zone.
2275 */
2276 if (hfs_virtualmetafile(cp)) {
2277 extendFlags |= kEFMetadataMask;
2278 blockHint = hfsmp->hfs_metazone_start;
2279 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2280 (blockHint <= hfsmp->hfs_metazone_end)) {
2281 /*
2282 * Move blockHint outside metadata zone.
2283 */
2284 blockHint = hfsmp->hfs_metazone_end + 1;
2285 }
2286 }
2287
2288 if (hfs_start_transaction(hfsmp) != 0) {
2289 retval = EINVAL;
2290 goto Err_Exit;
2291 }
2292
2293 /* Protect extents b-tree and allocation bitmap */
2294 lockflags = SFL_BITMAP;
2295 if (overflow_extents(fp))
2296 lockflags |= SFL_EXTENTS;
2297 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2298
2299 retval = MacToVFSError(ExtendFileC(vcb,
2300 (FCB*)fp,
2301 moreBytesRequested,
2302 blockHint,
2303 extendFlags,
2304 &actualBytesAdded));
2305
2306 *(ap->a_bytesallocated) = actualBytesAdded;
2307 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2308
2309 hfs_systemfile_unlock(hfsmp, lockflags);
2310
2311 if (hfsmp->jnl) {
2312 (void) hfs_update(vp, TRUE);
2313 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2314 }
2315
2316 hfs_end_transaction(hfsmp);
2317
2318 /*
2319 * If we get an error and no changes were made then exit;
2320 * otherwise we must do the hfs_update to reflect the changes.
2321 */
2322 if (retval && (startingPEOF == filebytes))
2323 goto Err_Exit;
2324
2325 /*
2326 * Adjust actualBytesAdded to be allocation block aligned, not
2327 * clump size aligned.
2328 * NOTE: what we are reporting does not affect reality
2329 * until the file is closed, when we truncate the file to allocation
2330 * block size.
2331 */
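/*
 * For example: a request for 10 extra allocation blocks that the
 * allocator satisfies with a full clump still reports only the
 * requested amount, rounded up to a whole allocation block, not the
 * full clump that was actually reserved.
 */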
2332 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2333 *(ap->a_bytesallocated) =
2334 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2335
2336 } else { /* Shorten the size of the file */
2337
2338 if (fp->ff_size > length) {
2339 /*
2340 * Any buffers that are past the truncation point need to be
2341 * invalidated (to maintain buffer cache consistency).
2342 */
2343 }
2344
2345 if (hfs_start_transaction(hfsmp) != 0) {
2346 retval = EINVAL;
2347 goto Err_Exit;
2348 }
2349
2350 /* Protect extents b-tree and allocation bitmap */
2351 lockflags = SFL_BITMAP;
2352 if (overflow_extents(fp))
2353 lockflags |= SFL_EXTENTS;
2354 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2355
2356 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2357
2358 hfs_systemfile_unlock(hfsmp, lockflags);
2359
2360 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2361
2362 if (hfsmp->jnl) {
2363 (void) hfs_update(vp, TRUE);
2364 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2365 }
2366
2367 hfs_end_transaction(hfsmp);
2368
2369
2370 /*
2371 * If we get an error and no changes were made then exit;
2372 * otherwise we must do the hfs_update to reflect the changes.
2373 */
2374 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2375 #if QUOTA
2376 /* These are bytes released */
2377 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2378 #endif /* QUOTA */
2379
2380 if (fp->ff_size > filebytes) {
2381 fp->ff_size = filebytes;
2382
2383 hfs_unlock(cp);
2384 ubc_setsize(vp, fp->ff_size);
2385 hfs_lock(cp, HFS_FORCE_LOCK);
2386 }
2387 }
2388
2389 Std_Exit:
2390 cp->c_touch_chgtime = TRUE;
2391 cp->c_touch_modtime = TRUE;
2392 retval2 = hfs_update(vp, MNT_WAIT);
2393
2394 if (retval == 0)
2395 retval = retval2;
2396 Err_Exit:
2397 hfs_unlock(cp);
2398 return (retval);
2399 }
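#if 0
/*
 * Illustrative user-space sketch (not compiled): the kind of request
 * that typically reaches hfs_vnop_allocate() via fcntl(F_PREALLOCATE).
 * The exact routing through VNOP_ALLOCATE is an assumption; the flag
 * names are the standard <sys/fcntl.h> ones.
 */
#include <fcntl.h>

static int
preallocate_example(int fd, off_t bytes)
{
	fstore_t fst;

	fst.fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL;	/* maps to ALLOCATECONTIG | ALLOCATEALL */
	fst.fst_posmode = F_PEOFPOSMODE;			/* maps to ALLOCATEFROMPEOF */
	fst.fst_offset = 0;
	fst.fst_length = bytes;
	fst.fst_bytesalloc = 0;					/* filled in with bytes actually allocated */

	return fcntl(fd, F_PREALLOCATE, &fst);
}
#endif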
2400
2401
2402 /*
2403 * Pagein for HFS filesystem
2404 */
2405 int
2406 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2407 /*
2408 struct vnop_pagein_args {
2409 vnode_t a_vp,
2410 upl_t a_pl,
2411 vm_offset_t a_pl_offset,
2412 off_t a_f_offset,
2413 size_t a_size,
2414 int a_flags
2415 vfs_context_t a_context;
2416 };
2417 */
2418 {
2419 vnode_t vp = ap->a_vp;
2420 int error;
2421
2422 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2423 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2424 /*
2425 * Keep track of blocks read.
2426 */
2427 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2428 struct cnode *cp;
2429 struct filefork *fp;
2430 int bytesread;
2431 int took_cnode_lock = 0;
2432
2433 cp = VTOC(vp);
2434 fp = VTOF(vp);
2435
2436 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2437 bytesread = fp->ff_size;
2438 else
2439 bytesread = ap->a_size;
2440
2441 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2442 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2443 hfs_lock(cp, HFS_FORCE_LOCK);
2444 took_cnode_lock = 1;
2445 }
2446 /*
2447 * If this file hasn't been seen since the start of
2448 * the current sampling period then start over.
2449 */
2450 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2451 struct timeval tv;
2452
2453 fp->ff_bytesread = bytesread;
2454 microtime(&tv);
2455 cp->c_atime = tv.tv_sec;
2456 } else {
2457 fp->ff_bytesread += bytesread;
2458 }
2459 cp->c_touch_acctime = TRUE;
2460 if (took_cnode_lock)
2461 hfs_unlock(cp);
2462 }
2463 return (error);
2464 }
2465
2466 /*
2467 * Pageout for HFS filesystem.
2468 */
2469 int
2470 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2471 /*
2472 struct vnop_pageout_args {
2473 vnode_t a_vp,
2474 upl_t a_pl,
2475 vm_offset_t a_pl_offset,
2476 off_t a_f_offset,
2477 size_t a_size,
2478 int a_flags
2479 vfs_context_t a_context;
2480 };
2481 */
2482 {
2483 vnode_t vp = ap->a_vp;
2484 struct cnode *cp;
2485 struct filefork *fp;
2486 int retval;
2487 off_t end_of_range;
2488 off_t filesize;
2489
2490 cp = VTOC(vp);
2491 if (cp->c_lockowner == current_thread()) {
2492 panic("pageout: %s cnode lock already held!\n",
2493 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2494 }
2495 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2496 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2497 ubc_upl_abort_range(ap->a_pl,
2498 ap->a_pl_offset,
2499 ap->a_size,
2500 UPL_ABORT_FREE_ON_EMPTY);
2501 }
2502 return (retval);
2503 }
2504 fp = VTOF(vp);
2505
2506 filesize = fp->ff_size;
2507 end_of_range = ap->a_f_offset + ap->a_size - 1;
2508
2509 if (end_of_range >= filesize) {
2510 end_of_range = (off_t)(filesize - 1);
2511 }
2512 if (ap->a_f_offset < filesize) {
2513 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2514 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2515 }
2516 hfs_unlock(cp);
2517
2518 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2519 ap->a_size, filesize, ap->a_flags);
2520
2521 /*
2522 * If data was written, and setuid or setgid bits are set and
2523 * this process is not the superuser then clear the setuid and
2524 * setgid bits as a precaution against tampering.
2525 */
2526 if ((retval == 0) &&
2527 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2528 (vfs_context_suser(ap->a_context) != 0)) {
2529 hfs_lock(cp, HFS_FORCE_LOCK);
2530 cp->c_mode &= ~(S_ISUID | S_ISGID);
2531 cp->c_touch_chgtime = TRUE;
2532 hfs_unlock(cp);
2533 }
2534 return (retval);
2535 }
2536
2537 /*
2538 * Intercept B-Tree node writes to unswap them if necessary.
2539 */
2540 int
2541 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2542 {
2543 int retval = 0;
2544 register struct buf *bp = ap->a_bp;
2545 register struct vnode *vp = buf_vnode(bp);
2546 BlockDescriptor block;
2547
2548 /* Trap B-Tree writes */
2549 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2550 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2551 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2552
2553 /*
2554 * Swap and validate the node if it is in native byte order.
2555 * This is always true on big endian, so we always validate
2556 * before writing here. On little endian, the node typically has
2557 * been swapped and validated when it was written to the journal,
2558 * so we won't do anything here.
2559 */
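/*
 * The last two bytes of a B-Tree node hold the offset of record 0,
 * which always starts right after the 14-byte BTNodeDescriptor.  If
 * reading them as a host-order UInt16 yields 0x000e, the node is in
 * host byte order and must be swapped to big endian before it hits
 * the disk.
 */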
2560 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2561 /* Prepare the block pointer */
2562 block.blockHeader = bp;
2563 block.buffer = (char *)buf_dataptr(bp);
2564 block.blockNum = buf_lblkno(bp);
2565 /* not found in cache ==> came from disk */
2566 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2567 block.blockSize = buf_count(bp);
2568
2569 /* Endian un-swap B-Tree node */
2570 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2571 if (retval)
2572 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2573 }
2574 }
2575
2576 /* This buffer shouldn't be locked anymore but if it is, clear it */
2577 if ((buf_flags(bp) & B_LOCKED)) {
2578 // XXXdbg
2579 if (VTOHFS(vp)->jnl) {
2580 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2581 }
2582 buf_clearflags(bp, B_LOCKED);
2583 }
2584 retval = vn_bwrite (ap);
2585
2586 return (retval);
2587 }
2588
2589 /*
2590 * Relocate a file to a new location on disk
2591 * cnode must be locked on entry
2592 *
2593 * Relocation occurs by cloning the file's data from its
2594 * current set of blocks to a new set of blocks. During
2595 * the relocation all of the blocks (old and new) are
2596 * owned by the file.
2597 *
2598 * -----------------
2599 * |///////////////|
2600 * -----------------
2601 * 0 N (file offset)
2602 *
2603 * ----------------- -----------------
2604 * |///////////////| | | STEP 1 (acquire new blocks)
2605 * ----------------- -----------------
2606 * 0 N N+1 2N
2607 *
2608 * ----------------- -----------------
2609 * |///////////////| |///////////////| STEP 2 (clone data)
2610 * ----------------- -----------------
2611 * 0 N N+1 2N
2612 *
2613 * -----------------
2614 * |///////////////| STEP 3 (head truncate blocks)
2615 * -----------------
2616 * 0 N
2617 *
2618 * During steps 2 and 3 page-outs to file offsets less
2619 * than or equal to N are suspended.
2620 *
2621 * During step 3 page-ins to the file get supended.
2622 */
2623 __private_extern__
2624 int
2625 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2626 struct proc *p)
2627 {
2628 struct cnode *cp;
2629 struct filefork *fp;
2630 struct hfsmount *hfsmp;
2631 u_int32_t headblks;
2632 u_int32_t datablks;
2633 u_int32_t blksize;
2634 u_int32_t growsize;
2635 u_int32_t nextallocsave;
2636 daddr64_t sector_a, sector_b;
2637 int disabled_caching = 0;
2638 int eflags;
2639 off_t newbytes;
2640 int retval;
2641 int lockflags = 0;
2642 int took_trunc_lock = 0;
2643 int started_tr = 0;
2644 enum vtype vnodetype;
2645
2646 vnodetype = vnode_vtype(vp);
2647 if (vnodetype != VREG && vnodetype != VLNK) {
2648 return (EPERM);
2649 }
2650
2651 hfsmp = VTOHFS(vp);
2652 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2653 return (ENOSPC);
2654 }
2655
2656 cp = VTOC(vp);
2657 fp = VTOF(vp);
2658 if (fp->ff_unallocblocks)
2659 return (EINVAL);
2660 blksize = hfsmp->blockSize;
2661 if (blockHint == 0)
2662 blockHint = hfsmp->nextAllocation;
2663
2664 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2665 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2666 return (EFBIG);
2667 }
2668
2669 //
2670 // We do not believe that this call to hfs_fsync() is
2671 // necessary and it causes a journal transaction
2672 // deadlock so we are removing it.
2673 //
2674 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2675 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2676 // if (retval)
2677 // return (retval);
2678 //}
2679
2680 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2681 hfs_unlock(cp);
2682 hfs_lock_truncate(cp, TRUE);
2683 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2684 hfs_unlock_truncate(cp);
2685 return (retval);
2686 }
2687 took_trunc_lock = 1;
2688 }
2689 headblks = fp->ff_blocks;
2690 datablks = howmany(fp->ff_size, blksize);
2691 growsize = datablks * blksize;
2692 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2693 if (blockHint >= hfsmp->hfs_metazone_start &&
2694 blockHint <= hfsmp->hfs_metazone_end)
2695 eflags |= kEFMetadataMask;
2696
2697 if (hfs_start_transaction(hfsmp) != 0) {
2698 if (took_trunc_lock)
2699 hfs_unlock_truncate(cp);
2700 return (EINVAL);
2701 }
2702 started_tr = 1;
2703 /*
2704 * Protect the extents b-tree and the allocation bitmap
2705 * during MapFileBlockC and ExtendFileC operations.
2706 */
2707 lockflags = SFL_BITMAP;
2708 if (overflow_extents(fp))
2709 lockflags |= SFL_EXTENTS;
2710 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2711
2712 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2713 if (retval) {
2714 retval = MacToVFSError(retval);
2715 goto out;
2716 }
2717
2718 /*
2719 * STEP 1 - acquire new allocation blocks.
2720 */
2721 if (!vnode_isnocache(vp)) {
2722 vnode_setnocache(vp);
2723 disabled_caching = 1;
2724
2725 }
2726 nextallocsave = hfsmp->nextAllocation;
2727 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2728 if (eflags & kEFMetadataMask) {
2729 HFS_MOUNT_LOCK(hfsmp, TRUE);
2730 hfsmp->nextAllocation = nextallocsave;
2731 hfsmp->vcbFlags |= 0xFF00;
2732 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2733 }
2734
2735 retval = MacToVFSError(retval);
2736 if (retval == 0) {
2737 cp->c_flag |= C_MODIFIED;
2738 if (newbytes < growsize) {
2739 retval = ENOSPC;
2740 goto restore;
2741 } else if (fp->ff_blocks < (headblks + datablks)) {
2742 printf("hfs_relocate: allocation failed\n");
2743 retval = ENOSPC;
2744 goto restore;
2745 }
2746
2747 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2748 if (retval) {
2749 retval = MacToVFSError(retval);
2750 } else if ((sector_a + 1) == sector_b) {
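/*
 * The new space starts in the very next sector after the existing
 * data, i.e. the allocator simply extended the file in place, so
 * cloning would not actually move it anywhere; undo the allocation
 * and report ENOSPC.
 */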
2751 retval = ENOSPC;
2752 goto restore;
2753 } else if ((eflags & kEFMetadataMask) &&
2754 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2755 hfsmp->hfs_metazone_end)) {
2756 printf("hfs_relocate: didn't move into metadata zone\n");
2757 retval = ENOSPC;
2758 goto restore;
2759 }
2760 }
2761 /* Done with system locks and journal for now. */
2762 hfs_systemfile_unlock(hfsmp, lockflags);
2763 lockflags = 0;
2764 hfs_end_transaction(hfsmp);
2765 started_tr = 0;
2766
2767 if (retval) {
2768 /*
2769 * Check to see if failure is due to excessive fragmentation.
2770 */
2771 if ((retval == ENOSPC) &&
2772 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2773 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2774 }
2775 goto out;
2776 }
2777 /*
2778 * STEP 2 - clone file data into the new allocation blocks.
2779 */
2780
2781 if (vnodetype == VLNK)
2782 retval = hfs_clonelink(vp, blksize, cred, p);
2783 else if (vnode_issystem(vp))
2784 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2785 else
2786 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2787
2788 /* Start transaction for step 3 or for a restore. */
2789 if (hfs_start_transaction(hfsmp) != 0) {
2790 retval = EINVAL;
2791 goto out;
2792 }
2793 started_tr = 1;
2794 if (retval)
2795 goto restore;
2796
2797 /*
2798 * STEP 3 - switch to cloned data and remove old blocks.
2799 */
2800 lockflags = SFL_BITMAP;
2801 if (overflow_extents(fp))
2802 lockflags |= SFL_EXTENTS;
2803 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2804
2805 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2806
2807 hfs_systemfile_unlock(hfsmp, lockflags);
2808 lockflags = 0;
2809 if (retval)
2810 goto restore;
2811 out:
2812 if (took_trunc_lock)
2813 hfs_unlock_truncate(cp);
2814
2815 if (lockflags) {
2816 hfs_systemfile_unlock(hfsmp, lockflags);
2817 lockflags = 0;
2818 }
2819
2820 // See comment up above about calls to hfs_fsync()
2821 //
2822 //if (retval == 0)
2823 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2824
2825 if (hfsmp->jnl) {
2826 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2827 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2828 else
2829 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2830 }
2831 exit:
2832 if (disabled_caching) {
2833 vnode_clearnocache(vp);
2834 }
2835 if (started_tr)
2836 hfs_end_transaction(hfsmp);
2837
2838 return (retval);
2839
2840 restore:
2841 if (fp->ff_blocks == headblks)
2842 goto exit;
2843 /*
2844 * Give back any newly allocated space.
2845 */
2846 if (lockflags == 0) {
2847 lockflags = SFL_BITMAP;
2848 if (overflow_extents(fp))
2849 lockflags |= SFL_EXTENTS;
2850 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2851 }
2852
2853 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2854
2855 hfs_systemfile_unlock(hfsmp, lockflags);
2856 lockflags = 0;
2857
2858 if (took_trunc_lock)
2859 hfs_unlock_truncate(cp);
2860 goto exit;
2861 }
2862
2863
2864 /*
2865 * Clone a symlink.
2866 *
2867 */
2868 static int
2869 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2870 {
2871 struct buf *head_bp = NULL;
2872 struct buf *tail_bp = NULL;
2873 int error;
2874
2875
2876 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2877 if (error)
2878 goto out;
2879
2880 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2881 if (tail_bp == NULL) {
2882 error = EIO;
2883 goto out;
2884 }
2885 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2886 error = (int)buf_bwrite(tail_bp);
2887 out:
2888 if (head_bp) {
2889 buf_markinvalid(head_bp);
2890 buf_brelse(head_bp);
2891 }
2892 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2893
2894 return (error);
2895 }
2896
2897 /*
2898 * Clone a file's data within the file.
2899 *
2900 */
2901 static int
2902 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2903 {
2904 caddr_t bufp;
2905 size_t writebase;
2906 size_t bufsize;
2907 size_t copysize;
2908 size_t iosize;
2909 off_t filesize;
2910 size_t offset;
2911 uio_t auio;
2912 int error = 0;
2913
2914 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2915 writebase = blkstart * blksize;
2916 copysize = blkcnt * blksize;
2917 iosize = bufsize = MIN(copysize, 4096 * 16);
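/* Copy in chunks of at most 64K (4096 * 16) at a time. */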
2918 offset = 0;
2919
2920 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2921 return (ENOMEM);
2922 }
2923 hfs_unlock(VTOC(vp));
2924
2925 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2926
2927 while (offset < copysize) {
2928 iosize = MIN(copysize - offset, iosize);
2929
2930 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2931 uio_addiov(auio, (uintptr_t)bufp, iosize);
2932
2933 error = cluster_read(vp, auio, copysize, 0);
2934 if (error) {
2935 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2936 break;
2937 }
2938 if (uio_resid(auio) != 0) {
2939 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2940 error = EIO;
2941 break;
2942 }
2943
2944 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2945 uio_addiov(auio, (uintptr_t)bufp, iosize);
2946
2947 error = cluster_write(vp, auio, filesize + offset,
2948 filesize + offset + iosize,
2949 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2950 if (error) {
2951 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2952 break;
2953 }
2954 if (uio_resid(auio) != 0) {
2955 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2956 error = EIO;
2957 break;
2958 }
2959 offset += iosize;
2960 }
2961 uio_free(auio);
2962
2963 /*
2964 * No need to call ubc_sync_range or hfs_invalbuf
2965 * since the file was copied using IO_NOCACHE.
2966 */
2967
2968 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2969
2970 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2971 return (error);
2972 }
2973
2974 /*
2975 * Clone a system (metadata) file.
2976 *
2977 */
2978 static int
2979 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2980 kauth_cred_t cred, struct proc *p)
2981 {
2982 caddr_t bufp;
2983 char * offset;
2984 size_t bufsize;
2985 size_t iosize;
2986 struct buf *bp = NULL;
2987 daddr64_t blkno;
2988 daddr64_t blk;
2989 daddr64_t start_blk;
2990 daddr64_t last_blk;
2991 int breadcnt;
2992 int i;
2993 int error = 0;
2994
2995
2996 iosize = GetLogicalBlockSize(vp);
2997 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2998 breadcnt = bufsize / iosize;
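/*
 * Stage up to 1MB at a time, rounded down to a whole number of
 * logical blocks (iosize is assumed to be a power of two here).
 */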
2999
3000 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3001 return (ENOMEM);
3002 }
3003 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3004 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3005 blkno = 0;
3006
3007 while (blkno < last_blk) {
3008 /*
3009 * Read up to a megabyte
3010 */
3011 offset = bufp;
3012 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3013 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3014 if (error) {
3015 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3016 goto out;
3017 }
3018 if (buf_count(bp) != iosize) {
3019 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3020 goto out;
3021 }
3022 bcopy((char *)buf_dataptr(bp), offset, iosize);
3023
3024 buf_markinvalid(bp);
3025 buf_brelse(bp);
3026 bp = NULL;
3027
3028 offset += iosize;
3029 }
3030
3031 /*
3032 * Write up to a megabyte
3033 */
3034 offset = bufp;
3035 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3036 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3037 if (bp == NULL) {
3038 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3039 error = EIO;
3040 goto out;
3041 }
3042 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3043 error = (int)buf_bwrite(bp);
3044 bp = NULL;
3045 if (error)
3046 goto out;
3047 offset += iosize;
3048 }
3049 }
3050 out:
3051 if (bp) {
3052 buf_brelse(bp);
3053 }
3054
3055 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3056
3057 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3058
3059 return (error);
3060 }