1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
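/*
 * can_cluster(size) is true when "size" is a multiple of 4K and no larger than
 * MAXPHYSIO/2; hfs_bmap() below uses it to decide whether a read-ahead run is
 * worth reporting for the vnode's logical block size.
 */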
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
 127                 return (EINVAL);        /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
 157          * Keep track of blocks read
158 */
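/*
 * While the hot-file clustering engine is in its recording stage
 * (hfc_stage == HFC_RECORDING), ff_bytesread accumulates how much of this
 * file has been read during the current sampling period; if the cnode's
 * atime predates hfc_timebase the counter is restarted rather than added to.
 */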
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
 216         // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
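/*
 * For example (illustrative, assuming a 4K allocation block size): a file
 * with 8192 bytes of allocated space (filebytes == 8192) being written
 * 1000 bytes at offset 8000 has writelimit == 9000 > filebytes, so the
 * loop below must allocate at least one more block before the data goes out.
 */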
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
 342                         /* Check whether the area between the zero_offset and the start
 343                            of the transfer is invalid and should be zero-filled
 344                            as part of the transfer:
 345                          */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data but in all cases merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start is invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
 363                            will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
 471          * If we successfully wrote any data and we are not the superuser,
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
 493                         // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
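/*
 * Illustrative sketch (not part of this file) of how a user-space process
 * running as root might drive the bulk-access check; the field names follow
 * the 32-bit access_t layout above and the HFSIOC_BULKACCESS command defined
 * further below:
 *
 *	struct access_t args;
 *	args.uid        = uid;       // identity to evaluate access for
 *	args.flags      = R_OK;      // access requested
 *	args.num_groups = ngroups;   // group list for that identity
 *	args.groups     = groups;
 *	args.num_files  = nfiles;    // at most 256 ids per call
 *	args.file_ids   = file_ids;  // catalog node ids to check
 *	args.access     = results;   // OUT: one short per id, 0 == access granted
 *	fsctl(any_path_on_volume, HFSIOC_BULKACCESS, &args, 0);
 */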
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
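/*
 * For example, with acache == {10, 20, 30}: looking up 20 returns 1 with
 * *indexp == 1, while looking up 25 returns 0 with *indexp == 2 (the slot
 * where 25 would have to be inserted to keep the array sorted).
 */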
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
 692  * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
 710         /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
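/*
 * For example, a file whose parent chain is 57 -> 40 -> 2 (the root) is
 * reported accessible only if every directory on that chain grants X_OK to
 * the supplied credential; each verdict is cached, so a later check against
 * another entry of directory 57 is answered without hitting the catalog again.
 */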
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
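/*
 * The F_FREEZE_FS handler below runs this on every vnode of the mount via
 * vnode_iterate(), waiting for outstanding writes to drain before the global
 * exclusive lock is taken.
 */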
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 task = current_task();
995 task_working_set_disable(task);
996
997 // flush things before we get started to try and prevent
998 // dirty data from being paged out while we're frozen.
999 // note: can't do this after taking the lock as it will
1000 // deadlock against ourselves.
1001 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1002 hfs_global_exclusive_lock_acquire(hfsmp);
1003 journal_flush(hfsmp->jnl);
1004 // don't need to iterate on all vnodes, we just need to
1005 // wait for writes to the system files and the device vnode
1006 // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1007 if (HFSTOVCB(hfsmp)->extentsRefNum)
1008 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1009 if (HFSTOVCB(hfsmp)->catalogRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1013 if (hfsmp->hfs_attribute_vp)
1014 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1015 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1016
1017 hfsmp->hfs_freezing_proc = current_proc();
1018
1019 return (0);
1020 }
1021
1022 case F_THAW_FS: {
1023 if (!is_suser())
1024 return (EACCES);
1025
1026 // if we're not the one who froze the fs then we
1027 // can't thaw it.
1028 if (hfsmp->hfs_freezing_proc != current_proc()) {
1029 return EINVAL;
1030 }
1031
1032 // NOTE: if you add code here, also go check the
1033 // code that "thaws" the fs in hfs_vnop_close()
1034 //
1035 hfsmp->hfs_freezing_proc = NULL;
1036 hfs_global_exclusive_lock_release(hfsmp);
1037
1038 return (0);
1039 }
1040
1041 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1042 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1043
1044 case HFS_BULKACCESS_FSCTL:
1045 case HFS_BULKACCESS: {
1046 /*
 1047          * NOTE: on entry, the vnode is locked. In case this vnode
 1048          * happens to be in our list of file_ids, we'll note it and
 1049          * avoid calling hfs_chashget_nowait() on that id as that
1050 * will cause a "locking against myself" panic.
1051 */
1052 Boolean check_leaf = true;
1053
1054 struct user_access_t *user_access_structp;
1055 struct user_access_t tmp_user_access_t;
1056 struct access_cache cache;
1057
1058 int error = 0, i;
1059
1060 dev_t dev = VTOC(vp)->c_dev;
1061
1062 short flags;
1063 struct ucred myucred; /* XXX ILLEGAL */
1064 int num_files;
1065 int *file_ids = NULL;
1066 short *access = NULL;
1067
1068 cnid_t cnid;
1069 cnid_t prevParent_cnid = 0;
1070 unsigned long myPerms;
1071 short myaccess = 0;
1072 struct cat_attr cnattr;
1073 CatalogKey catkey;
1074 struct cnode *skip_cp = VTOC(vp);
1075 struct vfs_context my_context;
1076
1077 /* first, return error if not run as root */
1078 if (cred->cr_ruid != 0) {
1079 return EPERM;
1080 }
1081
1082 /* initialize the local cache and buffers */
1083 cache.numcached = 0;
1084 cache.cachehits = 0;
1085 cache.lookups = 0;
1086
1087 file_ids = (int *) get_pathbuff();
1088 access = (short *) get_pathbuff();
1089 cache.acache = (int *) get_pathbuff();
1090 cache.haveaccess = (Boolean *) get_pathbuff();
1091
1092 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1093 release_pathbuff((char *) file_ids);
1094 release_pathbuff((char *) access);
1095 release_pathbuff((char *) cache.acache);
1096 release_pathbuff((char *) cache.haveaccess);
1097
1098 return ENOMEM;
1099 }
1100
1101 /* struct copyin done during dispatch... need to copy file_id array separately */
1102 if (ap->a_data == NULL) {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 if (is64bit) {
1108 user_access_structp = (struct user_access_t *)ap->a_data;
1109 }
1110 else {
1111 struct access_t * accessp = (struct access_t *)ap->a_data;
1112 tmp_user_access_t.uid = accessp->uid;
1113 tmp_user_access_t.flags = accessp->flags;
1114 tmp_user_access_t.num_groups = accessp->num_groups;
1115 tmp_user_access_t.num_files = accessp->num_files;
1116 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1117 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1118 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1119 user_access_structp = &tmp_user_access_t;
1120 }
1121
1122 num_files = user_access_structp->num_files;
1123 if (num_files < 1) {
1124 goto err_exit_bulk_access;
1125 }
1126 if (num_files > 256) {
1127 error = EINVAL;
1128 goto err_exit_bulk_access;
1129 }
1130
1131 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1132 num_files * sizeof(int)))) {
1133 goto err_exit_bulk_access;
1134 }
1135
1136 /* fill in the ucred structure */
1137 flags = user_access_structp->flags;
1138 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1139 flags = R_OK;
1140 }
1141
1142 /* check if we've been passed leaf node ids or parent ids */
1143 if (flags & PARENT_IDS_FLAG) {
1144 check_leaf = false;
1145 }
1146
1147 memset(&myucred, 0, sizeof(myucred));
1148 myucred.cr_ref = 1;
1149 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1150 myucred.cr_ngroups = user_access_structp->num_groups;
1151 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1152 myucred.cr_ngroups = 0;
1153 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1154 myucred.cr_ngroups * sizeof(gid_t)))) {
1155 goto err_exit_bulk_access;
1156 }
1157 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1158
1159 my_context.vc_proc = p;
1160 my_context.vc_ucred = &myucred;
1161
1162 /* Check access to each file_id passed in */
1163 for (i = 0; i < num_files; i++) {
1164 #if 0
1165 cnid = (cnid_t) file_ids[i];
1166
1167 /* root always has access */
1168 if (!suser(&myucred, NULL)) {
1169 access[i] = 0;
1170 continue;
1171 }
1172
1173 if (check_leaf) {
1174
1175 /* do the lookup (checks the cnode hash, then the catalog) */
1176 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1177 if (error) {
1178 access[i] = (short) error;
1179 continue;
1180 }
1181
1182 /* before calling CheckAccess(), check the target file for read access */
1183 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1184 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1185
1186
1187 /* fail fast if no access */
1188 if ((myPerms & flags) == 0) {
1189 access[i] = EACCES;
1190 continue;
1191 }
1192 } else {
1193 /* we were passed an array of parent ids */
1194 catkey.hfsPlus.parentID = cnid;
1195 }
1196
1197 /* if the last guy had the same parent and had access, we're done */
1198 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1199 cache.cachehits++;
1200 access[i] = 0;
1201 continue;
1202 }
1203
1204 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1205 skip_cp, p, &myucred, dev);
1206
1207 if ( myaccess ) {
1208 access[i] = 0; // have access.. no errors to report
1209 } else {
1210 access[i] = (error != 0 ? (short) error : EACCES);
1211 }
1212
1213 prevParent_cnid = catkey.hfsPlus.parentID;
1214 #else
1215 int myErr;
1216
1217 cnid = (cnid_t)file_ids[i];
1218
1219 while (cnid >= kRootDirID) {
1220 /* get the vnode for this cnid */
1221 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1222 if ( myErr ) {
1223 access[i] = EACCES;
1224 break;
1225 }
1226
1227 cnid = VTOC(vp)->c_parentcnid;
1228
1229 hfs_unlock(VTOC(vp));
1230 if (vnode_vtype(vp) == VDIR) {
1231 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1232 } else {
1233 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1234 }
1235 vnode_put(vp);
1236 access[i] = myErr;
1237 if (myErr) {
1238 break;
1239 }
1240 }
1241 #endif
1242 }
1243
1244 /* copyout the access array */
1245 if ((error = copyout((caddr_t)access, user_access_structp->access,
1246 num_files * sizeof (short)))) {
1247 goto err_exit_bulk_access;
1248 }
1249
1250 err_exit_bulk_access:
1251
1252 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1253
1254 release_pathbuff((char *) cache.acache);
1255 release_pathbuff((char *) cache.haveaccess);
1256 release_pathbuff((char *) file_ids);
1257 release_pathbuff((char *) access);
1258
1259 return (error);
1260 } /* HFS_BULKACCESS */
1261
1262 case HFS_SETACLSTATE: {
1263 int state;
1264
1265 if (!is_suser()) {
1266 return (EPERM);
1267 }
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271 state = *(int *)ap->a_data;
1272 if (state == 0 || state == 1)
1273 return hfs_setextendedsecurity(hfsmp, state);
1274 else
1275 return (EINVAL);
1276 }
1277
1278 case F_FULLFSYNC: {
1279 int error;
1280
1281 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1282 if (error == 0) {
1283 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1284 hfs_unlock(VTOC(vp));
1285 }
1286
1287 return error;
1288 }
1289
1290 case F_CHKCLEAN: {
1291 register struct cnode *cp;
1292 int error;
1293
1294 if (!vnode_isreg(vp))
1295 return EINVAL;
1296
1297 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1298 if (error == 0) {
1299 cp = VTOC(vp);
1300 /*
1301 * used by regression test to determine if
1302 * all the dirty pages (via write) have been cleaned
 1303                          * after a call to 'fsync'.
1304 */
1305 error = is_file_clean(vp, VTOF(vp)->ff_size);
1306 hfs_unlock(cp);
1307 }
1308 return (error);
1309 }
1310
1311 case F_RDADVISE: {
1312 register struct radvisory *ra;
1313 struct filefork *fp;
1314 int error;
1315
1316 if (!vnode_isreg(vp))
1317 return EINVAL;
1318
1319 ra = (struct radvisory *)(ap->a_data);
1320 fp = VTOF(vp);
1321
1322 /* Protect against a size change. */
1323 hfs_lock_truncate(VTOC(vp), TRUE);
1324
1325 if (ra->ra_offset >= fp->ff_size) {
1326 error = EFBIG;
1327 } else {
1328 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1329 }
1330
1331 hfs_unlock_truncate(VTOC(vp));
1332 return (error);
1333 }
1334
1335 case F_READBOOTSTRAP:
1336 case F_WRITEBOOTSTRAP:
1337 {
1338 struct vnode *devvp = NULL;
1339 user_fbootstraptransfer_t *user_bootstrapp;
1340 int devBlockSize;
1341 int error;
1342 uio_t auio;
1343 daddr64_t blockNumber;
1344 u_long blockOffset;
1345 u_long xfersize;
1346 struct buf *bp;
1347 user_fbootstraptransfer_t user_bootstrap;
1348
1349 if (!vnode_isvroot(vp))
1350 return (EINVAL);
 1351             /* LP64 - when the caller is a 64-bit process we are passed a pointer
 1352              * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
 1353              * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1354 */
1355 if (is64bit) {
1356 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1357 }
1358 else {
1359 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1360 user_bootstrapp = &user_bootstrap;
1361 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1362 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1363 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1364 }
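/*
 * Bootstrap transfers are limited to the first 1024 bytes of the volume
 * (the traditional boot blocks):
 */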
1365 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1366 return EINVAL;
1367
1368 devvp = VTOHFS(vp)->hfs_devvp;
1369 auio = uio_create(1, user_bootstrapp->fbt_offset,
1370 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1371 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1372 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1373
1374 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1375
1376 while (uio_resid(auio) > 0) {
1377 blockNumber = uio_offset(auio) / devBlockSize;
1378 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1379 if (error) {
1380 if (bp) buf_brelse(bp);
1381 uio_free(auio);
1382 return error;
1383 };
1384
1385 blockOffset = uio_offset(auio) % devBlockSize;
1386 xfersize = devBlockSize - blockOffset;
1387 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1388 if (error) {
1389 buf_brelse(bp);
1390 uio_free(auio);
1391 return error;
1392 };
1393 if (uio_rw(auio) == UIO_WRITE) {
1394 error = VNOP_BWRITE(bp);
1395 if (error) {
1396 uio_free(auio);
1397 return error;
1398 }
1399 } else {
1400 buf_brelse(bp);
1401 };
1402 };
1403 uio_free(auio);
1404 };
1405 return 0;
1406
1407 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1408 {
1409 if (is64bit) {
1410 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1411 }
1412 else {
1413 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1414 }
1415 return 0;
1416 }
1417
1418 case HFS_GET_MOUNT_TIME:
1419 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1420 break;
1421
1422 case HFS_GET_LAST_MTIME:
1423 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1424 break;
1425
1426 case HFS_SET_BOOT_INFO:
1427 if (!vnode_isvroot(vp))
1428 return(EINVAL);
1429 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1430 return(EACCES); /* must be superuser or owner of filesystem */
1431 HFS_MOUNT_LOCK(hfsmp, TRUE);
1432 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1433 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1434 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1435 break;
1436
1437 case HFS_GET_BOOT_INFO:
1438 if (!vnode_isvroot(vp))
1439 return(EINVAL);
1440 HFS_MOUNT_LOCK(hfsmp, TRUE);
1441 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1442 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443 break;
1444
1445 default:
1446 return (ENOTTY);
1447 }
1448
1449 /* Should never get here */
1450 return 0;
1451 }
1452
1453 /*
1454 * select
1455 */
1456 int
1457 hfs_vnop_select(__unused struct vnop_select_args *ap)
1458 /*
1459 struct vnop_select_args {
1460 vnode_t a_vp;
1461 int a_which;
1462 int a_fflags;
1463 void *a_wql;
1464 vfs_context_t a_context;
1465 };
1466 */
1467 {
1468 /*
1469 * We should really check to see if I/O is possible.
1470 */
1471 return (1);
1472 }
1473
1474 /*
 1475  * Converts a logical block number to a physical block number, and optionally returns
 1476  * the number of remaining blocks in the run. The logical block is based on hfsNode.logBlockSize.
 1477  * The physical block number is based on the device block size, which is currently 512 bytes.
 1478  * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
1479 */
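/*
 * For example (illustrative): with a 4K logical block size, logical block 10
 * corresponds to byte position 40960; MapFileBlockC() turns that into a
 * 512-byte device block number in *bnp plus the number of contiguous bytes
 * available, from which the remaining run of logical blocks is derived.
 */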
1480 int
1481 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1482 {
1483 struct cnode *cp = VTOC(vp);
1484 struct filefork *fp = VTOF(vp);
1485 struct hfsmount *hfsmp = VTOHFS(vp);
1486 int retval = E_NONE;
1487 daddr_t logBlockSize;
1488 size_t bytesContAvail = 0;
1489 off_t blockposition;
1490 int lockExtBtree;
1491 int lockflags = 0;
1492
1493 /*
1494 * Check for underlying vnode requests and ensure that logical
1495 * to physical mapping is requested.
1496 */
1497 if (vpp != NULL)
1498 *vpp = cp->c_devvp;
1499 if (bnp == NULL)
1500 return (0);
1501
1502 logBlockSize = GetLogicalBlockSize(vp);
1503 blockposition = (off_t)bn * (off_t)logBlockSize;
1504
1505 lockExtBtree = overflow_extents(fp);
1506
1507 if (lockExtBtree)
1508 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1509
1510 retval = MacToVFSError(
1511 MapFileBlockC (HFSTOVCB(hfsmp),
1512 (FCB*)fp,
1513 MAXPHYSIO,
1514 blockposition,
1515 bnp,
1516 &bytesContAvail));
1517
1518 if (lockExtBtree)
1519 hfs_systemfile_unlock(hfsmp, lockflags);
1520
1521 if (retval == E_NONE) {
1522 /* Figure out how many read ahead blocks there are */
1523 if (runp != NULL) {
1524 if (can_cluster(logBlockSize)) {
1525 /* Make sure this result never goes negative: */
1526 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1527 } else {
1528 *runp = 0;
1529 }
1530 }
1531 }
1532 return (retval);
1533 }
1534
1535 /*
1536 * Convert logical block number to file offset.
1537 */
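/*
 * (i.e. offset = a_lblkno * logical block size; for example, block 3 with a
 * 4K logical block size maps to byte offset 12288)
 */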
1538 int
1539 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1540 /*
1541 struct vnop_blktooff_args {
1542 vnode_t a_vp;
1543 daddr64_t a_lblkno;
1544 off_t *a_offset;
1545 };
1546 */
1547 {
1548 if (ap->a_vp == NULL)
1549 return (EINVAL);
1550 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1551
1552 return(0);
1553 }
1554
1555 /*
1556 * Convert file offset to logical block number.
1557 */
1558 int
1559 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1560 /*
1561 struct vnop_offtoblk_args {
1562 vnode_t a_vp;
1563 off_t a_offset;
1564 daddr64_t *a_lblkno;
1565 };
1566 */
1567 {
1568 if (ap->a_vp == NULL)
1569 return (EINVAL);
1570 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1571
1572 return(0);
1573 }
1574
1575 /*
1576 * Map file offset to physical block number.
1577 *
1578 * System file cnodes are expected to be locked (shared or exclusive).
1579 */
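/*
 * On success *a_bpn receives the device block backing a_foffset, or -1 when
 * that offset falls inside an invalid (not yet written) range; *a_run, if
 * supplied, receives the number of contiguous bytes that can be treated
 * uniformly starting at a_foffset, adjusted below for invalid file ranges.
 */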
1580 int
1581 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1582 /*
1583 struct vnop_blockmap_args {
1584 vnode_t a_vp;
1585 off_t a_foffset;
1586 size_t a_size;
1587 daddr64_t *a_bpn;
1588 size_t *a_run;
1589 void *a_poff;
1590 int a_flags;
1591 vfs_context_t a_context;
1592 };
1593 */
1594 {
1595 struct vnode *vp = ap->a_vp;
1596 struct cnode *cp;
1597 struct filefork *fp;
1598 struct hfsmount *hfsmp;
1599 size_t bytesContAvail = 0;
1600 int retval = E_NONE;
1601 int syslocks = 0;
1602 int lockflags = 0;
1603 struct rl_entry *invalid_range;
1604 enum rl_overlaptype overlaptype;
1605 int started_tr = 0;
1606 int tooklock = 0;
1607
1608 /*
1609 * Check for underlying vnode requests and ensure that logical
1610 * to physical mapping is requested.
1611 */
1612 if (ap->a_bpn == NULL)
1613 return (0);
1614
1615 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1616 if (VTOC(vp)->c_lockowner != current_thread()) {
1617 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1618 tooklock = 1;
1619 } else {
1620 cp = VTOC(vp);
1621 panic("blockmap: %s cnode lock already held!\n",
1622 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1623 }
1624 }
1625 hfsmp = VTOHFS(vp);
1626 cp = VTOC(vp);
1627 fp = VTOF(vp);
1628
1629 retry:
1630 if (fp->ff_unallocblocks) {
1631 if (hfs_start_transaction(hfsmp) != 0) {
1632 retval = EINVAL;
1633 goto exit;
1634 } else {
1635 started_tr = 1;
1636 }
1637 syslocks = SFL_EXTENTS | SFL_BITMAP;
1638
1639 } else if (overflow_extents(fp)) {
1640 syslocks = SFL_EXTENTS;
1641 }
1642
1643 if (syslocks)
1644 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1645
1646 /*
1647 * Check for any delayed allocations.
1648 */
1649 if (fp->ff_unallocblocks) {
1650 SInt64 actbytes;
1651 u_int32_t loanedBlocks;
1652
1653 //
1654 // Make sure we have a transaction. It's possible
1655 // that we came in and fp->ff_unallocblocks was zero
1656 // but during the time we blocked acquiring the extents
1657 // btree, ff_unallocblocks became non-zero and so we
1658 // will need to start a transaction.
1659 //
1660 if (started_tr == 0) {
1661 if (syslocks) {
1662 hfs_systemfile_unlock(hfsmp, lockflags);
1663 syslocks = 0;
1664 }
1665 goto retry;
1666 }
1667
1668 /*
 1669                  * Note: ExtendFileC will release any blocks on loan and
 1670                  * acquire real blocks. So we ask to extend by zero bytes
1671 * since ExtendFileC will account for the virtual blocks.
1672 */
1673
1674 loanedBlocks = fp->ff_unallocblocks;
1675 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1676 kEFAllMask | kEFNoClumpMask, &actbytes);
1677
1678 if (retval) {
1679 fp->ff_unallocblocks = loanedBlocks;
1680 cp->c_blocks += loanedBlocks;
1681 fp->ff_blocks += loanedBlocks;
1682
1683 HFS_MOUNT_LOCK(hfsmp, TRUE);
1684 hfsmp->loanedBlocks += loanedBlocks;
1685 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1686 }
1687
1688 if (retval) {
1689 hfs_systemfile_unlock(hfsmp, lockflags);
1690 cp->c_flag |= C_MODIFIED;
1691 if (started_tr) {
1692 (void) hfs_update(vp, TRUE);
1693 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1694
1695 hfs_end_transaction(hfsmp);
1696 }
1697 goto exit;
1698 }
1699 }
1700
1701 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1702 ap->a_bpn, &bytesContAvail);
1703 if (syslocks) {
1704 hfs_systemfile_unlock(hfsmp, lockflags);
1705 syslocks = 0;
1706 }
1707
1708 if (started_tr) {
1709 (void) hfs_update(vp, TRUE);
1710 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1711 hfs_end_transaction(hfsmp);
1712 started_tr = 0;
1713 }
1714 if (retval) {
1715 goto exit;
1716 }
1717
1718 /* Adjust the mapping information for invalid file ranges: */
1719 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1720 ap->a_foffset + (off_t)bytesContAvail - 1,
1721 &invalid_range);
1722 if (overlaptype != RL_NOOVERLAP) {
1723 switch(overlaptype) {
1724 case RL_MATCHINGOVERLAP:
1725 case RL_OVERLAPCONTAINSRANGE:
1726 case RL_OVERLAPSTARTSBEFORE:
1727 /* There's no valid block for this byte offset: */
1728 *ap->a_bpn = (daddr64_t)-1;
1729 /* There's no point limiting the amount to be returned
1730 * if the invalid range that was hit extends all the way
1731 * to the EOF (i.e. there's no valid bytes between the
1732 * end of this range and the file's EOF):
1733 */
1734 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1735 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1736 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1737 }
1738 break;
1739
1740 case RL_OVERLAPISCONTAINED:
1741 case RL_OVERLAPENDSAFTER:
1742 /* The range of interest hits an invalid block before the end: */
1743 if (invalid_range->rl_start == ap->a_foffset) {
1744 /* There's actually no valid information to be had starting here: */
1745 *ap->a_bpn = (daddr64_t)-1;
1746 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1747 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1748 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1749 }
1750 } else {
1751 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1752 }
1753 break;
1754
1755 case RL_NOOVERLAP:
1756 break;
1757 } /* end switch */
1758 if (bytesContAvail > ap->a_size)
1759 bytesContAvail = ap->a_size;
1760 }
1761 if (ap->a_run)
1762 *ap->a_run = bytesContAvail;
1763
1764 if (ap->a_poff)
1765 *(int *)ap->a_poff = 0;
1766 exit:
1767 if (tooklock)
1768 hfs_unlock(cp);
1769
1770 return (MacToVFSError(retval));
1771 }
1772
1773
1774 /*
 1775  * Prepare and issue the I/O;
 1776  * buf_strategy knows how to deal
 1777  * with requests that require
 1778  * fragmented I/Os.
1779 */
1780 int
1781 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1782 {
1783 buf_t bp = ap->a_bp;
1784 vnode_t vp = buf_vnode(bp);
1785 struct cnode *cp = VTOC(vp);
1786
1787 return (buf_strategy(cp->c_devvp, ap));
1788 }
1789
1790
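/*
 * Truncate (or extend) the file to "length" bytes.  The cnode is expected to
 * be locked on entry; when "skipsetsize" is set the caller is responsible for
 * the matching ubc_setsize(), and IO_NOZEROFILL in "flags" suppresses
 * zero-filling of newly allocated space.
 */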
1791 static int
1792 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1793 {
1794 register struct cnode *cp = VTOC(vp);
1795 struct filefork *fp = VTOF(vp);
 1796         struct proc *p = vfs_context_proc(context);
1797 kauth_cred_t cred = vfs_context_ucred(context);
1798 int retval;
1799 off_t bytesToAdd;
1800 off_t actualBytesAdded;
1801 off_t filebytes;
1802 u_long fileblocks;
1803 int blksize;
1804 struct hfsmount *hfsmp;
1805 int lockflags;
1806
1807 blksize = VTOVCB(vp)->blockSize;
1808 fileblocks = fp->ff_blocks;
1809 filebytes = (off_t)fileblocks * (off_t)blksize;
1810
1811 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1812 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1813
1814 if (length < 0)
1815 return (EINVAL);
1816
1817 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1818 return (EFBIG);
1819
1820 hfsmp = VTOHFS(vp);
1821
1822 retval = E_NONE;
1823
1824 /* Files that are changing size are not hot file candidates. */
1825 if (hfsmp->hfc_stage == HFC_RECORDING) {
1826 fp->ff_bytesread = 0;
1827 }
1828
1829 /*
1830 * We cannot just check if fp->ff_size == length (as an optimization)
1831 * since there may be extra physical blocks that also need truncation.
1832 */
1833 #if QUOTA
1834 if ((retval = hfs_getinoquota(cp)))
1835 return(retval);
1836 #endif /* QUOTA */
1837
1838 /*
1839 * Lengthen the size of the file. We must ensure that the
1840 * last byte of the file is allocated. Since the smallest
1841 * value of ff_size is 0, length will be at least 1.
1842 */
1843 if (length > (off_t)fp->ff_size) {
1844 #if QUOTA
1845 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1846 cred, 0);
1847 if (retval)
1848 goto Err_Exit;
1849 #endif /* QUOTA */
1850 /*
1851 * If we don't have enough physical space then
1852 * we need to extend the physical size.
1853 */
1854 if (length > filebytes) {
1855 int eflags;
1856 u_long blockHint = 0;
1857
1858 /* All or nothing and don't round up to clumpsize. */
1859 eflags = kEFAllMask | kEFNoClumpMask;
1860
1861 if (cred && suser(cred, NULL) != 0)
1862 eflags |= kEFReserveMask; /* keep a reserve */
1863
1864 /*
1865 * Allocate Journal and Quota files in metadata zone.
1866 */
1867 if (filebytes == 0 &&
1868 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1869 hfs_virtualmetafile(cp)) {
1870 eflags |= kEFMetadataMask;
1871 blockHint = hfsmp->hfs_metazone_start;
1872 }
1873 if (hfs_start_transaction(hfsmp) != 0) {
1874 retval = EINVAL;
1875 goto Err_Exit;
1876 }
1877
1878 /* Protect extents b-tree and allocation bitmap */
1879 lockflags = SFL_BITMAP;
1880 if (overflow_extents(fp))
1881 lockflags |= SFL_EXTENTS;
1882 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1883
1884 while ((length > filebytes) && (retval == E_NONE)) {
1885 bytesToAdd = length - filebytes;
1886 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1887 (FCB*)fp,
1888 bytesToAdd,
1889 blockHint,
1890 eflags,
1891 &actualBytesAdded));
1892
1893 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1894 if (actualBytesAdded == 0 && retval == E_NONE) {
1895 if (length > filebytes)
1896 length = filebytes;
1897 break;
1898 }
1899 } /* endwhile */
1900
1901 hfs_systemfile_unlock(hfsmp, lockflags);
1902
1903 if (hfsmp->jnl) {
1904 (void) hfs_update(vp, TRUE);
1905 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1906 }
1907
1908 hfs_end_transaction(hfsmp);
1909
1910 if (retval)
1911 goto Err_Exit;
1912
1913 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1914 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1915 }
1916
1917 if (!(flags & IO_NOZEROFILL)) {
1918 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1919 struct rl_entry *invalid_range;
1920 off_t zero_limit;
1921
1922 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1923 if (length < zero_limit) zero_limit = length;
1924
1925 if (length > (off_t)fp->ff_size) {
1926 struct timeval tv;
1927
1928 /* Extending the file: time to fill out the current last page w. zeroes? */
1929 if ((fp->ff_size & PAGE_MASK_64) &&
1930 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1931 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1932
1933 /* There's some valid data at the start of the (current) last page
1934 of the file, so zero out the remainder of that page to ensure the
1935 entire page contains valid data. Since there is no invalid range
1936 possible past the (current) eof, there's no need to remove anything
1937 from the invalid range list before calling cluster_write(): */
1938 hfs_unlock(cp);
1939 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1940 fp->ff_size, (off_t)0,
1941 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1942 hfs_lock(cp, HFS_FORCE_LOCK);
1943 if (retval) goto Err_Exit;
1944
1945 /* Merely invalidate the remaining area, if necessary: */
1946 if (length > zero_limit) {
1947 microuptime(&tv);
1948 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1949 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1950 }
1951 } else {
1952 /* The page containing the (current) eof is invalid: just add the
1953 remainder of the page to the invalid list, along with the area
1954 being newly allocated:
1955 */
1956 microuptime(&tv);
1957 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1958 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1959 };
1960 }
1961 } else {
1962 panic("hfs_truncate: invoked on non-UBC object?!");
1963 };
1964 }
1965 cp->c_touch_modtime = TRUE;
1966 fp->ff_size = length;
1967
1968 /* Nested transactions will do their own ubc_setsize. */
1969 if (!skipsetsize) {
1970 /*
1971 * ubc_setsize can cause a pagein here
1972 * so we need to drop cnode lock.
1973 */
1974 hfs_unlock(cp);
1975 ubc_setsize(vp, length);
1976 hfs_lock(cp, HFS_FORCE_LOCK);
1977 }
1978
1979 } else { /* Shorten the size of the file */
1980
1981 if ((off_t)fp->ff_size > length) {
1982 /*
1983 * Any buffers that are past the truncation point need to be
1984 * invalidated (to maintain buffer cache consistency).
1985 */
1986
1987 /* Nested transactions will do their own ubc_setsize. */
1988 if (!skipsetsize) {
1989 /*
1990 * ubc_setsize can cause a pageout here
1991 * so we need to drop cnode lock.
1992 */
1993 hfs_unlock(cp);
1994 ubc_setsize(vp, length);
1995 hfs_lock(cp, HFS_FORCE_LOCK);
1996 }
1997
1998 /* Any space previously marked as invalid is now irrelevant: */
1999 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2000 }
2001
2002 /*
2003 * Account for any unmapped blocks. Note that the new
2004 * file length can still end up with unmapped blocks.
2005 */
2006 if (fp->ff_unallocblocks > 0) {
2007 u_int32_t finalblks;
2008 u_int32_t loanedBlocks;
2009
2010 HFS_MOUNT_LOCK(hfsmp, TRUE);
2011
2012 loanedBlocks = fp->ff_unallocblocks;
2013 cp->c_blocks -= loanedBlocks;
2014 fp->ff_blocks -= loanedBlocks;
2015 fp->ff_unallocblocks = 0;
2016
2017 hfsmp->loanedBlocks -= loanedBlocks;
2018
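			/*
			 * If the new length still ends beyond the last allocated
			 * block, borrow back just enough blocks to cover the
			 * unmapped tail.
			 */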
2019 finalblks = (length + blksize - 1) / blksize;
2020 if (finalblks > fp->ff_blocks) {
2021 /* calculate required unmapped blocks */
2022 loanedBlocks = finalblks - fp->ff_blocks;
2023 hfsmp->loanedBlocks += loanedBlocks;
2024
2025 fp->ff_unallocblocks = loanedBlocks;
2026 cp->c_blocks += loanedBlocks;
2027 fp->ff_blocks += loanedBlocks;
2028 }
2029 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2030 }
2031
2032 /*
2033 * For a TBE process, deallocation of the file blocks is
2034 * delayed until the file is closed; hfs_close calls truncate
2035 * with the IO_NDELAY flag set. So when IO_NDELAY isn't set,
2036 * we make sure this isn't a TBE process.
2037 */
2038 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2039 #if QUOTA
2040 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2041 #endif /* QUOTA */
2042 if (hfs_start_transaction(hfsmp) != 0) {
2043 retval = EINVAL;
2044 goto Err_Exit;
2045 }
2046
2047 if (fp->ff_unallocblocks == 0) {
2048 /* Protect extents b-tree and allocation bitmap */
2049 lockflags = SFL_BITMAP;
2050 if (overflow_extents(fp))
2051 lockflags |= SFL_EXTENTS;
2052 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2053
2054 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2055 (FCB*)fp, length, false));
2056
2057 hfs_systemfile_unlock(hfsmp, lockflags);
2058 }
2059 if (hfsmp->jnl) {
2060 (void) hfs_update(vp, TRUE);
2061 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2062 }
2063
2064 hfs_end_transaction(hfsmp);
2065
2066 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2067 if (retval)
2068 goto Err_Exit;
2069 #if QUOTA
2070 /* These are bytesreleased */
2071 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2072 #endif /* QUOTA */
2073 }
2074 /* Only set update flag if the logical length changes */
2075 if ((off_t)fp->ff_size != length)
2076 cp->c_touch_modtime = TRUE;
2077 fp->ff_size = length;
2078 }
2079 cp->c_touch_chgtime = TRUE;
2080 retval = hfs_update(vp, MNT_WAIT);
2081 if (retval) {
2082 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2083 -1, -1, -1, retval, 0);
2084 }
2085
2086 Err_Exit:
2087
2088 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2089 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2090
2091 return (retval);
2092 }
2093
2094
2095
2096 /*
2097 * Truncate a cnode to at most length size, freeing (or adding) the
2098 * disk blocks.
2099 */
2100 __private_extern__
2101 int
2102 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2103 vfs_context_t context)
2104 {
2105 struct filefork *fp = VTOF(vp);
2106 off_t filebytes;
2107 u_long fileblocks;
2108 int blksize, error = 0;
2109
2110 if (vnode_isdir(vp))
2111 return (EISDIR); /* cannot truncate an HFS directory! */
2112
2113 blksize = VTOVCB(vp)->blockSize;
2114 fileblocks = fp->ff_blocks;
2115 filebytes = (off_t)fileblocks * (off_t)blksize;
2116
2117 // have to loop truncating or growing files that are
2118 // really big because otherwise transactions can get
2119 // enormous and consume too many kernel resources.
2120
2121 if (length < filebytes) {
2122 while (filebytes > length) {
2123 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2124 filebytes -= HFS_BIGFILE_SIZE;
2125 } else {
2126 filebytes = length;
2127 }
2128 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2129 if (error)
2130 break;
2131 }
2132 } else if (length > filebytes) {
2133 while (filebytes < length) {
2134 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2135 filebytes += HFS_BIGFILE_SIZE;
2136 } else {
2137 filebytes = length;
2138 }
2139 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2140 if (error)
2141 break;
2142 }
2143 } else /* Same logical size */ {
2144
2145 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2146 }
2147 /* Files that are changing size are not hot file candidates. */
2148 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2149 fp->ff_bytesread = 0;
2150 }
2151
2152 return (error);
2153 }
2154
2155
2156
2157 /*
2158 * Preallocate file storage space.
2159 */
2160 int
2161 hfs_vnop_allocate(struct vnop_allocate_args /* {
2162 vnode_t a_vp;
2163 off_t a_length;
2164 u_int32_t a_flags;
2165 off_t *a_bytesallocated;
2166 off_t a_offset;
2167 vfs_context_t a_context;
2168 } */ *ap)
2169 {
2170 struct vnode *vp = ap->a_vp;
2171 struct cnode *cp;
2172 struct filefork *fp;
2173 ExtendedVCB *vcb;
2174 off_t length = ap->a_length;
2175 off_t startingPEOF;
2176 off_t moreBytesRequested;
2177 off_t actualBytesAdded;
2178 off_t filebytes;
2179 u_long fileblocks;
2180 int retval, retval2;
2181 UInt32 blockHint;
2182 UInt32 extendFlags; /* For call to ExtendFileC */
2183 struct hfsmount *hfsmp;
2184 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2185 int lockflags;
2186
2187 *(ap->a_bytesallocated) = 0;
2188
2189 if (!vnode_isreg(vp))
2190 return (EISDIR);
2191 if (length < (off_t)0)
2192 return (EINVAL);
2193
2194 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2195 return (retval);
2196 cp = VTOC(vp);
2197 fp = VTOF(vp);
2198 hfsmp = VTOHFS(vp);
2199 vcb = VTOVCB(vp);
2200
2201 fileblocks = fp->ff_blocks;
2202 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2203
2204 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2205 retval = EINVAL;
2206 goto Err_Exit;
2207 }
2208
2209 /* Fill in the flags word for the call to Extend the file */
2210
2211 extendFlags = kEFNoClumpMask;
2212 if (ap->a_flags & ALLOCATECONTIG)
2213 extendFlags |= kEFContigMask;
2214 if (ap->a_flags & ALLOCATEALL)
2215 extendFlags |= kEFAllMask;
2216 if (cred && suser(cred, NULL) != 0)
2217 extendFlags |= kEFReserveMask;
2218
2219 retval = E_NONE;
2220 blockHint = 0;
2221 startingPEOF = filebytes;
2222
2223 if (ap->a_flags & ALLOCATEFROMPEOF)
2224 length += filebytes;
2225 else if (ap->a_flags & ALLOCATEFROMVOL)
2226 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2227
2228 /* If no changes are necessary, then we're done */
2229 if (filebytes == length)
2230 goto Std_Exit;
2231
2232 /*
2233 * Lengthen the size of the file. We must ensure that the
2234 * last byte of the file is allocated. Since the smallest
2235 * value of filebytes is 0, length will be at least 1.
2236 */
2237 if (length > filebytes) {
2238 moreBytesRequested = length - filebytes;
2239
2240 #if QUOTA
2241 retval = hfs_chkdq(cp,
2242 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2243 cred, 0);
2244 if (retval)
2245 goto Err_Exit;
2246
2247 #endif /* QUOTA */
2248 /*
2249 * Metadata zone checks.
2250 */
2251 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2252 /*
2253 * Allocate Journal and Quota files in metadata zone.
2254 */
2255 if (hfs_virtualmetafile(cp)) {
2256 extendFlags |= kEFMetadataMask;
2257 blockHint = hfsmp->hfs_metazone_start;
2258 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2259 (blockHint <= hfsmp->hfs_metazone_end)) {
2260 /*
2261 * Move blockHint outside metadata zone.
2262 */
2263 blockHint = hfsmp->hfs_metazone_end + 1;
2264 }
2265 }
2266
2267 if (hfs_start_transaction(hfsmp) != 0) {
2268 retval = EINVAL;
2269 goto Err_Exit;
2270 }
2271
2272 /* Protect extents b-tree and allocation bitmap */
2273 lockflags = SFL_BITMAP;
2274 if (overflow_extents(fp))
2275 lockflags |= SFL_EXTENTS;
2276 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2277
2278 retval = MacToVFSError(ExtendFileC(vcb,
2279 (FCB*)fp,
2280 moreBytesRequested,
2281 blockHint,
2282 extendFlags,
2283 &actualBytesAdded));
2284
2285 *(ap->a_bytesallocated) = actualBytesAdded;
2286 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2287
2288 hfs_systemfile_unlock(hfsmp, lockflags);
2289
2290 if (hfsmp->jnl) {
2291 (void) hfs_update(vp, TRUE);
2292 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2293 }
2294
2295 hfs_end_transaction(hfsmp);
2296
2297 /*
2298 * If we get an error and no changes were made, then exit;
2299 * otherwise we must do the hfs_update to reflect the changes.
2300 */
2301 if (retval && (startingPEOF == filebytes))
2302 goto Err_Exit;
2303
2304 /*
2305 * Adjust actualBytesAdded to be allocation block aligned, not
2306 * clump size aligned.
2307 * NOTE: What we report here does not affect reality
2308 * until the file is closed, when we truncate the file to allocation
2309 * block size.
2310 */
2311 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2312 *(ap->a_bytesallocated) =
2313 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2314
2315 } else { /* Shorten the size of the file */
2316
2317 if (fp->ff_size > length) {
2318 /*
2319 * Any buffers that are past the truncation point need to be
2320 * invalidated (to maintain buffer cache consistency).
2321 */
2322 }
2323
2324 if (hfs_start_transaction(hfsmp) != 0) {
2325 retval = EINVAL;
2326 goto Err_Exit;
2327 }
2328
2329 /* Protect extents b-tree and allocation bitmap */
2330 lockflags = SFL_BITMAP;
2331 if (overflow_extents(fp))
2332 lockflags |= SFL_EXTENTS;
2333 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2334
2335 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2336
2337 hfs_systemfile_unlock(hfsmp, lockflags);
2338
2339 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2340
2341 if (hfsmp->jnl) {
2342 (void) hfs_update(vp, TRUE);
2343 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2344 }
2345
2346 hfs_end_transaction(hfsmp);
2347
2348
2349 /*
2350 * If we get an error and no changes were made, then exit;
2351 * otherwise we must do the hfs_update to reflect the changes.
2352 */
2353 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2354 #if QUOTA
2355 /* These are bytesreleased */
2356 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2357 #endif /* QUOTA */
2358
2359 if (fp->ff_size > filebytes) {
2360 fp->ff_size = filebytes;
2361
2362 hfs_unlock(cp);
2363 ubc_setsize(vp, fp->ff_size);
2364 hfs_lock(cp, HFS_FORCE_LOCK);
2365 }
2366 }
2367
2368 Std_Exit:
2369 cp->c_touch_chgtime = TRUE;
2370 cp->c_touch_modtime = TRUE;
2371 retval2 = hfs_update(vp, MNT_WAIT);
2372
2373 if (retval == 0)
2374 retval = retval2;
2375 Err_Exit:
2376 hfs_unlock(cp);
2377 return (retval);
2378 }
2379
2380
2381 /*
2382 * Pagein for HFS filesystem
2383 */
2384 int
2385 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2386 /*
2387 struct vnop_pagein_args {
2388 vnode_t a_vp,
2389 upl_t a_pl,
2390 vm_offset_t a_pl_offset,
2391 off_t a_f_offset,
2392 size_t a_size,
2393 int a_flags
2394 vfs_context_t a_context;
2395 };
2396 */
2397 {
2398 vnode_t vp = ap->a_vp;
2399 int error;
2400
2401 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2402 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2403 /*
2404 * Keep track of blocks read.
2405 */
2406 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2407 struct cnode *cp;
2408 struct filefork *fp;
2409 int bytesread;
2410 int took_cnode_lock = 0;
2411
2412 cp = VTOC(vp);
2413 fp = VTOF(vp);
2414
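		/*
		 * For a file smaller than a page, count only its true size
		 * as bytes read rather than the full page.
		 */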
2415 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2416 bytesread = fp->ff_size;
2417 else
2418 bytesread = ap->a_size;
2419
2420 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2421 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2422 hfs_lock(cp, HFS_FORCE_LOCK);
2423 took_cnode_lock = 1;
2424 }
2425 /*
2426 * If this file hasn't been seen since the start of
2427 * the current sampling period then start over.
2428 */
2429 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2430 struct timeval tv;
2431
2432 fp->ff_bytesread = bytesread;
2433 microtime(&tv);
2434 cp->c_atime = tv.tv_sec;
2435 } else {
2436 fp->ff_bytesread += bytesread;
2437 }
2438 cp->c_touch_acctime = TRUE;
2439 if (took_cnode_lock)
2440 hfs_unlock(cp);
2441 }
2442 return (error);
2443 }
2444
2445 /*
2446 * Pageout for HFS filesystem.
2447 */
2448 int
2449 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2450 /*
2451 struct vnop_pageout_args {
2452 vnode_t a_vp,
2453 upl_t a_pl,
2454 vm_offset_t a_pl_offset,
2455 off_t a_f_offset,
2456 size_t a_size,
2457 int a_flags
2458 vfs_context_t a_context;
2459 };
2460 */
2461 {
2462 vnode_t vp = ap->a_vp;
2463 struct cnode *cp;
2464 struct filefork *fp;
2465 int retval;
2466 off_t end_of_range;
2467 off_t filesize;
2468
2469 cp = VTOC(vp);
2470 if (cp->c_lockowner == current_thread()) {
2471 panic("pageout: %s cnode lock already held!\n",
2472 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2473 }
2474 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2475 return (retval);
2476 }
2477 fp = VTOF(vp);
2478
2479 filesize = fp->ff_size;
2480 end_of_range = ap->a_f_offset + ap->a_size - 1;
2481
2482 if (end_of_range >= filesize) {
2483 end_of_range = (off_t)(filesize - 1);
2484 }
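	/*
	 * The pages being pushed out now hold valid data, so drop any
	 * overlapping invalid (to-be-zero-filled) ranges and note that
	 * the logical EOF is dirty.
	 */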
2485 if (ap->a_f_offset < filesize) {
2486 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2487 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2488 }
2489 hfs_unlock(cp);
2490
2491 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2492 ap->a_size, filesize, ap->a_flags);
2493
2494 /*
2495 * If data was written, and setuid or setgid bits are set and
2496 * this process is not the superuser then clear the setuid and
2497 * setgid bits as a precaution against tampering.
2498 */
2499 if ((retval == 0) &&
2500 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2501 (vfs_context_suser(ap->a_context) != 0)) {
2502 hfs_lock(cp, HFS_FORCE_LOCK);
2503 cp->c_mode &= ~(S_ISUID | S_ISGID);
2504 cp->c_touch_chgtime = TRUE;
2505 hfs_unlock(cp);
2506 }
2507 return (retval);
2508 }
2509
2510 /*
2511 * Intercept B-Tree node writes to unswap them if necessary.
2512 */
2513 int
2514 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2515 {
2516 int retval = 0;
2517 register struct buf *bp = ap->a_bp;
2518 register struct vnode *vp = buf_vnode(bp);
2519 #if BYTE_ORDER == LITTLE_ENDIAN
2520 BlockDescriptor block;
2521
2522 /* Trap B-Tree writes */
2523 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2524 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2525 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2526
2527 /* Swap if the B-Tree node is in native byte order */
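		/*
		 * The last UInt16 in a B-tree node is the offset of record 0,
		 * which always equals sizeof(BTNodeDescriptor) (14, i.e. 0x000e).
		 * Reading 0x000e here on a little-endian CPU means the node is
		 * still in host byte order and must be swapped back to big-endian
		 * (disk) order before it is written.
		 */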
2528 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2529 /* Prepare the block pointer */
2530 block.blockHeader = bp;
2531 block.buffer = (char *)buf_dataptr(bp);
2532 /* not found in cache ==> came from disk */
2533 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2534 block.blockSize = buf_count(bp);
2535
2536 /* Endian un-swap B-Tree node */
2537 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2538 }
2539
2540 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2541 }
2542 #endif
2543 /* This buffer shouldn't be locked anymore but if it is, clear it */
2544 if ((buf_flags(bp) & B_LOCKED)) {
2545 // XXXdbg
2546 if (VTOHFS(vp)->jnl) {
2547 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2548 }
2549 buf_clearflags(bp, B_LOCKED);
2550 }
2551 retval = vn_bwrite (ap);
2552
2553 return (retval);
2554 }
2555
2556 /*
2557 * Relocate a file to a new location on disk
2558 * cnode must be locked on entry
2559 *
2560 * Relocation occurs by cloning the file's data from its
2561 * current set of blocks to a new set of blocks. During
2562 * the relocation all of the blocks (old and new) are
2563 * owned by the file.
2564 *
2565 *       -----------------
2566 *       |///////////////|
2567 *       -----------------
2568 *       0               N (file offset)
2569 *
2570 *       -----------------     -----------------
2571 *       |///////////////|     |               |   STEP 1 (acquire new blocks)
2572 *       -----------------     -----------------
2573 *       0               N     N+1            2N
2574 *
2575 *       -----------------     -----------------
2576 *       |///////////////|     |///////////////|   STEP 2 (clone data)
2577 *       -----------------     -----------------
2578 *       0               N     N+1            2N
2579 *
2580 *                             -----------------
2581 *                             |///////////////|   STEP 3 (head truncate blocks)
2582 *                             -----------------
2583 *                             0               N
2584 *
2585 * During steps 2 and 3 page-outs to file offsets less
2586 * than or equal to N are suspended.
2587 *
2588 * During step 3 page-ins to the file get suspended.
2589 */
2590 __private_extern__
2591 int
2592 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2593 struct proc *p)
2594 {
2595 struct cnode *cp;
2596 struct filefork *fp;
2597 struct hfsmount *hfsmp;
2598 u_int32_t headblks;
2599 u_int32_t datablks;
2600 u_int32_t blksize;
2601 u_int32_t growsize;
2602 u_int32_t nextallocsave;
2603 daddr64_t sector_a, sector_b;
2604 int disabled_caching = 0;
2605 int eflags;
2606 off_t newbytes;
2607 int retval;
2608 int lockflags = 0;
2609 int took_trunc_lock = 0;
2610 int started_tr = 0;
2611 enum vtype vnodetype;
2612
2613 vnodetype = vnode_vtype(vp);
2614 if (vnodetype != VREG && vnodetype != VLNK) {
2615 return (EPERM);
2616 }
2617
2618 hfsmp = VTOHFS(vp);
2619 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2620 return (ENOSPC);
2621 }
2622
2623 cp = VTOC(vp);
2624 fp = VTOF(vp);
2625 if (fp->ff_unallocblocks)
2626 return (EINVAL);
2627 blksize = hfsmp->blockSize;
2628 if (blockHint == 0)
2629 blockHint = hfsmp->nextAllocation;
2630
2631 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2632 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2633 return (EFBIG);
2634 }
2635
2636 //
2637 // We do not believe that this call to hfs_fsync() is
2638 // necessary and it causes a journal transaction
2639 // deadlock so we are removing it.
2640 //
2641 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2642 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2643 // if (retval)
2644 // return (retval);
2645 //}
2646
2647 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2648 hfs_unlock(cp);
2649 hfs_lock_truncate(cp, TRUE);
2650 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2651 hfs_unlock_truncate(cp);
2652 return (retval);
2653 }
2654 took_trunc_lock = 1;
2655 }
2656 headblks = fp->ff_blocks;
2657 datablks = howmany(fp->ff_size, blksize);
2658 growsize = datablks * blksize;
2659 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2660 if (blockHint >= hfsmp->hfs_metazone_start &&
2661 blockHint <= hfsmp->hfs_metazone_end)
2662 eflags |= kEFMetadataMask;
2663
2664 if (hfs_start_transaction(hfsmp) != 0) {
2665 if (took_trunc_lock)
2666 hfs_unlock_truncate(cp);
2667 return (EINVAL);
2668 }
2669 started_tr = 1;
2670 /*
2671 * Protect the extents b-tree and the allocation bitmap
2672 * during MapFileBlockC and ExtendFileC operations.
2673 */
2674 lockflags = SFL_BITMAP;
2675 if (overflow_extents(fp))
2676 lockflags |= SFL_EXTENTS;
2677 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2678
2679 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2680 if (retval) {
2681 retval = MacToVFSError(retval);
2682 goto out;
2683 }
2684
2685 /*
2686 * STEP 1 - acquire new allocation blocks.
2687 */
2688 if (!vnode_isnocache(vp)) {
2689 vnode_setnocache(vp);
2690 disabled_caching = 1;
2691
2692 }
2693 nextallocsave = hfsmp->nextAllocation;
2694 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2695 if (eflags & kEFMetadataMask) {
2696 HFS_MOUNT_LOCK(hfsmp, TRUE);
2697 hfsmp->nextAllocation = nextallocsave;
2698 hfsmp->vcbFlags |= 0xFF00;
2699 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2700 }
2701
2702 retval = MacToVFSError(retval);
2703 if (retval == 0) {
2704 cp->c_flag |= C_MODIFIED;
2705 if (newbytes < growsize) {
2706 retval = ENOSPC;
2707 goto restore;
2708 } else if (fp->ff_blocks < (headblks + datablks)) {
2709 printf("hfs_relocate: allocation failed");
2710 retval = ENOSPC;
2711 goto restore;
2712 }
2713
2714 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2715 if (retval) {
2716 retval = MacToVFSError(retval);
2717 } else if ((sector_a + 1) == sector_b) {
2718 retval = ENOSPC;
2719 goto restore;
2720 } else if ((eflags & kEFMetadataMask) &&
2721 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2722 hfsmp->hfs_metazone_end)) {
2723 printf("hfs_relocate: didn't move into metadata zone\n");
2724 retval = ENOSPC;
2725 goto restore;
2726 }
2727 }
2728 /* Done with system locks and journal for now. */
2729 hfs_systemfile_unlock(hfsmp, lockflags);
2730 lockflags = 0;
2731 hfs_end_transaction(hfsmp);
2732 started_tr = 0;
2733
2734 if (retval) {
2735 /*
2736 * Check to see if failure is due to excessive fragmentation.
2737 */
2738 if ((retval == ENOSPC) &&
2739 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2740 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2741 }
2742 goto out;
2743 }
2744 /*
2745 * STEP 2 - clone file data into the new allocation blocks.
2746 */
2747
2748 if (vnodetype == VLNK)
2749 retval = hfs_clonelink(vp, blksize, cred, p);
2750 else if (vnode_issystem(vp))
2751 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2752 else
2753 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2754
2755 /* Start transaction for step 3 or for a restore. */
2756 if (hfs_start_transaction(hfsmp) != 0) {
2757 retval = EINVAL;
2758 goto out;
2759 }
2760 started_tr = 1;
2761 if (retval)
2762 goto restore;
2763
2764 /*
2765 * STEP 3 - switch to cloned data and remove old blocks.
2766 */
2767 lockflags = SFL_BITMAP;
2768 if (overflow_extents(fp))
2769 lockflags |= SFL_EXTENTS;
2770 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2771
2772 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2773
2774 hfs_systemfile_unlock(hfsmp, lockflags);
2775 lockflags = 0;
2776 if (retval)
2777 goto restore;
2778 out:
2779 if (took_trunc_lock)
2780 hfs_unlock_truncate(cp);
2781
2782 if (lockflags) {
2783 hfs_systemfile_unlock(hfsmp, lockflags);
2784 lockflags = 0;
2785 }
2786
2787 // See comment up above about calls to hfs_fsync()
2788 //
2789 //if (retval == 0)
2790 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2791
2792 if (hfsmp->jnl) {
2793 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2794 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2795 else
2796 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2797 }
2798 exit:
2799 if (disabled_caching) {
2800 vnode_clearnocache(vp);
2801 }
2802 if (started_tr)
2803 hfs_end_transaction(hfsmp);
2804
2805 return (retval);
2806
2807 restore:
2808 if (fp->ff_blocks == headblks)
2809 goto exit;
2810 /*
2811 * Give back any newly allocated space.
2812 */
2813 if (lockflags == 0) {
2814 lockflags = SFL_BITMAP;
2815 if (overflow_extents(fp))
2816 lockflags |= SFL_EXTENTS;
2817 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2818 }
2819
2820 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2821
2822 hfs_systemfile_unlock(hfsmp, lockflags);
2823 lockflags = 0;
2824
2825 if (took_trunc_lock)
2826 hfs_unlock_truncate(cp);
2827 goto exit;
2828 }
2829
2830
2831 /*
2832 * Clone a symlink.
2833 *
2834 */
2835 static int
2836 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2837 {
2838 struct buf *head_bp = NULL;
2839 struct buf *tail_bp = NULL;
2840 int error;
2841
2842
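	/*
	 * A symlink's target fits in a single block: read logical block 0
	 * (the original copy) and write its contents into logical block 1
	 * (the newly allocated block).
	 */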
2843 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2844 if (error)
2845 goto out;
2846
2847 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2848 if (tail_bp == NULL) {
2849 error = EIO;
2850 goto out;
2851 }
2852 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2853 error = (int)buf_bwrite(tail_bp);
2854 out:
2855 if (head_bp) {
2856 buf_markinvalid(head_bp);
2857 buf_brelse(head_bp);
2858 }
2859 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2860
2861 return (error);
2862 }
2863
2864 /*
2865 * Clone a file's data within the file.
2866 *
2867 */
2868 static int
2869 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2870 {
2871 caddr_t bufp;
2872 size_t writebase;
2873 size_t bufsize;
2874 size_t copysize;
2875 size_t iosize;
2876 off_t filesize;
2877 size_t offset;
2878 uio_t auio;
2879 int error = 0;
2880
2881 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2882 writebase = blkstart * blksize;
2883 copysize = blkcnt * blksize;
2884 iosize = bufsize = MIN(copysize, 4096 * 16);
2885 offset = 0;
2886
2887 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2888 return (ENOMEM);
2889 }
2890 hfs_unlock(VTOC(vp));
2891
2892 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2893
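	/*
	 * Copy the original data in bufsize chunks: each pass reads from
	 * logical offset 'offset' and rewrites the same data at
	 * writebase + offset, which falls inside the newly allocated blocks.
	 */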
2894 while (offset < copysize) {
2895 iosize = MIN(copysize - offset, iosize);
2896
2897 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2898 uio_addiov(auio, (uintptr_t)bufp, iosize);
2899
2900 error = cluster_read(vp, auio, copysize, 0);
2901 if (error) {
2902 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2903 break;
2904 }
2905 if (uio_resid(auio) != 0) {
2906 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2907 error = EIO;
2908 break;
2909 }
2910
2911 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2912 uio_addiov(auio, (uintptr_t)bufp, iosize);
2913
2914 error = cluster_write(vp, auio, filesize + offset,
2915 filesize + offset + iosize,
2916 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2917 if (error) {
2918 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2919 break;
2920 }
2921 if (uio_resid(auio) != 0) {
2922 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2923 error = EIO;
2924 break;
2925 }
2926 offset += iosize;
2927 }
2928 uio_free(auio);
2929
2930 /*
2931 * No need to call ubc_sync_range or hfs_invalbuf
2932 * since the file was copied using IO_NOCACHE.
2933 */
2934
2935 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2936
2937 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2938 return (error);
2939 }
2940
2941 /*
2942 * Clone a system (metadata) file.
2943 *
2944 */
2945 static int
2946 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2947 kauth_cred_t cred, struct proc *p)
2948 {
2949 caddr_t bufp;
2950 char * offset;
2951 size_t bufsize;
2952 size_t iosize;
2953 struct buf *bp = NULL;
2954 daddr64_t blkno;
2955 daddr64_t blk;
2956 daddr64_t start_blk;
2957 daddr64_t last_blk;
2958 int breadcnt;
2959 int i;
2960 int error = 0;
2961
2962
2963 iosize = GetLogicalBlockSize(vp);
2964 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2965 breadcnt = bufsize / iosize;
2966
2967 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2968 return (ENOMEM);
2969 }
2970 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2971 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2972 blkno = 0;
2973
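	/*
	 * Copy the fork in bufsize chunks: read logical blocks
	 * [0, last_blk) and write them back at start_blk + blkno,
	 * i.e. into the newly allocated run.
	 */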
2974 while (blkno < last_blk) {
2975 /*
2976 * Read up to a megabyte
2977 */
2978 offset = bufp;
2979 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
2980 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
2981 if (error) {
2982 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2983 goto out;
2984 }
2985 if (buf_count(bp) != iosize) {
2986 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
2987 goto out;
2988 }
2989 bcopy((char *)buf_dataptr(bp), offset, iosize);
2990
2991 buf_markinvalid(bp);
2992 buf_brelse(bp);
2993 bp = NULL;
2994
2995 offset += iosize;
2996 }
2997
2998 /*
2999 * Write up to a megabyte
3000 */
3001 offset = bufp;
3002 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3003 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3004 if (bp == NULL) {
3005 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3006 error = EIO;
3007 goto out;
3008 }
3009 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3010 error = (int)buf_bwrite(bp);
3011 bp = NULL;
3012 if (error)
3013 goto out;
3014 offset += iosize;
3015 }
3016 }
3017 out:
3018 if (bp) {
3019 buf_brelse(bp);
3020 }
3021
3022 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3023
3024 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3025
3026 return (error);
3027 }