1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
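/* can_cluster: a block size qualifies for cluster I/O only if it is a multiple of 4K and no larger than MAXPHYSIO/2. */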
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start
343 of the transfer is invalid and should be zero-filled
344 as part of the transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data, but in all cases we merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start is invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
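/*
 * CACHE_ELEMS is the capacity of the per-call parent-directory access cache,
 * CACHE_LEVELS is the maximum number of ancestors remembered per access check,
 * and PARENT_IDS_FLAG indicates the caller passed parent ids rather than leaf file ids.
 */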
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 task = current_task();
995 task_working_set_disable(task);
996
997 // flush things before we get started to try and prevent
998 // dirty data from being paged out while we're frozen.
999 // note: can't do this after taking the lock as it will
1000 // deadlock against ourselves.
1001 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1002 hfs_global_exclusive_lock_acquire(hfsmp);
1003 journal_flush(hfsmp->jnl);
1004 // don't need to iterate on all vnodes, we just need to
1005 // wait for writes to the system files and the device vnode
1006 // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1007 if (HFSTOVCB(hfsmp)->extentsRefNum)
1008 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1009 if (HFSTOVCB(hfsmp)->catalogRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1013 if (hfsmp->hfs_attribute_vp)
1014 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1015 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1016
1017 hfsmp->hfs_freezing_proc = current_proc();
1018
1019 return (0);
1020 }
1021
1022 case F_THAW_FS: {
1023 if (!is_suser())
1024 return (EACCES);
1025
1026 // if we're not the one who froze the fs then we
1027 // can't thaw it.
1028 if (hfsmp->hfs_freezing_proc != current_proc()) {
1029 return EINVAL;
1030 }
1031
1032 // NOTE: if you add code here, also go check the
1033 // code that "thaws" the fs in hfs_vnop_close()
1034 //
1035 hfsmp->hfs_freezing_proc = NULL;
1036 hfs_global_exclusive_lock_release(hfsmp);
1037
1038 return (0);
1039 }
1040
1041 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1042 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1043
1044 case HFS_BULKACCESS_FSCTL:
1045 case HFS_BULKACCESS: {
1046 /*
1047 * NOTE: on entry, the vnode is locked. In case this vnode
1048 * happens to be in our list of file_ids, we'll note it to
1049 * avoid calling hfs_chashget_nowait() on that id as that
1050 * will cause a "locking against myself" panic.
1051 */
1052 Boolean check_leaf = true;
1053
1054 struct user_access_t *user_access_structp;
1055 struct user_access_t tmp_user_access_t;
1056 struct access_cache cache;
1057
1058 int error = 0, i;
1059
1060 dev_t dev = VTOC(vp)->c_dev;
1061
1062 short flags;
1063 struct ucred myucred; /* XXX ILLEGAL */
1064 int num_files;
1065 int *file_ids = NULL;
1066 short *access = NULL;
1067
1068 cnid_t cnid;
1069 cnid_t prevParent_cnid = 0;
1070 unsigned long myPerms;
1071 short myaccess = 0;
1072 struct cat_attr cnattr;
1073 CatalogKey catkey;
1074 struct cnode *skip_cp = VTOC(vp);
1075 struct vfs_context my_context;
1076
1077 /* first, return error if not run as root */
1078 if (cred->cr_ruid != 0) {
1079 return EPERM;
1080 }
1081
1082 /* initialize the local cache and buffers */
1083 cache.numcached = 0;
1084 cache.cachehits = 0;
1085 cache.lookups = 0;
1086
1087 file_ids = (int *) get_pathbuff();
1088 access = (short *) get_pathbuff();
1089 cache.acache = (int *) get_pathbuff();
1090 cache.haveaccess = (Boolean *) get_pathbuff();
1091
1092 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1093 release_pathbuff((char *) file_ids);
1094 release_pathbuff((char *) access);
1095 release_pathbuff((char *) cache.acache);
1096 release_pathbuff((char *) cache.haveaccess);
1097
1098 return ENOMEM;
1099 }
1100
1101 /* struct copyin done during dispatch... need to copy file_id array separately */
1102 if (ap->a_data == NULL) {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 if (is64bit) {
1108 user_access_structp = (struct user_access_t *)ap->a_data;
1109 }
1110 else {
1111 struct access_t * accessp = (struct access_t *)ap->a_data;
1112 tmp_user_access_t.uid = accessp->uid;
1113 tmp_user_access_t.flags = accessp->flags;
1114 tmp_user_access_t.num_groups = accessp->num_groups;
1115 tmp_user_access_t.num_files = accessp->num_files;
1116 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1117 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1118 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1119 user_access_structp = &tmp_user_access_t;
1120 }
1121
1122 num_files = user_access_structp->num_files;
1123 if (num_files < 1) {
1124 goto err_exit_bulk_access;
1125 }
1126 if (num_files > 256) {
1127 error = EINVAL;
1128 goto err_exit_bulk_access;
1129 }
1130
1131 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1132 num_files * sizeof(int)))) {
1133 goto err_exit_bulk_access;
1134 }
1135
1136 /* fill in the ucred structure */
1137 flags = user_access_structp->flags;
1138 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1139 flags = R_OK;
1140 }
1141
1142 /* check if we've been passed leaf node ids or parent ids */
1143 if (flags & PARENT_IDS_FLAG) {
1144 check_leaf = false;
1145 }
1146
1147 memset(&myucred, 0, sizeof(myucred));
1148 myucred.cr_ref = 1;
1149 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1150 myucred.cr_ngroups = user_access_structp->num_groups;
1151 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1152 myucred.cr_ngroups = 0;
1153 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1154 myucred.cr_ngroups * sizeof(gid_t)))) {
1155 goto err_exit_bulk_access;
1156 }
1157 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1158
1159 my_context.vc_proc = p;
1160 my_context.vc_ucred = &myucred;
1161
1162 /* Check access to each file_id passed in */
1163 for (i = 0; i < num_files; i++) {
1164 #if 0
1165 cnid = (cnid_t) file_ids[i];
1166
1167 /* root always has access */
1168 if (!suser(&myucred, NULL)) {
1169 access[i] = 0;
1170 continue;
1171 }
1172
1173 if (check_leaf) {
1174
1175 /* do the lookup (checks the cnode hash, then the catalog) */
1176 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1177 if (error) {
1178 access[i] = (short) error;
1179 continue;
1180 }
1181
1182 /* before calling CheckAccess(), check the target file for read access */
1183 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1184 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1185
1186
1187 /* fail fast if no access */
1188 if ((myPerms & flags) == 0) {
1189 access[i] = EACCES;
1190 continue;
1191 }
1192 } else {
1193 /* we were passed an array of parent ids */
1194 catkey.hfsPlus.parentID = cnid;
1195 }
1196
1197 /* if the last guy had the same parent and had access, we're done */
1198 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1199 cache.cachehits++;
1200 access[i] = 0;
1201 continue;
1202 }
1203
1204 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1205 skip_cp, p, &myucred, dev);
1206
1207 if ( myaccess ) {
1208 access[i] = 0; // have access.. no errors to report
1209 } else {
1210 access[i] = (error != 0 ? (short) error : EACCES);
1211 }
1212
1213 prevParent_cnid = catkey.hfsPlus.parentID;
1214 #else
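/* Walk from this file id up toward the root: authorize read access on the file itself and search access (falling back to read) on each ancestor directory via vnode_authorize(). */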
1215 int myErr;
1216
1217 cnid = (cnid_t)file_ids[i];
1218
1219 while (cnid >= kRootDirID) {
1220 /* get the vnode for this cnid */
1221 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1222 if ( myErr ) {
1223 access[i] = EACCES;
1224 break;
1225 }
1226
1227 cnid = VTOC(vp)->c_parentcnid;
1228
1229 hfs_unlock(VTOC(vp));
1230 if (vnode_vtype(vp) == VDIR) {
1231 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &my_context);
1232 if (myErr) {
1233 // try again with just read-access
1234 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1235 }
1236 } else {
1237 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238 }
1239 vnode_put(vp);
1240 access[i] = myErr;
1241 if (myErr) {
1242 break;
1243 }
1244 }
1245 #endif
1246 }
1247
1248 /* copyout the access array */
1249 if ((error = copyout((caddr_t)access, user_access_structp->access,
1250 num_files * sizeof (short)))) {
1251 goto err_exit_bulk_access;
1252 }
1253
1254 err_exit_bulk_access:
1255
1256 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257
1258 release_pathbuff((char *) cache.acache);
1259 release_pathbuff((char *) cache.haveaccess);
1260 release_pathbuff((char *) file_ids);
1261 release_pathbuff((char *) access);
1262
1263 return (error);
1264 } /* HFS_BULKACCESS */
1265
1266 case HFS_SETACLSTATE: {
1267 int state;
1268
1269 if (!is_suser()) {
1270 return (EPERM);
1271 }
1272 if (ap->a_data == NULL) {
1273 return (EINVAL);
1274 }
1275 state = *(int *)ap->a_data;
1276 if (state == 0 || state == 1)
1277 return hfs_setextendedsecurity(hfsmp, state);
1278 else
1279 return (EINVAL);
1280 }
1281
1282 case F_FULLFSYNC: {
1283 int error;
1284
1285 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1286 if (error == 0) {
1287 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1288 hfs_unlock(VTOC(vp));
1289 }
1290
1291 return error;
1292 }
1293
1294 case F_CHKCLEAN: {
1295 register struct cnode *cp;
1296 int error;
1297
1298 if (!vnode_isreg(vp))
1299 return EINVAL;
1300
1301 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1302 if (error == 0) {
1303 cp = VTOC(vp);
1304 /*
1305 * used by the regression test to determine if
1306 * all the dirty pages (via write) have been cleaned
1307 * after a call to 'fsync'.
1308 */
1309 error = is_file_clean(vp, VTOF(vp)->ff_size);
1310 hfs_unlock(cp);
1311 }
1312 return (error);
1313 }
1314
1315 case F_RDADVISE: {
1316 register struct radvisory *ra;
1317 struct filefork *fp;
1318 int error;
1319
1320 if (!vnode_isreg(vp))
1321 return EINVAL;
1322
1323 ra = (struct radvisory *)(ap->a_data);
1324 fp = VTOF(vp);
1325
1326 /* Protect against a size change. */
1327 hfs_lock_truncate(VTOC(vp), TRUE);
1328
1329 if (ra->ra_offset >= fp->ff_size) {
1330 error = EFBIG;
1331 } else {
1332 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1333 }
1334
1335 hfs_unlock_truncate(VTOC(vp));
1336 return (error);
1337 }
1338
1339 case F_READBOOTSTRAP:
1340 case F_WRITEBOOTSTRAP:
1341 {
1342 struct vnode *devvp = NULL;
1343 user_fbootstraptransfer_t *user_bootstrapp;
1344 int devBlockSize;
1345 int error;
1346 uio_t auio;
1347 daddr64_t blockNumber;
1348 u_long blockOffset;
1349 u_long xfersize;
1350 struct buf *bp;
1351 user_fbootstraptransfer_t user_bootstrap;
1352
1353 if (!vnode_isvroot(vp))
1354 return (EINVAL);
1355 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1356 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1357 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1358 */
1359 if (is64bit) {
1360 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1361 }
1362 else {
1363 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1364 user_bootstrapp = &user_bootstrap;
1365 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1366 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1367 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1368 }
1369 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1370 return EINVAL;
1371
1372 devvp = VTOHFS(vp)->hfs_devvp;
1373 auio = uio_create(1, user_bootstrapp->fbt_offset,
1374 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1375 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1376 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1377
1378 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1379
1380 while (uio_resid(auio) > 0) {
1381 blockNumber = uio_offset(auio) / devBlockSize;
1382 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1383 if (error) {
1384 if (bp) buf_brelse(bp);
1385 uio_free(auio);
1386 return error;
1387 };
1388
1389 blockOffset = uio_offset(auio) % devBlockSize;
1390 xfersize = devBlockSize - blockOffset;
1391 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1392 if (error) {
1393 buf_brelse(bp);
1394 uio_free(auio);
1395 return error;
1396 };
1397 if (uio_rw(auio) == UIO_WRITE) {
1398 error = VNOP_BWRITE(bp);
1399 if (error) {
1400 uio_free(auio);
1401 return error;
1402 }
1403 } else {
1404 buf_brelse(bp);
1405 };
1406 };
1407 uio_free(auio);
1408 };
1409 return 0;
1410
1411 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1412 {
1413 if (is64bit) {
1414 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1415 }
1416 else {
1417 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1418 }
1419 return 0;
1420 }
1421
1422 case HFS_GET_MOUNT_TIME:
1423 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1424 break;
1425
1426 case HFS_GET_LAST_MTIME:
1427 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1428 break;
1429
1430 case HFS_SET_BOOT_INFO:
1431 if (!vnode_isvroot(vp))
1432 return(EINVAL);
1433 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1434 return(EACCES); /* must be superuser or owner of filesystem */
1435 HFS_MOUNT_LOCK(hfsmp, TRUE);
1436 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1437 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1438 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1439 break;
1440
1441 case HFS_GET_BOOT_INFO:
1442 if (!vnode_isvroot(vp))
1443 return(EINVAL);
1444 HFS_MOUNT_LOCK(hfsmp, TRUE);
1445 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1446 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1447 break;
1448
1449 default:
1450 return (ENOTTY);
1451 }
1452
1453 /* Should never get here */
1454 return 0;
1455 }
1456
1457 /*
1458 * select
1459 */
1460 int
1461 hfs_vnop_select(__unused struct vnop_select_args *ap)
1462 /*
1463 struct vnop_select_args {
1464 vnode_t a_vp;
1465 int a_which;
1466 int a_fflags;
1467 void *a_wql;
1468 vfs_context_t a_context;
1469 };
1470 */
1471 {
1472 /*
1473 * We should really check to see if I/O is possible.
1474 */
1475 return (1);
1476 }
1477
1478 /*
1479 * Converts a logical block number to a physical block, and optionally returns
1480 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1481 * The physical block number is based on the device block size, currently 512 bytes.
1482 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1483 */
1484 int
1485 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1486 {
1487 struct cnode *cp = VTOC(vp);
1488 struct filefork *fp = VTOF(vp);
1489 struct hfsmount *hfsmp = VTOHFS(vp);
1490 int retval = E_NONE;
1491 daddr_t logBlockSize;
1492 size_t bytesContAvail = 0;
1493 off_t blockposition;
1494 int lockExtBtree;
1495 int lockflags = 0;
1496
1497 /*
1498 * Check for underlying vnode requests and ensure that logical
1499 * to physical mapping is requested.
1500 */
1501 if (vpp != NULL)
1502 *vpp = cp->c_devvp;
1503 if (bnp == NULL)
1504 return (0);
1505
1506 logBlockSize = GetLogicalBlockSize(vp);
1507 blockposition = (off_t)bn * (off_t)logBlockSize;
1508
1509 lockExtBtree = overflow_extents(fp);
1510
1511 if (lockExtBtree)
1512 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1513
1514 retval = MacToVFSError(
1515 MapFileBlockC (HFSTOVCB(hfsmp),
1516 (FCB*)fp,
1517 MAXPHYSIO,
1518 blockposition,
1519 bnp,
1520 &bytesContAvail));
1521
1522 if (lockExtBtree)
1523 hfs_systemfile_unlock(hfsmp, lockflags);
1524
1525 if (retval == E_NONE) {
1526 /* Figure out how many read ahead blocks there are */
1527 if (runp != NULL) {
1528 if (can_cluster(logBlockSize)) {
1529 /* Make sure this result never goes negative: */
1530 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1531 } else {
1532 *runp = 0;
1533 }
1534 }
1535 }
1536 return (retval);
1537 }
1538
1539 /*
1540 * Convert logical block number to file offset.
1541 */
1542 int
1543 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1544 /*
1545 struct vnop_blktooff_args {
1546 vnode_t a_vp;
1547 daddr64_t a_lblkno;
1548 off_t *a_offset;
1549 };
1550 */
1551 {
1552 if (ap->a_vp == NULL)
1553 return (EINVAL);
1554 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1555
1556 return(0);
1557 }
1558
1559 /*
1560 * Convert file offset to logical block number.
1561 */
1562 int
1563 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1564 /*
1565 struct vnop_offtoblk_args {
1566 vnode_t a_vp;
1567 off_t a_offset;
1568 daddr64_t *a_lblkno;
1569 };
1570 */
1571 {
1572 if (ap->a_vp == NULL)
1573 return (EINVAL);
1574 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1575
1576 return(0);
1577 }
1578
1579 /*
1580 * Map file offset to physical block number.
1581 *
1582 * System file cnodes are expected to be locked (shared or exclusive).
1583 */
1584 int
1585 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1586 /*
1587 struct vnop_blockmap_args {
1588 vnode_t a_vp;
1589 off_t a_foffset;
1590 size_t a_size;
1591 daddr64_t *a_bpn;
1592 size_t *a_run;
1593 void *a_poff;
1594 int a_flags;
1595 vfs_context_t a_context;
1596 };
1597 */
1598 {
1599 struct vnode *vp = ap->a_vp;
1600 struct cnode *cp;
1601 struct filefork *fp;
1602 struct hfsmount *hfsmp;
1603 size_t bytesContAvail = 0;
1604 int retval = E_NONE;
1605 int syslocks = 0;
1606 int lockflags = 0;
1607 struct rl_entry *invalid_range;
1608 enum rl_overlaptype overlaptype;
1609 int started_tr = 0;
1610 int tooklock = 0;
1611
1612 /*
1613 * Check for underlying vnode requests and ensure that logical
1614 * to physical mapping is requested.
1615 */
1616 if (ap->a_bpn == NULL)
1617 return (0);
1618
1619 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1620 if (VTOC(vp)->c_lockowner != current_thread()) {
1621 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1622 tooklock = 1;
1623 } else {
1624 cp = VTOC(vp);
1625 panic("blockmap: %s cnode lock already held!\n",
1626 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1627 }
1628 }
1629 hfsmp = VTOHFS(vp);
1630 cp = VTOC(vp);
1631 fp = VTOF(vp);
1632
1633 retry:
1634 if (fp->ff_unallocblocks) {
1635 if (hfs_start_transaction(hfsmp) != 0) {
1636 retval = EINVAL;
1637 goto exit;
1638 } else {
1639 started_tr = 1;
1640 }
1641 syslocks = SFL_EXTENTS | SFL_BITMAP;
1642
1643 } else if (overflow_extents(fp)) {
1644 syslocks = SFL_EXTENTS;
1645 }
1646
1647 if (syslocks)
1648 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1649
1650 /*
1651 * Check for any delayed allocations.
1652 */
1653 if (fp->ff_unallocblocks) {
1654 SInt64 actbytes;
1655 u_int32_t loanedBlocks;
1656
1657 //
1658 // Make sure we have a transaction. It's possible
1659 // that we came in and fp->ff_unallocblocks was zero
1660 // but during the time we blocked acquiring the extents
1661 // btree, ff_unallocblocks became non-zero and so we
1662 // will need to start a transaction.
1663 //
1664 if (started_tr == 0) {
1665 if (syslocks) {
1666 hfs_systemfile_unlock(hfsmp, lockflags);
1667 syslocks = 0;
1668 }
1669 goto retry;
1670 }
1671
1672 /*
1673 * Note: ExtendFileC will release any blocks on loan and
1674 * acquire real blocks. So we ask to extend by zero bytes
1675 * since ExtendFileC will account for the virtual blocks.
1676 */
1677
1678 loanedBlocks = fp->ff_unallocblocks;
1679 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1680 kEFAllMask | kEFNoClumpMask, &actbytes);
1681
1682 if (retval) {
1683 fp->ff_unallocblocks = loanedBlocks;
1684 cp->c_blocks += loanedBlocks;
1685 fp->ff_blocks += loanedBlocks;
1686
1687 HFS_MOUNT_LOCK(hfsmp, TRUE);
1688 hfsmp->loanedBlocks += loanedBlocks;
1689 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1690 }
1691
1692 if (retval) {
1693 hfs_systemfile_unlock(hfsmp, lockflags);
1694 cp->c_flag |= C_MODIFIED;
1695 if (started_tr) {
1696 (void) hfs_update(vp, TRUE);
1697 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1698
1699 hfs_end_transaction(hfsmp);
1700 }
1701 goto exit;
1702 }
1703 }
1704
1705 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1706 ap->a_bpn, &bytesContAvail);
1707 if (syslocks) {
1708 hfs_systemfile_unlock(hfsmp, lockflags);
1709 syslocks = 0;
1710 }
1711
1712 if (started_tr) {
1713 (void) hfs_update(vp, TRUE);
1714 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1715 hfs_end_transaction(hfsmp);
1716 started_tr = 0;
1717 }
1718 if (retval) {
1719 goto exit;
1720 }
1721
1722 /* Adjust the mapping information for invalid file ranges: */
1723 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1724 ap->a_foffset + (off_t)bytesContAvail - 1,
1725 &invalid_range);
1726 if (overlaptype != RL_NOOVERLAP) {
1727 switch(overlaptype) {
1728 case RL_MATCHINGOVERLAP:
1729 case RL_OVERLAPCONTAINSRANGE:
1730 case RL_OVERLAPSTARTSBEFORE:
1731 /* There's no valid block for this byte offset: */
1732 *ap->a_bpn = (daddr64_t)-1;
1733 /* There's no point limiting the amount to be returned
1734 * if the invalid range that was hit extends all the way
1735 * to the EOF (i.e. there's no valid bytes between the
1736 * end of this range and the file's EOF):
1737 */
1738 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1739 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1740 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1741 }
1742 break;
1743
1744 case RL_OVERLAPISCONTAINED:
1745 case RL_OVERLAPENDSAFTER:
1746 /* The range of interest hits an invalid block before the end: */
1747 if (invalid_range->rl_start == ap->a_foffset) {
1748 /* There's actually no valid information to be had starting here: */
1749 *ap->a_bpn = (daddr64_t)-1;
1750 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1751 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1752 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1753 }
1754 } else {
1755 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1756 }
1757 break;
1758
1759 case RL_NOOVERLAP:
1760 break;
1761 } /* end switch */
1762 if (bytesContAvail > ap->a_size)
1763 bytesContAvail = ap->a_size;
1764 }
1765 if (ap->a_run)
1766 *ap->a_run = bytesContAvail;
1767
1768 if (ap->a_poff)
1769 *(int *)ap->a_poff = 0;
1770 exit:
1771 if (tooklock)
1772 hfs_unlock(cp);
1773
1774 return (MacToVFSError(retval));
1775 }
1776
1777
1778 /*
1779 * prepare and issue the I/O
1780 * buf_strategy knows how to deal
1781 * with requests that require
1782 * fragmented I/Os
1783 */
1784 int
1785 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1786 {
1787 buf_t bp = ap->a_bp;
1788 vnode_t vp = buf_vnode(bp);
1789 struct cnode *cp = VTOC(vp);
1790
1791 return (buf_strategy(cp->c_devvp, ap));
1792 }
1793
1794
1795 static int
1796 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1797 {
1798 register struct cnode *cp = VTOC(vp);
1799 struct filefork *fp = VTOF(vp);
1800 struct proc *p = vfs_context_proc(context);
1801 kauth_cred_t cred = vfs_context_ucred(context);
1802 int retval;
1803 off_t bytesToAdd;
1804 off_t actualBytesAdded;
1805 off_t filebytes;
1806 u_long fileblocks;
1807 int blksize;
1808 struct hfsmount *hfsmp;
1809 int lockflags;
1810
1811 blksize = VTOVCB(vp)->blockSize;
1812 fileblocks = fp->ff_blocks;
1813 filebytes = (off_t)fileblocks * (off_t)blksize;
1814
1815 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1816 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1817
1818 if (length < 0)
1819 return (EINVAL);
1820
1821 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1822 return (EFBIG);
1823
1824 hfsmp = VTOHFS(vp);
1825
1826 retval = E_NONE;
1827
1828 /* Files that are changing size are not hot file candidates. */
1829 if (hfsmp->hfc_stage == HFC_RECORDING) {
1830 fp->ff_bytesread = 0;
1831 }
1832
1833 /*
1834 * We cannot just check if fp->ff_size == length (as an optimization)
1835 * since there may be extra physical blocks that also need truncation.
1836 */
1837 #if QUOTA
1838 if ((retval = hfs_getinoquota(cp)))
1839 return(retval);
1840 #endif /* QUOTA */
1841
1842 /*
1843 * Lengthen the size of the file. We must ensure that the
1844 * last byte of the file is allocated. Since the smallest
1845 * value of ff_size is 0, length will be at least 1.
1846 */
1847 if (length > (off_t)fp->ff_size) {
1848 #if QUOTA
1849 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1850 cred, 0);
1851 if (retval)
1852 goto Err_Exit;
1853 #endif /* QUOTA */
1854 /*
1855 * If we don't have enough physical space then
1856 * we need to extend the physical size.
1857 */
1858 if (length > filebytes) {
1859 int eflags;
1860 u_long blockHint = 0;
1861
1862 /* All or nothing and don't round up to clumpsize. */
1863 eflags = kEFAllMask | kEFNoClumpMask;
1864
1865 if (cred && suser(cred, NULL) != 0)
1866 eflags |= kEFReserveMask; /* keep a reserve */
1867
1868 /*
1869 * Allocate Journal and Quota files in metadata zone.
1870 */
1871 if (filebytes == 0 &&
1872 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1873 hfs_virtualmetafile(cp)) {
1874 eflags |= kEFMetadataMask;
1875 blockHint = hfsmp->hfs_metazone_start;
1876 }
1877 if (hfs_start_transaction(hfsmp) != 0) {
1878 retval = EINVAL;
1879 goto Err_Exit;
1880 }
1881
1882 /* Protect extents b-tree and allocation bitmap */
1883 lockflags = SFL_BITMAP;
1884 if (overflow_extents(fp))
1885 lockflags |= SFL_EXTENTS;
1886 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1887
1888 while ((length > filebytes) && (retval == E_NONE)) {
1889 bytesToAdd = length - filebytes;
1890 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1891 (FCB*)fp,
1892 bytesToAdd,
1893 blockHint,
1894 eflags,
1895 &actualBytesAdded));
1896
1897 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1898 if (actualBytesAdded == 0 && retval == E_NONE) {
1899 if (length > filebytes)
1900 length = filebytes;
1901 break;
1902 }
1903 } /* endwhile */
1904
1905 hfs_systemfile_unlock(hfsmp, lockflags);
1906
1907 if (hfsmp->jnl) {
1908 (void) hfs_update(vp, TRUE);
1909 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1910 }
1911
1912 hfs_end_transaction(hfsmp);
1913
1914 if (retval)
1915 goto Err_Exit;
1916
1917 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1918 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1919 }
1920
1921 if (!(flags & IO_NOZEROFILL)) {
1922 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1923 struct rl_entry *invalid_range;
1924 off_t zero_limit;
1925
1926 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1927 if (length < zero_limit) zero_limit = length;
1928
1929 if (length > (off_t)fp->ff_size) {
1930 struct timeval tv;
1931
1932 /* Extending the file: time to fill out the current last page w. zeroes? */
1933 if ((fp->ff_size & PAGE_MASK_64) &&
1934 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1935 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1936
1937 /* There's some valid data at the start of the (current) last page
1938 of the file, so zero out the remainder of that page to ensure the
1939 entire page contains valid data. Since there is no invalid range
1940 possible past the (current) eof, there's no need to remove anything
1941 from the invalid range list before calling cluster_write(): */
1942 hfs_unlock(cp);
1943 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1944 fp->ff_size, (off_t)0,
1945 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1946 hfs_lock(cp, HFS_FORCE_LOCK);
1947 if (retval) goto Err_Exit;
1948
1949 /* Merely invalidate the remaining area, if necessary: */
1950 if (length > zero_limit) {
1951 microuptime(&tv);
1952 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1953 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1954 }
1955 } else {
1956 /* The page containing the (current) eof is invalid: just add the
1957 remainder of the page to the invalid list, along with the area
1958 being newly allocated:
1959 */
1960 microuptime(&tv);
1961 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1962 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1963 };
1964 }
1965 } else {
1966 panic("hfs_truncate: invoked on non-UBC object?!");
1967 };
1968 }
1969 cp->c_touch_modtime = TRUE;
1970 fp->ff_size = length;
1971
1972 /* Nested transactions will do their own ubc_setsize. */
1973 if (!skipsetsize) {
1974 /*
1975 * ubc_setsize can cause a pagein here
1976 * so we need to drop cnode lock.
1977 */
1978 hfs_unlock(cp);
1979 ubc_setsize(vp, length);
1980 hfs_lock(cp, HFS_FORCE_LOCK);
1981 }
1982
1983 } else { /* Shorten the size of the file */
1984
1985 if ((off_t)fp->ff_size > length) {
1986 /*
1987 * Any buffers that are past the truncation point need to be
1988 * invalidated (to maintain buffer cache consistency).
1989 */
1990
1991 /* Nested transactions will do their own ubc_setsize. */
1992 if (!skipsetsize) {
1993 /*
1994 * ubc_setsize can cause a pageout here
1995 * so we need to drop cnode lock.
1996 */
1997 hfs_unlock(cp);
1998 ubc_setsize(vp, length);
1999 hfs_lock(cp, HFS_FORCE_LOCK);
2000 }
2001
2002 /* Any space previously marked as invalid is now irrelevant: */
2003 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2004 }
2005
2006 /*
2007 * Account for any unmapped blocks. Note that the new
2008 * file length can still end up with unmapped blocks.
2009 */
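		/*
		 * Any delayed-allocation ("loaned") blocks are first returned to the
		 * volume, then re-borrowed for whatever the new length still needs
		 * beyond the blocks actually allocated on disk.
		 */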
2010 if (fp->ff_unallocblocks > 0) {
2011 u_int32_t finalblks;
2012 u_int32_t loanedBlocks;
2013
2014 HFS_MOUNT_LOCK(hfsmp, TRUE);
2015
2016 loanedBlocks = fp->ff_unallocblocks;
2017 cp->c_blocks -= loanedBlocks;
2018 fp->ff_blocks -= loanedBlocks;
2019 fp->ff_unallocblocks = 0;
2020
2021 hfsmp->loanedBlocks -= loanedBlocks;
2022
2023 finalblks = (length + blksize - 1) / blksize;
2024 if (finalblks > fp->ff_blocks) {
2025 /* calculate required unmapped blocks */
2026 loanedBlocks = finalblks - fp->ff_blocks;
2027 hfsmp->loanedBlocks += loanedBlocks;
2028
2029 fp->ff_unallocblocks = loanedBlocks;
2030 cp->c_blocks += loanedBlocks;
2031 fp->ff_blocks += loanedBlocks;
2032 }
2033 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2034 }
2035
2036 /*
2037 * For a TBE process the deallocation of the file blocks is
2038 * delayed until the file is closed. And hfs_close calls
2039 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2040 * isn't set, we make sure this isn't a TBE process.
2041 */
2042 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2043 #if QUOTA
2044 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2045 #endif /* QUOTA */
2046 if (hfs_start_transaction(hfsmp) != 0) {
2047 retval = EINVAL;
2048 goto Err_Exit;
2049 }
2050
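			/*
			 * If the file still has loaned (delayed-allocation) blocks at this
			 * point, its on-disk allocation is already within the new length,
			 * so there is nothing for TruncateFileC to free and the call is
			 * skipped.
			 */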
2051 if (fp->ff_unallocblocks == 0) {
2052 /* Protect extents b-tree and allocation bitmap */
2053 lockflags = SFL_BITMAP;
2054 if (overflow_extents(fp))
2055 lockflags |= SFL_EXTENTS;
2056 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2057
2058 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2059 (FCB*)fp, length, false));
2060
2061 hfs_systemfile_unlock(hfsmp, lockflags);
2062 }
2063 if (hfsmp->jnl) {
2064 (void) hfs_update(vp, TRUE);
2065 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2066 }
2067
2068 hfs_end_transaction(hfsmp);
2069
2070 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2071 if (retval)
2072 goto Err_Exit;
2073 #if QUOTA
2074 /* These are bytes released */
2075 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2076 #endif /* QUOTA */
2077 }
2078 /* Only set update flag if the logical length changes */
2079 if ((off_t)fp->ff_size != length)
2080 cp->c_touch_modtime = TRUE;
2081 fp->ff_size = length;
2082 }
2083 cp->c_touch_chgtime = TRUE;
2084 retval = hfs_update(vp, MNT_WAIT);
2085 if (retval) {
2086 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2087 -1, -1, -1, retval, 0);
2088 }
2089
2090 Err_Exit:
2091
2092 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2093 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2094
2095 return (retval);
2096 }
2097
2098
2099
2100 /*
2101 * Truncate a cnode to at most length size, freeing (or adding) the
2102 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2103 * the disk blocks as needed.
2104 __private_extern__
2105 int
2106 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2107 vfs_context_t context)
2108 {
2109 struct filefork *fp = VTOF(vp);
2110 off_t filebytes;
2111 u_long fileblocks;
2112 int blksize, error = 0;
2113
2114 if (vnode_isdir(vp))
2115 return (EISDIR); /* cannot truncate an HFS directory! */
2116
2117 blksize = VTOVCB(vp)->blockSize;
2118 fileblocks = fp->ff_blocks;
2119 filebytes = (off_t)fileblocks * (off_t)blksize;
2120
2121 // have to loop truncating or growing files that are
2122 // really big because otherwise transactions can get
2123 // enormous and consume too many kernel resources.
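// For example, shrinking a multi-gigabyte file proceeds in steps of
// HFS_BIGFILE_SIZE, with one do_hfs_truncate() call per step, until
// the target length is reached.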
2124
2125 if (length < filebytes) {
2126 while (filebytes > length) {
2127 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2128 filebytes -= HFS_BIGFILE_SIZE;
2129 } else {
2130 filebytes = length;
2131 }
2132 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2133 if (error)
2134 break;
2135 }
2136 } else if (length > filebytes) {
2137 while (filebytes < length) {
2138 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2139 filebytes += HFS_BIGFILE_SIZE;
2140 } else {
2141 filebytes = length;
2142 }
2143 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2144 if (error)
2145 break;
2146 }
2147 } else /* Same logical size */ {
2148
2149 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2150 }
2151 /* Files that are changing size are not hot file candidates. */
2152 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2153 fp->ff_bytesread = 0;
2154 }
2155
2156 return (error);
2157 }
2158
2159
2160
2161 /*
2162 * Preallocate file storage space.
2163 */
2164 int
2165 hfs_vnop_allocate(struct vnop_allocate_args /* {
2166 vnode_t a_vp;
2167 off_t a_length;
2168 u_int32_t a_flags;
2169 off_t *a_bytesallocated;
2170 off_t a_offset;
2171 vfs_context_t a_context;
2172 } */ *ap)
2173 {
2174 struct vnode *vp = ap->a_vp;
2175 struct cnode *cp;
2176 struct filefork *fp;
2177 ExtendedVCB *vcb;
2178 off_t length = ap->a_length;
2179 off_t startingPEOF;
2180 off_t moreBytesRequested;
2181 off_t actualBytesAdded;
2182 off_t filebytes;
2183 u_long fileblocks;
2184 int retval, retval2;
2185 UInt32 blockHint;
2186 UInt32 extendFlags; /* For call to ExtendFileC */
2187 struct hfsmount *hfsmp;
2188 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2189 int lockflags;
2190
2191 *(ap->a_bytesallocated) = 0;
2192
2193 if (!vnode_isreg(vp))
2194 return (EISDIR);
2195 if (length < (off_t)0)
2196 return (EINVAL);
2197
2198 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2199 return (retval);
2200 cp = VTOC(vp);
2201 fp = VTOF(vp);
2202 hfsmp = VTOHFS(vp);
2203 vcb = VTOVCB(vp);
2204
2205 fileblocks = fp->ff_blocks;
2206 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2207
2208 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2209 retval = EINVAL;
2210 goto Err_Exit;
2211 }
2212
2213 /* Fill in the flags word for the call to Extend the file */
2214
2215 extendFlags = kEFNoClumpMask;
2216 if (ap->a_flags & ALLOCATECONTIG)
2217 extendFlags |= kEFContigMask;
2218 if (ap->a_flags & ALLOCATEALL)
2219 extendFlags |= kEFAllMask;
2220 if (cred && suser(cred, NULL) != 0)
2221 extendFlags |= kEFReserveMask;
2222
2223 retval = E_NONE;
2224 blockHint = 0;
2225 startingPEOF = filebytes;
2226
2227 if (ap->a_flags & ALLOCATEFROMPEOF)
2228 length += filebytes;
2229 else if (ap->a_flags & ALLOCATEFROMVOL)
2230 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2231
2232 /* If no changes are necessary, then we're done */
2233 if (filebytes == length)
2234 goto Std_Exit;
2235
2236 /*
2237 * Lengthen the size of the file. We must ensure that the
2238 * last byte of the file is allocated. Since the smallest
2239 * value of filebytes is 0, length will be at least 1.
2240 */
2241 if (length > filebytes) {
2242 moreBytesRequested = length - filebytes;
2243
2244 #if QUOTA
2245 retval = hfs_chkdq(cp,
2246 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2247 cred, 0);
2248 if (retval)
2249 goto Err_Exit;
2250
2251 #endif /* QUOTA */
2252 /*
2253 * Metadata zone checks.
2254 */
2255 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2256 /*
2257 * Allocate Journal and Quota files in metadata zone.
2258 */
2259 if (hfs_virtualmetafile(cp)) {
2260 extendFlags |= kEFMetadataMask;
2261 blockHint = hfsmp->hfs_metazone_start;
2262 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2263 (blockHint <= hfsmp->hfs_metazone_end)) {
2264 /*
2265 * Move blockHint outside metadata zone.
2266 */
2267 blockHint = hfsmp->hfs_metazone_end + 1;
2268 }
2269 }
2270
2271 if (hfs_start_transaction(hfsmp) != 0) {
2272 retval = EINVAL;
2273 goto Err_Exit;
2274 }
2275
2276 /* Protect extents b-tree and allocation bitmap */
2277 lockflags = SFL_BITMAP;
2278 if (overflow_extents(fp))
2279 lockflags |= SFL_EXTENTS;
2280 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2281
2282 retval = MacToVFSError(ExtendFileC(vcb,
2283 (FCB*)fp,
2284 moreBytesRequested,
2285 blockHint,
2286 extendFlags,
2287 &actualBytesAdded));
2288
2289 *(ap->a_bytesallocated) = actualBytesAdded;
2290 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2291
2292 hfs_systemfile_unlock(hfsmp, lockflags);
2293
2294 if (hfsmp->jnl) {
2295 (void) hfs_update(vp, TRUE);
2296 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2297 }
2298
2299 hfs_end_transaction(hfsmp);
2300
2301 /*
2302 * If we get an error and no changes were made then exit;
2303 * otherwise we must do the hfs_update to reflect the changes.
2304 */
2305 if (retval && (startingPEOF == filebytes))
2306 goto Err_Exit;
2307
2308 /*
2309 * Adjust actualBytesAdded to be allocation block aligned, not
2310 * clump size aligned.
2311 * NOTE: So what we are reporting does not affect reality
2312 * until the file is closed, when we truncate the file to allocation
2313 * block size.
2314 */
2315 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2316 *(ap->a_bytesallocated) =
2317 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2318
2319 } else { /* Shorten the size of the file */
2320
2321 if (fp->ff_size > length) {
2322 /*
2323 * Any buffers that are past the truncation point need to be
2324 * invalidated (to maintain buffer cache consistency).
2325 */
2326 }
2327
2328 if (hfs_start_transaction(hfsmp) != 0) {
2329 retval = EINVAL;
2330 goto Err_Exit;
2331 }
2332
2333 /* Protect extents b-tree and allocation bitmap */
2334 lockflags = SFL_BITMAP;
2335 if (overflow_extents(fp))
2336 lockflags |= SFL_EXTENTS;
2337 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2338
2339 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2340
2341 hfs_systemfile_unlock(hfsmp, lockflags);
2342
2343 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2344
2345 if (hfsmp->jnl) {
2346 (void) hfs_update(vp, TRUE);
2347 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2348 }
2349
2350 hfs_end_transaction(hfsmp);
2351
2352
2353 /*
2354 * If we get an error and no changes were made then exit;
2355 * otherwise we must do the hfs_update to reflect the changes.
2356 */
2357 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2358 #if QUOTA
2359 /* These are bytes released */
2360 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2361 #endif /* QUOTA */
2362
2363 if (fp->ff_size > filebytes) {
2364 fp->ff_size = filebytes;
2365
2366 hfs_unlock(cp);
2367 ubc_setsize(vp, fp->ff_size);
2368 hfs_lock(cp, HFS_FORCE_LOCK);
2369 }
2370 }
2371
2372 Std_Exit:
2373 cp->c_touch_chgtime = TRUE;
2374 cp->c_touch_modtime = TRUE;
2375 retval2 = hfs_update(vp, MNT_WAIT);
2376
2377 if (retval == 0)
2378 retval = retval2;
2379 Err_Exit:
2380 hfs_unlock(cp);
2381 return (retval);
2382 }
2383
2384
2385 /*
2386 * Pagein for HFS filesystem
2387 */
2388 int
2389 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2390 /*
2391 struct vnop_pagein_args {
2392 vnode_t a_vp,
2393 upl_t a_pl,
2394 vm_offset_t a_pl_offset,
2395 off_t a_f_offset,
2396 size_t a_size,
2397 int a_flags
2398 vfs_context_t a_context;
2399 };
2400 */
2401 {
2402 vnode_t vp = ap->a_vp;
2403 int error;
2404
2405 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2406 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2407 /*
2408 * Keep track of blocks read.
2409 */
2410 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2411 struct cnode *cp;
2412 struct filefork *fp;
2413 int bytesread;
2414 int took_cnode_lock = 0;
2415
2416 cp = VTOC(vp);
2417 fp = VTOF(vp);
2418
2419 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2420 bytesread = fp->ff_size;
2421 else
2422 bytesread = ap->a_size;
2423
2424 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2425 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2426 hfs_lock(cp, HFS_FORCE_LOCK);
2427 took_cnode_lock = 1;
2428 }
2429 /*
2430 * If this file hasn't been seen since the start of
2431 * the current sampling period then start over.
2432 */
2433 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2434 struct timeval tv;
2435
2436 fp->ff_bytesread = bytesread;
2437 microtime(&tv);
2438 cp->c_atime = tv.tv_sec;
2439 } else {
2440 fp->ff_bytesread += bytesread;
2441 }
2442 cp->c_touch_acctime = TRUE;
2443 if (took_cnode_lock)
2444 hfs_unlock(cp);
2445 }
2446 return (error);
2447 }
2448
2449 /*
2450 * Pageout for HFS filesystem.
2451 */
2452 int
2453 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2454 /*
2455 struct vnop_pageout_args {
2456 vnode_t a_vp,
2457 upl_t a_pl,
2458 vm_offset_t a_pl_offset,
2459 off_t a_f_offset,
2460 size_t a_size,
2461 int a_flags
2462 vfs_context_t a_context;
2463 };
2464 */
2465 {
2466 vnode_t vp = ap->a_vp;
2467 struct cnode *cp;
2468 struct filefork *fp;
2469 int retval;
2470 off_t end_of_range;
2471 off_t filesize;
2472
2473 cp = VTOC(vp);
2474 if (cp->c_lockowner == current_thread()) {
2475 panic("pageout: %s cnode lock already held!\n",
2476 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2477 }
2478 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2479 return (retval);
2480 }
2481 fp = VTOF(vp);
2482
2483 filesize = fp->ff_size;
2484 end_of_range = ap->a_f_offset + ap->a_size - 1;
2485
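	/*
	 * Clamp the range to the current EOF and drop any invalid ranges it
	 * covers: the pages being pushed out now contain valid data, and the
	 * logical EOF is marked dirty (C_MODIFIED) so it gets updated.
	 */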
2486 if (end_of_range >= filesize) {
2487 end_of_range = (off_t)(filesize - 1);
2488 }
2489 if (ap->a_f_offset < filesize) {
2490 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2491 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2492 }
2493 hfs_unlock(cp);
2494
2495 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2496 ap->a_size, filesize, ap->a_flags);
2497
2498 /*
2499 * If data was written, and setuid or setgid bits are set and
2500 * this process is not the superuser then clear the setuid and
2501 * setgid bits as a precaution against tampering.
2502 */
2503 if ((retval == 0) &&
2504 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2505 (vfs_context_suser(ap->a_context) != 0)) {
2506 hfs_lock(cp, HFS_FORCE_LOCK);
2507 cp->c_mode &= ~(S_ISUID | S_ISGID);
2508 cp->c_touch_chgtime = TRUE;
2509 hfs_unlock(cp);
2510 }
2511 return (retval);
2512 }
2513
2514 /*
2515 * Intercept B-Tree node writes to unswap them if necessary.
2516 */
2517 int
2518 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2519 {
2520 int retval = 0;
2521 register struct buf *bp = ap->a_bp;
2522 register struct vnode *vp = buf_vnode(bp);
2523 #if BYTE_ORDER == LITTLE_ENDIAN
2524 BlockDescriptor block;
2525
2526 /* Trap B-Tree writes */
2527 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2528 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2529 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2530
2531 /* Swap if the B-Tree node is in native byte order */
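		/*
		 * The last u_int16_t in a B-tree node is the offset of record 0,
		 * which is always 0x000e (sizeof(BTNodeDescriptor)).  If it reads
		 * as 0x000e in host order on this little-endian machine, the node
		 * is still in native order and must be swapped to big-endian
		 * before it goes to disk.
		 */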
2532 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2533 /* Prepare the block pointer */
2534 block.blockHeader = bp;
2535 block.buffer = (char *)buf_dataptr(bp);
2536 /* not found in cache ==> came from disk */
2537 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2538 block.blockSize = buf_count(bp);
2539
2540 /* Endian un-swap B-Tree node */
2541 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2542 }
2543
2544 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2545 }
2546 #endif
2547 /* This buffer shouldn't be locked anymore but if it is clear it */
2548 if ((buf_flags(bp) & B_LOCKED)) {
2549 // XXXdbg
2550 if (VTOHFS(vp)->jnl) {
2551 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2552 }
2553 buf_clearflags(bp, B_LOCKED);
2554 }
2555 retval = vn_bwrite (ap);
2556
2557 return (retval);
2558 }
2559
2560 /*
2561 * Relocate a file to a new location on disk
2562 * cnode must be locked on entry
2563 *
2564 * Relocation occurs by cloning the file's data from its
2565 * current set of blocks to a new set of blocks. During
2566 * the relocation all of the blocks (old and new) are
2567 * owned by the file.
2568 *
2569 *  -----------------
2570 *  |///////////////|
2571 *  -----------------
2572 *  0               N (file offset)
2573 *
2574 *  -----------------     -----------------
2575 *  |///////////////|     |               |   STEP 1 (acquire new blocks)
2576 *  -----------------     -----------------
2577 *  0               N     N+1            2N
2578 *
2579 *  -----------------     -----------------
2580 *  |///////////////|     |///////////////|   STEP 2 (clone data)
2581 *  -----------------     -----------------
2582 *  0               N     N+1            2N
2583 *
2584 *  -----------------
2585 *  |///////////////|   STEP 3 (head truncate blocks)
2586 *  -----------------
2587 *  0               N
2588 *
2589 * During steps 2 and 3 page-outs to file offsets less
2590 * than or equal to N are suspended.
2591 *
2592 * During step 3 page-ins to the file get suspended.
2593 */
2594 __private_extern__
2595 int
2596 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2597 struct proc *p)
2598 {
2599 struct cnode *cp;
2600 struct filefork *fp;
2601 struct hfsmount *hfsmp;
2602 u_int32_t headblks;
2603 u_int32_t datablks;
2604 u_int32_t blksize;
2605 u_int32_t growsize;
2606 u_int32_t nextallocsave;
2607 daddr64_t sector_a, sector_b;
2608 int disabled_caching = 0;
2609 int eflags;
2610 off_t newbytes;
2611 int retval;
2612 int lockflags = 0;
2613 int took_trunc_lock = 0;
2614 int started_tr = 0;
2615 enum vtype vnodetype;
2616
2617 vnodetype = vnode_vtype(vp);
2618 if (vnodetype != VREG && vnodetype != VLNK) {
2619 return (EPERM);
2620 }
2621
2622 hfsmp = VTOHFS(vp);
2623 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2624 return (ENOSPC);
2625 }
2626
2627 cp = VTOC(vp);
2628 fp = VTOF(vp);
2629 if (fp->ff_unallocblocks)
2630 return (EINVAL);
2631 blksize = hfsmp->blockSize;
2632 if (blockHint == 0)
2633 blockHint = hfsmp->nextAllocation;
2634
2635 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2636 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2637 return (EFBIG);
2638 }
2639
2640 //
2641 // We do not believe that this call to hfs_fsync() is
2642 // necessary and it causes a journal transaction
2643 // deadlock so we are removing it.
2644 //
2645 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2646 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2647 // if (retval)
2648 // return (retval);
2649 //}
2650
2651 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2652 hfs_unlock(cp);
2653 hfs_lock_truncate(cp, TRUE);
2654 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2655 hfs_unlock_truncate(cp);
2656 return (retval);
2657 }
2658 took_trunc_lock = 1;
2659 }
2660 headblks = fp->ff_blocks;
2661 datablks = howmany(fp->ff_size, blksize);
2662 growsize = datablks * blksize;
2663 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2664 if (blockHint >= hfsmp->hfs_metazone_start &&
2665 blockHint <= hfsmp->hfs_metazone_end)
2666 eflags |= kEFMetadataMask;
2667
2668 if (hfs_start_transaction(hfsmp) != 0) {
2669 if (took_trunc_lock)
2670 hfs_unlock_truncate(cp);
2671 return (EINVAL);
2672 }
2673 started_tr = 1;
2674 /*
2675 * Protect the extents b-tree and the allocation bitmap
2676 * during MapFileBlockC and ExtendFileC operations.
2677 */
2678 lockflags = SFL_BITMAP;
2679 if (overflow_extents(fp))
2680 lockflags |= SFL_EXTENTS;
2681 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2682
2683 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2684 if (retval) {
2685 retval = MacToVFSError(retval);
2686 goto out;
2687 }
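	/*
	 * sector_a is the physical sector backing the last byte of the
	 * current allocation; it is checked against the start of the new
	 * allocation below so a merely contiguous extension is rejected.
	 */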
2688
2689 /*
2690 * STEP 1 - acquire new allocation blocks.
2691 */
2692 if (!vnode_isnocache(vp)) {
2693 vnode_setnocache(vp);
2694 disabled_caching = 1;
2695
2696 }
2697 nextallocsave = hfsmp->nextAllocation;
2698 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2699 if (eflags & kEFMetadataMask) {
2700 HFS_MOUNT_LOCK(hfsmp, TRUE);
2701 hfsmp->nextAllocation = nextallocsave;
2702 hfsmp->vcbFlags |= 0xFF00;
2703 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2704 }
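	/*
	 * For a metadata-zone allocation, nextAllocation is put back to its
	 * saved value (and the VCB marked dirty), presumably so ordinary
	 * allocations are not steered into the metadata zone afterwards.
	 */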
2705
2706 retval = MacToVFSError(retval);
2707 if (retval == 0) {
2708 cp->c_flag |= C_MODIFIED;
2709 if (newbytes < growsize) {
2710 retval = ENOSPC;
2711 goto restore;
2712 } else if (fp->ff_blocks < (headblks + datablks)) {
2713 printf("hfs_relocate: allocation failed");
2714 retval = ENOSPC;
2715 goto restore;
2716 }
2717
2718 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2719 if (retval) {
2720 retval = MacToVFSError(retval);
2721 } else if ((sector_a + 1) == sector_b) {
2722 retval = ENOSPC;
2723 goto restore;
2724 } else if ((eflags & kEFMetadataMask) &&
2725 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2726 hfsmp->hfs_metazone_end)) {
2727 printf("hfs_relocate: didn't move into metadata zone\n");
2728 retval = ENOSPC;
2729 goto restore;
2730 }
2731 }
2732 /* Done with system locks and journal for now. */
2733 hfs_systemfile_unlock(hfsmp, lockflags);
2734 lockflags = 0;
2735 hfs_end_transaction(hfsmp);
2736 started_tr = 0;
2737
2738 if (retval) {
2739 /*
2740 * Check to see if failure is due to excessive fragmentation.
2741 */
2742 if ((retval == ENOSPC) &&
2743 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2744 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2745 }
2746 goto out;
2747 }
2748 /*
2749 * STEP 2 - clone file data into the new allocation blocks.
2750 */
2751
2752 if (vnodetype == VLNK)
2753 retval = hfs_clonelink(vp, blksize, cred, p);
2754 else if (vnode_issystem(vp))
2755 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2756 else
2757 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2758
2759 /* Start transaction for step 3 or for a restore. */
2760 if (hfs_start_transaction(hfsmp) != 0) {
2761 retval = EINVAL;
2762 goto out;
2763 }
2764 started_tr = 1;
2765 if (retval)
2766 goto restore;
2767
2768 /*
2769 * STEP 3 - switch to cloned data and remove old blocks.
2770 */
2771 lockflags = SFL_BITMAP;
2772 if (overflow_extents(fp))
2773 lockflags |= SFL_EXTENTS;
2774 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2775
2776 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2777
2778 hfs_systemfile_unlock(hfsmp, lockflags);
2779 lockflags = 0;
2780 if (retval)
2781 goto restore;
2782 out:
2783 if (took_trunc_lock)
2784 hfs_unlock_truncate(cp);
2785
2786 if (lockflags) {
2787 hfs_systemfile_unlock(hfsmp, lockflags);
2788 lockflags = 0;
2789 }
2790
2791 // See comment up above about calls to hfs_fsync()
2792 //
2793 //if (retval == 0)
2794 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2795
2796 if (hfsmp->jnl) {
2797 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2798 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2799 else
2800 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2801 }
2802 exit:
2803 if (disabled_caching) {
2804 vnode_clearnocache(vp);
2805 }
2806 if (started_tr)
2807 hfs_end_transaction(hfsmp);
2808
2809 return (retval);
2810
2811 restore:
2812 if (fp->ff_blocks == headblks)
2813 goto exit;
2814 /*
2815 * Give back any newly allocated space.
2816 */
2817 if (lockflags == 0) {
2818 lockflags = SFL_BITMAP;
2819 if (overflow_extents(fp))
2820 lockflags |= SFL_EXTENTS;
2821 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2822 }
2823
2824 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2825
2826 hfs_systemfile_unlock(hfsmp, lockflags);
2827 lockflags = 0;
2828
2829 if (took_trunc_lock)
2830 hfs_unlock_truncate(cp);
2831 goto exit;
2832 }
2833
2834
2835 /*
2836 * Clone a symlink.
2837 *
2838 */
2839 static int
2840 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2841 {
2842 struct buf *head_bp = NULL;
2843 struct buf *tail_bp = NULL;
2844 int error;
2845
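	/*
	 * The symlink's data fits in a single allocation block.  Read the
	 * original block (logical block 0), copy it into the newly allocated
	 * block (logical block 1), write it out, and invalidate the stale
	 * buffers; hfs_relocate later head-truncates the original block away.
	 */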
2846
2847 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2848 if (error)
2849 goto out;
2850
2851 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2852 if (tail_bp == NULL) {
2853 error = EIO;
2854 goto out;
2855 }
2856 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2857 error = (int)buf_bwrite(tail_bp);
2858 out:
2859 if (head_bp) {
2860 buf_markinvalid(head_bp);
2861 buf_brelse(head_bp);
2862 }
2863 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2864
2865 return (error);
2866 }
2867
2868 /*
2869 * Clone a file's data within the file.
2870 *
2871 */
2872 static int
2873 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2874 {
2875 caddr_t bufp;
2876 size_t writebase;
2877 size_t bufsize;
2878 size_t copysize;
2879 size_t iosize;
2880 off_t filesize;
2881 size_t offset;
2882 uio_t auio;
2883 int error = 0;
2884
2885 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2886 writebase = blkstart * blksize;
2887 copysize = blkcnt * blksize;
2888 iosize = bufsize = MIN(copysize, 4096 * 16);
2889 offset = 0;
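	/*
	 * Copy the data into the newly allocated blocks in chunks of up to
	 * 64KB: cluster_read from the start of the old blocks, cluster_write
	 * at writebase (the start of the new blocks) using IO_NOCACHE|IO_SYNC
	 * so no stale pages are left behind.  The cnode lock is dropped for
	 * the duration of the copy.
	 */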
2890
2891 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2892 return (ENOMEM);
2893 }
2894 hfs_unlock(VTOC(vp));
2895
2896 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2897
2898 while (offset < copysize) {
2899 iosize = MIN(copysize - offset, iosize);
2900
2901 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2902 uio_addiov(auio, (uintptr_t)bufp, iosize);
2903
2904 error = cluster_read(vp, auio, copysize, 0);
2905 if (error) {
2906 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2907 break;
2908 }
2909 if (uio_resid(auio) != 0) {
2910 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2911 error = EIO;
2912 break;
2913 }
2914
2915 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2916 uio_addiov(auio, (uintptr_t)bufp, iosize);
2917
2918 error = cluster_write(vp, auio, filesize + offset,
2919 filesize + offset + iosize,
2920 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2921 if (error) {
2922 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2923 break;
2924 }
2925 if (uio_resid(auio) != 0) {
2926 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2927 error = EIO;
2928 break;
2929 }
2930 offset += iosize;
2931 }
2932 uio_free(auio);
2933
2934 /*
2935 * No need to call ubc_sync_range or hfs_invalbuf
2936 * since the file was copied using IO_NOCACHE.
2937 */
2938
2939 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2940
2941 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2942 return (error);
2943 }
2944
2945 /*
2946 * Clone a system (metadata) file.
2947 *
2948 */
2949 static int
2950 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2951 kauth_cred_t cred, struct proc *p)
2952 {
2953 caddr_t bufp;
2954 char * offset;
2955 size_t bufsize;
2956 size_t iosize;
2957 struct buf *bp = NULL;
2958 daddr64_t blkno;
2959 daddr64_t blk;
2960 daddr64_t start_blk;
2961 daddr64_t last_blk;
2962 int breadcnt;
2963 int i;
2964 int error = 0;
2965
2966
2967 iosize = GetLogicalBlockSize(vp);
2968 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2969 breadcnt = bufsize / iosize;
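	/*
	 * Copy the system file in logical-block-size pieces, staging up to
	 * 1MB at a time in a kernel buffer: read blocks 0..last_blk of the
	 * original data, rewrite them starting at start_blk (the new
	 * allocation), then fsync the file.
	 */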
2970
2971 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2972 return (ENOMEM);
2973 }
2974 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2975 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2976 blkno = 0;
2977
2978 while (blkno < last_blk) {
2979 /*
2980 * Read up to a megabyte
2981 */
2982 offset = bufp;
2983 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
2984 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
2985 if (error) {
2986 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2987 goto out;
2988 }
2989 if (buf_count(bp) != iosize) {
2990 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
2991 goto out;
2992 }
2993 bcopy((char *)buf_dataptr(bp), offset, iosize);
2994
2995 buf_markinvalid(bp);
2996 buf_brelse(bp);
2997 bp = NULL;
2998
2999 offset += iosize;
3000 }
3001
3002 /*
3003 * Write up to a megabyte
3004 */
3005 offset = bufp;
3006 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3007 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3008 if (bp == NULL) {
3009 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3010 error = EIO;
3011 goto out;
3012 }
3013 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3014 error = (int)buf_bwrite(bp);
3015 bp = NULL;
3016 if (error)
3017 goto out;
3018 offset += iosize;
3019 }
3020 }
3021 out:
3022 if (bp) {
3023 buf_brelse(bp);
3024 }
3025
3026 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3027
3028 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3029
3030 return (error);
3031 }