1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* @(#)hfs_readwrite.c 1.0
31 *
32 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
33 *
34 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
35 *
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/resourcevar.h>
41 #include <sys/kernel.h>
42 #include <sys/fcntl.h>
43 #include <sys/filedesc.h>
44 #include <sys/stat.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/kauth.h>
48 #include <sys/vnode.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
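/*
 * can_cluster(size) is true when the logical block size is a multiple of
 * 4K and no more than half of MAXPHYSIO -- the cases where the cluster
 * layer's read-ahead is worth computing (see the runp logic in hfs_bmap).
 */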
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 /*****************************************************************************
88 *
89 * I/O Operations on vnodes
90 *
91 *****************************************************************************/
92 int hfs_vnop_read(struct vnop_read_args *);
93 int hfs_vnop_write(struct vnop_write_args *);
94 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
95 int hfs_vnop_select(struct vnop_select_args *);
96 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
97 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
98 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
99 int hfs_vnop_strategy(struct vnop_strategy_args *);
100 int hfs_vnop_allocate(struct vnop_allocate_args *);
101 int hfs_vnop_pagein(struct vnop_pagein_args *);
102 int hfs_vnop_pageout(struct vnop_pageout_args *);
103 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
104
105
106 /*
107 * Read data from a file.
108 */
109 int
110 hfs_vnop_read(struct vnop_read_args *ap)
111 {
112 uio_t uio = ap->a_uio;
113 struct vnode *vp = ap->a_vp;
114 struct cnode *cp;
115 struct filefork *fp;
116 struct hfsmount *hfsmp;
117 off_t filesize;
118 off_t filebytes;
119 off_t start_resid = uio_resid(uio);
120 off_t offset = uio_offset(uio);
121 int retval = 0;
122
123
124 /* Preflight checks */
125 if (!vnode_isreg(vp)) {
126 /* can only read regular files */
127 if (vnode_isdir(vp))
128 return (EISDIR);
129 else
130 return (EPERM);
131 }
132 if (start_resid == 0)
133 return (0); /* Nothing left to do */
134 if (offset < 0)
135 return (EINVAL); /* can't read from a negative offset */
136
137 cp = VTOC(vp);
138 fp = VTOF(vp);
139 hfsmp = VTOHFS(vp);
140
141 /* Protect against a size change. */
142 hfs_lock_truncate(cp, 0);
143
144 filesize = fp->ff_size;
145 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
146 if (offset > filesize) {
147 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
148 (offset > (off_t)MAXHFSFILESIZE)) {
149 retval = EFBIG;
150 }
151 goto exit;
152 }
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 retval = cluster_read(vp, uio, filesize, 0);
158
159 cp->c_touch_acctime = TRUE;
160
161 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
162 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
163
164 /*
165 * Keep track of blocks read
166 */
167 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
168 int took_cnode_lock = 0;
169 off_t bytesread;
170
171 bytesread = start_resid - uio_resid(uio);
172
173 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
174 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
175 hfs_lock(cp, HFS_FORCE_LOCK);
176 took_cnode_lock = 1;
177 }
178 /*
179 * If this file hasn't been seen since the start of
180 * the current sampling period then start over.
181 */
182 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
183 struct timeval tv;
184
185 fp->ff_bytesread = bytesread;
186 microtime(&tv);
187 cp->c_atime = tv.tv_sec;
188 } else {
189 fp->ff_bytesread += bytesread;
190 }
191 if (took_cnode_lock)
192 hfs_unlock(cp);
193 }
194 exit:
195 hfs_unlock_truncate(cp);
196 return (retval);
197 }
198
199 /*
200 * Write data to a file.
201 */
202 int
203 hfs_vnop_write(struct vnop_write_args *ap)
204 {
205 uio_t uio = ap->a_uio;
206 struct vnode *vp = ap->a_vp;
207 struct cnode *cp;
208 struct filefork *fp;
209 struct hfsmount *hfsmp;
210 kauth_cred_t cred = NULL;
211 off_t origFileSize;
212 off_t writelimit;
213 off_t bytesToAdd;
214 off_t actualBytesAdded;
215 off_t filebytes;
216 off_t offset;
217 size_t resid;
218 int eflags;
219 int ioflag = ap->a_ioflag;
220 int retval = 0;
221 int lockflags;
222 int cnode_locked = 0;
223
224 // LP64todo - fix this! uio_resid may be a 64-bit value
225 resid = uio_resid(uio);
226 offset = uio_offset(uio);
227
228 if (offset < 0)
229 return (EINVAL);
230 if (resid == 0)
231 return (E_NONE);
232 if (!vnode_isreg(vp))
233 return (EPERM); /* Can only write regular files */
234
235 /* Protect against a size change. */
236 hfs_lock_truncate(VTOC(vp), TRUE);
237
238 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
239 hfs_unlock_truncate(VTOC(vp));
240 return (retval);
241 }
242 cnode_locked = 1;
243 cp = VTOC(vp);
244 fp = VTOF(vp);
245 hfsmp = VTOHFS(vp);
246 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
247
248 if (ioflag & IO_APPEND) {
249 uio_setoffset(uio, fp->ff_size);
250 offset = fp->ff_size;
251 }
252 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
253 retval = EPERM;
254 goto exit;
255 }
256
257 origFileSize = fp->ff_size;
258 eflags = kEFDeferMask; /* defer file block allocations */
259
260 #ifdef HFS_SPARSE_DEV
261 /*
262 * When the underlying device is sparse and space is low
263 * (hfs_freeblks() below 2048 allocation blocks, roughly 8MB with 4KB blocks),
264 * stop doing delayed allocations and begin doing synchronous I/O.
265 */
266 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
267 (hfs_freeblks(hfsmp, 0) < 2048)) {
268 eflags &= ~kEFDeferMask;
269 ioflag |= IO_SYNC;
270 }
271 #endif /* HFS_SPARSE_DEV */
272
273 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
274 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
275
276 /* Now test if we need to extend the file */
277 /* Doing so will adjust the filebytes for us */
278
279 writelimit = offset + resid;
280 if (writelimit <= filebytes)
281 goto sizeok;
282
283 cred = vfs_context_ucred(ap->a_context);
284 #if QUOTA
285 bytesToAdd = writelimit - filebytes;
286 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
287 cred, 0);
288 if (retval)
289 goto exit;
290 #endif /* QUOTA */
291
292 if (hfs_start_transaction(hfsmp) != 0) {
293 retval = EINVAL;
294 goto exit;
295 }
296
297 while (writelimit > filebytes) {
298 bytesToAdd = writelimit - filebytes;
299 if (cred && suser(cred, NULL) != 0)
300 eflags |= kEFReserveMask;
301
302 /* Protect extents b-tree and allocation bitmap */
303 lockflags = SFL_BITMAP;
304 if (overflow_extents(fp))
305 lockflags |= SFL_EXTENTS;
306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
307
308 /* Files that are changing size are not hot file candidates. */
309 if (hfsmp->hfc_stage == HFC_RECORDING) {
310 fp->ff_bytesread = 0;
311 }
312 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
313 0, eflags, &actualBytesAdded));
314
315 hfs_systemfile_unlock(hfsmp, lockflags);
316
317 if ((actualBytesAdded == 0) && (retval == E_NONE))
318 retval = ENOSPC;
319 if (retval != E_NONE)
320 break;
321 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
322 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
323 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
324 }
325 (void) hfs_update(vp, TRUE);
326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
327 (void) hfs_end_transaction(hfsmp);
328
329 sizeok:
330 if (retval == E_NONE) {
331 off_t filesize;
332 off_t zero_off;
333 off_t tail_off;
334 off_t inval_start;
335 off_t inval_end;
336 off_t io_start;
337 int lflag;
338 struct rl_entry *invalid_range;
339
340 if (writelimit > fp->ff_size)
341 filesize = writelimit;
342 else
343 filesize = fp->ff_size;
344
345 lflag = (ioflag & IO_SYNC);
346
347 if (offset <= fp->ff_size) {
348 zero_off = offset & ~PAGE_MASK_64;
349
350 /* Check whether the area between zero_off and the start
351 of the transfer is invalid and should be zero-filled
352 as part of the transfer:
353 */
354 if (offset > zero_off) {
355 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
356 lflag |= IO_HEADZEROFILL;
357 }
358 } else {
359 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
360
361 /* The bytes between fp->ff_size and uio->uio_offset must never be
362 read without being zeroed. The current last block is filled with zeroes
363 if it holds valid data but in all cases merely do a little bookkeeping
364 to track the area from the end of the current last page to the start of
365 the area actually written. For the same reason only the bytes up to the
366 start of the page where this write will start are invalidated; any remainder
367 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
368
369 Note that inval_start, the start of the page after the current EOF,
370 may be past the start of the write, in which case the zeroing
371 will be handled by the cluster_write of the actual data.
372 */
373 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
374 inval_end = offset & ~PAGE_MASK_64;
375 zero_off = fp->ff_size;
376
377 if ((fp->ff_size & PAGE_MASK_64) &&
378 (rl_scan(&fp->ff_invalidranges,
379 eof_page_base,
380 fp->ff_size - 1,
381 &invalid_range) != RL_NOOVERLAP)) {
382 /* The page containing the EOF is not valid, so the
383 entire page must be made inaccessible now. If the write
384 starts on a page beyond the page containing the eof
385 (inval_end > eof_page_base), add the
386 whole page to the range to be invalidated. Otherwise
387 (i.e. if the write starts on the same page), zero-fill
388 the entire page explicitly now:
389 */
390 if (inval_end > eof_page_base) {
391 inval_start = eof_page_base;
392 } else {
393 zero_off = eof_page_base;
394 };
395 };
396
397 if (inval_start < inval_end) {
398 struct timeval tv;
399 /* There's some range of data that's going to be marked invalid */
400
401 if (zero_off < inval_start) {
402 /* The pages between inval_start and inval_end are going to be invalidated,
403 and the actual write will start on a page past inval_end. Now's the last
404 chance to zero-fill the page containing the EOF:
405 */
406 hfs_unlock(cp);
407 cnode_locked = 0;
408 retval = cluster_write(vp, (uio_t) 0,
409 fp->ff_size, inval_start,
410 zero_off, (off_t)0,
411 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
412 hfs_lock(cp, HFS_FORCE_LOCK);
413 cnode_locked = 1;
414 if (retval) goto ioerr_exit;
415 offset = uio_offset(uio);
416 };
417
418 /* Mark the remaining area of the newly allocated space as invalid: */
419 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
420 microuptime(&tv);
421 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
422 zero_off = fp->ff_size = inval_end;
423 };
424
425 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
426 };
427
428 /* Check to see whether the area between the end of the write and the end of
429 the page it falls in is invalid and should be zero-filled as part of the transfer:
430 */
431 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
432 if (tail_off > filesize) tail_off = filesize;
433 if (tail_off > writelimit) {
434 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
435 lflag |= IO_TAILZEROFILL;
436 };
437 };
438
439 /*
440 * if the write starts beyond the current EOF (possibly advanced in the
441 * zeroing of the last block, above), then we'll zero fill from the current EOF
442 * to where the write begins:
443 *
444 * NOTE: If (and ONLY if) the portion of the file about to be written is
445 * before the current EOF it might be marked as invalid now and must be
446 * made readable (removed from the invalid ranges) before cluster_write
447 * tries to write it:
448 */
449 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
450 if (io_start < fp->ff_size) {
451 off_t io_end;
452
453 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
454 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
455 };
456
457 hfs_unlock(cp);
458 cnode_locked = 0;
459 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
460 tail_off, lflag | IO_NOZERODIRTY);
461 offset = uio_offset(uio);
462 if (offset > fp->ff_size) {
463 fp->ff_size = offset;
464
465 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
466 /* Files that are changing size are not hot file candidates. */
467 if (hfsmp->hfc_stage == HFC_RECORDING)
468 fp->ff_bytesread = 0;
469 }
470 if (resid > uio_resid(uio)) {
471 cp->c_touch_chgtime = TRUE;
472 cp->c_touch_modtime = TRUE;
473 }
474 }
475 HFS_KNOTE(vp, NOTE_WRITE);
476
477 ioerr_exit:
478 /*
479 * If we successfully wrote any data, and we are not the superuser,
480 * we clear the setuid and setgid bits as a precaution against
481 * tampering.
482 */
483 if (cp->c_mode & (S_ISUID | S_ISGID)) {
484 cred = vfs_context_ucred(ap->a_context);
485 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
486 if (!cnode_locked) {
487 hfs_lock(cp, HFS_FORCE_LOCK);
488 cnode_locked = 1;
489 }
490 cp->c_mode &= ~(S_ISUID | S_ISGID);
491 }
492 }
493 if (retval) {
494 if (ioflag & IO_UNIT) {
495 if (!cnode_locked) {
496 hfs_lock(cp, HFS_FORCE_LOCK);
497 cnode_locked = 1;
498 }
499 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
500 0, ap->a_context);
501 // LP64todo - fix this! resid needs to be user_ssize_t
502 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
503 uio_setresid(uio, resid);
504 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
505 }
506 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
507 if (!cnode_locked) {
508 hfs_lock(cp, HFS_FORCE_LOCK);
509 cnode_locked = 1;
510 }
511 retval = hfs_update(vp, TRUE);
512 }
513 /* Updating vcbWrCnt doesn't need to be atomic. */
514 hfsmp->vcbWrCnt++;
515
516 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
517 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
518 exit:
519 if (cnode_locked)
520 hfs_unlock(cp);
521 hfs_unlock_truncate(cp);
522 return (retval);
523 }
524
525 /* support for the "bulk-access" fcntl */
526
527 #define CACHE_ELEMS 64
528 #define CACHE_LEVELS 16
529 #define PARENT_IDS_FLAG 0x100
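/*
 * CACHE_ELEMS bounds how many parent directory ids the per-call
 * access_cache holds, CACHE_LEVELS bounds how many ancestors a single
 * access check will record on its way to the root, and PARENT_IDS_FLAG,
 * when set in the caller's flags, means the file_ids array holds parent
 * directory ids rather than leaf file ids (see the check_leaf handling
 * in the HFS_BULKACCESS ioctl below).
 */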
530
531 /* from hfs_attrlist.c */
532 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
533 mode_t obj_mode, struct mount *mp,
534 kauth_cred_t cred, struct proc *p);
535
536 /* from vfs/vfs_fsevents.c */
537 extern char *get_pathbuff(void);
538 extern void release_pathbuff(char *buff);
539
540 struct access_cache {
541 int numcached;
542 int cachehits; /* these two for statistics gathering */
543 int lookups;
544 unsigned int *acache;
545 Boolean *haveaccess;
546 };
547
548 struct access_t {
549 uid_t uid; /* IN: effective user id */
550 short flags; /* IN: access requested (i.e. R_OK) */
551 short num_groups; /* IN: number of groups user belongs to */
552 int num_files; /* IN: number of files to process */
553 int *file_ids; /* IN: array of file ids */
554 gid_t *groups; /* IN: array of groups */
555 short *access; /* OUT: access info for each file (0 for 'has access') */
556 };
557
558 struct user_access_t {
559 uid_t uid; /* IN: effective user id */
560 short flags; /* IN: access requested (i.e. R_OK) */
561 short num_groups; /* IN: number of groups user belongs to */
562 int num_files; /* IN: number of files to process */
563 user_addr_t file_ids; /* IN: array of file ids */
564 user_addr_t groups; /* IN: array of groups */
565 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
566 };
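/*
 * Illustrative sketch of a 32-bit userspace caller (field names from
 * struct access_t above; the fsctl() wrapper, volume path, and node ids
 * are assumptions for the example -- the call must be made as root):
 *
 *     int ids[2] = { 1234, 5678 };        // catalog node ids to check
 *     gid_t grps[1] = { getgid() };
 *     short result[2];
 *     struct access_t req;
 *
 *     req.uid = geteuid();
 *     req.flags = R_OK;                   // access being asked about
 *     req.num_groups = 1;
 *     req.num_files = 2;                  // must be 1..256 (see below)
 *     req.file_ids = ids;
 *     req.groups = grps;
 *     req.access = result;
 *     fsctl("/Volumes/HFSVol", HFS_BULKACCESS_FSCTL, &req, 0);
 *     // result[i] == 0 means file_ids[i] was reachable; otherwise an errno
 */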
567
568 /*
569 * Perform a binary search for the given parent_id. Return value is
570 * found/not found boolean, and indexp will be the index of the item
571 * or the index at which to insert the item if it's not found.
572 */
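/*
 * Illustrative trace: with acache = {5, 9, 17}, looking up 9 returns 1
 * and sets *indexp to 1; looking up 12 returns 0 and sets *indexp to 2,
 * the slot at which 12 would be inserted.
 */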
573 static int
574 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
575 {
576 unsigned int lo, hi;
577 int index, matches = 0;
578
579 if (cache->numcached == 0) {
580 *indexp = 0;
581 return 0; // table is empty, so insert at index=0 and report no match
582 }
583
584 if (cache->numcached > CACHE_ELEMS) {
585 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
586 cache->numcached, CACHE_ELEMS);*/
587 cache->numcached = CACHE_ELEMS;
588 }
589
590 lo = 0;
591 hi = cache->numcached - 1;
592 index = -1;
593
594 /* perform binary search for parent_id */
595 do {
596 unsigned int mid = (hi - lo)/2 + lo;
597 unsigned int this_id = cache->acache[mid];
598
599 if (parent_id == this_id) {
600 index = mid;
601 break;
602 }
603
604 if (parent_id < this_id) {
605 hi = mid;
606 continue;
607 }
608
609 if (parent_id > this_id) {
610 lo = mid + 1;
611 continue;
612 }
613 } while(lo < hi);
614
615 /* check if lo and hi converged on the match */
616 if (parent_id == cache->acache[hi]) {
617 index = hi;
618 }
619
620 /* if no existing entry found, find index for new one */
621 if (index == -1) {
622 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
623 matches = 0;
624 } else {
625 matches = 1;
626 }
627
628 *indexp = index;
629 return matches;
630 }
631
632 /*
633 * Add a node to the access_cache at the given index (or do a lookup first
634 * to find the index if -1 is passed in). We currently do a replace rather
635 * than an insert if the cache is full.
636 */
637 static void
638 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
639 {
640 int lookup_index = -1;
641
642 /* need to do a lookup first if -1 passed for index */
643 if (index == -1) {
644 if (lookup_bucket(cache, &lookup_index, nodeID)) {
645 if (cache->haveaccess[lookup_index] != access) {
646 /* change access info for existing entry... should never happen */
647 cache->haveaccess[lookup_index] = access;
648 }
649
650 /* mission accomplished */
651 return;
652 } else {
653 index = lookup_index;
654 }
655
656 }
657
658 /* if the cache is full, do a replace rather than an insert */
659 if (cache->numcached >= CACHE_ELEMS) {
660 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
661 cache->numcached = CACHE_ELEMS-1;
662
663 if (index > cache->numcached) {
664 // printf("index %d pinned to %d\n", index, cache->numcached);
665 index = cache->numcached;
666 }
667 } else if (index >= 0 && index < cache->numcached) {
668 /* only do bcopy if we're inserting */
669 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
670 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
671 }
672
673 cache->acache[index] = nodeID;
674 cache->haveaccess[index] = access;
675 cache->numcached++;
676 }
677
678
679 struct cinfo {
680 uid_t uid;
681 gid_t gid;
682 mode_t mode;
683 cnid_t parentcnid;
684 };
685
686 static int
687 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
688 {
689 struct cinfo *cip = (struct cinfo *)arg;
690
691 cip->uid = attrp->ca_uid;
692 cip->gid = attrp->ca_gid;
693 cip->mode = attrp->ca_mode;
694 cip->parentcnid = descp->cd_parentcnid;
695
696 return (0);
697 }
698
699 /*
700 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
701 * isn't in core, then go to the catalog.
702 */
703 static int
704 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
705 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
706 {
707 int error = 0;
708
709 /* if this id matches the one the fsctl was called with, skip the lookup */
710 if (cnid == skip_cp->c_cnid) {
711 cnattrp->ca_uid = skip_cp->c_uid;
712 cnattrp->ca_gid = skip_cp->c_gid;
713 cnattrp->ca_mode = skip_cp->c_mode;
714 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
715 } else {
716 struct cinfo c_info;
717
718 /* otherwise, check the cnode hash in case the file/dir is in core */
719 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
720 cnattrp->ca_uid = c_info.uid;
721 cnattrp->ca_gid = c_info.gid;
722 cnattrp->ca_mode = c_info.mode;
723 keyp->hfsPlus.parentID = c_info.parentcnid;
724 } else {
725 int lockflags;
726
727 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
728
729 /* lookup this cnid in the catalog */
730 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
731
732 hfs_systemfile_unlock(hfsmp, lockflags);
733
734 cache->lookups++;
735 }
736 }
737
738 return (error);
739 }
740
741 /*
742 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
743 * up to CACHE_LEVELS as we progress towards the root.
744 */
745 static int
746 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
747 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
748 {
749 int myErr = 0;
750 int myResult;
751 HFSCatalogNodeID thisNodeID;
752 unsigned long myPerms;
753 struct cat_attr cnattr;
754 int cache_index = -1;
755 CatalogKey catkey;
756
757 int i = 0, ids_to_cache = 0;
758 int parent_ids[CACHE_LEVELS];
759
760 /* root always has access */
761 if (!suser(myp_ucred, NULL)) {
762 return (1);
763 }
764
765 thisNodeID = nodeID;
766 while (thisNodeID >= kRootDirID) {
767 myResult = 0; /* default to "no access" */
768
769 /* check the cache before resorting to hitting the catalog */
770
771 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
772 * to look any further after hitting cached dir */
773
774 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
775 cache->cachehits++;
776 myResult = cache->haveaccess[cache_index];
777 goto ExitThisRoutine;
778 }
779
780 /* remember which parents we want to cache */
781 if (ids_to_cache < CACHE_LEVELS) {
782 parent_ids[ids_to_cache] = thisNodeID;
783 ids_to_cache++;
784 }
785
786 /* do the lookup (checks the cnode hash, then the catalog) */
787 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
788 if (myErr) {
789 goto ExitThisRoutine; /* no access */
790 }
791
792 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
793 cnattr.ca_mode, hfsmp->hfs_mp,
794 myp_ucred, theProcPtr);
795
796 if ( (myPerms & X_OK) == 0 ) {
797 myResult = 0;
798 goto ExitThisRoutine; /* no access */
799 }
800
801 /* up the hierarchy we go */
802 thisNodeID = catkey.hfsPlus.parentID;
803 }
804
805 /* if here, we have access to this node */
806 myResult = 1;
807
808 ExitThisRoutine:
809 if (myErr) {
810 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
811 myResult = 0;
812 }
813 *err = myErr;
814
815 /* cache the parent directory(ies) */
816 for (i = 0; i < ids_to_cache; i++) {
817 /* small optimization: get rid of double-lookup for all these */
818 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
819 add_node(cache, -1, parent_ids[i], myResult);
820 }
821
822 return (myResult);
823 }
824 /* end "bulk-access" support */
825
826
827
828 /*
829 * Callback for use with freeze ioctl.
830 */
831 static int
832 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
833 {
834 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
835
836 return 0;
837 }
838
839 /*
840 * Control filesystem operating characteristics.
841 */
842 int
843 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
844 vnode_t a_vp;
845 int a_command;
846 caddr_t a_data;
847 int a_fflag;
848 vfs_context_t a_context;
849 } */ *ap)
850 {
851 struct vnode * vp = ap->a_vp;
852 struct hfsmount *hfsmp = VTOHFS(vp);
853 vfs_context_t context = ap->a_context;
854 kauth_cred_t cred = vfs_context_ucred(context);
855 proc_t p = vfs_context_proc(context);
856 struct vfsstatfs *vfsp;
857 boolean_t is64bit;
858
859 is64bit = proc_is64bit(p);
860
861 switch (ap->a_command) {
862
863 case HFS_RESIZE_PROGRESS: {
864
865 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
866 if (suser(cred, NULL) &&
867 kauth_cred_getuid(cred) != vfsp->f_owner) {
868 return (EACCES); /* must be owner of file system */
869 }
870 if (!vnode_isvroot(vp)) {
871 return (EINVAL);
872 }
873 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
874 }
875 case HFS_RESIZE_VOLUME: {
876 u_int64_t newsize;
877 u_int64_t cursize;
878
879 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
880 if (suser(cred, NULL) &&
881 kauth_cred_getuid(cred) != vfsp->f_owner) {
882 return (EACCES); /* must be owner of file system */
883 }
884 if (!vnode_isvroot(vp)) {
885 return (EINVAL);
886 }
887 newsize = *(u_int64_t *)ap->a_data;
888 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
889
890 if (newsize > cursize) {
891 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
892 } else if (newsize < cursize) {
893 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
894 } else {
895 return (0);
896 }
897 }
898 case HFS_CHANGE_NEXT_ALLOCATION: {
899 u_int32_t location;
900
901 if (vnode_vfsisrdonly(vp)) {
902 return (EROFS);
903 }
904 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
905 if (suser(cred, NULL) &&
906 kauth_cred_getuid(cred) != vfsp->f_owner) {
907 return (EACCES); /* must be owner of file system */
908 }
909 if (!vnode_isvroot(vp)) {
910 return (EINVAL);
911 }
912 location = *(u_int32_t *)ap->a_data;
913 if (location > hfsmp->totalBlocks - 1) {
914 return (EINVAL);
915 }
916 /* Return previous value. */
917 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
918 HFS_MOUNT_LOCK(hfsmp, TRUE);
919 hfsmp->nextAllocation = location;
920 hfsmp->vcbFlags |= 0xFF00;
921 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
922 return (0);
923 }
924
925 #ifdef HFS_SPARSE_DEV
926 case HFS_SETBACKINGSTOREINFO: {
927 struct vnode * bsfs_rootvp;
928 struct vnode * di_vp;
929 struct hfs_backingstoreinfo *bsdata;
930 int error = 0;
931
932 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
933 return (EALREADY);
934 }
935 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
936 if (suser(cred, NULL) &&
937 kauth_cred_getuid(cred) != vfsp->f_owner) {
938 return (EACCES); /* must be owner of file system */
939 }
940 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
941 if (bsdata == NULL) {
942 return (EINVAL);
943 }
944 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
945 return (error);
946 }
947 if ((error = vnode_getwithref(di_vp))) {
948 file_drop(bsdata->backingfd);
949 return(error);
950 }
951
952 if (vnode_mount(vp) == vnode_mount(di_vp)) {
953 (void)vnode_put(di_vp);
954 file_drop(bsdata->backingfd);
955 return (EINVAL);
956 }
957
958 /*
959 * Obtain the backing fs root vnode and keep a reference
960 * on it. This reference will be dropped in hfs_unmount.
961 */
962 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
963 if (error) {
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (error);
967 }
968 vnode_ref(bsfs_rootvp);
969 vnode_put(bsfs_rootvp);
970
971 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
972 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
973 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
974 hfsmp->hfs_sparsebandblks *= 4;
975
976 (void)vnode_put(di_vp);
977 file_drop(bsdata->backingfd);
978 return (0);
979 }
980 case HFS_CLRBACKINGSTOREINFO: {
981 struct vnode * tmpvp;
982
983 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
984 if (suser(cred, NULL) &&
985 kauth_cred_getuid(cred) != vfsp->f_owner) {
986 return (EACCES); /* must be owner of file system */
987 }
988 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
989 hfsmp->hfs_backingfs_rootvp) {
990
991 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
992 tmpvp = hfsmp->hfs_backingfs_rootvp;
993 hfsmp->hfs_backingfs_rootvp = NULLVP;
994 hfsmp->hfs_sparsebandblks = 0;
995 vnode_rele(tmpvp);
996 }
997 return (0);
998 }
999 #endif /* HFS_SPARSE_DEV */
1000
1001 case F_FREEZE_FS: {
1002 struct mount *mp;
1003 task_t task;
1004
1005 if (!is_suser())
1006 return (EACCES);
1007
1008 mp = vnode_mount(vp);
1009 hfsmp = VFSTOHFS(mp);
1010
1011 if (!(hfsmp->jnl))
1012 return (ENOTSUP);
1013
1014 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1015
1016 task = current_task();
1017 task_working_set_disable(task);
1018
1019 // flush things before we get started to try and prevent
1020 // dirty data from being paged out while we're frozen.
1021 // note: can't do this after taking the lock as it will
1022 // deadlock against ourselves.
1023 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1024 hfs_global_exclusive_lock_acquire(hfsmp);
1025 journal_flush(hfsmp->jnl);
1026
1027 // don't need to iterate on all vnodes, we just need to
1028 // wait for writes to the system files and the device vnode
1029 if (HFSTOVCB(hfsmp)->extentsRefNum)
1030 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1031 if (HFSTOVCB(hfsmp)->catalogRefNum)
1032 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1033 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1034 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1035 if (hfsmp->hfs_attribute_vp)
1036 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1037 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1038
1039 hfsmp->hfs_freezing_proc = current_proc();
1040
1041 return (0);
1042 }
1043
1044 case F_THAW_FS: {
1045 if (!is_suser())
1046 return (EACCES);
1047
1048 // if we're not the one who froze the fs then we
1049 // can't thaw it.
1050 if (hfsmp->hfs_freezing_proc != current_proc()) {
1051 return EPERM;
1052 }
1053
1054 // NOTE: if you add code here, also go check the
1055 // code that "thaws" the fs in hfs_vnop_close()
1056 //
1057 hfsmp->hfs_freezing_proc = NULL;
1058 hfs_global_exclusive_lock_release(hfsmp);
1059 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1060
1061 return (0);
1062 }
1063
1064 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1065 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1066
1067 case HFS_BULKACCESS_FSCTL:
1068 case HFS_BULKACCESS: {
1069 /*
1070 * NOTE: on entry, the vnode is locked. In case this vnode
1071 * happens to be in our list of file_ids, we note it so we
1072 * avoid calling hfs_chashget_nowait() on that id, as that
1073 * would cause a "locking against myself" panic.
1074 */
1075 Boolean check_leaf = true;
1076
1077 struct user_access_t *user_access_structp;
1078 struct user_access_t tmp_user_access_t;
1079 struct access_cache cache;
1080
1081 int error = 0, i;
1082
1083 dev_t dev = VTOC(vp)->c_dev;
1084
1085 short flags;
1086 struct ucred myucred; /* XXX ILLEGAL */
1087 int num_files;
1088 int *file_ids = NULL;
1089 short *access = NULL;
1090
1091 cnid_t cnid;
1092 cnid_t prevParent_cnid = 0;
1093 unsigned long myPerms;
1094 short myaccess = 0;
1095 struct cat_attr cnattr;
1096 CatalogKey catkey;
1097 struct cnode *skip_cp = VTOC(vp);
1098 struct vfs_context my_context;
1099
1100 /* first, return error if not run as root */
1101 if (cred->cr_ruid != 0) {
1102 return EPERM;
1103 }
1104
1105 /* initialize the local cache and buffers */
1106 cache.numcached = 0;
1107 cache.cachehits = 0;
1108 cache.lookups = 0;
1109
1110 file_ids = (int *) get_pathbuff();
1111 access = (short *) get_pathbuff();
1112 cache.acache = (int *) get_pathbuff();
1113 cache.haveaccess = (Boolean *) get_pathbuff();
1114
1115 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1116 release_pathbuff((char *) file_ids);
1117 release_pathbuff((char *) access);
1118 release_pathbuff((char *) cache.acache);
1119 release_pathbuff((char *) cache.haveaccess);
1120
1121 return ENOMEM;
1122 }
1123
1124 /* struct copyin done during dispatch... need to copy file_id array separately */
1125 if (ap->a_data == NULL) {
1126 error = EINVAL;
1127 goto err_exit_bulk_access;
1128 }
1129
1130 if (is64bit) {
1131 user_access_structp = (struct user_access_t *)ap->a_data;
1132 }
1133 else {
1134 struct access_t * accessp = (struct access_t *)ap->a_data;
1135 tmp_user_access_t.uid = accessp->uid;
1136 tmp_user_access_t.flags = accessp->flags;
1137 tmp_user_access_t.num_groups = accessp->num_groups;
1138 tmp_user_access_t.num_files = accessp->num_files;
1139 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1140 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1141 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1142 user_access_structp = &tmp_user_access_t;
1143 }
1144
1145 num_files = user_access_structp->num_files;
1146 if (num_files < 1) {
1147 goto err_exit_bulk_access;
1148 }
1149 if (num_files > 256) {
1150 error = EINVAL;
1151 goto err_exit_bulk_access;
1152 }
1153
1154 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1155 num_files * sizeof(int)))) {
1156 goto err_exit_bulk_access;
1157 }
1158
1159 /* fill in the ucred structure */
1160 flags = user_access_structp->flags;
1161 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1162 flags = R_OK;
1163 }
1164
1165 /* check if we've been passed leaf node ids or parent ids */
1166 if (flags & PARENT_IDS_FLAG) {
1167 check_leaf = false;
1168 }
1169
1170 memset(&myucred, 0, sizeof(myucred));
1171 myucred.cr_ref = 1;
1172 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1173 myucred.cr_ngroups = user_access_structp->num_groups;
1174 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1175 myucred.cr_ngroups = 0;
1176 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1177 myucred.cr_ngroups * sizeof(gid_t)))) {
1178 goto err_exit_bulk_access;
1179 }
1180 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1181 myucred.cr_gmuid = myucred.cr_uid;
1182
1183 my_context.vc_proc = p;
1184 my_context.vc_ucred = &myucred;
1185
1186 /* Check access to each file_id passed in */
1187 for (i = 0; i < num_files; i++) {
1188 #if 0
1189 cnid = (cnid_t) file_ids[i];
1190
1191 /* root always has access */
1192 if (!suser(&myucred, NULL)) {
1193 access[i] = 0;
1194 continue;
1195 }
1196
1197 if (check_leaf) {
1198
1199 /* do the lookup (checks the cnode hash, then the catalog) */
1200 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1201 if (error) {
1202 access[i] = (short) error;
1203 continue;
1204 }
1205
1206 /* before calling CheckAccess(), check the target file for read access */
1207 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1208 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1209
1210
1211 /* fail fast if no access */
1212 if ((myPerms & flags) == 0) {
1213 access[i] = EACCES;
1214 continue;
1215 }
1216 } else {
1217 /* we were passed an array of parent ids */
1218 catkey.hfsPlus.parentID = cnid;
1219 }
1220
1221 /* if the last guy had the same parent and had access, we're done */
1222 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1223 cache.cachehits++;
1224 access[i] = 0;
1225 continue;
1226 }
1227
1228 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1229 skip_cp, p, &myucred, dev);
1230
1231 if ( myaccess ) {
1232 access[i] = 0; // have access.. no errors to report
1233 } else {
1234 access[i] = (error != 0 ? (short) error : EACCES);
1235 }
1236
1237 prevParent_cnid = catkey.hfsPlus.parentID;
1238 #else
1239 int myErr;
1240
1241 cnid = (cnid_t)file_ids[i];
1242
1243 while (cnid >= kRootDirID) {
1244 /* get the vnode for this cnid */
1245 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1246 if ( myErr ) {
1247 access[i] = EACCES;
1248 break;
1249 }
1250
1251 cnid = VTOC(vp)->c_parentcnid;
1252
1253 hfs_unlock(VTOC(vp));
1254 if (vnode_vtype(vp) == VDIR) {
1255 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1256 } else {
1257 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1258 }
1259 vnode_put(vp);
1260 access[i] = myErr;
1261 if (myErr) {
1262 break;
1263 }
1264 }
1265 #endif
1266 }
1267
1268 /* copyout the access array */
1269 if ((error = copyout((caddr_t)access, user_access_structp->access,
1270 num_files * sizeof (short)))) {
1271 goto err_exit_bulk_access;
1272 }
1273
1274 err_exit_bulk_access:
1275
1276 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1277
1278 release_pathbuff((char *) cache.acache);
1279 release_pathbuff((char *) cache.haveaccess);
1280 release_pathbuff((char *) file_ids);
1281 release_pathbuff((char *) access);
1282
1283 return (error);
1284 } /* HFS_BULKACCESS */
1285
1286 case HFS_SETACLSTATE: {
1287 int state;
1288
1289 if (ap->a_data == NULL) {
1290 return (EINVAL);
1291 }
1292
1293 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1294 state = *(int *)ap->a_data;
1295
1296 // The super-user can enable or disable ACLs on a volume.
1297 // The volume owner can only enable ACLs.
1298 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1299 return (EPERM);
1300 }
1301 if (state == 0 || state == 1)
1302 return hfs_setextendedsecurity(hfsmp, state);
1303 else
1304 return (EINVAL);
1305 }
1306
1307 case F_FULLFSYNC: {
1308 int error;
1309
1310 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1311 if (error == 0) {
1312 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1313 hfs_unlock(VTOC(vp));
1314 }
1315
1316 return error;
1317 }
1318
1319 case F_CHKCLEAN: {
1320 register struct cnode *cp;
1321 int error;
1322
1323 if (!vnode_isreg(vp))
1324 return EINVAL;
1325
1326 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1327 if (error == 0) {
1328 cp = VTOC(vp);
1329 /*
1330 * used by regression test to determine if
1331 * all the dirty pages (via write) have been cleaned
1332 * after a call to 'fsync'.
1333 */
1334 error = is_file_clean(vp, VTOF(vp)->ff_size);
1335 hfs_unlock(cp);
1336 }
1337 return (error);
1338 }
1339
1340 case F_RDADVISE: {
1341 register struct radvisory *ra;
1342 struct filefork *fp;
1343 int error;
1344
1345 if (!vnode_isreg(vp))
1346 return EINVAL;
1347
1348 ra = (struct radvisory *)(ap->a_data);
1349 fp = VTOF(vp);
1350
1351 /* Protect against a size change. */
1352 hfs_lock_truncate(VTOC(vp), TRUE);
1353
1354 if (ra->ra_offset >= fp->ff_size) {
1355 error = EFBIG;
1356 } else {
1357 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1358 }
1359
1360 hfs_unlock_truncate(VTOC(vp));
1361 return (error);
1362 }
1363
1364 case F_READBOOTSTRAP:
1365 case F_WRITEBOOTSTRAP:
1366 {
1367 struct vnode *devvp = NULL;
1368 user_fbootstraptransfer_t *user_bootstrapp;
1369 int devBlockSize;
1370 int error;
1371 uio_t auio;
1372 daddr64_t blockNumber;
1373 u_long blockOffset;
1374 u_long xfersize;
1375 struct buf *bp;
1376 user_fbootstraptransfer_t user_bootstrap;
1377
1378 if (!vnode_isvroot(vp))
1379 return (EINVAL);
1380 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1381 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1382 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t.
1383 */
1384 if (is64bit) {
1385 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1386 }
1387 else {
1388 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1389 user_bootstrapp = &user_bootstrap;
1390 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1391 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1392 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1393 }
1394 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1395 return EINVAL;
1396
1397 devvp = VTOHFS(vp)->hfs_devvp;
1398 auio = uio_create(1, user_bootstrapp->fbt_offset,
1399 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1400 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1401 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1402
1403 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1404
1405 while (uio_resid(auio) > 0) {
1406 blockNumber = uio_offset(auio) / devBlockSize;
1407 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1408 if (error) {
1409 if (bp) buf_brelse(bp);
1410 uio_free(auio);
1411 return error;
1412 };
1413
1414 blockOffset = uio_offset(auio) % devBlockSize;
1415 xfersize = devBlockSize - blockOffset;
1416 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1417 if (error) {
1418 buf_brelse(bp);
1419 uio_free(auio);
1420 return error;
1421 };
1422 if (uio_rw(auio) == UIO_WRITE) {
1423 error = VNOP_BWRITE(bp);
1424 if (error) {
1425 uio_free(auio);
1426 return error;
1427 }
1428 } else {
1429 buf_brelse(bp);
1430 };
1431 };
1432 uio_free(auio);
1433 };
1434 return 0;
1435
1436 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1437 {
1438 if (is64bit) {
1439 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1440 }
1441 else {
1442 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1443 }
1444 return 0;
1445 }
1446
1447 case HFS_GET_MOUNT_TIME:
1448 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1449 break;
1450
1451 case HFS_GET_LAST_MTIME:
1452 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1453 break;
1454
1455 case HFS_SET_BOOT_INFO:
1456 if (!vnode_isvroot(vp))
1457 return(EINVAL);
1458 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1459 return(EACCES); /* must be superuser or owner of filesystem */
1460 HFS_MOUNT_LOCK(hfsmp, TRUE);
1461 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1462 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1463 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1464 break;
1465
1466 case HFS_GET_BOOT_INFO:
1467 if (!vnode_isvroot(vp))
1468 return(EINVAL);
1469 HFS_MOUNT_LOCK(hfsmp, TRUE);
1470 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1471 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1472 break;
1473
1474 default:
1475 return (ENOTTY);
1476 }
1477
1478 /* Should never get here */
1479 return 0;
1480 }
1481
1482 /*
1483 * select
1484 */
1485 int
1486 hfs_vnop_select(__unused struct vnop_select_args *ap)
1487 /*
1488 struct vnop_select_args {
1489 vnode_t a_vp;
1490 int a_which;
1491 int a_fflags;
1492 void *a_wql;
1493 vfs_context_t a_context;
1494 };
1495 */
1496 {
1497 /*
1498 * We should really check to see if I/O is possible.
1499 */
1500 return (1);
1501 }
1502
1503 /*
1504 * Converts a logical block number to a physical block, and optionally returns
1505 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1506 * The physical block number is based on the device block size, currently 512.
1507 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1508 */
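/*
 * Illustrative example: with a 4096-byte logical block size, logical
 * block 10 is byte offset 40960 in the fork; MapFileBlockC translates
 * that offset into a 512-byte device block within the file's extents.
 * If 32768 contiguous bytes remain in that extent, *runp comes back as
 * (32768 / 4096) - 1 = 7 additional logical blocks.
 */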
1509 int
1510 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1511 {
1512 struct cnode *cp = VTOC(vp);
1513 struct filefork *fp = VTOF(vp);
1514 struct hfsmount *hfsmp = VTOHFS(vp);
1515 int retval = E_NONE;
1516 daddr_t logBlockSize;
1517 size_t bytesContAvail = 0;
1518 off_t blockposition;
1519 int lockExtBtree;
1520 int lockflags = 0;
1521
1522 /*
1523 * Check for underlying vnode requests and ensure that logical
1524 * to physical mapping is requested.
1525 */
1526 if (vpp != NULL)
1527 *vpp = cp->c_devvp;
1528 if (bnp == NULL)
1529 return (0);
1530
1531 logBlockSize = GetLogicalBlockSize(vp);
1532 blockposition = (off_t)bn * (off_t)logBlockSize;
1533
1534 lockExtBtree = overflow_extents(fp);
1535
1536 if (lockExtBtree)
1537 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1538
1539 retval = MacToVFSError(
1540 MapFileBlockC (HFSTOVCB(hfsmp),
1541 (FCB*)fp,
1542 MAXPHYSIO,
1543 blockposition,
1544 bnp,
1545 &bytesContAvail));
1546
1547 if (lockExtBtree)
1548 hfs_systemfile_unlock(hfsmp, lockflags);
1549
1550 if (retval == E_NONE) {
1551 /* Figure out how many read ahead blocks there are */
1552 if (runp != NULL) {
1553 if (can_cluster(logBlockSize)) {
1554 /* Make sure this result never goes negative: */
1555 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1556 } else {
1557 *runp = 0;
1558 }
1559 }
1560 }
1561 return (retval);
1562 }
1563
1564 /*
1565 * Convert logical block number to file offset.
1566 */
1567 int
1568 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1569 /*
1570 struct vnop_blktooff_args {
1571 vnode_t a_vp;
1572 daddr64_t a_lblkno;
1573 off_t *a_offset;
1574 };
1575 */
1576 {
1577 if (ap->a_vp == NULL)
1578 return (EINVAL);
1579 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1580
1581 return(0);
1582 }
1583
1584 /*
1585 * Convert file offset to logical block number.
1586 */
1587 int
1588 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1589 /*
1590 struct vnop_offtoblk_args {
1591 vnode_t a_vp;
1592 off_t a_offset;
1593 daddr64_t *a_lblkno;
1594 };
1595 */
1596 {
1597 if (ap->a_vp == NULL)
1598 return (EINVAL);
1599 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1600
1601 return(0);
1602 }
1603
1604 /*
1605 * Map file offset to physical block number.
1606 *
1607 * System file cnodes are expected to be locked (shared or exclusive).
1608 */
1609 int
1610 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1611 /*
1612 struct vnop_blockmap_args {
1613 vnode_t a_vp;
1614 off_t a_foffset;
1615 size_t a_size;
1616 daddr64_t *a_bpn;
1617 size_t *a_run;
1618 void *a_poff;
1619 int a_flags;
1620 vfs_context_t a_context;
1621 };
1622 */
1623 {
1624 struct vnode *vp = ap->a_vp;
1625 struct cnode *cp;
1626 struct filefork *fp;
1627 struct hfsmount *hfsmp;
1628 size_t bytesContAvail = 0;
1629 int retval = E_NONE;
1630 int syslocks = 0;
1631 int lockflags = 0;
1632 struct rl_entry *invalid_range;
1633 enum rl_overlaptype overlaptype;
1634 int started_tr = 0;
1635 int tooklock = 0;
1636
1637 /* Do not allow blockmap operation on a directory */
1638 if (vnode_isdir(vp)) {
1639 return (ENOTSUP);
1640 }
1641
1642 /*
1643 * Check for underlying vnode requests and ensure that logical
1644 * to physical mapping is requested.
1645 */
1646 if (ap->a_bpn == NULL)
1647 return (0);
1648
1649 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1650 if (VTOC(vp)->c_lockowner != current_thread()) {
1651 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1652 tooklock = 1;
1653 } else {
1654 cp = VTOC(vp);
1655 panic("blockmap: %s cnode lock already held!\n",
1656 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1657 }
1658 }
1659 hfsmp = VTOHFS(vp);
1660 cp = VTOC(vp);
1661 fp = VTOF(vp);
1662
1663 retry:
1664 if (fp->ff_unallocblocks) {
1665 if (hfs_start_transaction(hfsmp) != 0) {
1666 retval = EINVAL;
1667 goto exit;
1668 } else {
1669 started_tr = 1;
1670 }
1671 syslocks = SFL_EXTENTS | SFL_BITMAP;
1672
1673 } else if (overflow_extents(fp)) {
1674 syslocks = SFL_EXTENTS;
1675 }
1676
1677 if (syslocks)
1678 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1679
1680 /*
1681 * Check for any delayed allocations.
1682 */
1683 if (fp->ff_unallocblocks) {
1684 SInt64 actbytes;
1685 u_int32_t loanedBlocks;
1686
1687 //
1688 // Make sure we have a transaction. It's possible
1689 // that we came in and fp->ff_unallocblocks was zero
1690 // but during the time we blocked acquiring the extents
1691 // btree, ff_unallocblocks became non-zero and so we
1692 // will need to start a transaction.
1693 //
1694 if (started_tr == 0) {
1695 if (syslocks) {
1696 hfs_systemfile_unlock(hfsmp, lockflags);
1697 syslocks = 0;
1698 }
1699 goto retry;
1700 }
1701
1702 /*
1703 * Note: ExtendFileC will release any blocks on loan and
1704 * acquire real blocks. So we ask to extend by zero bytes
1705 * since ExtendFileC will account for the virtual blocks.
1706 */
1707
1708 loanedBlocks = fp->ff_unallocblocks;
1709 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1710 kEFAllMask | kEFNoClumpMask, &actbytes);
1711
1712 if (retval) {
1713 fp->ff_unallocblocks = loanedBlocks;
1714 cp->c_blocks += loanedBlocks;
1715 fp->ff_blocks += loanedBlocks;
1716
1717 HFS_MOUNT_LOCK(hfsmp, TRUE);
1718 hfsmp->loanedBlocks += loanedBlocks;
1719 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1720 }
1721
1722 if (retval) {
1723 hfs_systemfile_unlock(hfsmp, lockflags);
1724 cp->c_flag |= C_MODIFIED;
1725 if (started_tr) {
1726 (void) hfs_update(vp, TRUE);
1727 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1728
1729 hfs_end_transaction(hfsmp);
1730 }
1731 goto exit;
1732 }
1733 }
1734
1735 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1736 ap->a_bpn, &bytesContAvail);
1737 if (syslocks) {
1738 hfs_systemfile_unlock(hfsmp, lockflags);
1739 syslocks = 0;
1740 }
1741
1742 if (started_tr) {
1743 (void) hfs_update(vp, TRUE);
1744 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1745 hfs_end_transaction(hfsmp);
1746 started_tr = 0;
1747 }
1748 if (retval) {
1749 goto exit;
1750 }
1751
1752 /* Adjust the mapping information for invalid file ranges: */
1753 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1754 ap->a_foffset + (off_t)bytesContAvail - 1,
1755 &invalid_range);
1756 if (overlaptype != RL_NOOVERLAP) {
1757 switch(overlaptype) {
1758 case RL_MATCHINGOVERLAP:
1759 case RL_OVERLAPCONTAINSRANGE:
1760 case RL_OVERLAPSTARTSBEFORE:
1761 /* There's no valid block for this byte offset: */
1762 *ap->a_bpn = (daddr64_t)-1;
1763 /* There's no point limiting the amount to be returned
1764 * if the invalid range that was hit extends all the way
1765 * to the EOF (i.e. there are no valid bytes between the
1766 * end of this range and the file's EOF):
1767 */
1768 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1769 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1770 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1771 }
1772 break;
1773
1774 case RL_OVERLAPISCONTAINED:
1775 case RL_OVERLAPENDSAFTER:
1776 /* The range of interest hits an invalid block before the end: */
1777 if (invalid_range->rl_start == ap->a_foffset) {
1778 /* There's actually no valid information to be had starting here: */
1779 *ap->a_bpn = (daddr64_t)-1;
1780 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1781 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1782 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1783 }
1784 } else {
1785 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1786 }
1787 break;
1788
1789 case RL_NOOVERLAP:
1790 break;
1791 } /* end switch */
1792 if (bytesContAvail > ap->a_size)
1793 bytesContAvail = ap->a_size;
1794 }
1795 if (ap->a_run)
1796 *ap->a_run = bytesContAvail;
1797
1798 if (ap->a_poff)
1799 *(int *)ap->a_poff = 0;
1800 exit:
1801 if (tooklock)
1802 hfs_unlock(cp);
1803
1804 return (MacToVFSError(retval));
1805 }
1806
1807
1808 /*
1809 * prepare and issue the I/O
1810 * buf_strategy knows how to deal
1811 * with requests that require
1812 * fragmented I/Os
1813 */
1814 int
1815 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1816 {
1817 buf_t bp = ap->a_bp;
1818 vnode_t vp = buf_vnode(bp);
1819 struct cnode *cp = VTOC(vp);
1820
1821 return (buf_strategy(cp->c_devvp, ap));
1822 }
1823
1824
1825 static int
1826 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1827 {
1828 register struct cnode *cp = VTOC(vp);
1829 struct filefork *fp = VTOF(vp);
1830 struct proc *p = vfs_context_proc(context);
1831 kauth_cred_t cred = vfs_context_ucred(context);
1832 int retval;
1833 off_t bytesToAdd;
1834 off_t actualBytesAdded;
1835 off_t filebytes;
1836 u_int64_t old_filesize;
1837 u_long fileblocks;
1838 int blksize;
1839 struct hfsmount *hfsmp;
1840 int lockflags;
1841
1842 blksize = VTOVCB(vp)->blockSize;
1843 fileblocks = fp->ff_blocks;
1844 filebytes = (off_t)fileblocks * (off_t)blksize;
1845 old_filesize = fp->ff_size;
1846
1847 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1848 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1849
1850 if (length < 0)
1851 return (EINVAL);
1852
1853 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1854 return (EFBIG);
1855
1856 hfsmp = VTOHFS(vp);
1857
1858 retval = E_NONE;
1859
1860 /* Files that are changing size are not hot file candidates. */
1861 if (hfsmp->hfc_stage == HFC_RECORDING) {
1862 fp->ff_bytesread = 0;
1863 }
1864
1865 /*
1866 * We cannot just check if fp->ff_size == length (as an optimization)
1867 * since there may be extra physical blocks that also need truncation.
1868 */
1869 #if QUOTA
1870 if ((retval = hfs_getinoquota(cp)))
1871 return(retval);
1872 #endif /* QUOTA */
1873
1874 /*
1875 * Lengthen the size of the file. We must ensure that the
1876 * last byte of the file is allocated. Since the smallest
1877 * value of ff_size is 0, length will be at least 1.
1878 */
1879 if (length > (off_t)fp->ff_size) {
1880 #if QUOTA
1881 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1882 cred, 0);
1883 if (retval)
1884 goto Err_Exit;
1885 #endif /* QUOTA */
1886 /*
1887 * If we don't have enough physical space then
1888 * we need to extend the physical size.
1889 */
1890 if (length > filebytes) {
1891 int eflags;
1892 u_long blockHint = 0;
1893
1894 /* All or nothing and don't round up to clumpsize. */
1895 eflags = kEFAllMask | kEFNoClumpMask;
1896
1897 if (cred && suser(cred, NULL) != 0)
1898 eflags |= kEFReserveMask; /* keep a reserve */
1899
1900 /*
1901 * Allocate Journal and Quota files in metadata zone.
1902 */
1903 if (filebytes == 0 &&
1904 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1905 hfs_virtualmetafile(cp)) {
1906 eflags |= kEFMetadataMask;
1907 blockHint = hfsmp->hfs_metazone_start;
1908 }
1909 if (hfs_start_transaction(hfsmp) != 0) {
1910 retval = EINVAL;
1911 goto Err_Exit;
1912 }
1913
1914 /* Protect extents b-tree and allocation bitmap */
1915 lockflags = SFL_BITMAP;
1916 if (overflow_extents(fp))
1917 lockflags |= SFL_EXTENTS;
1918 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1919
1920 while ((length > filebytes) && (retval == E_NONE)) {
1921 bytesToAdd = length - filebytes;
1922 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1923 (FCB*)fp,
1924 bytesToAdd,
1925 blockHint,
1926 eflags,
1927 &actualBytesAdded));
1928
1929 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1930 if (actualBytesAdded == 0 && retval == E_NONE) {
1931 if (length > filebytes)
1932 length = filebytes;
1933 break;
1934 }
1935 } /* endwhile */
1936
1937 hfs_systemfile_unlock(hfsmp, lockflags);
1938
1939 if (hfsmp->jnl) {
1940 (void) hfs_update(vp, TRUE);
1941 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1942 }
1943
1944 hfs_end_transaction(hfsmp);
1945
1946 if (retval)
1947 goto Err_Exit;
1948
1949 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1950 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1951 }
1952
1953 if (!(flags & IO_NOZEROFILL)) {
1954 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1955 struct rl_entry *invalid_range;
1956 off_t zero_limit;
1957
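				/*
				 * zero_limit: the old EOF rounded up to the next page boundary
				 * (capped at the new length).  Only the tail of the partially
				 * valid last page needs explicit zeroing below.
				 */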
1958 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1959 if (length < zero_limit) zero_limit = length;
1960
1961 if (length > (off_t)fp->ff_size) {
1962 struct timeval tv;
1963
1964 /* Extending the file: time to fill out the current last page w. zeroes? */
1965 if ((fp->ff_size & PAGE_MASK_64) &&
1966 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1967 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1968
1969 /* There's some valid data at the start of the (current) last page
1970 of the file, so zero out the remainder of that page to ensure the
1971 entire page contains valid data. Since there is no invalid range
1972 possible past the (current) eof, there's no need to remove anything
1973 from the invalid range list before calling cluster_write(): */
1974 hfs_unlock(cp);
1975 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1976 fp->ff_size, (off_t)0,
1977 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1978 hfs_lock(cp, HFS_FORCE_LOCK);
1979 if (retval) goto Err_Exit;
1980
1981 /* Merely invalidate the remaining area, if necessary: */
1982 if (length > zero_limit) {
1983 microuptime(&tv);
1984 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1985 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1986 }
1987 } else {
1988 /* The page containing the (current) eof is invalid: just add the
1989 remainder of the page to the invalid list, along with the area
1990 being newly allocated:
1991 */
1992 microuptime(&tv);
1993 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1994 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1995 };
1996 }
1997 } else {
1998 panic("hfs_truncate: invoked on non-UBC object?!");
1999 };
2000 }
2001 cp->c_touch_modtime = TRUE;
2002 fp->ff_size = length;
2003
2004 /* Nested transactions will do their own ubc_setsize. */
2005 if (!skipsetsize) {
2006 /*
2007 * ubc_setsize can cause a pagein here
2008 * so we need to drop cnode lock.
2009 */
2010 hfs_unlock(cp);
2011 ubc_setsize(vp, length);
2012 hfs_lock(cp, HFS_FORCE_LOCK);
2013 }
2014
2015 } else { /* Shorten the size of the file */
2016
2017 if ((off_t)fp->ff_size > length) {
2018 /*
2019 * Any buffers that are past the truncation point need to be
2020 * invalidated (to maintain buffer cache consistency).
2021 */
2022
2023 /* Nested transactions will do their own ubc_setsize. */
2024 if (!skipsetsize) {
2025 /*
2026 * ubc_setsize can cause a pageout here
2027 * so we need to drop cnode lock.
2028 */
2029 hfs_unlock(cp);
2030 ubc_setsize(vp, length);
2031 hfs_lock(cp, HFS_FORCE_LOCK);
2032 }
2033
2034 /* Any space previously marked as invalid is now irrelevant: */
2035 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2036 }
2037
2038 /*
2039 * Account for any unmapped blocks. Note that the new
2040 * file length can still end up with unmapped blocks.
2041 */
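		/*
		 * (Descriptive note) ff_unallocblocks are blocks "loaned" by delayed
		 * allocation: they are counted in ff_blocks and c_blocks but are not
		 * yet mapped on disk.  Return the loan here, then re-borrow only what
		 * the new length still needs, using the ceiling division
		 * (length + blksize - 1) / blksize below.
		 */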
2042 if (fp->ff_unallocblocks > 0) {
2043 u_int32_t finalblks;
2044 u_int32_t loanedBlocks;
2045
2046 HFS_MOUNT_LOCK(hfsmp, TRUE);
2047
2048 loanedBlocks = fp->ff_unallocblocks;
2049 cp->c_blocks -= loanedBlocks;
2050 fp->ff_blocks -= loanedBlocks;
2051 fp->ff_unallocblocks = 0;
2052
2053 hfsmp->loanedBlocks -= loanedBlocks;
2054
2055 finalblks = (length + blksize - 1) / blksize;
2056 if (finalblks > fp->ff_blocks) {
2057 /* calculate required unmapped blocks */
2058 loanedBlocks = finalblks - fp->ff_blocks;
2059 hfsmp->loanedBlocks += loanedBlocks;
2060
2061 fp->ff_unallocblocks = loanedBlocks;
2062 cp->c_blocks += loanedBlocks;
2063 fp->ff_blocks += loanedBlocks;
2064 }
2065 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2066 }
2067
2068 /*
2069 * For a TBE process the deallocation of the file blocks is
2070 * delayed until the file is closed. And hfs_close calls
2071 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2072 * isn't set, we make sure this isn't a TBE process.
2073 */
2074 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2075 #if QUOTA
2076 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2077 #endif /* QUOTA */
2078 if (hfs_start_transaction(hfsmp) != 0) {
2079 retval = EINVAL;
2080 goto Err_Exit;
2081 }
2082
2083 if (fp->ff_unallocblocks == 0) {
2084 /* Protect extents b-tree and allocation bitmap */
2085 lockflags = SFL_BITMAP;
2086 if (overflow_extents(fp))
2087 lockflags |= SFL_EXTENTS;
2088 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2089
2090 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2091 (FCB*)fp, length, false));
2092
2093 hfs_systemfile_unlock(hfsmp, lockflags);
2094 }
2095 if (hfsmp->jnl) {
2096 if (retval == 0) {
2097 fp->ff_size = length;
2098 }
2099 (void) hfs_update(vp, TRUE);
2100 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2101 }
2102
2103 hfs_end_transaction(hfsmp);
2104
2105 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2106 if (retval)
2107 goto Err_Exit;
2108 #if QUOTA
2109 /* These are bytesreleased */
2110 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2111 #endif /* QUOTA */
2112 }
2113 /* Only set update flag if the logical length changes */
2114 if (old_filesize != length)
2115 cp->c_touch_modtime = TRUE;
2116 fp->ff_size = length;
2117 }
2118 cp->c_touch_chgtime = TRUE;
2119 retval = hfs_update(vp, MNT_WAIT);
2120 if (retval) {
2121 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2122 -1, -1, -1, retval, 0);
2123 }
2124
2125 Err_Exit:
2126
2127 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2128 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2129
2130 return (retval);
2131 }
2132
2133
2134
2135 /*
2136 * Truncate a cnode to at most length size, freeing (or adding) the
2137 * disk blocks.
2138 */
2139 __private_extern__
2140 int
2141 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2142 vfs_context_t context)
2143 {
2144 struct filefork *fp = VTOF(vp);
2145 off_t filebytes;
2146 u_long fileblocks;
2147 int blksize, error = 0;
2148 struct cnode *cp = VTOC(vp);
2149
2150 if (vnode_isdir(vp))
2151 return (EISDIR); /* cannot truncate an HFS directory! */
2152
2153 blksize = VTOVCB(vp)->blockSize;
2154 fileblocks = fp->ff_blocks;
2155 filebytes = (off_t)fileblocks * (off_t)blksize;
2156
2157 // have to loop truncating or growing files that are
2158 // really big because otherwise transactions can get
2159 // enormous and consume too many kernel resources.
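	// (Hedged note) HFS_BIGFILE_SIZE caps how far the fork grows or shrinks
	// per do_hfs_truncate() call, keeping each journal transaction bounded;
	// a multi-gigabyte shrink therefore proceeds in several passes.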
2160
2161 if (length < filebytes) {
2162 while (filebytes > length) {
2163 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2164 filebytes -= HFS_BIGFILE_SIZE;
2165 } else {
2166 filebytes = length;
2167 }
2168 cp->c_flag |= C_FORCEUPDATE;
2169 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2170 if (error)
2171 break;
2172 }
2173 } else if (length > filebytes) {
2174 while (filebytes < length) {
2175 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2176 filebytes += HFS_BIGFILE_SIZE;
2177 } else {
2178 filebytes = length;
2179 }
2180 cp->c_flag |= C_FORCEUPDATE;
2181 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2182 if (error)
2183 break;
2184 }
2185 } else /* Same logical size */ {
2186
2187 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2188 }
2189 /* Files that are changing size are not hot file candidates. */
2190 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2191 fp->ff_bytesread = 0;
2192 }
2193
2194 return (error);
2195 }
2196
2197
2198
2199 /*
2200 * Preallocate file storage space.
2201 */
2202 int
2203 hfs_vnop_allocate(struct vnop_allocate_args /* {
2204 vnode_t a_vp;
2205 off_t a_length;
2206 u_int32_t a_flags;
2207 off_t *a_bytesallocated;
2208 off_t a_offset;
2209 vfs_context_t a_context;
2210 } */ *ap)
2211 {
2212 struct vnode *vp = ap->a_vp;
2213 struct cnode *cp;
2214 struct filefork *fp;
2215 ExtendedVCB *vcb;
2216 off_t length = ap->a_length;
2217 off_t startingPEOF;
2218 off_t moreBytesRequested;
2219 off_t actualBytesAdded;
2220 off_t filebytes;
2221 u_long fileblocks;
2222 int retval, retval2;
2223 UInt32 blockHint;
2224 UInt32 extendFlags; /* For call to ExtendFileC */
2225 struct hfsmount *hfsmp;
2226 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2227 int lockflags;
2228
2229 *(ap->a_bytesallocated) = 0;
2230
2231 if (!vnode_isreg(vp))
2232 return (EISDIR);
2233 if (length < (off_t)0)
2234 return (EINVAL);
2235
2236 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2237 return (retval);
2238 cp = VTOC(vp);
2239 fp = VTOF(vp);
2240 hfsmp = VTOHFS(vp);
2241 vcb = VTOVCB(vp);
2242
2243 fileblocks = fp->ff_blocks;
2244 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2245
2246 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2247 retval = EINVAL;
2248 goto Err_Exit;
2249 }
2250
2251 /* Fill in the flags word for the call to Extend the file */
2252
2253 extendFlags = kEFNoClumpMask;
2254 if (ap->a_flags & ALLOCATECONTIG)
2255 extendFlags |= kEFContigMask;
2256 if (ap->a_flags & ALLOCATEALL)
2257 extendFlags |= kEFAllMask;
2258 if (cred && suser(cred, NULL) != 0)
2259 extendFlags |= kEFReserveMask;
2260
2261 retval = E_NONE;
2262 blockHint = 0;
2263 startingPEOF = filebytes;
2264
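	/*
	 * ALLOCATEFROMPEOF: a_length is relative to the current physical EOF,
	 * so convert it to an absolute size.  ALLOCATEFROMVOL: a_offset is a
	 * volume position, used only as an allocation block hint.
	 */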
2265 if (ap->a_flags & ALLOCATEFROMPEOF)
2266 length += filebytes;
2267 else if (ap->a_flags & ALLOCATEFROMVOL)
2268 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2269
2270 	/* If no changes are necessary, then we're done */
2271 if (filebytes == length)
2272 goto Std_Exit;
2273
2274 /*
2275 * Lengthen the size of the file. We must ensure that the
2276 * last byte of the file is allocated. Since the smallest
2277 * value of filebytes is 0, length will be at least 1.
2278 */
2279 if (length > filebytes) {
2280 moreBytesRequested = length - filebytes;
2281
2282 #if QUOTA
2283 retval = hfs_chkdq(cp,
2284 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2285 cred, 0);
2286 if (retval)
2287 goto Err_Exit;
2288
2289 #endif /* QUOTA */
2290 /*
2291 * Metadata zone checks.
2292 */
2293 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2294 /*
2295 * Allocate Journal and Quota files in metadata zone.
2296 */
2297 if (hfs_virtualmetafile(cp)) {
2298 extendFlags |= kEFMetadataMask;
2299 blockHint = hfsmp->hfs_metazone_start;
2300 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2301 (blockHint <= hfsmp->hfs_metazone_end)) {
2302 /*
2303 * Move blockHint outside metadata zone.
2304 */
2305 blockHint = hfsmp->hfs_metazone_end + 1;
2306 }
2307 }
2308
2309 if (hfs_start_transaction(hfsmp) != 0) {
2310 retval = EINVAL;
2311 goto Err_Exit;
2312 }
2313
2314 /* Protect extents b-tree and allocation bitmap */
2315 lockflags = SFL_BITMAP;
2316 if (overflow_extents(fp))
2317 lockflags |= SFL_EXTENTS;
2318 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2319
2320 retval = MacToVFSError(ExtendFileC(vcb,
2321 (FCB*)fp,
2322 moreBytesRequested,
2323 blockHint,
2324 extendFlags,
2325 &actualBytesAdded));
2326
2327 *(ap->a_bytesallocated) = actualBytesAdded;
2328 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2329
2330 hfs_systemfile_unlock(hfsmp, lockflags);
2331
2332 if (hfsmp->jnl) {
2333 (void) hfs_update(vp, TRUE);
2334 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2335 }
2336
2337 hfs_end_transaction(hfsmp);
2338
2339 /*
2340 * if we get an error and no changes were made then exit
2341 * otherwise we must do the hfs_update to reflect the changes
2342 */
2343 if (retval && (startingPEOF == filebytes))
2344 goto Err_Exit;
2345
2346 /*
2347 * Adjust actualBytesAdded to be allocation block aligned, not
2348 * clump size aligned.
2349 * NOTE: So what we are reporting does not affect reality
2350 * until the file is closed, when we truncate the file to allocation
2351 * block size.
2352 */
2353 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2354 *(ap->a_bytesallocated) =
2355 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2356
2357 } else { /* Shorten the size of the file */
2358
2359 if (fp->ff_size > length) {
2360 /*
2361 * Any buffers that are past the truncation point need to be
2362 * invalidated (to maintain buffer cache consistency).
2363 */
2364 }
2365
2366 if (hfs_start_transaction(hfsmp) != 0) {
2367 retval = EINVAL;
2368 goto Err_Exit;
2369 }
2370
2371 /* Protect extents b-tree and allocation bitmap */
2372 lockflags = SFL_BITMAP;
2373 if (overflow_extents(fp))
2374 lockflags |= SFL_EXTENTS;
2375 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2376
2377 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2378
2379 hfs_systemfile_unlock(hfsmp, lockflags);
2380
2381 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2382
2383 if (hfsmp->jnl) {
2384 (void) hfs_update(vp, TRUE);
2385 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2386 }
2387
2388 hfs_end_transaction(hfsmp);
2389
2390
2391 /*
2392 * if we get an error and no changes were made then exit
2393 * otherwise we must do the hfs_update to reflect the changes
2394 */
2395 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2396 #if QUOTA
2397 /* These are bytesreleased */
2398 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2399 #endif /* QUOTA */
2400
2401 if (fp->ff_size > filebytes) {
2402 fp->ff_size = filebytes;
2403
2404 hfs_unlock(cp);
2405 ubc_setsize(vp, fp->ff_size);
2406 hfs_lock(cp, HFS_FORCE_LOCK);
2407 }
2408 }
2409
2410 Std_Exit:
2411 cp->c_touch_chgtime = TRUE;
2412 cp->c_touch_modtime = TRUE;
2413 retval2 = hfs_update(vp, MNT_WAIT);
2414
2415 if (retval == 0)
2416 retval = retval2;
2417 Err_Exit:
2418 hfs_unlock(cp);
2419 return (retval);
2420 }
2421
2422
2423 /*
2424 * Pagein for HFS filesystem
2425 */
2426 int
2427 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2428 /*
2429 struct vnop_pagein_args {
2430 vnode_t a_vp,
2431 upl_t a_pl,
2432 vm_offset_t a_pl_offset,
2433 off_t a_f_offset,
2434 size_t a_size,
2435 int a_flags
2436 vfs_context_t a_context;
2437 };
2438 */
2439 {
2440 vnode_t vp = ap->a_vp;
2441 int error;
2442
2443 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2444 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2445 /*
2446 * Keep track of blocks read.
2447 */
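	/*
	 * (Descriptive note) ff_bytesread feeds the adaptive hot-file clustering
	 * recorder; a file not seen since the start of the current sampling
	 * period (hfc_timebase) restarts its count below.
	 */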
2448 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2449 struct cnode *cp;
2450 struct filefork *fp;
2451 int bytesread;
2452 int took_cnode_lock = 0;
2453
2454 cp = VTOC(vp);
2455 fp = VTOF(vp);
2456
2457 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2458 bytesread = fp->ff_size;
2459 else
2460 bytesread = ap->a_size;
2461
2462 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2463 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2464 hfs_lock(cp, HFS_FORCE_LOCK);
2465 took_cnode_lock = 1;
2466 }
2467 /*
2468 * If this file hasn't been seen since the start of
2469 * the current sampling period then start over.
2470 */
2471 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2472 struct timeval tv;
2473
2474 fp->ff_bytesread = bytesread;
2475 microtime(&tv);
2476 cp->c_atime = tv.tv_sec;
2477 } else {
2478 fp->ff_bytesread += bytesread;
2479 }
2480 cp->c_touch_acctime = TRUE;
2481 if (took_cnode_lock)
2482 hfs_unlock(cp);
2483 }
2484 return (error);
2485 }
2486
2487 /*
2488 * Pageout for HFS filesystem.
2489 */
2490 int
2491 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2492 /*
2493 struct vnop_pageout_args {
2494 vnode_t a_vp,
2495 upl_t a_pl,
2496 vm_offset_t a_pl_offset,
2497 off_t a_f_offset,
2498 size_t a_size,
2499 int a_flags
2500 vfs_context_t a_context;
2501 };
2502 */
2503 {
2504 vnode_t vp = ap->a_vp;
2505 struct cnode *cp;
2506 struct filefork *fp;
2507 int retval;
2508 off_t end_of_range;
2509 off_t filesize;
2510
2511 cp = VTOC(vp);
2512 if (cp->c_lockowner == current_thread()) {
2513 panic("pageout: %s cnode lock already held!\n",
2514 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2515 }
2516 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2517 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2518 ubc_upl_abort_range(ap->a_pl,
2519 ap->a_pl_offset,
2520 ap->a_size,
2521 UPL_ABORT_FREE_ON_EMPTY);
2522 }
2523 return (retval);
2524 }
2525 fp = VTOF(vp);
2526
2527 filesize = fp->ff_size;
2528 end_of_range = ap->a_f_offset + ap->a_size - 1;
2529
2530 if (end_of_range >= filesize) {
2531 end_of_range = (off_t)(filesize - 1);
2532 }
2533 if (ap->a_f_offset < filesize) {
2534 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2535 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2536 }
2537 hfs_unlock(cp);
2538
2539 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2540 ap->a_size, filesize, ap->a_flags);
2541
2542 /*
2543 * If data was written, and setuid or setgid bits are set and
2544 * this process is not the superuser then clear the setuid and
2545 * setgid bits as a precaution against tampering.
2546 */
2547 if ((retval == 0) &&
2548 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2549 (vfs_context_suser(ap->a_context) != 0)) {
2550 hfs_lock(cp, HFS_FORCE_LOCK);
2551 cp->c_mode &= ~(S_ISUID | S_ISGID);
2552 cp->c_touch_chgtime = TRUE;
2553 hfs_unlock(cp);
2554 }
2555 return (retval);
2556 }
2557
2558 /*
2559 * Intercept B-Tree node writes to unswap them if necessary.
2560 */
2561 int
2562 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2563 {
2564 int retval = 0;
2565 register struct buf *bp = ap->a_bp;
2566 register struct vnode *vp = buf_vnode(bp);
2567 BlockDescriptor block;
2568
2569 /* Trap B-Tree writes */
2570 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2571 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2572 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2573 (vp == VTOHFS(vp)->hfc_filevp)) {
2574
2575 /*
2576 * Swap and validate the node if it is in native byte order.
2577 	 * This is always true on big endian, so we always validate
2578 	 * before writing here. On little endian, the node typically has
2579 	 * been swapped and validated when it was written to the journal,
2580 * so we won't do anything here.
2581 */
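		/*
		 * (Hedged note) The last UInt16 of a B-tree node is the offset of
		 * record 0, which is always sizeof(BTNodeDescriptor) == 14 (0x000e).
		 * Reading 0x000e in host order here implies the node is still in
		 * native byte order and must be swapped to big endian before writing.
		 */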
2582 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2583 /* Prepare the block pointer */
2584 block.blockHeader = bp;
2585 block.buffer = (char *)buf_dataptr(bp);
2586 block.blockNum = buf_lblkno(bp);
2587 /* not found in cache ==> came from disk */
2588 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2589 block.blockSize = buf_count(bp);
2590
2591 /* Endian un-swap B-Tree node */
2592 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2593 if (retval)
2594 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2595 }
2596 }
2597
2598 /* This buffer shouldn't be locked anymore but if it is clear it */
2599 if ((buf_flags(bp) & B_LOCKED)) {
2600 // XXXdbg
2601 if (VTOHFS(vp)->jnl) {
2602 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2603 }
2604 buf_clearflags(bp, B_LOCKED);
2605 }
2606 retval = vn_bwrite (ap);
2607
2608 return (retval);
2609 }
2610
2611 /*
2612 * Relocate a file to a new location on disk
2613 * cnode must be locked on entry
2614 *
2615 * Relocation occurs by cloning the file's data from its
2616 * current set of blocks to a new set of blocks. During
2617 * the relocation all of the blocks (old and new) are
2618 * owned by the file.
2619 *
2620 * -----------------
2621 * |///////////////|
2622 * -----------------
2623 * 0 N (file offset)
2624 *
2625 * ----------------- -----------------
2626 	 *  |///////////////|     |               |   STEP 1 (acquire new blocks)
2627 * ----------------- -----------------
2628 * 0 N N+1 2N
2629 *
2630 * ----------------- -----------------
2631 * |///////////////| |///////////////| STEP 2 (clone data)
2632 * ----------------- -----------------
2633 * 0 N N+1 2N
2634 *
2635 * -----------------
2636 * |///////////////| STEP 3 (head truncate blocks)
2637 * -----------------
2638 * 0 N
2639 *
2640 * During steps 2 and 3 page-outs to file offsets less
2641 * than or equal to N are suspended.
2642 *
2643 	 * During step 3 page-ins to the file get suspended.
2644 */
2645 __private_extern__
2646 int
2647 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2648 struct proc *p)
2649 {
2650 struct cnode *cp;
2651 struct filefork *fp;
2652 struct hfsmount *hfsmp;
2653 u_int32_t headblks;
2654 u_int32_t datablks;
2655 u_int32_t blksize;
2656 u_int32_t growsize;
2657 u_int32_t nextallocsave;
2658 daddr64_t sector_a, sector_b;
2659 int disabled_caching = 0;
2660 int eflags;
2661 off_t newbytes;
2662 int retval;
2663 int lockflags = 0;
2664 int took_trunc_lock = 0;
2665 int started_tr = 0;
2666 enum vtype vnodetype;
2667
2668 vnodetype = vnode_vtype(vp);
2669 if (vnodetype != VREG && vnodetype != VLNK) {
2670 return (EPERM);
2671 }
2672
2673 hfsmp = VTOHFS(vp);
2674 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2675 return (ENOSPC);
2676 }
2677
2678 cp = VTOC(vp);
2679 fp = VTOF(vp);
2680 if (fp->ff_unallocblocks)
2681 return (EINVAL);
2682 blksize = hfsmp->blockSize;
2683 if (blockHint == 0)
2684 blockHint = hfsmp->nextAllocation;
2685
2686 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2687 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2688 return (EFBIG);
2689 }
2690
2691 //
2692 // We do not believe that this call to hfs_fsync() is
2693 // necessary and it causes a journal transaction
2694 // deadlock so we are removing it.
2695 //
2696 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2697 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2698 // if (retval)
2699 // return (retval);
2700 //}
2701
2702 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2703 hfs_unlock(cp);
2704 hfs_lock_truncate(cp, TRUE);
2705 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2706 hfs_unlock_truncate(cp);
2707 return (retval);
2708 }
2709 took_trunc_lock = 1;
2710 }
2711 headblks = fp->ff_blocks;
2712 datablks = howmany(fp->ff_size, blksize);
2713 growsize = datablks * blksize;
2714 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2715 if (blockHint >= hfsmp->hfs_metazone_start &&
2716 blockHint <= hfsmp->hfs_metazone_end)
2717 eflags |= kEFMetadataMask;
2718
2719 if (hfs_start_transaction(hfsmp) != 0) {
2720 if (took_trunc_lock)
2721 hfs_unlock_truncate(cp);
2722 return (EINVAL);
2723 }
2724 started_tr = 1;
2725 /*
2726 * Protect the extents b-tree and the allocation bitmap
2727 * during MapFileBlockC and ExtendFileC operations.
2728 */
2729 lockflags = SFL_BITMAP;
2730 if (overflow_extents(fp))
2731 lockflags |= SFL_EXTENTS;
2732 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2733
2734 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2735 if (retval) {
2736 retval = MacToVFSError(retval);
2737 goto out;
2738 }
2739
2740 /*
2741 	 * STEP 1 - acquire new allocation blocks.
2742 */
2743 if (!vnode_isnocache(vp)) {
2744 vnode_setnocache(vp);
2745 disabled_caching = 1;
2746
2747 }
2748 nextallocsave = hfsmp->nextAllocation;
2749 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2750 if (eflags & kEFMetadataMask) {
2751 HFS_MOUNT_LOCK(hfsmp, TRUE);
2752 hfsmp->nextAllocation = nextallocsave;
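		/* (Hedged note) vcbFlags |= 0xFF00 is the MarkVCBDirty() idiom, so the
		   restored nextAllocation value gets written back with the header. */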
2753 hfsmp->vcbFlags |= 0xFF00;
2754 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2755 }
2756
2757 retval = MacToVFSError(retval);
2758 if (retval == 0) {
2759 cp->c_flag |= C_MODIFIED;
2760 if (newbytes < growsize) {
2761 retval = ENOSPC;
2762 goto restore;
2763 } else if (fp->ff_blocks < (headblks + datablks)) {
2764 			printf("hfs_relocate: allocation failed\n");
2765 retval = ENOSPC;
2766 goto restore;
2767 }
2768
2769 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2770 if (retval) {
2771 retval = MacToVFSError(retval);
2772 } else if ((sector_a + 1) == sector_b) {
2773 retval = ENOSPC;
2774 goto restore;
2775 } else if ((eflags & kEFMetadataMask) &&
2776 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2777 hfsmp->hfs_metazone_end)) {
2778 printf("hfs_relocate: didn't move into metadata zone\n");
2779 retval = ENOSPC;
2780 goto restore;
2781 }
2782 }
2783 /* Done with system locks and journal for now. */
2784 hfs_systemfile_unlock(hfsmp, lockflags);
2785 lockflags = 0;
2786 hfs_end_transaction(hfsmp);
2787 started_tr = 0;
2788
2789 if (retval) {
2790 /*
2791 * Check to see if failure is due to excessive fragmentation.
2792 */
2793 if ((retval == ENOSPC) &&
2794 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2795 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2796 }
2797 goto out;
2798 }
2799 /*
2800 * STEP 2 - clone file data into the new allocation blocks.
2801 */
2802
2803 if (vnodetype == VLNK)
2804 retval = hfs_clonelink(vp, blksize, cred, p);
2805 else if (vnode_issystem(vp))
2806 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2807 else
2808 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2809
2810 /* Start transaction for step 3 or for a restore. */
2811 if (hfs_start_transaction(hfsmp) != 0) {
2812 retval = EINVAL;
2813 goto out;
2814 }
2815 started_tr = 1;
2816 if (retval)
2817 goto restore;
2818
2819 /*
2820 * STEP 3 - switch to cloned data and remove old blocks.
2821 */
2822 lockflags = SFL_BITMAP;
2823 if (overflow_extents(fp))
2824 lockflags |= SFL_EXTENTS;
2825 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2826
2827 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2828
2829 hfs_systemfile_unlock(hfsmp, lockflags);
2830 lockflags = 0;
2831 if (retval)
2832 goto restore;
2833 out:
2834 if (took_trunc_lock)
2835 hfs_unlock_truncate(cp);
2836
2837 if (lockflags) {
2838 hfs_systemfile_unlock(hfsmp, lockflags);
2839 lockflags = 0;
2840 }
2841
2842 /* Push cnode's new extent data to disk. */
2843 if (retval == 0) {
2844 (void) hfs_update(vp, MNT_WAIT);
2845 }
2846
2847 if (hfsmp->jnl) {
2848 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2849 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2850 else
2851 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2852 }
2853 exit:
2854 if (disabled_caching) {
2855 vnode_clearnocache(vp);
2856 }
2857 if (started_tr)
2858 hfs_end_transaction(hfsmp);
2859
2860 return (retval);
2861
2862 restore:
2863 if (fp->ff_blocks == headblks)
2864 goto exit;
2865 /*
2866 * Give back any newly allocated space.
2867 */
2868 if (lockflags == 0) {
2869 lockflags = SFL_BITMAP;
2870 if (overflow_extents(fp))
2871 lockflags |= SFL_EXTENTS;
2872 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2873 }
2874
2875 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2876
2877 hfs_systemfile_unlock(hfsmp, lockflags);
2878 lockflags = 0;
2879
2880 if (took_trunc_lock)
2881 hfs_unlock_truncate(cp);
2882 goto exit;
2883 }
2884
2885
2886 /*
2887 * Clone a symlink.
2888 *
2889 */
2890 static int
2891 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2892 {
2893 struct buf *head_bp = NULL;
2894 struct buf *tail_bp = NULL;
2895 int error;
2896
2897
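	/*
	 * A symlink's target fits in a single allocation block: logical block 0
	 * holds the original copy, logical block 1 is the newly allocated clone
	 * destination.
	 */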
2898 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2899 if (error)
2900 goto out;
2901
2902 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2903 if (tail_bp == NULL) {
2904 error = EIO;
2905 goto out;
2906 }
2907 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2908 error = (int)buf_bwrite(tail_bp);
2909 out:
2910 if (head_bp) {
2911 buf_markinvalid(head_bp);
2912 buf_brelse(head_bp);
2913 }
2914 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2915
2916 return (error);
2917 }
2918
2919 /*
2920 * Clone a file's data within the file.
2921 *
2922 */
2923 static int
2924 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2925 {
2926 caddr_t bufp;
2927 size_t writebase;
2928 size_t bufsize;
2929 size_t copysize;
2930 size_t iosize;
2931 off_t filesize;
2932 size_t offset;
2933 uio_t auio;
2934 int error = 0;
2935
2936 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2937 writebase = blkstart * blksize;
2938 copysize = blkcnt * blksize;
2939 iosize = bufsize = MIN(copysize, 128 * 1024);
2940 offset = 0;
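	/*
	 * Copy in chunks of at most 128 KB: read from the start of the fork (the
	 * old blocks) and rewrite the same data at writebase, the first of the
	 * newly allocated blocks, bypassing the buffer cache (IO_NOCACHE).
	 */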
2941
2942 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2943 return (ENOMEM);
2944 }
2945 hfs_unlock(VTOC(vp));
2946
2947 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2948
2949 while (offset < copysize) {
2950 iosize = MIN(copysize - offset, iosize);
2951
2952 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2953 uio_addiov(auio, (uintptr_t)bufp, iosize);
2954
2955 error = cluster_read(vp, auio, copysize, 0);
2956 if (error) {
2957 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2958 break;
2959 }
2960 if (uio_resid(auio) != 0) {
2961 			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2962 error = EIO;
2963 break;
2964 }
2965
2966 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2967 uio_addiov(auio, (uintptr_t)bufp, iosize);
2968
2969 error = cluster_write(vp, auio, filesize + offset,
2970 filesize + offset + iosize,
2971 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2972 if (error) {
2973 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2974 break;
2975 }
2976 if (uio_resid(auio) != 0) {
2977 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2978 error = EIO;
2979 break;
2980 }
2981 offset += iosize;
2982 }
2983 uio_free(auio);
2984
2985 /*
2986 * No need to call ubc_sync_range or hfs_invalbuf
2987 * since the file was copied using IO_NOCACHE.
2988 */
2989
2990 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2991
2992 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2993 return (error);
2994 }
2995
2996 /*
2997 * Clone a system (metadata) file.
2998 *
2999 */
3000 static int
3001 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3002 kauth_cred_t cred, struct proc *p)
3003 {
3004 caddr_t bufp;
3005 char * offset;
3006 size_t bufsize;
3007 size_t iosize;
3008 struct buf *bp = NULL;
3009 daddr64_t blkno;
3010 daddr64_t blk;
3011 daddr64_t start_blk;
3012 daddr64_t last_blk;
3013 int breadcnt;
3014 int i;
3015 int error = 0;
3016
3017
3018 iosize = GetLogicalBlockSize(vp);
3019 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
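	/* Cap the staging buffer at 1 MB and round it down to a multiple of the
	   device's logical block size so each pass moves whole blocks. */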
3020 breadcnt = bufsize / iosize;
3021
3022 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3023 return (ENOMEM);
3024 }
3025 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3026 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3027 blkno = 0;
3028
3029 while (blkno < last_blk) {
3030 /*
3031 * Read up to a megabyte
3032 */
3033 offset = bufp;
3034 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3035 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3036 if (error) {
3037 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3038 goto out;
3039 }
3040 if (buf_count(bp) != iosize) {
3041 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3042 goto out;
3043 }
3044 bcopy((char *)buf_dataptr(bp), offset, iosize);
3045
3046 buf_markinvalid(bp);
3047 buf_brelse(bp);
3048 bp = NULL;
3049
3050 offset += iosize;
3051 }
3052
3053 /*
3054 * Write up to a megabyte
3055 */
3056 offset = bufp;
3057 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3058 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3059 if (bp == NULL) {
3060 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3061 error = EIO;
3062 goto out;
3063 }
3064 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3065 error = (int)buf_bwrite(bp);
3066 bp = NULL;
3067 if (error)
3068 goto out;
3069 offset += iosize;
3070 }
3071 }
3072 out:
3073 if (bp) {
3074 buf_brelse(bp);
3075 }
3076
3077 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3078
3079 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3080
3081 return (error);
3082 }