1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* @(#)hfs_readwrite.c 1.0
31 *
32 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
33 *
34 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
35 *
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/resourcevar.h>
41 #include <sys/kernel.h>
42 #include <sys/fcntl.h>
43 #include <sys/filedesc.h>
44 #include <sys/stat.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/kauth.h>
48 #include <sys/vnode.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
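/*
 * A logical block size qualifies for cluster I/O when it is a multiple of
 * 4K and no larger than half of MAXPHYSIO.
 */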
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 /*****************************************************************************
88 *
89 * I/O Operations on vnodes
90 *
91 *****************************************************************************/
92 int hfs_vnop_read(struct vnop_read_args *);
93 int hfs_vnop_write(struct vnop_write_args *);
94 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
95 int hfs_vnop_select(struct vnop_select_args *);
96 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
97 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
98 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
99 int hfs_vnop_strategy(struct vnop_strategy_args *);
100 int hfs_vnop_allocate(struct vnop_allocate_args *);
101 int hfs_vnop_pagein(struct vnop_pagein_args *);
102 int hfs_vnop_pageout(struct vnop_pageout_args *);
103 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
104
105
106 /*
107 * Read data from a file.
108 */
109 int
110 hfs_vnop_read(struct vnop_read_args *ap)
111 {
112 uio_t uio = ap->a_uio;
113 struct vnode *vp = ap->a_vp;
114 struct cnode *cp;
115 struct filefork *fp;
116 struct hfsmount *hfsmp;
117 off_t filesize;
118 off_t filebytes;
119 off_t start_resid = uio_resid(uio);
120 off_t offset = uio_offset(uio);
121 int retval = 0;
122
123
124 /* Preflight checks */
125 if (!vnode_isreg(vp)) {
126 /* can only read regular files */
127 if (vnode_isdir(vp))
128 return (EISDIR);
129 else
130 return (EPERM);
131 }
132 if (start_resid == 0)
133 return (0); /* Nothing left to do */
134 if (offset < 0)
135 		return (EINVAL);	/* can't read from a negative offset */
136
137 cp = VTOC(vp);
138 fp = VTOF(vp);
139 hfsmp = VTOHFS(vp);
140
141 /* Protect against a size change. */
142 hfs_lock_truncate(cp, 0);
143
144 filesize = fp->ff_size;
145 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
146 if (offset > filesize) {
147 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
148 (offset > (off_t)MAXHFSFILESIZE)) {
149 retval = EFBIG;
150 }
151 goto exit;
152 }
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 retval = cluster_read(vp, uio, filesize, 0);
158
159 cp->c_touch_acctime = TRUE;
160
161 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
162 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
163
164 /*
165 	 * Keep track of blocks read
166 */
167 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
168 int took_cnode_lock = 0;
169 off_t bytesread;
170
171 bytesread = start_resid - uio_resid(uio);
172
173 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
174 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
175 hfs_lock(cp, HFS_FORCE_LOCK);
176 took_cnode_lock = 1;
177 }
178 /*
179 * If this file hasn't been seen since the start of
180 * the current sampling period then start over.
181 */
182 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
183 struct timeval tv;
184
185 fp->ff_bytesread = bytesread;
186 microtime(&tv);
187 cp->c_atime = tv.tv_sec;
188 } else {
189 fp->ff_bytesread += bytesread;
190 }
191 if (took_cnode_lock)
192 hfs_unlock(cp);
193 }
194 exit:
195 hfs_unlock_truncate(cp);
196 return (retval);
197 }
198
199 /*
200 * Write data to a file.
201 */
202 int
203 hfs_vnop_write(struct vnop_write_args *ap)
204 {
205 uio_t uio = ap->a_uio;
206 struct vnode *vp = ap->a_vp;
207 struct cnode *cp;
208 struct filefork *fp;
209 struct hfsmount *hfsmp;
210 kauth_cred_t cred = NULL;
211 off_t origFileSize;
212 off_t writelimit;
213 off_t bytesToAdd;
214 off_t actualBytesAdded;
215 off_t filebytes;
216 off_t offset;
217 size_t resid;
218 int eflags;
219 int ioflag = ap->a_ioflag;
220 int retval = 0;
221 int lockflags;
222 int cnode_locked = 0;
223
224 	// LP64todo - fix this! uio_resid may be a 64-bit value
225 resid = uio_resid(uio);
226 offset = uio_offset(uio);
227
228 if (offset < 0)
229 return (EINVAL);
230 if (resid == 0)
231 return (E_NONE);
232 if (!vnode_isreg(vp))
233 return (EPERM); /* Can only write regular files */
234
235 /* Protect against a size change. */
236 hfs_lock_truncate(VTOC(vp), TRUE);
237
238 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
239 hfs_unlock_truncate(VTOC(vp));
240 return (retval);
241 }
242 cnode_locked = 1;
243 cp = VTOC(vp);
244 fp = VTOF(vp);
245 hfsmp = VTOHFS(vp);
246 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
247
248 if (ioflag & IO_APPEND) {
249 uio_setoffset(uio, fp->ff_size);
250 offset = fp->ff_size;
251 }
252 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
253 retval = EPERM;
254 goto exit;
255 }
256
257 origFileSize = fp->ff_size;
258 eflags = kEFDeferMask; /* defer file block allocations */
259
260 #ifdef HFS_SPARSE_DEV
261 /*
262 * When the underlying device is sparse and space
263 * is low (< 8MB), stop doing delayed allocations
264 * and begin doing synchronous I/O.
265 */
266 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
267 (hfs_freeblks(hfsmp, 0) < 2048)) {
268 eflags &= ~kEFDeferMask;
269 ioflag |= IO_SYNC;
270 }
271 #endif /* HFS_SPARSE_DEV */
272
273 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
274 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
275
276 /* Now test if we need to extend the file */
277 /* Doing so will adjust the filebytes for us */
278
279 writelimit = offset + resid;
280 if (writelimit <= filebytes)
281 goto sizeok;
282
283 cred = vfs_context_ucred(ap->a_context);
284 #if QUOTA
285 bytesToAdd = writelimit - filebytes;
286 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
287 cred, 0);
288 if (retval)
289 goto exit;
290 #endif /* QUOTA */
291
292 if (hfs_start_transaction(hfsmp) != 0) {
293 retval = EINVAL;
294 goto exit;
295 }
296
297 while (writelimit > filebytes) {
298 bytesToAdd = writelimit - filebytes;
299 if (cred && suser(cred, NULL) != 0)
300 eflags |= kEFReserveMask;
301
302 /* Protect extents b-tree and allocation bitmap */
303 lockflags = SFL_BITMAP;
304 if (overflow_extents(fp))
305 lockflags |= SFL_EXTENTS;
306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
307
308 /* Files that are changing size are not hot file candidates. */
309 if (hfsmp->hfc_stage == HFC_RECORDING) {
310 fp->ff_bytesread = 0;
311 }
312 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
313 0, eflags, &actualBytesAdded));
314
315 hfs_systemfile_unlock(hfsmp, lockflags);
316
317 if ((actualBytesAdded == 0) && (retval == E_NONE))
318 retval = ENOSPC;
319 if (retval != E_NONE)
320 break;
321 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
322 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
323 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
324 }
325 (void) hfs_update(vp, TRUE);
326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
327 (void) hfs_end_transaction(hfsmp);
328
329 sizeok:
330 if (retval == E_NONE) {
331 off_t filesize;
332 off_t zero_off;
333 off_t tail_off;
334 off_t inval_start;
335 off_t inval_end;
336 off_t io_start;
337 int lflag;
338 struct rl_entry *invalid_range;
339
340 if (writelimit > fp->ff_size)
341 filesize = writelimit;
342 else
343 filesize = fp->ff_size;
344
345 lflag = (ioflag & IO_SYNC);
346
347 if (offset <= fp->ff_size) {
348 zero_off = offset & ~PAGE_MASK_64;
349
350 			/* Check whether the area between the zero_offset and the start of the
351 			   transfer is invalid and should be zero-filled as part of the
352 			   transfer:
353 			 */
354 if (offset > zero_off) {
355 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
356 lflag |= IO_HEADZEROFILL;
357 }
358 } else {
359 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
360
361 /* The bytes between fp->ff_size and uio->uio_offset must never be
362 read without being zeroed. The current last block is filled with zeroes
363 			   if it holds valid data, but in all cases we merely do a little bookkeeping
364 to track the area from the end of the current last page to the start of
365 the area actually written. For the same reason only the bytes up to the
366 			   start of the page where this write will start are invalidated; any remainder
367 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
368
369 Note that inval_start, the start of the page after the current EOF,
370 may be past the start of the write, in which case the zeroing
371 			   will be handled by the cluster_write of the actual data.
372 */
373 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
374 inval_end = offset & ~PAGE_MASK_64;
375 zero_off = fp->ff_size;
376
377 if ((fp->ff_size & PAGE_MASK_64) &&
378 (rl_scan(&fp->ff_invalidranges,
379 eof_page_base,
380 fp->ff_size - 1,
381 &invalid_range) != RL_NOOVERLAP)) {
382 /* The page containing the EOF is not valid, so the
383 entire page must be made inaccessible now. If the write
384 starts on a page beyond the page containing the eof
385 (inval_end > eof_page_base), add the
386 whole page to the range to be invalidated. Otherwise
387 (i.e. if the write starts on the same page), zero-fill
388 the entire page explicitly now:
389 */
390 if (inval_end > eof_page_base) {
391 inval_start = eof_page_base;
392 } else {
393 zero_off = eof_page_base;
394 };
395 };
396
397 if (inval_start < inval_end) {
398 struct timeval tv;
399 /* There's some range of data that's going to be marked invalid */
400
401 if (zero_off < inval_start) {
402 /* The pages between inval_start and inval_end are going to be invalidated,
403 and the actual write will start on a page past inval_end. Now's the last
404 chance to zero-fill the page containing the EOF:
405 */
406 hfs_unlock(cp);
407 cnode_locked = 0;
408 retval = cluster_write(vp, (uio_t) 0,
409 fp->ff_size, inval_start,
410 zero_off, (off_t)0,
411 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
412 hfs_lock(cp, HFS_FORCE_LOCK);
413 cnode_locked = 1;
414 if (retval) goto ioerr_exit;
415 offset = uio_offset(uio);
416 };
417
418 /* Mark the remaining area of the newly allocated space as invalid: */
419 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
420 microuptime(&tv);
421 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
422 zero_off = fp->ff_size = inval_end;
423 };
424
425 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
426 };
427
428 /* Check to see whether the area between the end of the write and the end of
429 the page it falls in is invalid and should be zero-filled as part of the transfer:
430 */
431 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
432 if (tail_off > filesize) tail_off = filesize;
433 if (tail_off > writelimit) {
434 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
435 lflag |= IO_TAILZEROFILL;
436 };
437 };
438
439 /*
440 * if the write starts beyond the current EOF (possibly advanced in the
441 * zeroing of the last block, above), then we'll zero fill from the current EOF
442 * to where the write begins:
443 *
444 * NOTE: If (and ONLY if) the portion of the file about to be written is
445 * before the current EOF it might be marked as invalid now and must be
446 * made readable (removed from the invalid ranges) before cluster_write
447 * tries to write it:
448 */
449 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
450 if (io_start < fp->ff_size) {
451 off_t io_end;
452
453 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
454 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
455 };
456
457 hfs_unlock(cp);
458 cnode_locked = 0;
459 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
460 tail_off, lflag | IO_NOZERODIRTY);
461 offset = uio_offset(uio);
462 if (offset > fp->ff_size) {
463 fp->ff_size = offset;
464
465 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
466 /* Files that are changing size are not hot file candidates. */
467 if (hfsmp->hfc_stage == HFC_RECORDING)
468 fp->ff_bytesread = 0;
469 }
470 if (resid > uio_resid(uio)) {
471 cp->c_touch_chgtime = TRUE;
472 cp->c_touch_modtime = TRUE;
473 }
474 }
475 HFS_KNOTE(vp, NOTE_WRITE);
476
477 ioerr_exit:
478 /*
479 * If we successfully wrote any data, and we are not the superuser
480 * we clear the setuid and setgid bits as a precaution against
481 * tampering.
482 */
483 if (cp->c_mode & (S_ISUID | S_ISGID)) {
484 cred = vfs_context_ucred(ap->a_context);
485 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
486 if (!cnode_locked) {
487 hfs_lock(cp, HFS_FORCE_LOCK);
488 cnode_locked = 1;
489 }
490 cp->c_mode &= ~(S_ISUID | S_ISGID);
491 }
492 }
493 if (retval) {
494 if (ioflag & IO_UNIT) {
495 if (!cnode_locked) {
496 hfs_lock(cp, HFS_FORCE_LOCK);
497 cnode_locked = 1;
498 }
499 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
500 0, ap->a_context);
501 			// LP64todo - fix this! resid needs to be user_ssize_t
502 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
503 uio_setresid(uio, resid);
504 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
505 }
506 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
507 if (!cnode_locked) {
508 hfs_lock(cp, HFS_FORCE_LOCK);
509 cnode_locked = 1;
510 }
511 retval = hfs_update(vp, TRUE);
512 }
513 /* Updating vcbWrCnt doesn't need to be atomic. */
514 hfsmp->vcbWrCnt++;
515
516 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
517 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
518 exit:
519 if (cnode_locked)
520 hfs_unlock(cp);
521 hfs_unlock_truncate(cp);
522 return (retval);
523 }
524
525 /* support for the "bulk-access" fcntl */
526
527 #define CACHE_ELEMS 64
528 #define CACHE_LEVELS 16
529 #define PARENT_IDS_FLAG 0x100
530
531 /* from hfs_attrlist.c */
532 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
533 mode_t obj_mode, struct mount *mp,
534 kauth_cred_t cred, struct proc *p);
535
536 /* from vfs/vfs_fsevents.c */
537 extern char *get_pathbuff(void);
538 extern void release_pathbuff(char *buff);
539
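/*
 * A small, sorted cache of directory IDs and the access result already
 * computed for them; used by the bulk-access fsctl below to avoid repeating
 * catalog lookups and permission checks for shared parent directories.
 */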
540 struct access_cache {
541 int numcached;
542 int cachehits; /* these two for statistics gathering */
543 int lookups;
544 unsigned int *acache;
545 Boolean *haveaccess;
546 };
547
548 struct access_t {
549 uid_t uid; /* IN: effective user id */
550 short flags; /* IN: access requested (i.e. R_OK) */
551 short num_groups; /* IN: number of groups user belongs to */
552 int num_files; /* IN: number of files to process */
553 int *file_ids; /* IN: array of file ids */
554 gid_t *groups; /* IN: array of groups */
555 short *access; /* OUT: access info for each file (0 for 'has access') */
556 };
557
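/* LP64 variant of access_t: user pointers are carried as user_addr_t. */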
558 struct user_access_t {
559 uid_t uid; /* IN: effective user id */
560 short flags; /* IN: access requested (i.e. R_OK) */
561 short num_groups; /* IN: number of groups user belongs to */
562 int num_files; /* IN: number of files to process */
563 user_addr_t file_ids; /* IN: array of file ids */
564 user_addr_t groups; /* IN: array of groups */
565 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
566 };
567
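/*
 * Illustrative, hedged sketch of how a user-space caller might drive this
 * interface, assuming the HFS_BULKACCESS_FSCTL command defined further below
 * and the fsctl() library call (the tool must run as root; the volume path
 * and ids here are made up):
 *
 *     int ids[2] = { 16, 21 };              // catalog node IDs to check
 *     short result[2];
 *     gid_t groups[1] = { 20 };
 *     struct access_t req = {
 *         .uid = 501, .flags = R_OK,
 *         .num_groups = 1, .num_files = 2,
 *         .file_ids = ids, .groups = groups, .access = result,
 *     };
 *     fsctl("/Volumes/MyHFSVolume", HFS_BULKACCESS_FSCTL, &req, 0);
 *     // result[i] == 0 means the caller has access to file_ids[i]
 */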
568 /*
569 * Perform a binary search for the given parent_id. Return value is
570 * found/not found boolean, and indexp will be the index of the item
571 * or the index at which to insert the item if it's not found.
572 */
573 static int
574 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
575 {
576 unsigned int lo, hi;
577 int index, matches = 0;
578
579 if (cache->numcached == 0) {
580 *indexp = 0;
581 return 0; // table is empty, so insert at index=0 and report no match
582 }
583
584 if (cache->numcached > CACHE_ELEMS) {
585 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
586 cache->numcached, CACHE_ELEMS);*/
587 cache->numcached = CACHE_ELEMS;
588 }
589
590 lo = 0;
591 hi = cache->numcached - 1;
592 index = -1;
593
594 /* perform binary search for parent_id */
595 do {
596 unsigned int mid = (hi - lo)/2 + lo;
597 unsigned int this_id = cache->acache[mid];
598
599 if (parent_id == this_id) {
600 index = mid;
601 break;
602 }
603
604 if (parent_id < this_id) {
605 hi = mid;
606 continue;
607 }
608
609 if (parent_id > this_id) {
610 lo = mid + 1;
611 continue;
612 }
613 } while(lo < hi);
614
615 /* check if lo and hi converged on the match */
616 if (parent_id == cache->acache[hi]) {
617 index = hi;
618 }
619
620 /* if no existing entry found, find index for new one */
621 if (index == -1) {
622 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
623 matches = 0;
624 } else {
625 matches = 1;
626 }
627
628 *indexp = index;
629 return matches;
630 }
631
632 /*
633 * Add a node to the access_cache at the given index (or do a lookup first
634 * to find the index if -1 is passed in). We currently do a replace rather
635 * than an insert if the cache is full.
636 */
637 static void
638 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
639 {
640 int lookup_index = -1;
641
642 /* need to do a lookup first if -1 passed for index */
643 if (index == -1) {
644 if (lookup_bucket(cache, &lookup_index, nodeID)) {
645 if (cache->haveaccess[lookup_index] != access) {
646 /* change access info for existing entry... should never happen */
647 cache->haveaccess[lookup_index] = access;
648 }
649
650 /* mission accomplished */
651 return;
652 } else {
653 index = lookup_index;
654 }
655
656 }
657
658 /* if the cache is full, do a replace rather than an insert */
659 if (cache->numcached >= CACHE_ELEMS) {
660 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
661 cache->numcached = CACHE_ELEMS-1;
662
663 if (index > cache->numcached) {
664 // printf("index %d pinned to %d\n", index, cache->numcached);
665 index = cache->numcached;
666 }
667 } else if (index >= 0 && index < cache->numcached) {
668 /* only do bcopy if we're inserting */
669 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
670 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
671 }
672
673 cache->acache[index] = nodeID;
674 cache->haveaccess[index] = access;
675 cache->numcached++;
676 }
677
678
679 struct cinfo {
680 uid_t uid;
681 gid_t gid;
682 mode_t mode;
683 cnid_t parentcnid;
684 };
685
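/*
 * Callback for hfs_chash_snoop: copies an in-core cnode's uid, gid, mode
 * and parent cnid into the caller's struct cinfo.
 */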
686 static int
687 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
688 {
689 struct cinfo *cip = (struct cinfo *)arg;
690
691 cip->uid = attrp->ca_uid;
692 cip->gid = attrp->ca_gid;
693 cip->mode = attrp->ca_mode;
694 cip->parentcnid = descp->cd_parentcnid;
695
696 return (0);
697 }
698
699 /*
700 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
701 * isn't incore, then go to the catalog.
702 */
703 static int
704 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
705 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
706 {
707 int error = 0;
708
709 /* if this id matches the one the fsctl was called with, skip the lookup */
710 if (cnid == skip_cp->c_cnid) {
711 cnattrp->ca_uid = skip_cp->c_uid;
712 cnattrp->ca_gid = skip_cp->c_gid;
713 cnattrp->ca_mode = skip_cp->c_mode;
714 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
715 } else {
716 struct cinfo c_info;
717
718 	/* otherwise, check the cnode hash in case the file/dir is incore */
719 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
720 cnattrp->ca_uid = c_info.uid;
721 cnattrp->ca_gid = c_info.gid;
722 cnattrp->ca_mode = c_info.mode;
723 keyp->hfsPlus.parentID = c_info.parentcnid;
724 } else {
725 int lockflags;
726
727 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
728
729 /* lookup this cnid in the catalog */
730 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
731
732 hfs_systemfile_unlock(hfsmp, lockflags);
733
734 cache->lookups++;
735 }
736 }
737
738 return (error);
739 }
740
741 /*
742 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
743 * up to CACHE_LEVELS as we progress towards the root.
744 */
745 static int
746 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
747 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
748 {
749 int myErr = 0;
750 int myResult;
751 HFSCatalogNodeID thisNodeID;
752 unsigned long myPerms;
753 struct cat_attr cnattr;
754 int cache_index = -1;
755 CatalogKey catkey;
756
757 int i = 0, ids_to_cache = 0;
758 int parent_ids[CACHE_LEVELS];
759
760 /* root always has access */
761 if (!suser(myp_ucred, NULL)) {
762 return (1);
763 }
764
765 thisNodeID = nodeID;
766 while (thisNodeID >= kRootDirID) {
767 myResult = 0; /* default to "no access" */
768
769 /* check the cache before resorting to hitting the catalog */
770
771 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
772 * to look any further after hitting cached dir */
773
774 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
775 cache->cachehits++;
776 myResult = cache->haveaccess[cache_index];
777 goto ExitThisRoutine;
778 }
779
780 /* remember which parents we want to cache */
781 if (ids_to_cache < CACHE_LEVELS) {
782 parent_ids[ids_to_cache] = thisNodeID;
783 ids_to_cache++;
784 }
785
786 /* do the lookup (checks the cnode hash, then the catalog) */
787 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
788 if (myErr) {
789 goto ExitThisRoutine; /* no access */
790 }
791
792 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
793 cnattr.ca_mode, hfsmp->hfs_mp,
794 myp_ucred, theProcPtr);
795
796 if ( (myPerms & X_OK) == 0 ) {
797 myResult = 0;
798 goto ExitThisRoutine; /* no access */
799 }
800
801 /* up the hierarchy we go */
802 thisNodeID = catkey.hfsPlus.parentID;
803 }
804
805 /* if here, we have access to this node */
806 myResult = 1;
807
808 ExitThisRoutine:
809 if (myErr) {
810 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
811 myResult = 0;
812 }
813 *err = myErr;
814
815 /* cache the parent directory(ies) */
816 for (i = 0; i < ids_to_cache; i++) {
817 /* small optimization: get rid of double-lookup for all these */
818 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
819 add_node(cache, -1, parent_ids[i], myResult);
820 }
821
822 return (myResult);
823 }
824 /* end "bulk-access" support */
825
826
827
828 /*
829 * Callback for use with freeze ioctl.
830 */
831 static int
832 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
833 {
834 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
835
836 return 0;
837 }
838
839 /*
840 * Control filesystem operating characteristics.
841 */
842 int
843 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
844 vnode_t a_vp;
845 int a_command;
846 caddr_t a_data;
847 int a_fflag;
848 vfs_context_t a_context;
849 } */ *ap)
850 {
851 struct vnode * vp = ap->a_vp;
852 struct hfsmount *hfsmp = VTOHFS(vp);
853 vfs_context_t context = ap->a_context;
854 kauth_cred_t cred = vfs_context_ucred(context);
855 proc_t p = vfs_context_proc(context);
856 struct vfsstatfs *vfsp;
857 boolean_t is64bit;
858
859 is64bit = proc_is64bit(p);
860
861 switch (ap->a_command) {
862
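	/* Grow or shrink the volume to the byte size passed in a_data; restricted
	   to the filesystem owner (or superuser) and to the root vnode. */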
863 case HFS_RESIZE_VOLUME: {
864 u_int64_t newsize;
865 u_int64_t cursize;
866
867 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
868 if (suser(cred, NULL) &&
869 kauth_cred_getuid(cred) != vfsp->f_owner) {
870 return (EACCES); /* must be owner of file system */
871 }
872 if (!vnode_isvroot(vp)) {
873 return (EINVAL);
874 }
875 newsize = *(u_int64_t *)ap->a_data;
876 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
877
878 if (newsize > cursize) {
879 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
880 } else if (newsize < cursize) {
881 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
882 } else {
883 return (0);
884 }
885 }
886 case HFS_CHANGE_NEXT_ALLOCATION: {
887 u_int32_t location;
888
889 if (vnode_vfsisrdonly(vp)) {
890 return (EROFS);
891 }
892 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
893 if (suser(cred, NULL) &&
894 kauth_cred_getuid(cred) != vfsp->f_owner) {
895 return (EACCES); /* must be owner of file system */
896 }
897 if (!vnode_isvroot(vp)) {
898 return (EINVAL);
899 }
900 location = *(u_int32_t *)ap->a_data;
901 if (location > hfsmp->totalBlocks - 1) {
902 return (EINVAL);
903 }
904 /* Return previous value. */
905 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
906 HFS_MOUNT_LOCK(hfsmp, TRUE);
907 hfsmp->nextAllocation = location;
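		/* vcbFlags |= 0xFF00 marks the volume control block dirty (the
		   MarkVCBDirty() idiom) so the new nextAllocation reaches the header. */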
908 hfsmp->vcbFlags |= 0xFF00;
909 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
910 return (0);
911 }
912
913 #ifdef HFS_SPARSE_DEV
914 case HFS_SETBACKINGSTOREINFO: {
915 struct vnode * bsfs_rootvp;
916 struct vnode * di_vp;
917 struct hfs_backingstoreinfo *bsdata;
918 int error = 0;
919
920 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
921 return (EALREADY);
922 }
923 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
924 if (suser(cred, NULL) &&
925 kauth_cred_getuid(cred) != vfsp->f_owner) {
926 return (EACCES); /* must be owner of file system */
927 }
928 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
929 if (bsdata == NULL) {
930 return (EINVAL);
931 }
932 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
933 return (error);
934 }
935 if ((error = vnode_getwithref(di_vp))) {
936 file_drop(bsdata->backingfd);
937 return(error);
938 }
939
940 if (vnode_mount(vp) == vnode_mount(di_vp)) {
941 (void)vnode_put(di_vp);
942 file_drop(bsdata->backingfd);
943 return (EINVAL);
944 }
945
946 /*
947 * Obtain the backing fs root vnode and keep a reference
948 * on it. This reference will be dropped in hfs_unmount.
949 */
950 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
951 if (error) {
952 (void)vnode_put(di_vp);
953 file_drop(bsdata->backingfd);
954 return (error);
955 }
956 vnode_ref(bsfs_rootvp);
957 vnode_put(bsfs_rootvp);
958
959 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
960 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
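		/* The sparse band is four times the caller-supplied band size,
		   converted from bytes to allocation blocks. */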
961 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
962 hfsmp->hfs_sparsebandblks *= 4;
963
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (0);
967 }
968 case HFS_CLRBACKINGSTOREINFO: {
969 struct vnode * tmpvp;
970
971 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
972 if (suser(cred, NULL) &&
973 kauth_cred_getuid(cred) != vfsp->f_owner) {
974 return (EACCES); /* must be owner of file system */
975 }
976 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
977 hfsmp->hfs_backingfs_rootvp) {
978
979 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
980 tmpvp = hfsmp->hfs_backingfs_rootvp;
981 hfsmp->hfs_backingfs_rootvp = NULLVP;
982 hfsmp->hfs_sparsebandblks = 0;
983 vnode_rele(tmpvp);
984 }
985 return (0);
986 }
987 #endif /* HFS_SPARSE_DEV */
988
989 case F_FREEZE_FS: {
990 struct mount *mp;
991 task_t task;
992
993 if (!is_suser())
994 return (EACCES);
995
996 mp = vnode_mount(vp);
997 hfsmp = VFSTOHFS(mp);
998
999 if (!(hfsmp->jnl))
1000 return (ENOTSUP);
1001
1002 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1003
1004 task = current_task();
1005 task_working_set_disable(task);
1006
1007 // flush things before we get started to try and prevent
1008 // dirty data from being paged out while we're frozen.
1009 // note: can't do this after taking the lock as it will
1010 // deadlock against ourselves.
1011 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1012 hfs_global_exclusive_lock_acquire(hfsmp);
1013 journal_flush(hfsmp->jnl);
1014
1015 // don't need to iterate on all vnodes, we just need to
1016 // wait for writes to the system files and the device vnode
1017 if (HFSTOVCB(hfsmp)->extentsRefNum)
1018 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1019 if (HFSTOVCB(hfsmp)->catalogRefNum)
1020 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1021 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1022 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1023 if (hfsmp->hfs_attribute_vp)
1024 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1025 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1026
1027 hfsmp->hfs_freezing_proc = current_proc();
1028
1029 return (0);
1030 }
1031
1032 case F_THAW_FS: {
1033 if (!is_suser())
1034 return (EACCES);
1035
1036 // if we're not the one who froze the fs then we
1037 // can't thaw it.
1038 if (hfsmp->hfs_freezing_proc != current_proc()) {
1039 return EPERM;
1040 }
1041
1042 // NOTE: if you add code here, also go check the
1043 // code that "thaws" the fs in hfs_vnop_close()
1044 //
1045 hfsmp->hfs_freezing_proc = NULL;
1046 hfs_global_exclusive_lock_release(hfsmp);
1047 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1048
1049 return (0);
1050 }
1051
1052 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1053 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1054
1055 case HFS_BULKACCESS_FSCTL:
1056 case HFS_BULKACCESS: {
1057 /*
1058 	 * NOTE: on entry, the vnode is locked. In case this vnode
1059 	 * happens to be in our list of file_ids, we note it and
1060 	 * avoid calling hfs_chashget_nowait() on that id, as that
1061 * will cause a "locking against myself" panic.
1062 */
1063 Boolean check_leaf = true;
1064
1065 struct user_access_t *user_access_structp;
1066 struct user_access_t tmp_user_access_t;
1067 struct access_cache cache;
1068
1069 int error = 0, i;
1070
1071 dev_t dev = VTOC(vp)->c_dev;
1072
1073 short flags;
1074 struct ucred myucred; /* XXX ILLEGAL */
1075 int num_files;
1076 int *file_ids = NULL;
1077 short *access = NULL;
1078
1079 cnid_t cnid;
1080 cnid_t prevParent_cnid = 0;
1081 unsigned long myPerms;
1082 short myaccess = 0;
1083 struct cat_attr cnattr;
1084 CatalogKey catkey;
1085 struct cnode *skip_cp = VTOC(vp);
1086 struct vfs_context my_context;
1087
1088 /* first, return error if not run as root */
1089 if (cred->cr_ruid != 0) {
1090 return EPERM;
1091 }
1092
1093 /* initialize the local cache and buffers */
1094 cache.numcached = 0;
1095 cache.cachehits = 0;
1096 cache.lookups = 0;
1097
1098 file_ids = (int *) get_pathbuff();
1099 access = (short *) get_pathbuff();
1100 cache.acache = (int *) get_pathbuff();
1101 cache.haveaccess = (Boolean *) get_pathbuff();
1102
1103 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1104 release_pathbuff((char *) file_ids);
1105 release_pathbuff((char *) access);
1106 release_pathbuff((char *) cache.acache);
1107 release_pathbuff((char *) cache.haveaccess);
1108
1109 return ENOMEM;
1110 }
1111
1112 /* struct copyin done during dispatch... need to copy file_id array separately */
1113 if (ap->a_data == NULL) {
1114 error = EINVAL;
1115 goto err_exit_bulk_access;
1116 }
1117
1118 if (is64bit) {
1119 user_access_structp = (struct user_access_t *)ap->a_data;
1120 }
1121 else {
1122 struct access_t * accessp = (struct access_t *)ap->a_data;
1123 tmp_user_access_t.uid = accessp->uid;
1124 tmp_user_access_t.flags = accessp->flags;
1125 tmp_user_access_t.num_groups = accessp->num_groups;
1126 tmp_user_access_t.num_files = accessp->num_files;
1127 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1128 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1129 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1130 user_access_structp = &tmp_user_access_t;
1131 }
1132
1133 num_files = user_access_structp->num_files;
1134 if (num_files < 1) {
1135 goto err_exit_bulk_access;
1136 }
1137 if (num_files > 256) {
1138 error = EINVAL;
1139 goto err_exit_bulk_access;
1140 }
1141
1142 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1143 num_files * sizeof(int)))) {
1144 goto err_exit_bulk_access;
1145 }
1146
1147 /* fill in the ucred structure */
1148 flags = user_access_structp->flags;
1149 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1150 flags = R_OK;
1151 }
1152
1153 /* check if we've been passed leaf node ids or parent ids */
1154 if (flags & PARENT_IDS_FLAG) {
1155 check_leaf = false;
1156 }
1157
1158 memset(&myucred, 0, sizeof(myucred));
1159 myucred.cr_ref = 1;
1160 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1161 myucred.cr_ngroups = user_access_structp->num_groups;
1162 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1163 myucred.cr_ngroups = 0;
1164 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1165 myucred.cr_ngroups * sizeof(gid_t)))) {
1166 goto err_exit_bulk_access;
1167 }
1168 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1169 myucred.cr_gmuid = myucred.cr_uid;
1170
1171 my_context.vc_proc = p;
1172 my_context.vc_ucred = &myucred;
1173
1174 /* Check access to each file_id passed in */
1175 for (i = 0; i < num_files; i++) {
1176 #if 0
1177 cnid = (cnid_t) file_ids[i];
1178
1179 /* root always has access */
1180 if (!suser(&myucred, NULL)) {
1181 access[i] = 0;
1182 continue;
1183 }
1184
1185 if (check_leaf) {
1186
1187 /* do the lookup (checks the cnode hash, then the catalog) */
1188 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1189 if (error) {
1190 access[i] = (short) error;
1191 continue;
1192 }
1193
1194 /* before calling CheckAccess(), check the target file for read access */
1195 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1196 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1197
1198
1199 /* fail fast if no access */
1200 if ((myPerms & flags) == 0) {
1201 access[i] = EACCES;
1202 continue;
1203 }
1204 } else {
1205 /* we were passed an array of parent ids */
1206 catkey.hfsPlus.parentID = cnid;
1207 }
1208
1209 /* if the last guy had the same parent and had access, we're done */
1210 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1211 cache.cachehits++;
1212 access[i] = 0;
1213 continue;
1214 }
1215
1216 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1217 skip_cp, p, &myucred, dev);
1218
1219 if ( myaccess ) {
1220 access[i] = 0; // have access.. no errors to report
1221 } else {
1222 access[i] = (error != 0 ? (short) error : EACCES);
1223 }
1224
1225 prevParent_cnid = catkey.hfsPlus.parentID;
1226 #else
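			/*
			 * Active path: rather than using the manual permission cache above,
			 * walk each file id up toward the root with hfs_vget() and let
			 * vnode_authorize() decide whether the constructed credential can
			 * search/read each node.
			 */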
1227 int myErr;
1228
1229 cnid = (cnid_t)file_ids[i];
1230
1231 while (cnid >= kRootDirID) {
1232 /* get the vnode for this cnid */
1233 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1234 if ( myErr ) {
1235 access[i] = EACCES;
1236 break;
1237 }
1238
1239 cnid = VTOC(vp)->c_parentcnid;
1240
1241 hfs_unlock(VTOC(vp));
1242 if (vnode_vtype(vp) == VDIR) {
1243 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1244 } else {
1245 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1246 }
1247 vnode_put(vp);
1248 access[i] = myErr;
1249 if (myErr) {
1250 break;
1251 }
1252 }
1253 #endif
1254 }
1255
1256 /* copyout the access array */
1257 if ((error = copyout((caddr_t)access, user_access_structp->access,
1258 num_files * sizeof (short)))) {
1259 goto err_exit_bulk_access;
1260 }
1261
1262 err_exit_bulk_access:
1263
1264 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1265
1266 release_pathbuff((char *) cache.acache);
1267 release_pathbuff((char *) cache.haveaccess);
1268 release_pathbuff((char *) file_ids);
1269 release_pathbuff((char *) access);
1270
1271 return (error);
1272 } /* HFS_BULKACCESS */
1273
1274 case HFS_SETACLSTATE: {
1275 int state;
1276
1277 if (ap->a_data == NULL) {
1278 return (EINVAL);
1279 }
1280
1281 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1282 state = *(int *)ap->a_data;
1283
1284 // super-user can enable or disable acl's on a volume.
1285 // the volume owner can only enable acl's
1286 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1287 return (EPERM);
1288 }
1289 if (state == 0 || state == 1)
1290 return hfs_setextendedsecurity(hfsmp, state);
1291 else
1292 return (EINVAL);
1293 }
1294
1295 case F_FULLFSYNC: {
1296 int error;
1297
1298 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1299 if (error == 0) {
1300 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1301 hfs_unlock(VTOC(vp));
1302 }
1303
1304 return error;
1305 }
1306
1307 case F_CHKCLEAN: {
1308 register struct cnode *cp;
1309 int error;
1310
1311 if (!vnode_isreg(vp))
1312 return EINVAL;
1313
1314 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1315 if (error == 0) {
1316 cp = VTOC(vp);
1317 /*
1318 		    * Used by regression tests to determine whether
1319 		    * all the dirty pages (via write) have been cleaned
1320 		    * after a call to 'fsync'.
1321 */
1322 error = is_file_clean(vp, VTOF(vp)->ff_size);
1323 hfs_unlock(cp);
1324 }
1325 return (error);
1326 }
1327
1328 case F_RDADVISE: {
1329 register struct radvisory *ra;
1330 struct filefork *fp;
1331 int error;
1332
1333 if (!vnode_isreg(vp))
1334 return EINVAL;
1335
1336 ra = (struct radvisory *)(ap->a_data);
1337 fp = VTOF(vp);
1338
1339 /* Protect against a size change. */
1340 hfs_lock_truncate(VTOC(vp), TRUE);
1341
1342 if (ra->ra_offset >= fp->ff_size) {
1343 error = EFBIG;
1344 } else {
1345 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1346 }
1347
1348 hfs_unlock_truncate(VTOC(vp));
1349 return (error);
1350 }
1351
1352 case F_READBOOTSTRAP:
1353 case F_WRITEBOOTSTRAP:
1354 {
1355 struct vnode *devvp = NULL;
1356 user_fbootstraptransfer_t *user_bootstrapp;
1357 int devBlockSize;
1358 int error;
1359 uio_t auio;
1360 daddr64_t blockNumber;
1361 u_long blockOffset;
1362 u_long xfersize;
1363 struct buf *bp;
1364 user_fbootstraptransfer_t user_bootstrap;
1365
1366 if (!vnode_isvroot(vp))
1367 return (EINVAL);
1368 	    /* LP64 - when the caller is a 64-bit process we are passed a pointer
1369 	     * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1370 	     * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
1371 */
1372 if (is64bit) {
1373 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1374 }
1375 else {
1376 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1377 user_bootstrapp = &user_bootstrap;
1378 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1379 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1380 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1381 }
1382 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1383 return EINVAL;
1384
1385 devvp = VTOHFS(vp)->hfs_devvp;
1386 auio = uio_create(1, user_bootstrapp->fbt_offset,
1387 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1388 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1389 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1390
1391 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1392
1393 while (uio_resid(auio) > 0) {
1394 blockNumber = uio_offset(auio) / devBlockSize;
1395 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1396 if (error) {
1397 if (bp) buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401
1402 blockOffset = uio_offset(auio) % devBlockSize;
1403 xfersize = devBlockSize - blockOffset;
1404 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1405 if (error) {
1406 buf_brelse(bp);
1407 uio_free(auio);
1408 return error;
1409 };
1410 if (uio_rw(auio) == UIO_WRITE) {
1411 error = VNOP_BWRITE(bp);
1412 if (error) {
1413 uio_free(auio);
1414 return error;
1415 }
1416 } else {
1417 buf_brelse(bp);
1418 };
1419 };
1420 uio_free(auio);
1421 };
1422 return 0;
1423
1424 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1425 {
1426 if (is64bit) {
1427 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1428 }
1429 else {
1430 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1431 }
1432 return 0;
1433 }
1434
1435 case HFS_GET_MOUNT_TIME:
1436 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1437 break;
1438
1439 case HFS_GET_LAST_MTIME:
1440 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1441 break;
1442
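	/* HFS_SET_BOOT_INFO copies the caller-supplied Finder info into the volume
	   header (superuser or volume owner only) and flushes it to disk;
	   HFS_GET_BOOT_INFO returns the current copy. */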
1443 case HFS_SET_BOOT_INFO:
1444 if (!vnode_isvroot(vp))
1445 return(EINVAL);
1446 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1447 return(EACCES); /* must be superuser or owner of filesystem */
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1452 break;
1453
1454 case HFS_GET_BOOT_INFO:
1455 if (!vnode_isvroot(vp))
1456 return(EINVAL);
1457 HFS_MOUNT_LOCK(hfsmp, TRUE);
1458 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1459 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1460 break;
1461
1462 default:
1463 return (ENOTTY);
1464 }
1465
1466 /* Should never get here */
1467 return 0;
1468 }
1469
1470 /*
1471 * select
1472 */
1473 int
1474 hfs_vnop_select(__unused struct vnop_select_args *ap)
1475 /*
1476 struct vnop_select_args {
1477 vnode_t a_vp;
1478 int a_which;
1479 int a_fflags;
1480 void *a_wql;
1481 vfs_context_t a_context;
1482 };
1483 */
1484 {
1485 /*
1486 * We should really check to see if I/O is possible.
1487 */
1488 return (1);
1489 }
1490
1491 /*
1492 * Converts a logical block number to a physical block, and optionally returns
1493  * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1494  * The physical block number is based on the device block size, which is currently 512.
1495  * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
1496 */
1497 int
1498 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1499 {
1500 struct cnode *cp = VTOC(vp);
1501 struct filefork *fp = VTOF(vp);
1502 struct hfsmount *hfsmp = VTOHFS(vp);
1503 int retval = E_NONE;
1504 daddr_t logBlockSize;
1505 size_t bytesContAvail = 0;
1506 off_t blockposition;
1507 int lockExtBtree;
1508 int lockflags = 0;
1509
1510 /*
1511 * Check for underlying vnode requests and ensure that logical
1512 * to physical mapping is requested.
1513 */
1514 if (vpp != NULL)
1515 *vpp = cp->c_devvp;
1516 if (bnp == NULL)
1517 return (0);
1518
1519 logBlockSize = GetLogicalBlockSize(vp);
1520 blockposition = (off_t)bn * (off_t)logBlockSize;
1521
1522 lockExtBtree = overflow_extents(fp);
1523
1524 if (lockExtBtree)
1525 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1526
1527 retval = MacToVFSError(
1528 MapFileBlockC (HFSTOVCB(hfsmp),
1529 (FCB*)fp,
1530 MAXPHYSIO,
1531 blockposition,
1532 bnp,
1533 &bytesContAvail));
1534
1535 if (lockExtBtree)
1536 hfs_systemfile_unlock(hfsmp, lockflags);
1537
1538 if (retval == E_NONE) {
1539 /* Figure out how many read ahead blocks there are */
1540 if (runp != NULL) {
1541 if (can_cluster(logBlockSize)) {
1542 /* Make sure this result never goes negative: */
1543 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1544 } else {
1545 *runp = 0;
1546 }
1547 }
1548 }
1549 return (retval);
1550 }
1551
1552 /*
1553 * Convert logical block number to file offset.
1554 */
1555 int
1556 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1557 /*
1558 struct vnop_blktooff_args {
1559 vnode_t a_vp;
1560 daddr64_t a_lblkno;
1561 off_t *a_offset;
1562 };
1563 */
1564 {
1565 if (ap->a_vp == NULL)
1566 return (EINVAL);
1567 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1568
1569 return(0);
1570 }
1571
1572 /*
1573 * Convert file offset to logical block number.
1574 */
1575 int
1576 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1577 /*
1578 struct vnop_offtoblk_args {
1579 vnode_t a_vp;
1580 off_t a_offset;
1581 daddr64_t *a_lblkno;
1582 };
1583 */
1584 {
1585 if (ap->a_vp == NULL)
1586 return (EINVAL);
1587 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1588
1589 return(0);
1590 }
1591
1592 /*
1593 * Map file offset to physical block number.
1594 *
1595 * System file cnodes are expected to be locked (shared or exclusive).
1596 */
1597 int
1598 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1599 /*
1600 struct vnop_blockmap_args {
1601 vnode_t a_vp;
1602 off_t a_foffset;
1603 size_t a_size;
1604 daddr64_t *a_bpn;
1605 size_t *a_run;
1606 void *a_poff;
1607 int a_flags;
1608 vfs_context_t a_context;
1609 };
1610 */
1611 {
1612 struct vnode *vp = ap->a_vp;
1613 struct cnode *cp;
1614 struct filefork *fp;
1615 struct hfsmount *hfsmp;
1616 size_t bytesContAvail = 0;
1617 int retval = E_NONE;
1618 int syslocks = 0;
1619 int lockflags = 0;
1620 struct rl_entry *invalid_range;
1621 enum rl_overlaptype overlaptype;
1622 int started_tr = 0;
1623 int tooklock = 0;
1624
1625 /* Do not allow blockmap operation on a directory */
1626 if (vnode_isdir(vp)) {
1627 return (ENOTSUP);
1628 }
1629
1630 /*
1631 * Check for underlying vnode requests and ensure that logical
1632 * to physical mapping is requested.
1633 */
1634 if (ap->a_bpn == NULL)
1635 return (0);
1636
1637 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1638 if (VTOC(vp)->c_lockowner != current_thread()) {
1639 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1640 tooklock = 1;
1641 } else {
1642 cp = VTOC(vp);
1643 panic("blockmap: %s cnode lock already held!\n",
1644 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1645 }
1646 }
1647 hfsmp = VTOHFS(vp);
1648 cp = VTOC(vp);
1649 fp = VTOF(vp);
1650
1651 retry:
1652 if (fp->ff_unallocblocks) {
1653 if (hfs_start_transaction(hfsmp) != 0) {
1654 retval = EINVAL;
1655 goto exit;
1656 } else {
1657 started_tr = 1;
1658 }
1659 syslocks = SFL_EXTENTS | SFL_BITMAP;
1660
1661 } else if (overflow_extents(fp)) {
1662 syslocks = SFL_EXTENTS;
1663 }
1664
1665 if (syslocks)
1666 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1667
1668 /*
1669 * Check for any delayed allocations.
1670 */
1671 if (fp->ff_unallocblocks) {
1672 SInt64 actbytes;
1673 u_int32_t loanedBlocks;
1674
1675 //
1676 // Make sure we have a transaction. It's possible
1677 // that we came in and fp->ff_unallocblocks was zero
1678 // but during the time we blocked acquiring the extents
1679 // btree, ff_unallocblocks became non-zero and so we
1680 // will need to start a transaction.
1681 //
1682 if (started_tr == 0) {
1683 if (syslocks) {
1684 hfs_systemfile_unlock(hfsmp, lockflags);
1685 syslocks = 0;
1686 }
1687 goto retry;
1688 }
1689
1690 /*
1691 	         * Note: ExtendFileC will release any blocks on loan and
1692 	         * acquire real blocks.  So we ask to extend by zero bytes
1693 * since ExtendFileC will account for the virtual blocks.
1694 */
1695
1696 loanedBlocks = fp->ff_unallocblocks;
1697 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1698 kEFAllMask | kEFNoClumpMask, &actbytes);
1699
1700 if (retval) {
1701 fp->ff_unallocblocks = loanedBlocks;
1702 cp->c_blocks += loanedBlocks;
1703 fp->ff_blocks += loanedBlocks;
1704
1705 HFS_MOUNT_LOCK(hfsmp, TRUE);
1706 hfsmp->loanedBlocks += loanedBlocks;
1707 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1708 }
1709
1710 if (retval) {
1711 hfs_systemfile_unlock(hfsmp, lockflags);
1712 cp->c_flag |= C_MODIFIED;
1713 if (started_tr) {
1714 (void) hfs_update(vp, TRUE);
1715 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1716
1717 hfs_end_transaction(hfsmp);
1718 }
1719 goto exit;
1720 }
1721 }
1722
1723 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1724 ap->a_bpn, &bytesContAvail);
1725 if (syslocks) {
1726 hfs_systemfile_unlock(hfsmp, lockflags);
1727 syslocks = 0;
1728 }
1729
1730 if (started_tr) {
1731 (void) hfs_update(vp, TRUE);
1732 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1733 hfs_end_transaction(hfsmp);
1734 started_tr = 0;
1735 }
1736 if (retval) {
1737 goto exit;
1738 }
1739
1740 /* Adjust the mapping information for invalid file ranges: */
1741 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1742 ap->a_foffset + (off_t)bytesContAvail - 1,
1743 &invalid_range);
1744 if (overlaptype != RL_NOOVERLAP) {
1745 switch(overlaptype) {
1746 case RL_MATCHINGOVERLAP:
1747 case RL_OVERLAPCONTAINSRANGE:
1748 case RL_OVERLAPSTARTSBEFORE:
1749 /* There's no valid block for this byte offset: */
1750 *ap->a_bpn = (daddr64_t)-1;
1751 /* There's no point limiting the amount to be returned
1752 * if the invalid range that was hit extends all the way
1753 * to the EOF (i.e. there's no valid bytes between the
1754 * end of this range and the file's EOF):
1755 */
1756 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1757 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1758 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1759 }
1760 break;
1761
1762 case RL_OVERLAPISCONTAINED:
1763 case RL_OVERLAPENDSAFTER:
1764 /* The range of interest hits an invalid block before the end: */
1765 if (invalid_range->rl_start == ap->a_foffset) {
1766 /* There's actually no valid information to be had starting here: */
1767 *ap->a_bpn = (daddr64_t)-1;
1768 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1769 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1770 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1771 }
1772 } else {
1773 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1774 }
1775 break;
1776
1777 case RL_NOOVERLAP:
1778 break;
1779 } /* end switch */
1780 if (bytesContAvail > ap->a_size)
1781 bytesContAvail = ap->a_size;
1782 }
1783 if (ap->a_run)
1784 *ap->a_run = bytesContAvail;
1785
1786 if (ap->a_poff)
1787 *(int *)ap->a_poff = 0;
1788 exit:
1789 if (tooklock)
1790 hfs_unlock(cp);
1791
1792 return (MacToVFSError(retval));
1793 }
1794
1795
1796 /*
1797 * prepare and issue the I/O
1798 * buf_strategy knows how to deal
1799 * with requests that require
1800 * fragmented I/Os
1801 */
1802 int
1803 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1804 {
1805 buf_t bp = ap->a_bp;
1806 vnode_t vp = buf_vnode(bp);
1807 struct cnode *cp = VTOC(vp);
1808
1809 return (buf_strategy(cp->c_devvp, ap));
1810 }
1811
1812
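/*
 * Truncate or extend a file fork to 'length' bytes. The cnode is expected
 * to be locked on entry; 'skipsetsize' suppresses the ubc_setsize() call
 * for callers that are nested inside another transaction and will do it
 * themselves.
 */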
1813 static int
1814 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1815 {
1816 register struct cnode *cp = VTOC(vp);
1817 struct filefork *fp = VTOF(vp);
1818 	struct proc *p = vfs_context_proc(context);
1819 kauth_cred_t cred = vfs_context_ucred(context);
1820 int retval;
1821 off_t bytesToAdd;
1822 off_t actualBytesAdded;
1823 off_t filebytes;
1824 u_int64_t old_filesize;
1825 u_long fileblocks;
1826 int blksize;
1827 struct hfsmount *hfsmp;
1828 int lockflags;
1829
1830 blksize = VTOVCB(vp)->blockSize;
1831 fileblocks = fp->ff_blocks;
1832 filebytes = (off_t)fileblocks * (off_t)blksize;
1833 old_filesize = fp->ff_size;
1834
1835 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1836 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1837
1838 if (length < 0)
1839 return (EINVAL);
1840
1841 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1842 return (EFBIG);
1843
1844 hfsmp = VTOHFS(vp);
1845
1846 retval = E_NONE;
1847
1848 /* Files that are changing size are not hot file candidates. */
1849 if (hfsmp->hfc_stage == HFC_RECORDING) {
1850 fp->ff_bytesread = 0;
1851 }
1852
1853 /*
1854 * We cannot just check if fp->ff_size == length (as an optimization)
1855 * since there may be extra physical blocks that also need truncation.
1856 */
1857 #if QUOTA
1858 if ((retval = hfs_getinoquota(cp)))
1859 return(retval);
1860 #endif /* QUOTA */
1861
1862 /*
1863 * Lengthen the size of the file. We must ensure that the
1864 * last byte of the file is allocated. Since the smallest
1865 * value of ff_size is 0, length will be at least 1.
1866 */
1867 if (length > (off_t)fp->ff_size) {
1868 #if QUOTA
1869 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1870 cred, 0);
1871 if (retval)
1872 goto Err_Exit;
1873 #endif /* QUOTA */
1874 /*
1875 * If we don't have enough physical space then
1876 * we need to extend the physical size.
1877 */
1878 if (length > filebytes) {
1879 int eflags;
1880 u_long blockHint = 0;
1881
1882 /* All or nothing and don't round up to clumpsize. */
1883 eflags = kEFAllMask | kEFNoClumpMask;
1884
1885 if (cred && suser(cred, NULL) != 0)
1886 eflags |= kEFReserveMask; /* keep a reserve */
1887
1888 /*
1889 * Allocate Journal and Quota files in metadata zone.
1890 */
1891 if (filebytes == 0 &&
1892 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1893 hfs_virtualmetafile(cp)) {
1894 eflags |= kEFMetadataMask;
1895 blockHint = hfsmp->hfs_metazone_start;
1896 }
1897 if (hfs_start_transaction(hfsmp) != 0) {
1898 retval = EINVAL;
1899 goto Err_Exit;
1900 }
1901
1902 /* Protect extents b-tree and allocation bitmap */
1903 lockflags = SFL_BITMAP;
1904 if (overflow_extents(fp))
1905 lockflags |= SFL_EXTENTS;
1906 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1907
1908 while ((length > filebytes) && (retval == E_NONE)) {
1909 bytesToAdd = length - filebytes;
1910 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1911 (FCB*)fp,
1912 bytesToAdd,
1913 blockHint,
1914 eflags,
1915 &actualBytesAdded));
1916
1917 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1918 if (actualBytesAdded == 0 && retval == E_NONE) {
1919 if (length > filebytes)
1920 length = filebytes;
1921 break;
1922 }
1923 } /* endwhile */
1924
1925 hfs_systemfile_unlock(hfsmp, lockflags);
1926
1927 if (hfsmp->jnl) {
1928 (void) hfs_update(vp, TRUE);
1929 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1930 }
1931
1932 hfs_end_transaction(hfsmp);
1933
1934 if (retval)
1935 goto Err_Exit;
1936
1937 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1938 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1939 }
1940
1941 if (!(flags & IO_NOZEROFILL)) {
1942 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1943 struct rl_entry *invalid_range;
1944 off_t zero_limit;
1945
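/* Zero-fill no further than the end of the page containing the current EOF (and never past the new length). */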
1946 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1947 if (length < zero_limit) zero_limit = length;
1948
1949 if (length > (off_t)fp->ff_size) {
1950 struct timeval tv;
1951
1952 /* Extending the file: time to fill out the current last page with zeroes? */
1953 if ((fp->ff_size & PAGE_MASK_64) &&
1954 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1955 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1956
1957 /* There's some valid data at the start of the (current) last page
1958 of the file, so zero out the remainder of that page to ensure the
1959 entire page contains valid data. Since there is no invalid range
1960 possible past the (current) eof, there's no need to remove anything
1961 from the invalid range list before calling cluster_write(): */
1962 hfs_unlock(cp);
1963 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1964 fp->ff_size, (off_t)0,
1965 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1966 hfs_lock(cp, HFS_FORCE_LOCK);
1967 if (retval) goto Err_Exit;
1968
1969 /* Merely invalidate the remaining area, if necessary: */
1970 if (length > zero_limit) {
1971 microuptime(&tv);
1972 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1973 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1974 }
1975 } else {
1976 /* The page containing the (current) eof is invalid: just add the
1977 remainder of the page to the invalid list, along with the area
1978 being newly allocated:
1979 */
1980 microuptime(&tv);
1981 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1982 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1983 };
1984 }
1985 } else {
1986 panic("hfs_truncate: invoked on non-UBC object?!");
1987 };
1988 }
1989 cp->c_touch_modtime = TRUE;
1990 fp->ff_size = length;
1991
1992 /* Nested transactions will do their own ubc_setsize. */
1993 if (!skipsetsize) {
1994 /*
1995 * ubc_setsize can cause a pagein here
1996 * so we need to drop cnode lock.
1997 */
1998 hfs_unlock(cp);
1999 ubc_setsize(vp, length);
2000 hfs_lock(cp, HFS_FORCE_LOCK);
2001 }
2002
2003 } else { /* Shorten the size of the file */
2004
2005 if ((off_t)fp->ff_size > length) {
2006 /*
2007 * Any buffers that are past the truncation point need to be
2008 * invalidated (to maintain buffer cache consistency).
2009 */
2010
2011 /* Nested transactions will do their own ubc_setsize. */
2012 if (!skipsetsize) {
2013 /*
2014 * ubc_setsize can cause a pageout here
2015 * so we need to drop cnode lock.
2016 */
2017 hfs_unlock(cp);
2018 ubc_setsize(vp, length);
2019 hfs_lock(cp, HFS_FORCE_LOCK);
2020 }
2021
2022 /* Any space previously marked as invalid is now irrelevant: */
2023 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2024 }
2025
2026 /*
2027 * Account for any unmapped blocks. Note that the new
2028 * file length can still end up with unmapped blocks.
2029 */
2030 if (fp->ff_unallocblocks > 0) {
2031 u_int32_t finalblks;
2032 u_int32_t loanedBlocks;
2033
2034 HFS_MOUNT_LOCK(hfsmp, TRUE);
2035
2036 loanedBlocks = fp->ff_unallocblocks;
2037 cp->c_blocks -= loanedBlocks;
2038 fp->ff_blocks -= loanedBlocks;
2039 fp->ff_unallocblocks = 0;
2040
2041 hfsmp->loanedBlocks -= loanedBlocks;
2042
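/* Round the new length up to whole allocation blocks and re-loan any blocks still needed to back it. */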
2043 finalblks = (length + blksize - 1) / blksize;
2044 if (finalblks > fp->ff_blocks) {
2045 /* calculate required unmapped blocks */
2046 loanedBlocks = finalblks - fp->ff_blocks;
2047 hfsmp->loanedBlocks += loanedBlocks;
2048
2049 fp->ff_unallocblocks = loanedBlocks;
2050 cp->c_blocks += loanedBlocks;
2051 fp->ff_blocks += loanedBlocks;
2052 }
2053 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2054 }
2055
2056 /*
2057 * For a TBE process, deallocation of the file blocks is
2058 * delayed until the file is closed, and hfs_close calls
2059 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2060 * isn't set, we make sure this isn't a TBE process.
2061 */
2062 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2063 #if QUOTA
2064 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2065 #endif /* QUOTA */
2066 if (hfs_start_transaction(hfsmp) != 0) {
2067 retval = EINVAL;
2068 goto Err_Exit;
2069 }
2070
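/* Skip the on-disk truncate while the fork still has loaned (delayed-allocation) blocks. */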
2071 if (fp->ff_unallocblocks == 0) {
2072 /* Protect extents b-tree and allocation bitmap */
2073 lockflags = SFL_BITMAP;
2074 if (overflow_extents(fp))
2075 lockflags |= SFL_EXTENTS;
2076 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2077
2078 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2079 (FCB*)fp, length, false));
2080
2081 hfs_systemfile_unlock(hfsmp, lockflags);
2082 }
2083 if (hfsmp->jnl) {
2084 if (retval == 0) {
2085 fp->ff_size = length;
2086 }
2087 (void) hfs_update(vp, TRUE);
2088 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2089 }
2090
2091 hfs_end_transaction(hfsmp);
2092
2093 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2094 if (retval)
2095 goto Err_Exit;
2096 #if QUOTA
2097 /* These are bytes released */
2098 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2099 #endif /* QUOTA */
2100 }
2101 /* Only set update flag if the logical length changes */
2102 if (old_filesize != length)
2103 cp->c_touch_modtime = TRUE;
2104 fp->ff_size = length;
2105 }
2106 cp->c_touch_chgtime = TRUE;
2107 retval = hfs_update(vp, MNT_WAIT);
2108 if (retval) {
2109 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2110 -1, -1, -1, retval, 0);
2111 }
2112
2113 Err_Exit:
2114
2115 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2116 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2117
2118 return (retval);
2119 }
2120
2121
2122
2123 /*
2124 * Truncate a cnode to at most length size, freeing (or adding) the
2125 * disk blocks.
2126 */
2127 __private_extern__
2128 int
2129 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2130 vfs_context_t context)
2131 {
2132 struct filefork *fp = VTOF(vp);
2133 off_t filebytes;
2134 u_long fileblocks;
2135 int blksize, error = 0;
2136 struct cnode *cp = VTOC(vp);
2137
2138 if (vnode_isdir(vp))
2139 return (EISDIR); /* cannot truncate an HFS directory! */
2140
2141 blksize = VTOVCB(vp)->blockSize;
2142 fileblocks = fp->ff_blocks;
2143 filebytes = (off_t)fileblocks * (off_t)blksize;
2144
2145 // have to loop truncating or growing files that are
2146 // really big because otherwise transactions can get
2147 // enormous and consume too many kernel resources.
2148
2149 if (length < filebytes) {
2150 while (filebytes > length) {
2151 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2152 filebytes -= HFS_BIGFILE_SIZE;
2153 } else {
2154 filebytes = length;
2155 }
2156 cp->c_flag |= C_FORCEUPDATE;
2157 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2158 if (error)
2159 break;
2160 }
2161 } else if (length > filebytes) {
2162 while (filebytes < length) {
2163 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2164 filebytes += HFS_BIGFILE_SIZE;
2165 } else {
2166 filebytes = length;
2167 }
2168 cp->c_flag |= C_FORCEUPDATE;
2169 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2170 if (error)
2171 break;
2172 }
2173 } else /* Same logical size */ {
2174
2175 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2176 }
2177 /* Files that are changing size are not hot file candidates. */
2178 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2179 fp->ff_bytesread = 0;
2180 }
2181
2182 return (error);
2183 }
2184
2185
2186
2187 /*
2188 * Preallocate file storage space.
2189 */
2190 int
2191 hfs_vnop_allocate(struct vnop_allocate_args /* {
2192 vnode_t a_vp;
2193 off_t a_length;
2194 u_int32_t a_flags;
2195 off_t *a_bytesallocated;
2196 off_t a_offset;
2197 vfs_context_t a_context;
2198 } */ *ap)
2199 {
2200 struct vnode *vp = ap->a_vp;
2201 struct cnode *cp;
2202 struct filefork *fp;
2203 ExtendedVCB *vcb;
2204 off_t length = ap->a_length;
2205 off_t startingPEOF;
2206 off_t moreBytesRequested;
2207 off_t actualBytesAdded;
2208 off_t filebytes;
2209 u_long fileblocks;
2210 int retval, retval2;
2211 UInt32 blockHint;
2212 UInt32 extendFlags; /* For call to ExtendFileC */
2213 struct hfsmount *hfsmp;
2214 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2215 int lockflags;
2216
2217 *(ap->a_bytesallocated) = 0;
2218
2219 if (!vnode_isreg(vp))
2220 return (EISDIR);
2221 if (length < (off_t)0)
2222 return (EINVAL);
2223
2224 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2225 return (retval);
2226 cp = VTOC(vp);
2227 fp = VTOF(vp);
2228 hfsmp = VTOHFS(vp);
2229 vcb = VTOVCB(vp);
2230
2231 fileblocks = fp->ff_blocks;
2232 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2233
2234 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2235 retval = EINVAL;
2236 goto Err_Exit;
2237 }
2238
2239 /* Fill in the flags word for the call to Extend the file */
2240
2241 extendFlags = kEFNoClumpMask;
2242 if (ap->a_flags & ALLOCATECONTIG)
2243 extendFlags |= kEFContigMask;
2244 if (ap->a_flags & ALLOCATEALL)
2245 extendFlags |= kEFAllMask;
2246 if (cred && suser(cred, NULL) != 0)
2247 extendFlags |= kEFReserveMask;
2248
2249 retval = E_NONE;
2250 blockHint = 0;
2251 startingPEOF = filebytes;
2252
2253 if (ap->a_flags & ALLOCATEFROMPEOF)
2254 length += filebytes;
2255 else if (ap->a_flags & ALLOCATEFROMVOL)
2256 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
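/* ALLOCATEFROMPEOF: a_length is relative to the current physical EOF. ALLOCATEFROMVOL: a_offset is a volume offset used as an allocation hint. */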
2257
2258 /* If no changes are necessary, then we're done */
2259 if (filebytes == length)
2260 goto Std_Exit;
2261
2262 /*
2263 * Lengthen the size of the file. We must ensure that the
2264 * last byte of the file is allocated. Since the smallest
2265 * value of filebytes is 0, length will be at least 1.
2266 */
2267 if (length > filebytes) {
2268 moreBytesRequested = length - filebytes;
2269
2270 #if QUOTA
2271 retval = hfs_chkdq(cp,
2272 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2273 cred, 0);
2274 if (retval)
2275 goto Err_Exit;
2276
2277 #endif /* QUOTA */
2278 /*
2279 * Metadata zone checks.
2280 */
2281 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2282 /*
2283 * Allocate Journal and Quota files in metadata zone.
2284 */
2285 if (hfs_virtualmetafile(cp)) {
2286 extendFlags |= kEFMetadataMask;
2287 blockHint = hfsmp->hfs_metazone_start;
2288 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2289 (blockHint <= hfsmp->hfs_metazone_end)) {
2290 /*
2291 * Move blockHint outside metadata zone.
2292 */
2293 blockHint = hfsmp->hfs_metazone_end + 1;
2294 }
2295 }
2296
2297 if (hfs_start_transaction(hfsmp) != 0) {
2298 retval = EINVAL;
2299 goto Err_Exit;
2300 }
2301
2302 /* Protect extents b-tree and allocation bitmap */
2303 lockflags = SFL_BITMAP;
2304 if (overflow_extents(fp))
2305 lockflags |= SFL_EXTENTS;
2306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2307
2308 retval = MacToVFSError(ExtendFileC(vcb,
2309 (FCB*)fp,
2310 moreBytesRequested,
2311 blockHint,
2312 extendFlags,
2313 &actualBytesAdded));
2314
2315 *(ap->a_bytesallocated) = actualBytesAdded;
2316 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2317
2318 hfs_systemfile_unlock(hfsmp, lockflags);
2319
2320 if (hfsmp->jnl) {
2321 (void) hfs_update(vp, TRUE);
2322 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2323 }
2324
2325 hfs_end_transaction(hfsmp);
2326
2327 /*
2328 * if we get an error and no changes were made then exit
2329 * otherwise we must do the hfs_update to reflect the changes
2330 */
2331 if (retval && (startingPEOF == filebytes))
2332 goto Err_Exit;
2333
2334 /*
2335 * Adjust actualBytesAdded to be allocation block aligned, not
2336 * clump size aligned.
2337 * NOTE: What we report here does not affect reality until the
2338 * file is closed, when we truncate the file to allocation
2339 * block size.
2340 */
2341 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2342 *(ap->a_bytesallocated) =
2343 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2344
2345 } else { /* Shorten the size of the file */
2346
2347 if (fp->ff_size > length) {
2348 /*
2349 * Any buffers that are past the truncation point need to be
2350 * invalidated (to maintain buffer cache consistency).
2351 */
2352 }
2353
2354 if (hfs_start_transaction(hfsmp) != 0) {
2355 retval = EINVAL;
2356 goto Err_Exit;
2357 }
2358
2359 /* Protect extents b-tree and allocation bitmap */
2360 lockflags = SFL_BITMAP;
2361 if (overflow_extents(fp))
2362 lockflags |= SFL_EXTENTS;
2363 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2364
2365 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2366
2367 hfs_systemfile_unlock(hfsmp, lockflags);
2368
2369 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2370
2371 if (hfsmp->jnl) {
2372 (void) hfs_update(vp, TRUE);
2373 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2374 }
2375
2376 hfs_end_transaction(hfsmp);
2377
2378
2379 /*
2380 * if we get an error and no changes were made then exit
2381 * otherwise we must do the hfs_update to reflect the changes
2382 */
2383 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2384 #if QUOTA
2385 /* These are bytes released */
2386 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
2387 #endif /* QUOTA */
2388
2389 if (fp->ff_size > filebytes) {
2390 fp->ff_size = filebytes;
2391
2392 hfs_unlock(cp);
2393 ubc_setsize(vp, fp->ff_size);
2394 hfs_lock(cp, HFS_FORCE_LOCK);
2395 }
2396 }
2397
2398 Std_Exit:
2399 cp->c_touch_chgtime = TRUE;
2400 cp->c_touch_modtime = TRUE;
2401 retval2 = hfs_update(vp, MNT_WAIT);
2402
2403 if (retval == 0)
2404 retval = retval2;
2405 Err_Exit:
2406 hfs_unlock(cp);
2407 return (retval);
2408 }
2409
2410
2411 /*
2412 * Pagein for HFS filesystem
2413 */
2414 int
2415 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2416 /*
2417 struct vnop_pagein_args {
2418 vnode_t a_vp,
2419 upl_t a_pl,
2420 vm_offset_t a_pl_offset,
2421 off_t a_f_offset,
2422 size_t a_size,
2423 int a_flags,
2424 vfs_context_t a_context;
2425 };
2426 */
2427 {
2428 vnode_t vp = ap->a_vp;
2429 int error;
2430
2431 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2432 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2433 /*
2434 * Keep track of blocks read.
2435 */
2436 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2437 struct cnode *cp;
2438 struct filefork *fp;
2439 int bytesread;
2440 int took_cnode_lock = 0;
2441
2442 cp = VTOC(vp);
2443 fp = VTOF(vp);
2444
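/* For a small file read from offset 0, count only the bytes the file actually holds. */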
2445 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2446 bytesread = fp->ff_size;
2447 else
2448 bytesread = ap->a_size;
2449
2450 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2451 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2452 hfs_lock(cp, HFS_FORCE_LOCK);
2453 took_cnode_lock = 1;
2454 }
2455 /*
2456 * If this file hasn't been seen since the start of
2457 * the current sampling period then start over.
2458 */
2459 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2460 struct timeval tv;
2461
2462 fp->ff_bytesread = bytesread;
2463 microtime(&tv);
2464 cp->c_atime = tv.tv_sec;
2465 } else {
2466 fp->ff_bytesread += bytesread;
2467 }
2468 cp->c_touch_acctime = TRUE;
2469 if (took_cnode_lock)
2470 hfs_unlock(cp);
2471 }
2472 return (error);
2473 }
2474
2475 /*
2476 * Pageout for HFS filesystem.
2477 */
2478 int
2479 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2480 /*
2481 struct vnop_pageout_args {
2482 vnode_t a_vp,
2483 upl_t a_pl,
2484 vm_offset_t a_pl_offset,
2485 off_t a_f_offset,
2486 size_t a_size,
2487 int a_flags,
2488 vfs_context_t a_context;
2489 };
2490 */
2491 {
2492 vnode_t vp = ap->a_vp;
2493 struct cnode *cp;
2494 struct filefork *fp;
2495 int retval;
2496 off_t end_of_range;
2497 off_t filesize;
2498
2499 cp = VTOC(vp);
2500 if (cp->c_lockowner == current_thread()) {
2501 panic("pageout: %s cnode lock already held!\n",
2502 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2503 }
2504 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2505 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2506 ubc_upl_abort_range(ap->a_pl,
2507 ap->a_pl_offset,
2508 ap->a_size,
2509 UPL_ABORT_FREE_ON_EMPTY);
2510 }
2511 return (retval);
2512 }
2513 fp = VTOF(vp);
2514
2515 filesize = fp->ff_size;
2516 end_of_range = ap->a_f_offset + ap->a_size - 1;
2517
2518 if (end_of_range >= filesize) {
2519 end_of_range = (off_t)(filesize - 1);
2520 }
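/* Pages being written out now hold valid data, so drop them from the invalid-range list. */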
2521 if (ap->a_f_offset < filesize) {
2522 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2523 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2524 }
2525 hfs_unlock(cp);
2526
2527 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2528 ap->a_size, filesize, ap->a_flags);
2529
2530 /*
2531 * If data was written, and setuid or setgid bits are set and
2532 * this process is not the superuser then clear the setuid and
2533 * setgid bits as a precaution against tampering.
2534 */
2535 if ((retval == 0) &&
2536 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2537 (vfs_context_suser(ap->a_context) != 0)) {
2538 hfs_lock(cp, HFS_FORCE_LOCK);
2539 cp->c_mode &= ~(S_ISUID | S_ISGID);
2540 cp->c_touch_chgtime = TRUE;
2541 hfs_unlock(cp);
2542 }
2543 return (retval);
2544 }
2545
2546 /*
2547 * Intercept B-Tree node writes to unswap them if necessary.
2548 */
2549 int
2550 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2551 {
2552 int retval = 0;
2553 register struct buf *bp = ap->a_bp;
2554 register struct vnode *vp = buf_vnode(bp);
2555 BlockDescriptor block;
2556
2557 /* Trap B-Tree writes */
2558 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2559 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2560 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2561
2562 /*
2563 * Swap and validate the node if it is in native byte order.
2564 * This is always true on big endian, so we always validate
2565 * before writing here. On little endian, the node typically has
2566 * been swapped and validated when it was written to the journal,
2567 * so we won't do anything here.
2568 */
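/* The last UInt16 of a B-tree node is the offset of its first record, i.e. sizeof(BTNodeDescriptor) == 0x000e; seeing that value in host order means the node still needs swapping. */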
2569 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2570 /* Prepare the block pointer */
2571 block.blockHeader = bp;
2572 block.buffer = (char *)buf_dataptr(bp);
2573 block.blockNum = buf_lblkno(bp);
2574 /* not found in cache ==> came from disk */
2575 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2576 block.blockSize = buf_count(bp);
2577
2578 /* Endian un-swap B-Tree node */
2579 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2580 if (retval)
2581 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2582 }
2583 }
2584
2585 /* This buffer shouldn't be locked anymore but if it is clear it */
2586 if ((buf_flags(bp) & B_LOCKED)) {
2587 // XXXdbg
2588 if (VTOHFS(vp)->jnl) {
2589 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2590 }
2591 buf_clearflags(bp, B_LOCKED);
2592 }
2593 retval = vn_bwrite (ap);
2594
2595 return (retval);
2596 }
2597
2598 /*
2599 * Relocate a file to a new location on disk.
2600 * The cnode must be locked on entry.
2601 *
2602 * Relocation occurs by cloning the file's data from its
2603 * current set of blocks to a new set of blocks. During
2604 * the relocation all of the blocks (old and new) are
2605 * owned by the file.
2606 *
2607 * -----------------
2608 * |///////////////|
2609 * -----------------
2610 * 0 N (file offset)
2611 *
2612 * ----------------- -----------------
2613 * |///////////////| | | STEP 1 (acquire new blocks)
2614 * ----------------- -----------------
2615 * 0 N N+1 2N
2616 *
2617 * ----------------- -----------------
2618 * |///////////////| |///////////////| STEP 2 (clone data)
2619 * ----------------- -----------------
2620 * 0 N N+1 2N
2621 *
2622 * -----------------
2623 * |///////////////| STEP 3 (head truncate blocks)
2624 * -----------------
2625 * 0 N
2626 *
2627 * During steps 2 and 3 page-outs to file offsets less
2628 * than or equal to N are suspended.
2629 *
2630 * During step 3 page-ins to the file get suspended.
2631 */
2632 __private_extern__
2633 int
2634 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2635 struct proc *p)
2636 {
2637 struct cnode *cp;
2638 struct filefork *fp;
2639 struct hfsmount *hfsmp;
2640 u_int32_t headblks;
2641 u_int32_t datablks;
2642 u_int32_t blksize;
2643 u_int32_t growsize;
2644 u_int32_t nextallocsave;
2645 daddr64_t sector_a, sector_b;
2646 int disabled_caching = 0;
2647 int eflags;
2648 off_t newbytes;
2649 int retval;
2650 int lockflags = 0;
2651 int took_trunc_lock = 0;
2652 int started_tr = 0;
2653 enum vtype vnodetype;
2654
2655 vnodetype = vnode_vtype(vp);
2656 if (vnodetype != VREG && vnodetype != VLNK) {
2657 return (EPERM);
2658 }
2659
2660 hfsmp = VTOHFS(vp);
2661 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2662 return (ENOSPC);
2663 }
2664
2665 cp = VTOC(vp);
2666 fp = VTOF(vp);
2667 if (fp->ff_unallocblocks)
2668 return (EINVAL);
2669 blksize = hfsmp->blockSize;
2670 if (blockHint == 0)
2671 blockHint = hfsmp->nextAllocation;
2672
2673 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2674 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2675 return (EFBIG);
2676 }
2677
2678 //
2679 // We do not believe that this call to hfs_fsync() is
2680 // necessary and it causes a journal transaction
2681 // deadlock so we are removing it.
2682 //
2683 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2684 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2685 // if (retval)
2686 // return (retval);
2687 //}
2688
2689 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2690 hfs_unlock(cp);
2691 hfs_lock_truncate(cp, TRUE);
2692 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2693 hfs_unlock_truncate(cp);
2694 return (retval);
2695 }
2696 took_trunc_lock = 1;
2697 }
2698 headblks = fp->ff_blocks;
2699 datablks = howmany(fp->ff_size, blksize);
2700 growsize = datablks * blksize;
2701 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2702 if (blockHint >= hfsmp->hfs_metazone_start &&
2703 blockHint <= hfsmp->hfs_metazone_end)
2704 eflags |= kEFMetadataMask;
2705
2706 if (hfs_start_transaction(hfsmp) != 0) {
2707 if (took_trunc_lock)
2708 hfs_unlock_truncate(cp);
2709 return (EINVAL);
2710 }
2711 started_tr = 1;
2712 /*
2713 * Protect the extents b-tree and the allocation bitmap
2714 * during MapFileBlockC and ExtendFileC operations.
2715 */
2716 lockflags = SFL_BITMAP;
2717 if (overflow_extents(fp))
2718 lockflags |= SFL_EXTENTS;
2719 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2720
2721 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2722 if (retval) {
2723 retval = MacToVFSError(retval);
2724 goto out;
2725 }
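/* sector_a is the physical sector of the file's last allocated block; it is compared below against the start of the new allocation to make sure the data actually moves. */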
2726
2727 /*
2728 * STEP 1 - acquire new allocation blocks.
2729 */
2730 if (!vnode_isnocache(vp)) {
2731 vnode_setnocache(vp);
2732 disabled_caching = 1;
2733
2734 }
2735 nextallocsave = hfsmp->nextAllocation;
2736 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
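/* A metadata-zone allocation should not disturb the volume's next-allocation pointer, so restore the saved value. */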
2737 if (eflags & kEFMetadataMask) {
2738 HFS_MOUNT_LOCK(hfsmp, TRUE);
2739 hfsmp->nextAllocation = nextallocsave;
2740 hfsmp->vcbFlags |= 0xFF00;
2741 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2742 }
2743
2744 retval = MacToVFSError(retval);
2745 if (retval == 0) {
2746 cp->c_flag |= C_MODIFIED;
2747 if (newbytes < growsize) {
2748 retval = ENOSPC;
2749 goto restore;
2750 } else if (fp->ff_blocks < (headblks + datablks)) {
2751 printf("hfs_relocate: allocation failed");
2752 retval = ENOSPC;
2753 goto restore;
2754 }
2755
2756 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2757 if (retval) {
2758 retval = MacToVFSError(retval);
2759 } else if ((sector_a + 1) == sector_b) {
2760 retval = ENOSPC;
2761 goto restore;
2762 } else if ((eflags & kEFMetadataMask) &&
2763 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2764 hfsmp->hfs_metazone_end)) {
2765 printf("hfs_relocate: didn't move into metadata zone\n");
2766 retval = ENOSPC;
2767 goto restore;
2768 }
2769 }
2770 /* Done with system locks and journal for now. */
2771 hfs_systemfile_unlock(hfsmp, lockflags);
2772 lockflags = 0;
2773 hfs_end_transaction(hfsmp);
2774 started_tr = 0;
2775
2776 if (retval) {
2777 /*
2778 * Check to see if failure is due to excessive fragmentation.
2779 */
2780 if ((retval == ENOSPC) &&
2781 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2782 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2783 }
2784 goto out;
2785 }
2786 /*
2787 * STEP 2 - clone file data into the new allocation blocks.
2788 */
2789
2790 if (vnodetype == VLNK)
2791 retval = hfs_clonelink(vp, blksize, cred, p);
2792 else if (vnode_issystem(vp))
2793 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2794 else
2795 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2796
2797 /* Start transaction for step 3 or for a restore. */
2798 if (hfs_start_transaction(hfsmp) != 0) {
2799 retval = EINVAL;
2800 goto out;
2801 }
2802 started_tr = 1;
2803 if (retval)
2804 goto restore;
2805
2806 /*
2807 * STEP 3 - switch to cloned data and remove old blocks.
2808 */
2809 lockflags = SFL_BITMAP;
2810 if (overflow_extents(fp))
2811 lockflags |= SFL_EXTENTS;
2812 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2813
2814 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2815
2816 hfs_systemfile_unlock(hfsmp, lockflags);
2817 lockflags = 0;
2818 if (retval)
2819 goto restore;
2820 out:
2821 if (took_trunc_lock)
2822 hfs_unlock_truncate(cp);
2823
2824 if (lockflags) {
2825 hfs_systemfile_unlock(hfsmp, lockflags);
2826 lockflags = 0;
2827 }
2828
2829 // See comment up above about calls to hfs_fsync()
2830 //
2831 //if (retval == 0)
2832 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2833
2834 if (hfsmp->jnl) {
2835 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2836 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2837 else
2838 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2839 }
2840 exit:
2841 if (disabled_caching) {
2842 vnode_clearnocache(vp);
2843 }
2844 if (started_tr)
2845 hfs_end_transaction(hfsmp);
2846
2847 return (retval);
2848
2849 restore:
2850 if (fp->ff_blocks == headblks)
2851 goto exit;
2852 /*
2853 * Give back any newly allocated space.
2854 */
2855 if (lockflags == 0) {
2856 lockflags = SFL_BITMAP;
2857 if (overflow_extents(fp))
2858 lockflags |= SFL_EXTENTS;
2859 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2860 }
2861
2862 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2863
2864 hfs_systemfile_unlock(hfsmp, lockflags);
2865 lockflags = 0;
2866
2867 if (took_trunc_lock)
2868 hfs_unlock_truncate(cp);
2869 goto exit;
2870 }
2871
2872
2873 /*
2874 * Clone a symlink.
2875 *
2876 */
2877 static int
2878 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2879 {
2880 struct buf *head_bp = NULL;
2881 struct buf *tail_bp = NULL;
2882 int error;
2883
2884
2885 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2886 if (error)
2887 goto out;
2888
2889 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2890 if (tail_bp == NULL) {
2891 error = EIO;
2892 goto out;
2893 }
2894 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2895 error = (int)buf_bwrite(tail_bp);
2896 out:
2897 if (head_bp) {
2898 buf_markinvalid(head_bp);
2899 buf_brelse(head_bp);
2900 }
2901 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2902
2903 return (error);
2904 }
2905
2906 /*
2907 * Clone a file's data from its current blocks into newly allocated blocks within the same file.
2908 *
2909 */
2910 static int
2911 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2912 {
2913 caddr_t bufp;
2914 size_t writebase;
2915 size_t bufsize;
2916 size_t copysize;
2917 size_t iosize;
2918 off_t filesize;
2919 size_t offset;
2920 uio_t auio;
2921 int error = 0;
2922
2923 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2924 writebase = blkstart * blksize;
2925 copysize = blkcnt * blksize;
2926 iosize = bufsize = MIN(copysize, 4096 * 16);
2927 offset = 0;
2928
2929 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2930 return (ENOMEM);
2931 }
2932 hfs_unlock(VTOC(vp));
2933
2934 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2935
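/* Copy in bufsize chunks: read from the start of the file and write the same data past the old physical EOF into the newly allocated blocks. */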
2936 while (offset < copysize) {
2937 iosize = MIN(copysize - offset, iosize);
2938
2939 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2940 uio_addiov(auio, (uintptr_t)bufp, iosize);
2941
2942 error = cluster_read(vp, auio, copysize, 0);
2943 if (error) {
2944 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2945 break;
2946 }
2947 if (uio_resid(auio) != 0) {
2948 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2949 error = EIO;
2950 break;
2951 }
2952
2953 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2954 uio_addiov(auio, (uintptr_t)bufp, iosize);
2955
2956 error = cluster_write(vp, auio, filesize + offset,
2957 filesize + offset + iosize,
2958 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2959 if (error) {
2960 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2961 break;
2962 }
2963 if (uio_resid(auio) != 0) {
2964 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2965 error = EIO;
2966 break;
2967 }
2968 offset += iosize;
2969 }
2970 uio_free(auio);
2971
2972 /*
2973 * No need to call ubc_sync_range or hfs_invalbuf
2974 * since the file was copied using IO_NOCACHE.
2975 */
2976
2977 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2978
2979 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2980 return (error);
2981 }
2982
2983 /*
2984 * Clone a system (metadata) file.
2985 *
2986 */
2987 static int
2988 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2989 kauth_cred_t cred, struct proc *p)
2990 {
2991 caddr_t bufp;
2992 char * offset;
2993 size_t bufsize;
2994 size_t iosize;
2995 struct buf *bp = NULL;
2996 daddr64_t blkno;
2997 daddr64_t blk;
2998 daddr64_t start_blk;
2999 daddr64_t last_blk;
3000 int breadcnt;
3001 int i;
3002 int error = 0;
3003
3004
3005 iosize = GetLogicalBlockSize(vp);
3006 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3007 breadcnt = bufsize / iosize;
3008
3009 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3010 return (ENOMEM);
3011 }
3012 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3013 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3014 blkno = 0;
3015
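/* The source data occupies logical blocks [0, last_blk); each chunk is written back to the corresponding block at start_blk + blkno. */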
3016 while (blkno < last_blk) {
3017 /*
3018 * Read up to a megabyte
3019 */
3020 offset = bufp;
3021 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3022 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3023 if (error) {
3024 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3025 goto out;
3026 }
3027 if (buf_count(bp) != iosize) {
3028 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3029 goto out;
3030 }
3031 bcopy((char *)buf_dataptr(bp), offset, iosize);
3032
3033 buf_markinvalid(bp);
3034 buf_brelse(bp);
3035 bp = NULL;
3036
3037 offset += iosize;
3038 }
3039
3040 /*
3041 * Write up to a megabyte
3042 */
3043 offset = bufp;
3044 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3045 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3046 if (bp == NULL) {
3047 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3048 error = EIO;
3049 goto out;
3050 }
3051 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3052 error = (int)buf_bwrite(bp);
3053 bp = NULL;
3054 if (error)
3055 goto out;
3056 offset += iosize;
3057 }
3058 }
3059 out:
3060 if (bp) {
3061 buf_brelse(bp);
3062 }
3063
3064 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3065
3066 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3067
3068 return (error);
3069 }