1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
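/*
 * can_cluster: true when a logical block size is a multiple of 4K and no
 * larger than half of MAXPHYSIO; hfs_bmap() uses this below to decide
 * whether a read-ahead run can be reported for that block size.
 */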
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
 127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
 156 /*
 157 * Keep track of blocks read
 158 */
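/*
 * While the hot-file recording stage (HFC_RECORDING) is active, ff_bytesread
 * accumulates the bytes read during the current sampling period; c_atime is
 * compared against hfc_timebase below to detect the start of a new period.
 */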
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
 216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
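/*
 * Extend the fork until enough blocks are allocated to cover writelimit.
 * ExtendFileC() may add fewer bytes than requested in a single call, so
 * loop until filebytes reaches writelimit or an error (including ENOSPC
 * when nothing more could be added) stops us.
 */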
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
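/*
 * Decide whether cluster_write() must zero-fill ahead of the data: when
 * the write starts at or before the current EOF, any invalid bytes between
 * the enclosing page boundary and the write start are zeroed via
 * IO_HEADZEROFILL; when it starts beyond EOF, the newly exposed bytes are
 * either zeroed here or recorded as invalid ranges so they can never be
 * read back unzeroed.
 */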
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
 342 /* Check whether the area between zero_off and the start
 343 of the transfer is invalid and should be zero-filled
 344 as part of the transfer:
 345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
 353 /* The bytes between fp->ff_size and uio->uio_offset must never be
 354 read without being zeroed. The current last block is filled with zeroes
 355 if it holds valid data, but in all cases we merely do a little bookkeeping
 356 to track the area from the end of the current last page to the start of
 357 the area actually written. For the same reason only the bytes up to the
 358 start of the page where this write will start are invalidated; any remainder
 359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 360
 361 Note that inval_start, the start of the page after the current EOF,
 362 may be past the start of the write, in which case the zeroing
 363 will be handled by the cluster_write of the actual data.
 364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
 493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
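/*
 * The access cache is a sorted array of directory cnids (acache) with a
 * parallel array of access results (haveaccess), capped at CACHE_ELEMS
 * entries. lookup_bucket() binary-searches it and add_node() inserts an
 * entry (or replaces one when the cache is full).
 */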
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
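/*
 * access_t is the 32-bit caller's layout and user_access_t the LP64 layout
 * (user_addr_t pointers); the HFS_BULKACCESS handler converts the former
 * into the latter before use.
 */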
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
 710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
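/*
 * Walk from nodeID up toward the root, requiring search (X_OK) permission
 * on each directory. The ids visited are remembered in parent_ids[] so the
 * final result can be cached for all of them on exit.
 */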
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_PROGRESS: {
856
857 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
858 if (suser(cred, NULL) &&
859 kauth_cred_getuid(cred) != vfsp->f_owner) {
860 return (EACCES); /* must be owner of file system */
861 }
862 if (!vnode_isvroot(vp)) {
863 return (EINVAL);
864 }
865 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
866 }
867 case HFS_RESIZE_VOLUME: {
868 u_int64_t newsize;
869 u_int64_t cursize;
870
871 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
872 if (suser(cred, NULL) &&
873 kauth_cred_getuid(cred) != vfsp->f_owner) {
874 return (EACCES); /* must be owner of file system */
875 }
876 if (!vnode_isvroot(vp)) {
877 return (EINVAL);
878 }
879 newsize = *(u_int64_t *)ap->a_data;
880 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
881
882 if (newsize > cursize) {
883 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
884 } else if (newsize < cursize) {
885 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
886 } else {
887 return (0);
888 }
889 }
890 case HFS_CHANGE_NEXT_ALLOCATION: {
891 u_int32_t location;
892
893 if (vnode_vfsisrdonly(vp)) {
894 return (EROFS);
895 }
896 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
897 if (suser(cred, NULL) &&
898 kauth_cred_getuid(cred) != vfsp->f_owner) {
899 return (EACCES); /* must be owner of file system */
900 }
901 if (!vnode_isvroot(vp)) {
902 return (EINVAL);
903 }
904 location = *(u_int32_t *)ap->a_data;
905 if (location > hfsmp->totalBlocks - 1) {
906 return (EINVAL);
907 }
908 /* Return previous value. */
909 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
910 HFS_MOUNT_LOCK(hfsmp, TRUE);
911 hfsmp->nextAllocation = location;
912 hfsmp->vcbFlags |= 0xFF00;
913 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
914 return (0);
915 }
916
917 #ifdef HFS_SPARSE_DEV
918 case HFS_SETBACKINGSTOREINFO: {
919 struct vnode * bsfs_rootvp;
920 struct vnode * di_vp;
921 struct hfs_backingstoreinfo *bsdata;
922 int error = 0;
923
924 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
925 return (EALREADY);
926 }
927 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
928 if (suser(cred, NULL) &&
929 kauth_cred_getuid(cred) != vfsp->f_owner) {
930 return (EACCES); /* must be owner of file system */
931 }
932 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
933 if (bsdata == NULL) {
934 return (EINVAL);
935 }
936 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
937 return (error);
938 }
939 if ((error = vnode_getwithref(di_vp))) {
940 file_drop(bsdata->backingfd);
941 return(error);
942 }
943
944 if (vnode_mount(vp) == vnode_mount(di_vp)) {
945 (void)vnode_put(di_vp);
946 file_drop(bsdata->backingfd);
947 return (EINVAL);
948 }
949
950 /*
951 * Obtain the backing fs root vnode and keep a reference
952 * on it. This reference will be dropped in hfs_unmount.
953 */
954 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
955 if (error) {
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (error);
959 }
960 vnode_ref(bsfs_rootvp);
961 vnode_put(bsfs_rootvp);
962
963 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
964 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
965 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
966 hfsmp->hfs_sparsebandblks *= 4;
967
968 (void)vnode_put(di_vp);
969 file_drop(bsdata->backingfd);
970 return (0);
971 }
972 case HFS_CLRBACKINGSTOREINFO: {
973 struct vnode * tmpvp;
974
975 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
976 if (suser(cred, NULL) &&
977 kauth_cred_getuid(cred) != vfsp->f_owner) {
978 return (EACCES); /* must be owner of file system */
979 }
980 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
981 hfsmp->hfs_backingfs_rootvp) {
982
983 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
984 tmpvp = hfsmp->hfs_backingfs_rootvp;
985 hfsmp->hfs_backingfs_rootvp = NULLVP;
986 hfsmp->hfs_sparsebandblks = 0;
987 vnode_rele(tmpvp);
988 }
989 return (0);
990 }
991 #endif /* HFS_SPARSE_DEV */
992
993 case F_FREEZE_FS: {
994 struct mount *mp;
995 task_t task;
996
997 if (!is_suser())
998 return (EACCES);
999
1000 mp = vnode_mount(vp);
1001 hfsmp = VFSTOHFS(mp);
1002
1003 if (!(hfsmp->jnl))
1004 return (ENOTSUP);
1005
1006 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1007
1008 task = current_task();
1009 task_working_set_disable(task);
1010
1011 // flush things before we get started to try and prevent
1012 // dirty data from being paged out while we're frozen.
1013 // note: can't do this after taking the lock as it will
1014 // deadlock against ourselves.
1015 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1016 hfs_global_exclusive_lock_acquire(hfsmp);
1017 journal_flush(hfsmp->jnl);
1018
1019 // don't need to iterate on all vnodes, we just need to
1020 // wait for writes to the system files and the device vnode
1021 if (HFSTOVCB(hfsmp)->extentsRefNum)
1022 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1023 if (HFSTOVCB(hfsmp)->catalogRefNum)
1024 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1025 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1026 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1027 if (hfsmp->hfs_attribute_vp)
1028 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1029 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1030
1031 hfsmp->hfs_freezing_proc = current_proc();
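// both hfsmp->hfs_insync and the global exclusive lock stay held until
// F_THAW_FS below (or the "thaw" code in hfs_vnop_close) releases them.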
1032
1033 return (0);
1034 }
1035
1036 case F_THAW_FS: {
1037 if (!is_suser())
1038 return (EACCES);
1039
1040 // if we're not the one who froze the fs then we
1041 // can't thaw it.
1042 if (hfsmp->hfs_freezing_proc != current_proc()) {
1043 return EPERM;
1044 }
1045
1046 // NOTE: if you add code here, also go check the
1047 // code that "thaws" the fs in hfs_vnop_close()
1048 //
1049 hfsmp->hfs_freezing_proc = NULL;
1050 hfs_global_exclusive_lock_release(hfsmp);
1051 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1052
1053 return (0);
1054 }
1055
1056 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1057 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1058
1059 case HFS_BULKACCESS_FSCTL:
1060 case HFS_BULKACCESS: {
1061 /*
 1062 * NOTE: on entry, the vnode is locked. In case this vnode
 1063 * happens to be in our list of file_ids, we note it so we can
 1064 * avoid calling hfs_chashget_nowait() on that id, as that
 1065 * would cause a "locking against myself" panic.
1066 */
1067 Boolean check_leaf = true;
1068
1069 struct user_access_t *user_access_structp;
1070 struct user_access_t tmp_user_access_t;
1071 struct access_cache cache;
1072
1073 int error = 0, i;
1074
1075 dev_t dev = VTOC(vp)->c_dev;
1076
1077 short flags;
1078 struct ucred myucred; /* XXX ILLEGAL */
1079 int num_files;
1080 int *file_ids = NULL;
1081 short *access = NULL;
1082
1083 cnid_t cnid;
1084 cnid_t prevParent_cnid = 0;
1085 unsigned long myPerms;
1086 short myaccess = 0;
1087 struct cat_attr cnattr;
1088 CatalogKey catkey;
1089 struct cnode *skip_cp = VTOC(vp);
1090 struct vfs_context my_context;
1091
1092 /* first, return error if not run as root */
1093 if (cred->cr_ruid != 0) {
1094 return EPERM;
1095 }
1096
1097 /* initialize the local cache and buffers */
1098 cache.numcached = 0;
1099 cache.cachehits = 0;
1100 cache.lookups = 0;
1101
1102 file_ids = (int *) get_pathbuff();
1103 access = (short *) get_pathbuff();
1104 cache.acache = (int *) get_pathbuff();
1105 cache.haveaccess = (Boolean *) get_pathbuff();
1106
1107 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1108 release_pathbuff((char *) file_ids);
1109 release_pathbuff((char *) access);
1110 release_pathbuff((char *) cache.acache);
1111 release_pathbuff((char *) cache.haveaccess);
1112
1113 return ENOMEM;
1114 }
1115
1116 /* struct copyin done during dispatch... need to copy file_id array separately */
1117 if (ap->a_data == NULL) {
1118 error = EINVAL;
1119 goto err_exit_bulk_access;
1120 }
1121
1122 if (is64bit) {
1123 user_access_structp = (struct user_access_t *)ap->a_data;
1124 }
1125 else {
1126 struct access_t * accessp = (struct access_t *)ap->a_data;
1127 tmp_user_access_t.uid = accessp->uid;
1128 tmp_user_access_t.flags = accessp->flags;
1129 tmp_user_access_t.num_groups = accessp->num_groups;
1130 tmp_user_access_t.num_files = accessp->num_files;
1131 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1132 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1133 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1134 user_access_structp = &tmp_user_access_t;
1135 }
1136
1137 num_files = user_access_structp->num_files;
1138 if (num_files < 1) {
1139 goto err_exit_bulk_access;
1140 }
1141 if (num_files > 256) {
1142 error = EINVAL;
1143 goto err_exit_bulk_access;
1144 }
1145
1146 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1147 num_files * sizeof(int)))) {
1148 goto err_exit_bulk_access;
1149 }
1150
1151 /* fill in the ucred structure */
1152 flags = user_access_structp->flags;
1153 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1154 flags = R_OK;
1155 }
1156
1157 /* check if we've been passed leaf node ids or parent ids */
1158 if (flags & PARENT_IDS_FLAG) {
1159 check_leaf = false;
1160 }
1161
1162 memset(&myucred, 0, sizeof(myucred));
1163 myucred.cr_ref = 1;
1164 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1165 myucred.cr_ngroups = user_access_structp->num_groups;
1166 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1167 myucred.cr_ngroups = 0;
1168 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1169 myucred.cr_ngroups * sizeof(gid_t)))) {
1170 goto err_exit_bulk_access;
1171 }
1172 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1173 myucred.cr_gmuid = myucred.cr_uid;
1174
1175 my_context.vc_proc = p;
1176 my_context.vc_ucred = &myucred;
1177
1178 /* Check access to each file_id passed in */
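/* (The "#if 0" block below is a disabled cache-based check; the active path
 * walks each file id up its parent chain with hfs_vget() and asks
 * vnode_authorize() for search or read permission at every level.)
 */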
1179 for (i = 0; i < num_files; i++) {
1180 #if 0
1181 cnid = (cnid_t) file_ids[i];
1182
1183 /* root always has access */
1184 if (!suser(&myucred, NULL)) {
1185 access[i] = 0;
1186 continue;
1187 }
1188
1189 if (check_leaf) {
1190
1191 /* do the lookup (checks the cnode hash, then the catalog) */
1192 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1193 if (error) {
1194 access[i] = (short) error;
1195 continue;
1196 }
1197
1198 /* before calling CheckAccess(), check the target file for read access */
1199 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1200 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1201
1202
1203 /* fail fast if no access */
1204 if ((myPerms & flags) == 0) {
1205 access[i] = EACCES;
1206 continue;
1207 }
1208 } else {
1209 /* we were passed an array of parent ids */
1210 catkey.hfsPlus.parentID = cnid;
1211 }
1212
1213 /* if the last guy had the same parent and had access, we're done */
1214 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1215 cache.cachehits++;
1216 access[i] = 0;
1217 continue;
1218 }
1219
1220 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1221 skip_cp, p, &myucred, dev);
1222
1223 if ( myaccess ) {
1224 access[i] = 0; // have access.. no errors to report
1225 } else {
1226 access[i] = (error != 0 ? (short) error : EACCES);
1227 }
1228
1229 prevParent_cnid = catkey.hfsPlus.parentID;
1230 #else
1231 int myErr;
1232
1233 cnid = (cnid_t)file_ids[i];
1234
1235 while (cnid >= kRootDirID) {
1236 /* get the vnode for this cnid */
1237 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1238 if ( myErr ) {
1239 access[i] = EACCES;
1240 break;
1241 }
1242
1243 cnid = VTOC(vp)->c_parentcnid;
1244
1245 hfs_unlock(VTOC(vp));
1246 if (vnode_vtype(vp) == VDIR) {
1247 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1248 } else {
1249 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1250 }
1251 vnode_put(vp);
1252 access[i] = myErr;
1253 if (myErr) {
1254 break;
1255 }
1256 }
1257 #endif
1258 }
1259
1260 /* copyout the access array */
1261 if ((error = copyout((caddr_t)access, user_access_structp->access,
1262 num_files * sizeof (short)))) {
1263 goto err_exit_bulk_access;
1264 }
1265
1266 err_exit_bulk_access:
1267
1268 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1269
1270 release_pathbuff((char *) cache.acache);
1271 release_pathbuff((char *) cache.haveaccess);
1272 release_pathbuff((char *) file_ids);
1273 release_pathbuff((char *) access);
1274
1275 return (error);
1276 } /* HFS_BULKACCESS */
1277
1278 case HFS_SETACLSTATE: {
1279 int state;
1280
1281 if (ap->a_data == NULL) {
1282 return (EINVAL);
1283 }
1284
1285 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1286 state = *(int *)ap->a_data;
1287
1288 // super-user can enable or disable acl's on a volume.
1289 // the volume owner can only enable acl's
1290 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1291 return (EPERM);
1292 }
1293 if (state == 0 || state == 1)
1294 return hfs_setextendedsecurity(hfsmp, state);
1295 else
1296 return (EINVAL);
1297 }
1298
1299 case F_FULLFSYNC: {
1300 int error;
1301
1302 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1303 if (error == 0) {
1304 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1305 hfs_unlock(VTOC(vp));
1306 }
1307
1308 return error;
1309 }
1310
1311 case F_CHKCLEAN: {
1312 register struct cnode *cp;
1313 int error;
1314
1315 if (!vnode_isreg(vp))
1316 return EINVAL;
1317
1318 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1319 if (error == 0) {
1320 cp = VTOC(vp);
1321 /*
1322 * used by regression test to determine if
1323 * all the dirty pages (via write) have been cleaned
 1324 * after a call to 'fsync'.
1325 */
1326 error = is_file_clean(vp, VTOF(vp)->ff_size);
1327 hfs_unlock(cp);
1328 }
1329 return (error);
1330 }
1331
1332 case F_RDADVISE: {
1333 register struct radvisory *ra;
1334 struct filefork *fp;
1335 int error;
1336
1337 if (!vnode_isreg(vp))
1338 return EINVAL;
1339
1340 ra = (struct radvisory *)(ap->a_data);
1341 fp = VTOF(vp);
1342
1343 /* Protect against a size change. */
1344 hfs_lock_truncate(VTOC(vp), TRUE);
1345
1346 if (ra->ra_offset >= fp->ff_size) {
1347 error = EFBIG;
1348 } else {
1349 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1350 }
1351
1352 hfs_unlock_truncate(VTOC(vp));
1353 return (error);
1354 }
1355
1356 case F_READBOOTSTRAP:
1357 case F_WRITEBOOTSTRAP:
1358 {
1359 struct vnode *devvp = NULL;
1360 user_fbootstraptransfer_t *user_bootstrapp;
1361 int devBlockSize;
1362 int error;
1363 uio_t auio;
1364 daddr64_t blockNumber;
1365 u_long blockOffset;
1366 u_long xfersize;
1367 struct buf *bp;
1368 user_fbootstraptransfer_t user_bootstrap;
1369
1370 if (!vnode_isvroot(vp))
1371 return (EINVAL);
1372 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1373 * to a user_fbootstraptransfer_t else we get a pointer to a
1374 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1375 */
1376 if (is64bit) {
1377 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1378 }
1379 else {
1380 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1381 user_bootstrapp = &user_bootstrap;
1382 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1383 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1384 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1385 }
1386 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1387 return EINVAL;
1388
1389 devvp = VTOHFS(vp)->hfs_devvp;
1390 auio = uio_create(1, user_bootstrapp->fbt_offset,
1391 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1392 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1393 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1394
1395 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1396
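/*
 * Transfer the bootstrap area (at most the first 1024 bytes, enforced
 * above) one device block at a time: read the block, copy the relevant
 * piece to or from user space, and write it back for F_WRITEBOOTSTRAP.
 */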
1397 while (uio_resid(auio) > 0) {
1398 blockNumber = uio_offset(auio) / devBlockSize;
1399 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1400 if (error) {
1401 if (bp) buf_brelse(bp);
1402 uio_free(auio);
1403 return error;
1404 };
1405
1406 blockOffset = uio_offset(auio) % devBlockSize;
1407 xfersize = devBlockSize - blockOffset;
1408 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1409 if (error) {
1410 buf_brelse(bp);
1411 uio_free(auio);
1412 return error;
1413 };
1414 if (uio_rw(auio) == UIO_WRITE) {
1415 error = VNOP_BWRITE(bp);
1416 if (error) {
1417 uio_free(auio);
1418 return error;
1419 }
1420 } else {
1421 buf_brelse(bp);
1422 };
1423 };
1424 uio_free(auio);
1425 };
1426 return 0;
1427
1428 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1429 {
1430 if (is64bit) {
1431 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1432 }
1433 else {
1434 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1435 }
1436 return 0;
1437 }
1438
1439 case HFS_GET_MOUNT_TIME:
1440 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1441 break;
1442
1443 case HFS_GET_LAST_MTIME:
1444 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1445 break;
1446
1447 case HFS_SET_BOOT_INFO:
1448 if (!vnode_isvroot(vp))
1449 return(EINVAL);
1450 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1451 return(EACCES); /* must be superuser or owner of filesystem */
1452 HFS_MOUNT_LOCK(hfsmp, TRUE);
1453 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1454 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1455 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1456 break;
1457
1458 case HFS_GET_BOOT_INFO:
1459 if (!vnode_isvroot(vp))
1460 return(EINVAL);
1461 HFS_MOUNT_LOCK(hfsmp, TRUE);
1462 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1463 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1464 break;
1465
1466 default:
1467 return (ENOTTY);
1468 }
1469
1470 /* Should never get here */
1471 return 0;
1472 }
1473
1474 /*
1475 * select
1476 */
1477 int
1478 hfs_vnop_select(__unused struct vnop_select_args *ap)
1479 /*
1480 struct vnop_select_args {
1481 vnode_t a_vp;
1482 int a_which;
1483 int a_fflags;
1484 void *a_wql;
1485 vfs_context_t a_context;
1486 };
1487 */
1488 {
1489 /*
1490 * We should really check to see if I/O is possible.
1491 */
1492 return (1);
1493 }
1494
1495 /*
 1496 * Converts a logical block number to a physical block, and optionally returns
 1497 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 1498 * The physical block number is based on the device block size, currently 512.
 1499 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1500 */
1501 int
1502 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1503 {
1504 struct cnode *cp = VTOC(vp);
1505 struct filefork *fp = VTOF(vp);
1506 struct hfsmount *hfsmp = VTOHFS(vp);
1507 int retval = E_NONE;
1508 daddr_t logBlockSize;
1509 size_t bytesContAvail = 0;
1510 off_t blockposition;
1511 int lockExtBtree;
1512 int lockflags = 0;
1513
1514 /*
1515 * Check for underlying vnode requests and ensure that logical
1516 * to physical mapping is requested.
1517 */
1518 if (vpp != NULL)
1519 *vpp = cp->c_devvp;
1520 if (bnp == NULL)
1521 return (0);
1522
1523 logBlockSize = GetLogicalBlockSize(vp);
1524 blockposition = (off_t)bn * (off_t)logBlockSize;
1525
1526 lockExtBtree = overflow_extents(fp);
1527
1528 if (lockExtBtree)
1529 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1530
1531 retval = MacToVFSError(
1532 MapFileBlockC (HFSTOVCB(hfsmp),
1533 (FCB*)fp,
1534 MAXPHYSIO,
1535 blockposition,
1536 bnp,
1537 &bytesContAvail));
1538
1539 if (lockExtBtree)
1540 hfs_systemfile_unlock(hfsmp, lockflags);
1541
1542 if (retval == E_NONE) {
1543 /* Figure out how many read ahead blocks there are */
1544 if (runp != NULL) {
1545 if (can_cluster(logBlockSize)) {
1546 /* Make sure this result never goes negative: */
1547 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1548 } else {
1549 *runp = 0;
1550 }
1551 }
1552 }
1553 return (retval);
1554 }
1555
1556 /*
1557 * Convert logical block number to file offset.
1558 */
1559 int
1560 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1561 /*
1562 struct vnop_blktooff_args {
1563 vnode_t a_vp;
1564 daddr64_t a_lblkno;
1565 off_t *a_offset;
1566 };
1567 */
1568 {
1569 if (ap->a_vp == NULL)
1570 return (EINVAL);
1571 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1572
1573 return(0);
1574 }
1575
1576 /*
1577 * Convert file offset to logical block number.
1578 */
1579 int
1580 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1581 /*
1582 struct vnop_offtoblk_args {
1583 vnode_t a_vp;
1584 off_t a_offset;
1585 daddr64_t *a_lblkno;
1586 };
1587 */
1588 {
1589 if (ap->a_vp == NULL)
1590 return (EINVAL);
1591 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1592
1593 return(0);
1594 }
1595
1596 /*
1597 * Map file offset to physical block number.
1598 *
1599 * System file cnodes are expected to be locked (shared or exclusive).
1600 */
1601 int
1602 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1603 /*
1604 struct vnop_blockmap_args {
1605 vnode_t a_vp;
1606 off_t a_foffset;
1607 size_t a_size;
1608 daddr64_t *a_bpn;
1609 size_t *a_run;
1610 void *a_poff;
1611 int a_flags;
1612 vfs_context_t a_context;
1613 };
1614 */
1615 {
1616 struct vnode *vp = ap->a_vp;
1617 struct cnode *cp;
1618 struct filefork *fp;
1619 struct hfsmount *hfsmp;
1620 size_t bytesContAvail = 0;
1621 int retval = E_NONE;
1622 int syslocks = 0;
1623 int lockflags = 0;
1624 struct rl_entry *invalid_range;
1625 enum rl_overlaptype overlaptype;
1626 int started_tr = 0;
1627 int tooklock = 0;
1628
1629 /* Do not allow blockmap operation on a directory */
1630 if (vnode_isdir(vp)) {
1631 return (ENOTSUP);
1632 }
1633
1634 /*
1635 * Check for underlying vnode requests and ensure that logical
1636 * to physical mapping is requested.
1637 */
1638 if (ap->a_bpn == NULL)
1639 return (0);
1640
1641 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1642 if (VTOC(vp)->c_lockowner != current_thread()) {
1643 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1644 tooklock = 1;
1645 } else {
1646 cp = VTOC(vp);
1647 panic("blockmap: %s cnode lock already held!\n",
1648 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1649 }
1650 }
1651 hfsmp = VTOHFS(vp);
1652 cp = VTOC(vp);
1653 fp = VTOF(vp);
1654
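/*
 * ff_unallocblocks counts blocks "on loan" from delayed allocation; they
 * must be converted to real allocations (inside a transaction, holding the
 * extents and bitmap locks) before the file can be mapped.
 */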
1655 retry:
1656 if (fp->ff_unallocblocks) {
1657 if (hfs_start_transaction(hfsmp) != 0) {
1658 retval = EINVAL;
1659 goto exit;
1660 } else {
1661 started_tr = 1;
1662 }
1663 syslocks = SFL_EXTENTS | SFL_BITMAP;
1664
1665 } else if (overflow_extents(fp)) {
1666 syslocks = SFL_EXTENTS;
1667 }
1668
1669 if (syslocks)
1670 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1671
1672 /*
1673 * Check for any delayed allocations.
1674 */
1675 if (fp->ff_unallocblocks) {
1676 SInt64 actbytes;
1677 u_int32_t loanedBlocks;
1678
1679 //
1680 // Make sure we have a transaction. It's possible
1681 // that we came in and fp->ff_unallocblocks was zero
1682 // but during the time we blocked acquiring the extents
1683 // btree, ff_unallocblocks became non-zero and so we
1684 // will need to start a transaction.
1685 //
1686 if (started_tr == 0) {
1687 if (syslocks) {
1688 hfs_systemfile_unlock(hfsmp, lockflags);
1689 syslocks = 0;
1690 }
1691 goto retry;
1692 }
1693
1694 /*
 1695 * Note: ExtendFileC will release any blocks on loan and
 1696 * acquire real blocks. So we ask to extend by zero bytes
1697 * since ExtendFileC will account for the virtual blocks.
1698 */
1699
1700 loanedBlocks = fp->ff_unallocblocks;
1701 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1702 kEFAllMask | kEFNoClumpMask, &actbytes);
1703
1704 if (retval) {
1705 fp->ff_unallocblocks = loanedBlocks;
1706 cp->c_blocks += loanedBlocks;
1707 fp->ff_blocks += loanedBlocks;
1708
1709 HFS_MOUNT_LOCK(hfsmp, TRUE);
1710 hfsmp->loanedBlocks += loanedBlocks;
1711 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1712 }
1713
1714 if (retval) {
1715 hfs_systemfile_unlock(hfsmp, lockflags);
1716 cp->c_flag |= C_MODIFIED;
1717 if (started_tr) {
1718 (void) hfs_update(vp, TRUE);
1719 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1720
1721 hfs_end_transaction(hfsmp);
1722 }
1723 goto exit;
1724 }
1725 }
1726
1727 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1728 ap->a_bpn, &bytesContAvail);
1729 if (syslocks) {
1730 hfs_systemfile_unlock(hfsmp, lockflags);
1731 syslocks = 0;
1732 }
1733
1734 if (started_tr) {
1735 (void) hfs_update(vp, TRUE);
1736 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1737 hfs_end_transaction(hfsmp);
1738 started_tr = 0;
1739 }
1740 if (retval) {
1741 goto exit;
1742 }
1743
1744 /* Adjust the mapping information for invalid file ranges: */
1745 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1746 ap->a_foffset + (off_t)bytesContAvail - 1,
1747 &invalid_range);
1748 if (overlaptype != RL_NOOVERLAP) {
1749 switch(overlaptype) {
1750 case RL_MATCHINGOVERLAP:
1751 case RL_OVERLAPCONTAINSRANGE:
1752 case RL_OVERLAPSTARTSBEFORE:
1753 /* There's no valid block for this byte offset: */
1754 *ap->a_bpn = (daddr64_t)-1;
1755 /* There's no point limiting the amount to be returned
1756 * if the invalid range that was hit extends all the way
1757 * to the EOF (i.e. there's no valid bytes between the
1758 * end of this range and the file's EOF):
1759 */
1760 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763 }
1764 break;
1765
1766 case RL_OVERLAPISCONTAINED:
1767 case RL_OVERLAPENDSAFTER:
1768 /* The range of interest hits an invalid block before the end: */
1769 if (invalid_range->rl_start == ap->a_foffset) {
1770 /* There's actually no valid information to be had starting here: */
1771 *ap->a_bpn = (daddr64_t)-1;
1772 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1773 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1774 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1775 }
1776 } else {
1777 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1778 }
1779 break;
1780
1781 case RL_NOOVERLAP:
1782 break;
1783 } /* end switch */
1784 if (bytesContAvail > ap->a_size)
1785 bytesContAvail = ap->a_size;
1786 }
1787 if (ap->a_run)
1788 *ap->a_run = bytesContAvail;
1789
1790 if (ap->a_poff)
1791 *(int *)ap->a_poff = 0;
1792 exit:
1793 if (tooklock)
1794 hfs_unlock(cp);
1795
1796 return (MacToVFSError(retval));
1797 }
1798
1799
1800 /*
1801 * prepare and issue the I/O
1802 * buf_strategy knows how to deal
1803 * with requests that require
1804 * fragmented I/Os
1805 */
1806 int
1807 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1808 {
1809 buf_t bp = ap->a_bp;
1810 vnode_t vp = buf_vnode(bp);
1811 struct cnode *cp = VTOC(vp);
1812
1813 return (buf_strategy(cp->c_devvp, ap));
1814 }
1815
1816
1817 static int
1818 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1819 {
1820 register struct cnode *cp = VTOC(vp);
1821 struct filefork *fp = VTOF(vp);
 1822 struct proc *p = vfs_context_proc(context);
1823 kauth_cred_t cred = vfs_context_ucred(context);
1824 int retval;
1825 off_t bytesToAdd;
1826 off_t actualBytesAdded;
1827 off_t filebytes;
1828 u_int64_t old_filesize;
1829 u_long fileblocks;
1830 int blksize;
1831 struct hfsmount *hfsmp;
1832 int lockflags;
1833
1834 blksize = VTOVCB(vp)->blockSize;
1835 fileblocks = fp->ff_blocks;
1836 filebytes = (off_t)fileblocks * (off_t)blksize;
1837 old_filesize = fp->ff_size;
1838
1839 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1840 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1841
1842 if (length < 0)
1843 return (EINVAL);
1844
1845 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1846 return (EFBIG);
1847
1848 hfsmp = VTOHFS(vp);
1849
1850 retval = E_NONE;
1851
1852 /* Files that are changing size are not hot file candidates. */
1853 if (hfsmp->hfc_stage == HFC_RECORDING) {
1854 fp->ff_bytesread = 0;
1855 }
1856
1857 /*
1858 * We cannot just check if fp->ff_size == length (as an optimization)
1859 * since there may be extra physical blocks that also need truncation.
1860 */
1861 #if QUOTA
1862 if ((retval = hfs_getinoquota(cp)))
1863 return(retval);
1864 #endif /* QUOTA */
1865
1866 /*
1867 * Lengthen the size of the file. We must ensure that the
1868 * last byte of the file is allocated. Since the smallest
1869 * value of ff_size is 0, length will be at least 1.
1870 */
1871 if (length > (off_t)fp->ff_size) {
1872 #if QUOTA
1873 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1874 cred, 0);
1875 if (retval)
1876 goto Err_Exit;
1877 #endif /* QUOTA */
1878 /*
1879 * If we don't have enough physical space then
1880 * we need to extend the physical size.
1881 */
1882 if (length > filebytes) {
1883 int eflags;
1884 u_long blockHint = 0;
1885
1886 /* All or nothing and don't round up to clumpsize. */
1887 eflags = kEFAllMask | kEFNoClumpMask;
1888
1889 if (cred && suser(cred, NULL) != 0)
1890 eflags |= kEFReserveMask; /* keep a reserve */
1891
1892 /*
1893 * Allocate Journal and Quota files in metadata zone.
1894 */
1895 if (filebytes == 0 &&
1896 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1897 hfs_virtualmetafile(cp)) {
1898 eflags |= kEFMetadataMask;
1899 blockHint = hfsmp->hfs_metazone_start;
1900 }
1901 if (hfs_start_transaction(hfsmp) != 0) {
1902 retval = EINVAL;
1903 goto Err_Exit;
1904 }
1905
1906 /* Protect extents b-tree and allocation bitmap */
1907 lockflags = SFL_BITMAP;
1908 if (overflow_extents(fp))
1909 lockflags |= SFL_EXTENTS;
1910 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1911
1912 while ((length > filebytes) && (retval == E_NONE)) {
1913 bytesToAdd = length - filebytes;
1914 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1915 (FCB*)fp,
1916 bytesToAdd,
1917 blockHint,
1918 eflags,
1919 &actualBytesAdded));
1920
1921 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1922 if (actualBytesAdded == 0 && retval == E_NONE) {
1923 if (length > filebytes)
1924 length = filebytes;
1925 break;
1926 }
1927 } /* endwhile */
1928
1929 hfs_systemfile_unlock(hfsmp, lockflags);
1930
1931 if (hfsmp->jnl) {
1932 (void) hfs_update(vp, TRUE);
1933 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1934 }
1935
1936 hfs_end_transaction(hfsmp);
1937
1938 if (retval)
1939 goto Err_Exit;
1940
1941 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1942 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1943 }
1944
1945 if (!(flags & IO_NOZEROFILL)) {
1946 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1947 struct rl_entry *invalid_range;
1948 off_t zero_limit;
1949
1950 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1951 if (length < zero_limit) zero_limit = length;
1952
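/*
 * Zero-fill strategy: zero_limit is the page-aligned end of the (old)
 * last page.  If that page already holds valid data, the gap from the
 * old EOF up to zero_limit is zeroed right away via cluster_write();
 * everything beyond zero_limit is only recorded in the invalid-range
 * list so it can be zero-filled lazily.
 */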
1953 if (length > (off_t)fp->ff_size) {
1954 struct timeval tv;
1955
1956 /* Extending the file: time to fill out the current last page with zeroes? */
1957 if ((fp->ff_size & PAGE_MASK_64) &&
1958 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1959 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1960
1961 /* There's some valid data at the start of the (current) last page
1962 of the file, so zero out the remainder of that page to ensure the
1963 entire page contains valid data. Since there is no invalid range
1964 possible past the (current) eof, there's no need to remove anything
1965 from the invalid range list before calling cluster_write(): */
1966 hfs_unlock(cp);
1967 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1968 fp->ff_size, (off_t)0,
1969 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1970 hfs_lock(cp, HFS_FORCE_LOCK);
1971 if (retval) goto Err_Exit;
1972
1973 /* Merely invalidate the remaining area, if necessary: */
1974 if (length > zero_limit) {
1975 microuptime(&tv);
1976 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1977 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1978 }
1979 } else {
1980 /* The page containing the (current) eof is invalid: just add the
1981 remainder of the page to the invalid list, along with the area
1982 being newly allocated:
1983 */
1984 microuptime(&tv);
1985 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1986 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1987 };
1988 }
1989 } else {
1990 panic("hfs_truncate: invoked on non-UBC object?!");
1991 };
1992 }
1993 cp->c_touch_modtime = TRUE;
1994 fp->ff_size = length;
1995
1996 /* Nested transactions will do their own ubc_setsize. */
1997 if (!skipsetsize) {
1998 /*
1999 * ubc_setsize can cause a pagein here
2000 * so we need to drop cnode lock.
2001 */
2002 hfs_unlock(cp);
2003 ubc_setsize(vp, length);
2004 hfs_lock(cp, HFS_FORCE_LOCK);
2005 }
2006
2007 } else { /* Shorten the size of the file */
2008
2009 if ((off_t)fp->ff_size > length) {
2010 /*
2011 * Any buffers that are past the truncation point need to be
2012 * invalidated (to maintain buffer cache consistency).
2013 */
2014
2015 /* Nested transactions will do their own ubc_setsize. */
2016 if (!skipsetsize) {
2017 /*
2018 * ubc_setsize can cause a pageout here
2019 * so we need to drop cnode lock.
2020 */
2021 hfs_unlock(cp);
2022 ubc_setsize(vp, length);
2023 hfs_lock(cp, HFS_FORCE_LOCK);
2024 }
2025
2026 /* Any space previously marked as invalid is now irrelevant: */
2027 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2028 }
2029
2030 /*
2031 * Account for any unmapped blocks. Note that the new
2032 * file length can still end up with unmapped blocks.
2033 */
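/*
 * ff_unallocblocks are blocks "loaned" from the volume's free count for
 * dirty data that has not been allocated on disk yet (delayed allocation).
 * Return the whole loan here, then re-borrow just enough to cover whatever
 * part of the new, shorter length is still unallocated.
 */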
2034 if (fp->ff_unallocblocks > 0) {
2035 u_int32_t finalblks;
2036 u_int32_t loanedBlocks;
2037
2038 HFS_MOUNT_LOCK(hfsmp, TRUE);
2039
2040 loanedBlocks = fp->ff_unallocblocks;
2041 cp->c_blocks -= loanedBlocks;
2042 fp->ff_blocks -= loanedBlocks;
2043 fp->ff_unallocblocks = 0;
2044
2045 hfsmp->loanedBlocks -= loanedBlocks;
2046
2047 finalblks = (length + blksize - 1) / blksize;
2048 if (finalblks > fp->ff_blocks) {
2049 /* calculate required unmapped blocks */
2050 loanedBlocks = finalblks - fp->ff_blocks;
2051 hfsmp->loanedBlocks += loanedBlocks;
2052
2053 fp->ff_unallocblocks = loanedBlocks;
2054 cp->c_blocks += loanedBlocks;
2055 fp->ff_blocks += loanedBlocks;
2056 }
2057 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2058 }
2059
2060 /*
2061 * For a TBE process the deallocation of the file blocks is
2062 * delayed until the file is closed. And hfs_close calls
2063 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2064 * isn't set, we make sure this isn't a TBE process.
2065 */
2066 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2067 #if QUOTA
2068 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2069 #endif /* QUOTA */
2070 if (hfs_start_transaction(hfsmp) != 0) {
2071 retval = EINVAL;
2072 goto Err_Exit;
2073 }
2074
2075 if (fp->ff_unallocblocks == 0) {
2076 /* Protect extents b-tree and allocation bitmap */
2077 lockflags = SFL_BITMAP;
2078 if (overflow_extents(fp))
2079 lockflags |= SFL_EXTENTS;
2080 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2081
2082 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2083 (FCB*)fp, length, false));
2084
2085 hfs_systemfile_unlock(hfsmp, lockflags);
2086 }
2087 if (hfsmp->jnl) {
2088 if (retval == 0) {
2089 fp->ff_size = length;
2090 }
2091 (void) hfs_update(vp, TRUE);
2092 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2093 }
2094
2095 hfs_end_transaction(hfsmp);
2096
2097 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2098 if (retval)
2099 goto Err_Exit;
2100 #if QUOTA
2101 /* These are bytesreleased */
2102 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2103 #endif /* QUOTA */
2104 }
2105 /* Only set update flag if the logical length changes */
2106 if (old_filesize != length)
2107 cp->c_touch_modtime = TRUE;
2108 fp->ff_size = length;
2109 }
2110 cp->c_touch_chgtime = TRUE;
2111 retval = hfs_update(vp, MNT_WAIT);
2112 if (retval) {
2113 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2114 -1, -1, -1, retval, 0);
2115 }
2116
2117 Err_Exit:
2118
2119 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2120 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2121
2122 return (retval);
2123 }
2124
2125
2126
2127 /*
2128 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2129 * the disk blocks as needed.
2130 */
2131 __private_extern__
2132 int
2133 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2134 vfs_context_t context)
2135 {
2136 struct filefork *fp = VTOF(vp);
2137 off_t filebytes;
2138 u_long fileblocks;
2139 int blksize, error = 0;
2140 struct cnode *cp = VTOC(vp);
2141
2142 if (vnode_isdir(vp))
2143 return (EISDIR); /* cannot truncate an HFS directory! */
2144
2145 blksize = VTOVCB(vp)->blockSize;
2146 fileblocks = fp->ff_blocks;
2147 filebytes = (off_t)fileblocks * (off_t)blksize;
2148
2149 // have to loop truncating or growing files that are
2150 // really big because otherwise transactions can get
2151 // enormous and consume too many kernel resources.
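// Each call to do_hfs_truncate() below moves the physical EOF by at most
// HFS_BIGFILE_SIZE bytes, so no single journal transaction has to cover
// more bitmap and extent changes than that.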
2152
2153 if (length < filebytes) {
2154 while (filebytes > length) {
2155 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2156 filebytes -= HFS_BIGFILE_SIZE;
2157 } else {
2158 filebytes = length;
2159 }
2160 cp->c_flag |= C_FORCEUPDATE;
2161 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162 if (error)
2163 break;
2164 }
2165 } else if (length > filebytes) {
2166 while (filebytes < length) {
2167 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2168 filebytes += HFS_BIGFILE_SIZE;
2169 } else {
2170 filebytes = length;
2171 }
2172 cp->c_flag |= C_FORCEUPDATE;
2173 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2174 if (error)
2175 break;
2176 }
2177 } else /* Same logical size */ {
2178
2179 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2180 }
2181 /* Files that are changing size are not hot file candidates. */
2182 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2183 fp->ff_bytesread = 0;
2184 }
2185
2186 return (error);
2187 }
2188
2189
2190
2191 /*
2192 * Preallocate file storage space.
2193 */
2194 int
2195 hfs_vnop_allocate(struct vnop_allocate_args /* {
2196 vnode_t a_vp;
2197 off_t a_length;
2198 u_int32_t a_flags;
2199 off_t *a_bytesallocated;
2200 off_t a_offset;
2201 vfs_context_t a_context;
2202 } */ *ap)
2203 {
2204 struct vnode *vp = ap->a_vp;
2205 struct cnode *cp;
2206 struct filefork *fp;
2207 ExtendedVCB *vcb;
2208 off_t length = ap->a_length;
2209 off_t startingPEOF;
2210 off_t moreBytesRequested;
2211 off_t actualBytesAdded;
2212 off_t filebytes;
2213 u_long fileblocks;
2214 int retval, retval2;
2215 UInt32 blockHint;
2216 UInt32 extendFlags; /* For call to ExtendFileC */
2217 struct hfsmount *hfsmp;
2218 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2219 int lockflags;
2220
2221 *(ap->a_bytesallocated) = 0;
2222
2223 if (!vnode_isreg(vp))
2224 return (EISDIR);
2225 if (length < (off_t)0)
2226 return (EINVAL);
2227
2228 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2229 return (retval);
2230 cp = VTOC(vp);
2231 fp = VTOF(vp);
2232 hfsmp = VTOHFS(vp);
2233 vcb = VTOVCB(vp);
2234
2235 fileblocks = fp->ff_blocks;
2236 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2237
2238 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2239 retval = EINVAL;
2240 goto Err_Exit;
2241 }
2242
2243 /* Fill in the flags word for the call to Extend the file */
2244
2245 extendFlags = kEFNoClumpMask;
2246 if (ap->a_flags & ALLOCATECONTIG)
2247 extendFlags |= kEFContigMask;
2248 if (ap->a_flags & ALLOCATEALL)
2249 extendFlags |= kEFAllMask;
2250 if (cred && suser(cred, NULL) != 0)
2251 extendFlags |= kEFReserveMask;
2252
2253 retval = E_NONE;
2254 blockHint = 0;
2255 startingPEOF = filebytes;
2256
2257 if (ap->a_flags & ALLOCATEFROMPEOF)
2258 length += filebytes;
2259 else if (ap->a_flags & ALLOCATEFROMVOL)
2260 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2261
2262 /* If no changes are necessary, then we're done */
2263 if (filebytes == length)
2264 goto Std_Exit;
2265
2266 /*
2267 * Lengthen the size of the file. We must ensure that the
2268 * last byte of the file is allocated. Since the smallest
2269 * value of filebytes is 0, length will be at least 1.
2270 */
2271 if (length > filebytes) {
2272 moreBytesRequested = length - filebytes;
2273
2274 #if QUOTA
2275 retval = hfs_chkdq(cp,
2276 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2277 cred, 0);
2278 if (retval)
2279 goto Err_Exit;
2280
2281 #endif /* QUOTA */
2282 /*
2283 * Metadata zone checks.
2284 */
2285 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2286 /*
2287 * Allocate Journal and Quota files in metadata zone.
2288 */
2289 if (hfs_virtualmetafile(cp)) {
2290 extendFlags |= kEFMetadataMask;
2291 blockHint = hfsmp->hfs_metazone_start;
2292 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2293 (blockHint <= hfsmp->hfs_metazone_end)) {
2294 /*
2295 * Move blockHint outside metadata zone.
2296 */
2297 blockHint = hfsmp->hfs_metazone_end + 1;
2298 }
2299 }
2300
2301 if (hfs_start_transaction(hfsmp) != 0) {
2302 retval = EINVAL;
2303 goto Err_Exit;
2304 }
2305
2306 /* Protect extents b-tree and allocation bitmap */
2307 lockflags = SFL_BITMAP;
2308 if (overflow_extents(fp))
2309 lockflags |= SFL_EXTENTS;
2310 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2311
2312 retval = MacToVFSError(ExtendFileC(vcb,
2313 (FCB*)fp,
2314 moreBytesRequested,
2315 blockHint,
2316 extendFlags,
2317 &actualBytesAdded));
2318
2319 *(ap->a_bytesallocated) = actualBytesAdded;
2320 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2321
2322 hfs_systemfile_unlock(hfsmp, lockflags);
2323
2324 if (hfsmp->jnl) {
2325 (void) hfs_update(vp, TRUE);
2326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2327 }
2328
2329 hfs_end_transaction(hfsmp);
2330
2331 /*
2332 * if we get an error and no changes were made then exit
2333 * otherwise we must do the hfs_update to reflect the changes
2334 */
2335 if (retval && (startingPEOF == filebytes))
2336 goto Err_Exit;
2337
2338 /*
2339 * Adjust actualBytesAdded to be allocation block aligned, not
2340 * clump size aligned.
2341 * NOTE: What we report here does not affect reality until
2342 * the file is closed, when we truncate the file to allocation
2343 * block size.
2344 */
2345 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2346 *(ap->a_bytesallocated) =
2347 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2348
2349 } else { /* Shorten the size of the file */
2350
2351 if (fp->ff_size > length) {
2352 /*
2353 * Any buffers that are past the truncation point need to be
2354 * invalidated (to maintain buffer cache consistency).
2355 */
2356 }
2357
2358 if (hfs_start_transaction(hfsmp) != 0) {
2359 retval = EINVAL;
2360 goto Err_Exit;
2361 }
2362
2363 /* Protect extents b-tree and allocation bitmap */
2364 lockflags = SFL_BITMAP;
2365 if (overflow_extents(fp))
2366 lockflags |= SFL_EXTENTS;
2367 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2368
2369 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2370
2371 hfs_systemfile_unlock(hfsmp, lockflags);
2372
2373 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2374
2375 if (hfsmp->jnl) {
2376 (void) hfs_update(vp, TRUE);
2377 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2378 }
2379
2380 hfs_end_transaction(hfsmp);
2381
2382
2383 /*
2384 * if we get an error and no changes were made then exit
2385 * otherwise we must do the hfs_update to reflect the changes
2386 */
2387 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2388 #if QUOTA
2389 /* These are bytesreleased */
2390 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2391 #endif /* QUOTA */
2392
2393 if (fp->ff_size > filebytes) {
2394 fp->ff_size = filebytes;
2395
2396 hfs_unlock(cp);
2397 ubc_setsize(vp, fp->ff_size);
2398 hfs_lock(cp, HFS_FORCE_LOCK);
2399 }
2400 }
2401
2402 Std_Exit:
2403 cp->c_touch_chgtime = TRUE;
2404 cp->c_touch_modtime = TRUE;
2405 retval2 = hfs_update(vp, MNT_WAIT);
2406
2407 if (retval == 0)
2408 retval = retval2;
2409 Err_Exit:
2410 hfs_unlock(cp);
2411 return (retval);
2412 }
2413
2414
2415 /*
2416 * Pagein for HFS filesystem
2417 */
2418 int
2419 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2420 /*
2421 struct vnop_pagein_args {
2422 vnode_t a_vp,
2423 upl_t a_pl,
2424 vm_offset_t a_pl_offset,
2425 off_t a_f_offset,
2426 size_t a_size,
2427 int a_flags
2428 vfs_context_t a_context;
2429 };
2430 */
2431 {
2432 vnode_t vp = ap->a_vp;
2433 int error;
2434
2435 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2436 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2437 /*
2438 * Keep track of blocks read.
2439 */
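/*
 * While the hot-file recording stage is active, bytes read and access
 * time are accumulated per file; the hot-file clustering code later uses
 * these samples to rank files for relocation into the hot-file area.
 */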
2440 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2441 struct cnode *cp;
2442 struct filefork *fp;
2443 int bytesread;
2444 int took_cnode_lock = 0;
2445
2446 cp = VTOC(vp);
2447 fp = VTOF(vp);
2448
2449 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2450 bytesread = fp->ff_size;
2451 else
2452 bytesread = ap->a_size;
2453
2454 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2455 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2456 hfs_lock(cp, HFS_FORCE_LOCK);
2457 took_cnode_lock = 1;
2458 }
2459 /*
2460 * If this file hasn't been seen since the start of
2461 * the current sampling period then start over.
2462 */
2463 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2464 struct timeval tv;
2465
2466 fp->ff_bytesread = bytesread;
2467 microtime(&tv);
2468 cp->c_atime = tv.tv_sec;
2469 } else {
2470 fp->ff_bytesread += bytesread;
2471 }
2472 cp->c_touch_acctime = TRUE;
2473 if (took_cnode_lock)
2474 hfs_unlock(cp);
2475 }
2476 return (error);
2477 }
2478
2479 /*
2480 * Pageout for HFS filesystem.
2481 */
2482 int
2483 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2484 /*
2485 struct vnop_pageout_args {
2486 vnode_t a_vp,
2487 upl_t a_pl,
2488 vm_offset_t a_pl_offset,
2489 off_t a_f_offset,
2490 size_t a_size,
2491 int a_flags
2492 vfs_context_t a_context;
2493 };
2494 */
2495 {
2496 vnode_t vp = ap->a_vp;
2497 struct cnode *cp;
2498 struct filefork *fp;
2499 int retval;
2500 off_t end_of_range;
2501 off_t filesize;
2502
2503 cp = VTOC(vp);
2504 if (cp->c_lockowner == current_thread()) {
2505 panic("pageout: %s cnode lock already held!\n",
2506 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2507 }
2508 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2509 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2510 ubc_upl_abort_range(ap->a_pl,
2511 ap->a_pl_offset,
2512 ap->a_size,
2513 UPL_ABORT_FREE_ON_EMPTY);
2514 }
2515 return (retval);
2516 }
2517 fp = VTOF(vp);
2518
2519 filesize = fp->ff_size;
2520 end_of_range = ap->a_f_offset + ap->a_size - 1;
2521
2522 if (end_of_range >= filesize) {
2523 end_of_range = (off_t)(filesize - 1);
2524 }
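/*
 * Once these pages are written, the corresponding on-disk range holds
 * valid data, so remove any overlapping entries from the invalid
 * (to-be-zero-filled) range list before starting the pageout.
 */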
2525 if (ap->a_f_offset < filesize) {
2526 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2527 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2528 }
2529 hfs_unlock(cp);
2530
2531 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2532 ap->a_size, filesize, ap->a_flags);
2533
2534 /*
2535 * If data was written, and setuid or setgid bits are set and
2536 * this process is not the superuser then clear the setuid and
2537 * setgid bits as a precaution against tampering.
2538 */
2539 if ((retval == 0) &&
2540 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2541 (vfs_context_suser(ap->a_context) != 0)) {
2542 hfs_lock(cp, HFS_FORCE_LOCK);
2543 cp->c_mode &= ~(S_ISUID | S_ISGID);
2544 cp->c_touch_chgtime = TRUE;
2545 hfs_unlock(cp);
2546 }
2547 return (retval);
2548 }
2549
2550 /*
2551 * Intercept B-Tree node writes to unswap them if necessary.
2552 */
2553 int
2554 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2555 {
2556 int retval = 0;
2557 register struct buf *bp = ap->a_bp;
2558 register struct vnode *vp = buf_vnode(bp);
2559 BlockDescriptor block;
2560
2561 /* Trap B-Tree writes */
2562 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2563 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2564 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2565 (vp == VTOHFS(vp)->hfc_filevp)) {
2566
2567 /*
2568 * Swap and validate the node if it is in native byte order.
2569 * This is always true on big endian, so we always validate
2570 * before writing here. On little endian, the node typically has
2571 * been swapped and validated when it was written to the journal,
2572 * so we won't do anything here.
2573 */
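/*
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * is always sizeof(BTNodeDescriptor), i.e. 0x000e.  If that value reads
 * back correctly in host byte order, the node has not been swapped yet.
 */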
2574 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2575 /* Prepare the block pointer */
2576 block.blockHeader = bp;
2577 block.buffer = (char *)buf_dataptr(bp);
2578 block.blockNum = buf_lblkno(bp);
2579 /* not found in cache ==> came from disk */
2580 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2581 block.blockSize = buf_count(bp);
2582
2583 /* Endian un-swap B-Tree node */
2584 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2585 if (retval)
2586 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2587 }
2588 }
2589
2590 /* This buffer shouldn't be locked anymore, but if it is, clear it */
2591 if ((buf_flags(bp) & B_LOCKED)) {
2592 // XXXdbg
2593 if (VTOHFS(vp)->jnl) {
2594 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2595 }
2596 buf_clearflags(bp, B_LOCKED);
2597 }
2598 retval = vn_bwrite (ap);
2599
2600 return (retval);
2601 }
2602
2603 /*
2604 * Relocate a file to a new location on disk
2605 * cnode must be locked on entry
2606 *
2607 * Relocation occurs by cloning the file's data from its
2608 * current set of blocks to a new set of blocks. During
2609 * the relocation all of the blocks (old and new) are
2610 * owned by the file.
2611 *
2612 * -----------------
2613 * |///////////////|
2614 * -----------------
2615 * 0 N (file offset)
2616 *
2617 * ----------------- -----------------
2618 * |///////////////| | | STEP 1 (acquire new blocks)
2619 * ----------------- -----------------
2620 * 0 N N+1 2N
2621 *
2622 * ----------------- -----------------
2623 * |///////////////| |///////////////| STEP 2 (clone data)
2624 * ----------------- -----------------
2625 * 0 N N+1 2N
2626 *
2627 * -----------------
2628 * |///////////////| STEP 3 (head truncate blocks)
2629 * -----------------
2630 * 0 N
2631 *
2632 * During steps 2 and 3 page-outs to file offsets less
2633 * than or equal to N are suspended.
2634 *
2635 * During step 3 page-ins to the file are suspended.
2636 */
2637 __private_extern__
2638 int
2639 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2640 struct proc *p)
2641 {
2642 struct cnode *cp;
2643 struct filefork *fp;
2644 struct hfsmount *hfsmp;
2645 u_int32_t headblks;
2646 u_int32_t datablks;
2647 u_int32_t blksize;
2648 u_int32_t growsize;
2649 u_int32_t nextallocsave;
2650 daddr64_t sector_a, sector_b;
2651 int disabled_caching = 0;
2652 int eflags;
2653 off_t newbytes;
2654 int retval;
2655 int lockflags = 0;
2656 int took_trunc_lock = 0;
2657 int started_tr = 0;
2658 enum vtype vnodetype;
2659
2660 vnodetype = vnode_vtype(vp);
2661 if (vnodetype != VREG && vnodetype != VLNK) {
2662 return (EPERM);
2663 }
2664
2665 hfsmp = VTOHFS(vp);
2666 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2667 return (ENOSPC);
2668 }
2669
2670 cp = VTOC(vp);
2671 fp = VTOF(vp);
2672 if (fp->ff_unallocblocks)
2673 return (EINVAL);
2674 blksize = hfsmp->blockSize;
2675 if (blockHint == 0)
2676 blockHint = hfsmp->nextAllocation;
2677
2678 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2679 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2680 return (EFBIG);
2681 }
2682
2683 //
2684 // We do not believe that this call to hfs_fsync() is
2685 // necessary and it causes a journal transaction
2686 // deadlock so we are removing it.
2687 //
2688 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2689 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2690 // if (retval)
2691 // return (retval);
2692 //}
2693
2694 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2695 hfs_unlock(cp);
2696 hfs_lock_truncate(cp, TRUE);
2697 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2698 hfs_unlock_truncate(cp);
2699 return (retval);
2700 }
2701 took_trunc_lock = 1;
2702 }
2703 headblks = fp->ff_blocks;
2704 datablks = howmany(fp->ff_size, blksize);
2705 growsize = datablks * blksize;
2706 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2707 if (blockHint >= hfsmp->hfs_metazone_start &&
2708 blockHint <= hfsmp->hfs_metazone_end)
2709 eflags |= kEFMetadataMask;
2710
2711 if (hfs_start_transaction(hfsmp) != 0) {
2712 if (took_trunc_lock)
2713 hfs_unlock_truncate(cp);
2714 return (EINVAL);
2715 }
2716 started_tr = 1;
2717 /*
2718 * Protect the extents b-tree and the allocation bitmap
2719 * during MapFileBlockC and ExtendFileC operations.
2720 */
2721 lockflags = SFL_BITMAP;
2722 if (overflow_extents(fp))
2723 lockflags |= SFL_EXTENTS;
2724 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2725
2726 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2727 if (retval) {
2728 retval = MacToVFSError(retval);
2729 goto out;
2730 }
2731
2732 /*
2733 * STEP 1 - acquire new allocation blocks.
2734 */
2735 if (!vnode_isnocache(vp)) {
2736 vnode_setnocache(vp);
2737 disabled_caching = 1;
2738
2739 }
2740 nextallocsave = hfsmp->nextAllocation;
2741 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2742 if (eflags & kEFMetadataMask) {
2743 HFS_MOUNT_LOCK(hfsmp, TRUE);
2744 hfsmp->nextAllocation = nextallocsave;
2745 hfsmp->vcbFlags |= 0xFF00;
2746 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2747 }
2748
2749 retval = MacToVFSError(retval);
2750 if (retval == 0) {
2751 cp->c_flag |= C_MODIFIED;
2752 if (newbytes < growsize) {
2753 retval = ENOSPC;
2754 goto restore;
2755 } else if (fp->ff_blocks < (headblks + datablks)) {
2756 printf("hfs_relocate: allocation failed");
2757 retval = ENOSPC;
2758 goto restore;
2759 }
2760
2761 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2762 if (retval) {
2763 retval = MacToVFSError(retval);
2764 } else if ((sector_a + 1) == sector_b) {
2765 retval = ENOSPC;
2766 goto restore;
2767 } else if ((eflags & kEFMetadataMask) &&
2768 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2769 hfsmp->hfs_metazone_end)) {
2770 printf("hfs_relocate: didn't move into metadata zone\n");
2771 retval = ENOSPC;
2772 goto restore;
2773 }
2774 }
2775 /* Done with system locks and journal for now. */
2776 hfs_systemfile_unlock(hfsmp, lockflags);
2777 lockflags = 0;
2778 hfs_end_transaction(hfsmp);
2779 started_tr = 0;
2780
2781 if (retval) {
2782 /*
2783 * Check to see if failure is due to excessive fragmentation.
2784 */
2785 if ((retval == ENOSPC) &&
2786 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2787 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2788 }
2789 goto out;
2790 }
2791 /*
2792 * STEP 2 - clone file data into the new allocation blocks.
2793 */
2794
2795 if (vnodetype == VLNK)
2796 retval = hfs_clonelink(vp, blksize, cred, p);
2797 else if (vnode_issystem(vp))
2798 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2799 else
2800 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2801
2802 /* Start transaction for step 3 or for a restore. */
2803 if (hfs_start_transaction(hfsmp) != 0) {
2804 retval = EINVAL;
2805 goto out;
2806 }
2807 started_tr = 1;
2808 if (retval)
2809 goto restore;
2810
2811 /*
2812 * STEP 3 - switch to cloned data and remove old blocks.
2813 */
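/*
 * HeadTruncateFile() cuts the first headblks allocation blocks off the
 * fork, so the cloned copy (appended after the original data in step 1)
 * becomes the file's contents starting at offset 0.
 */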
2814 lockflags = SFL_BITMAP;
2815 if (overflow_extents(fp))
2816 lockflags |= SFL_EXTENTS;
2817 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2818
2819 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2820
2821 hfs_systemfile_unlock(hfsmp, lockflags);
2822 lockflags = 0;
2823 if (retval)
2824 goto restore;
2825 out:
2826 if (took_trunc_lock)
2827 hfs_unlock_truncate(cp);
2828
2829 if (lockflags) {
2830 hfs_systemfile_unlock(hfsmp, lockflags);
2831 lockflags = 0;
2832 }
2833
2834 /* Push cnode's new extent data to disk. */
2835 if (retval == 0) {
2836 (void) hfs_update(vp, MNT_WAIT);
2837 }
2838
2839 if (hfsmp->jnl) {
2840 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2841 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2842 else
2843 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2844 }
2845 exit:
2846 if (disabled_caching) {
2847 vnode_clearnocache(vp);
2848 }
2849 if (started_tr)
2850 hfs_end_transaction(hfsmp);
2851
2852 return (retval);
2853
2854 restore:
2855 if (fp->ff_blocks == headblks)
2856 goto exit;
2857 /*
2858 * Give back any newly allocated space.
2859 */
2860 if (lockflags == 0) {
2861 lockflags = SFL_BITMAP;
2862 if (overflow_extents(fp))
2863 lockflags |= SFL_EXTENTS;
2864 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2865 }
2866
2867 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2868
2869 hfs_systemfile_unlock(hfsmp, lockflags);
2870 lockflags = 0;
2871
2872 if (took_trunc_lock)
2873 hfs_unlock_truncate(cp);
2874 goto exit;
2875 }
2876
2877
2878 /*
2879 * Clone a symlink.
2880 *
2881 */
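/*
 * The caller (hfs_relocate) guarantees that a symlink's data fits in a
 * single allocation block, so cloning is just a matter of copying logical
 * block 0 into logical block 1 (the first newly allocated block) and then
 * invalidating the stale buffers.
 */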
2882 static int
2883 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2884 {
2885 struct buf *head_bp = NULL;
2886 struct buf *tail_bp = NULL;
2887 int error;
2888
2889
2890 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2891 if (error)
2892 goto out;
2893
2894 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2895 if (tail_bp == NULL) {
2896 error = EIO;
2897 goto out;
2898 }
2899 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2900 error = (int)buf_bwrite(tail_bp);
2901 out:
2902 if (head_bp) {
2903 buf_markinvalid(head_bp);
2904 buf_brelse(head_bp);
2905 }
2906 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2907
2908 return (error);
2909 }
2910
2911 /*
2912 * Clone a file's data within the file.
2913 *
2914 */
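/*
 * The source range [0, blkcnt * blksize) is copied into the range that
 * starts at blkstart * blksize, using a bounce buffer of at most 128KB
 * and synchronous, uncached cluster I/O (IO_NOCACHE | IO_SYNC) so no
 * stale copy of the data is left behind in the UBC.
 */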
2915 static int
2916 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2917 {
2918 caddr_t bufp;
2919 size_t writebase;
2920 size_t bufsize;
2921 size_t copysize;
2922 size_t iosize;
2923 off_t filesize;
2924 size_t offset;
2925 uio_t auio;
2926 int error = 0;
2927
2928 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2929 writebase = blkstart * blksize;
2930 copysize = blkcnt * blksize;
2931 iosize = bufsize = MIN(copysize, 128 * 1024);
2932 offset = 0;
2933
2934 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2935 return (ENOMEM);
2936 }
2937 hfs_unlock(VTOC(vp));
2938
2939 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2940
2941 while (offset < copysize) {
2942 iosize = MIN(copysize - offset, iosize);
2943
2944 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2945 uio_addiov(auio, (uintptr_t)bufp, iosize);
2946
2947 error = cluster_read(vp, auio, copysize, 0);
2948 if (error) {
2949 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2950 break;
2951 }
2952 if (uio_resid(auio) != 0) {
2953 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2954 error = EIO;
2955 break;
2956 }
2957
2958 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2959 uio_addiov(auio, (uintptr_t)bufp, iosize);
2960
2961 error = cluster_write(vp, auio, filesize + offset,
2962 filesize + offset + iosize,
2963 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2964 if (error) {
2965 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2966 break;
2967 }
2968 if (uio_resid(auio) != 0) {
2969 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2970 error = EIO;
2971 break;
2972 }
2973 offset += iosize;
2974 }
2975 uio_free(auio);
2976
2977 /*
2978 * No need to call ubc_sync_range or hfs_invalbuf
2979 * since the file was copied using IO_NOCACHE.
2980 */
2981
2982 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2983
2984 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2985 return (error);
2986 }
2987
2988 /*
2989 * Clone a system (metadata) file.
2990 *
2991 */
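/*
 * The copy is done through the buffer cache in logical-block-size chunks,
 * staging up to a megabyte at a time in a bounce buffer: read blocks
 * [0, last_blk), write them back starting at start_blk, then fsync so the
 * new copy is on disk before the caller switches over to it.
 */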
2992 static int
2993 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2994 kauth_cred_t cred, struct proc *p)
2995 {
2996 caddr_t bufp;
2997 char * offset;
2998 size_t bufsize;
2999 size_t iosize;
3000 struct buf *bp = NULL;
3001 daddr64_t blkno;
3002 daddr64_t blk;
3003 daddr64_t start_blk;
3004 daddr64_t last_blk;
3005 int breadcnt;
3006 int i;
3007 int error = 0;
3008
3009
3010 iosize = GetLogicalBlockSize(vp);
3011 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3012 breadcnt = bufsize / iosize;
3013
3014 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3015 return (ENOMEM);
3016 }
3017 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3018 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3019 blkno = 0;
3020
3021 while (blkno < last_blk) {
3022 /*
3023 * Read up to a megabyte
3024 */
3025 offset = bufp;
3026 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3027 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3028 if (error) {
3029 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3030 goto out;
3031 }
3032 if (buf_count(bp) != iosize) {
3033 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
error = EIO;
3034 goto out;
3035 }
3036 bcopy((char *)buf_dataptr(bp), offset, iosize);
3037
3038 buf_markinvalid(bp);
3039 buf_brelse(bp);
3040 bp = NULL;
3041
3042 offset += iosize;
3043 }
3044
3045 /*
3046 * Write up to a megabyte
3047 */
3048 offset = bufp;
3049 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3050 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3051 if (bp == NULL) {
3052 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3053 error = EIO;
3054 goto out;
3055 }
3056 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3057 error = (int)buf_bwrite(bp);
3058 bp = NULL;
3059 if (error)
3060 goto out;
3061 offset += iosize;
3062 }
3063 }
3064 out:
3065 if (bp) {
3066 buf_brelse(bp);
3067 }
3068
3069 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3070
3071 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3072
3073 return (error);
3074 }