1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43 #include <sys/disk.h>
44 #include <sys/sysctl.h>
45
46 #include <miscfs/specfs/specdev.h>
47
48 #include <sys/ubc.h>
49 #include <vm/vm_pageout.h>
50 #include <vm/vm_kern.h>
51
52 #include <sys/kdebug.h>
53
54 #include "hfs.h"
55 #include "hfs_endian.h"
56 #include "hfs_fsctl.h"
57 #include "hfs_quota.h"
58 #include "hfscommon/headers/FileMgrInternal.h"
59 #include "hfscommon/headers/BTreesInternal.h"
60 #include "hfs_cnode.h"
61 #include "hfs_dbg.h"
62
63 extern int overflow_extents(struct filefork *fp);
64
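/* can_cluster(size) is true when size is a multiple of 4096 bytes and no larger than MAXPHYSIO/2. */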
65 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
66
67 enum {
68 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
69 };
70
71 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
72
73 extern int hfs_setextendedsecurity(struct hfsmount *, int);
74
75
76 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
77 static int hfs_clonefile(struct vnode *, int, int, int);
78 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
79
80
81 int flush_cache_on_write = 0;
82 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
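/* Illustrative usage (an assumption, not part of this file): this tunable is published as
   kern.flush_cache_on_write and could be enabled at runtime with
   `sysctl -w kern.flush_cache_on_write=1`. */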
83
84
85 /*****************************************************************************
86 *
87 * I/O Operations on vnodes
88 *
89 *****************************************************************************/
90 int hfs_vnop_read(struct vnop_read_args *);
91 int hfs_vnop_write(struct vnop_write_args *);
92 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
93 int hfs_vnop_select(struct vnop_select_args *);
94 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
95 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
96 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
97 int hfs_vnop_strategy(struct vnop_strategy_args *);
98 int hfs_vnop_allocate(struct vnop_allocate_args *);
99 int hfs_vnop_pagein(struct vnop_pagein_args *);
100 int hfs_vnop_pageout(struct vnop_pageout_args *);
101 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
102
103
104 /*
105 * Read data from a file.
106 */
107 int
108 hfs_vnop_read(struct vnop_read_args *ap)
109 {
110 uio_t uio = ap->a_uio;
111 struct vnode *vp = ap->a_vp;
112 struct cnode *cp;
113 struct filefork *fp;
114 struct hfsmount *hfsmp;
115 off_t filesize;
116 off_t filebytes;
117 off_t start_resid = uio_resid(uio);
118 off_t offset = uio_offset(uio);
119 int retval = 0;
120
121
122 /* Preflight checks */
123 if (!vnode_isreg(vp)) {
124 /* can only read regular files */
125 if (vnode_isdir(vp))
126 return (EISDIR);
127 else
128 return (EPERM);
129 }
130 if (start_resid == 0)
131 return (0); /* Nothing left to do */
132 if (offset < 0)
133 return (EINVAL); /* can't read from a negative offset */
134
135 cp = VTOC(vp);
136 fp = VTOF(vp);
137 hfsmp = VTOHFS(vp);
138
139 /* Protect against a size change. */
140 hfs_lock_truncate(cp, 0);
141
142 filesize = fp->ff_size;
143 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
144 if (offset > filesize) {
145 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
146 (offset > (off_t)MAXHFSFILESIZE)) {
147 retval = EFBIG;
148 }
149 goto exit;
150 }
151
152 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
153 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
154
155 retval = cluster_read(vp, uio, filesize, 0);
156
157 cp->c_touch_acctime = TRUE;
158
159 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
160 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
161
162 /*
163 * Keep track of blocks read (used while hot-file recording is active)
164 */
165 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
166 int took_cnode_lock = 0;
167 off_t bytesread;
168
169 bytesread = start_resid - uio_resid(uio);
170
171 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
172 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
173 hfs_lock(cp, HFS_FORCE_LOCK);
174 took_cnode_lock = 1;
175 }
176 /*
177 * If this file hasn't been seen since the start of
178 * the current sampling period then start over.
179 */
180 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
181 struct timeval tv;
182
183 fp->ff_bytesread = bytesread;
184 microtime(&tv);
185 cp->c_atime = tv.tv_sec;
186 } else {
187 fp->ff_bytesread += bytesread;
188 }
189 if (took_cnode_lock)
190 hfs_unlock(cp);
191 }
192 exit:
193 hfs_unlock_truncate(cp);
194 return (retval);
195 }
196
197 /*
198 * Write data to a file.
199 */
200 int
201 hfs_vnop_write(struct vnop_write_args *ap)
202 {
203 uio_t uio = ap->a_uio;
204 struct vnode *vp = ap->a_vp;
205 struct cnode *cp;
206 struct filefork *fp;
207 struct hfsmount *hfsmp;
208 kauth_cred_t cred = NULL;
209 off_t origFileSize;
210 off_t writelimit;
211 off_t bytesToAdd;
212 off_t actualBytesAdded;
213 off_t filebytes;
214 off_t offset;
215 size_t resid;
216 int eflags;
217 int ioflag = ap->a_ioflag;
218 int retval = 0;
219 int lockflags;
220 int cnode_locked = 0;
221
222 // LP64todo - fix this! uio_resid may be 64-bit value
223 resid = uio_resid(uio);
224 offset = uio_offset(uio);
225
226 if (offset < 0)
227 return (EINVAL);
228 if (resid == 0)
229 return (E_NONE);
230 if (!vnode_isreg(vp))
231 return (EPERM); /* Can only write regular files */
232
233 /* Protect against a size change. */
234 hfs_lock_truncate(VTOC(vp), TRUE);
235
236 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
237 hfs_unlock_truncate(VTOC(vp));
238 return (retval);
239 }
240 cnode_locked = 1;
241 cp = VTOC(vp);
242 fp = VTOF(vp);
243 hfsmp = VTOHFS(vp);
244 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
245
246 if (ioflag & IO_APPEND) {
247 uio_setoffset(uio, fp->ff_size);
248 offset = fp->ff_size;
249 }
250 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
251 retval = EPERM;
252 goto exit;
253 }
254
255 origFileSize = fp->ff_size;
256 eflags = kEFDeferMask; /* defer file block allocations */
257
258 #ifdef HFS_SPARSE_DEV
259 /*
260 * When the underlying device is sparse and free space is low
261 * (fewer than 2048 allocation blocks, roughly 8 MB at a 4 KB block size),
262 * stop doing delayed allocations and begin doing synchronous I/O.
263 */
264 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
265 (hfs_freeblks(hfsmp, 0) < 2048)) {
266 eflags &= ~kEFDeferMask;
267 ioflag |= IO_SYNC;
268 }
269 #endif /* HFS_SPARSE_DEV */
270
271 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
272 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
273
274 /* Now test if we need to extend the file */
275 /* Doing so will adjust the filebytes for us */
276
277 writelimit = offset + resid;
278 if (writelimit <= filebytes)
279 goto sizeok;
280
281 cred = vfs_context_ucred(ap->a_context);
282 #if QUOTA
283 bytesToAdd = writelimit - filebytes;
284 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
285 cred, 0);
286 if (retval)
287 goto exit;
288 #endif /* QUOTA */
289
290 if (hfs_start_transaction(hfsmp) != 0) {
291 retval = EINVAL;
292 goto exit;
293 }
294
295 while (writelimit > filebytes) {
296 bytesToAdd = writelimit - filebytes;
297 if (cred && suser(cred, NULL) != 0)
298 eflags |= kEFReserveMask;
299
300 /* Protect extents b-tree and allocation bitmap */
301 lockflags = SFL_BITMAP;
302 if (overflow_extents(fp))
303 lockflags |= SFL_EXTENTS;
304 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
305
306 /* Files that are changing size are not hot file candidates. */
307 if (hfsmp->hfc_stage == HFC_RECORDING) {
308 fp->ff_bytesread = 0;
309 }
310 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
311 0, eflags, &actualBytesAdded));
312
313 hfs_systemfile_unlock(hfsmp, lockflags);
314
315 if ((actualBytesAdded == 0) && (retval == E_NONE))
316 retval = ENOSPC;
317 if (retval != E_NONE)
318 break;
319 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
320 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
321 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
322 }
323 (void) hfs_update(vp, TRUE);
324 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
325 (void) hfs_end_transaction(hfsmp);
326
327 sizeok:
328 if (retval == E_NONE) {
329 off_t filesize;
330 off_t zero_off;
331 off_t tail_off;
332 off_t inval_start;
333 off_t inval_end;
334 off_t io_start;
335 int lflag;
336 struct rl_entry *invalid_range;
337
338 if (writelimit > fp->ff_size)
339 filesize = writelimit;
340 else
341 filesize = fp->ff_size;
342
343 lflag = (ioflag & IO_SYNC);
344
345 if (offset <= fp->ff_size) {
346 zero_off = offset & ~PAGE_MASK_64;
347
348 /* Check whether the area between zero_off and the start
349 of the transfer is invalid and should be zero-filled
350 as part of the transfer:
351 */
352 if (offset > zero_off) {
353 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
354 lflag |= IO_HEADZEROFILL;
355 }
356 } else {
357 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
358
359 /* The bytes between fp->ff_size and uio->uio_offset must never be
360 read without being zeroed. The current last block is filled with zeroes
361 if it holds valid data but in all cases merely do a little bookkeeping
362 to track the area from the end of the current last page to the start of
363 the area actually written. For the same reason only the bytes up to the
364 start of the page where this write will start are invalidated; any remainder
365 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
366
367 Note that inval_start, the start of the page after the current EOF,
368 may be past the start of the write, in which case the zeroing
369 will be handled by the cluster_write of the actual data.
370 */
371 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
372 inval_end = offset & ~PAGE_MASK_64;
373 zero_off = fp->ff_size;
374
375 if ((fp->ff_size & PAGE_MASK_64) &&
376 (rl_scan(&fp->ff_invalidranges,
377 eof_page_base,
378 fp->ff_size - 1,
379 &invalid_range) != RL_NOOVERLAP)) {
380 /* The page containing the EOF is not valid, so the
381 entire page must be made inaccessible now. If the write
382 starts on a page beyond the page containing the eof
383 (inval_end > eof_page_base), add the
384 whole page to the range to be invalidated. Otherwise
385 (i.e. if the write starts on the same page), zero-fill
386 the entire page explicitly now:
387 */
388 if (inval_end > eof_page_base) {
389 inval_start = eof_page_base;
390 } else {
391 zero_off = eof_page_base;
392 };
393 };
394
395 if (inval_start < inval_end) {
396 struct timeval tv;
397 /* There's some range of data that's going to be marked invalid */
398
399 if (zero_off < inval_start) {
400 /* The pages between inval_start and inval_end are going to be invalidated,
401 and the actual write will start on a page past inval_end. Now's the last
402 chance to zero-fill the page containing the EOF:
403 */
404 hfs_unlock(cp);
405 cnode_locked = 0;
406 retval = cluster_write(vp, (uio_t) 0,
407 fp->ff_size, inval_start,
408 zero_off, (off_t)0,
409 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
410 hfs_lock(cp, HFS_FORCE_LOCK);
411 cnode_locked = 1;
412 if (retval) goto ioerr_exit;
413 offset = uio_offset(uio);
414 };
415
416 /* Mark the remaining area of the newly allocated space as invalid: */
417 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
418 microuptime(&tv);
419 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
420 zero_off = fp->ff_size = inval_end;
421 };
422
423 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
424 };
425
426 /* Check to see whether the area between the end of the write and the end of
427 the page it falls in is invalid and should be zero-filled as part of the transfer:
428 */
429 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
430 if (tail_off > filesize) tail_off = filesize;
431 if (tail_off > writelimit) {
432 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
433 lflag |= IO_TAILZEROFILL;
434 };
435 };
436
437 /*
438 * if the write starts beyond the current EOF (possibly advanced in the
439 * zeroing of the last block, above), then we'll zero fill from the current EOF
440 * to where the write begins:
441 *
442 * NOTE: If (and ONLY if) the portion of the file about to be written is
443 * before the current EOF it might be marked as invalid now and must be
444 * made readable (removed from the invalid ranges) before cluster_write
445 * tries to write it:
446 */
447 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
448 if (io_start < fp->ff_size) {
449 off_t io_end;
450
451 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
452 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
453 };
454
455 hfs_unlock(cp);
456 cnode_locked = 0;
457 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
458 tail_off, lflag | IO_NOZERODIRTY);
459 offset = uio_offset(uio);
460 if (offset > fp->ff_size) {
461 fp->ff_size = offset;
462
463 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
464 /* Files that are changing size are not hot file candidates. */
465 if (hfsmp->hfc_stage == HFC_RECORDING)
466 fp->ff_bytesread = 0;
467 }
468 if (resid > uio_resid(uio)) {
469 cp->c_touch_chgtime = TRUE;
470 cp->c_touch_modtime = TRUE;
471 }
472 }
473
474 // XXXdbg - testing for vivek and paul lambert
475 {
476 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
477 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
478 }
479 }
480 HFS_KNOTE(vp, NOTE_WRITE);
481
482 ioerr_exit:
483 /*
484 * If we successfully wrote any data and we are not the superuser,
485 * we clear the setuid and setgid bits as a precaution against
486 * tampering.
487 */
488 if (cp->c_mode & (S_ISUID | S_ISGID)) {
489 cred = vfs_context_ucred(ap->a_context);
490 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
491 if (!cnode_locked) {
492 hfs_lock(cp, HFS_FORCE_LOCK);
493 cnode_locked = 1;
494 }
495 cp->c_mode &= ~(S_ISUID | S_ISGID);
496 }
497 }
498 if (retval) {
499 if (ioflag & IO_UNIT) {
500 if (!cnode_locked) {
501 hfs_lock(cp, HFS_FORCE_LOCK);
502 cnode_locked = 1;
503 }
504 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
505 0, ap->a_context);
506 // LP64todo - fix this! resid needs to be user_ssize_t
507 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
508 uio_setresid(uio, resid);
509 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
510 }
511 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
512 if (!cnode_locked) {
513 hfs_lock(cp, HFS_FORCE_LOCK);
514 cnode_locked = 1;
515 }
516 retval = hfs_update(vp, TRUE);
517 }
518 /* Updating vcbWrCnt doesn't need to be atomic. */
519 hfsmp->vcbWrCnt++;
520
521 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
522 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
523 exit:
524 if (cnode_locked)
525 hfs_unlock(cp);
526 hfs_unlock_truncate(cp);
527 return (retval);
528 }
529
530 /* support for the "bulk-access" fcntl */
531
532 #define CACHE_ELEMS 64
533 #define CACHE_LEVELS 16
534 #define PARENT_IDS_FLAG 0x100
535
536 /* from hfs_attrlist.c */
537 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
538 mode_t obj_mode, struct mount *mp,
539 kauth_cred_t cred, struct proc *p);
540
541 /* from vfs/vfs_fsevents.c */
542 extern char *get_pathbuff(void);
543 extern void release_pathbuff(char *buff);
544
545 struct access_cache {
546 int numcached;
547 int cachehits; /* these two for statistics gathering */
548 int lookups;
549 unsigned int *acache;
550 Boolean *haveaccess;
551 };
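/* acache holds parent-directory cnids kept sorted in ascending order (see
   lookup_bucket/add_node below); haveaccess is the parallel array of results. */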
552
553 struct access_t {
554 uid_t uid; /* IN: effective user id */
555 short flags; /* IN: access requested (i.e. R_OK) */
556 short num_groups; /* IN: number of groups user belongs to */
557 int num_files; /* IN: number of files to process */
558 int *file_ids; /* IN: array of file ids */
559 gid_t *groups; /* IN: array of groups */
560 short *access; /* OUT: access info for each file (0 for 'has access') */
561 };
562
563 struct user_access_t {
564 uid_t uid; /* IN: effective user id */
565 short flags; /* IN: access requested (i.e. R_OK) */
566 short num_groups; /* IN: number of groups user belongs to */
567 int num_files; /* IN: number of files to process */
568 user_addr_t file_ids; /* IN: array of file ids */
569 user_addr_t groups; /* IN: array of groups */
570 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
571 };
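/*
 * Illustrative sketch of the userspace side (an assumption, not part of this
 * file): a 32-bit caller would typically fill in a struct access_t and issue
 * the bulk-access request against a file on the volume through fsctl(2),
 * roughly as follows (HFSIOC_BULKACCESS is the ioctl defined further below;
 * the exact userspace constant and header are assumptions):
 *
 *     struct access_t a;
 *     int ids[2] = { 1234, 5678 };      // hypothetical catalog file IDs
 *     short result[2];
 *     gid_t groups[1] = { getgid() };
 *
 *     a.uid = getuid();
 *     a.flags = R_OK;
 *     a.num_groups = 1;
 *     a.num_files = 2;
 *     a.file_ids = ids;
 *     a.groups = groups;
 *     a.access = result;
 *     fsctl("/Volumes/HFSVol/somefile", HFSIOC_BULKACCESS, &a, 0);
 *
 * On return, result[i] is 0 when access is granted and an errno value
 * otherwise. The handler below requires the caller to be root and accepts at
 * most 256 file IDs per call.
 */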
572
573 /*
574 * Perform a binary search for the given parent_id. Return value is
575 * found/not found boolean, and indexp will be the index of the item
576 * or the index at which to insert the item if it's not found.
577 */
578 static int
579 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
580 {
581 unsigned int lo, hi;
582 int index, matches = 0;
583
584 if (cache->numcached == 0) {
585 *indexp = 0;
586 return 0; // table is empty, so insert at index=0 and report no match
587 }
588
589 if (cache->numcached > CACHE_ELEMS) {
590 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
591 cache->numcached, CACHE_ELEMS);*/
592 cache->numcached = CACHE_ELEMS;
593 }
594
595 lo = 0;
596 hi = cache->numcached - 1;
597 index = -1;
598
599 /* perform binary search for parent_id */
600 do {
601 unsigned int mid = (hi - lo)/2 + lo;
602 unsigned int this_id = cache->acache[mid];
603
604 if (parent_id == this_id) {
605 index = mid;
606 break;
607 }
608
609 if (parent_id < this_id) {
610 hi = mid;
611 continue;
612 }
613
614 if (parent_id > this_id) {
615 lo = mid + 1;
616 continue;
617 }
618 } while(lo < hi);
619
620 /* check if lo and hi converged on the match */
621 if (parent_id == cache->acache[hi]) {
622 index = hi;
623 }
624
625 /* if no existing entry found, find index for new one */
626 if (index == -1) {
627 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
628 matches = 0;
629 } else {
630 matches = 1;
631 }
632
633 *indexp = index;
634 return matches;
635 }
636
637 /*
638 * Add a node to the access_cache at the given index (or do a lookup first
639 * to find the index if -1 is passed in). We currently do a replace rather
640 * than an insert if the cache is full.
641 */
642 static void
643 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
644 {
645 int lookup_index = -1;
646
647 /* need to do a lookup first if -1 passed for index */
648 if (index == -1) {
649 if (lookup_bucket(cache, &lookup_index, nodeID)) {
650 if (cache->haveaccess[lookup_index] != access) {
651 /* change access info for existing entry... should never happen */
652 cache->haveaccess[lookup_index] = access;
653 }
654
655 /* mission accomplished */
656 return;
657 } else {
658 index = lookup_index;
659 }
660
661 }
662
663 /* if the cache is full, do a replace rather than an insert */
664 if (cache->numcached >= CACHE_ELEMS) {
665 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
666 cache->numcached = CACHE_ELEMS-1;
667
668 if (index > cache->numcached) {
669 // printf("index %d pinned to %d\n", index, cache->numcached);
670 index = cache->numcached;
671 }
672 } else if (index >= 0 && index < cache->numcached) {
673 /* only do bcopy if we're inserting */
674 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
675 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
676 }
677
678 cache->acache[index] = nodeID;
679 cache->haveaccess[index] = access;
680 cache->numcached++;
681 }
682
683
684 struct cinfo {
685 uid_t uid;
686 gid_t gid;
687 mode_t mode;
688 cnid_t parentcnid;
689 };
690
691 static int
692 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
693 {
694 struct cinfo *cip = (struct cinfo *)arg;
695
696 cip->uid = attrp->ca_uid;
697 cip->gid = attrp->ca_gid;
698 cip->mode = attrp->ca_mode;
699 cip->parentcnid = descp->cd_parentcnid;
700
701 return (0);
702 }
703
704 /*
705 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
706 * isn't incore, then go to the catalog.
707 */
708 static int
709 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
710 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
711 {
712 int error = 0;
713
714 /* if this id matches the one the fsctl was called with, skip the lookup */
715 if (cnid == skip_cp->c_cnid) {
716 cnattrp->ca_uid = skip_cp->c_uid;
717 cnattrp->ca_gid = skip_cp->c_gid;
718 cnattrp->ca_mode = skip_cp->c_mode;
719 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
720 } else {
721 struct cinfo c_info;
722
723 /* otherwise, check the cnode hash in case the file/dir is incore */
724 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
725 cnattrp->ca_uid = c_info.uid;
726 cnattrp->ca_gid = c_info.gid;
727 cnattrp->ca_mode = c_info.mode;
728 keyp->hfsPlus.parentID = c_info.parentcnid;
729 } else {
730 int lockflags;
731
732 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
733
734 /* lookup this cnid in the catalog */
735 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
736
737 hfs_systemfile_unlock(hfsmp, lockflags);
738
739 cache->lookups++;
740 }
741 }
742
743 return (error);
744 }
745
746 /*
747 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
748 * up to CACHE_LEVELS as we progress towards the root.
749 */
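/*
 * Summary (derived from the code below): returns 1 when the caller has search
 * (X_OK) permission on nodeID and on every ancestor up to the root, and 0
 * otherwise; *err carries any lookup error. Root is granted access without
 * walking the hierarchy.
 */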
750 static int
751 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
752 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
753 {
754 int myErr = 0;
755 int myResult;
756 HFSCatalogNodeID thisNodeID;
757 unsigned long myPerms;
758 struct cat_attr cnattr;
759 int cache_index = -1;
760 CatalogKey catkey;
761
762 int i = 0, ids_to_cache = 0;
763 int parent_ids[CACHE_LEVELS];
764
765 /* root always has access */
766 if (!suser(myp_ucred, NULL)) {
767 return (1);
768 }
769
770 thisNodeID = nodeID;
771 while (thisNodeID >= kRootDirID) {
772 myResult = 0; /* default to "no access" */
773
774 /* check the cache before resorting to hitting the catalog */
775
776 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
777 * to look any further after hitting cached dir */
778
779 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
780 cache->cachehits++;
781 myResult = cache->haveaccess[cache_index];
782 goto ExitThisRoutine;
783 }
784
785 /* remember which parents we want to cache */
786 if (ids_to_cache < CACHE_LEVELS) {
787 parent_ids[ids_to_cache] = thisNodeID;
788 ids_to_cache++;
789 }
790
791 /* do the lookup (checks the cnode hash, then the catalog) */
792 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
793 if (myErr) {
794 goto ExitThisRoutine; /* no access */
795 }
796
797 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
798 cnattr.ca_mode, hfsmp->hfs_mp,
799 myp_ucred, theProcPtr);
800
801 if ( (myPerms & X_OK) == 0 ) {
802 myResult = 0;
803 goto ExitThisRoutine; /* no access */
804 }
805
806 /* up the hierarchy we go */
807 thisNodeID = catkey.hfsPlus.parentID;
808 }
809
810 /* if here, we have access to this node */
811 myResult = 1;
812
813 ExitThisRoutine:
814 if (myErr) {
815 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
816 myResult = 0;
817 }
818 *err = myErr;
819
820 /* cache the parent directory(ies) */
821 for (i = 0; i < ids_to_cache; i++) {
822 /* small optimization: get rid of double-lookup for all these */
823 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
824 add_node(cache, -1, parent_ids[i], myResult);
825 }
826
827 return (myResult);
828 }
829 /* end "bulk-access" support */
830
831
832
833 /*
834 * Callback for use with freeze ioctl.
835 */
836 static int
837 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
838 {
839 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
840
841 return 0;
842 }
843
844 /*
845 * Control filesystem operating characteristics.
846 */
847 int
848 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
849 vnode_t a_vp;
850 int a_command;
851 caddr_t a_data;
852 int a_fflag;
853 vfs_context_t a_context;
854 } */ *ap)
855 {
856 struct vnode * vp = ap->a_vp;
857 struct hfsmount *hfsmp = VTOHFS(vp);
858 vfs_context_t context = ap->a_context;
859 kauth_cred_t cred = vfs_context_ucred(context);
860 proc_t p = vfs_context_proc(context);
861 struct vfsstatfs *vfsp;
862 boolean_t is64bit;
863
864 is64bit = proc_is64bit(p);
865
866 switch (ap->a_command) {
867
868 case HFS_RESIZE_VOLUME: {
869 u_int64_t newsize;
870 u_int64_t cursize;
871
872 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
873 if (suser(cred, NULL) &&
874 kauth_cred_getuid(cred) != vfsp->f_owner) {
875 return (EACCES); /* must be owner of file system */
876 }
877 if (!vnode_isvroot(vp)) {
878 return (EINVAL);
879 }
880 newsize = *(u_int64_t *)ap->a_data;
881 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
882
883 if (newsize > cursize) {
884 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
885 } else if (newsize < cursize) {
886 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
887 } else {
888 return (0);
889 }
890 }
891 case HFS_CHANGE_NEXT_ALLOCATION: {
892 u_int32_t location;
893
894 if (vnode_vfsisrdonly(vp)) {
895 return (EROFS);
896 }
897 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
898 if (suser(cred, NULL) &&
899 kauth_cred_getuid(cred) != vfsp->f_owner) {
900 return (EACCES); /* must be owner of file system */
901 }
902 if (!vnode_isvroot(vp)) {
903 return (EINVAL);
904 }
905 location = *(u_int32_t *)ap->a_data;
906 if (location > hfsmp->totalBlocks - 1) {
907 return (EINVAL);
908 }
909 /* Return previous value. */
910 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
911 HFS_MOUNT_LOCK(hfsmp, TRUE);
912 hfsmp->nextAllocation = location;
913 hfsmp->vcbFlags |= 0xFF00;
914 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
915 return (0);
916 }
917
918 #ifdef HFS_SPARSE_DEV
919 case HFS_SETBACKINGSTOREINFO: {
920 struct vnode * bsfs_rootvp;
921 struct vnode * di_vp;
922 struct hfs_backingstoreinfo *bsdata;
923 int error = 0;
924
925 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
926 return (EALREADY);
927 }
928 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
929 if (suser(cred, NULL) &&
930 kauth_cred_getuid(cred) != vfsp->f_owner) {
931 return (EACCES); /* must be owner of file system */
932 }
933 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
934 if (bsdata == NULL) {
935 return (EINVAL);
936 }
937 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
938 return (error);
939 }
940 if ((error = vnode_getwithref(di_vp))) {
941 file_drop(bsdata->backingfd);
942 return(error);
943 }
944
945 if (vnode_mount(vp) == vnode_mount(di_vp)) {
946 (void)vnode_put(di_vp);
947 file_drop(bsdata->backingfd);
948 return (EINVAL);
949 }
950
951 /*
952 * Obtain the backing fs root vnode and keep a reference
953 * on it. This reference will be dropped in hfs_unmount.
954 */
955 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
956 if (error) {
957 (void)vnode_put(di_vp);
958 file_drop(bsdata->backingfd);
959 return (error);
960 }
961 vnode_ref(bsfs_rootvp);
962 vnode_put(bsfs_rootvp);
963
964 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
965 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
966 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
967 hfsmp->hfs_sparsebandblks *= 4;
968
969 (void)vnode_put(di_vp);
970 file_drop(bsdata->backingfd);
971 return (0);
972 }
973 case HFS_CLRBACKINGSTOREINFO: {
974 struct vnode * tmpvp;
975
976 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
977 if (suser(cred, NULL) &&
978 kauth_cred_getuid(cred) != vfsp->f_owner) {
979 return (EACCES); /* must be owner of file system */
980 }
981 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
982 hfsmp->hfs_backingfs_rootvp) {
983
984 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
985 tmpvp = hfsmp->hfs_backingfs_rootvp;
986 hfsmp->hfs_backingfs_rootvp = NULLVP;
987 hfsmp->hfs_sparsebandblks = 0;
988 vnode_rele(tmpvp);
989 }
990 return (0);
991 }
992 #endif /* HFS_SPARSE_DEV */
993
994 case F_FREEZE_FS: {
995 struct mount *mp;
996 task_t task;
997
998 if (!is_suser())
999 return (EACCES);
1000
1001 mp = vnode_mount(vp);
1002 hfsmp = VFSTOHFS(mp);
1003
1004 if (!(hfsmp->jnl))
1005 return (ENOTSUP);
1006
1007 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1008
1009 task = current_task();
1010 task_working_set_disable(task);
1011
1012 // flush things before we get started to try and prevent
1013 // dirty data from being paged out while we're frozen.
1014 // note: can't do this after taking the lock as it will
1015 // deadlock against ourselves.
1016 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1017 hfs_global_exclusive_lock_acquire(hfsmp);
1018 journal_flush(hfsmp->jnl);
1019
1020 // don't need to iterate on all vnodes, we just need to
1021 // wait for writes to the system files and the device vnode
1022 if (HFSTOVCB(hfsmp)->extentsRefNum)
1023 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1024 if (HFSTOVCB(hfsmp)->catalogRefNum)
1025 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1026 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1027 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1028 if (hfsmp->hfs_attribute_vp)
1029 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1030 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1031
1032 hfsmp->hfs_freezing_proc = current_proc();
1033
1034 return (0);
1035 }
1036
1037 case F_THAW_FS: {
1038 if (!is_suser())
1039 return (EACCES);
1040
1041 // if we're not the one who froze the fs then we
1042 // can't thaw it.
1043 if (hfsmp->hfs_freezing_proc != current_proc()) {
1044 return EPERM;
1045 }
1046
1047 // NOTE: if you add code here, also go check the
1048 // code that "thaws" the fs in hfs_vnop_close()
1049 //
1050 hfsmp->hfs_freezing_proc = NULL;
1051 hfs_global_exclusive_lock_release(hfsmp);
1052 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1053
1054 return (0);
1055 }
1056
1057 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1058 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1059
1060 case HFS_BULKACCESS_FSCTL:
1061 case HFS_BULKACCESS: {
1062 /*
1063 * NOTE: on entry, the vnode is locked. In case this vnode
1064 * happens to be in our list of file_ids, we note it so we
1065 * avoid calling hfs_chashget_nowait() on that id, as that
1066 * will cause a "locking against myself" panic.
1067 */
1068 Boolean check_leaf = true;
1069
1070 struct user_access_t *user_access_structp;
1071 struct user_access_t tmp_user_access_t;
1072 struct access_cache cache;
1073
1074 int error = 0, i;
1075
1076 dev_t dev = VTOC(vp)->c_dev;
1077
1078 short flags;
1079 struct ucred myucred; /* XXX ILLEGAL */
1080 int num_files;
1081 int *file_ids = NULL;
1082 short *access = NULL;
1083
1084 cnid_t cnid;
1085 cnid_t prevParent_cnid = 0;
1086 unsigned long myPerms;
1087 short myaccess = 0;
1088 struct cat_attr cnattr;
1089 CatalogKey catkey;
1090 struct cnode *skip_cp = VTOC(vp);
1091 struct vfs_context my_context;
1092
1093 /* first, return error if not run as root */
1094 if (cred->cr_ruid != 0) {
1095 return EPERM;
1096 }
1097
1098 /* initialize the local cache and buffers */
1099 cache.numcached = 0;
1100 cache.cachehits = 0;
1101 cache.lookups = 0;
1102
1103 file_ids = (int *) get_pathbuff();
1104 access = (short *) get_pathbuff();
1105 cache.acache = (int *) get_pathbuff();
1106 cache.haveaccess = (Boolean *) get_pathbuff();
1107
1108 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1109 release_pathbuff((char *) file_ids);
1110 release_pathbuff((char *) access);
1111 release_pathbuff((char *) cache.acache);
1112 release_pathbuff((char *) cache.haveaccess);
1113
1114 return ENOMEM;
1115 }
1116
1117 /* struct copyin done during dispatch... need to copy file_id array separately */
1118 if (ap->a_data == NULL) {
1119 error = EINVAL;
1120 goto err_exit_bulk_access;
1121 }
1122
1123 if (is64bit) {
1124 user_access_structp = (struct user_access_t *)ap->a_data;
1125 }
1126 else {
1127 struct access_t * accessp = (struct access_t *)ap->a_data;
1128 tmp_user_access_t.uid = accessp->uid;
1129 tmp_user_access_t.flags = accessp->flags;
1130 tmp_user_access_t.num_groups = accessp->num_groups;
1131 tmp_user_access_t.num_files = accessp->num_files;
1132 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1133 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1134 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1135 user_access_structp = &tmp_user_access_t;
1136 }
1137
1138 num_files = user_access_structp->num_files;
1139 if (num_files < 1) {
1140 goto err_exit_bulk_access;
1141 }
1142 if (num_files > 256) {
1143 error = EINVAL;
1144 goto err_exit_bulk_access;
1145 }
1146
1147 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1148 num_files * sizeof(int)))) {
1149 goto err_exit_bulk_access;
1150 }
1151
1152 /* fill in the ucred structure */
1153 flags = user_access_structp->flags;
1154 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1155 flags = R_OK;
1156 }
1157
1158 /* check if we've been passed leaf node ids or parent ids */
1159 if (flags & PARENT_IDS_FLAG) {
1160 check_leaf = false;
1161 }
1162
1163 memset(&myucred, 0, sizeof(myucred));
1164 myucred.cr_ref = 1;
1165 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1166 myucred.cr_ngroups = user_access_structp->num_groups;
1167 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1168 myucred.cr_ngroups = 0;
1169 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1170 myucred.cr_ngroups * sizeof(gid_t)))) {
1171 goto err_exit_bulk_access;
1172 }
1173 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1174 myucred.cr_gmuid = myucred.cr_uid;
1175
1176 my_context.vc_proc = p;
1177 my_context.vc_ucred = &myucred;
1178
1179 /* Check access to each file_id passed in */
1180 for (i = 0; i < num_files; i++) {
1181 #if 0
1182 cnid = (cnid_t) file_ids[i];
1183
1184 /* root always has access */
1185 if (!suser(&myucred, NULL)) {
1186 access[i] = 0;
1187 continue;
1188 }
1189
1190 if (check_leaf) {
1191
1192 /* do the lookup (checks the cnode hash, then the catalog) */
1193 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1194 if (error) {
1195 access[i] = (short) error;
1196 continue;
1197 }
1198
1199 /* before calling CheckAccess(), check the target file for read access */
1200 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1201 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1202
1203
1204 /* fail fast if no access */
1205 if ((myPerms & flags) == 0) {
1206 access[i] = EACCES;
1207 continue;
1208 }
1209 } else {
1210 /* we were passed an array of parent ids */
1211 catkey.hfsPlus.parentID = cnid;
1212 }
1213
1214 /* if the last guy had the same parent and had access, we're done */
1215 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1216 cache.cachehits++;
1217 access[i] = 0;
1218 continue;
1219 }
1220
1221 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1222 skip_cp, p, &myucred, dev);
1223
1224 if ( myaccess ) {
1225 access[i] = 0; // have access.. no errors to report
1226 } else {
1227 access[i] = (error != 0 ? (short) error : EACCES);
1228 }
1229
1230 prevParent_cnid = catkey.hfsPlus.parentID;
1231 #else
1232 int myErr;
1233
1234 cnid = (cnid_t)file_ids[i];
1235
1236 while (cnid >= kRootDirID) {
1237 /* get the vnode for this cnid */
1238 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1239 if ( myErr ) {
1240 access[i] = EACCES;
1241 break;
1242 }
1243
1244 cnid = VTOC(vp)->c_parentcnid;
1245
1246 hfs_unlock(VTOC(vp));
1247 if (vnode_vtype(vp) == VDIR) {
1248 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1249 } else {
1250 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1251 }
1252 vnode_put(vp);
1253 access[i] = myErr;
1254 if (myErr) {
1255 break;
1256 }
1257 }
1258 #endif
1259 }
1260
1261 /* copyout the access array */
1262 if ((error = copyout((caddr_t)access, user_access_structp->access,
1263 num_files * sizeof (short)))) {
1264 goto err_exit_bulk_access;
1265 }
1266
1267 err_exit_bulk_access:
1268
1269 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1270
1271 release_pathbuff((char *) cache.acache);
1272 release_pathbuff((char *) cache.haveaccess);
1273 release_pathbuff((char *) file_ids);
1274 release_pathbuff((char *) access);
1275
1276 return (error);
1277 } /* HFS_BULKACCESS */
1278
1279 case HFS_SETACLSTATE: {
1280 int state;
1281
1282 if (ap->a_data == NULL) {
1283 return (EINVAL);
1284 }
1285
1286 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1287 state = *(int *)ap->a_data;
1288
1289 // super-user can enable or disable acl's on a volume.
1290 // the volume owner can only enable acl's
1291 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1292 return (EPERM);
1293 }
1294 if (state == 0 || state == 1)
1295 return hfs_setextendedsecurity(hfsmp, state);
1296 else
1297 return (EINVAL);
1298 }
1299
1300 case F_FULLFSYNC: {
1301 int error;
1302
1303 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1304 if (error == 0) {
1305 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1306 hfs_unlock(VTOC(vp));
1307 }
1308
1309 return error;
1310 }
1311
1312 case F_CHKCLEAN: {
1313 register struct cnode *cp;
1314 int error;
1315
1316 if (!vnode_isreg(vp))
1317 return EINVAL;
1318
1319 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1320 if (error == 0) {
1321 cp = VTOC(vp);
1322 /*
1323 * used by regression test to determine if
1324 * all the dirty pages (via write) have been cleaned
1325 * after a call to 'fsync'.
1326 */
1327 error = is_file_clean(vp, VTOF(vp)->ff_size);
1328 hfs_unlock(cp);
1329 }
1330 return (error);
1331 }
1332
1333 case F_RDADVISE: {
1334 register struct radvisory *ra;
1335 struct filefork *fp;
1336 int error;
1337
1338 if (!vnode_isreg(vp))
1339 return EINVAL;
1340
1341 ra = (struct radvisory *)(ap->a_data);
1342 fp = VTOF(vp);
1343
1344 /* Protect against a size change. */
1345 hfs_lock_truncate(VTOC(vp), TRUE);
1346
1347 if (ra->ra_offset >= fp->ff_size) {
1348 error = EFBIG;
1349 } else {
1350 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1351 }
1352
1353 hfs_unlock_truncate(VTOC(vp));
1354 return (error);
1355 }
1356
1357 case F_READBOOTSTRAP:
1358 case F_WRITEBOOTSTRAP:
1359 {
1360 struct vnode *devvp = NULL;
1361 user_fbootstraptransfer_t *user_bootstrapp;
1362 int devBlockSize;
1363 int error;
1364 uio_t auio;
1365 daddr64_t blockNumber;
1366 u_long blockOffset;
1367 u_long xfersize;
1368 struct buf *bp;
1369 user_fbootstraptransfer_t user_bootstrap;
1370
1371 if (!vnode_isvroot(vp))
1372 return (EINVAL);
1373 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1374 * to a user_fbootstraptransfer_t else we get a pointer to a
1375 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1376 */
1377 if (is64bit) {
1378 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1379 }
1380 else {
1381 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1382 user_bootstrapp = &user_bootstrap;
1383 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1384 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1385 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1386 }
1387 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1388 return EINVAL;
1389
1390 devvp = VTOHFS(vp)->hfs_devvp;
1391 auio = uio_create(1, user_bootstrapp->fbt_offset,
1392 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1393 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1394 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1395
1396 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1397
1398 while (uio_resid(auio) > 0) {
1399 blockNumber = uio_offset(auio) / devBlockSize;
1400 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1401 if (error) {
1402 if (bp) buf_brelse(bp);
1403 uio_free(auio);
1404 return error;
1405 };
1406
1407 blockOffset = uio_offset(auio) % devBlockSize;
1408 xfersize = devBlockSize - blockOffset;
1409 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1410 if (error) {
1411 buf_brelse(bp);
1412 uio_free(auio);
1413 return error;
1414 };
1415 if (uio_rw(auio) == UIO_WRITE) {
1416 error = VNOP_BWRITE(bp);
1417 if (error) {
1418 uio_free(auio);
1419 return error;
1420 }
1421 } else {
1422 buf_brelse(bp);
1423 };
1424 };
1425 uio_free(auio);
1426 };
1427 return 0;
1428
1429 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1430 {
1431 if (is64bit) {
1432 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1433 }
1434 else {
1435 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1436 }
1437 return 0;
1438 }
1439
1440 case HFS_GET_MOUNT_TIME:
1441 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1442 break;
1443
1444 case HFS_GET_LAST_MTIME:
1445 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1446 break;
1447
1448 case HFS_SET_BOOT_INFO:
1449 if (!vnode_isvroot(vp))
1450 return(EINVAL);
1451 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1452 return(EACCES); /* must be superuser or owner of filesystem */
1453 HFS_MOUNT_LOCK(hfsmp, TRUE);
1454 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1455 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1456 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1457 break;
1458
1459 case HFS_GET_BOOT_INFO:
1460 if (!vnode_isvroot(vp))
1461 return(EINVAL);
1462 HFS_MOUNT_LOCK(hfsmp, TRUE);
1463 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1464 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1465 break;
1466
1467 default:
1468 return (ENOTTY);
1469 }
1470
1471 /* Should never get here */
1472 return 0;
1473 }
1474
1475 /*
1476 * select
1477 */
1478 int
1479 hfs_vnop_select(__unused struct vnop_select_args *ap)
1480 /*
1481 struct vnop_select_args {
1482 vnode_t a_vp;
1483 int a_which;
1484 int a_fflags;
1485 void *a_wql;
1486 vfs_context_t a_context;
1487 };
1488 */
1489 {
1490 /*
1491 * We should really check to see if I/O is possible.
1492 */
1493 return (1);
1494 }
1495
1496 /*
1497 * Converts a logical block number to a physical block, and optionally returns
1498 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1499 * The physical block number is based on the device block size, currently it's 512.
1500 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1501 */
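/*
 * Illustrative example (assumed values): with a logical block size of 4096 and
 * a 512-byte device block size, logical block 10 maps to byte offset 40960;
 * if MapFileBlockC reports 32768 contiguous bytes there and can_cluster(4096)
 * holds, *runp is set to (32768 / 4096) - 1 = 7, i.e. seven more logical
 * blocks remain in the run.
 */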
1502 int
1503 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1504 {
1505 struct cnode *cp = VTOC(vp);
1506 struct filefork *fp = VTOF(vp);
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 int retval = E_NONE;
1509 daddr_t logBlockSize;
1510 size_t bytesContAvail = 0;
1511 off_t blockposition;
1512 int lockExtBtree;
1513 int lockflags = 0;
1514
1515 /*
1516 * Check for underlying vnode requests and ensure that logical
1517 * to physical mapping is requested.
1518 */
1519 if (vpp != NULL)
1520 *vpp = cp->c_devvp;
1521 if (bnp == NULL)
1522 return (0);
1523
1524 logBlockSize = GetLogicalBlockSize(vp);
1525 blockposition = (off_t)bn * (off_t)logBlockSize;
1526
1527 lockExtBtree = overflow_extents(fp);
1528
1529 if (lockExtBtree)
1530 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1531
1532 retval = MacToVFSError(
1533 MapFileBlockC (HFSTOVCB(hfsmp),
1534 (FCB*)fp,
1535 MAXPHYSIO,
1536 blockposition,
1537 bnp,
1538 &bytesContAvail));
1539
1540 if (lockExtBtree)
1541 hfs_systemfile_unlock(hfsmp, lockflags);
1542
1543 if (retval == E_NONE) {
1544 /* Figure out how many read ahead blocks there are */
1545 if (runp != NULL) {
1546 if (can_cluster(logBlockSize)) {
1547 /* Make sure this result never goes negative: */
1548 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1549 } else {
1550 *runp = 0;
1551 }
1552 }
1553 }
1554 return (retval);
1555 }
1556
1557 /*
1558 * Convert logical block number to file offset.
1559 */
1560 int
1561 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1562 /*
1563 struct vnop_blktooff_args {
1564 vnode_t a_vp;
1565 daddr64_t a_lblkno;
1566 off_t *a_offset;
1567 };
1568 */
1569 {
1570 if (ap->a_vp == NULL)
1571 return (EINVAL);
1572 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1573
1574 return(0);
1575 }
1576
1577 /*
1578 * Convert file offset to logical block number.
1579 */
1580 int
1581 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1582 /*
1583 struct vnop_offtoblk_args {
1584 vnode_t a_vp;
1585 off_t a_offset;
1586 daddr64_t *a_lblkno;
1587 };
1588 */
1589 {
1590 if (ap->a_vp == NULL)
1591 return (EINVAL);
1592 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1593
1594 return(0);
1595 }
1596
1597 /*
1598 * Map file offset to physical block number.
1599 *
1600 * System file cnodes are expected to be locked (shared or exclusive).
1601 */
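/*
 * Illustrative summary (derived from the code below): on success *a_bpn holds
 * the starting physical (device) block for a_foffset, or -1 when the offset
 * falls within an invalid (not-yet-written) range; when a_run is non-NULL it
 * returns the number of contiguous bytes known to be mappable at that offset.
 */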
1602 int
1603 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1604 /*
1605 struct vnop_blockmap_args {
1606 vnode_t a_vp;
1607 off_t a_foffset;
1608 size_t a_size;
1609 daddr64_t *a_bpn;
1610 size_t *a_run;
1611 void *a_poff;
1612 int a_flags;
1613 vfs_context_t a_context;
1614 };
1615 */
1616 {
1617 struct vnode *vp = ap->a_vp;
1618 struct cnode *cp;
1619 struct filefork *fp;
1620 struct hfsmount *hfsmp;
1621 size_t bytesContAvail = 0;
1622 int retval = E_NONE;
1623 int syslocks = 0;
1624 int lockflags = 0;
1625 struct rl_entry *invalid_range;
1626 enum rl_overlaptype overlaptype;
1627 int started_tr = 0;
1628 int tooklock = 0;
1629
1630 /* Do not allow blockmap operation on a directory */
1631 if (vnode_isdir(vp)) {
1632 return (ENOTSUP);
1633 }
1634
1635 /*
1636 * Check for underlying vnode requests and ensure that logical
1637 * to physical mapping is requested.
1638 */
1639 if (ap->a_bpn == NULL)
1640 return (0);
1641
1642 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1643 if (VTOC(vp)->c_lockowner != current_thread()) {
1644 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1645 tooklock = 1;
1646 } else {
1647 cp = VTOC(vp);
1648 panic("blockmap: %s cnode lock already held!\n",
1649 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1650 }
1651 }
1652 hfsmp = VTOHFS(vp);
1653 cp = VTOC(vp);
1654 fp = VTOF(vp);
1655
1656 retry:
1657 if (fp->ff_unallocblocks) {
1658 if (hfs_start_transaction(hfsmp) != 0) {
1659 retval = EINVAL;
1660 goto exit;
1661 } else {
1662 started_tr = 1;
1663 }
1664 syslocks = SFL_EXTENTS | SFL_BITMAP;
1665
1666 } else if (overflow_extents(fp)) {
1667 syslocks = SFL_EXTENTS;
1668 }
1669
1670 if (syslocks)
1671 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1672
1673 /*
1674 * Check for any delayed allocations.
1675 */
1676 if (fp->ff_unallocblocks) {
1677 SInt64 actbytes;
1678 u_int32_t loanedBlocks;
1679
1680 //
1681 // Make sure we have a transaction. It's possible
1682 // that we came in and fp->ff_unallocblocks was zero
1683 // but during the time we blocked acquiring the extents
1684 // btree, ff_unallocblocks became non-zero and so we
1685 // will need to start a transaction.
1686 //
1687 if (started_tr == 0) {
1688 if (syslocks) {
1689 hfs_systemfile_unlock(hfsmp, lockflags);
1690 syslocks = 0;
1691 }
1692 goto retry;
1693 }
1694
1695 /*
1696 * Note: ExtendFileC will release any blocks on loan and
1697 * acquire real blocks. So we ask to extend by zero bytes
1698 * since ExtendFileC will account for the virtual blocks.
1699 */
1700
1701 loanedBlocks = fp->ff_unallocblocks;
1702 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1703 kEFAllMask | kEFNoClumpMask, &actbytes);
1704
1705 if (retval) {
1706 fp->ff_unallocblocks = loanedBlocks;
1707 cp->c_blocks += loanedBlocks;
1708 fp->ff_blocks += loanedBlocks;
1709
1710 HFS_MOUNT_LOCK(hfsmp, TRUE);
1711 hfsmp->loanedBlocks += loanedBlocks;
1712 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1713 }
1714
1715 if (retval) {
1716 hfs_systemfile_unlock(hfsmp, lockflags);
1717 cp->c_flag |= C_MODIFIED;
1718 if (started_tr) {
1719 (void) hfs_update(vp, TRUE);
1720 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1721
1722 hfs_end_transaction(hfsmp);
1723 }
1724 goto exit;
1725 }
1726 }
1727
1728 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1729 ap->a_bpn, &bytesContAvail);
1730 if (syslocks) {
1731 hfs_systemfile_unlock(hfsmp, lockflags);
1732 syslocks = 0;
1733 }
1734
1735 if (started_tr) {
1736 (void) hfs_update(vp, TRUE);
1737 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1738 hfs_end_transaction(hfsmp);
1739 started_tr = 0;
1740 }
1741 if (retval) {
1742 goto exit;
1743 }
1744
1745 /* Adjust the mapping information for invalid file ranges: */
1746 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1747 ap->a_foffset + (off_t)bytesContAvail - 1,
1748 &invalid_range);
1749 if (overlaptype != RL_NOOVERLAP) {
1750 switch(overlaptype) {
1751 case RL_MATCHINGOVERLAP:
1752 case RL_OVERLAPCONTAINSRANGE:
1753 case RL_OVERLAPSTARTSBEFORE:
1754 /* There's no valid block for this byte offset: */
1755 *ap->a_bpn = (daddr64_t)-1;
1756 /* There's no point limiting the amount to be returned
1757 * if the invalid range that was hit extends all the way
1758 * to the EOF (i.e. there's no valid bytes between the
1759 * end of this range and the file's EOF):
1760 */
1761 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1762 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1763 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1764 }
1765 break;
1766
1767 case RL_OVERLAPISCONTAINED:
1768 case RL_OVERLAPENDSAFTER:
1769 /* The range of interest hits an invalid block before the end: */
1770 if (invalid_range->rl_start == ap->a_foffset) {
1771 /* There's actually no valid information to be had starting here: */
1772 *ap->a_bpn = (daddr64_t)-1;
1773 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1774 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1775 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1776 }
1777 } else {
1778 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1779 }
1780 break;
1781
1782 case RL_NOOVERLAP:
1783 break;
1784 } /* end switch */
1785 if (bytesContAvail > ap->a_size)
1786 bytesContAvail = ap->a_size;
1787 }
1788 if (ap->a_run)
1789 *ap->a_run = bytesContAvail;
1790
1791 if (ap->a_poff)
1792 *(int *)ap->a_poff = 0;
1793 exit:
1794 if (tooklock)
1795 hfs_unlock(cp);
1796
1797 return (MacToVFSError(retval));
1798 }
1799
1800
1801 /*
1802 * Prepare and issue the I/O;
1803 * buf_strategy knows how to deal
1804 * with requests that require
1805 * fragmented I/Os.
1806 */
1807 int
1808 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1809 {
1810 buf_t bp = ap->a_bp;
1811 vnode_t vp = buf_vnode(bp);
1812 struct cnode *cp = VTOC(vp);
1813
1814 return (buf_strategy(cp->c_devvp, ap));
1815 }
1816
1817
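/*
 * Common truncate/extend helper (summary inferred from the code below): grows
 * or shrinks the fork to 'length' bytes, allocating or releasing allocation
 * blocks as needed, zero-filling or invalidating the affected page ranges, and
 * calling ubc_setsize() unless skipsetsize is set (nested transactions do
 * their own ubc_setsize).
 */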
1818 static int
1819 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1820 {
1821 register struct cnode *cp = VTOC(vp);
1822 struct filefork *fp = VTOF(vp);
1823 struct proc *p = vfs_context_proc(context);
1824 kauth_cred_t cred = vfs_context_ucred(context);
1825 int retval;
1826 off_t bytesToAdd;
1827 off_t actualBytesAdded;
1828 off_t filebytes;
1829 u_int64_t old_filesize;
1830 u_long fileblocks;
1831 int blksize;
1832 struct hfsmount *hfsmp;
1833 int lockflags;
1834
1835 blksize = VTOVCB(vp)->blockSize;
1836 fileblocks = fp->ff_blocks;
1837 filebytes = (off_t)fileblocks * (off_t)blksize;
1838 old_filesize = fp->ff_size;
1839
1840 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1841 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1842
1843 if (length < 0)
1844 return (EINVAL);
1845
1846 /* This should only happen with a corrupt filesystem */
1847 if ((off_t)fp->ff_size < 0)
1848 return (EINVAL);
1849
1850 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1851 return (EFBIG);
1852
1853 hfsmp = VTOHFS(vp);
1854
1855 retval = E_NONE;
1856
1857 /* Files that are changing size are not hot file candidates. */
1858 if (hfsmp->hfc_stage == HFC_RECORDING) {
1859 fp->ff_bytesread = 0;
1860 }
1861
1862 /*
1863 * We cannot just check if fp->ff_size == length (as an optimization)
1864 * since there may be extra physical blocks that also need truncation.
1865 */
1866 #if QUOTA
1867 if ((retval = hfs_getinoquota(cp)))
1868 return(retval);
1869 #endif /* QUOTA */
1870
1871 /*
1872 * Lengthen the size of the file. We must ensure that the
1873 * last byte of the file is allocated. Since the smallest
1874 * value of ff_size is 0, length will be at least 1.
1875 */
1876 if (length > (off_t)fp->ff_size) {
1877 #if QUOTA
1878 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1879 cred, 0);
1880 if (retval)
1881 goto Err_Exit;
1882 #endif /* QUOTA */
1883 /*
1884 * If we don't have enough physical space then
1885 * we need to extend the physical size.
1886 */
1887 if (length > filebytes) {
1888 int eflags;
1889 u_long blockHint = 0;
1890
1891 /* All or nothing and don't round up to clumpsize. */
1892 eflags = kEFAllMask | kEFNoClumpMask;
1893
1894 if (cred && suser(cred, NULL) != 0)
1895 eflags |= kEFReserveMask; /* keep a reserve */
1896
1897 /*
1898 * Allocate Journal and Quota files in metadata zone.
1899 */
1900 if (filebytes == 0 &&
1901 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1902 hfs_virtualmetafile(cp)) {
1903 eflags |= kEFMetadataMask;
1904 blockHint = hfsmp->hfs_metazone_start;
1905 }
1906 if (hfs_start_transaction(hfsmp) != 0) {
1907 retval = EINVAL;
1908 goto Err_Exit;
1909 }
1910
1911 /* Protect extents b-tree and allocation bitmap */
1912 lockflags = SFL_BITMAP;
1913 if (overflow_extents(fp))
1914 lockflags |= SFL_EXTENTS;
1915 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1916
1917 while ((length > filebytes) && (retval == E_NONE)) {
1918 bytesToAdd = length - filebytes;
1919 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1920 (FCB*)fp,
1921 bytesToAdd,
1922 blockHint,
1923 eflags,
1924 &actualBytesAdded));
1925
1926 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1927 if (actualBytesAdded == 0 && retval == E_NONE) {
1928 if (length > filebytes)
1929 length = filebytes;
1930 break;
1931 }
1932 } /* endwhile */
1933
1934 hfs_systemfile_unlock(hfsmp, lockflags);
1935
1936 if (hfsmp->jnl) {
1937 (void) hfs_update(vp, TRUE);
1938 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1939 }
1940
1941 hfs_end_transaction(hfsmp);
1942
1943 if (retval)
1944 goto Err_Exit;
1945
1946 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1947 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1948 }
1949
1950 if (!(flags & IO_NOZEROFILL)) {
1951 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1952 struct rl_entry *invalid_range;
1953 off_t zero_limit;
1954
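			/*
			 * zero_limit is the current EOF rounded up to the next
			 * page boundary, clamped so we never zero-fill past the
			 * new length.
			 */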
1955 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1956 if (length < zero_limit) zero_limit = length;
1957
1958 if (length > (off_t)fp->ff_size) {
1959 struct timeval tv;
1960
1961 /* Extending the file: time to fill out the current last page with zeroes? */
1962 if ((fp->ff_size & PAGE_MASK_64) &&
1963 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1964 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1965
1966 /* There's some valid data at the start of the (current) last page
1967 of the file, so zero out the remainder of that page to ensure the
1968 entire page contains valid data. Since there is no invalid range
1969 possible past the (current) eof, there's no need to remove anything
1970 from the invalid range list before calling cluster_write(): */
1971 hfs_unlock(cp);
1972 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1973 fp->ff_size, (off_t)0,
1974 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1975 hfs_lock(cp, HFS_FORCE_LOCK);
1976 if (retval) goto Err_Exit;
1977
1978 /* Merely invalidate the remaining area, if necessary: */
1979 if (length > zero_limit) {
1980 microuptime(&tv);
1981 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1982 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1983 }
1984 } else {
1985 /* The page containing the (current) eof is invalid: just add the
1986 remainder of the page to the invalid list, along with the area
1987 being newly allocated:
1988 */
1989 microuptime(&tv);
1990 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1991 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1992 };
1993 }
1994 } else {
1995 panic("hfs_truncate: invoked on non-UBC object?!");
1996 };
1997 }
1998 cp->c_touch_modtime = TRUE;
1999 fp->ff_size = length;
2000
2001 /* Nested transactions will do their own ubc_setsize. */
2002 if (!skipsetsize) {
2003 /*
2004 * ubc_setsize can cause a pagein here
2005 * so we need to drop cnode lock.
2006 */
2007 hfs_unlock(cp);
2008 ubc_setsize(vp, length);
2009 hfs_lock(cp, HFS_FORCE_LOCK);
2010 }
2011
2012 } else { /* Shorten the size of the file */
2013
2014 if ((off_t)fp->ff_size > length) {
2015 /*
2016 * Any buffers that are past the truncation point need to be
2017 * invalidated (to maintain buffer cache consistency).
2018 */
2019
2020 /* Nested transactions will do their own ubc_setsize. */
2021 if (!skipsetsize) {
2022 /*
2023 * ubc_setsize can cause a pageout here
2024 * so we need to drop cnode lock.
2025 */
2026 hfs_unlock(cp);
2027 ubc_setsize(vp, length);
2028 hfs_lock(cp, HFS_FORCE_LOCK);
2029 }
2030
2031 /* Any space previously marked as invalid is now irrelevant: */
2032 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2033 }
2034
2035 /*
2036 * Account for any unmapped blocks. Note that the new
2037 * file length can still end up with unmapped blocks.
2038 */
2039 if (fp->ff_unallocblocks > 0) {
2040 u_int32_t finalblks;
2041 u_int32_t loanedBlocks;
2042
2043 HFS_MOUNT_LOCK(hfsmp, TRUE);
2044
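			/* Give back the blocks that were loaned to this fork. */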
2045 loanedBlocks = fp->ff_unallocblocks;
2046 cp->c_blocks -= loanedBlocks;
2047 fp->ff_blocks -= loanedBlocks;
2048 fp->ff_unallocblocks = 0;
2049
2050 hfsmp->loanedBlocks -= loanedBlocks;
2051
2052 finalblks = (length + blksize - 1) / blksize;
2053 if (finalblks > fp->ff_blocks) {
2054 /* calculate required unmapped blocks */
2055 loanedBlocks = finalblks - fp->ff_blocks;
2056 hfsmp->loanedBlocks += loanedBlocks;
2057
2058 fp->ff_unallocblocks = loanedBlocks;
2059 cp->c_blocks += loanedBlocks;
2060 fp->ff_blocks += loanedBlocks;
2061 }
2062 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2063 }
2064
2065 /*
2066 * For a TBE process, deallocation of the file blocks is
2067 * delayed until the file is closed, and hfs_close calls
2068 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2069 * isn't set, we make sure this isn't a TBE process.
2070 */
2071 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2072 #if QUOTA
2073 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2074 #endif /* QUOTA */
2075 if (hfs_start_transaction(hfsmp) != 0) {
2076 retval = EINVAL;
2077 goto Err_Exit;
2078 }
2079
2080 if (fp->ff_unallocblocks == 0) {
2081 /* Protect extents b-tree and allocation bitmap */
2082 lockflags = SFL_BITMAP;
2083 if (overflow_extents(fp))
2084 lockflags |= SFL_EXTENTS;
2085 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2086
2087 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2088 (FCB*)fp, length, false));
2089
2090 hfs_systemfile_unlock(hfsmp, lockflags);
2091 }
2092 if (hfsmp->jnl) {
2093 if (retval == 0) {
2094 fp->ff_size = length;
2095 }
2096 (void) hfs_update(vp, TRUE);
2097 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2098 }
2099
2100 hfs_end_transaction(hfsmp);
2101
2102 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2103 if (retval)
2104 goto Err_Exit;
2105 #if QUOTA
2106 /* These are bytes released */
2107 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2108 #endif /* QUOTA */
2109 }
2110 /* Only set update flag if the logical length changes */
2111 if (old_filesize != length)
2112 cp->c_touch_modtime = TRUE;
2113 fp->ff_size = length;
2114 }
2115 cp->c_touch_chgtime = TRUE;
2116 retval = hfs_update(vp, MNT_WAIT);
2117 if (retval) {
2118 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2119 -1, -1, -1, retval, 0);
2120 }
2121
2122 Err_Exit:
2123
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2125 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2126
2127 return (retval);
2128 }
2129
2130
2131
2132 /*
2133 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2134 * the disk blocks as needed.
2135 */
2136 __private_extern__
2137 int
2138 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2139 vfs_context_t context)
2140 {
2141 struct filefork *fp = VTOF(vp);
2142 off_t filebytes;
2143 u_long fileblocks;
2144 int blksize, error = 0;
2145 struct cnode *cp = VTOC(vp);
2146
2147 if (vnode_isdir(vp))
2148 return (EISDIR); /* cannot truncate an HFS directory! */
2149
2150 blksize = VTOVCB(vp)->blockSize;
2151 fileblocks = fp->ff_blocks;
2152 filebytes = (off_t)fileblocks * (off_t)blksize;
2153
2154 // have to loop truncating or growing files that are
2155 // really big because otherwise transactions can get
2156 // enormous and consume too many kernel resources.
2157
2158 if (length < filebytes) {
2159 while (filebytes > length) {
2160 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2161 filebytes -= HFS_BIGFILE_SIZE;
2162 } else {
2163 filebytes = length;
2164 }
2165 cp->c_flag |= C_FORCEUPDATE;
2166 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2167 if (error)
2168 break;
2169 }
2170 } else if (length > filebytes) {
2171 while (filebytes < length) {
2172 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2173 filebytes += HFS_BIGFILE_SIZE;
2174 } else {
2175 filebytes = length;
2176 }
2177 cp->c_flag |= C_FORCEUPDATE;
2178 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2179 if (error)
2180 break;
2181 }
2182 } else /* Same logical size */ {
2183
2184 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2185 }
2186 /* Files that are changing size are not hot file candidates. */
2187 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2188 fp->ff_bytesread = 0;
2189 }
2190
2191 return (error);
2192 }
2193
2194
2195
2196 /*
2197 * Preallocate file storage space.
2198 */
2199 int
2200 hfs_vnop_allocate(struct vnop_allocate_args /* {
2201 vnode_t a_vp;
2202 off_t a_length;
2203 u_int32_t a_flags;
2204 off_t *a_bytesallocated;
2205 off_t a_offset;
2206 vfs_context_t a_context;
2207 } */ *ap)
2208 {
2209 struct vnode *vp = ap->a_vp;
2210 struct cnode *cp;
2211 struct filefork *fp;
2212 ExtendedVCB *vcb;
2213 off_t length = ap->a_length;
2214 off_t startingPEOF;
2215 off_t moreBytesRequested;
2216 off_t actualBytesAdded;
2217 off_t filebytes;
2218 u_long fileblocks;
2219 int retval, retval2;
2220 UInt32 blockHint;
2221 UInt32 extendFlags; /* For call to ExtendFileC */
2222 struct hfsmount *hfsmp;
2223 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2224 int lockflags;
2225
2226 *(ap->a_bytesallocated) = 0;
2227
2228 if (!vnode_isreg(vp))
2229 return (EISDIR);
2230 if (length < (off_t)0)
2231 return (EINVAL);
2232
2233 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2234 return (retval);
2235 cp = VTOC(vp);
2236 fp = VTOF(vp);
2237 hfsmp = VTOHFS(vp);
2238 vcb = VTOVCB(vp);
2239
2240 fileblocks = fp->ff_blocks;
2241 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2242
2243 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2244 retval = EINVAL;
2245 goto Err_Exit;
2246 }
2247
2248 /* Fill in the flags word for the call to Extend the file */
2249
2250 extendFlags = kEFNoClumpMask;
2251 if (ap->a_flags & ALLOCATECONTIG)
2252 extendFlags |= kEFContigMask;
2253 if (ap->a_flags & ALLOCATEALL)
2254 extendFlags |= kEFAllMask;
2255 if (cred && suser(cred, NULL) != 0)
2256 extendFlags |= kEFReserveMask;
2257
2258 retval = E_NONE;
2259 blockHint = 0;
2260 startingPEOF = filebytes;
2261
2262 if (ap->a_flags & ALLOCATEFROMPEOF)
2263 length += filebytes;
2264 else if (ap->a_flags & ALLOCATEFROMVOL)
2265 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2266
2267 /* If no changes are necessary, then we're done */
2268 if (filebytes == length)
2269 goto Std_Exit;
2270
2271 /*
2272 * Lengthen the size of the file. We must ensure that the
2273 * last byte of the file is allocated. Since the smallest
2274 * value of filebytes is 0, length will be at least 1.
2275 */
2276 if (length > filebytes) {
2277 moreBytesRequested = length - filebytes;
2278
2279 #if QUOTA
2280 retval = hfs_chkdq(cp,
2281 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2282 cred, 0);
2283 if (retval)
2284 goto Err_Exit;
2285
2286 #endif /* QUOTA */
2287 /*
2288 * Metadata zone checks.
2289 */
2290 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2291 /*
2292 * Allocate Journal and Quota files in metadata zone.
2293 */
2294 if (hfs_virtualmetafile(cp)) {
2295 extendFlags |= kEFMetadataMask;
2296 blockHint = hfsmp->hfs_metazone_start;
2297 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2298 (blockHint <= hfsmp->hfs_metazone_end)) {
2299 /*
2300 * Move blockHint outside metadata zone.
2301 */
2302 blockHint = hfsmp->hfs_metazone_end + 1;
2303 }
2304 }
2305
2306 if (hfs_start_transaction(hfsmp) != 0) {
2307 retval = EINVAL;
2308 goto Err_Exit;
2309 }
2310
2311 /* Protect extents b-tree and allocation bitmap */
2312 lockflags = SFL_BITMAP;
2313 if (overflow_extents(fp))
2314 lockflags |= SFL_EXTENTS;
2315 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2316
2317 retval = MacToVFSError(ExtendFileC(vcb,
2318 (FCB*)fp,
2319 moreBytesRequested,
2320 blockHint,
2321 extendFlags,
2322 &actualBytesAdded));
2323
2324 *(ap->a_bytesallocated) = actualBytesAdded;
2325 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2326
2327 hfs_systemfile_unlock(hfsmp, lockflags);
2328
2329 if (hfsmp->jnl) {
2330 (void) hfs_update(vp, TRUE);
2331 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2332 }
2333
2334 hfs_end_transaction(hfsmp);
2335
2336 /*
2337 * If we get an error and no changes were made then exit;
2338 * otherwise we must do the hfs_update to reflect the changes.
2339 */
2340 if (retval && (startingPEOF == filebytes))
2341 goto Err_Exit;
2342
2343 /*
2344 * Adjust actualBytesAdded to be allocation block aligned, not
2345 * clump size aligned.
2346 * NOTE: What we are reporting here does not take effect on disk
2347 * until the file is closed, when we truncate the file to allocation
2348 * block size.
2349 */
2350 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2351 *(ap->a_bytesallocated) =
2352 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2353
2354 } else { /* Shorten the size of the file */
2355
2356 if (fp->ff_size > length) {
2357 /*
2358 * Any buffers that are past the truncation point need to be
2359 * invalidated (to maintain buffer cache consistency).
2360 */
2361 }
2362
2363 if (hfs_start_transaction(hfsmp) != 0) {
2364 retval = EINVAL;
2365 goto Err_Exit;
2366 }
2367
2368 /* Protect extents b-tree and allocation bitmap */
2369 lockflags = SFL_BITMAP;
2370 if (overflow_extents(fp))
2371 lockflags |= SFL_EXTENTS;
2372 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2373
2374 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2375
2376 hfs_systemfile_unlock(hfsmp, lockflags);
2377
2378 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2379
2380 if (hfsmp->jnl) {
2381 (void) hfs_update(vp, TRUE);
2382 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2383 }
2384
2385 hfs_end_transaction(hfsmp);
2386
2387
2388 /*
2389 * If we get an error and no changes were made then exit;
2390 * otherwise we must do the hfs_update to reflect the changes.
2391 */
2392 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2393 #if QUOTA
2394 /* These are bytes released */
2395 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2396 #endif /* QUOTA */
2397
2398 if (fp->ff_size > filebytes) {
2399 fp->ff_size = filebytes;
2400
2401 hfs_unlock(cp);
2402 ubc_setsize(vp, fp->ff_size);
2403 hfs_lock(cp, HFS_FORCE_LOCK);
2404 }
2405 }
2406
2407 Std_Exit:
2408 cp->c_touch_chgtime = TRUE;
2409 cp->c_touch_modtime = TRUE;
2410 retval2 = hfs_update(vp, MNT_WAIT);
2411
2412 if (retval == 0)
2413 retval = retval2;
2414 Err_Exit:
2415 hfs_unlock(cp);
2416 return (retval);
2417 }
2418
2419
2420 /*
2421 * Pagein for HFS filesystem
2422 */
2423 int
2424 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2425 /*
2426 struct vnop_pagein_args {
2427 vnode_t a_vp,
2428 upl_t a_pl,
2429 vm_offset_t a_pl_offset,
2430 off_t a_f_offset,
2431 size_t a_size,
2432 int a_flags
2433 vfs_context_t a_context;
2434 };
2435 */
2436 {
2437 vnode_t vp = ap->a_vp;
2438 int error;
2439
2440 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2441 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2442 /*
2443 * Keep track of blocks read.
2444 */
2445 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2446 struct cnode *cp;
2447 struct filefork *fp;
2448 int bytesread;
2449 int took_cnode_lock = 0;
2450
2451 cp = VTOC(vp);
2452 fp = VTOF(vp);
2453
2454 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2455 bytesread = fp->ff_size;
2456 else
2457 bytesread = ap->a_size;
2458
2459 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2460 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2461 hfs_lock(cp, HFS_FORCE_LOCK);
2462 took_cnode_lock = 1;
2463 }
2464 /*
2465 * If this file hasn't been seen since the start of
2466 * the current sampling period then start over.
2467 */
2468 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2469 struct timeval tv;
2470
2471 fp->ff_bytesread = bytesread;
2472 microtime(&tv);
2473 cp->c_atime = tv.tv_sec;
2474 } else {
2475 fp->ff_bytesread += bytesread;
2476 }
2477 cp->c_touch_acctime = TRUE;
2478 if (took_cnode_lock)
2479 hfs_unlock(cp);
2480 }
2481 return (error);
2482 }
2483
2484 /*
2485 * Pageout for HFS filesystem.
2486 */
2487 int
2488 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2489 /*
2490 struct vnop_pageout_args {
2491 vnode_t a_vp,
2492 upl_t a_pl,
2493 vm_offset_t a_pl_offset,
2494 off_t a_f_offset,
2495 size_t a_size,
2496 int a_flags
2497 vfs_context_t a_context;
2498 };
2499 */
2500 {
2501 vnode_t vp = ap->a_vp;
2502 struct cnode *cp;
2503 struct filefork *fp;
2504 int retval;
2505 off_t end_of_range;
2506 off_t filesize;
2507
2508 cp = VTOC(vp);
2509 if (cp->c_lockowner == current_thread()) {
2510 panic("pageout: %s cnode lock already held!\n",
2511 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2512 }
2513 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2514 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2515 ubc_upl_abort_range(ap->a_pl,
2516 ap->a_pl_offset,
2517 ap->a_size,
2518 UPL_ABORT_FREE_ON_EMPTY);
2519 }
2520 return (retval);
2521 }
2522 fp = VTOF(vp);
2523
2524 filesize = fp->ff_size;
2525 end_of_range = ap->a_f_offset + ap->a_size - 1;
2526
2527 if (end_of_range >= filesize) {
2528 end_of_range = (off_t)(filesize - 1);
2529 }
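	/*
	 * Once these pages are written out the range is valid on disk,
	 * so it no longer belongs on the invalid-range list.
	 */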
2530 if (ap->a_f_offset < filesize) {
2531 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2532 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2533 }
2534 hfs_unlock(cp);
2535
2536 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2537 ap->a_size, filesize, ap->a_flags);
2538
2539 /*
2540 * If data was written, and setuid or setgid bits are set and
2541 * this process is not the superuser then clear the setuid and
2542 * setgid bits as a precaution against tampering.
2543 */
2544 if ((retval == 0) &&
2545 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2546 (vfs_context_suser(ap->a_context) != 0)) {
2547 hfs_lock(cp, HFS_FORCE_LOCK);
2548 cp->c_mode &= ~(S_ISUID | S_ISGID);
2549 cp->c_touch_chgtime = TRUE;
2550 hfs_unlock(cp);
2551 }
2552 return (retval);
2553 }
2554
2555 /*
2556 * Intercept B-Tree node writes to unswap them if necessary.
2557 */
2558 int
2559 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2560 {
2561 int retval = 0;
2562 register struct buf *bp = ap->a_bp;
2563 register struct vnode *vp = buf_vnode(bp);
2564 BlockDescriptor block;
2565
2566 /* Trap B-Tree writes */
2567 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2568 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2569 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2570
2571 /*
2572 * Swap and validate the node if it is in native byte order.
2573 * This is always true on big endian, so we always validate
2574 * before writing here. On little endian, the node typically has
2575 * been swapped and validated when it was written to the journal,
2576 * so we won't do anything here.
2577 */
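		/*
		 * The last two bytes of a B-tree node hold the offset of its
		 * first record, sizeof(BTNodeDescriptor) == 0x000e, so reading
		 * 0x000e here in host order means the node is still in host
		 * byte order.
		 */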
2578 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2579 /* Prepare the block pointer */
2580 block.blockHeader = bp;
2581 block.buffer = (char *)buf_dataptr(bp);
2582 block.blockNum = buf_lblkno(bp);
2583 /* not found in cache ==> came from disk */
2584 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2585 block.blockSize = buf_count(bp);
2586
2587 /* Endian un-swap B-Tree node */
2588 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2589 if (retval)
2590 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2591 }
2592 }
2593
2594 /* This buffer shouldn't be locked anymore, but if it is, clear it */
2595 if ((buf_flags(bp) & B_LOCKED)) {
2596 // XXXdbg
2597 if (VTOHFS(vp)->jnl) {
2598 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2599 }
2600 buf_clearflags(bp, B_LOCKED);
2601 }
2602 retval = vn_bwrite (ap);
2603
2604 return (retval);
2605 }
2606
2607 /*
2608 * Relocate a file to a new location on disk
2609 * cnode must be locked on entry
2610 *
2611 * Relocation occurs by cloning the file's data from its
2612 * current set of blocks to a new set of blocks. During
2613 * the relocation all of the blocks (old and new) are
2614 * owned by the file.
2615 *
2616 * -----------------
2617 * |///////////////|
2618 * -----------------
2619 * 0 N (file offset)
2620 *
2621 * ----------------- -----------------
2622 * |///////////////| | | STEP 1 (acquire new blocks)
2623 * ----------------- -----------------
2624 * 0 N N+1 2N
2625 *
2626 * ----------------- -----------------
2627 * |///////////////| |///////////////| STEP 2 (clone data)
2628 * ----------------- -----------------
2629 * 0 N N+1 2N
2630 *
2631 * -----------------
2632 * |///////////////| STEP 3 (head truncate blocks)
2633 * -----------------
2634 * 0 N
2635 *
2636 * During steps 2 and 3 page-outs to file offsets less
2637 * than or equal to N are suspended.
2638 *
2639 * During step 3 page-ins to the file are suspended.
2640 */
2641 __private_extern__
2642 int
2643 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2644 struct proc *p)
2645 {
2646 struct cnode *cp;
2647 struct filefork *fp;
2648 struct hfsmount *hfsmp;
2649 u_int32_t headblks;
2650 u_int32_t datablks;
2651 u_int32_t blksize;
2652 u_int32_t growsize;
2653 u_int32_t nextallocsave;
2654 daddr64_t sector_a, sector_b;
2655 int disabled_caching = 0;
2656 int eflags;
2657 off_t newbytes;
2658 int retval;
2659 int lockflags = 0;
2660 int took_trunc_lock = 0;
2661 int started_tr = 0;
2662 enum vtype vnodetype;
2663
2664 vnodetype = vnode_vtype(vp);
2665 if (vnodetype != VREG && vnodetype != VLNK) {
2666 return (EPERM);
2667 }
2668
2669 hfsmp = VTOHFS(vp);
2670 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2671 return (ENOSPC);
2672 }
2673
2674 cp = VTOC(vp);
2675 fp = VTOF(vp);
2676 if (fp->ff_unallocblocks)
2677 return (EINVAL);
2678 blksize = hfsmp->blockSize;
2679 if (blockHint == 0)
2680 blockHint = hfsmp->nextAllocation;
2681
2682 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2683 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2684 return (EFBIG);
2685 }
2686
2687 //
2688 // We do not believe that this call to hfs_fsync() is
2689 // necessary and it causes a journal transaction
2690 // deadlock so we are removing it.
2691 //
2692 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2693 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2694 // if (retval)
2695 // return (retval);
2696 //}
2697
2698 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2699 hfs_unlock(cp);
2700 hfs_lock_truncate(cp, TRUE);
2701 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2702 hfs_unlock_truncate(cp);
2703 return (retval);
2704 }
2705 took_trunc_lock = 1;
2706 }
2707 headblks = fp->ff_blocks;
2708 datablks = howmany(fp->ff_size, blksize);
2709 growsize = datablks * blksize;
2710 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2711 if (blockHint >= hfsmp->hfs_metazone_start &&
2712 blockHint <= hfsmp->hfs_metazone_end)
2713 eflags |= kEFMetadataMask;
2714
2715 if (hfs_start_transaction(hfsmp) != 0) {
2716 if (took_trunc_lock)
2717 hfs_unlock_truncate(cp);
2718 return (EINVAL);
2719 }
2720 started_tr = 1;
2721 /*
2722 * Protect the extents b-tree and the allocation bitmap
2723 * during MapFileBlockC and ExtendFileC operations.
2724 */
2725 lockflags = SFL_BITMAP;
2726 if (overflow_extents(fp))
2727 lockflags |= SFL_EXTENTS;
2728 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2729
2730 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2731 if (retval) {
2732 retval = MacToVFSError(retval);
2733 goto out;
2734 }
2735
2736 /*
2737 * STEP 1 - acquire new allocation blocks.
2738 */
2739 if (!vnode_isnocache(vp)) {
2740 vnode_setnocache(vp);
2741 disabled_caching = 1;
2742
2743 }
2744 nextallocsave = hfsmp->nextAllocation;
2745 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
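	/*
	 * For a metadata zone allocation, put the roving allocation pointer
	 * back where it was so that later allocations don't start hunting
	 * inside the metadata zone.
	 */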
2746 if (eflags & kEFMetadataMask) {
2747 HFS_MOUNT_LOCK(hfsmp, TRUE);
2748 hfsmp->nextAllocation = nextallocsave;
2749 hfsmp->vcbFlags |= 0xFF00;
2750 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2751 }
2752
2753 retval = MacToVFSError(retval);
2754 if (retval == 0) {
2755 cp->c_flag |= C_MODIFIED;
2756 if (newbytes < growsize) {
2757 retval = ENOSPC;
2758 goto restore;
2759 } else if (fp->ff_blocks < (headblks + datablks)) {
2760 printf("hfs_relocate: allocation failed\n");
2761 retval = ENOSPC;
2762 goto restore;
2763 }
2764
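		/*
		 * Map the first block of the new space; if it immediately
		 * follows the old data, the file didn't actually move anywhere.
		 */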
2765 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2766 if (retval) {
2767 retval = MacToVFSError(retval);
2768 } else if ((sector_a + 1) == sector_b) {
2769 retval = ENOSPC;
2770 goto restore;
2771 } else if ((eflags & kEFMetadataMask) &&
2772 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2773 hfsmp->hfs_metazone_end)) {
2774 printf("hfs_relocate: didn't move into metadata zone\n");
2775 retval = ENOSPC;
2776 goto restore;
2777 }
2778 }
2779 /* Done with system locks and journal for now. */
2780 hfs_systemfile_unlock(hfsmp, lockflags);
2781 lockflags = 0;
2782 hfs_end_transaction(hfsmp);
2783 started_tr = 0;
2784
2785 if (retval) {
2786 /*
2787 * Check to see if failure is due to excessive fragmentation.
2788 */
2789 if ((retval == ENOSPC) &&
2790 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2791 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2792 }
2793 goto out;
2794 }
2795 /*
2796 * STEP 2 - clone file data into the new allocation blocks.
2797 */
2798
2799 if (vnodetype == VLNK)
2800 retval = hfs_clonelink(vp, blksize, cred, p);
2801 else if (vnode_issystem(vp))
2802 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2803 else
2804 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2805
2806 /* Start transaction for step 3 or for a restore. */
2807 if (hfs_start_transaction(hfsmp) != 0) {
2808 retval = EINVAL;
2809 goto out;
2810 }
2811 started_tr = 1;
2812 if (retval)
2813 goto restore;
2814
2815 /*
2816 * STEP 3 - switch to cloned data and remove old blocks.
2817 */
2818 lockflags = SFL_BITMAP;
2819 if (overflow_extents(fp))
2820 lockflags |= SFL_EXTENTS;
2821 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2822
2823 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2824
2825 hfs_systemfile_unlock(hfsmp, lockflags);
2826 lockflags = 0;
2827 if (retval)
2828 goto restore;
2829 out:
2830 if (took_trunc_lock)
2831 hfs_unlock_truncate(cp);
2832
2833 if (lockflags) {
2834 hfs_systemfile_unlock(hfsmp, lockflags);
2835 lockflags = 0;
2836 }
2837
2838 // See comment up above about calls to hfs_fsync()
2839 //
2840 //if (retval == 0)
2841 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2842
2843 if (hfsmp->jnl) {
2844 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2845 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2846 else
2847 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2848 }
2849 exit:
2850 if (disabled_caching) {
2851 vnode_clearnocache(vp);
2852 }
2853 if (started_tr)
2854 hfs_end_transaction(hfsmp);
2855
2856 return (retval);
2857
2858 restore:
2859 if (fp->ff_blocks == headblks)
2860 goto exit;
2861 /*
2862 * Give back any newly allocated space.
2863 */
2864 if (lockflags == 0) {
2865 lockflags = SFL_BITMAP;
2866 if (overflow_extents(fp))
2867 lockflags |= SFL_EXTENTS;
2868 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2869 }
2870
2871 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2872
2873 hfs_systemfile_unlock(hfsmp, lockflags);
2874 lockflags = 0;
2875
2876 if (took_trunc_lock)
2877 hfs_unlock_truncate(cp);
2878 goto exit;
2879 }
2880
2881
2882 /*
2883 * Clone a symlink.
2884 *
2885 */
2886 static int
2887 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2888 {
2889 struct buf *head_bp = NULL;
2890 struct buf *tail_bp = NULL;
2891 int error;
2892
2893
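	/*
	 * A symlink's data fits in a single allocation block, so copy
	 * logical block 0 (the original) into logical block 1 (the newly
	 * allocated space).
	 */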
2894 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2895 if (error)
2896 goto out;
2897
2898 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2899 if (tail_bp == NULL) {
2900 error = EIO;
2901 goto out;
2902 }
2903 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2904 error = (int)buf_bwrite(tail_bp);
2905 out:
2906 if (head_bp) {
2907 buf_markinvalid(head_bp);
2908 buf_brelse(head_bp);
2909 }
2910 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2911
2912 return (error);
2913 }
2914
2915 /*
2916 * Clone a file's data within the file.
2917 *
2918 */
2919 static int
2920 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2921 {
2922 caddr_t bufp;
2923 size_t writebase;
2924 size_t bufsize;
2925 size_t copysize;
2926 size_t iosize;
2927 off_t filesize;
2928 size_t offset;
2929 uio_t auio;
2930 int error = 0;
2931
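	/*
	 * Read the original data (file offsets 0 .. blkcnt * blksize) and
	 * write it back starting at blkstart * blksize, i.e. into the newly
	 * allocated blocks, copying up to 64K per pass.
	 */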
2932 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2933 writebase = blkstart * blksize;
2934 copysize = blkcnt * blksize;
2935 iosize = bufsize = MIN(copysize, 4096 * 16);
2936 offset = 0;
2937
2938 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2939 return (ENOMEM);
2940 }
2941 hfs_unlock(VTOC(vp));
2942
2943 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2944
2945 while (offset < copysize) {
2946 iosize = MIN(copysize - offset, iosize);
2947
2948 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2949 uio_addiov(auio, (uintptr_t)bufp, iosize);
2950
2951 error = cluster_read(vp, auio, copysize, 0);
2952 if (error) {
2953 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2954 break;
2955 }
2956 if (uio_resid(auio) != 0) {
2957 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2958 error = EIO;
2959 break;
2960 }
2961
2962 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2963 uio_addiov(auio, (uintptr_t)bufp, iosize);
2964
2965 error = cluster_write(vp, auio, filesize + offset,
2966 filesize + offset + iosize,
2967 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2968 if (error) {
2969 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2970 break;
2971 }
2972 if (uio_resid(auio) != 0) {
2973 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2974 error = EIO;
2975 break;
2976 }
2977 offset += iosize;
2978 }
2979 uio_free(auio);
2980
2981 /*
2982 * No need to call ubc_sync_range or hfs_invalbuf
2983 * since the file was copied using IO_NOCACHE.
2984 */
2985
2986 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2987
2988 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2989 return (error);
2990 }
2991
2992 /*
2993 * Clone a system (metadata) file.
2994 *
2995 */
2996 static int
2997 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2998 kauth_cred_t cred, struct proc *p)
2999 {
3000 caddr_t bufp;
3001 char * offset;
3002 size_t bufsize;
3003 size_t iosize;
3004 struct buf *bp = NULL;
3005 daddr64_t blkno;
3006 daddr64_t blk;
3007 daddr64_t start_blk;
3008 daddr64_t last_blk;
3009 int breadcnt;
3010 int i;
3011 int error = 0;
3012
3013
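	/*
	 * Copy the original blocks into the new space one logical block at
	 * a time, buffering up to a megabyte per pass through the loop.
	 */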
3014 iosize = GetLogicalBlockSize(vp);
3015 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3016 breadcnt = bufsize / iosize;
3017
3018 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3019 return (ENOMEM);
3020 }
3021 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3022 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3023 blkno = 0;
3024
3025 while (blkno < last_blk) {
3026 /*
3027 * Read up to a megabyte
3028 */
3029 offset = bufp;
3030 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3031 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3032 if (error) {
3033 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3034 goto out;
3035 }
3036 if (buf_count(bp) != iosize) {
3037 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3038 goto out;
3039 }
3040 bcopy((char *)buf_dataptr(bp), offset, iosize);
3041
3042 buf_markinvalid(bp);
3043 buf_brelse(bp);
3044 bp = NULL;
3045
3046 offset += iosize;
3047 }
3048
3049 /*
3050 * Write up to a megabyte
3051 */
3052 offset = bufp;
3053 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3054 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3055 if (bp == NULL) {
3056 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3057 error = EIO;
3058 goto out;
3059 }
3060 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3061 error = (int)buf_bwrite(bp);
3062 bp = NULL;
3063 if (error)
3064 goto out;
3065 offset += iosize;
3066 }
3067 }
3068 out:
3069 if (bp) {
3070 buf_brelse(bp);
3071 }
3072
3073 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3074
3075 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3076
3077 return (error);
3078 }