1/*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/resourcevar.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
41#include <sys/filedesc.h>
42#include <sys/stat.h>
43#include <sys/buf.h>
44#include <sys/proc.h>
45#include <sys/kauth.h>
46#include <sys/vnode.h>
47#include <sys/uio.h>
48#include <sys/vfs_context.h>
49#include <sys/disk.h>
50#include <sys/sysctl.h>
51
52#include <miscfs/specfs/specdev.h>
53
54#include <sys/ubc.h>
55#include <vm/vm_pageout.h>
56#include <vm/vm_kern.h>
57
58#include <sys/kdebug.h>
59
60#include "hfs.h"
61#include "hfs_endian.h"
62#include "hfs_fsctl.h"
63#include "hfs_quota.h"
64#include "hfscommon/headers/FileMgrInternal.h"
65#include "hfscommon/headers/BTreesInternal.h"
66#include "hfs_cnode.h"
67#include "hfs_dbg.h"
68
69extern int overflow_extents(struct filefork *fp);
70
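/*
 * can_cluster(size): true when size is a multiple of 4K and no larger
 * than MAXPHYSIO/2, i.e. a block size the cluster layer can be asked to
 * read ahead on (see the run calculation in hfs_bmap below).
 */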
71#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
72
73enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75};
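/* MAXHFSFILESIZE above is 2^31 - 1 bytes (~2 GB), the plain-HFS file size
   limit enforced by the EFBIG checks in hfs_vnop_read and do_hfs_truncate. */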
76
77extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83static int hfs_clonefile(struct vnode *, int, int, int);
84static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87int flush_cache_on_write = 0;
88SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
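/*
 * Illustrative usage (assuming the standard sysctl(8) tool):
 *   sysctl -w kern.flush_cache_on_write=1
 * makes hfs_vnop_write issue DKIOCSYNCHRONIZECACHE after writes to
 * uncached files.
 */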
89
90
91/*****************************************************************************
92*
93* I/O Operations on vnodes
94*
95*****************************************************************************/
96int hfs_vnop_read(struct vnop_read_args *);
97int hfs_vnop_write(struct vnop_write_args *);
98int hfs_vnop_ioctl(struct vnop_ioctl_args *);
99int hfs_vnop_select(struct vnop_select_args *);
100int hfs_vnop_blktooff(struct vnop_blktooff_args *);
101int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
102int hfs_vnop_blockmap(struct vnop_blockmap_args *);
103int hfs_vnop_strategy(struct vnop_strategy_args *);
104int hfs_vnop_allocate(struct vnop_allocate_args *);
105int hfs_vnop_pagein(struct vnop_pagein_args *);
106int hfs_vnop_pageout(struct vnop_pageout_args *);
107int hfs_vnop_bwrite(struct vnop_bwrite_args *);
108
109
110/*
111 * Read data from a file.
112 */
113int
114hfs_vnop_read(struct vnop_read_args *ap)
115{
116 uio_t uio = ap->a_uio;
117 struct vnode *vp = ap->a_vp;
118 struct cnode *cp;
119 struct filefork *fp;
120 struct hfsmount *hfsmp;
121 off_t filesize;
122 off_t filebytes;
123 off_t start_resid = uio_resid(uio);
124 off_t offset = uio_offset(uio);
125 int retval = 0;
126
127
128 /* Preflight checks */
129 if (!vnode_isreg(vp)) {
130 /* can only read regular files */
131 if (vnode_isdir(vp))
132 return (EISDIR);
133 else
134 return (EPERM);
135 }
136 if (start_resid == 0)
137 return (0); /* Nothing left to do */
138 if (offset < 0)
 139		return (EINVAL);	/* can't read from a negative offset */
140
141 cp = VTOC(vp);
142 fp = VTOF(vp);
143 hfsmp = VTOHFS(vp);
144
145 /* Protect against a size change. */
146 hfs_lock_truncate(cp, 0);
147
148 filesize = fp->ff_size;
149 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
150 if (offset > filesize) {
151 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
152 (offset > (off_t)MAXHFSFILESIZE)) {
153 retval = EFBIG;
154 }
155 goto exit;
156 }
157
158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
159 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
160
161 retval = cluster_read(vp, uio, filesize, 0);
162
163 cp->c_touch_acctime = TRUE;
164
165 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
166 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
167
168 /*
 169	 * Keep track of blocks read
170 */
171 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
172 int took_cnode_lock = 0;
173 off_t bytesread;
174
175 bytesread = start_resid - uio_resid(uio);
176
177 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
178 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
179 hfs_lock(cp, HFS_FORCE_LOCK);
180 took_cnode_lock = 1;
181 }
182 /*
183 * If this file hasn't been seen since the start of
184 * the current sampling period then start over.
185 */
186 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
187 struct timeval tv;
188
189 fp->ff_bytesread = bytesread;
190 microtime(&tv);
191 cp->c_atime = tv.tv_sec;
192 } else {
193 fp->ff_bytesread += bytesread;
194 }
195 if (took_cnode_lock)
196 hfs_unlock(cp);
197 }
198exit:
199 hfs_unlock_truncate(cp);
200 return (retval);
201}
202
203/*
204 * Write data to a file.
205 */
206int
207hfs_vnop_write(struct vnop_write_args *ap)
208{
209 uio_t uio = ap->a_uio;
210 struct vnode *vp = ap->a_vp;
211 struct cnode *cp;
212 struct filefork *fp;
213 struct hfsmount *hfsmp;
214 kauth_cred_t cred = NULL;
215 off_t origFileSize;
216 off_t writelimit;
217 off_t bytesToAdd;
218 off_t actualBytesAdded;
219 off_t filebytes;
220 off_t offset;
221 size_t resid;
222 int eflags;
223 int ioflag = ap->a_ioflag;
224 int retval = 0;
225 int lockflags;
226 int cnode_locked = 0;
227
228 // LP64todo - fix this! uio_resid may be 64-bit value
229 resid = uio_resid(uio);
230 offset = uio_offset(uio);
231
232 if (offset < 0)
233 return (EINVAL);
234 if (resid == 0)
235 return (E_NONE);
236 if (!vnode_isreg(vp))
237 return (EPERM); /* Can only write regular files */
238
239 /* Protect against a size change. */
240 hfs_lock_truncate(VTOC(vp), TRUE);
241
242 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
243 hfs_unlock_truncate(VTOC(vp));
244 return (retval);
245 }
246 cnode_locked = 1;
247 cp = VTOC(vp);
248 fp = VTOF(vp);
249 hfsmp = VTOHFS(vp);
250 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
251
252 if (ioflag & IO_APPEND) {
253 uio_setoffset(uio, fp->ff_size);
254 offset = fp->ff_size;
255 }
256 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
257 retval = EPERM;
258 goto exit;
259 }
260
261 origFileSize = fp->ff_size;
262 eflags = kEFDeferMask; /* defer file block allocations */
263
264#ifdef HFS_SPARSE_DEV
265 /*
266 * When the underlying device is sparse and space
267 * is low (< 8MB), stop doing delayed allocations
268 * and begin doing synchronous I/O.
269 */
270 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
271 (hfs_freeblks(hfsmp, 0) < 2048)) {
272 eflags &= ~kEFDeferMask;
273 ioflag |= IO_SYNC;
274 }
275#endif /* HFS_SPARSE_DEV */
276
277 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
278 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
279
280 /* Now test if we need to extend the file */
281 /* Doing so will adjust the filebytes for us */
282
283 writelimit = offset + resid;
284 if (writelimit <= filebytes)
285 goto sizeok;
286
287 cred = vfs_context_ucred(ap->a_context);
288#if QUOTA
289 bytesToAdd = writelimit - filebytes;
290 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
291 cred, 0);
292 if (retval)
293 goto exit;
294#endif /* QUOTA */
295
296 if (hfs_start_transaction(hfsmp) != 0) {
297 retval = EINVAL;
298 goto exit;
299 }
300
301 while (writelimit > filebytes) {
302 bytesToAdd = writelimit - filebytes;
303 if (cred && suser(cred, NULL) != 0)
304 eflags |= kEFReserveMask;
305
306 /* Protect extents b-tree and allocation bitmap */
307 lockflags = SFL_BITMAP;
308 if (overflow_extents(fp))
309 lockflags |= SFL_EXTENTS;
310 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
311
312 /* Files that are changing size are not hot file candidates. */
313 if (hfsmp->hfc_stage == HFC_RECORDING) {
314 fp->ff_bytesread = 0;
315 }
316 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
317 0, eflags, &actualBytesAdded));
318
319 hfs_systemfile_unlock(hfsmp, lockflags);
320
321 if ((actualBytesAdded == 0) && (retval == E_NONE))
322 retval = ENOSPC;
323 if (retval != E_NONE)
324 break;
325 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
327 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
328 }
329 (void) hfs_update(vp, TRUE);
330 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
331 (void) hfs_end_transaction(hfsmp);
332
333sizeok:
334 if (retval == E_NONE) {
335 off_t filesize;
336 off_t zero_off;
337 off_t tail_off;
338 off_t inval_start;
339 off_t inval_end;
340 off_t io_start;
341 int lflag;
342 struct rl_entry *invalid_range;
343
344 if (writelimit > fp->ff_size)
345 filesize = writelimit;
346 else
347 filesize = fp->ff_size;
348
349 lflag = (ioflag & IO_SYNC);
350
351 if (offset <= fp->ff_size) {
352 zero_off = offset & ~PAGE_MASK_64;
353
 354			/* Check whether the area between zero_off and the start of the
 355			 transfer is invalid and should be zero-filled as part of the
 356			 transfer:
 357			 */
358 if (offset > zero_off) {
359 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
360 lflag |= IO_HEADZEROFILL;
361 }
362 } else {
363 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
364
365 /* The bytes between fp->ff_size and uio->uio_offset must never be
366 read without being zeroed. The current last block is filled with zeroes
367 if it holds valid data but in all cases merely do a little bookkeeping
368 to track the area from the end of the current last page to the start of
369 the area actually written. For the same reason only the bytes up to the
 370			 start of the page where this write will start are invalidated; any remainder
371 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
372
373 Note that inval_start, the start of the page after the current EOF,
374 may be past the start of the write, in which case the zeroing
 375			 will be handled by the cluster_write of the actual data.
376 */
377 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
378 inval_end = offset & ~PAGE_MASK_64;
379 zero_off = fp->ff_size;
380
381 if ((fp->ff_size & PAGE_MASK_64) &&
382 (rl_scan(&fp->ff_invalidranges,
383 eof_page_base,
384 fp->ff_size - 1,
385 &invalid_range) != RL_NOOVERLAP)) {
386 /* The page containing the EOF is not valid, so the
387 entire page must be made inaccessible now. If the write
388 starts on a page beyond the page containing the eof
389 (inval_end > eof_page_base), add the
390 whole page to the range to be invalidated. Otherwise
391 (i.e. if the write starts on the same page), zero-fill
392 the entire page explicitly now:
393 */
394 if (inval_end > eof_page_base) {
395 inval_start = eof_page_base;
396 } else {
397 zero_off = eof_page_base;
398 };
399 };
400
401 if (inval_start < inval_end) {
402 struct timeval tv;
403 /* There's some range of data that's going to be marked invalid */
404
405 if (zero_off < inval_start) {
406 /* The pages between inval_start and inval_end are going to be invalidated,
407 and the actual write will start on a page past inval_end. Now's the last
408 chance to zero-fill the page containing the EOF:
409 */
410 hfs_unlock(cp);
411 cnode_locked = 0;
412 retval = cluster_write(vp, (uio_t) 0,
413 fp->ff_size, inval_start,
414 zero_off, (off_t)0,
415 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
416 hfs_lock(cp, HFS_FORCE_LOCK);
417 cnode_locked = 1;
418 if (retval) goto ioerr_exit;
419 offset = uio_offset(uio);
420 };
421
422 /* Mark the remaining area of the newly allocated space as invalid: */
423 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
424 microuptime(&tv);
425 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
426 zero_off = fp->ff_size = inval_end;
427 };
428
429 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
430 };
431
432 /* Check to see whether the area between the end of the write and the end of
433 the page it falls in is invalid and should be zero-filled as part of the transfer:
434 */
435 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
436 if (tail_off > filesize) tail_off = filesize;
437 if (tail_off > writelimit) {
438 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
439 lflag |= IO_TAILZEROFILL;
440 };
441 };
442
443 /*
444 * if the write starts beyond the current EOF (possibly advanced in the
445 * zeroing of the last block, above), then we'll zero fill from the current EOF
446 * to where the write begins:
447 *
448 * NOTE: If (and ONLY if) the portion of the file about to be written is
449 * before the current EOF it might be marked as invalid now and must be
450 * made readable (removed from the invalid ranges) before cluster_write
451 * tries to write it:
452 */
453 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
454 if (io_start < fp->ff_size) {
455 off_t io_end;
456
457 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
458 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
459 };
460
461 hfs_unlock(cp);
462 cnode_locked = 0;
463 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
464 tail_off, lflag | IO_NOZERODIRTY);
465 offset = uio_offset(uio);
466 if (offset > fp->ff_size) {
467 fp->ff_size = offset;
468
469 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
470 /* Files that are changing size are not hot file candidates. */
471 if (hfsmp->hfc_stage == HFC_RECORDING)
472 fp->ff_bytesread = 0;
473 }
474 if (resid > uio_resid(uio)) {
475 cp->c_touch_chgtime = TRUE;
476 cp->c_touch_modtime = TRUE;
477 }
478 }
479
480 // XXXdbg - testing for vivek and paul lambert
481 {
482 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
483 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
484 }
485 }
486 HFS_KNOTE(vp, NOTE_WRITE);
487
488ioerr_exit:
489 /*
490 * If we successfully wrote any data, and we are not the superuser
491 * we clear the setuid and setgid bits as a precaution against
492 * tampering.
493 */
494 if (cp->c_mode & (S_ISUID | S_ISGID)) {
495 cred = vfs_context_ucred(ap->a_context);
496 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
497 if (!cnode_locked) {
498 hfs_lock(cp, HFS_FORCE_LOCK);
499 cnode_locked = 1;
500 }
501 cp->c_mode &= ~(S_ISUID | S_ISGID);
502 }
503 }
504 if (retval) {
505 if (ioflag & IO_UNIT) {
506 if (!cnode_locked) {
507 hfs_lock(cp, HFS_FORCE_LOCK);
508 cnode_locked = 1;
509 }
510 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
511 0, ap->a_context);
 512			// LP64todo - fix this!  resid needs to be user_ssize_t
513 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
514 uio_setresid(uio, resid);
515 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
516 }
517 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
518 if (!cnode_locked) {
519 hfs_lock(cp, HFS_FORCE_LOCK);
520 cnode_locked = 1;
521 }
522 retval = hfs_update(vp, TRUE);
523 }
524 /* Updating vcbWrCnt doesn't need to be atomic. */
525 hfsmp->vcbWrCnt++;
526
527 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
528 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
529exit:
530 if (cnode_locked)
531 hfs_unlock(cp);
532 hfs_unlock_truncate(cp);
533 return (retval);
534}
535
536/* support for the "bulk-access" fcntl */
537
538#define CACHE_ELEMS 64
539#define CACHE_LEVELS 16
540#define PARENT_IDS_FLAG 0x100
541
542/* from hfs_attrlist.c */
543extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
544 mode_t obj_mode, struct mount *mp,
545 kauth_cred_t cred, struct proc *p);
546
547/* from vfs/vfs_fsevents.c */
548extern char *get_pathbuff(void);
549extern void release_pathbuff(char *buff);
550
551struct access_cache {
552 int numcached;
553 int cachehits; /* these two for statistics gathering */
554 int lookups;
555 unsigned int *acache;
556 Boolean *haveaccess;
557};
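/*
 * The cache is a sorted array of up to CACHE_ELEMS parent cnids (acache)
 * with a parallel array of access results (haveaccess); lookup_bucket()
 * binary-searches it and add_node() inserts, or replaces an entry when
 * the cache is full.
 */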
558
559struct access_t {
560 uid_t uid; /* IN: effective user id */
561 short flags; /* IN: access requested (i.e. R_OK) */
562 short num_groups; /* IN: number of groups user belongs to */
563 int num_files; /* IN: number of files to process */
564 int *file_ids; /* IN: array of file ids */
565 gid_t *groups; /* IN: array of groups */
566 short *access; /* OUT: access info for each file (0 for 'has access') */
567};
568
569struct user_access_t {
570 uid_t uid; /* IN: effective user id */
571 short flags; /* IN: access requested (i.e. R_OK) */
572 short num_groups; /* IN: number of groups user belongs to */
573 int num_files; /* IN: number of files to process */
574 user_addr_t file_ids; /* IN: array of file ids */
575 user_addr_t groups; /* IN: array of groups */
576 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
577};
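/*
 * Hypothetical userspace sketch (not part of this file): a caller fills
 * in an access_t and issues the bulk-access fsctl against any path on
 * the volume, e.g.
 *
 *	struct access_t a;
 *	a.uid = uid;  a.flags = R_OK;
 *	a.num_groups = ngroups;  a.groups = gids;
 *	a.num_files = n;  a.file_ids = ids;  a.access = results;
 *	fsctl("/Volumes/Example", HFS_BULKACCESS_FSCTL, &a, 0);
 *
 * On return, results[i] is 0 when file_ids[i] is reachable with the
 * requested access and an errno value otherwise.
 */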
578
579/*
580 * Perform a binary search for the given parent_id. Return value is
581 * found/not found boolean, and indexp will be the index of the item
582 * or the index at which to insert the item if it's not found.
583 */
584static int
585lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
586{
587 unsigned int lo, hi;
588 int index, matches = 0;
589
590 if (cache->numcached == 0) {
591 *indexp = 0;
592 return 0; // table is empty, so insert at index=0 and report no match
593 }
594
595 if (cache->numcached > CACHE_ELEMS) {
596 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
597 cache->numcached, CACHE_ELEMS);*/
598 cache->numcached = CACHE_ELEMS;
599 }
600
601 lo = 0;
602 hi = cache->numcached - 1;
603 index = -1;
604
605 /* perform binary search for parent_id */
606 do {
607 unsigned int mid = (hi - lo)/2 + lo;
608 unsigned int this_id = cache->acache[mid];
609
610 if (parent_id == this_id) {
611 index = mid;
612 break;
613 }
614
615 if (parent_id < this_id) {
616 hi = mid;
617 continue;
618 }
619
620 if (parent_id > this_id) {
621 lo = mid + 1;
622 continue;
623 }
624 } while(lo < hi);
625
626 /* check if lo and hi converged on the match */
627 if (parent_id == cache->acache[hi]) {
628 index = hi;
629 }
630
631 /* if no existing entry found, find index for new one */
632 if (index == -1) {
633 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
634 matches = 0;
635 } else {
636 matches = 1;
637 }
638
639 *indexp = index;
640 return matches;
641}
642
643/*
644 * Add a node to the access_cache at the given index (or do a lookup first
645 * to find the index if -1 is passed in). We currently do a replace rather
646 * than an insert if the cache is full.
647 */
648static void
649add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
650{
651 int lookup_index = -1;
652
653 /* need to do a lookup first if -1 passed for index */
654 if (index == -1) {
655 if (lookup_bucket(cache, &lookup_index, nodeID)) {
656 if (cache->haveaccess[lookup_index] != access) {
657 /* change access info for existing entry... should never happen */
658 cache->haveaccess[lookup_index] = access;
659 }
660
661 /* mission accomplished */
662 return;
663 } else {
664 index = lookup_index;
665 }
666
667 }
668
669 /* if the cache is full, do a replace rather than an insert */
670 if (cache->numcached >= CACHE_ELEMS) {
671 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
672 cache->numcached = CACHE_ELEMS-1;
673
674 if (index > cache->numcached) {
675 // printf("index %d pinned to %d\n", index, cache->numcached);
676 index = cache->numcached;
677 }
678 } else if (index >= 0 && index < cache->numcached) {
679 /* only do bcopy if we're inserting */
680 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
681 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
682 }
683
684 cache->acache[index] = nodeID;
685 cache->haveaccess[index] = access;
686 cache->numcached++;
687}
688
689
690struct cinfo {
691 uid_t uid;
692 gid_t gid;
693 mode_t mode;
694 cnid_t parentcnid;
695};
696
697static int
698snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
699{
700 struct cinfo *cip = (struct cinfo *)arg;
701
702 cip->uid = attrp->ca_uid;
703 cip->gid = attrp->ca_gid;
704 cip->mode = attrp->ca_mode;
705 cip->parentcnid = descp->cd_parentcnid;
706
707 return (0);
708}
709
710/*
711 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
712 * isn't incore, then go to the catalog.
713 */
714static int
715do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
716 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
717{
718 int error = 0;
719
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid == skip_cp->c_cnid) {
722 cnattrp->ca_uid = skip_cp->c_uid;
723 cnattrp->ca_gid = skip_cp->c_gid;
724 cnattrp->ca_mode = skip_cp->c_mode;
725 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
726 } else {
727 struct cinfo c_info;
728
 729	    /* otherwise, check the cnode hash in case the file/dir is incore */
730 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
731 cnattrp->ca_uid = c_info.uid;
732 cnattrp->ca_gid = c_info.gid;
733 cnattrp->ca_mode = c_info.mode;
734 keyp->hfsPlus.parentID = c_info.parentcnid;
735 } else {
736 int lockflags;
737
738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
739
740 /* lookup this cnid in the catalog */
741 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
742
743 hfs_systemfile_unlock(hfsmp, lockflags);
744
745 cache->lookups++;
746 }
747 }
748
749 return (error);
750}
751
752/*
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
755 */
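/*
 * In brief: every ancestor of nodeID up to the root directory (kRootDirID)
 * must grant search (X_OK) permission; the walk stops early at the first
 * cached or inaccessible ancestor.
 */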
756static int
757do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
758 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
759{
760 int myErr = 0;
761 int myResult;
762 HFSCatalogNodeID thisNodeID;
763 unsigned long myPerms;
764 struct cat_attr cnattr;
765 int cache_index = -1;
766 CatalogKey catkey;
767
768 int i = 0, ids_to_cache = 0;
769 int parent_ids[CACHE_LEVELS];
770
771 /* root always has access */
772 if (!suser(myp_ucred, NULL)) {
773 return (1);
774 }
775
776 thisNodeID = nodeID;
777 while (thisNodeID >= kRootDirID) {
778 myResult = 0; /* default to "no access" */
779
780 /* check the cache before resorting to hitting the catalog */
781
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
784
785 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
786 cache->cachehits++;
787 myResult = cache->haveaccess[cache_index];
788 goto ExitThisRoutine;
789 }
790
791 /* remember which parents we want to cache */
792 if (ids_to_cache < CACHE_LEVELS) {
793 parent_ids[ids_to_cache] = thisNodeID;
794 ids_to_cache++;
795 }
796
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
799 if (myErr) {
800 goto ExitThisRoutine; /* no access */
801 }
802
803 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
804 cnattr.ca_mode, hfsmp->hfs_mp,
805 myp_ucred, theProcPtr);
806
807 if ( (myPerms & X_OK) == 0 ) {
808 myResult = 0;
809 goto ExitThisRoutine; /* no access */
810 }
811
812 /* up the hierarchy we go */
813 thisNodeID = catkey.hfsPlus.parentID;
814 }
815
816 /* if here, we have access to this node */
817 myResult = 1;
818
819 ExitThisRoutine:
820 if (myErr) {
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
822 myResult = 0;
823 }
824 *err = myErr;
825
826 /* cache the parent directory(ies) */
827 for (i = 0; i < ids_to_cache; i++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache, -1, parent_ids[i], myResult);
831 }
832
833 return (myResult);
834}
835/* end "bulk-access" support */
836
837
838
839/*
840 * Callback for use with freeze ioctl.
841 */
842static int
843hfs_freezewrite_callback(struct vnode *vp, void *cargs)
844{
845 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
846
847 return 0;
848}
849
850/*
851 * Control filesystem operating characteristics.
852 */
853int
854hfs_vnop_ioctl( struct vnop_ioctl_args /* {
855 vnode_t a_vp;
856 int a_command;
857 caddr_t a_data;
858 int a_fflag;
859 vfs_context_t a_context;
860 } */ *ap)
861{
862 struct vnode * vp = ap->a_vp;
863 struct hfsmount *hfsmp = VTOHFS(vp);
864 vfs_context_t context = ap->a_context;
865 kauth_cred_t cred = vfs_context_ucred(context);
866 proc_t p = vfs_context_proc(context);
867 struct vfsstatfs *vfsp;
868 boolean_t is64bit;
869
870 is64bit = proc_is64bit(p);
871
872 switch (ap->a_command) {
873
874 case HFS_RESIZE_VOLUME: {
875 u_int64_t newsize;
876 u_int64_t cursize;
877
878 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
879 if (suser(cred, NULL) &&
880 kauth_cred_getuid(cred) != vfsp->f_owner) {
881 return (EACCES); /* must be owner of file system */
882 }
883 if (!vnode_isvroot(vp)) {
884 return (EINVAL);
885 }
886 newsize = *(u_int64_t *)ap->a_data;
887 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
888
889 if (newsize > cursize) {
890 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
891 } else if (newsize < cursize) {
892 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
893 } else {
894 return (0);
895 }
896 }
897 case HFS_CHANGE_NEXT_ALLOCATION: {
898 u_int32_t location;
899
900 if (vnode_vfsisrdonly(vp)) {
901 return (EROFS);
902 }
903 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
904 if (suser(cred, NULL) &&
905 kauth_cred_getuid(cred) != vfsp->f_owner) {
906 return (EACCES); /* must be owner of file system */
907 }
908 if (!vnode_isvroot(vp)) {
909 return (EINVAL);
910 }
911 location = *(u_int32_t *)ap->a_data;
912 if (location > hfsmp->totalBlocks - 1) {
913 return (EINVAL);
914 }
915 /* Return previous value. */
916 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
917 HFS_MOUNT_LOCK(hfsmp, TRUE);
918 hfsmp->nextAllocation = location;
919 hfsmp->vcbFlags |= 0xFF00;
920 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
921 return (0);
922 }
923
924#ifdef HFS_SPARSE_DEV
925 case HFS_SETBACKINGSTOREINFO: {
926 struct vnode * bsfs_rootvp;
927 struct vnode * di_vp;
928 struct hfs_backingstoreinfo *bsdata;
929 int error = 0;
930
931 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
932 return (EALREADY);
933 }
934 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
935 if (suser(cred, NULL) &&
936 kauth_cred_getuid(cred) != vfsp->f_owner) {
937 return (EACCES); /* must be owner of file system */
938 }
939 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
940 if (bsdata == NULL) {
941 return (EINVAL);
942 }
943 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
944 return (error);
945 }
946 if ((error = vnode_getwithref(di_vp))) {
947 file_drop(bsdata->backingfd);
948 return(error);
949 }
950
951 if (vnode_mount(vp) == vnode_mount(di_vp)) {
952 (void)vnode_put(di_vp);
953 file_drop(bsdata->backingfd);
954 return (EINVAL);
955 }
956
957 /*
958 * Obtain the backing fs root vnode and keep a reference
959 * on it. This reference will be dropped in hfs_unmount.
960 */
961 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
962 if (error) {
963 (void)vnode_put(di_vp);
964 file_drop(bsdata->backingfd);
965 return (error);
966 }
967 vnode_ref(bsfs_rootvp);
968 vnode_put(bsfs_rootvp);
969
970 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
971 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
972 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
973 hfsmp->hfs_sparsebandblks *= 4;
974
975 (void)vnode_put(di_vp);
976 file_drop(bsdata->backingfd);
977 return (0);
978 }
979 case HFS_CLRBACKINGSTOREINFO: {
980 struct vnode * tmpvp;
981
982 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
983 if (suser(cred, NULL) &&
984 kauth_cred_getuid(cred) != vfsp->f_owner) {
985 return (EACCES); /* must be owner of file system */
986 }
987 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
988 hfsmp->hfs_backingfs_rootvp) {
989
990 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
991 tmpvp = hfsmp->hfs_backingfs_rootvp;
992 hfsmp->hfs_backingfs_rootvp = NULLVP;
993 hfsmp->hfs_sparsebandblks = 0;
994 vnode_rele(tmpvp);
995 }
996 return (0);
997 }
998#endif /* HFS_SPARSE_DEV */
999
1000 case F_FREEZE_FS: {
1001 struct mount *mp;
1002 task_t task;
1003
1004 if (!is_suser())
1005 return (EACCES);
1006
1007 mp = vnode_mount(vp);
1008 hfsmp = VFSTOHFS(mp);
1009
1010 if (!(hfsmp->jnl))
1011 return (ENOTSUP);
1012
1013 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1014
1015 task = current_task();
1016 task_working_set_disable(task);
1017
1018 // flush things before we get started to try and prevent
1019 // dirty data from being paged out while we're frozen.
1020 // note: can't do this after taking the lock as it will
1021 // deadlock against ourselves.
1022 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1023 hfs_global_exclusive_lock_acquire(hfsmp);
1024 journal_flush(hfsmp->jnl);
1025
1026 // don't need to iterate on all vnodes, we just need to
1027 // wait for writes to the system files and the device vnode
1028 if (HFSTOVCB(hfsmp)->extentsRefNum)
1029 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1030 if (HFSTOVCB(hfsmp)->catalogRefNum)
1031 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1032 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1033 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1034 if (hfsmp->hfs_attribute_vp)
1035 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1036 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1037
1038 hfsmp->hfs_freezing_proc = current_proc();
1039
1040 return (0);
1041 }
1042
1043 case F_THAW_FS: {
1044 if (!is_suser())
1045 return (EACCES);
1046
1047 // if we're not the one who froze the fs then we
1048 // can't thaw it.
1049 if (hfsmp->hfs_freezing_proc != current_proc()) {
1050 return EPERM;
1051 }
1052
1053 // NOTE: if you add code here, also go check the
1054 // code that "thaws" the fs in hfs_vnop_close()
1055 //
1056 hfsmp->hfs_freezing_proc = NULL;
1057 hfs_global_exclusive_lock_release(hfsmp);
1058 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1059
1060 return (0);
1061 }
1062
1063#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1064#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1065
1066 case HFS_BULKACCESS_FSCTL:
1067 case HFS_BULKACCESS: {
1068 /*
 1069	    * NOTE: on entry, the vnode is locked. In case this vnode
 1070	    * happens to be in our list of file_ids, we note it and
 1071	    * avoid calling hfs_chashget_nowait() on that id, as that
 1072	    * would cause a "locking against myself" panic.
1073 */
1074 Boolean check_leaf = true;
1075
1076 struct user_access_t *user_access_structp;
1077 struct user_access_t tmp_user_access_t;
1078 struct access_cache cache;
1079
1080 int error = 0, i;
1081
1082 dev_t dev = VTOC(vp)->c_dev;
1083
1084 short flags;
1085 struct ucred myucred; /* XXX ILLEGAL */
1086 int num_files;
1087 int *file_ids = NULL;
1088 short *access = NULL;
1089
1090 cnid_t cnid;
1091 cnid_t prevParent_cnid = 0;
1092 unsigned long myPerms;
1093 short myaccess = 0;
1094 struct cat_attr cnattr;
1095 CatalogKey catkey;
1096 struct cnode *skip_cp = VTOC(vp);
1097 struct vfs_context my_context;
1098
1099 /* first, return error if not run as root */
1100 if (cred->cr_ruid != 0) {
1101 return EPERM;
1102 }
1103
1104 /* initialize the local cache and buffers */
1105 cache.numcached = 0;
1106 cache.cachehits = 0;
1107 cache.lookups = 0;
1108
1109 file_ids = (int *) get_pathbuff();
1110 access = (short *) get_pathbuff();
1111 cache.acache = (int *) get_pathbuff();
1112 cache.haveaccess = (Boolean *) get_pathbuff();
1113
1114 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1115 release_pathbuff((char *) file_ids);
1116 release_pathbuff((char *) access);
1117 release_pathbuff((char *) cache.acache);
1118 release_pathbuff((char *) cache.haveaccess);
1119
1120 return ENOMEM;
1121 }
1122
1123 /* struct copyin done during dispatch... need to copy file_id array separately */
1124 if (ap->a_data == NULL) {
1125 error = EINVAL;
1126 goto err_exit_bulk_access;
1127 }
1128
1129 if (is64bit) {
1130 user_access_structp = (struct user_access_t *)ap->a_data;
1131 }
1132 else {
1133 struct access_t * accessp = (struct access_t *)ap->a_data;
1134 tmp_user_access_t.uid = accessp->uid;
1135 tmp_user_access_t.flags = accessp->flags;
1136 tmp_user_access_t.num_groups = accessp->num_groups;
1137 tmp_user_access_t.num_files = accessp->num_files;
1138 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1139 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1140 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1141 user_access_structp = &tmp_user_access_t;
1142 }
1143
1144 num_files = user_access_structp->num_files;
1145 if (num_files < 1) {
1146 goto err_exit_bulk_access;
1147 }
1148 if (num_files > 256) {
1149 error = EINVAL;
1150 goto err_exit_bulk_access;
1151 }
1152
1153 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1154 num_files * sizeof(int)))) {
1155 goto err_exit_bulk_access;
1156 }
1157
1158 /* fill in the ucred structure */
1159 flags = user_access_structp->flags;
1160 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1161 flags = R_OK;
1162 }
1163
1164 /* check if we've been passed leaf node ids or parent ids */
1165 if (flags & PARENT_IDS_FLAG) {
1166 check_leaf = false;
1167 }
1168
1169 memset(&myucred, 0, sizeof(myucred));
1170 myucred.cr_ref = 1;
1171 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1172 myucred.cr_ngroups = user_access_structp->num_groups;
1173 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1174 myucred.cr_ngroups = 0;
1175 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1176 myucred.cr_ngroups * sizeof(gid_t)))) {
1177 goto err_exit_bulk_access;
1178 }
1179 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1180 myucred.cr_gmuid = myucred.cr_uid;
1181
1182 my_context.vc_proc = p;
1183 my_context.vc_ucred = &myucred;
1184
1185 /* Check access to each file_id passed in */
1186 for (i = 0; i < num_files; i++) {
1187#if 0
1188 cnid = (cnid_t) file_ids[i];
1189
1190 /* root always has access */
1191 if (!suser(&myucred, NULL)) {
1192 access[i] = 0;
1193 continue;
1194 }
1195
1196 if (check_leaf) {
1197
1198 /* do the lookup (checks the cnode hash, then the catalog) */
1199 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1200 if (error) {
1201 access[i] = (short) error;
1202 continue;
1203 }
1204
1205 /* before calling CheckAccess(), check the target file for read access */
1206 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1207 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1208
1209
1210 /* fail fast if no access */
1211 if ((myPerms & flags) == 0) {
1212 access[i] = EACCES;
1213 continue;
1214 }
1215 } else {
1216 /* we were passed an array of parent ids */
1217 catkey.hfsPlus.parentID = cnid;
1218 }
1219
1220 /* if the last guy had the same parent and had access, we're done */
1221 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1222 cache.cachehits++;
1223 access[i] = 0;
1224 continue;
1225 }
1226
1227 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1228 skip_cp, p, &myucred, dev);
1229
1230 if ( myaccess ) {
1231 access[i] = 0; // have access.. no errors to report
1232 } else {
1233 access[i] = (error != 0 ? (short) error : EACCES);
1234 }
1235
1236 prevParent_cnid = catkey.hfsPlus.parentID;
1237#else
1238 int myErr;
1239
1240 cnid = (cnid_t)file_ids[i];
1241
1242 while (cnid >= kRootDirID) {
1243 /* get the vnode for this cnid */
1244 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1245 if ( myErr ) {
1246 access[i] = EACCES;
1247 break;
1248 }
1249
1250 cnid = VTOC(vp)->c_parentcnid;
1251
1252 hfs_unlock(VTOC(vp));
1253 if (vnode_vtype(vp) == VDIR) {
1254 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1255 } else {
1256 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1257 }
1258 vnode_put(vp);
1259 access[i] = myErr;
1260 if (myErr) {
1261 break;
1262 }
1263 }
1264#endif
1265 }
1266
1267 /* copyout the access array */
1268 if ((error = copyout((caddr_t)access, user_access_structp->access,
1269 num_files * sizeof (short)))) {
1270 goto err_exit_bulk_access;
1271 }
1272
1273 err_exit_bulk_access:
1274
1275 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1276
1277 release_pathbuff((char *) cache.acache);
1278 release_pathbuff((char *) cache.haveaccess);
1279 release_pathbuff((char *) file_ids);
1280 release_pathbuff((char *) access);
1281
1282 return (error);
1283 } /* HFS_BULKACCESS */
1284
1285 case HFS_SETACLSTATE: {
1286 int state;
1287
1288 if (ap->a_data == NULL) {
1289 return (EINVAL);
1290 }
1291
1292 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1293 state = *(int *)ap->a_data;
1294
1295 // super-user can enable or disable acl's on a volume.
1296 // the volume owner can only enable acl's
1297 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1298 return (EPERM);
1299 }
1300 if (state == 0 || state == 1)
1301 return hfs_setextendedsecurity(hfsmp, state);
1302 else
1303 return (EINVAL);
1304 }
1305
1306 case F_FULLFSYNC: {
1307 int error;
1308
1309 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1310 if (error == 0) {
1311 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1312 hfs_unlock(VTOC(vp));
1313 }
1314
1315 return error;
1316 }
1317
1318 case F_CHKCLEAN: {
1319 register struct cnode *cp;
1320 int error;
1321
1322 if (!vnode_isreg(vp))
1323 return EINVAL;
1324
1325 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1326 if (error == 0) {
1327 cp = VTOC(vp);
1328 /*
1329 * used by regression test to determine if
1330 * all the dirty pages (via write) have been cleaned
 1331			 * after a call to 'fsync'.
1332 */
1333 error = is_file_clean(vp, VTOF(vp)->ff_size);
1334 hfs_unlock(cp);
1335 }
1336 return (error);
1337 }
1338
1339 case F_RDADVISE: {
1340 register struct radvisory *ra;
1341 struct filefork *fp;
1342 int error;
1343
1344 if (!vnode_isreg(vp))
1345 return EINVAL;
1346
1347 ra = (struct radvisory *)(ap->a_data);
1348 fp = VTOF(vp);
1349
1350 /* Protect against a size change. */
1351 hfs_lock_truncate(VTOC(vp), TRUE);
1352
1353 if (ra->ra_offset >= fp->ff_size) {
1354 error = EFBIG;
1355 } else {
1356 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1357 }
1358
1359 hfs_unlock_truncate(VTOC(vp));
1360 return (error);
1361 }
1362
1363 case F_READBOOTSTRAP:
1364 case F_WRITEBOOTSTRAP:
1365 {
1366 struct vnode *devvp = NULL;
1367 user_fbootstraptransfer_t *user_bootstrapp;
1368 int devBlockSize;
1369 int error;
1370 uio_t auio;
1371 daddr64_t blockNumber;
1372 u_long blockOffset;
1373 u_long xfersize;
1374 struct buf *bp;
1375 user_fbootstraptransfer_t user_bootstrap;
1376
1377 if (!vnode_isvroot(vp))
1378 return (EINVAL);
 1379	    /* LP64 - when the caller is a 64-bit process we are passed a pointer
 1380	     * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
 1381	     * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
 1382	     */
1383 if (is64bit) {
1384 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1385 }
1386 else {
1387 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1388 user_bootstrapp = &user_bootstrap;
1389 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1390 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1391 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1392 }
1393 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1394 return EINVAL;
1395
1396 devvp = VTOHFS(vp)->hfs_devvp;
1397 auio = uio_create(1, user_bootstrapp->fbt_offset,
1398 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1399 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1400 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1401
1402 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1403
1404 while (uio_resid(auio) > 0) {
1405 blockNumber = uio_offset(auio) / devBlockSize;
1406 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1407 if (error) {
1408 if (bp) buf_brelse(bp);
1409 uio_free(auio);
1410 return error;
1411 };
1412
1413 blockOffset = uio_offset(auio) % devBlockSize;
1414 xfersize = devBlockSize - blockOffset;
1415 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1416 if (error) {
1417 buf_brelse(bp);
1418 uio_free(auio);
1419 return error;
1420 };
1421 if (uio_rw(auio) == UIO_WRITE) {
1422 error = VNOP_BWRITE(bp);
1423 if (error) {
1424 uio_free(auio);
1425 return error;
1426 }
1427 } else {
1428 buf_brelse(bp);
1429 };
1430 };
1431 uio_free(auio);
1432 };
1433 return 0;
1434
1435 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1436 {
1437 if (is64bit) {
1438 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1439 }
1440 else {
1441 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1442 }
1443 return 0;
1444 }
1445
1446 case HFS_GET_MOUNT_TIME:
1447 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1448 break;
1449
1450 case HFS_GET_LAST_MTIME:
1451 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1452 break;
1453
1454 case HFS_SET_BOOT_INFO:
1455 if (!vnode_isvroot(vp))
1456 return(EINVAL);
1457 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1458 return(EACCES); /* must be superuser or owner of filesystem */
1459 HFS_MOUNT_LOCK(hfsmp, TRUE);
1460 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1461 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1462 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1463 break;
1464
1465 case HFS_GET_BOOT_INFO:
1466 if (!vnode_isvroot(vp))
1467 return(EINVAL);
1468 HFS_MOUNT_LOCK(hfsmp, TRUE);
1469 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1470 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1471 break;
1472
1473 default:
1474 return (ENOTTY);
1475 }
1476
1477 /* Should never get here */
1478 return 0;
1479}
1480
1481/*
1482 * select
1483 */
1484int
1485hfs_vnop_select(__unused struct vnop_select_args *ap)
1486/*
1487 struct vnop_select_args {
1488 vnode_t a_vp;
1489 int a_which;
1490 int a_fflags;
1491 void *a_wql;
1492 vfs_context_t a_context;
1493 };
1494*/
1495{
1496 /*
1497 * We should really check to see if I/O is possible.
1498 */
1499 return (1);
1500}
1501
1502/*
1503 * Converts a logical block number to a physical block, and optionally returns
1504 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 1505 * The physical block number is based on the device block size, currently 512 bytes.
 1506 * The block run is returned in logical blocks, and is the REMAINING number of blocks
1507 */
1508int
1509hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1510{
1511 struct cnode *cp = VTOC(vp);
1512 struct filefork *fp = VTOF(vp);
1513 struct hfsmount *hfsmp = VTOHFS(vp);
1514 int retval = E_NONE;
1515 daddr_t logBlockSize;
1516 size_t bytesContAvail = 0;
1517 off_t blockposition;
1518 int lockExtBtree;
1519 int lockflags = 0;
1520
1521 /*
1522 * Check for underlying vnode requests and ensure that logical
1523 * to physical mapping is requested.
1524 */
1525 if (vpp != NULL)
1526 *vpp = cp->c_devvp;
1527 if (bnp == NULL)
1528 return (0);
1529
1530 logBlockSize = GetLogicalBlockSize(vp);
1531 blockposition = (off_t)bn * (off_t)logBlockSize;
1532
1533 lockExtBtree = overflow_extents(fp);
1534
1535 if (lockExtBtree)
1536 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1537
1538 retval = MacToVFSError(
1539 MapFileBlockC (HFSTOVCB(hfsmp),
1540 (FCB*)fp,
1541 MAXPHYSIO,
1542 blockposition,
1543 bnp,
1544 &bytesContAvail));
1545
1546 if (lockExtBtree)
1547 hfs_systemfile_unlock(hfsmp, lockflags);
1548
1549 if (retval == E_NONE) {
1550 /* Figure out how many read ahead blocks there are */
1551 if (runp != NULL) {
1552 if (can_cluster(logBlockSize)) {
1553 /* Make sure this result never goes negative: */
1554 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1555 } else {
1556 *runp = 0;
1557 }
1558 }
1559 }
1560 return (retval);
1561}
1562
1563/*
1564 * Convert logical block number to file offset.
1565 */
1566int
1567hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1568/*
1569 struct vnop_blktooff_args {
1570 vnode_t a_vp;
1571 daddr64_t a_lblkno;
1572 off_t *a_offset;
1573 };
1574*/
1575{
1576 if (ap->a_vp == NULL)
1577 return (EINVAL);
1578 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1579
1580 return(0);
1581}
1582
1583/*
1584 * Convert file offset to logical block number.
1585 */
1586int
1587hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1588/*
1589 struct vnop_offtoblk_args {
1590 vnode_t a_vp;
1591 off_t a_offset;
1592 daddr64_t *a_lblkno;
1593 };
1594*/
1595{
1596 if (ap->a_vp == NULL)
1597 return (EINVAL);
1598 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1599
1600 return(0);
1601}
1602
1603/*
1604 * Map file offset to physical block number.
1605 *
1606 * System file cnodes are expected to be locked (shared or exclusive).
1607 */
1608int
1609hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1610/*
1611 struct vnop_blockmap_args {
1612 vnode_t a_vp;
1613 off_t a_foffset;
1614 size_t a_size;
1615 daddr64_t *a_bpn;
1616 size_t *a_run;
1617 void *a_poff;
1618 int a_flags;
1619 vfs_context_t a_context;
1620 };
1621*/
1622{
1623 struct vnode *vp = ap->a_vp;
1624 struct cnode *cp;
1625 struct filefork *fp;
1626 struct hfsmount *hfsmp;
1627 size_t bytesContAvail = 0;
1628 int retval = E_NONE;
1629 int syslocks = 0;
1630 int lockflags = 0;
1631 struct rl_entry *invalid_range;
1632 enum rl_overlaptype overlaptype;
1633 int started_tr = 0;
1634 int tooklock = 0;
1635
1636 /* Do not allow blockmap operation on a directory */
1637 if (vnode_isdir(vp)) {
1638 return (ENOTSUP);
1639 }
1640
1641 /*
1642 * Check for underlying vnode requests and ensure that logical
1643 * to physical mapping is requested.
1644 */
1645 if (ap->a_bpn == NULL)
1646 return (0);
1647
1648 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1649 if (VTOC(vp)->c_lockowner != current_thread()) {
1650 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1651 tooklock = 1;
1652 } else {
1653 cp = VTOC(vp);
1654 panic("blockmap: %s cnode lock already held!\n",
1655 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1656 }
1657 }
1658 hfsmp = VTOHFS(vp);
1659 cp = VTOC(vp);
1660 fp = VTOF(vp);
1661
1662retry:
1663 if (fp->ff_unallocblocks) {
1664 if (hfs_start_transaction(hfsmp) != 0) {
1665 retval = EINVAL;
1666 goto exit;
1667 } else {
1668 started_tr = 1;
1669 }
1670 syslocks = SFL_EXTENTS | SFL_BITMAP;
1671
1672 } else if (overflow_extents(fp)) {
1673 syslocks = SFL_EXTENTS;
1674 }
1675
1676 if (syslocks)
1677 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1678
1679 /*
1680 * Check for any delayed allocations.
1681 */
1682 if (fp->ff_unallocblocks) {
1683 SInt64 actbytes;
1684 u_int32_t loanedBlocks;
1685
1686 //
1687 // Make sure we have a transaction. It's possible
1688 // that we came in and fp->ff_unallocblocks was zero
1689 // but during the time we blocked acquiring the extents
1690 // btree, ff_unallocblocks became non-zero and so we
1691 // will need to start a transaction.
1692 //
1693 if (started_tr == 0) {
1694 if (syslocks) {
1695 hfs_systemfile_unlock(hfsmp, lockflags);
1696 syslocks = 0;
1697 }
1698 goto retry;
1699 }
1700
1701 /*
 1702		 * Note: ExtendFileC will release any blocks on loan and
 1703		 * acquire real blocks.  So we ask to extend by zero bytes
1704 * since ExtendFileC will account for the virtual blocks.
1705 */
1706
1707 loanedBlocks = fp->ff_unallocblocks;
1708 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1709 kEFAllMask | kEFNoClumpMask, &actbytes);
1710
1711 if (retval) {
1712 fp->ff_unallocblocks = loanedBlocks;
1713 cp->c_blocks += loanedBlocks;
1714 fp->ff_blocks += loanedBlocks;
1715
1716 HFS_MOUNT_LOCK(hfsmp, TRUE);
1717 hfsmp->loanedBlocks += loanedBlocks;
1718 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1719 }
1720
1721 if (retval) {
1722 hfs_systemfile_unlock(hfsmp, lockflags);
1723 cp->c_flag |= C_MODIFIED;
1724 if (started_tr) {
1725 (void) hfs_update(vp, TRUE);
1726 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1727
1728 hfs_end_transaction(hfsmp);
1729 }
1730 goto exit;
1731 }
1732 }
1733
1734 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1735 ap->a_bpn, &bytesContAvail);
1736 if (syslocks) {
1737 hfs_systemfile_unlock(hfsmp, lockflags);
1738 syslocks = 0;
1739 }
1740
1741 if (started_tr) {
1742 (void) hfs_update(vp, TRUE);
1743 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1744 hfs_end_transaction(hfsmp);
1745 started_tr = 0;
1746 }
1747 if (retval) {
1748 goto exit;
1749 }
1750
1751 /* Adjust the mapping information for invalid file ranges: */
1752 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1753 ap->a_foffset + (off_t)bytesContAvail - 1,
1754 &invalid_range);
1755 if (overlaptype != RL_NOOVERLAP) {
1756 switch(overlaptype) {
1757 case RL_MATCHINGOVERLAP:
1758 case RL_OVERLAPCONTAINSRANGE:
1759 case RL_OVERLAPSTARTSBEFORE:
1760 /* There's no valid block for this byte offset: */
1761 *ap->a_bpn = (daddr64_t)-1;
1762 /* There's no point limiting the amount to be returned
1763 * if the invalid range that was hit extends all the way
 1764			 * to the EOF (i.e. there are no valid bytes between the
1765 * end of this range and the file's EOF):
1766 */
1767 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1768 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1769 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1770 }
1771 break;
1772
1773 case RL_OVERLAPISCONTAINED:
1774 case RL_OVERLAPENDSAFTER:
1775 /* The range of interest hits an invalid block before the end: */
1776 if (invalid_range->rl_start == ap->a_foffset) {
1777 /* There's actually no valid information to be had starting here: */
1778 *ap->a_bpn = (daddr64_t)-1;
1779 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1780 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1781 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1782 }
1783 } else {
1784 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1785 }
1786 break;
1787
1788 case RL_NOOVERLAP:
1789 break;
1790 } /* end switch */
1791 if (bytesContAvail > ap->a_size)
1792 bytesContAvail = ap->a_size;
1793 }
1794 if (ap->a_run)
1795 *ap->a_run = bytesContAvail;
1796
1797 if (ap->a_poff)
1798 *(int *)ap->a_poff = 0;
1799exit:
1800 if (tooklock)
1801 hfs_unlock(cp);
1802
1803 return (MacToVFSError(retval));
1804}
1805
1806
1807/*
1808 * prepare and issue the I/O
1809 * buf_strategy knows how to deal
1810 * with requests that require
1811 * fragmented I/Os
1812 */
1813int
1814hfs_vnop_strategy(struct vnop_strategy_args *ap)
1815{
1816 buf_t bp = ap->a_bp;
1817 vnode_t vp = buf_vnode(bp);
1818 struct cnode *cp = VTOC(vp);
1819
1820 return (buf_strategy(cp->c_devvp, ap));
1821}
1822
1823
1824static int
1825do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1826{
1827 register struct cnode *cp = VTOC(vp);
1828 struct filefork *fp = VTOF(vp);
 1829	struct proc *p = vfs_context_proc(context);
1830 kauth_cred_t cred = vfs_context_ucred(context);
1831 int retval;
1832 off_t bytesToAdd;
1833 off_t actualBytesAdded;
1834 off_t filebytes;
1835 u_int64_t old_filesize;
1836 u_long fileblocks;
1837 int blksize;
1838 struct hfsmount *hfsmp;
1839 int lockflags;
1840
1841 blksize = VTOVCB(vp)->blockSize;
1842 fileblocks = fp->ff_blocks;
1843 filebytes = (off_t)fileblocks * (off_t)blksize;
1844 old_filesize = fp->ff_size;
1845
1846 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1847 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1848
1849 if (length < 0)
1850 return (EINVAL);
1851
1852 /* This should only happen with a corrupt filesystem */
1853 if ((off_t)fp->ff_size < 0)
1854 return (EINVAL);
1855
1856 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1857 return (EFBIG);
1858
1859 hfsmp = VTOHFS(vp);
1860
1861 retval = E_NONE;
1862
1863 /* Files that are changing size are not hot file candidates. */
1864 if (hfsmp->hfc_stage == HFC_RECORDING) {
1865 fp->ff_bytesread = 0;
1866 }
1867
1868 /*
1869 * We cannot just check if fp->ff_size == length (as an optimization)
1870 * since there may be extra physical blocks that also need truncation.
1871 */
1872#if QUOTA
1873 if ((retval = hfs_getinoquota(cp)))
1874 return(retval);
1875#endif /* QUOTA */
1876
1877 /*
1878 * Lengthen the size of the file. We must ensure that the
1879 * last byte of the file is allocated. Since the smallest
1880 * value of ff_size is 0, length will be at least 1.
1881 */
1882 if (length > (off_t)fp->ff_size) {
1883#if QUOTA
1884 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1885 cred, 0);
1886 if (retval)
1887 goto Err_Exit;
1888#endif /* QUOTA */
1889 /*
1890 * If we don't have enough physical space then
1891 * we need to extend the physical size.
1892 */
1893 if (length > filebytes) {
1894 int eflags;
1895 u_long blockHint = 0;
1896
1897 /* All or nothing and don't round up to clumpsize. */
1898 eflags = kEFAllMask | kEFNoClumpMask;
1899
1900 if (cred && suser(cred, NULL) != 0)
1901 eflags |= kEFReserveMask; /* keep a reserve */
1902
1903 /*
1904 * Allocate Journal and Quota files in metadata zone.
1905 */
1906 if (filebytes == 0 &&
1907 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1908 hfs_virtualmetafile(cp)) {
1909 eflags |= kEFMetadataMask;
1910 blockHint = hfsmp->hfs_metazone_start;
1911 }
1912 if (hfs_start_transaction(hfsmp) != 0) {
1913 retval = EINVAL;
1914 goto Err_Exit;
1915 }
1916
1917 /* Protect extents b-tree and allocation bitmap */
1918 lockflags = SFL_BITMAP;
1919 if (overflow_extents(fp))
1920 lockflags |= SFL_EXTENTS;
1921 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1922
1923 while ((length > filebytes) && (retval == E_NONE)) {
1924 bytesToAdd = length - filebytes;
1925 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1926 (FCB*)fp,
1927 bytesToAdd,
1928 blockHint,
1929 eflags,
1930 &actualBytesAdded));
1931
1932 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1933 if (actualBytesAdded == 0 && retval == E_NONE) {
1934 if (length > filebytes)
1935 length = filebytes;
1936 break;
1937 }
1938 } /* endwhile */
1939
1940 hfs_systemfile_unlock(hfsmp, lockflags);
1941
1942 if (hfsmp->jnl) {
1943 (void) hfs_update(vp, TRUE);
1944 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1945 }
1946
1947 hfs_end_transaction(hfsmp);
1948
1949 if (retval)
1950 goto Err_Exit;
1951
1952 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1953 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1954 }
1955
1956 if (!(flags & IO_NOZEROFILL)) {
1957 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1958 struct rl_entry *invalid_range;
1959 off_t zero_limit;
1960
1961 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1962 if (length < zero_limit) zero_limit = length;
1963
1964 if (length > (off_t)fp->ff_size) {
1965 struct timeval tv;
1966
1967 /* Extending the file: time to fill out the current last page with zeroes? */
1968 if ((fp->ff_size & PAGE_MASK_64) &&
1969 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1970 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1971
1972 /* There's some valid data at the start of the (current) last page
1973 of the file, so zero out the remainder of that page to ensure the
1974 entire page contains valid data. Since there is no invalid range
1975 possible past the (current) eof, there's no need to remove anything
1976 from the invalid range list before calling cluster_write(): */
1977 hfs_unlock(cp);
1978 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1979 fp->ff_size, (off_t)0,
1980 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1981 hfs_lock(cp, HFS_FORCE_LOCK);
1982 if (retval) goto Err_Exit;
1983
1984 /* Merely invalidate the remaining area, if necessary: */
1985 if (length > zero_limit) {
1986 microuptime(&tv);
1987 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1988 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1989 }
1990 } else {
1991 /* The page containing the (current) eof is invalid: just add the
1992 remainder of the page to the invalid list, along with the area
1993 being newly allocated:
1994 */
1995 microuptime(&tv);
1996 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1997 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1998 };
1999 }
2000 } else {
2001 panic("hfs_truncate: invoked on non-UBC object?!");
2002 };
2003 }
2004 cp->c_touch_modtime = TRUE;
2005 fp->ff_size = length;
2006
2007 /* Nested transactions will do their own ubc_setsize. */
2008 if (!skipsetsize) {
2009 /*
2010 * ubc_setsize can cause a pagein here
2011 * so we need to drop cnode lock.
2012 */
2013 hfs_unlock(cp);
2014 ubc_setsize(vp, length);
2015 hfs_lock(cp, HFS_FORCE_LOCK);
2016 }
2017
2018 } else { /* Shorten the size of the file */
2019
2020 if ((off_t)fp->ff_size > length) {
2021 /*
2022 * Any buffers that are past the truncation point need to be
2023 * invalidated (to maintain buffer cache consistency).
2024 */
2025
2026 /* Nested transactions will do their own ubc_setsize. */
2027 if (!skipsetsize) {
2028 /*
2029 * ubc_setsize can cause a pageout here
2030 * so we need to drop cnode lock.
2031 */
2032 hfs_unlock(cp);
2033 ubc_setsize(vp, length);
2034 hfs_lock(cp, HFS_FORCE_LOCK);
2035 }
2036
2037 /* Any space previously marked as invalid is now irrelevant: */
2038 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2039 }
2040
2041 /*
2042 * Account for any unmapped blocks. Note that the new
2043 * file length can still end up with unmapped blocks.
2044 */
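	/*
	 * Note: finalblks below is a ceiling division,
	 * (length + blksize - 1) / blksize.  For example, with 4 KB
	 * allocation blocks a new length of 6000 bytes still needs 2 blocks;
	 * any of those blocks that were never physically allocated remain
	 * "loaned" and are re-counted in ff_unallocblocks and
	 * hfsmp->loanedBlocks.
	 */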
2045 if (fp->ff_unallocblocks > 0) {
2046 u_int32_t finalblks;
2047 u_int32_t loanedBlocks;
2048
2049 HFS_MOUNT_LOCK(hfsmp, TRUE);
2050
2051 loanedBlocks = fp->ff_unallocblocks;
2052 cp->c_blocks -= loanedBlocks;
2053 fp->ff_blocks -= loanedBlocks;
2054 fp->ff_unallocblocks = 0;
2055
2056 hfsmp->loanedBlocks -= loanedBlocks;
2057
2058 finalblks = (length + blksize - 1) / blksize;
2059 if (finalblks > fp->ff_blocks) {
2060 /* calculate required unmapped blocks */
2061 loanedBlocks = finalblks - fp->ff_blocks;
2062 hfsmp->loanedBlocks += loanedBlocks;
2063
2064 fp->ff_unallocblocks = loanedBlocks;
2065 cp->c_blocks += loanedBlocks;
2066 fp->ff_blocks += loanedBlocks;
2067 }
2068 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2069 }
2070
2071 /*
2072 * For a TBE process the deallocation of the file blocks is
2073 * delayed until the file is closed. And hfs_close calls
2074 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2075 * isn't set, we make sure this isn't a TBE process.
2076 */
2077 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2078#if QUOTA
2079 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2080#endif /* QUOTA */
2081 if (hfs_start_transaction(hfsmp) != 0) {
2082 retval = EINVAL;
2083 goto Err_Exit;
2084 }
2085
2086 if (fp->ff_unallocblocks == 0) {
2087 /* Protect extents b-tree and allocation bitmap */
2088 lockflags = SFL_BITMAP;
2089 if (overflow_extents(fp))
2090 lockflags |= SFL_EXTENTS;
2091 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2092
2093 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2094 (FCB*)fp, length, false));
2095
2096 hfs_systemfile_unlock(hfsmp, lockflags);
2097 }
2098 if (hfsmp->jnl) {
2099 if (retval == 0) {
2100 fp->ff_size = length;
2101 }
2102 (void) hfs_update(vp, TRUE);
2103 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2104 }
2105
2106 hfs_end_transaction(hfsmp);
2107
2108 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2109 if (retval)
2110 goto Err_Exit;
2111#if QUOTA
2112 /* These are bytesreleased */
2113 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2114#endif /* QUOTA */
2115 }
2116 /* Only set update flag if the logical length changes */
2117 if (old_filesize != length)
2118 cp->c_touch_modtime = TRUE;
2119 fp->ff_size = length;
2120 }
2121 cp->c_touch_chgtime = TRUE;
2122 retval = hfs_update(vp, MNT_WAIT);
2123 if (retval) {
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2125 -1, -1, -1, retval, 0);
2126 }
2127
2128Err_Exit:
2129
2130 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2131 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2132
2133 return (retval);
2134}
2135
2136
2137
2138/*
2139 * Truncate a cnode to at most length size, freeing (or adding) the
2140 * disk blocks.
2141 */
2142__private_extern__
2143int
2144hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2145 vfs_context_t context)
2146{
2147 struct filefork *fp = VTOF(vp);
2148 off_t filebytes;
2149 u_long fileblocks;
2150 int blksize, error = 0;
2151 struct cnode *cp = VTOC(vp);
2152
2153 if (vnode_isdir(vp))
2154 return (EISDIR); /* cannot truncate an HFS directory! */
2155
2156 blksize = VTOVCB(vp)->blockSize;
2157 fileblocks = fp->ff_blocks;
2158 filebytes = (off_t)fileblocks * (off_t)blksize;
2159
2160 // have to loop truncating or growing files that are
2161 // really big because otherwise transactions can get
2162 // enormous and consume too many kernel resources.
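	// Note: each pass below changes the physical size by at most
	// HFS_BIGFILE_SIZE bytes and calls do_hfs_truncate() once, so the
	// work (and its journal transaction) is split into bounded chunks.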
2163
2164 if (length < filebytes) {
2165 while (filebytes > length) {
2166 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2167 filebytes -= HFS_BIGFILE_SIZE;
2168 } else {
2169 filebytes = length;
2170 }
2171 cp->c_flag |= C_FORCEUPDATE;
2172 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2173 if (error)
2174 break;
2175 }
2176 } else if (length > filebytes) {
2177 while (filebytes < length) {
2178 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2179 filebytes += HFS_BIGFILE_SIZE;
2180 } else {
2181 filebytes = length;
2182 }
2183 cp->c_flag |= C_FORCEUPDATE;
2184 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2185 if (error)
2186 break;
2187 }
2188 } else /* Same logical size */ {
2189
2190 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2191 }
2192 /* Files that are changing size are not hot file candidates. */
2193 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2194 fp->ff_bytesread = 0;
2195 }
2196
2197 return (error);
2198}
2199
2200
2201
2202/*
2203 * Preallocate file storage space.
2204 */
2205int
2206hfs_vnop_allocate(struct vnop_allocate_args /* {
2207 vnode_t a_vp;
2208 off_t a_length;
2209 u_int32_t a_flags;
2210 off_t *a_bytesallocated;
2211 off_t a_offset;
2212 vfs_context_t a_context;
2213 } */ *ap)
2214{
2215 struct vnode *vp = ap->a_vp;
2216 struct cnode *cp;
2217 struct filefork *fp;
2218 ExtendedVCB *vcb;
2219 off_t length = ap->a_length;
2220 off_t startingPEOF;
2221 off_t moreBytesRequested;
2222 off_t actualBytesAdded;
2223 off_t filebytes;
2224 u_long fileblocks;
2225 int retval, retval2;
2226 UInt32 blockHint;
2227 UInt32 extendFlags; /* For call to ExtendFileC */
2228 struct hfsmount *hfsmp;
2229 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2230 int lockflags;
2231
2232 *(ap->a_bytesallocated) = 0;
2233
2234 if (!vnode_isreg(vp))
2235 return (EISDIR);
2236 if (length < (off_t)0)
2237 return (EINVAL);
2238
2239 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2240 return (retval);
2241 cp = VTOC(vp);
2242 fp = VTOF(vp);
2243 hfsmp = VTOHFS(vp);
2244 vcb = VTOVCB(vp);
2245
2246 fileblocks = fp->ff_blocks;
2247 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2248
2249 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2250 retval = EINVAL;
2251 goto Err_Exit;
2252 }
2253
2254 /* Fill in the flags word for the call to Extend the file */
2255
2256 extendFlags = kEFNoClumpMask;
2257 if (ap->a_flags & ALLOCATECONTIG)
2258 extendFlags |= kEFContigMask;
2259 if (ap->a_flags & ALLOCATEALL)
2260 extendFlags |= kEFAllMask;
2261 if (cred && suser(cred, NULL) != 0)
2262 extendFlags |= kEFReserveMask;
2263
2264 retval = E_NONE;
2265 blockHint = 0;
2266 startingPEOF = filebytes;
2267
2268 if (ap->a_flags & ALLOCATEFROMPEOF)
2269 length += filebytes;
2270 else if (ap->a_flags & ALLOCATEFROMVOL)
2271 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
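	/*
	 * Note: with ALLOCATEFROMPEOF the caller's length is relative to the
	 * current physical EOF, so it is converted to an absolute size here;
	 * with ALLOCATEFROMVOL, a_offset is a volume-relative position used
	 * only to seed the allocation blockHint.
	 */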
2272
2273 /* If no changes are necessary, then we're done */
2274 if (filebytes == length)
2275 goto Std_Exit;
2276
2277 /*
2278 * Lengthen the size of the file. We must ensure that the
2279 * last byte of the file is allocated. Since the smallest
2280 * value of filebytes is 0, length will be at least 1.
2281 */
2282 if (length > filebytes) {
2283 moreBytesRequested = length - filebytes;
2284
2285#if QUOTA
2286 retval = hfs_chkdq(cp,
2287 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2288 cred, 0);
2289 if (retval)
2290 goto Err_Exit;
2291
2292#endif /* QUOTA */
2293 /*
2294 * Metadata zone checks.
2295 */
2296 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2297 /*
2298 * Allocate Journal and Quota files in metadata zone.
2299 */
2300 if (hfs_virtualmetafile(cp)) {
2301 extendFlags |= kEFMetadataMask;
2302 blockHint = hfsmp->hfs_metazone_start;
2303 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2304 (blockHint <= hfsmp->hfs_metazone_end)) {
2305 /*
2306 * Move blockHint outside metadata zone.
2307 */
2308 blockHint = hfsmp->hfs_metazone_end + 1;
2309 }
2310 }
2311
2312 if (hfs_start_transaction(hfsmp) != 0) {
2313 retval = EINVAL;
2314 goto Err_Exit;
2315 }
2316
2317 /* Protect extents b-tree and allocation bitmap */
2318 lockflags = SFL_BITMAP;
2319 if (overflow_extents(fp))
2320 lockflags |= SFL_EXTENTS;
2321 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2322
2323 retval = MacToVFSError(ExtendFileC(vcb,
2324 (FCB*)fp,
2325 moreBytesRequested,
2326 blockHint,
2327 extendFlags,
2328 &actualBytesAdded));
2329
2330 *(ap->a_bytesallocated) = actualBytesAdded;
2331 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2332
2333 hfs_systemfile_unlock(hfsmp, lockflags);
2334
2335 if (hfsmp->jnl) {
2336 (void) hfs_update(vp, TRUE);
2337 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2338 }
2339
2340 hfs_end_transaction(hfsmp);
2341
2342 /*
2343 * if we get an error and no changes were made then exit
2344 * otherwise we must do the hfs_update to reflect the changes
2345 */
2346 if (retval && (startingPEOF == filebytes))
2347 goto Err_Exit;
2348
2349 /*
2350 * Adjust actualBytesAdded to be allocation block aligned, not
2351 * clump size aligned.
2352 * NOTE: the amount we report here does not affect reality
2353 * until the file is closed, when we truncate the file to allocation
2354 * block size.
2355 */
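		/*
		 * Illustrative example: if the caller asked for 3 allocation
		 * blocks' worth of space and the allocator handed back 8
		 * (e.g. a whole clump), only roundup(moreBytesRequested,
		 * blockSize) -- the block-aligned amount actually requested --
		 * is reported in *a_bytesallocated.
		 */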
2356 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2357 *(ap->a_bytesallocated) =
2358 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2359
2360 } else { /* Shorten the size of the file */
2361
2362 if (fp->ff_size > length) {
2363 /*
2364 * Any buffers that are past the truncation point need to be
2365 * invalidated (to maintain buffer cache consistency).
2366 */
2367 }
2368
2369 if (hfs_start_transaction(hfsmp) != 0) {
2370 retval = EINVAL;
2371 goto Err_Exit;
2372 }
2373
2374 /* Protect extents b-tree and allocation bitmap */
2375 lockflags = SFL_BITMAP;
2376 if (overflow_extents(fp))
2377 lockflags |= SFL_EXTENTS;
2378 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2379
2380 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2381
2382 hfs_systemfile_unlock(hfsmp, lockflags);
2383
2384 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2385
2386 if (hfsmp->jnl) {
2387 (void) hfs_update(vp, TRUE);
2388 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2389 }
2390
2391 hfs_end_transaction(hfsmp);
2392
2393
2394 /*
2395 * if we get an error and no changes were made then exit
2396 * otherwise we must do the hfs_update to reflect the changes
2397 */
2398 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2399#if QUOTA
2400 /* These are bytesreleased */
2401 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
2402#endif /* QUOTA */
2403
2404 if (fp->ff_size > filebytes) {
2405 fp->ff_size = filebytes;
2406
2407 hfs_unlock(cp);
2408 ubc_setsize(vp, fp->ff_size);
2409 hfs_lock(cp, HFS_FORCE_LOCK);
2410 }
2411 }
2412
2413Std_Exit:
2414 cp->c_touch_chgtime = TRUE;
2415 cp->c_touch_modtime = TRUE;
2416 retval2 = hfs_update(vp, MNT_WAIT);
2417
2418 if (retval == 0)
2419 retval = retval2;
2420Err_Exit:
2421 hfs_unlock(cp);
2422 return (retval);
2423}
2424
2425
2426/*
2427 * Pagein for HFS filesystem
2428 */
2429int
2430hfs_vnop_pagein(struct vnop_pagein_args *ap)
2431/*
2432 struct vnop_pagein_args {
2433 vnode_t a_vp,
2434 upl_t a_pl,
2435 vm_offset_t a_pl_offset,
2436 off_t a_f_offset,
2437 size_t a_size,
2438 int a_flags
2439 vfs_context_t a_context;
2440 };
2441*/
2442{
2443 vnode_t vp = ap->a_vp;
2444 int error;
2445
2446 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2447 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2448 /*
2449 * Keep track of blocks read.
2450 */
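	/*
	 * Note: while the volume is in the hot file recording stage
	 * (hfc_stage == HFC_RECORDING), pagein traffic is accumulated in
	 * ff_bytesread and the access time is refreshed; the sampling-period
	 * check below restarts the count for files not seen since the
	 * current period began.
	 */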
2451 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2452 struct cnode *cp;
2453 struct filefork *fp;
2454 int bytesread;
2455 int took_cnode_lock = 0;
2456
2457 cp = VTOC(vp);
2458 fp = VTOF(vp);
2459
2460 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2461 bytesread = fp->ff_size;
2462 else
2463 bytesread = ap->a_size;
2464
2465 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2466 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2467 hfs_lock(cp, HFS_FORCE_LOCK);
2468 took_cnode_lock = 1;
2469 }
2470 /*
2471 * If this file hasn't been seen since the start of
2472 * the current sampling period then start over.
2473 */
2474 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2475 struct timeval tv;
2476
2477 fp->ff_bytesread = bytesread;
2478 microtime(&tv);
2479 cp->c_atime = tv.tv_sec;
2480 } else {
2481 fp->ff_bytesread += bytesread;
2482 }
2483 cp->c_touch_acctime = TRUE;
2484 if (took_cnode_lock)
2485 hfs_unlock(cp);
2486 }
2487 return (error);
2488}
2489
2490/*
2491 * Pageout for HFS filesystem.
2492 */
2493int
2494hfs_vnop_pageout(struct vnop_pageout_args *ap)
2495/*
2496 struct vnop_pageout_args {
2497 vnode_t a_vp,
2498 upl_t a_pl,
2499 vm_offset_t a_pl_offset,
2500 off_t a_f_offset,
2501 size_t a_size,
2502 int a_flags
2503 vfs_context_t a_context;
2504 };
2505*/
2506{
2507 vnode_t vp = ap->a_vp;
2508 struct cnode *cp;
2509 struct filefork *fp;
2510 int retval;
2511 off_t end_of_range;
2512 off_t filesize;
2513
2514 cp = VTOC(vp);
2515 if (cp->c_lockowner == current_thread()) {
2516 panic("pageout: %s cnode lock already held!\n",
2517 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2518 }
2519 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2520 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2521 ubc_upl_abort_range(ap->a_pl,
2522 ap->a_pl_offset,
2523 ap->a_size,
2524 UPL_ABORT_FREE_ON_EMPTY);
2525 }
2526 return (retval);
2527 }
2528 fp = VTOF(vp);
2529
2530 filesize = fp->ff_size;
2531 end_of_range = ap->a_f_offset + ap->a_size - 1;
2532
2533 if (end_of_range >= filesize) {
2534 end_of_range = (off_t)(filesize - 1);
2535 }
2536 if (ap->a_f_offset < filesize) {
2537 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2538 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2539 }
2540 hfs_unlock(cp);
2541
2542 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2543 ap->a_size, filesize, ap->a_flags);
2544
2545 /*
2546 * If data was written, and setuid or setgid bits are set and
2547 * this process is not the superuser then clear the setuid and
2548 * setgid bits as a precaution against tampering.
2549 */
2550 if ((retval == 0) &&
2551 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2552 (vfs_context_suser(ap->a_context) != 0)) {
2553 hfs_lock(cp, HFS_FORCE_LOCK);
2554 cp->c_mode &= ~(S_ISUID | S_ISGID);
2555 cp->c_touch_chgtime = TRUE;
2556 hfs_unlock(cp);
2557 }
2558 return (retval);
2559}
2560
2561/*
2562 * Intercept B-Tree node writes to unswap them if necessary.
2563 */
2564int
2565hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2566{
2567 int retval = 0;
2568 register struct buf *bp = ap->a_bp;
2569 register struct vnode *vp = buf_vnode(bp);
2570 BlockDescriptor block;
2571
2572 /* Trap B-Tree writes */
2573 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2574 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2575 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2576
2577 /*
2578 * Swap and validate the node if it is in native byte order.
2579 * This is always true on big endian, so we always validate
2580 * before writing here. On little endian, the node typically has
2581 * been swapped and validated when it was written to the journal,
2582 * so we won't do anything here.
2583 */
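		/*
		 * Note on the test below: the last 16-bit word of a B-tree node
		 * holds the offset of its first record, which is always 0x000e
		 * (sizeof(BTNodeDescriptor)).  Finding that value in host byte
		 * order means the node has not yet been swapped to big-endian
		 * (on-disk) order.
		 */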
2584 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2585 /* Prepare the block pointer */
2586 block.blockHeader = bp;
2587 block.buffer = (char *)buf_dataptr(bp);
2588 block.blockNum = buf_lblkno(bp);
2589 /* not found in cache ==> came from disk */
2590 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2591 block.blockSize = buf_count(bp);
2592
2593 /* Endian un-swap B-Tree node */
2594 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2595 if (retval)
2596 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2597 }
2598 }
2599
2600 /* This buffer shouldn't be locked anymore but if it is clear it */
2601 if ((buf_flags(bp) & B_LOCKED)) {
2602 // XXXdbg
2603 if (VTOHFS(vp)->jnl) {
2604 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2605 }
2606 buf_clearflags(bp, B_LOCKED);
2607 }
2608 retval = vn_bwrite (ap);
2609
2610 return (retval);
2611}
2612
2613/*
2614 * Relocate a file to a new location on disk
2615 * cnode must be locked on entry
2616 *
2617 * Relocation occurs by cloning the file's data from its
2618 * current set of blocks to a new set of blocks. During
2619 * the relocation all of the blocks (old and new) are
2620 * owned by the file.
2621 *
2622 * -----------------
2623 * |///////////////|
2624 * -----------------
2625 * 0 N (file offset)
2626 *
2627 * ----------------- -----------------
2628 * |///////////////| | | STEP 1 (acquire new blocks)
2629 * ----------------- -----------------
2630 * 0 N N+1 2N
2631 *
2632 * ----------------- -----------------
2633 * |///////////////| |///////////////| STEP 2 (clone data)
2634 * ----------------- -----------------
2635 * 0 N N+1 2N
2636 *
2637 * -----------------
2638 * |///////////////| STEP 3 (head truncate blocks)
2639 * -----------------
2640 * 0 N
2641 *
2642 * During steps 2 and 3 page-outs to file offsets less
2643 * than or equal to N are suspended.
2644 *
2645 * During step 3 page-ins to the file are suspended.
2646 */
2647__private_extern__
2648int
2649hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2650 struct proc *p)
2651{
2652 struct cnode *cp;
2653 struct filefork *fp;
2654 struct hfsmount *hfsmp;
2655 u_int32_t headblks;
2656 u_int32_t datablks;
2657 u_int32_t blksize;
2658 u_int32_t growsize;
2659 u_int32_t nextallocsave;
2660 daddr64_t sector_a, sector_b;
2661 int disabled_caching = 0;
2662 int eflags;
2663 off_t newbytes;
2664 int retval;
2665 int lockflags = 0;
2666 int took_trunc_lock = 0;
2667 int started_tr = 0;
2668 enum vtype vnodetype;
2669
2670 vnodetype = vnode_vtype(vp);
2671 if (vnodetype != VREG && vnodetype != VLNK) {
2672 return (EPERM);
2673 }
2674
2675 hfsmp = VTOHFS(vp);
2676 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2677 return (ENOSPC);
2678 }
2679
2680 cp = VTOC(vp);
2681 fp = VTOF(vp);
2682 if (fp->ff_unallocblocks)
2683 return (EINVAL);
2684 blksize = hfsmp->blockSize;
2685 if (blockHint == 0)
2686 blockHint = hfsmp->nextAllocation;
2687
2688 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2689 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2690 return (EFBIG);
2691 }
2692
2693 //
2694 // We do not believe that this call to hfs_fsync() is
2695 // necessary and it causes a journal transaction
2696 // deadlock so we are removing it.
2697 //
2698 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2699 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2700 // if (retval)
2701 // return (retval);
2702 //}
2703
2704 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2705 hfs_unlock(cp);
2706 hfs_lock_truncate(cp, TRUE);
2707 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2708 hfs_unlock_truncate(cp);
2709 return (retval);
2710 }
2711 took_trunc_lock = 1;
2712 }
2713 headblks = fp->ff_blocks;
2714 datablks = howmany(fp->ff_size, blksize);
2715 growsize = datablks * blksize;
2716 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2717 if (blockHint >= hfsmp->hfs_metazone_start &&
2718 blockHint <= hfsmp->hfs_metazone_end)
2719 eflags |= kEFMetadataMask;
2720
2721 if (hfs_start_transaction(hfsmp) != 0) {
2722 if (took_trunc_lock)
2723 hfs_unlock_truncate(cp);
2724 return (EINVAL);
2725 }
2726 started_tr = 1;
2727 /*
2728 * Protect the extents b-tree and the allocation bitmap
2729 * during MapFileBlockC and ExtendFileC operations.
2730 */
2731 lockflags = SFL_BITMAP;
2732 if (overflow_extents(fp))
2733 lockflags |= SFL_EXTENTS;
2734 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2735
2736 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2737 if (retval) {
2738 retval = MacToVFSError(retval);
2739 goto out;
2740 }
2741
2742 /*
2743 * STEP 1 - acquire new allocation blocks.
2744 */
2745 if (!vnode_isnocache(vp)) {
2746 vnode_setnocache(vp);
2747 disabled_caching = 1;
2748
2749 }
2750 nextallocsave = hfsmp->nextAllocation;
2751 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2752 if (eflags & kEFMetadataMask) {
2753 HFS_MOUNT_LOCK(hfsmp, TRUE);
2754 hfsmp->nextAllocation = nextallocsave;
2755 hfsmp->vcbFlags |= 0xFF00;
2756 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2757 }
2758
2759 retval = MacToVFSError(retval);
2760 if (retval == 0) {
2761 cp->c_flag |= C_MODIFIED;
2762 if (newbytes < growsize) {
2763 retval = ENOSPC;
2764 goto restore;
2765 } else if (fp->ff_blocks < (headblks + datablks)) {
2766 printf("hfs_relocate: allocation failed");
2767 retval = ENOSPC;
2768 goto restore;
2769 }
2770
2771 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2772 if (retval) {
2773 retval = MacToVFSError(retval);
2774 } else if ((sector_a + 1) == sector_b) {
2775 retval = ENOSPC;
2776 goto restore;
2777 } else if ((eflags & kEFMetadataMask) &&
2778 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2779 hfsmp->hfs_metazone_end)) {
2780 printf("hfs_relocate: didn't move into metadata zone\n");
2781 retval = ENOSPC;
2782 goto restore;
2783 }
2784 }
2785 /* Done with system locks and journal for now. */
2786 hfs_systemfile_unlock(hfsmp, lockflags);
2787 lockflags = 0;
2788 hfs_end_transaction(hfsmp);
2789 started_tr = 0;
2790
2791 if (retval) {
2792 /*
2793 * Check to see if failure is due to excessive fragmentation.
2794 */
2795 if ((retval == ENOSPC) &&
2796 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2797 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2798 }
2799 goto out;
2800 }
2801 /*
2802 * STEP 2 - clone file data into the new allocation blocks.
2803 */
2804
2805 if (vnodetype == VLNK)
2806 retval = hfs_clonelink(vp, blksize, cred, p);
2807 else if (vnode_issystem(vp))
2808 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2809 else
2810 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2811
2812 /* Start transaction for step 3 or for a restore. */
2813 if (hfs_start_transaction(hfsmp) != 0) {
2814 retval = EINVAL;
2815 goto out;
2816 }
2817 started_tr = 1;
2818 if (retval)
2819 goto restore;
2820
2821 /*
2822 * STEP 3 - switch to cloned data and remove old blocks.
2823 */
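	/*
	 * Note: HeadTruncateFile() drops the first headblks allocation
	 * blocks (the original copy of the data), so the blocks cloned in
	 * step 2 become the file's contents starting at offset 0.
	 */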
2824 lockflags = SFL_BITMAP;
2825 if (overflow_extents(fp))
2826 lockflags |= SFL_EXTENTS;
2827 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2828
2829 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2830
2831 hfs_systemfile_unlock(hfsmp, lockflags);
2832 lockflags = 0;
2833 if (retval)
2834 goto restore;
2835out:
2836 if (took_trunc_lock)
2837 hfs_unlock_truncate(cp);
2838
2839 if (lockflags) {
2840 hfs_systemfile_unlock(hfsmp, lockflags);
2841 lockflags = 0;
2842 }
2843
2844 // See comment up above about calls to hfs_fsync()
2845 //
2846 //if (retval == 0)
2847 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2848
2849 if (hfsmp->jnl) {
2850 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2851 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2852 else
2853 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2854 }
2855exit:
2856 if (disabled_caching) {
2857 vnode_clearnocache(vp);
2858 }
2859 if (started_tr)
2860 hfs_end_transaction(hfsmp);
2861
2862 return (retval);
2863
2864restore:
2865 if (fp->ff_blocks == headblks)
2866 goto exit;
2867 /*
2868 * Give back any newly allocated space.
2869 */
2870 if (lockflags == 0) {
2871 lockflags = SFL_BITMAP;
2872 if (overflow_extents(fp))
2873 lockflags |= SFL_EXTENTS;
2874 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2875 }
2876
2877 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2878
2879 hfs_systemfile_unlock(hfsmp, lockflags);
2880 lockflags = 0;
2881
2882 if (took_trunc_lock)
2883 hfs_unlock_truncate(cp);
2884 goto exit;
2885}
2886
2887
2888/*
2889 * Clone a symlink.
2890 *
2891 */
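/*
 * Note: hfs_relocate() only calls this when the link data fits in a single
 * allocation block (it returns EFBIG otherwise), so cloning is simply a
 * buffer-cache copy of logical block 0 (the original data) into logical
 * block 1 (the block just added).
 */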
2892static int
2893hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2894{
2895 struct buf *head_bp = NULL;
2896 struct buf *tail_bp = NULL;
2897 int error;
2898
2899
2900 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2901 if (error)
2902 goto out;
2903
2904 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2905 if (tail_bp == NULL) {
2906 error = EIO;
2907 goto out;
2908 }
2909 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2910 error = (int)buf_bwrite(tail_bp);
2911out:
2912 if (head_bp) {
2913 buf_markinvalid(head_bp);
2914 buf_brelse(head_bp);
2915 }
2916 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2917
2918 return (error);
2919}
2920
2921/*
2922 * Clone a file's data within the file.
2923 *
2924 */
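/*
 * Note: the original blocks are read back through the cluster layer and
 * rewritten starting at writebase (blkstart * blksize), i.e. just past the
 * original data, in buffers of at most 64 KB.  IO_NOCACHE keeps stale pages
 * out of the UBC and IO_SYNC forces the copy to disk before the extents are
 * switched in step 3.
 */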
2925static int
2926hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2927{
2928 caddr_t bufp;
2929 size_t writebase;
2930 size_t bufsize;
2931 size_t copysize;
2932 size_t iosize;
2933 off_t filesize;
2934 size_t offset;
2935 uio_t auio;
2936 int error = 0;
2937
2938 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2939 writebase = blkstart * blksize;
2940 copysize = blkcnt * blksize;
2941 iosize = bufsize = MIN(copysize, 4096 * 16);
2942 offset = 0;
2943
2944 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2945 return (ENOMEM);
2946 }
2947 hfs_unlock(VTOC(vp));
2948
2949 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2950
2951 while (offset < copysize) {
2952 iosize = MIN(copysize - offset, iosize);
2953
2954 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2955 uio_addiov(auio, (uintptr_t)bufp, iosize);
2956
2957 error = cluster_read(vp, auio, copysize, 0);
2958 if (error) {
2959 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2960 break;
2961 }
2962 if (uio_resid(auio) != 0) {
2963 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2964 error = EIO;
2965 break;
2966 }
2967
2968 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2969 uio_addiov(auio, (uintptr_t)bufp, iosize);
2970
2971 error = cluster_write(vp, auio, filesize + offset,
2972 filesize + offset + iosize,
2973 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2974 if (error) {
2975 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2976 break;
2977 }
2978 if (uio_resid(auio) != 0) {
2979 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2980 error = EIO;
2981 break;
2982 }
2983 offset += iosize;
2984 }
2985 uio_free(auio);
2986
2987 /*
2988 * No need to call ubc_sync_range or hfs_invalbuf
2989 * since the file was copied using IO_NOCACHE.
2990 */
2991
2992 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2993
2994 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2995 return (error);
2996}
2997
2998/*
2999 * Clone a system (metadata) file.
3000 *
3001 */
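/*
 * Note: system files are copied through the metadata buffer cache
 * (buf_meta_bread / buf_getblk / buf_bwrite) rather than the cluster layer,
 * in chunks of at most 1 MB rounded down to the logical block size, and the
 * result is pushed to disk with hfs_fsync().
 */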
3002static int
3003hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3004 kauth_cred_t cred, struct proc *p)
3005{
3006 caddr_t bufp;
3007 char * offset;
3008 size_t bufsize;
3009 size_t iosize;
3010 struct buf *bp = NULL;
3011 daddr64_t blkno;
3012 daddr64_t blk;
3013 daddr64_t start_blk;
3014 daddr64_t last_blk;
3015 int breadcnt;
3016 int i;
3017 int error = 0;
3018
3019
3020 iosize = GetLogicalBlockSize(vp);
3021 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3022 breadcnt = bufsize / iosize;
3023
3024 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3025 return (ENOMEM);
3026 }
3027 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3028 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3029 blkno = 0;
3030
3031 while (blkno < last_blk) {
3032 /*
3033 * Read up to a megabyte
3034 */
3035 offset = bufp;
3036 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3037 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3038 if (error) {
3039 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3040 goto out;
3041 }
3042 if (buf_count(bp) != iosize) {
3043 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3044 goto out;
3045 }
3046 bcopy((char *)buf_dataptr(bp), offset, iosize);
3047
3048 buf_markinvalid(bp);
3049 buf_brelse(bp);
3050 bp = NULL;
3051
3052 offset += iosize;
3053 }
3054
3055 /*
3056 * Write up to a megabyte
3057 */
3058 offset = bufp;
3059 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3060 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3061 if (bp == NULL) {
3062 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3063 error = EIO;
3064 goto out;
3065 }
3066 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3067 error = (int)buf_bwrite(bp);
3068 bp = NULL;
3069 if (error)
3070 goto out;
3071 offset += iosize;
3072 }
3073 }
3074out:
3075 if (bp) {
3076 buf_brelse(bp);
3077 }
3078
3079 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3080
3081 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3082
3083 return (error);
3084}