X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..7e4a7d3939db04e70062ae6c7bf24b8c8b2f5a7c:/bsd/hfs/hfs_readwrite.c?ds=inline diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index daf533c2e..6dc30afad 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -78,12 +79,15 @@ enum { MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */ }; -/* from bsd/vfs/vfs_cluster.c */ -extern int is_file_clean(vnode_t vp, off_t filesize); +/* from bsd/hfs/hfs_vfsops.c */ +extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); static int hfs_clonefile(struct vnode *, int, int, int); static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); +static int hfs_minorupdate(struct vnode *vp); +static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); + int flush_cache_on_write = 0; SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files"); @@ -106,7 +110,6 @@ hfs_vnop_read(struct vnop_read_args *ap) off_t offset = uio_offset(uio); int retval = 0; - /* Preflight checks */ if (!vnode_isreg(vp)) { /* can only read regular files */ @@ -119,6 +122,34 @@ hfs_vnop_read(struct vnop_read_args *ap) return (0); /* Nothing left to do */ if (offset < 0) return (EINVAL); /* cant read from a negative offset */ + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ + return 0; + } + /* otherwise read the resource fork normally */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (retval == 0) { + /* successful read, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return retval; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + retval = 0; + } + } +#endif /* HFS_COMPRESSION */ cp = VTOC(vp); fp = VTOF(vp); @@ -200,7 +231,7 @@ hfs_vnop_write(struct vnop_write_args *ap) off_t actualBytesAdded; off_t filebytes; off_t offset; - size_t resid; + ssize_t resid; int eflags; int ioflag = ap->a_ioflag; int retval = 0; @@ -209,6 +240,22 @@ hfs_vnop_write(struct vnop_write_args *ap) int partialwrite = 0; int exclusive_lock = 0; +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return EACCES; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow writes */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } +#endif + // LP64todo - fix this! 
uio_resid may be 64-bit value resid = uio_resid(uio); offset = uio_offset(uio); @@ -262,10 +309,12 @@ again: /* If the truncate lock is shared, and if we either have virtual * blocks or will need to extend the file, upgrade the truncate * to exclusive lock. If upgrade fails, we lose the lock and - * have to get exclusive lock again + * have to get exclusive lock again. Note that we want to + * grab the truncate lock exclusive even if we're not allocating new blocks + * because we could still be growing past the LEOF. */ if ((exclusive_lock == 0) && - ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) { + ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) { exclusive_lock = 1; /* Lock upgrade failed and we lost our shared lock, try again */ if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { @@ -477,20 +526,51 @@ sizeok: hfs_unlock(cp); cnode_locked = 0; + + /* + * We need to tell UBC the fork's new size BEFORE calling + * cluster_write, in case any of the new pages need to be + * paged out before cluster_write completes (which does happen + * in embedded systems due to extreme memory pressure). + * Similarly, we need to tell hfs_vnop_pageout what the new EOF + * will be, so that it can pass that on to cluster_pageout, and + * allow those pageouts. + * + * We don't update ff_size yet since we don't want pageins to + * be able to see uninitialized data between the old and new + * EOF, until cluster_write has completed and initialized that + * part of the file. + * + * The vnode pager relies on the file size last given to UBC via + * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or + * ff_size (whichever is larger). NOTE: ff_new_size is always + * zero, unless we are extending the file via write. + */ + if (filesize > fp->ff_size) { + fp->ff_new_size = filesize; + ubc_setsize(vp, filesize); + } retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off, tail_off, lflag | IO_NOZERODIRTY); if (retval) { + fp->ff_new_size = 0; /* no longer extending; use ff_size */ + if (filesize > origFileSize) { + ubc_setsize(vp, origFileSize); + } goto ioerr_exit; } - offset = uio_offset(uio); - if (offset > fp->ff_size) { - fp->ff_size = offset; - - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ + + if (filesize > origFileSize) { + fp->ff_size = filesize; + /* Files that are changing size are not hot file candidates. */ - if (hfsmp->hfc_stage == HFC_RECORDING) + if (hfsmp->hfc_stage == HFC_RECORDING) { fp->ff_bytesread = 0; + } } + fp->ff_new_size = 0; /* ff_size now has the correct size */ + + /* If we wrote some bytes, then touch the change and mod times */ if (resid > uio_resid(uio)) { cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; @@ -507,7 +587,6 @@ sizeok: VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); } } - HFS_KNOTE(vp, NOTE_WRITE); ioerr_exit: /* @@ -532,7 +611,7 @@ ioerr_exit: cnode_locked = 1; } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, - 0, ap->a_context); + 0, 0, ap->a_context); // LP64todo - fix this! 
resid needs to by user_ssize_t uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); @@ -579,16 +658,26 @@ struct access_t { int *file_ids; /* IN: array of file ids */ gid_t *groups; /* IN: array of groups */ short *access; /* OUT: access info for each file (0 for 'has access') */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user32_addr_t file_ids; /* IN: array of file ids */ + user32_addr_t groups; /* IN: array of groups */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ }; -struct user_access_t { +struct user64_access_t { uid_t uid; /* IN: effective user id */ short flags; /* IN: access requested (i.e. R_OK) */ short num_groups; /* IN: number of groups user belongs to */ int num_files; /* IN: number of files to process */ - user_addr_t file_ids; /* IN: array of file ids */ - user_addr_t groups; /* IN: array of groups */ - user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t groups; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ }; @@ -604,17 +693,28 @@ struct ext_access_t { short *access; /* OUT: access info for each file (0 for 'has access') */ uint32_t num_parents; /* future use */ cnid_t *parents; /* future use */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user32_addr_t file_ids; /* IN: Array of file ids */ + user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + user32_addr_t parents; /* future use */ }; -struct ext_user_access_t { +struct user64_ext_access_t { uint32_t flags; /* IN: access requested (i.e. R_OK) */ uint32_t num_files; /* IN: number of files to process */ uint32_t map_size; /* IN: size of the bit map */ - user_addr_t file_ids; /* IN: array of file ids */ - user_addr_t bitmap; /* IN: array of groups */ - user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t bitmap; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ uint32_t num_parents;/* future use */ - user_addr_t parents;/* future use */ + user64_addr_t parents;/* future use */ }; @@ -675,7 +775,7 @@ lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) } if (cache->numcached > NUM_CACHE_ENTRIES) { - /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n", + /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n", cache->numcached, NUM_CACHE_ENTRIES);*/ cache->numcached = NUM_CACHE_ENTRIES; } @@ -724,11 +824,11 @@ add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) /* if the cache is full, do a replace rather than an insert */ if (cache->numcached >= NUM_CACHE_ENTRIES) { - //printf("cache is full (%d). 
replace at index %d\n", cache->numcached, index); + //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index); cache->numcached = NUM_CACHE_ENTRIES-1; if (index > cache->numcached) { - // printf("index %d pinned to %d\n", index, cache->numcached); + // printf("hfs: index %d pinned to %d\n", index, cache->numcached); index = cache->numcached; } } @@ -776,7 +876,7 @@ snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * isn't incore, then go to the catalog. */ static int -do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid, +do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp) { int error = 0; @@ -786,12 +886,13 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cn cnattrp->ca_uid = skip_cp->c_uid; cnattrp->ca_gid = skip_cp->c_gid; cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; keyp->hfsPlus.parentID = skip_cp->c_parentcnid; } else { struct cinfo c_info; /* otherwise, check the cnode hash incase the file/dir is incore */ - if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) { + if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) { cnattrp->ca_uid = c_info.uid; cnattrp->ca_gid = c_info.gid; cnattrp->ca_mode = c_info.mode; @@ -821,7 +922,7 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cn */ static int do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, - struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev, + struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, struct vfs_context *my_context, char *bitmap, uint32_t map_size, @@ -886,7 +987,7 @@ do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HF /* do the lookup (checks the cnode hash, then the catalog) */ - myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr); + myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr); if (myErr) { goto ExitThisRoutine; /* no access */ } @@ -986,15 +1087,13 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, */ Boolean check_leaf = true; - struct ext_user_access_t *user_access_structp; - struct ext_user_access_t tmp_user_access; + struct user64_ext_access_t *user_access_structp; + struct user64_ext_access_t tmp_user_access; struct access_cache cache; - int error = 0; + int error = 0, prev_parent_check_ok=1; unsigned int i; - dev_t dev = VTOC(vp)->c_dev; - short flags; unsigned int num_files = 0; int map_size = 0; @@ -1031,28 +1130,28 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, } if (is64bit) { - if (arg_size != sizeof(struct ext_user_access_t)) { + if (arg_size != sizeof(struct user64_ext_access_t)) { error = EINVAL; goto err_exit_bulk_access; } - user_access_structp = (struct ext_user_access_t *)ap->a_data; + user_access_structp = (struct user64_ext_access_t *)ap->a_data; - } else if (arg_size == sizeof(struct access_t)) { - struct access_t *accessp = (struct access_t *)ap->a_data; + } else if (arg_size == sizeof(struct user32_access_t)) { + struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data; // convert an old style bulk-access struct to the new style tmp_user_access.flags = accessp->flags; tmp_user_access.num_files = accessp->num_files; tmp_user_access.map_size = 0; 
tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids); - tmp_user_access.bitmap = (user_addr_t)NULL; + tmp_user_access.bitmap = USER_ADDR_NULL; tmp_user_access.access = CAST_USER_ADDR_T(accessp->access); tmp_user_access.num_parents = 0; user_access_structp = &tmp_user_access; - } else if (arg_size == sizeof(struct ext_access_t)) { - struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data; + } else if (arg_size == sizeof(struct user32_ext_access_t)) { + struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data; // up-cast from a 32-bit version of the struct tmp_user_access.flags = accessp->flags; @@ -1165,7 +1264,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, if (check_leaf) { /* do the lookup (checks the cnode hash, then the catalog) */ - error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr); + error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr); if (error) { access[i] = (short) error; continue; @@ -1174,6 +1273,10 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, if (parents) { // Check if the leaf matches one of the parent scopes leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL); + if (leaf_index >= 0 && parents[leaf_index] == cnid) + prev_parent_check_ok = 0; + else if (leaf_index >= 0) + prev_parent_check_ok = 1; } // if the thing has acl's, do the full permission check @@ -1217,14 +1320,14 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, } /* if the last guy had the same parent and had access, we're done */ - if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) { + if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) { cache.cachehits++; access[i] = 0; continue; } myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, - skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents); + skip_cp, p, cred, context,bitmap, map_size, parents, num_parents); if (myaccess || (error == ESRCH && leaf_index != -1)) { access[i] = 0; // have access.. 
no errors to report @@ -1250,7 +1353,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, err_exit_bulk_access: - //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); + //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); if (file_ids) kfree(file_ids, sizeof(int) * num_files); @@ -1302,6 +1405,30 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { proc_t p = vfs_context_proc(context); struct vfsstatfs *vfsp; boolean_t is64bit; + off_t jnl_start, jnl_size; + struct hfs_journal_info *jip; +#if HFS_COMPRESSION + int compressed = 0; + off_t uncompressed_size = -1; + int decmpfs_error = 0; + + if (ap->a_command == F_RDADVISE) { + /* we need to inspect the decmpfs state of the file as early as possible */ + compressed = hfs_file_is_compressed(VTOC(vp), 0); + if (compressed) { + if (VNODE_IS_RSRC(vp)) { + /* if this is the resource fork, treat it as if it were empty */ + uncompressed_size = 0; + } else { + decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); + if (decmpfs_error != 0) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } + } + } + } +#endif /* HFS_COMPRESSION */ is64bit = proc_is64bit(p); @@ -1328,7 +1455,12 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { bufptr = (char *)ap->a_data; cnid = strtoul(bufptr, NULL, 10); - if ((error = hfs_vget(hfsmp, cnid, &file_vp, 1))) { + /* We need to call hfs_vfs_vget to leverage the code that will + * fix the origin list for us if needed, as opposed to calling + * hfs_vget, since we will need the parent for build_path call. + */ + + if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { return (error); } error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context); @@ -1380,6 +1512,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + /* file system must not be mounted read-only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); } @@ -1395,6 +1532,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + + /* filesystem must not be mounted read only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } newsize = *(u_int64_t *)ap->a_data; cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; @@ -1435,7 +1577,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { * after metadata zone and set flag in mount structure to indicate * that nextAllocation should not be updated again. 
*/ - HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + if (hfsmp->hfs_metazone_end != 0) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; } else { hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; @@ -1454,6 +1598,9 @@ fail_change_next_allocation: struct hfs_backingstoreinfo *bsdata; int error = 0; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { return (EALREADY); } @@ -1500,6 +1647,25 @@ fail_change_next_allocation: vfs_markdependency(hfsmp->hfs_mp); + /* + * If the sparse image is on a sparse image file (as opposed to a sparse + * bundle), then we may need to limit the free space to the maximum size + * of a file on that volume. So we query (using pathconf), and if we get + * a meaningful result, we cache the number of blocks for later use in + * hfs_freeblks(). + */ + hfsmp->hfs_backingfs_maxblocks = 0; + if (vnode_vtype(di_vp) == VREG) { + int terr; + int hostbits; + terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); + if (terr == 0 && hostbits != 0 && hostbits < 64) { + u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; + + hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; + } + } + (void)vnode_put(di_vp); file_drop(bsdata->backingfd); return (0); @@ -1512,6 +1678,10 @@ fail_change_next_allocation: kauth_cred_getuid(cred) != vfsp->f_owner) { return (EACCES); /* must be owner of file system */ } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { @@ -1528,15 +1698,18 @@ fail_change_next_allocation: case F_FREEZE_FS: { struct mount *mp; - if (!is_suser()) - return (EACCES); - mp = vnode_mount(vp); hfsmp = VFSTOHFS(mp); if (!(hfsmp->jnl)) return (ENOTSUP); + vfsp = vfs_statfs(mp); + + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) + return (EACCES); + lck_rw_lock_exclusive(&hfsmp->hfs_insync); // flush things before we get started to try and prevent @@ -1545,6 +1718,9 @@ fail_change_next_allocation: // deadlock against ourselves. vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); hfs_global_exclusive_lock_acquire(hfsmp); + + // DO NOT call hfs_journal_flush() because that takes a + // shared lock on the global exclusive lock! 
journal_flush(hfsmp->jnl); // don't need to iterate on all vnodes, we just need to @@ -1565,7 +1741,9 @@ fail_change_next_allocation: } case F_THAW_FS: { - if (!is_suser()) + vfsp = vfs_statfs(vnode_mount(vp)); + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) return (EACCES); // if we're not the one who froze the fs then we @@ -1592,9 +1770,9 @@ fail_change_next_allocation: } if (is64bit) { - size = sizeof(struct user_access_t); + size = sizeof(struct user64_access_t); } else { - size = sizeof(struct access_t); + size = sizeof(struct user32_access_t); } return do_bulk_access_check(hfsmp, vp, ap, size, context); @@ -1608,9 +1786,9 @@ fail_change_next_allocation: } if (is64bit) { - size = sizeof(struct ext_user_access_t); + size = sizeof(struct user64_ext_access_t); } else { - size = sizeof(struct ext_access_t); + size = sizeof(struct user32_ext_access_t); } return do_bulk_access_check(hfsmp, vp, ap, size, context); @@ -1626,6 +1804,9 @@ fail_change_next_allocation: vfsp = vfs_statfs(HFSTOVFS(hfsmp)); state = *(int *)ap->a_data; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } // super-user can enable or disable acl's on a volume. // the volume owner can only enable acl's if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) { @@ -1645,6 +1826,10 @@ fail_change_next_allocation: } state = *(int *)ap->a_data; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } /* Super-user can enable or disable extent-based extended * attribute support on a volume @@ -1660,7 +1845,10 @@ fail_change_next_allocation: case F_FULLFSYNC: { int error; - + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); if (error == 0) { error = hfs_fsync(vp, MNT_WAIT, TRUE, p); @@ -1705,9 +1893,20 @@ fail_change_next_allocation: /* Protect against a size change. 
*/ hfs_lock_truncate(VTOC(vp), TRUE); +#if HFS_COMPRESSION + if (compressed && (uncompressed_size == -1)) { + /* fetching the uncompressed size failed above, so return the error */ + error = decmpfs_error; + } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || + (!compressed && (ra->ra_offset >= fp->ff_size))) { + error = EFBIG; + } +#else /* HFS_COMPRESSION */ if (ra->ra_offset >= fp->ff_size) { error = EFBIG; - } else { + } +#endif /* HFS_COMPRESSION */ + else { error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); } @@ -1724,8 +1923,8 @@ fail_change_next_allocation: int error; uio_t auio; daddr64_t blockNumber; - u_long blockOffset; - u_long xfersize; + u_int32_t blockOffset; + u_int32_t xfersize; struct buf *bp; user_fbootstraptransfer_t user_bootstrap; @@ -1735,11 +1934,14 @@ fail_change_next_allocation: * to a user_fbootstraptransfer_t else we get a pointer to a * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (is64bit) { user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data; } else { - fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data; + user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data; user_bootstrapp = &user_bootstrap; user_bootstrap.fbt_offset = bootstrapp->fbt_offset; user_bootstrap.fbt_length = bootstrapp->fbt_length; @@ -1793,17 +1995,47 @@ fail_change_next_allocation: *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } else { - *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate); + *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } return 0; } - case HFS_GET_MOUNT_TIME: - return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time)); + case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; break; - case HFS_GET_LAST_MTIME: - return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime)); + case SPOTLIGHT_FSCTL_GET_LAST_MTIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; + break; + + case HFS_FSCTL_SET_VERY_LOW_DISK: + if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_LOW_DISK: + if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel + || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { + + return EINVAL; + } + + hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_DESIRED_DISK: + if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; + break; + + case HFS_VOLUME_STATUS: + *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; break; case HFS_SET_BOOT_INFO: @@ -1811,6 +2043,9 @@ fail_change_next_allocation: return(EINVAL); if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) return(EACCES); /* must be superuser or owner of filesystem */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } HFS_MOUNT_LOCK(hfsmp, TRUE); bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); HFS_MOUNT_UNLOCK(hfsmp, TRUE); @@ -1833,22 +2068,52 @@ 
fail_change_next_allocation:
 		if (!is_suser()) {
 			return EACCES;
 		}
-
+
 		/* Allowed only on the root vnode of the boot volume */
 		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
 		    !vnode_isvroot(vp)) {
 			return EINVAL;
 		}
-
+		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+			return (EROFS);
+		}
 		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
 		hfs_mark_volume_inconsistent(hfsmp);
 		break;
 
+	case HFS_FSCTL_GET_JOURNAL_INFO:
+		jip = (struct hfs_journal_info*)ap->a_data;
+
+		if (vp == NULLVP)
+			return EINVAL;
+
+		if (hfsmp->jnl == NULL) {
+			jnl_start = 0;
+			jnl_size = 0;
+		} else {
+			jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+			jnl_size = (off_t)hfsmp->jnl_size;
+		}
+
+		jip->jstart = jnl_start;
+		jip->jsize = jnl_size;
+		break;
+
+	case HFS_SET_ALWAYS_ZEROFILL: {
+		struct cnode *cp = VTOC(vp);
+
+		if (*(int *)ap->a_data) {
+			cp->c_flag |= C_ALWAYS_ZEROFILL;
+		} else {
+			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+		}
+		break;
+	}
+
 	default:
 		return (ENOTTY);
 	}
 
-	/* Should never get here */
 	return 0;
 }
@@ -2015,6 +2280,26 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
 	int started_tr = 0;
 	int tooklock = 0;
 
+#if HFS_COMPRESSION
+	if (VNODE_IS_RSRC(vp)) {
+		/* allow blockmaps to the resource fork */
+	} else {
+		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+			switch(state) {
+				case FILE_IS_COMPRESSED:
+					return ENOTSUP;
+				case FILE_IS_CONVERTING:
+					/* if FILE_IS_CONVERTING, we allow blockmap */
+					break;
+				default:
+					printf("invalid state %d for compressed file\n", state);
+					/* fall through */
+			}
+		}
+	}
+#endif /* HFS_COMPRESSION */
+
 	/* Do not allow blockmap operation on a directory */
 	if (vnode_isdir(vp)) {
 		return (ENOTSUP);
 	}
@@ -2196,7 +2481,7 @@ retry:
 			 * end of this range and the file's EOF):
 			 */
 			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
-			    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
 				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
 			}
 			break;
@@ -2208,7 +2493,7 @@ retry:
 		/* There's actually no valid information to be had starting here: */
 		*ap->a_bpn = (daddr64_t)-1;
 		if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
-		    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
+		    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
 			bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
 		}
 	} else {
@@ -2254,9 +2539,19 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
 	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
 }
 
+static int
+hfs_minorupdate(struct vnode *vp) {
+	struct cnode *cp = VTOC(vp);
+	cp->c_flag &= ~C_MODIFIED;
+	cp->c_touch_acctime = 0;
+	cp->c_touch_chgtime = 0;
+	cp->c_touch_modtime = 0;
+
+	return 0;
+}
 
 static int
-do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
 {
 	register struct cnode *cp = VTOC(vp);
 	struct filefork *fp = VTOF(vp);
@@ -2266,7 +2561,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context
 	off_t bytesToAdd;
 	off_t actualBytesAdded;
 	off_t filebytes;
-	u_long fileblocks;
+	u_int32_t fileblocks;
 	int blksize;
 	struct hfsmount *hfsmp;
 	int lockflags;
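
The HFS_FSCTL_GET_JOURNAL_INFO case added above just reports the journal's device offset and size through the struct hfs_journal_info the caller supplies. A minimal user-space probe could look like the sketch below; it is illustrative only and assumes the selector and structure are exported through <hfs/hfs_fsctl.h> and that fsctl(2) accepts the selector spelled this way (some releases expose an _IOR-encoded HFS_IOC_* variant instead, so check the header before relying on it).

	/* Hypothetical user-space probe of the journal-info fsctl above.
	 * ASSUMPTIONS: <hfs/hfs_fsctl.h> defines HFS_FSCTL_GET_JOURNAL_INFO in a
	 * form fsctl(2) accepts, and struct hfs_journal_info has jstart/jsize
	 * fields as used by the kernel code in this diff. */
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/fsctl.h>
	#include <hfs/hfs_fsctl.h>

	static int
	print_journal_info(const char *mntpoint)
	{
		struct hfs_journal_info jinfo;

		if (fsctl(mntpoint, HFS_FSCTL_GET_JOURNAL_INFO, &jinfo, 0) == -1) {
			perror("fsctl(HFS_FSCTL_GET_JOURNAL_INFO)");
			return -1;
		}
		/* jstart and jsize come back as 0 on a non-journaled volume. */
		printf("journal offset %lld, size %lld\n",
		       (long long)jinfo.jstart, (long long)jinfo.jsize);
		return 0;
	}
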
@@ -2324,7 +2619,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context
 	 */
 	if (length > filebytes) {
 		int eflags;
-		u_long blockHint = 0;
+		u_int32_t blockHint = 0;
 
 		/* All or nothing and don't round up to clumpsize. */
 		eflags = kEFAllMask | kEFNoClumpMask;
@@ -2372,8 +2667,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context
 		hfs_systemfile_unlock(hfsmp, lockflags);
 
 		if (hfsmp->jnl) {
-			(void) hfs_update(vp, TRUE);
-			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+			if (skipupdate) {
+				(void) hfs_minorupdate(vp);
+			}
+			else {
+				(void) hfs_update(vp, TRUE);
+				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+			}
 		}
 
 		hfs_end_transaction(hfsmp);
@@ -2504,10 +2804,14 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context
 		if (retval == 0) {
 			fp->ff_size = length;
 		}
-		(void) hfs_update(vp, TRUE);
-		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+		if (skipupdate) {
+			(void) hfs_minorupdate(vp);
+		}
+		else {
+			(void) hfs_update(vp, TRUE);
+			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+		}
 	}
-
 	hfs_end_transaction(hfsmp);
 
 	filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
@@ -2523,9 +2827,20 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context
 		cp->c_touch_modtime = TRUE;
 		fp->ff_size = length;
 	}
-	cp->c_touch_chgtime = TRUE;	/* status changed */
-	cp->c_touch_modtime = TRUE;	/* file data was modified */
-	retval = hfs_update(vp, MNT_WAIT);
+	if (cp->c_mode & (S_ISUID | S_ISGID)) {
+		if (!vfs_context_issuser(context)) {
+			cp->c_mode &= ~(S_ISUID | S_ISGID);
+			skipupdate = 0;
+		}
+	}
+	if (skipupdate) {
+		retval = hfs_minorupdate(vp);
+	}
+	else {
+		cp->c_touch_chgtime = TRUE;	/* status changed */
+		cp->c_touch_modtime = TRUE;	/* file data was modified */
+		retval = hfs_update(vp, MNT_WAIT);
+	}
 	if (retval) {
 		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
 			-1, -1, -1, retval, 0);
@@ -2548,11 +2863,11 @@ Err_Exit:
 
 __private_extern__
 int
 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
-             vfs_context_t context)
+             int skipupdate, vfs_context_t context)
 {
 	struct filefork *fp = VTOF(vp);
 	off_t filebytes;
-	u_long fileblocks;
+	u_int32_t fileblocks;
 	int blksize, error = 0;
 	struct cnode *cp = VTOC(vp);
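
Taken together, the hunks above thread a new skipupdate argument from hfs_truncate() down into do_hfs_truncate(), which then chooses between the usual hfs_update()/hfs_volupdate() pair and the new hfs_minorupdate() that simply drops the pending timestamp and C_MODIFIED state. The fragment below is only a condensed illustration of that decision, assuming the declarations already present in this file; it is not the kernel code itself:

	/* Condensed sketch of the update choice introduced by the skipupdate flag;
	 * the real do_hfs_truncate() performs this inside its transaction handling. */
	static int
	truncate_update_sketch(struct vnode *vp, int skipupdate)
	{
		if (skipupdate) {
			/* minor update: clear dirtiness without touching the catalog */
			return hfs_minorupdate(vp);
		}
		/* normal path: push cnode and volume header changes out */
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(VTOHFS(vp), VOL_UPDATE, 0);
		return 0;
	}
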
@@ -2577,7 +2892,15 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 	// If skipsetsize is set, then the caller is responsible
 	// for the ubc_setsize.
 	//
-	if (!skipsetsize)
+	// Even if skipsetsize is set, if the length is zero we
+	// want to call ubc_setsize() because as of SnowLeopard
+	// it will no longer cause any page-ins and it will drop
+	// any dirty pages so that we don't do any i/o that we
+	// don't have to.  This also prevents a race where i/o
+	// for truncated blocks may overwrite later data if the
+	// blocks get reallocated to a different file.
+	//
+	if (!skipsetsize || length == 0)
 		ubc_setsize(vp, length);
 
 	// have to loop truncating or growing files that are
@@ -2592,7 +2915,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 				filebytes = length;
 			}
 			cp->c_flag |= C_FORCEUPDATE;
-			error = do_hfs_truncate(vp, filebytes, flags, context);
+			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
 			if (error)
 				break;
 		}
@@ -2604,13 +2927,13 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
 				filebytes = length;
 			}
 			cp->c_flag |= C_FORCEUPDATE;
-			error = do_hfs_truncate(vp, filebytes, flags, context);
+			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
 			if (error)
 				break;
 		}
 	} else /* Same logical size */ {
-		error = do_hfs_truncate(vp, length, flags, context);
+		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
 	}
 	/* Files that are changing size are not hot file candidates. */
 	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
@@ -2644,7 +2967,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 	off_t moreBytesRequested;
 	off_t actualBytesAdded;
 	off_t filebytes;
-	u_long fileblocks;
+	u_int32_t fileblocks;
 	int retval, retval2;
 	u_int32_t blockHint;
 	u_int32_t extendFlags;   /* For call to ExtendFileC */
@@ -2688,6 +3011,8 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 		extendFlags |= kEFAllMask;
 	if (cred && suser(cred, NULL) != 0)
 		extendFlags |= kEFReserveMask;
+	if (hfs_virtualmetafile(cp))
+		extendFlags |= kEFMetadataMask;
 
 	retval = E_NONE;
 	blockHint = 0;
@@ -2728,7 +3053,6 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 		 * Allocate Journal and Quota files in metadata zone.
 		 */
 		if (hfs_virtualmetafile(cp)) {
-			extendFlags |= kEFMetadataMask;
 			blockHint = hfsmp->hfs_metazone_start;
 		} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
 			   (blockHint <= hfsmp->hfs_metazone_end)) {
@@ -2760,6 +3084,12 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 			bytesRequested = moreBytesRequested;
 		}
 
+		if (extendFlags & kEFContigMask) {
+			// if we're on a sparse device, this will force it to do a
+			// full scan to find the space needed.
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN; + } + retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, bytesRequested, @@ -2815,7 +3145,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { */ } - retval = hfs_truncate(vp, length, 0, 0, ap->a_context); + retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; /* @@ -2871,6 +3201,30 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) vnode_t vp = ap->a_vp; int error; +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (error == 0) { + /* successful page-in, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return error; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + } + } +#endif + error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags); /* @@ -2935,62 +3289,268 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) vnode_t vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - int retval; + int retval = 0; off_t filesize; + upl_t upl; + upl_page_info_t* pl; + vm_offset_t a_pl_offset; + int a_flags; + int is_pageoutv2 = 0; cp = VTOC(vp); fp = VTOF(vp); - if (vnode_isswap(vp)) { - filesize = fp->ff_size; - } else { - off_t end_of_range; - int tooklock = 0; - - if (cp->c_lockowner != current_thread()) { - if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { - if (!(ap->a_flags & UPL_NOCOMMIT)) { - ubc_upl_abort_range(ap->a_pl, - ap->a_pl_offset, - ap->a_size, - UPL_ABORT_FREE_ON_EMPTY); + /* + * Figure out where the file ends, for pageout purposes. If + * ff_new_size > ff_size, then we're in the middle of extending the + * file via a write, so it is safe (and necessary) that we be able + * to pageout up to that point. + */ + filesize = fp->ff_size; + if (fp->ff_new_size > filesize) + filesize = fp->ff_new_size; + + a_flags = ap->a_flags; + a_pl_offset = ap->a_pl_offset; + + /* + * we can tell if we're getting the new or old behavior from the UPL + */ + if ((upl = ap->a_pl) == NULL) { + int request_flags; + + is_pageoutv2 = 1; + /* + * we're in control of any UPL we commit + * make sure someone hasn't accidentally passed in UPL_NOCOMMIT + */ + a_flags &= ~UPL_NOCOMMIT; + a_pl_offset = 0; + + /* + * take truncate lock (shared) to guard against + * zero-fill thru fsync interfering, but only for v2 + */ + hfs_lock_truncate(cp, 0); + + if (a_flags & UPL_MSYNC) { + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + } + else { + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + } + ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); + + if (upl == (upl_t) NULL) { + retval = EINVAL; + goto pageout_done; + } + } + /* + * from this point forward upl points at the UPL we're working with + * it was either passed in or we succesfully created it + */ + + /* + * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own + * UPL instead of relying on the UPL passed into us. We go ahead and do that here, + * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for + * N dirty ranges in the UPL. 
Note that this is almost a direct copy of the + * logic in vnode_pageout except that we need to do it after grabbing the truncate + * lock in HFS so that we don't lock invert ourselves. + * + * Note that we can still get into this function on behalf of the default pager with + * non-V2 behavior (swapfiles). However in that case, we did not grab locks above + * since fsync and other writing threads will grab the locks, then mark the + * relevant pages as busy. But the pageout codepath marks the pages as busy, + * and THEN would attempt to grab the truncate lock, which would result in deadlock. So + * we do not try to grab anything for the pre-V2 case, which should only be accessed + * by the paging/VM system. + */ + + if (is_pageoutv2) { + off_t f_offset; + int offset; + int isize; + int pg_index; + int error; + int error_ret = 0; + + isize = ap->a_size; + f_offset = ap->a_f_offset; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; } - return (retval); - } - tooklock = 1; } + + /* + * initialize the offset variables before we touch the UPL. + * a_f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on. + * isize is the offset into the UPL of the last non-clean page. + */ + isize = ((pg_index + 1) * PAGE_SIZE); + + offset = 0; + pg_index = 0; + + while (isize) { + int xsize; + int num_of_pages; + + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL. + * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + if ( !upl_dirty_page(pl, pg_index)) { + panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl); + } + + /* + * We know that we have at least one dirty page. + * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize) { + if ( !upl_dirty_page(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock; + + tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + /* + * we're in the v2 path, so we are the + * owner of the UPL... 
we may have already + * processed some of the UPL, so abort it + * from the current working offset to the + * end of the UPL + */ + ubc_upl_abort_range(upl, + offset, + ap->a_size - offset, + UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + tooklock = 1; + } + end_of_range = f_offset + xsize - 1; - filesize = fp->ff_size; - end_of_range = ap->a_f_offset + ap->a_size - 1; - - if (end_of_range >= filesize) { - end_of_range = (off_t)(filesize - 1); + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (f_offset < filesize) { + rl_remove(f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } + if (tooklock) { + hfs_unlock(cp); + } + } + if ((error = cluster_pageout(vp, upl, offset, f_offset, + xsize, filesize, a_flags))) { + if (error_ret == 0) + error_ret = error; + } + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; } - if (ap->a_f_offset < filesize) { - rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); - cp->c_flag |= C_MODIFIED; /* leof is dirty */ + /* capture errnos bubbled out of cluster_pageout if they occurred */ + if (error_ret != 0) { + retval = error_ret; } + } /* end block for v2 pageout behavior */ + else { + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if (!(a_flags & UPL_NOCOMMIT)) { + ubc_upl_abort_range(upl, + a_pl_offset, + ap->a_size, + UPL_ABORT_FREE_ON_EMPTY); + } + goto pageout_done; + } + tooklock = 1; + } + end_of_range = ap->a_f_offset + ap->a_size - 1; + + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (ap->a_f_offset < filesize) { + rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } - if (tooklock) { - hfs_unlock(cp); + if (tooklock) { + hfs_unlock(cp); + } } + /* + * just call cluster_pageout for old pre-v2 behavior + */ + retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset, + ap->a_size, filesize, a_flags); } - retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, - ap->a_size, filesize, ap->a_flags); - /* - * If data was written, and setuid or setgid bits are set and - * this process is not the superuser then clear the setuid and - * setgid bits as a precaution against tampering. + * If data was written, update the modification time of the file. + * If setuid or setgid bits are set and this process is not the + * superuser then clear the setuid and setgid bits as a precaution + * against tampering. 
*/ - if ((retval == 0) && - (cp->c_mode & (S_ISUID | S_ISGID)) && - (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_mode &= ~(S_ISUID | S_ISGID); + if (retval == 0) { + cp->c_touch_modtime = TRUE; cp->c_touch_chgtime = TRUE; - hfs_unlock(cp); + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_mode &= ~(S_ISUID | S_ISGID); + hfs_unlock(cp); + } + } + +pageout_done: + if (is_pageoutv2) { + /* release truncate lock (shared) */ + hfs_unlock_truncate(cp, 0); } return (retval); } @@ -3029,7 +3589,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) block.blockSize = buf_count(bp); /* Endian un-swap B-Tree node */ - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false); if (retval) panic("hfs_vnop_bwrite: about to write corrupt node!\n"); } @@ -3213,8 +3773,9 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, retval = ENOSPC; goto restore; } else if ((eflags & kEFMetadataMask) && - ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) > + ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) > hfsmp->hfs_metazone_end)) { +#if 0 const char * filestr; char emptystr = '\0'; @@ -3225,7 +3786,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, } else { filestr = &emptystr; } - printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks); +#endif retval = ENOSPC; goto restore; } @@ -3373,16 +3934,14 @@ static int hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) { caddr_t bufp; - size_t writebase; size_t bufsize; size_t copysize; size_t iosize; - off_t filesize; size_t offset; + off_t writebase; uio_t auio; int error = 0; - filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */ writebase = blkstart * blksize; copysize = blkcnt * blksize; iosize = bufsize = MIN(copysize, 128 * 1024); @@ -3393,12 +3952,12 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } hfs_unlock(VTOC(vp)); - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); while (offset < copysize) { iosize = MIN(copysize - offset, iosize); - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufp, iosize); error = cluster_read(vp, auio, copysize, IO_NOCACHE); @@ -3407,16 +3966,16 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) break; } if (uio_resid(auio) != 0) { - printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio)); + printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio)); error = EIO; break; } - uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufp, iosize); - error = cluster_write(vp, auio, filesize + offset, - filesize + offset + iosize, + error = cluster_write(vp, auio, writebase + offset, + writebase + offset + iosize, uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); if (error) { printf("hfs_clonefile: cluster_write failed - %d\n", error); @@ -3431,11 +3990,25 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } uio_free(auio); - /* - * No need to call ubc_sync_range or hfs_invalbuf - * since the file was copied using IO_NOCACHE. 
-	 */
-
+	if ((blksize & PAGE_MASK)) {
+		/*
+		 * since the copy may not have started on a PAGE
+		 * boundary (or may not have ended on one), we
+		 * may have pages left in the cache since NOCACHE
+		 * will let partially written pages linger...
+		 * lets just flush the entire range to make sure
+		 * we don't have any pages left that are beyond
+		 * (or intersect) the real LEOF of this file
+		 */
+		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
+	} else {
+		/*
+		 * No need to call ubc_sync_range or hfs_invalbuf
+		 * since the file was copied using IO_NOCACHE and
+		 * the copy was done starting and ending on a page
+		 * boundary in the file.
+		 */
+	}
 	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
 
 	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
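
The (blksize & PAGE_MASK) test added at the end of hfs_clonefile() only fires when the allocation block size is not a multiple of the VM page size, which is exactly the case where the IO_NOCACHE copy can leave partially written pages cached beyond the cloned range. The helper below is a stripped-down sketch of that decision under the same names as the local variables above, purely for illustration:

	/* Illustrative sketch of the flush decision made in hfs_clonefile() above.
	 * writebase/offset mirror the start of the cloned range and the number of
	 * bytes copied; blksize is the volume allocation block size (assumed). */
	static void
	clone_flush_sketch(vnode_t vp, int blksize, off_t writebase, off_t offset)
	{
		if (blksize & PAGE_MASK) {
			/* unaligned block size: push and invalidate any straggler pages
			 * that the IO_NOCACHE writes left behind in this range */
			(void) ubc_msync(vp, writebase, writebase + offset, NULL,
			                 UBC_INVALIDATE | UBC_PUSHDIRTY);
		}
		/* page-aligned block sizes need nothing extra: IO_NOCACHE already
		 * kept whole pages out of the cache during the copy */
	}
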