X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/316670eb35587141e969394ae8537d66b9211e80..527f99514973766e9c0382a4d8550dfb00f54939:/bsd/kern/kern_symfile.c?ds=inline diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index d0d467494..46018b2de 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -58,7 +59,7 @@ #include #include #include -#include +#include /* This function is called from kern_sysctl in the current process context; * it is exported with the System6.0.exports, but this appears to be a legacy @@ -79,6 +80,7 @@ struct kern_direct_file_io_ref_t dev_t device; uint32_t blksize; off_t filelength; + char cf; char pinned; }; @@ -99,7 +101,7 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t static int kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end) { - int error; + int error = 0; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1; void * p2; @@ -125,7 +127,19 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, p2 = ref->ctx; do_ioctl = &device_ioctl; } - while (offset < end) + + if (_DKIOCCSPINEXTENT == theIoctl) { + /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */ + pin.cp_extent.length = end; + pin.cp_flags = _DKIOCCSHIBERNATEIMGSIZE; + (void) do_ioctl(p1, p2, _DKIOCCSPINEXTENT, (caddr_t)&pin); + } else if (_DKIOCCSUNPINEXTENT == theIoctl) { + /* Tell CS hibernation is done, so it can stop blocking overlapping writes */ + pin.cp_flags = _DKIOCCSPINDISCARDBLACKLIST; + (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); + } + + for (; offset < end; offset += filechunk) { if (ref->vp->v_type == VREG) { @@ -133,8 +147,10 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, filechunk = 1*1024*1024*1024; if (filechunk > 
(size_t)(end - offset)) filechunk = (size_t)(end - offset); - error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); - if (error) break; + error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, + &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL); + if (error) break; + if (-1LL == blkno) continue; fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) @@ -156,54 +172,69 @@ kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, { pin.cp_extent.offset = fileblk; pin.cp_extent.length = filechunk; - pin.cp_flags = _DKIOCSPINDISCARDDATA; + pin.cp_flags = _DKIOCCSPINFORHIBERNATION; error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); if (error && (ENOTTY != error)) { - printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", - error, pin.cp_extent.offset, pin.cp_extent.length); + printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length); + } + } + else if (_DKIOCCSUNPINEXTENT == theIoctl) + { + pin.cp_extent.offset = fileblk; + pin.cp_extent.length = filechunk; + pin.cp_flags = _DKIOCCSPINFORHIBERNATION; + error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); + if (error && (ENOTTY != error)) + { + printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error, pin.cp_extent.offset, pin.cp_extent.length); } } else error = EINVAL; - if (error) break; - offset += filechunk; + if (error) break; } return (error); } -int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len); +extern uint32_t freespace_mb(vnode_t vp); struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, + boolean_t create_file, kern_get_file_extents_callback_t callback, void * callback_ref, + off_t set_file_size, + off_t fs_free_size, + off_t write_file_offset, + void * write_file_addr, + size_t write_file_len, dev_t * partition_device_result, dev_t * image_device_result, uint64_t * partitionbase_result, 
uint64_t * maxiocount_result, - uint32_t * oflags, - off_t offset, - caddr_t addr, - vm_size_t len) + uint32_t * oflags) { struct kern_direct_file_io_ref_t * ref; - proc_t p; - struct vnode_attr va; - int error; - off_t f_offset; - uint64_t fileblk; - size_t filechunk; - uint64_t physoffset; - dev_t device; - dev_t target = 0; - int isssd = 0; - uint32_t flags = 0; - uint32_t blksize; - off_t maxiocount, count; - boolean_t locked = FALSE; + proc_t p; + struct vnode_attr va; + int error; + off_t f_offset; + uint64_t fileblk; + size_t filechunk; + uint64_t physoffset; + dev_t device; + dev_t target = 0; + int isssd = 0; + uint32_t flags = 0; + uint32_t blksize; + off_t maxiocount, count, segcount; + boolean_t locked = FALSE; + int fmode, cmode; + struct nameidata nd; + u_int32_t ndflags; + off_t mpFree; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1 = NULL; @@ -220,40 +251,79 @@ kern_open_file_for_direct_io(const char * name, bzero(ref, sizeof(*ref)); p = kernproc; - ref->ctx = vfs_context_create(vfs_context_current()); + ref->ctx = vfs_context_kernel(); - if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx))) - goto out; + fmode = (create_file) ? 
(O_CREAT | FWRITE) : FWRITE; + cmode = S_IRUSR | S_IWUSR; + ndflags = NOFOLLOW; + NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx); + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, cmode); + VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); + VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D); + if ((error = vn_open_auth(&nd, &fmode, &va))) { + kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error); + goto out; + } - if (addr && len) + ref->vp = nd.ni_vp; + if (ref->vp->v_type == VREG) { - if ((error = kern_write_file(ref, offset, addr, len))) - goto out; + vnode_lock_spin(ref->vp); + SET(ref->vp->v_flag, VSWAP); + vnode_unlock(ref->vp); + } + + if (write_file_addr && write_file_len) + { + if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) { + kprintf("kern_write_file() failed with error: %d\n", error); + goto out; + } } VATTR_INIT(&va); VATTR_WANTED(&va, va_rdev); VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_devid); VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_data_alloc); VATTR_WANTED(&va, va_nlink); error = EFAULT; - if (vnode_getattr(ref->vp, &va, ref->ctx)) - goto out; + if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out; - kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); - kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid)); - kprintf("vp size %qd\n", va.va_data_size); + mpFree = freespace_mb(ref->vp); + mpFree <<= 20; + kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", + name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size); if (ref->vp->v_type == VREG) { - /* Don't dump files with links. */ - if (va.va_nlink != 1) - goto out; + /* Don't dump files with links. */ + if (va.va_nlink != 1) goto out; + + device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? 
va.va_devid : va.va_fsid; + ref->filelength = va.va_data_size; - device = va.va_fsid; p1 = &device; p2 = p; do_ioctl = &file_ioctl; + + if (set_file_size) + { + if (fs_free_size) + { + mpFree += va.va_data_alloc; + if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size)) + { + error = ENOSPC; + goto out; + } + } + error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx); + if (error) goto out; + ref->filelength = set_file_size; + } } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { @@ -267,24 +337,27 @@ kern_open_file_for_direct_io(const char * name, else { /* Don't dump to non-regular files. */ - error = EFAULT; + error = EFAULT; goto out; } ref->device = device; + // probe for CF + dk_corestorage_info_t cs_info; + memset(&cs_info, 0, sizeof(dk_corestorage_info_t)); + error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info); + ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES); + // get block size error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); if (error) goto out; - if (ref->vp->v_type == VREG) - ref->filelength = va.va_data_size; - else + if (ref->vp->v_type != VREG) { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); - if (error) - goto out; + if (error) goto out; ref->filelength = fileblk * ref->blksize; } @@ -297,22 +370,21 @@ kern_open_file_for_direct_io(const char * name, // generate the block list error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); - if (error) - goto out; + if (error) goto out; locked = TRUE; f_offset = 0; - while (f_offset < ref->filelength) + for (; f_offset < ref->filelength; f_offset += filechunk) { if (ref->vp->v_type == VREG) { filechunk = 1*1024*1024*1024; daddr64_t blkno; - error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); - if (error) - goto out; - + error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, + &filechunk, NULL, VNODE_WRITE | 
VNODE_BLOCKMAP_NO_TRACK, NULL); + if (error) goto out; + if (-1LL == blkno) continue; fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) @@ -330,8 +402,7 @@ kern_open_file_for_direct_io(const char * name, getphysreq.offset = fileblk + physoffset; getphysreq.length = (filechunk - physoffset); error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq); - if (error) - goto out; + if (error) goto out; if (!target) { target = getphysreq.dev; @@ -341,21 +412,36 @@ kern_open_file_for_direct_io(const char * name, error = ENOTSUP; goto out; } +#if HIBFRAGMENT + uint64_t rev; + for (rev = 4096; rev <= getphysreq.length; rev += 4096) + { + callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096); + } +#else callback(callback_ref, getphysreq.offset, getphysreq.length); +#endif physoffset += getphysreq.length; } - f_offset += filechunk; } callback(callback_ref, 0ULL, 0ULL); - if (ref->vp->v_type == VREG) - p1 = &target; + if (ref->vp->v_type == VREG) p1 = &target; + else + { + p1 = &target; + p2 = p; + do_ioctl = &file_ioctl; + } // get partition base - error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); - if (error) - goto out; + if (partitionbase_result) + { + error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); + if (error) + goto out; + } // get block size & constraints @@ -392,14 +478,20 @@ kern_open_file_for_direct_io(const char * name, maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count); + if (!error) + error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount); if (error) - count = 0; + count = segcount = 0; + count *= segcount; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count); + if (!error) + error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount); if (error) - count = 0; + count = segcount = 0; + count *= 
segcount; if (count && (count < maxiocount)) maxiocount = count; @@ -409,17 +501,25 @@ kern_open_file_for_direct_io(const char * name, error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd); if (!error && isssd) - flags |= kIOHibernateOptionSSD; + flags |= kIOPolledFileSSD; if (partition_device_result) *partition_device_result = device; if (image_device_result) *image_device_result = target; - if (flags) + if (oflags) *oflags = flags; + if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) + { + vnode_close(ref->vp, FWRITE, ref->ctx); + ref->vp = NULLVP; + ref->ctx = NULL; + } + out: - kprintf("kern_open_file_for_direct_io(%d)\n", error); + printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error); + if (error && locked) { @@ -431,34 +531,52 @@ out: { if (ref->vp) { + (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0); vnode_close(ref->vp, FWRITE, ref->ctx); ref->vp = NULLVP; } - vfs_context_rele(ref->ctx); + ref->ctx = NULL; kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); ref = NULL; } + return(ref); } int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len) +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) { return (vn_rdwr(UIO_WRITE, ref->vp, addr, len, offset, - UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, + UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ref->ctx), (int *) 0, vfs_context_proc(ref->ctx))); } +int +kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) +{ + return (vn_rdwr(UIO_READ, ref->vp, + addr, len, offset, + UIO_SYSSPACE, ioflag|IO_SYNC|IO_NODELOCKED|IO_UNIT, + vfs_context_ucred(ref->ctx), (int *) 0, + vfs_context_proc(ref->ctx))); +} + + +struct mount * +kern_file_mount(struct kern_direct_file_io_ref_t * ref) +{ + return (ref->vp->v_mount); +} void kern_close_file_for_direct_io(struct 
kern_direct_file_io_ref_t * ref, - off_t write_offset, caddr_t addr, vm_size_t write_length, + off_t write_offset, void * addr, size_t write_length, off_t discard_offset, off_t discard_end) { int error; - kprintf("kern_close_file_for_direct_io\n"); + printf("kern_close_file_for_direct_io(%p)\n", ref); if (!ref) return; @@ -482,23 +600,35 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, do_ioctl = &device_ioctl; } (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); - - if (addr && write_length) + + //XXX If unmapping extents then don't also need to unpin; except ... + //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and + //XXX unmap is subset, so save extra walk over file extents (and the risk + //XXX that CF drain starts) vs leaving partial units pinned to SSD + //XXX (until whatever was sharing also unmaps). Err on cleaning up fully. + boolean_t will_unmap = (!ref->pinned || ref->cf) && (discard_end > discard_offset); + boolean_t will_unpin = (ref->pinned && ref->cf /* && !will_unmap */); + + (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (will_unpin) ? ref->filelength : 0); + + if (will_unmap) { - (void) kern_write_file(ref, write_offset, addr, write_length); + (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, (ref->cf) ? ref->filelength : discard_end); } - if (discard_offset && discard_end && !ref->pinned) + + if (addr && write_length) { - (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end); + (void) kern_write_file(ref, write_offset, addr, write_length, IO_SKIP_ENCRYPTION); } error = vnode_close(ref->vp, FWRITE, ref->ctx); ref->vp = NULLVP; kprintf("vnode_close(%d)\n", error); + } - vfs_context_rele(ref->ctx); + ref->ctx = NULL; + kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); } -