+
+/*
+ * Purge buffer cache for simulating cold starts
+ *
+ * Per-vnode worker: issues a single ubc_msync() over the range
+ * [0, ubc_getsize(vp)) with UBC_PUSHALL | UBC_INVALIDATE.  The result of
+ * ubc_msync() is deliberately not examined; the callback always reports
+ * VNODE_RETURNED to the iterator.
+ */
+static int
+vnode_purge_callback(struct vnode *vp, __unused void *cargs)
+{
+	off_t range_end = ubc_getsize(vp);
+	int msync_flags = UBC_PUSHALL | UBC_INVALIDATE;
+
+	/* no residual offset is requested, hence the NULL */
+	(void)ubc_msync(vp, (off_t)0, range_end, NULL, msync_flags);
+
+	return VNODE_RETURNED;
+}
+
+/*
+ * Per-mount worker for vfs_purge(): runs vnode_purge_callback over the
+ * vnodes of mp using VNODE_WAIT | VNODE_ITERATE_ALL.  The iteration result
+ * is ignored and VFS_RETURNED is always handed back to the mount iterator.
+ */
+static int
+vfs_purge_callback(mount_t mp, __unused void * arg)
+{
+	int iter_flags = VNODE_WAIT | VNODE_ITERATE_ALL;
+
+	(void)vnode_iterate(mp, iter_flags, vnode_purge_callback, NULL);
+
+	return VFS_RETURNED;
+}
+
+/*
+ * vfs_purge system call entry point.
+ *
+ * Callers whose current credential does not pass kauth_cred_issuser() get
+ * EPERM.  Otherwise vfs_purge_callback is run via vfs_iterate() with no
+ * flags and 0 is returned; none of the arguments are used.
+ */
+int
+vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
+{
+	if (kauth_cred_issuser(kauth_cred_get())) {
+		vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
+		return 0;
+	}
+
+	return EPERM;
+}
+
+/*
+ * Fetch the vnode of the (unnamed) snapshot directory of the filesystem
+ * that rvp lives on.
+ *
+ * This is a thin wrapper around VFS_VGET_SNAPDIR() applied to
+ * vnode_mount(rvp).  On success the snapshot directory vnode is stored in
+ * *sdvpp with an iocount held on it; the return value is whatever
+ * VFS_VGET_SNAPDIR() returned.
+ */
+int
+vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
+{
+	int error;
+
+	error = VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
+
+	return (error);
+}
+
+/*
+ * Get the snapshot vnode.
+ *
+ * Resolves dirfd to a vnode (*rvpp), which must be the root vnode of its
+ * filesystem, verifies that the filesystem reports VOL_CAP_INT_SNAPSHOT
+ * as both valid and set, fetches the snapshot directory vnode (*sdvpp),
+ * copies in and validates the snapshot name, and finally runs namei() on
+ * that name with ndp->ni_dvp set to the snapshot directory.
+ *
+ * Parameters:
+ *   dirfd  - file descriptor handed to vnode_getfromfd()
+ *   rvpp   - out: vnode obtained from dirfd
+ *   sdvpp  - out: snapshot directory vnode
+ *   name   - user address of the snapshot name (copied in with copyinstr(),
+ *            at most MAXPATHLEN bytes)
+ *   ndp    - nameidata initialised here with NDINIT and passed to namei()
+ *   op     - nameiop handed to NDINIT; with CONFIG_MACF, CREATE and DELETE
+ *            additionally select the matching MAC snapshot check
+ *   pathop - path operation handed to NDINIT (declared __unused when
+ *            CONFIG_TRIGGERS is off)
+ *   ctx    - VFS context used for the calls made here
+ *
+ * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
+ * needs nameidone() on ndp.
+ *
+ * If the snapshot vnode exists it is returned in ndp->ni_vp.
+ *
+ * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
+ * not needed.
+ *
+ * Errors generated here: EINVAL (dirfd is not a filesystem root, or the
+ * name is empty, ".", ".." or contains a '/') and ENOTSUP (no snapshot
+ * capability).  Errors from vnode_getfromfd(), vnode_get_snapdir(),
+ * copyinstr(), the MAC checks and namei() are returned unchanged.
+ */
+static int
+vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
+ user_addr_t name, struct nameidata *ndp, int32_t op,
+#if !CONFIG_TRIGGERS
+ __unused
+#endif
+ enum path_operation pathop,
+ vfs_context_t ctx)
+{
+ int error, i;
+ caddr_t name_buf;
+ size_t name_len;
+ struct vfs_attr vfa;
+
+ *sdvpp = NULLVP; /* cleared up front so the error path at "out" knows what to release */
+ *rvpp = NULLVP;
+
+ error = vnode_getfromfd(ctx, dirfd, rvpp);
+ if (error)
+ return (error);
+
+ if (!vnode_isvroot(*rvpp)) { /* only a filesystem root is accepted */
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Make sure the filesystem supports snapshots */
+ VFSATTR_INIT(&vfa);
+ VFSATTR_WANTED(&vfa, f_capabilities);
+ if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
+ !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
+ !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
+ VOL_CAP_INT_SNAPSHOT)) ||
+ !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
+ VOL_CAP_INT_SNAPSHOT))) {
+ error = ENOTSUP;
+ goto out;
+ }
+
+ error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
+ if (error)
+ goto out;
+
+ MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK); /* released at out1 */
+ error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+ if (error)
+ goto out1;
+
+ /*
+ * Some sanity checks- name can't be empty, "." or ".." or have slashes.
+ * (the length returned by copyinstr includes the terminating NUL)
+ */
+ if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
+ (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
+ error = EINVAL;
+ goto out1;
+ }
+ for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++); /* empty body: i stops at the first '/' */
+ if (i < (int)name_len) { /* a '/' was found before the end of the name */
+ error = EINVAL;
+ goto out1;
+ }
+
+#if CONFIG_MACF
+ if (op == CREATE) {
+ error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
+ name_buf);
+ } else if (op == DELETE) {
+ error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
+ name_buf);
+ }
+ if (error) /* for any other op, error is still 0 from copyinstr() */
+ goto out1;
+#endif
+
+ /* Check if the snapshot already exists ... */
+ NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
+ UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
+ ndp->ni_dvp = *sdvpp; /* presumably the start directory namei() uses because of USEDVP - confirm */
+
+ error = namei(ndp);
+out1: /* name_buf is released on success as well as on failure */
+ FREE(name_buf, M_TEMP);
+out: /* on error, drop whatever was acquired and hand back NULLVPs */
+ if (error) {
+ if (*sdvpp) {
+ vnode_put(*sdvpp);
+ *sdvpp = NULLVP;
+ }
+ if (*rvpp) {
+ vnode_put(*rvpp);
+ *rvpp = NULLVP;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Create a filesystem snapshot (on filesystems that support them).
+ *
+ * This is a heavily simplified openat(dirfd, name, O_CREAT | O_EXCL): the
+ * (unnamed) snapshot directory vnode is located and a new vnode for the
+ * snapshot is created inside it.
+ *
+ * Restrictions on the name (enforced by vnode_get_snapshot()):
+ *
+ * a) it cannot contain slashes.
+ * b) it cannot be "." or "..".
+ *
+ * Returns EEXIST when the lookup already produced a vnode for the name,
+ * otherwise the result of vn_create().
+ *
+ * Since this requires superuser privileges, vnode_authorize calls are not
+ * made (vn_create() is invoked with VN_CREATE_NOAUTH).
+ */
+static int
+snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
+    vfs_context_t ctx)
+{
+	struct nameidata namend;
+	struct vnode_attr va;
+	vnode_t rvp, snapdvp;
+	vnode_t newvp = NULLVP;
+	int error;
+
+	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
+	    OP_LINK, ctx);
+	if (error != 0)
+		return (error);
+
+	if (namend.ni_vp != NULLVP) {
+		/* The name is already taken: drop the reference and bail out. */
+		vnode_put(namend.ni_vp);
+		error = EEXIST;
+		goto done;
+	}
+
+	/* A snapshot is created as a regular file with mode 0. */
+	VATTR_INIT(&va);
+	VATTR_SET(&va, va_type, VREG);
+	VATTR_SET(&va, va_mode, 0);
+
+	error = vn_create(snapdvp, &newvp, &namend, &va,
+	    VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
+	if (error == 0 && newvp != NULLVP)
+		vnode_put(newvp);
+
+done:
+	nameidone(&namend);
+	vnode_put(snapdvp);
+	vnode_put(rvp);
+	return (error);
+}
+
+/*
+ * Delete a filesystem snapshot.
+ *
+ * Looks up the unnamed snapshot directory and the named snapshot inside it
+ * (vnode_get_snapshot() with a DELETE lookup), then removes the snapshot
+ * with VNOP_REMOVE(), passing VNODE_REMOVE_SKIP_NAMESPACE_EVENT.
+ *
+ * Returns the lookup error if the lookup failed, otherwise the result of
+ * VNOP_REMOVE().  All references taken by the lookup are dropped before
+ * returning.
+ */
+static int
+snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
+    vfs_context_t ctx)
+{
+	struct nameidata namend;
+	vnode_t rvp, snapdvp, snapvp;
+	int error;
+
+	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
+	    OP_UNLINK, ctx);
+	if (error != 0)
+		return (error);
+
+	snapvp = namend.ni_vp;
+	error = VNOP_REMOVE(snapdvp, snapvp, &namend.ni_cnd,
+	    VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
+
+	vnode_put(snapvp);
+	nameidone(&namend);
+	vnode_put(snapdvp);
+	vnode_put(rvp);
+
+	return (error);
+}
+
+/*
+ * Revert a filesystem to a snapshot
+ *
+ * Marks the filesystem to revert to the given snapshot on next mount.
+ *
+ * The request is first issued as a VFSIOC_REVERT_SNAPSHOT VFS_IOCTL on the
+ * mount that the vnode behind dirfd belongs to, with the copied-in name
+ * wrapped in a locally built componentname.  If that ioctl returns any
+ * error, the snapshot is looked up with vnode_get_snapshot() and
+ * APFS_REVERT_TO_SNAPSHOT is sent to the snapshot vnode with VNOP_IOCTL
+ * instead; the error of that second attempt (or of its lookup) is then
+ * what the caller sees.
+ *
+ * Parameters:
+ *   dirfd - file descriptor handed to vnode_getfromfd()
+ *   name  - user address of the snapshot name
+ *   flags - unused
+ *   ctx   - VFS context used for the calls made here
+ *
+ * Note that, unlike vnode_get_snapshot(), the first attempt performs no
+ * sanity checks on the name and does not check that dirfd refers to a
+ * filesystem root.
+ */
+static int
+snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
+ vfs_context_t ctx)
+{
+ int error;
+ vnode_t rvp;
+ mount_t mp;
+ struct fs_snapshot_revert_args revert_data;
+ struct componentname cnp;
+ caddr_t name_buf;
+ size_t name_len;
+
+ error = vnode_getfromfd(ctx, dirfd, &rvp);
+ if (error) {
+ return (error);
+ }
+ mp = vnode_mount(rvp);
+
+ MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ vnode_put(rvp);
+ return (error);
+ }
+
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ vnode_put(rvp);
+ return (error);
+ }
+#endif
+
+ /*
+ * Grab mount_iterref so that we can release the vnode,
+ * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
+ */
+ error = mount_iterref (mp, 0);
+ vnode_put(rvp); /* dropped whether or not mount_iterref() succeeded */
+ if (error) {
+ FREE(name_buf, M_TEMP);
+ return (error);
+ }
+
+ memset(&cnp, 0, sizeof(cnp));
+ cnp.cn_pnbuf = (char *)name_buf;
+ cnp.cn_nameiop = LOOKUP;
+ cnp.cn_flags = ISLASTCN | HASBUF;
+ cnp.cn_pnlen = MAXPATHLEN;
+ cnp.cn_nameptr = cnp.cn_pnbuf;
+ cnp.cn_namelen = (int)name_len; /* NOTE(review): copyinstr() length counts the terminating NUL - confirm the filesystem expects that */
+ revert_data.sr_cnp = &cnp;
+
+ error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
+ mount_iterdrop(mp);
+ FREE(name_buf, M_TEMP); /* the fallback below copies the name in again from user space */
+
+ if (error) {
+ /* If there was any error, try again using VNOP_IOCTL */
+
+ vnode_t snapdvp;
+ struct nameidata namend;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
+ OP_LOOKUP, ctx);
+ if (error) {
+ return (error);
+ }
+
+
+#ifndef APFSIOC_REVERT_TO_SNAPSHOT
+#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
+#endif
+
+#ifndef APFS_REVERT_TO_SNAPSHOT
+#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
+#endif
+
+ error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
+ 0, ctx);
+
+ vnode_put(namend.ni_vp); /* release everything vnode_get_snapshot() handed back */
+ nameidone(&namend);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ }
+
+ return (error);
+}
+
+/*
+ * rename a Filesystem snapshot
+ *
+ * get the vnode for the unnamed snapshot directory and the snapshot and
+ * rename the snapshot. This is a very specialised (and simple) case of
+ * rename(2) (which has to deal with a lot more complications). It differs
+ * slightly from rename(2) in that EEXIST is returned if the new name exists.
+ *
+ * Parameters:
+ *   dirfd - file descriptor handed to vnode_get_snapshot()
+ *   old   - user address of the current snapshot name
+ *   new   - user address of the new snapshot name
+ *   flags - unused
+ *   ctx   - VFS context used for the calls made here
+ *
+ * The old name is looked up through vnode_get_snapshot() with a DELETE
+ * lookup; the new name is copied in, validated and looked up here with a
+ * RENAME lookup in the same snapshot directory.  With CONFIG_MACF the new
+ * name is checked with mac_mount_check_snapshot_create().
+ *
+ * Returns EINVAL for an empty, ".", ".." or slash-containing new name,
+ * EEXIST if the new name already resolves to a vnode, otherwise the error
+ * of the failing call or the result of VNOP_RENAME().
+ */
+static int
+snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
+ __unused uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp;
+ int error, i;
+ caddr_t newname_buf;
+ size_t name_len;
+ vnode_t fvp;
+ struct nameidata *fromnd, *tond;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata from_node;
+ struct nameidata to_node;
+ } * __rename_data;
+
+ MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ fromnd = &__rename_data->from_node;
+ tond = &__rename_data->to_node;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
+ OP_UNLINK, ctx);
+ if (error)
+ goto out;
+ fvp = fromnd->ni_vp; /* the snapshot being renamed */
+
+ MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+ error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
+ if (error)
+ goto out1;
+
+ /*
+ * Some sanity checks- new name can't be empty, "." or ".." or have
+ * slashes.
+ * (the length returned by copyinstr includes the terminating NUL)
+ *
+ * The FS rename VNOP is supposed to handle this but we'll pick it
+ * off here itself.
+ */
+ if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
+ (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
+ error = EINVAL;
+ goto out1;
+ }
+ for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++); /* empty body: i stops at the first '/' */
+ if (i < (int)name_len) { /* a '/' was found before the end of the name */
+ error = EINVAL;
+ goto out1;
+ }
+
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
+ newname_buf);
+ if (error)
+ goto out1;
+#endif
+
+ NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
+ UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
+ tond->ni_dvp = snapdvp; /* same snapshot directory as the source */
+
+ error = namei(tond);
+ if (error) {
+ goto out2;
+ } else if (tond->ni_vp) {
+ /*
+ * snapshot rename behaves differently than rename(2) - if the
+ * new name exists, EEXIST is returned.
+ */
+ vnode_put(tond->ni_vp);
+ error = EEXIST;
+ goto out2;
+ }
+
+ error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
+ &tond->ni_cnd, ctx);
+
+out2: /* reached once namei(tond) has been attempted */
+ nameidone(tond);
+out1: /* reached once the source lookup succeeded and newname_buf was allocated */
+ FREE(newname_buf, M_TEMP);
+ vnode_put(fvp);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ nameidone(fromnd);
+out: /* only the nameidata scratch allocation is left to release */
+ FREE(__rename_data, M_TEMP);
+ return (error);
+}
+
+/*
+ * Mount a Filesystem snapshot
+ *
+ * get the vnode for the unnamed snapshot directory and the snapshot and
+ * mount the snapshot.
+ *
+ * Parameters:
+ *   dirfd     - file descriptor handed to vnode_get_snapshot()
+ *   name      - user address of the snapshot name
+ *   directory - user address of the path of the vnode to be covered
+ *   mnt_data  - unused
+ *   flags     - unused
+ *   ctx       - VFS context used for the calls made here
+ *
+ * Returns EIO if the vnode behind dirfd has no mount or is on
+ * dead_mountp, EINVAL if the vnode to be covered is flagged VROOT on a
+ * mount flagged MNT_ROOTFS, otherwise the error of the failing lookup or
+ * the result of mount_common().
+ */
+static int
+snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
+ __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t rvp, snapdvp, snapvp, vp, pvp;
+ int error;
+ struct nameidata *snapndp, *dirndp;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata snapnd;
+ struct nameidata dirnd;
+ } * __snapshot_mount_data;
+
+ MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
+ M_TEMP, M_WAITOK);
+ snapndp = &__snapshot_mount_data->snapnd;
+ dirndp = &__snapshot_mount_data->dirnd;
+
+ error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
+ OP_LOOKUP, ctx);
+ if (error)
+ goto out;
+
+ snapvp = snapndp->ni_vp; /* the snapshot to be mounted */
+ if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
+ error = EIO;
+ goto out1;
+ }
+
+ /* Get the vnode to be covered */
+ NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
+ UIO_USERSPACE, directory, ctx);
+ error = namei(dirndp);
+ if (error)
+ goto out1;
+
+ vp = dirndp->ni_vp; /* vnode to be covered */
+ pvp = dirndp->ni_dvp; /* its parent, requested via WANTPARENT */
+
+ if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
+ error = EINVAL;
+ } else {
+ mount_t mp = vnode_mount(rvp);
+ struct fs_snapshot_mount_args smnt_data;
+
+ smnt_data.sm_mp = mp;
+ smnt_data.sm_cnp = &snapndp->ni_cnd;
+ error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
+ &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
+ KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+ }
+
+ vnode_put(vp);
+ vnode_put(pvp);
+ nameidone(dirndp);
+out1: /* release what vnode_get_snapshot() handed back */
+ vnode_put(snapvp);
+ vnode_put(snapdvp);
+ vnode_put(rvp);
+ nameidone(snapndp);
+out: /* only the nameidata scratch allocation is left to release */
+ FREE(__snapshot_mount_data, M_TEMP);
+ return (error);
+}
+
+/*
+ * Root from a snapshot of the filesystem
+ *
+ * Marks the filesystem to root from the given snapshot on next boot.
+ *
+ * The snapshot name is copied in from user space, wrapped in a locally
+ * built componentname and handed to the filesystem through a
+ * VFSIOC_ROOT_SNAPSHOT VFS_IOCTL on the mount that the vnode behind dirfd
+ * belongs to.  Returns the first error encountered, or the result of the
+ * ioctl.
+ */
+static int
+snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
+    vfs_context_t ctx)
+{
+	struct fs_snapshot_root_args root_data;
+	struct componentname cnp;
+	caddr_t name_buf;
+	size_t name_len;
+	mount_t mp;
+	vnode_t rvp;
+	int error;
+
+	error = vnode_getfromfd(ctx, dirfd, &rvp);
+	if (error != 0)
+		return (error);
+	mp = vnode_mount(rvp);
+
+	MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
+	error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
+	if (error != 0) {
+		vnode_put(rvp);
+		goto free_name;
+	}
+
+	// XXX MAC checks ?
+
+	/*
+	 * Hold the mount with mount_iterref so the vnode can be let go,
+	 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
+	 */
+	error = mount_iterref(mp, 0);
+	vnode_put(rvp);
+	if (error != 0)
+		goto free_name;
+
+	/* Describe the name to the filesystem as a single, final component. */
+	memset(&cnp, 0, sizeof(cnp));
+	cnp.cn_nameiop = LOOKUP;
+	cnp.cn_flags = ISLASTCN | HASBUF;
+	cnp.cn_pnbuf = (char *)name_buf;
+	cnp.cn_pnlen = MAXPATHLEN;
+	cnp.cn_nameptr = cnp.cn_pnbuf;
+	cnp.cn_namelen = (int)name_len;
+	root_data.sr_cnp = &cnp;
+
+	error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
+
+	mount_iterdrop(mp);
+
+free_name:
+	FREE(name_buf, M_TEMP);
+	return (error);
+}
+
+/*
+ * FS snapshot operations dispatcher
+ *
+ * Records dirfd and op for auditing, requires the caller's credential to
+ * pass priv_check_cred() for PRIV_VFS_SNAPSHOT, and then forwards to the
+ * handler selected by uap->op.  An unrecognised op yields ENOSYS; otherwise
+ * the handler's result is returned.
+ */
+int
+fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
+    __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	int error;
+
+	AUDIT_ARG(fd, uap->dirfd);
+	AUDIT_ARG(value32, uap->op);
+
+	error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
+	if (error != 0)
+		return (error);
+
+	switch (uap->op) {
+	case SNAPSHOT_OP_CREATE:
+		return (snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx));
+	case SNAPSHOT_OP_DELETE:
+		return (snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx));
+	case SNAPSHOT_OP_RENAME:
+		/* name1 is the current name, name2 the new one */
+		return (snapshot_rename(uap->dirfd, uap->name1, uap->name2,
+		    uap->flags, ctx));
+	case SNAPSHOT_OP_MOUNT:
+		/* name1 is the snapshot, name2 the directory to cover */
+		return (snapshot_mount(uap->dirfd, uap->name1, uap->name2,
+		    uap->data, uap->flags, ctx));
+	case SNAPSHOT_OP_REVERT:
+		return (snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx));
+	case SNAPSHOT_OP_ROOT:
+		return (snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx));
+	default:
+		return (ENOSYS);
+	}
+}