+ if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
+ watcher->flags |= WATCHER_DROPPED_EVENTS;
+ fsevents_wakeup(watcher);
+ return ENOSPC;
+ }
+
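+ //
+ // publish the event: take a reference, store the entry in the slot,
+ // and only then advance wr -- the OSSynchronizeIO() barrier orders the
+ // slot store ahead of the index update so a reader never sees wr point
+ // past an unpublished entry.
+ //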
+ OSAddAtomic(1, &kfse->refcount);
+ watcher->event_queue[watcher->wr] = kfse;
+ OSSynchronizeIO();
+ watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
+
+ //
+ // wake up the watcher if there are more than MAX_NUM_PENDING events.
+ // otherwise schedule a timer (if one isn't already set) which will
+ // send any pending events if no more are received in the next
+ // EVENT_DELAY_IN_MS milliseconds.
+ //
+ int32_t num_pending = 0;
+ if (watcher->rd < watcher->wr) {
+ num_pending = watcher->wr - watcher->rd;
+ }
+
+ if (watcher->rd > watcher->wr) {
+ num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
+ }
+
+ if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
+ /* Non-Apple Service is falling behind, start dropping events for this process */
+ lck_rw_lock_exclusive(&event_handling_lock);
+ while (watcher->rd != watcher->wr) {
+ kfse = watcher->event_queue[watcher->rd];
+ watcher->event_queue[watcher->rd] = NULL;
+ watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
+ OSSynchronizeIO();
+ if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
+ release_event_ref(kfse);
+ }
+ }
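+ // note the drop so the next read in fmod_watch() delivers an
+ // FSE_EVENTS_DROPPED record before any further events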
+ watcher->flags |= WATCHER_DROPPED_EVENTS;
+ lck_rw_unlock_exclusive(&event_handling_lock);
+
+ printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
+ watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
+ watcher->eventq_size, watcher->flags);
+
+ fsevents_wakeup(watcher);
+ } else if (num_pending > MAX_NUM_PENDING) {
+ fsevents_wakeup(watcher);
+ } else if (timer_set == 0) {
+ schedule_event_wakeup();
+ }
+
+ return 0;
+}
+
+static int
+fill_buff(uint16_t type, int32_t size, const void *data,
+ char *buff, int32_t *_buff_idx, int32_t buff_sz,
+ struct uio *uio)
+{
+ int32_t amt, error = 0, buff_idx = *_buff_idx;
+ uint16_t tmp;
+
+ //
+ // the +1 on the size is to guarantee that the main data
+ // copy loop will always copy at least 1 byte
+ //
+ if ((buff_sz - buff_idx) <= (int)(2 * sizeof(uint16_t) + 1)) {
+ if (buff_idx > uio_resid(uio)) {
+ error = ENOSPC;
+ goto get_out;
+ }
+
+ error = uiomove(buff, buff_idx, uio);
+ if (error) {
+ goto get_out;
+ }
+ buff_idx = 0;
+ }
+
+ // copy out the header (type & size)
+ memcpy(&buff[buff_idx], &type, sizeof(uint16_t));
+ buff_idx += sizeof(uint16_t);
+
+ tmp = size & 0xffff;
+ memcpy(&buff[buff_idx], &tmp, sizeof(uint16_t));
+ buff_idx += sizeof(uint16_t);
+
+ // now copy the body of the data, flushing along the way
+ // if the buffer fills up.
+ //
+ while (size > 0) {
+ amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
+ memcpy(&buff[buff_idx], data, amt);
+
+ size -= amt;
+ buff_idx += amt;
+ data = (const char *)data + amt;
+ if (size > (buff_sz - buff_idx)) {
+ if (buff_idx > uio_resid(uio)) {
+ error = ENOSPC;
+ goto get_out;
+ }
+ error = uiomove(buff, buff_idx, uio);
+ if (error) {
+ goto get_out;
+ }
+ buff_idx = 0;
+ }
+
+ if (amt == 0) { // just in case...
+ break;
+ }
+ }
+
+get_out:
+ *_buff_idx = buff_idx;
+
+ return error;
+}
+
+
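+//
+// Layout of each event record as it reaches userspace, pieced together
+// from fill_buff() above and copy_out_kfse() below (informal sketch,
+// not a formal ABI description):
+//
+//   int32_t   event type (FSE_*), possibly with flag bits when the
+//             watcher asked for extended info
+//   int32_t   pid of the process that generated the event
+//   then, repeated per argument:
+//     uint16_t  argument type    (FSE_ARG_STRING, FSE_ARG_DEV, ...)
+//     uint16_t  argument length
+//     ...       argument payload (length bytes)
+//   uint16_t  FSE_ARG_DONE, with no length or payload, ends the record
+//
+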
+static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio) __attribute__((noinline));
+
+static int
+copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
+{
+ int error;
+ uint16_t tmp16;
+ int32_t type;
+ kfs_event *cur;
+ char evbuff[512];
+ int evbuff_idx = 0;
+
+ if (kfse->type == FSE_INVALID) {
+ panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d fref ptr %p)\n", kfse, kfse->refcount, kfse->str);
+ }
+
+ if (kfse->flags & KFSE_BEING_CREATED) {
+ return 0;
+ }
+
+ if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) && kfse->dest == NULL) {
+ //
+ // This can happen if an event gets recycled but we had a
+ // pointer to it in our event queue. The event is the
+ // destination of a rename or clone which we'll process separately
+ // (that is, another kfse points to this one so it's ok
+ // to skip this guy because we'll process it when we process
+ // the other one)
+ error = 0;
+ goto get_out;
+ }
+
+ if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
+ type = (kfse->type & 0xfff);
+
+ if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
+ type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
+ } else if (kfse->flags & KFSE_COMBINED_EVENTS) {
+ type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
+ }
+ } else {
+ type = (int32_t)kfse->type;
+ }
+
+ // copy out the type of the event
+ memcpy(evbuff, &type, sizeof(int32_t));
+ evbuff_idx += sizeof(int32_t);
+
+ // copy out the pid of the person that generated the event
+ memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
+ evbuff_idx += sizeof(pid_t);
+
+ cur = kfse;
+
+copy_again:
+
+ if (kfse->type == FSE_DOCID_CHANGED || kfse->type == FSE_DOCID_CREATED) {
+ dev_t dev = cur->dev;
+ ino64_t ino = cur->ino;
+ uint64_t ival;
+
+ error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
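+ // for docid events the second inode number is carried in the str
+ // pointer field and the 64-bit docid spans the adjacent uid/gid
+ // fields, hence the raw memcpy()s below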
+ memcpy(&ino, &cur->str, sizeof(ino64_t));
+ error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ memcpy(&ival, &cur->uid, sizeof(uint64_t)); // the 64-bit docid spans the uid and gid fields
+ error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &ival, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ goto done;
+ }
+
+ if (kfse->type == FSE_UNMOUNT_PENDING) {
+ dev_t dev = cur->dev;
+
+ error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ goto done;
+ }
+
+ if (cur->str == NULL || cur->str[0] == '\0') {
+ printf("copy_out_kfse:2: empty/short path (%s)\n", cur->str);
+ error = fill_buff(FSE_ARG_STRING, 2, "/", evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ } else {
+ error = fill_buff(FSE_ARG_STRING, cur->len, cur->str, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ }
+ if (error != 0) {
+ goto get_out;
+ }
+
+ if (cur->dev == 0 && cur->ino == 0) {
+ // this happens when the destination of a rename did not
+ // previously exist. it has no other file info, so skip
+ // copying out the fields below since they aren't initialized
+ goto done;
+ }
+
+
+ if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
+ int32_t finfo_size;
+
+ finfo_size = sizeof(dev_t) + sizeof(ino64_t) + sizeof(int32_t) + sizeof(uid_t) + sizeof(gid_t);
+ error = fill_buff(FSE_ARG_FINFO, finfo_size, &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+ } else {
+ error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &cur->dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ error = fill_buff(FSE_ARG_MODE, sizeof(int32_t), &cur->mode, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ error = fill_buff(FSE_ARG_UID, sizeof(uid_t), &cur->uid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ error = fill_buff(FSE_ARG_GID, sizeof(gid_t), &cur->gid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+ }
+
+
+ if (cur->dest) {
+ cur = cur->dest;
+ goto copy_again;
+ }
+
+done:
+ // very last thing: the time stamp
+ error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime, evbuff, &evbuff_idx, sizeof(evbuff), uio);
+ if (error != 0) {
+ goto get_out;
+ }
+
+ // check if the FSE_ARG_DONE will fit
+ if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
+ if (evbuff_idx > uio_resid(uio)) {
+ error = ENOSPC;
+ goto get_out;
+ }
+ error = uiomove(evbuff, evbuff_idx, uio);
+ if (error) {
+ goto get_out;
+ }
+ evbuff_idx = 0;
+ }
+
+ tmp16 = FSE_ARG_DONE;
+ memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
+ evbuff_idx += sizeof(uint16_t);
+
+ // flush any remaining data in the buffer (and hopefully
+ // in most cases this is the only uiomove we'll do)
+ if (evbuff_idx > uio_resid(uio)) {
+ error = ENOSPC;
+ } else {
+ error = uiomove(evbuff, evbuff_idx, uio);
+ }
+
+get_out:
+
+ return error;
+}
+
+
+
+static int
+fmod_watch(fs_event_watcher *watcher, struct uio *uio)
+{
+ int error = 0;
+ user_ssize_t last_full_event_resid;
+ kfs_event *kfse;
+ uint16_t tmp16;
+ int skipped;
+
+ last_full_event_resid = uio_resid(uio);
+
+ // need at least 2048 bytes of space (maxpathlen + 1 event buf)
+ if (uio_resid(uio) < 2048 || watcher == NULL) {
+ return EINVAL;
+ }
+
+ if (watcher->flags & WATCHER_CLOSING) {
+ return 0;
+ }
+
+ if (OSAddAtomic(1, &watcher->num_readers) != 0) {
+ // don't allow multiple threads to read from the fd at the same time
+ OSAddAtomic(-1, &watcher->num_readers);
+ return EAGAIN;
+ }
+
+restart_watch:
+ if (watcher->rd == watcher->wr) {
+ if (watcher->flags & WATCHER_CLOSING) {
+ OSAddAtomic(-1, &watcher->num_readers);
+ return 0;
+ }
+ OSAddAtomic(1, &watcher->blockers);
+
+ // there's nothing to do, go to sleep
+ error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
+
+ OSAddAtomic(-1, &watcher->blockers);
+
+ if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
+ OSAddAtomic(-1, &watcher->num_readers);
+ return error;
+ }
+ }
+
+ // if we dropped events, return that as an event first
+ if (watcher->flags & WATCHER_DROPPED_EVENTS) {
+ int32_t val = FSE_EVENTS_DROPPED;
+
+ error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
+ if (error == 0) {
+ val = 0; // a fake pid
+ error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
+
+ tmp16 = FSE_ARG_DONE; // makes it a consistent msg
+ error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
+
+ last_full_event_resid = uio_resid(uio);
+ }
+
+ if (error) {
+ OSAddAtomic(-1, &watcher->num_readers);
+ return error;
+ }
+
+ watcher->flags &= ~WATCHER_DROPPED_EVENTS;
+ }
+
+ skipped = 0;
+
+ lck_rw_lock_shared(&event_handling_lock);
+ while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
+ if (watcher->flags & WATCHER_CLOSING) {
+ break;
+ }
+
+ //
+ // check if the event is something of interest to us
+ // (since it may have been recycled/reused and changed
+ // its type or which device it is for)
+ //
+ kfse = watcher->event_queue[watcher->rd];
+ if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
+ break;
+ }
+
+ if (watcher->event_list[kfse->type] == FSE_REPORT && watcher_cares_about_dev(watcher, kfse->dev)) {
+ if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
+ // If this is not an Apple System Service, skip specified directories
+ // radar://12034844
+ error = 0;
+ skipped = 1;
+ } else {
+ skipped = 0;
+ if (last_event_ptr == kfse) {
+ last_event_ptr = NULL;
+ last_event_type = -1;
+ last_coalesced_time = 0;
+ }
+ error = copy_out_kfse(watcher, kfse, uio);
+ if (error != 0) {
+ // if an event won't fit or we encountered an error while
+ // copying it out, then back up to the last full
+ // event and just bail out. if the error was ENOENT
+ // then we can continue regular processing, otherwise
+ // we should unlock things and return.
+ uio_setresid(uio, last_full_event_resid);
+ if (error != ENOENT) {
+ lck_rw_unlock_shared(&event_handling_lock);
+ error = 0;
+ goto get_out;
+ }
+ }
+
+ last_full_event_resid = uio_resid(uio);
+ }
+ }
+
+ watcher->event_queue[watcher->rd] = NULL;
+ watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
+ OSSynchronizeIO();
+ release_event_ref(kfse);
+ }
+ lck_rw_unlock_shared(&event_handling_lock);
+
+ if (skipped && error == 0) {
+ goto restart_watch;
+ }
+
+get_out:
+ OSAddAtomic(-1, &watcher->num_readers);
+
+ return error;
+}
+
+
+//
+// Shoo watchers away from a volume that's about to be unmounted
+// (so that it can be cleanly unmounted).
+//
+void
+fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
+{
+#if CONFIG_EMBEDDED
+ dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
+ int error, waitcount = 0;
+ struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
+
+ // wait for any other pending unmounts to complete
+ lock_watch_table();
+ while (fsevent_unmount_dev != 0) {
+ error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
+ if (error == EWOULDBLOCK) {
+ error = 0;
+ }
+ if (!error && (++waitcount >= 10)) {
+ error = EWOULDBLOCK;
+ printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
+ }
+ if (error) {
+ // there's a problem, bail out
+ unlock_watch_table();
+ return;
+ }
+ }
+ if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
+ // nobody watching for unmount pending events
+ unlock_watch_table();
+ return;
+ }
+ // this is now the current unmount pending
+ fsevent_unmount_dev = dev;
+ fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
+ unlock_watch_table();
+
+ // send an event to notify the watchers that they need to get off the mount
+ error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
+
+ // wait for acknowledgment(s) (give up if it takes too long)
+ lock_watch_table();
+ waitcount = 0;
+ while (fsevent_unmount_dev == dev) {
+ error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
+ if (error == EWOULDBLOCK) {
+ error = 0;
+ }
+ if (!error && (++waitcount >= 10)) {
+ error = EWOULDBLOCK;
+ printf("unmount pending ack timeout for dev %d\n", dev);
+ }
+ if (error) {
+ // there's a problem, bail out
+ if (fsevent_unmount_dev == dev) {
+ fsevent_unmount_dev = 0;
+ fsevent_unmount_ack_count = 0;
+ }
+ wakeup((caddr_t)&fsevent_unmount_dev);
+ break;
+ }
+ }
+ unlock_watch_table();
+#endif
+}
+
+
+//
+// /dev/fsevents device code
+//
+static int fsevents_installed = 0;
+
+typedef struct fsevent_handle {
+ UInt32 flags;
+ SInt32 active;
+ fs_event_watcher *watcher;
+ struct klist knotes;
+ struct selinfo si;
+} fsevent_handle;
+
+#define FSEH_CLOSING 0x0001
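+
+// fseh->active counts in-flight ioctls on the handle; fseventsf_close()
+// sets FSEH_CLOSING and waits for active to drain before tearing it down.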
+
+static int
+fseventsf_read(struct fileproc *fp, struct uio *uio,
+ __unused int flags, __unused vfs_context_t ctx)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
+ int error;
+
+ error = fmod_watch(fseh->watcher, uio);
+
+ return error;
+}
+
+
+#pragma pack(push, 4)
+typedef struct fsevent_dev_filter_args32 {
+ uint32_t num_devices;
+ user32_addr_t devices;
+} fsevent_dev_filter_args32;
+typedef struct fsevent_dev_filter_args64 {
+ uint32_t num_devices;
+ user64_addr_t devices;
+} fsevent_dev_filter_args64;
+#pragma pack(pop)
+
+#define FSEVENTS_DEVICE_FILTER_32 _IOW('s', 100, fsevent_dev_filter_args32)
+#define FSEVENTS_DEVICE_FILTER_64 _IOW('s', 100, fsevent_dev_filter_args64)
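+
+// the device filter is a deny-list: events for the listed devices are
+// no longer reported to this watcher (they land in devices_not_to_watch).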
+
+static int
+fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
+ int ret = 0;
+ fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
+
+ OSAddAtomic(1, &fseh->active);
+ if (fseh->flags & FSEH_CLOSING) {
+ OSAddAtomic(-1, &fseh->active);
+ return 0;
+ }
+
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ break;
+
+ case FSEVENTS_WANT_COMPACT_EVENTS: {
+ fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
+ break;
+ }
+
+ case FSEVENTS_WANT_EXTENDED_INFO: {
+ fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
+ break;
+ }
+
+ case FSEVENTS_GET_CURRENT_ID: {
+ *(uint64_t *)data = fseh->watcher->max_event_id;
+ ret = 0;
+ break;
+ }
+
+ case FSEVENTS_DEVICE_FILTER_32: {
+ if (proc_is64bit(vfs_context_proc(ctx))) {
+ ret = EINVAL;
+ break;
+ }
+ fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
+
+ devfilt_args = &_devfilt_args;
+ memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
+ devfilt_args->num_devices = devfilt_args32->num_devices;
+ devfilt_args->devices = CAST_USER_ADDR_T(devfilt_args32->devices);
+ goto handle_dev_filter;
+ }
+
+ case FSEVENTS_DEVICE_FILTER_64:
+ if (!proc_is64bit(vfs_context_proc(ctx))) {
+ ret = EINVAL;
+ break;
+ }
+ devfilt_args = (fsevent_dev_filter_args64 *)data;
+
+handle_dev_filter:
+ {
+ int new_num_devices;
+ dev_t *devices_not_to_watch, *tmp = NULL;
+
+ if (devfilt_args->num_devices > 256) {
+ ret = EINVAL;
+ break;
+ }
+
+ new_num_devices = devfilt_args->num_devices;
+ if (new_num_devices == 0) {
+ lock_watch_table();
+
+ tmp = fseh->watcher->devices_not_to_watch;
+ fseh->watcher->devices_not_to_watch = NULL;
+ fseh->watcher->num_devices = new_num_devices;
+
+ unlock_watch_table();
+ if (tmp) {
+ FREE(tmp, M_TEMP);
+ }
+ break;
+ }
+
+ MALLOC(devices_not_to_watch, dev_t *,
+ new_num_devices * sizeof(dev_t),
+ M_TEMP, M_WAITOK);
+ if (devices_not_to_watch == NULL) {
+ ret = ENOMEM;
+ break;
+ }
+
+ ret = copyin(devfilt_args->devices,
+ (void *)devices_not_to_watch,
+ new_num_devices * sizeof(dev_t));
+ if (ret) {
+ FREE(devices_not_to_watch, M_TEMP);
+ break;
+ }
+
+ lock_watch_table();
+ fseh->watcher->num_devices = new_num_devices;
+ tmp = fseh->watcher->devices_not_to_watch;
+ fseh->watcher->devices_not_to_watch = devices_not_to_watch;
+ unlock_watch_table();
+
+ if (tmp) {
+ FREE(tmp, M_TEMP);
+ }
+
+ break;
+ }
+
+ case FSEVENTS_UNMOUNT_PENDING_ACK: {
+ lock_watch_table();
+ dev_t dev = *(dev_t *)data;
+ if (fsevent_unmount_dev == dev) {
+ if (--fsevent_unmount_ack_count <= 0) {
+ fsevent_unmount_dev = 0;
+ wakeup((caddr_t)&fsevent_unmount_dev);
+ }
+ } else {
+ printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
+ ret = EINVAL;
+ }
+ unlock_watch_table();
+ break;
+ }
+
+ default:
+ ret = EINVAL;
+ break;
+ }
+
+ OSAddAtomic(-1, &fseh->active);
+ return ret;
+}
+
+
+static int
+fseventsf_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
+ int ready = 0;
+
+ if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
+ return 0;
+ }
+
+
+ // if there's nothing in the queue, we're not ready
+ if (fseh->watcher->rd != fseh->watcher->wr) {
+ ready = 1;
+ }
+
+ if (!ready) {
+ selrecord(vfs_context_proc(ctx), &fseh->si, wql);
+ }
+
+ return ready;
+}
+
+
+#if NOTUSED
+static int
+fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
+{
+ return ENOTSUP;
+}
+#endif
+
+static int
+fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)fg->fg_data;
+ fs_event_watcher *watcher;
+
+ OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
+ while (OSAddAtomic(0, &fseh->active) > 0) {
+ tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
+ }
+
+ watcher = fseh->watcher;
+ fg->fg_data = NULL;
+ fseh->watcher = NULL;
+
+ remove_watcher(watcher);
+ FREE(fseh, M_TEMP);
+
+ return 0;
+}
+
+static void
+filt_fsevent_detach(struct knote *kn)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
+
+ lock_watch_table();
+
+ KNOTE_DETACH(&fseh->knotes, kn);
+
+ unlock_watch_table();
+}
+
+/*
+ * Determine whether this knote should be active
+ *
+ * This is kind of subtle.
+ * --First, notice if the vnode has been revoked: if so, override hint
+ * --EVFILT_READ knotes are checked no matter what the hint is
+ * --Other knotes activate based on hint.
+ * --If hint is revoke, set special flags and activate
+ */
+static int
+filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
+ int activate = 0;
+ int32_t rd, wr, amt;
+ int64_t data = 0;
+
+ if (NOTE_REVOKE == hint) {
+ kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+ activate = 1;
+ }
+
+ rd = fseh->watcher->rd;
+ wr = fseh->watcher->wr;
+ if (rd <= wr) {
+ amt = wr - rd;
+ } else {
+ amt = fseh->watcher->eventq_size - (rd - wr);
+ }
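+ // amt is the number of events currently queued, handling ring wrap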
+
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ data = amt;
+ activate = (data != 0);
+ break;
+ case EVFILT_VNODE:
+ /* Check events this note matches against the hint */
+ if (kn->kn_sfflags & hint) {
+ kn->kn_fflags |= hint; /* Set which event occurred */
+ }
+ if (kn->kn_fflags != 0) {
+ activate = 1;
+ }
+ break;
+ default:
+ // nothing to do...
+ break;
+ }
+
+ if (activate && kev) {
+ knote_fill_kevent(kn, kev, data);
+ }
+ return activate;
+}
+
+static int
+filt_fsevent(struct knote *kn, long hint)
+{
+ return filt_fsevent_common(kn, NULL, hint);
+}
+
+static int
+filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
+{
+ int res;
+
+ lock_watch_table();
+
+ /* accept new fflags/data as saved */
+ kn->kn_sfflags = kev->fflags;
+ kn->kn_sdata = kev->data;
+
+ /* restrict the current results to the (smaller?) set of new interest */
+ /*
+ * For compatibility with previous implementations, we leave kn_fflags
+ * as they were before.
+ */
+ //kn->kn_fflags &= kev->fflags;
+
+ /* determine if the filter is now fired */
+ res = filt_fsevent_common(kn, NULL, 0);
+
+ unlock_watch_table();
+
+ return res;
+}
+
+static int
+filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
+{
+ int res;
+
+ lock_watch_table();
+
+ res = filt_fsevent_common(kn, kev, 0);
+
+ unlock_watch_table();
+
+ return res;
+}
+
+SECURITY_READ_ONLY_EARLY(struct filterops) fsevent_filtops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = filt_fsevent_detach,
+ .f_event = filt_fsevent,
+ .f_touch = filt_fsevent_touch,
+ .f_process = filt_fsevent_process,
+};
+
+static int
+fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
+ __unused struct kevent_qos_s *kev)
+{
+ fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
+ int res;
+
+ kn->kn_hook = (void*)fseh;
+ kn->kn_filtid = EVFILTID_FSEVENT;
+
+ lock_watch_table();
+
+ KNOTE_ATTACH(&fseh->knotes, kn);
+
+ /* check to see if it is fired already */
+ res = filt_fsevent_common(kn, NULL, 0);
+
+ unlock_watch_table();
+
+ return res;
+}
+
+
+static int
+fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
+{
+ int counter = 0;
+ fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
+
+ // if there are people still waiting, sleep for 10ms to
+ // let them clean up and get out of there. however we
+ // also don't want to get stuck forever so if they don't
+ // exit after 5 seconds we're tearing things down anyway.
+ while (fseh->watcher->blockers && counter++ < 500) {
+ // issue wakeup in case anyone is blocked waiting for an event
+ // do this each time we wakeup in case the blocker missed
+ // the wakeup due to the unprotected test of WATCHER_CLOSING
+ // and decision to tsleep in fmod_watch... this bit of
+ // latency is a decent tradeoff against not having to
+ // take and drop a lock in fmod_watch
+ lock_watch_table();
+ fsevents_wakeup(fseh->watcher);
+ unlock_watch_table();
+
+ tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
+ }
+
+ return 0;
+}
+
+
+static int
+fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
+{
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EPERM;
+ }
+
+ return 0;
+}
+
+static int
+fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
+{
+ return 0;
+}
+
+static int
+fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
+{
+ return EIO;
+}
+
+
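+//
+// Records written to /dev/fsevents have the following shape, inferred
+// from the parsing loop below (informal sketch):
+//
+//   int        event type
+//   fse_info   file info for the path
+//   char[]     NUL-terminated path
+//
+// FSE_RENAME, FSE_EXCHANGE and FSE_CLONE events are followed by a second
+// (fse_info, NUL-terminated path) pair for the destination.
+//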
+static int
+parse_buffer_and_add_events(const char *buffer, int bufsize, vfs_context_t ctx, long *remainder)
+{
+ const fse_info *finfo, *dest_finfo;
+ const char *path, *ptr, *dest_path, *event_start = buffer;
+ int path_len, type, dest_path_len, err = 0;
+
+
+ ptr = buffer;
+ while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
+ type = *(const int *)ptr;
+ if (type < 0 || type >= FSE_MAX_EVENTS) {
+ err = EINVAL;
+ break;
+ }
+
+ ptr += sizeof(int);
+
+ finfo = (const fse_info *)ptr;
+ ptr += sizeof(fse_info);
+
+ path = ptr;
+ while (ptr < buffer + bufsize && *ptr != '\0') {
+ ptr++;
+ }
+
+ if (ptr >= buffer + bufsize) {
+ break;
+ }
+
+ ptr++; // advance over the trailing '\0'
+
+ path_len = ptr - path;
+
+ if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
+ event_start = ptr; // record where the next event starts
+
+ err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
+ if (err) {
+ break;
+ }
+ continue;
+ }
+
+ //
+ // if we're here we have to slurp up the destination finfo
+ // and path so that we can pass them to the add_fsevent()
+ // call. basically it's a copy of the above code.
+ //
+ dest_finfo = (const fse_info *)ptr;
+ ptr += sizeof(fse_info);
+
+ dest_path = ptr;
+ while (ptr < buffer + bufsize && *ptr != '\0') {
+ ptr++;
+ }
+
+ if (ptr >= buffer + bufsize) {
+ break;
+ }
+
+ ptr++; // advance over the trailing '\0'
+ event_start = ptr; // record where the next event starts
+
+ dest_path_len = ptr - dest_path;
+ //
+ // If the destination inode number is non-zero, generate a rename
+ // with both source and destination FSE_ARG_FINFO. Otherwise generate
+ // a rename with only one FSE_ARG_FINFO. If you need to inject an
+ // exchange with an inode of zero, just make that inode (and its path)
+ // come in as the first one, not the second.
+ //
+ if (dest_finfo->ino) {
+ err = add_fsevent(type, ctx,
+ FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
+ FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
+ FSE_ARG_DONE);
+ } else {
+ err = add_fsevent(type, ctx,
+ FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
+ FSE_ARG_STRING, dest_path_len, dest_path,
+ FSE_ARG_DONE);
+ }
+
+ if (err) {
+ break;
+ }
+ }
+
+ // if the last event wasn't complete, set the remainder
+ // to the number of bytes left over from the start of that
+ // incomplete event to the end of the buffer.
+ //
+ *remainder = (long)((buffer + bufsize) - event_start);
+
+ return err;
+}
+
+
+//
+// Note: this buffer size can never be less than
+// 2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
+// because that is the max size for a single event.
+// I made it 4k to be a "nice" size. making it
+// smaller is not a good idea.
+//
+#define WRITE_BUFFER_SIZE 4096
+char *write_buffer = NULL;
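+
+// a single static buffer is shared by all writers; event_writer_lock
+// serializes fseventswrite() so only one thread uses it at a time.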
+
+static int
+fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
+{
+ int error = 0, count;
+ vfs_context_t ctx = vfs_context_current();
+ long offset = 0, remainder;
+
+ lck_mtx_lock(&event_writer_lock);
+
+ if (write_buffer == NULL) {
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) {
+ lck_mtx_unlock(&event_writer_lock);
+ return ENOMEM;
+ }
+ }
+
+ //
+ // this loop copies in and processes the events written.
+ // it takes care to copy in reasonable size chunks and
+ // process them. if there is an event that spans a chunk
+ // boundary we're careful to copy those bytes down to the
+ // beginning of the buffer and read the next chunk in just
+ // after it.
+ //
+ while (uio_resid(uio)) {
+ if (uio_resid(uio) > (WRITE_BUFFER_SIZE - offset)) {
+ count = WRITE_BUFFER_SIZE - offset;
+ } else {
+ count = uio_resid(uio);
+ }
+
+ error = uiomove(write_buffer + offset, count, uio);
+ if (error) {
+ break;
+ }
+
+ // printf("fsevents: write: copied in %d bytes (offset: %ld)\n", count, offset);
+ error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
+ if (error) {
+ break;
+ }
+
+ //
+ // if there's any remainder, copy it down to the beginning
+ // of the buffer so that it will get processed the next time
+ // through the loop. note that the remainder always starts
+ // at an event boundary.
+ //
+ if (remainder != 0) {
+ // printf("fsevents: write: an event spanned a %d byte boundary. remainder: %ld\n",
+ // WRITE_BUFFER_SIZE, remainder);
+ memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
+ offset = remainder;
+ } else {
+ offset = 0;
+ }
+ }
+
+ lck_mtx_unlock(&event_writer_lock);
+
+ return error;
+}
+
+
+static const struct fileops fsevents_fops = {
+ .fo_type = DTYPE_FSEVENTS,
+ .fo_read = fseventsf_read,
+ .fo_write = fo_no_write,
+ .fo_ioctl = fseventsf_ioctl,
+ .fo_select = fseventsf_select,
+ .fo_close = fseventsf_close,
+ .fo_kqfilter = fseventsf_kqfilter,
+ .fo_drain = fseventsf_drain,
+};
+
+typedef struct fsevent_clone_args32 {
+ user32_addr_t event_list;
+ int32_t num_events;
+ int32_t event_queue_depth;
+ user32_addr_t fd;
+} fsevent_clone_args32;
+
+typedef struct fsevent_clone_args64 {
+ user64_addr_t event_list;
+ int32_t num_events;
+ int32_t event_queue_depth;
+ user64_addr_t fd;
+} fsevent_clone_args64;
+
+#define FSEVENTS_CLONE_32 _IOW('s', 1, fsevent_clone_args32)
+#define FSEVENTS_CLONE_64 _IOW('s', 1, fsevent_clone_args64)
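+
+//
+// Illustrative userspace usage (a hedged sketch only -- it is not part of
+// this file and assumes the userspace-visible fsevent_clone_args and
+// FSEVENTS_CLONE definitions from <sys/fsevents.h>):
+//
+//   int8_t  events[FSE_MAX_EVENTS];
+//   int32_t cloned_fd = -1;
+//   int     dev_fd = open("/dev/fsevents", O_RDONLY);   // root only
+//
+//   for (int i = 0; i < FSE_MAX_EVENTS; i++) {
+//       events[i] = FSE_REPORT;
+//   }
+//
+//   fsevent_clone_args args = {
+//       .event_list        = events,
+//       .num_events        = FSE_MAX_EVENTS,
+//       .event_queue_depth = 1024,
+//       .fd                = &cloned_fd,
+//   };
+//
+//   if (ioctl(dev_fd, FSEVENTS_CLONE, &args) == 0) {
+//       char buf[8192];              // must be >= 2048 (see fmod_watch)
+//       ssize_t n = read(cloned_fd, buf, sizeof(buf));
+//       // parse the (type, pid, FSE_ARG_*...) records described above
+//   }
+//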
+
+static int
+fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
+{
+ struct fileproc *f;
+ int fd, error;
+ fsevent_handle *fseh = NULL;
+ fsevent_clone_args64 *fse_clone_args, _fse_clone;
+ int8_t *event_list;
+ int is64bit = proc_is64bit(p);
+
+ switch (cmd) {
+ case FSEVENTS_CLONE_32: {
+ if (is64bit) {
+ return EINVAL;
+ }
+ fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
+
+ fse_clone_args = &_fse_clone;
+ memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
+
+ fse_clone_args->event_list = CAST_USER_ADDR_T(args32->event_list);
+ fse_clone_args->num_events = args32->num_events;
+ fse_clone_args->event_queue_depth = args32->event_queue_depth;
+ fse_clone_args->fd = CAST_USER_ADDR_T(args32->fd);
+ goto handle_clone;
+ }
+
+ case FSEVENTS_CLONE_64:
+ if (!is64bit) {
+ return EINVAL;
+ }
+ fse_clone_args = (fsevent_clone_args64 *)data;
+
+handle_clone:
+ if (fse_clone_args->num_events < 0 || fse_clone_args->num_events > 4096) {
+ return EINVAL;
+ }
+
+ MALLOC(fseh, fsevent_handle *, sizeof(fsevent_handle),
+ M_TEMP, M_WAITOK);
+ if (fseh == NULL) {
+ return ENOMEM;
+ }
+ memset(fseh, 0, sizeof(fsevent_handle));
+
+ klist_init(&fseh->knotes);
+
+ MALLOC(event_list, int8_t *,
+ fse_clone_args->num_events * sizeof(int8_t),
+ M_TEMP, M_WAITOK);
+ if (event_list == NULL) {
+ FREE(fseh, M_TEMP);
+ return ENOMEM;
+ }
+
+ error = copyin(fse_clone_args->event_list,
+ (void *)event_list,
+ fse_clone_args->num_events * sizeof(int8_t));
+ if (error) {
+ FREE(event_list, M_TEMP);
+ FREE(fseh, M_TEMP);
+ return error;
+ }
+
+ /*
+ * Lock down the user's "fd" result buffer so it's safe
+ * to hold locks while we copy it out.
+ */
+ error = vslock((user_addr_t)fse_clone_args->fd,
+ sizeof(int32_t));
+ if (error) {
+ FREE(event_list, M_TEMP);
+ FREE(fseh, M_TEMP);
+ return error;
+ }
+
+ error = add_watcher(event_list,
+ fse_clone_args->num_events,
+ fse_clone_args->event_queue_depth,
+ &fseh->watcher,
+ fseh);
+ if (error) {
+ vsunlock((user_addr_t)fse_clone_args->fd,
+ sizeof(int32_t), 0);
+ FREE(event_list, M_TEMP);
+ FREE(fseh, M_TEMP);
+ return error;
+ }
+
+ fseh->watcher->fseh = fseh;
+
+ error = falloc(p, &f, &fd, vfs_context_current());
+ if (error) {
+ remove_watcher(fseh->watcher);
+ vsunlock((user_addr_t)fse_clone_args->fd,
+ sizeof(int32_t), 0);
+ FREE(event_list, M_TEMP);
+ FREE(fseh, M_TEMP);
+ return error;
+ }
+ proc_fdlock(p);
+ f->f_fglob->fg_flag = FREAD | FWRITE;
+ f->f_fglob->fg_ops = &fsevents_fops;
+ f->f_fglob->fg_data = (caddr_t) fseh;
+ /*
+ * We can safely hold the proc_fdlock across this copyout()
+ * because of the vslock() call above. The vslock() call
+ * also ensures that we will never get an error, so assert
+ * this.
+ */
+ error = copyout((void *)&fd, fse_clone_args->fd, sizeof(int32_t));
+ assert(error == 0);
+
+ procfdtbl_releasefd(p, fd, NULL);
+ fp_drop(p, fd, f, 1);
+ proc_fdunlock(p);
+
+ vsunlock((user_addr_t)fse_clone_args->fd,
+ sizeof(int32_t), 1);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static void
+fsevents_wakeup(fs_event_watcher *watcher)
+{
+ selwakeup(&watcher->fseh->si);
+ KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
+ wakeup((caddr_t)watcher);