bsd/vfs/vfs_fsevents.c

   1 /*
   2  * Copyright (c) 2004-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <stdarg.h>
  29 #include <sys/param.h>
  30 #include <sys/systm.h>
  31 #include <sys/event.h>         // for kqueue related stuff
  32 #include <sys/fsevents.h>
  33
  34 #if CONFIG_FSE
  35 #include <sys/namei.h>
  36 #include <sys/filedesc.h>
  37 #include <sys/kernel.h>
  38 #include <sys/file_internal.h>
  39 #include <sys/stat.h>
  40 #include <sys/vnode_internal.h>
  41 #include <sys/mount_internal.h>
  42 #include <sys/proc_internal.h>
  43 #include <sys/kauth.h>
  44 #include <sys/uio.h>
  45 #include <sys/malloc.h>
  46 #include <sys/dirent.h>
  47 #include <sys/attr.h>
  48 #include <sys/sysctl.h>
  49 #include <sys/ubc.h>
  50 #include <machine/cons.h>
  51 #include <miscfs/specfs/specdev.h>
  52 #include <miscfs/devfs/devfs.h>
  53 #include <sys/filio.h>
  54 #include <kern/locks.h>
  55 #include <libkern/OSAtomic.h>
  56 #include <kern/zalloc.h>
  57 #include <mach/mach_time.h>
  58 #include <kern/thread_call.h>
  59 #include <kern/clock.h>
  60
  61 #include <security/audit/audit.h>
  62 #include <bsm/audit_kevents.h>
  63
  64 #include <pexpert/pexpert.h>
  65 #include <libkern/section_keywords.h>
  66
//
// One file-system event.  add_fsevent() allocates these from event_zone,
// links them onto kfse_list_head and fills them in from its arguments.
//
// NOTE: the field layout is relied upon by add_fsevent().  For
// FSE_DOCID_CREATED / FSE_DOCID_CHANGED events the destination inode
// number is copied over the storage that starts at "str", and the 64-bit
// document-id is copied over "uid" and "gid" (which therefore must stay
// adjacent and together be 64 bits wide).
//
typedef struct kfs_event {
        LIST_ENTRY(kfs_event) kevent_list; // linkage on kfse_list_head
        int16_t        type;       // type code of this event
        u_int16_t      flags,      // per-event flags
            len;                   // the length of the path in "str"
        int32_t        refcount;   // number of clients referencing this
        pid_t          pid;        // pid of the process that did the op

        uint64_t       abstime;    // when this event happened (mach_absolute_time())
        ino64_t        ino;        // file id of the object (from vnode_getattr() or an fse_info)
        dev_t          dev;        // device (fsid) the object lives on
        int32_t        mode;       // file type + mode bits, plus FSE_MODE_* hard-link bits
        uid_t          uid;        // owner; first half of the doc-id for FSE_DOCID_* events
        gid_t          gid;        // group; second half of the doc-id for FSE_DOCID_* events

        const char    *str;        // path, as returned by vfs_addname()

        struct kfs_event *dest; // if this is a two-file op
} kfs_event;
  86
// flags for the flags field
#define KFSE_COMBINED_EVENTS          0x0001
#define KFSE_CONTAINS_DROPPED_EVENTS  0x0002   // set when the full path could not be obtained or was truncated
#define KFSE_RECYCLED_EVENT           0x0004
#define KFSE_BEING_CREATED            0x0008   // set while add_fsevent() is still filling the event in

// all outstanding events; add_fsevent() inserts at the head while
// holding event_buf_lock
LIST_HEAD(kfse_list, kfs_event) kfse_list_head = LIST_HEAD_INITIALIZER(x);
int num_events_outstanding = 0;  // incremented for every event put on kfse_list_head
int num_pending_rename = 0;      // incremented for every FSE_RENAME event put on the list
  96
  97
struct fsevent_handle;

//
// State kept for one client that is watching for events.  Pointers to
// the active watchers are stored in watcher_table[].
//
typedef struct fs_event_watcher {
        int8_t      *event_list;         // the events we're interested in
        int32_t      num_events;
        dev_t       *devices_not_to_watch;// report events from devices not in this list (NULL: watch all devices)
        uint32_t     num_devices;        // number of entries in devices_not_to_watch
        int32_t      flags;              // WATCHER_* bits
        kfs_event  **event_queue;
        int32_t      eventq_size;        // number of event pointers in queue
        int32_t      num_readers;
        int32_t      rd;                 // read index into the event_queue
        int32_t      wr;                 // write index into the event_queue
        int32_t      blockers;
        int32_t      my_id;
        uint32_t     num_dropped;
        uint64_t     max_event_id;
        struct fsevent_handle *fseh;
        pid_t        pid;
        char         proc_name[(2 * MAXCOMLEN) + 1]; // printed in the "event queue is full" diagnostics
} fs_event_watcher;
 119
// fs_event_watcher flags
#define WATCHER_DROPPED_EVENTS         0x0001   // set on every watcher when add_fsevent() has no free event
#define WATCHER_CLOSING                0x0002
#define WATCHER_WANTS_COMPACT_EVENTS   0x0004
#define WATCHER_WANTS_EXTENDED_INFO    0x0008
#define WATCHER_APPLE_SYSTEM_SERVICE   0x0010   // fseventsd, coreservicesd, mds, revisiond

// fixed-size table of watchers; unused slots are NULL.  It is walked
// with watch_table_lock held.
#define MAX_WATCHERS  8
static fs_event_watcher *watcher_table[MAX_WATCHERS];
 129
// number of kfs_event structures in the pool; fsevents_internal_init()
// lets "kern.maxkfsevents" override the default via PE_get_default()
#define DEFAULT_MAX_KFS_EVENTS   4096
static int max_kfs_events = DEFAULT_MAX_KFS_EVENTS;

// we allocate kfs_event structures out of this zone
static zone_t     event_zone;
// non-zero once fsevents_internal_init() has been entered
static int        fs_event_init = 0;

//
// this array records whether anyone is interested in a
// particular type of event.  if no one is, we bail out
// early from the event delivery
//
static int16_t     fs_event_type_watchers[FSE_MAX_EVENTS];

// the device currently being unmounted:
static dev_t fsevent_unmount_dev = 0;
// how many ACKs are still outstanding:
static int fsevent_unmount_ack_count = 0;
 148
// forward prototypes for helpers that add_fsevent() calls
static int  watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse);
static void fsevents_wakeup(fs_event_watcher *watcher);

//
// Locks
//
// The attribute and group objects are allocated, and the locks
// themselves initialized, in fsevents_internal_init().
//
static lck_grp_attr_t *  fsevent_group_attr;
static lck_attr_t *      fsevent_lock_attr;
static lck_grp_t *       fsevent_mutex_group;

static lck_grp_t *       fsevent_rw_group;

static lck_rw_t  event_handling_lock; // handles locking for event manipulation and recycling
static lck_mtx_t watch_table_lock;    // taken by lock_watch_table()
static lck_mtx_t event_buf_lock;      // taken by lock_fs_event_list()
static lck_mtx_t event_writer_lock;
 165
 166
/*
 * Explicitly declare qsort so the compiler doesn't complain about a
 * missing prototype.  The last parameter is the element comparison
 * callback.
 */
__private_extern__ void qsort(
        void * array,
        size_t nmembers,
        size_t member_size,
        int (*)(const void *, const void *));
 173
 174 static int
 175 is_ignored_directory(const char *path)
 176 {
 177         if (!path) {
 178                 return 0;
 179         }
 180
 181 #define IS_TLD(x) strnstr(__DECONST(char *, path), x, MAXPATHLEN)
 182         if (IS_TLD("/.Spotlight-V100/") ||
 183             IS_TLD("/.MobileBackups/") ||
 184             IS_TLD("/Backups.backupdb/")) {
 185                 return 1;
 186         }
 187 #undef IS_TLD
 188
 189         return 0;
 190 }
 191
 192 static void
 193 fsevents_internal_init(void)
 194 {
 195         int i;
 196
 197         if (fs_event_init++ != 0) {
 198                 return;
 199         }
 200
 201         for (i = 0; i < FSE_MAX_EVENTS; i++) {
 202                 fs_event_type_watchers[i] = 0;
 203         }
 204
 205         memset(watcher_table, 0, sizeof(watcher_table));
 206
 207         fsevent_lock_attr    = lck_attr_alloc_init();
 208         fsevent_group_attr   = lck_grp_attr_alloc_init();
 209         fsevent_mutex_group  = lck_grp_alloc_init("fsevent-mutex", fsevent_group_attr);
 210         fsevent_rw_group     = lck_grp_alloc_init("fsevent-rw", fsevent_group_attr);
 211
 212         lck_mtx_init(&watch_table_lock, fsevent_mutex_group, fsevent_lock_attr);
 213         lck_mtx_init(&event_buf_lock, fsevent_mutex_group, fsevent_lock_attr);
 214         lck_mtx_init(&event_writer_lock, fsevent_mutex_group, fsevent_lock_attr);
 215
 216         lck_rw_init(&event_handling_lock, fsevent_rw_group, fsevent_lock_attr);
 217
 218         PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
 219
 220         event_zone = zinit(sizeof(kfs_event),
 221             max_kfs_events * sizeof(kfs_event),
 222             max_kfs_events * sizeof(kfs_event),
 223             "fs-event-buf");
 224         if (event_zone == NULL) {
 225                 printf("fsevents: failed to initialize the event zone.\n");
 226         }
 227
 228         // mark the zone as exhaustible so that it will not
 229         // ever grow beyond what we initially filled it with
 230         zone_change(event_zone, Z_EXHAUST, TRUE);
 231         zone_change(event_zone, Z_COLLECT, FALSE);
 232         zone_change(event_zone, Z_CALLERACCT, FALSE);
 233
 234         if (zfill(event_zone, max_kfs_events) < max_kfs_events) {
 235                 printf("fsevents: failed to pre-fill the event zone.\n");
 236         }
 237 }
 238
// Acquire watch_table_lock, the mutex that is held while watcher_table[]
// is walked.  Pair with unlock_watch_table().
static void
lock_watch_table(void)
{
        lck_mtx_lock(&watch_table_lock);
}
 244
// Release watch_table_lock, previously taken with lock_watch_table().
static void
unlock_watch_table(void)
{
        lck_mtx_unlock(&watch_table_lock);
}
 250
// Acquire event_buf_lock, the mutex add_fsevent() holds while it allocates
// an event and links it onto kfse_list_head.  Pair with
// unlock_fs_event_list().
static void
lock_fs_event_list(void)
{
        lck_mtx_lock(&event_buf_lock);
}
 256
// Release event_buf_lock, previously taken with lock_fs_event_list().
static void
unlock_fs_event_list(void)
{
        lck_mtx_unlock(&event_buf_lock);
}
 262
// forward prototype: release_event_ref() is declared here so it can be
// called before the point where it is defined
static void release_event_ref(kfs_event *kfse);
 265
 266 static boolean_t
 267 watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
 268 {
 269         unsigned int i;
 270
 271         // if devices_not_to_watch is NULL then we care about all
 272         // events from all devices
 273         if (watcher->devices_not_to_watch == NULL) {
 274                 return true;
 275         }
 276
 277         for (i = 0; i < watcher->num_devices; i++) {
 278                 if (dev == watcher->devices_not_to_watch[i]) {
 279                         // found a match! that means we do not
 280                         // want events from this device.
 281                         return false;
 282                 }
 283         }
 284
 285         // if we're here it's not in the devices_not_to_watch[]
 286         // list so that means we do care about it
 287         return true;
 288 }
 289
 290
 291 int
 292 need_fsevent(int type, vnode_t vp)
 293 {
 294         if (type >= 0 && type < FSE_MAX_EVENTS && fs_event_type_watchers[type] == 0) {
 295                 return 0;
 296         }
 297
 298         // events in /dev aren't really interesting...
 299         if (vp->v_tag == VT_DEVFS) {
 300                 return 0;
 301         }
 302
 303         return 1;
 304 }
 305
 306
// true for the event types FSE_STAT_CHANGED and FSE_CONTENT_MODIFIED
#define is_throw_away(x)  ((x) == FSE_STAT_CHANGED || (x) == FSE_CONTENT_MODIFIED)


// Ways that an event can be reused:
//
// "combined" events mean that there were two events for
// the same vnode or path and we're combining both events
// into a single event.  The primary event gets a bit that
// marks it as having been combined.  The secondary event
// is essentially dropped and the kfse structure reused.
//
// "collapsed" means that multiple events below a given
// directory are collapsed into a single event.  In this
// case, the directory that we collapse into and all of
// its children must be re-scanned.
//
// "recycled" means that we're completely blowing away
// the event since there are other events that have info
// about the same vnode or path (and one of those other
// events will be marked as combined or collapsed as
// appropriate).
//
#define KFSE_COMBINED   0x0001
#define KFSE_COLLAPSED  0x0002
#define KFSE_RECYCLED   0x0004

int num_dropped         = 0;  // events add_fsevent() could not allocate a kfs_event for
int num_parent_switch   = 0;
int num_recycled_rename = 0;

// uptime at which add_fsevent() last printed its "event queue is full"
// diagnostics; used to rate-limit that output
static struct timeval last_print;
 338
//
// These variables are used to track coalescing multiple identical
// events for the same vnode/pathname.  If we get the same event
// type and same vnode/pathname as the previous event, we just drop
// the event since it's superfluous.  This improves some micro-
// benchmarks considerably and actually has a real-world impact on
// tests like a Finder copy where multiple stat-changed events can
// get coalesced.
//
// add_fsevent() reads and updates them while it holds event_buf_lock.
//
static int     last_event_type = -1;     // type of the last event that was not coalesced
static void   *last_ptr = NULL;          // vnode or string pointer argument of that event
static char    last_str[MAXPATHLEN];     // copy of the path when it was passed as a string
static int     last_nlen = 0;            // length argument that accompanied that string
static int     last_vid = -1;            // vnode_vid() of the vnode (0 when no vnode was passed)
static uint64_t last_coalesced_time = 0; // mach_absolute_time() taken for that event
static void   *last_event_ptr = NULL;    // most recently allocated kfs_event
int            last_coalesced = 0;       // number of events dropped as duplicates
static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 }; // filled in on first use by clock_timebase_info()
 357
 358
 359 int
 360 add_fsevent(int type, vfs_context_t ctx, ...)
 361 {
 362         struct proc      *p = vfs_context_proc(ctx);
 363         int               i, arg_type, ret;
 364         kfs_event        *kfse, *kfse_dest = NULL, *cur;
 365         fs_event_watcher *watcher;
 366         va_list           ap;
 367         int               error = 0, did_alloc = 0;
 368         dev_t             dev = 0;
 369         uint64_t          now, elapsed;
 370         char             *pathbuff = NULL;
 371         int               pathbuff_len;
 372
 373
 374
 375         va_start(ap, ctx);
 376
 377         // ignore bogus event types..
 378         if (type < 0 || type >= FSE_MAX_EVENTS) {
 379                 return EINVAL;
 380         }
 381
 382         // if no one cares about this type of event, bail out
 383         if (fs_event_type_watchers[type] == 0) {
 384                 va_end(ap);
 385
 386                 return 0;
 387         }
 388
 389         now = mach_absolute_time();
 390
 391         // find a free event and snag it for our use
 392         // NOTE: do not do anything that would block until
 393         //       the lock is dropped.
 394         lock_fs_event_list();
 395
 396         //
 397         // check if this event is identical to the previous one...
 398         // (as long as it's not an event type that can never be the
 399         // same as a previous event)
 400         //
 401         if (type != FSE_CREATE_FILE && type != FSE_DELETE && type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CHOWN && type != FSE_DOCID_CHANGED && type != FSE_DOCID_CREATED && type != FSE_CLONE) {
 402                 void *ptr = NULL;
 403                 int   vid = 0, was_str = 0, nlen = 0;
 404
 405                 for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
 406                         switch (arg_type) {
 407                         case FSE_ARG_VNODE: {
 408                                 ptr = va_arg(ap, void *);
 409                                 vid = vnode_vid((struct vnode *)ptr);
 410                                 last_str[0] = '\0';
 411                                 break;
 412                         }
 413                         case FSE_ARG_STRING: {
 414                                 nlen = va_arg(ap, int32_t);
 415                                 ptr = va_arg(ap, void *);
 416                                 was_str = 1;
 417                                 break;
 418                         }
 419                         }
 420                         if (ptr != NULL) {
 421                                 break;
 422                         }
 423                 }
 424
 425                 if (sTimebaseInfo.denom == 0) {
 426                         (void) clock_timebase_info(&sTimebaseInfo);
 427                 }
 428
 429                 elapsed = (now - last_coalesced_time);
 430                 if (sTimebaseInfo.denom != sTimebaseInfo.numer) {
 431                         if (sTimebaseInfo.denom == 1) {
 432                                 elapsed *= sTimebaseInfo.numer;
 433                         } else {
 434                                 // this could overflow... the worst that will happen is that we'll
 435                                 // send (or not send) an extra event so I'm not going to worry about
 436                                 // doing the math right like dtrace_abs_to_nano() does.
 437                                 elapsed = (elapsed * sTimebaseInfo.numer) / (uint64_t)sTimebaseInfo.denom;
 438                         }
 439                 }
 440
 441                 if (type == last_event_type
 442                     && (elapsed < 1000000000)
 443                     &&
 444                     ((vid && vid == last_vid && last_ptr == ptr)
 445                     ||
 446                     (last_str[0] && last_nlen == nlen && ptr && strcmp(last_str, ptr) == 0))
 447                     ) {
 448                         last_coalesced++;
 449                         unlock_fs_event_list();
 450                         va_end(ap);
 451
 452                         return 0;
 453                 } else {
 454                         last_ptr = ptr;
 455                         if (was_str) {
 456                                 strlcpy(last_str, ptr, sizeof(last_str));
 457                         }
 458                         last_nlen = nlen;
 459                         last_vid = vid;
 460                         last_event_type = type;
 461                         last_coalesced_time = now;
 462                 }
 463         }
 464         va_start(ap, ctx);
 465
 466
 467         kfse = zalloc_noblock(event_zone);
 468         if (kfse && (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE)) {
 469                 kfse_dest = zalloc_noblock(event_zone);
 470                 if (kfse_dest == NULL) {
 471                         did_alloc = 1;
 472                         zfree(event_zone, kfse);
 473                         kfse = NULL;
 474                 }
 475         }
 476
 477
 478         if (kfse == NULL) {    // yikes! no free events
 479                 unlock_fs_event_list();
 480                 lock_watch_table();
 481
 482                 for (i = 0; i < MAX_WATCHERS; i++) {
 483                         watcher = watcher_table[i];
 484                         if (watcher == NULL) {
 485                                 continue;
 486                         }
 487
 488                         watcher->flags |= WATCHER_DROPPED_EVENTS;
 489                         fsevents_wakeup(watcher);
 490                 }
 491                 unlock_watch_table();
 492
 493                 {
 494                         struct timeval current_tv;
 495
 496                         num_dropped++;
 497
                        // only print a message if more than 10 seconds have passed since the last one
 499                         microuptime(&current_tv);
 500                         if ((current_tv.tv_sec - last_print.tv_sec) > 10) {
 501                                 int ii;
 502                                 void *junkptr = zalloc_noblock(event_zone), *listhead = kfse_list_head.lh_first;
 503
 504                                 printf("add_fsevent: event queue is full! dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding);
 505                                 printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename);
 506                                 printf("add_fsevent: zalloc sez: %p\n", junkptr);
 507                                 printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]);
 508                                 lock_watch_table();
 509                                 for (ii = 0; ii < MAX_WATCHERS; ii++) {
 510                                         if (watcher_table[ii] == NULL) {
 511                                                 continue;
 512                                         }
 513
 514                                         printf("add_fsevent: watcher %s %p: rd %4d wr %4d q_size %4d flags 0x%x\n",
 515                                             watcher_table[ii]->proc_name,
 516                                             watcher_table[ii],
 517                                             watcher_table[ii]->rd, watcher_table[ii]->wr,
 518                                             watcher_table[ii]->eventq_size, watcher_table[ii]->flags);
 519                                 }
 520                                 unlock_watch_table();
 521
 522                                 last_print = current_tv;
 523                                 if (junkptr) {
 524                                         zfree(event_zone, junkptr);
 525                                 }
 526                         }
 527                 }
 528
 529                 if (pathbuff) {
 530                         release_pathbuff(pathbuff);
 531                         pathbuff = NULL;
 532                 }
 533                 return ENOSPC;
 534         }
 535
 536         memset(kfse, 0, sizeof(kfs_event));
 537         kfse->refcount = 1;
 538         OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags);
 539
 540         last_event_ptr = kfse;
 541         kfse->type     = type;
 542         kfse->abstime  = now;
 543         kfse->pid      = p->p_pid;
 544         if (type == FSE_RENAME || type == FSE_EXCHANGE || type == FSE_CLONE) {
 545                 memset(kfse_dest, 0, sizeof(kfs_event));
 546                 kfse_dest->refcount = 1;
 547                 OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse_dest->flags);
 548                 kfse_dest->type     = type;
 549                 kfse_dest->pid      = p->p_pid;
 550                 kfse_dest->abstime  = now;
 551
 552                 kfse->dest = kfse_dest;
 553         }
 554
 555         num_events_outstanding++;
 556         if (kfse->type == FSE_RENAME) {
 557                 num_pending_rename++;
 558         }
 559         LIST_INSERT_HEAD(&kfse_list_head, kfse, kevent_list);
 560
 561         if (kfse->refcount < 1) {
 562                 panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount);
 563         }
 564
 565         unlock_fs_event_list(); // at this point it's safe to unlock
 566
 567         //
 568         // now process the arguments passed in and copy them into
 569         // the kfse
 570         //
 571
 572         cur = kfse;
 573
 574         if (type == FSE_DOCID_CREATED || type == FSE_DOCID_CHANGED) {
 575                 uint64_t val;
 576
 577                 //
 578                 // These events are special and not like the other events.  They only
 579                 // have a dev_t, src inode #, dest inode #, and a doc-id.  We use the
 580                 // fields that we can in the kfse but have to overlay the dest inode
 581                 // number and the doc-id on the other fields.
 582                 //
 583
 584                 // First the dev_t
 585                 arg_type = va_arg(ap, int32_t);
 586                 if (arg_type == FSE_ARG_DEV) {
 587                         cur->dev = (dev_t)(va_arg(ap, dev_t));
 588                 } else {
 589                         cur->dev = (dev_t)0xbadc0de1;
 590                 }
 591
 592                 // next the source inode #
 593                 arg_type = va_arg(ap, int32_t);
 594                 if (arg_type == FSE_ARG_INO) {
 595                         cur->ino = (ino64_t)(va_arg(ap, ino64_t));
 596                 } else {
 597                         cur->ino = 0xbadc0de2;
 598                 }
 599
 600                 // now the dest inode #
 601                 arg_type = va_arg(ap, int32_t);
 602                 if (arg_type == FSE_ARG_INO) {
 603                         val = (ino64_t)(va_arg(ap, ino64_t));
 604                 } else {
 605                         val = 0xbadc0de2;
 606                 }
 607                 // overlay the dest inode number on the str/dest pointer fields
 608                 __nochk_memcpy(&cur->str, &val, sizeof(ino64_t));
 609
 610
 611                 // and last the document-id
 612                 arg_type = va_arg(ap, int32_t);
 613                 if (arg_type == FSE_ARG_INT32) {
 614                         val = (uint64_t)va_arg(ap, uint32_t);
 615                 } else if (arg_type == FSE_ARG_INT64) {
 616                         val = (uint64_t)va_arg(ap, uint64_t);
 617                 } else {
 618                         val = 0xbadc0de3;
 619                 }
 620
 621                 // the docid is 64-bit and overlays the uid/gid fields
 622                 static_assert(sizeof(cur->uid) + sizeof(cur->gid) == sizeof(val), "gid/uid size mismatch");
 623                 static_assert(offsetof(struct kfs_event, gid) - offsetof(struct kfs_event, uid) == sizeof(cur->uid), "unexpected struct kfs_event layout");
 624                 memcpy(&cur->uid, &val, sizeof(cur->uid));
 625                 memcpy(&cur->gid, (u_int8_t *)&val + sizeof(cur->uid), sizeof(cur->gid));
 626
 627                 goto done_with_args;
 628         }
 629
 630         if (type == FSE_UNMOUNT_PENDING) {
 631                 // Just a dev_t
 632                 arg_type = va_arg(ap, int32_t);
 633                 if (arg_type == FSE_ARG_DEV) {
 634                         cur->dev = (dev_t)(va_arg(ap, dev_t));
 635                 } else {
 636                         cur->dev = (dev_t)0xbadc0de1;
 637                 }
 638
 639                 goto done_with_args;
 640         }
 641
 642         for (arg_type = va_arg(ap, int32_t); arg_type != FSE_ARG_DONE; arg_type = va_arg(ap, int32_t)) {
 643                 switch (arg_type) {
 644                 case FSE_ARG_VNODE: {
 645                         // this expands out into multiple arguments to the client
 646                         struct vnode *vp;
 647                         struct vnode_attr va;
 648
 649                         if (kfse->str != NULL) {
 650                                 cur = kfse_dest;
 651                         }
 652
 653                         vp = va_arg(ap, struct vnode *);
 654                         if (vp == NULL) {
 655                                 panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!\n",
 656                                     cur->type);
 657                         }
 658
 659                         VATTR_INIT(&va);
 660                         VATTR_WANTED(&va, va_fsid);
 661                         VATTR_WANTED(&va, va_fileid);
 662                         VATTR_WANTED(&va, va_mode);
 663                         VATTR_WANTED(&va, va_uid);
 664                         VATTR_WANTED(&va, va_gid);
 665                         VATTR_WANTED(&va, va_nlink);
 666                         if ((ret = vnode_getattr(vp, &va, vfs_context_kernel())) != 0) {
 667                                 // printf("add_fsevent: failed to getattr on vp %p (%d)\n", cur->fref.vp, ret);
 668                                 cur->str = NULL;
 669                                 error = EINVAL;
 670                                 goto clean_up;
 671                         }
 672
 673                         cur->dev  = dev = (dev_t)va.va_fsid;
 674                         cur->ino  = (ino64_t)va.va_fileid;
 675                         cur->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
 676                         cur->uid  = va.va_uid;
 677                         cur->gid  = va.va_gid;
 678                         if (vp->v_flag & VISHARDLINK) {
 679                                 cur->mode |= FSE_MODE_HLINK;
 680                                 if ((vp->v_type == VDIR && va.va_dirlinkcount == 0) || (vp->v_type == VREG && va.va_nlink == 0)) {
 681                                         cur->mode |= FSE_MODE_LAST_HLINK;
 682                                 }
 683                         }
 684
 685                         // if we haven't gotten the path yet, get it.
 686                         if (pathbuff == NULL) {
 687                                 pathbuff = get_pathbuff();
 688                                 pathbuff_len = MAXPATHLEN;
 689
 690                                 pathbuff[0] = '\0';
 691                                 if ((ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') {
 692                                         cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS;
 693
 694                                         do {
 695                                                 if (vp->v_parent != NULL) {
 696                                                         vp = vp->v_parent;
 697                                                 } else if (vp->v_mount) {
 698                                                         strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN);
 699                                                         break;
 700                                                 } else {
 701                                                         vp = NULL;
 702                                                 }
 703
 704                                                 if (vp == NULL) {
 705                                                         break;
 706                                                 }
 707
 708                                                 pathbuff_len = MAXPATHLEN;
 709                                                 ret = vn_getpath_no_firmlink(vp, pathbuff, &pathbuff_len);
 710                                         } while (ret == ENOSPC);
 711
 712                                         if (ret != 0 || vp == NULL) {
 713                                                 error = ENOENT;
 714                                                 goto clean_up;
 715                                         }
 716                                 }
 717                         }
 718
 719                         // store the path by adding it to the global string table
 720                         cur->len = pathbuff_len;
 721                         cur->str = vfs_addname(pathbuff, pathbuff_len, 0, 0);
 722                         if (cur->str == NULL || cur->str[0] == '\0') {
 723                                 panic("add_fsevent: was not able to add path %s to event %p.\n", pathbuff, cur);
 724                         }
 725
 726                         release_pathbuff(pathbuff);
 727                         pathbuff = NULL;
 728
 729                         break;
 730                 }
 731
 732                 case FSE_ARG_FINFO: {
 733                         fse_info *fse;
 734
 735                         fse = va_arg(ap, fse_info *);
 736
 737                         cur->dev  = dev = (dev_t)fse->dev;
 738                         cur->ino  = (ino64_t)fse->ino;
 739                         cur->mode = (int32_t)fse->mode;
 740                         cur->uid  = (uid_t)fse->uid;
 741                         cur->gid  = (uid_t)fse->gid;
 742                         // if it's a hard-link and this is the last link, flag it
 743                         if ((fse->mode & FSE_MODE_HLINK) && fse->nlink == 0) {
 744                                 cur->mode |= FSE_MODE_LAST_HLINK;
 745                         }
 746                         if (cur->mode & FSE_TRUNCATED_PATH) {
 747                                 cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS;
 748                                 cur->mode &= ~FSE_TRUNCATED_PATH;
 749                         }
 750                         break;
 751                 }
 752
 753                 case FSE_ARG_STRING:
 754                         if (kfse->str != NULL) {
 755                                 cur = kfse_dest;
 756                         }
 757
 758                         cur->len = (int16_t)(va_arg(ap, int32_t) & 0x7fff);
 759                         if (cur->len >= 1) {
 760                                 cur->str = vfs_addname(va_arg(ap, char *), cur->len, 0, 0);
 761                         } else {
 762                                 printf("add_fsevent: funny looking string length: %d\n", (int)cur->len);
 763                                 cur->len = 2;
 764                                 cur->str = vfs_addname("/", cur->len, 0, 0);
 765                         }
 766                         if (cur->str[0] == 0) {
 767                                 printf("add_fsevent: bogus looking string (len %d)\n", cur->len);
 768                         }
 769                         break;
 770
 771                 case FSE_ARG_INT32: {
 772                         uint32_t ival = (uint32_t)va_arg(ap, int32_t);
 773                         kfse->uid = (ino64_t)ival;
 774                         break;
 775                 }
 776
 777                 default:
 778                         printf("add_fsevent: unknown type %d\n", arg_type);
 779                         // just skip one 32-bit word and hope we sync up...
 780                         (void)va_arg(ap, int32_t);
 781                 }
 782         }
 783
 784 done_with_args:
 785         va_end(ap);
 786
 787         OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse->flags);
 788         if (kfse_dest) {
 789                 OSBitAndAtomic16(~KFSE_BEING_CREATED, &kfse_dest->flags);
 790         }
 791
 792         //
 793         // now we have to go and let everyone know that
 794         // is interested in this type of event
 795         //
 796         lock_watch_table();
 797
 798         for (i = 0; i < MAX_WATCHERS; i++) {
 799                 watcher = watcher_table[i];
 800                 if (watcher == NULL) {
 801                         continue;
 802                 }
 803
 804                 if (type < watcher->num_events
 805                     && watcher->event_list[type] == FSE_REPORT
 806                     && watcher_cares_about_dev(watcher, dev)) {
 807                         if (watcher_add_event(watcher, kfse) != 0) {
 808                                 watcher->num_dropped++;
 809                                 continue;
 810                         }
 811                 }
 812
 813                 // if (kfse->refcount < 1) {
 814                 //    panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount);
 815                 // }
 816         }
 817
 818         unlock_watch_table();
 819
 820 clean_up:
 821
 822         if (pathbuff) {
 823                 release_pathbuff(pathbuff);
 824                 pathbuff = NULL;
 825         }
 826
 827         release_event_ref(kfse);
 828
 829         return error;
 830 }
 831
 832
 833 static void
 834 release_event_ref(kfs_event *kfse)
 835 {
 836         int old_refcount;
 837         kfs_event copy, dest_copy;
 838
 839
 840         old_refcount = OSAddAtomic(-1, &kfse->refcount);
 841         if (old_refcount > 1) {
 842                 return;
 843         }
 844
 845         lock_fs_event_list();
 846         if (last_event_ptr == kfse) {
 847                 last_event_ptr = NULL;
 848                 last_event_type = -1;
 849                 last_coalesced_time = 0;
 850         }
 851
 852         if (kfse->refcount < 0) {
 853                 panic("release_event_ref: bogus kfse refcount %d\n", kfse->refcount);
 854         }
 855
 856         if (kfse->refcount > 0 || kfse->type == FSE_INVALID) {
 857                 // This is very subtle.  Either of these conditions can
 858                 // be true if an event got recycled while we were waiting
 859                 // on the fs_event_list lock or the event got recycled,
 860                 // delivered, _and_ free'd by someone else while we were
 861                 // waiting on the fs event list lock.  In either case
 862                 // we need to just unlock the list and return without
 863                 // doing anything because if the refcount is > 0 then
 864                 // someone else will take care of free'ing it and when
 865                 // the kfse->type is invalid then someone else already
 866                 // has handled free'ing the event (while we were blocked
 867                 // on the event list lock).
 868                 //
 869                 unlock_fs_event_list();
 870                 return;
 871         }
 872
 873         //
 874         // make a copy of this so we can free things without
 875         // holding the fs_event_buf lock
 876         //
 877         copy = *kfse;
 878         if (kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && kfse->dest && OSAddAtomic(-1, &kfse->dest->refcount) == 1) {
 879                 dest_copy = *kfse->dest;
 880         } else {
 881                 dest_copy.str  = NULL;
 882                 dest_copy.len  = 0;
 883                 dest_copy.type = FSE_INVALID;
 884         }
 885
 886         kfse->pid = kfse->type;         // save this off for debugging...
 887         kfse->uid = (uid_t)(long)kfse->str;   // save this off for debugging...
 888         kfse->gid = (gid_t)(long)current_thread();
 889
 890         kfse->str = (char *)0xdeadbeef;         // XXXdbg - catch any cheaters...
 891
 892         if (dest_copy.type != FSE_INVALID) {
 893                 kfse->dest->str = (char *)0xbadc0de; // XXXdbg - catch any cheaters...
 894                 kfse->dest->type = FSE_INVALID;
 895
 896                 if (kfse->dest->kevent_list.le_prev != NULL) {
 897                         num_events_outstanding--;
 898                         LIST_REMOVE(kfse->dest, kevent_list);
 899                         memset(&kfse->dest->kevent_list, 0xa5, sizeof(kfse->dest->kevent_list));
 900                 }
 901
 902                 zfree(event_zone, kfse->dest);
 903         }
 904
 905         // mark this fsevent as invalid
 906         {
 907                 int otype;
 908
 909                 otype = kfse->type;
 910                 kfse->type = FSE_INVALID;
 911
 912                 if (kfse->kevent_list.le_prev != NULL) {
 913                         num_events_outstanding--;
 914                         if (otype == FSE_RENAME) {
 915                                 num_pending_rename--;
 916                         }
 917                         LIST_REMOVE(kfse, kevent_list);
 918                         memset(&kfse->kevent_list, 0, sizeof(kfse->kevent_list));
 919                 }
 920         }
 921
 922         zfree(event_zone, kfse);
 923
 924         unlock_fs_event_list();
 925
 926         // if we have a pointer in the union
 927         if (copy.str && copy.type != FSE_DOCID_CREATED && copy.type != FSE_DOCID_CHANGED) {
 928                 if (copy.len == 0) { // and it's not a string
 929                         panic("%s:%d: no more fref.vp!\n", __FILE__, __LINE__);
 930                         // vnode_rele_ext(copy.fref.vp, O_EVTONLY, 0);
 931                 } else {        // else it's a string
 932                         vfs_removename(copy.str);
 933                 }
 934         }
 935
 936         if (dest_copy.type != FSE_INVALID && dest_copy.str) {
 937                 if (dest_copy.len == 0) {
 938                         panic("%s:%d: no more fref.vp!\n", __FILE__, __LINE__);
 939                         // vnode_rele_ext(dest_copy.fref.vp, O_EVTONLY, 0);
 940                 } else {
 941                         vfs_removename(dest_copy.str);
 942                 }
 943         }
 944 }
 945
 946 static int
 947 add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out, void *fseh)
 948 {
 949         int               i;
 950         fs_event_watcher *watcher;
 951
 952         if (eventq_size <= 0 || eventq_size > 100 * max_kfs_events) {
 953                 eventq_size = max_kfs_events;
 954         }
 955
 956         // Note: the event_queue follows the fs_event_watcher struct
 957         //       in memory so we only have to do one allocation
 958         MALLOC(watcher,
 959             fs_event_watcher *,
 960             sizeof(fs_event_watcher) + eventq_size * sizeof(kfs_event *),
 961             M_TEMP, M_WAITOK);
 962         if (watcher == NULL) {
 963                 return ENOMEM;
 964         }
 965
 966         watcher->event_list   = event_list;
 967         watcher->num_events   = num_events;
 968         watcher->devices_not_to_watch = NULL;
 969         watcher->num_devices  = 0;
 970         watcher->flags        = 0;
 971         watcher->event_queue  = (kfs_event **)&watcher[1];
 972         watcher->eventq_size  = eventq_size;
 973         watcher->rd           = 0;
 974         watcher->wr           = 0;
 975         watcher->blockers     = 0;
 976         watcher->num_readers  = 0;
 977         watcher->max_event_id = 0;
 978         watcher->fseh         = fseh;
 979         watcher->pid          = proc_selfpid();
 980         proc_selfname(watcher->proc_name, sizeof(watcher->proc_name));
 981
 982         watcher->num_dropped  = 0;  // XXXdbg - debugging
 983
 984         if (!strncmp(watcher->proc_name, "fseventsd", sizeof(watcher->proc_name)) ||
 985             !strncmp(watcher->proc_name, "coreservicesd", sizeof(watcher->proc_name)) ||
 986             !strncmp(watcher->proc_name, "revisiond", sizeof(watcher->proc_name)) ||
 987             !strncmp(watcher->proc_name, "mds", sizeof(watcher->proc_name))) {
 988                 watcher->flags |= WATCHER_APPLE_SYSTEM_SERVICE;
 989         } else {
 990                 printf("fsevents: watcher %s (pid: %d) - Using /dev/fsevents directly is unsupported.  Migrate to FSEventsFramework\n",
 991                     watcher->proc_name, watcher->pid);
 992         }
 993
 994         lock_watch_table();
 995
 996         // find a slot for the new watcher
 997         for (i = 0; i < MAX_WATCHERS; i++) {
 998                 if (watcher_table[i] == NULL) {
 999                         watcher->my_id   = i;
1000                         watcher_table[i] = watcher;
1001                         break;
1002                 }
1003         }
1004
1005         if (i >= MAX_WATCHERS) {
1006                 printf("fsevents: too many watchers!\n");
1007                 unlock_watch_table();
1008                 FREE(watcher, M_TEMP);
1009                 return ENOSPC;
1010         }
1011
1012         // now update the global list of who's interested in
1013         // events of a particular type...
1014         for (i = 0; i < num_events; i++) {
1015                 if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1016                         fs_event_type_watchers[i]++;
1017                 }
1018         }
1019
1020         unlock_watch_table();
1021
1022         *watcher_out = watcher;
1023
1024         return 0;
1025 }
1026
1027
1028
1029 static void
1030 remove_watcher(fs_event_watcher *target)
1031 {
1032         int i, j, counter = 0;
1033         fs_event_watcher *watcher;
1034         kfs_event *kfse;
1035
1036         lock_watch_table();
1037
1038         for (j = 0; j < MAX_WATCHERS; j++) {
1039                 watcher = watcher_table[j];
1040                 if (watcher != target) {
1041                         continue;
1042                 }
1043
1044                 watcher_table[j] = NULL;
1045
1046                 for (i = 0; i < watcher->num_events; i++) {
1047                         if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) {
1048                                 fs_event_type_watchers[i]--;
1049                         }
1050                 }
1051
1052                 if (watcher->flags & WATCHER_CLOSING) {
1053                         unlock_watch_table();
1054                         return;
1055                 }
1056
1057                 // printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags);
1058                 watcher->flags |= WATCHER_CLOSING;
1059                 OSAddAtomic(1, &watcher->num_readers);
1060
1061                 unlock_watch_table();
1062
1063                 while (watcher->num_readers > 1 && counter++ < 5000) {
1064                         lock_watch_table();
1065                         fsevents_wakeup(watcher); // in case they're asleep
1066                         unlock_watch_table();
1067
1068                         tsleep(watcher, PRIBIO, "fsevents-close", 1);
1069                 }
1070                 if (counter++ >= 5000) {
1071                         // printf("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1072                         panic("fsevents: close: still have readers! (%d)\n", watcher->num_readers);
1073                 }
1074
1075                 // drain the event_queue
1076
1077                 lck_rw_lock_exclusive(&event_handling_lock);
1078                 while (watcher->rd != watcher->wr) {
1079                         kfse = watcher->event_queue[watcher->rd];
1080                         watcher->event_queue[watcher->rd] = NULL;
1081                         watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1082                         OSSynchronizeIO();
1083                         if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1084                                 release_event_ref(kfse);
1085                         }
1086                 }
1087                 lck_rw_unlock_exclusive(&event_handling_lock);
1088
1089                 if (watcher->event_list) {
1090                         FREE(watcher->event_list, M_TEMP);
1091                         watcher->event_list = NULL;
1092                 }
1093                 if (watcher->devices_not_to_watch) {
1094                         FREE(watcher->devices_not_to_watch, M_TEMP);
1095                         watcher->devices_not_to_watch = NULL;
1096                 }
1097                 FREE(watcher, M_TEMP);
1098
1099                 return;
1100         }
1101
1102         unlock_watch_table();
1103 }
1104
1105
1106 #define EVENT_DELAY_IN_MS   10
1107 static thread_call_t event_delivery_timer = NULL;
1108 static int timer_set = 0;
1109
1110
1111 static void
1112 delayed_event_delivery(__unused void *param0, __unused void *param1)
1113 {
1114         int i;
1115
1116         lock_watch_table();
1117
1118         for (i = 0; i < MAX_WATCHERS; i++) {
1119                 if (watcher_table[i] != NULL && watcher_table[i]->rd != watcher_table[i]->wr) {
1120                         fsevents_wakeup(watcher_table[i]);
1121                 }
1122         }
1123
1124         timer_set = 0;
1125
1126         unlock_watch_table();
1127 }
1128
1129
1130 //
1131 // The watch table must be locked before calling this function.
1132 //
1133 static void
1134 schedule_event_wakeup(void)
1135 {
1136         uint64_t deadline;
1137
1138         if (event_delivery_timer == NULL) {
1139                 event_delivery_timer = thread_call_allocate((thread_call_func_t)delayed_event_delivery, NULL);
1140         }
1141
1142         clock_interval_to_deadline(EVENT_DELAY_IN_MS, 1000 * 1000, &deadline);
1143
1144         thread_call_enter_delayed(event_delivery_timer, deadline);
1145         timer_set = 1;
1146 }
1147
1148
1149
1150 #define MAX_NUM_PENDING  16
1151
1152 //
1153 // NOTE: the watch table must be locked before calling
1154 //       this routine.
1155 //
1156 static int
1157 watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse)
1158 {
1159         if (kfse->abstime > watcher->max_event_id) {
1160                 watcher->max_event_id = kfse->abstime;
1161         }
1162
1163         if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) {
1164                 watcher->flags |= WATCHER_DROPPED_EVENTS;
1165                 fsevents_wakeup(watcher);
1166                 return ENOSPC;
1167         }
1168
1169         OSAddAtomic(1, &kfse->refcount);
1170         watcher->event_queue[watcher->wr] = kfse;
1171         OSSynchronizeIO();
1172         watcher->wr = (watcher->wr + 1) % watcher->eventq_size;
1173
1174         //
1175         // wake up the watcher if there are more than MAX_NUM_PENDING events.
1176         // otherwise schedule a timer (if one isn't already set) which will
1177         // send any pending events if no more are received in the next
1178         // EVENT_DELAY_IN_MS milli-seconds.
1179         //
1180         int32_t num_pending = 0;
1181         if (watcher->rd < watcher->wr) {
1182                 num_pending = watcher->wr - watcher->rd;
1183         }
1184
1185         if (watcher->rd > watcher->wr) {
1186                 num_pending = watcher->wr + watcher->eventq_size - watcher->rd;
1187         }
1188
1189         if (num_pending > (watcher->eventq_size * 3 / 4) && !(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE)) {
1190                 /* Non-Apple Service is falling behind, start dropping events for this process */
1191                 lck_rw_lock_exclusive(&event_handling_lock);
1192                 while (watcher->rd != watcher->wr) {
1193                         kfse = watcher->event_queue[watcher->rd];
1194                         watcher->event_queue[watcher->rd] = NULL;
1195                         watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1196                         OSSynchronizeIO();
1197                         if (kfse != NULL && kfse->type != FSE_INVALID && kfse->refcount >= 1) {
1198                                 release_event_ref(kfse);
1199                         }
1200                 }
1201                 watcher->flags |= WATCHER_DROPPED_EVENTS;
1202                 lck_rw_unlock_exclusive(&event_handling_lock);
1203
1204                 printf("fsevents: watcher falling behind: %s (pid: %d) rd: %4d wr: %4d q_size: %4d flags: 0x%x\n",
1205                     watcher->proc_name, watcher->pid, watcher->rd, watcher->wr,
1206                     watcher->eventq_size, watcher->flags);
1207
1208                 fsevents_wakeup(watcher);
1209         } else if (num_pending > MAX_NUM_PENDING) {
1210                 fsevents_wakeup(watcher);
1211         } else if (timer_set == 0) {
1212                 schedule_event_wakeup();
1213         }
1214
1215         return 0;
1216 }
1217
1218 static int
1219 fill_buff(uint16_t type, int32_t size, const void *data,
1220     char *buff, int32_t *_buff_idx, int32_t buff_sz,
1221     struct uio *uio)
1222 {
1223         int32_t amt, error = 0, buff_idx = *_buff_idx;
1224         uint16_t tmp;
1225
1226         //
1227         // the +1 on the size is to guarantee that the main data
1228         // copy loop will always copy at least 1 byte
1229         //
1230         if ((buff_sz - buff_idx) <= (int)(2 * sizeof(uint16_t) + 1)) {
1231                 if (buff_idx > uio_resid(uio)) {
1232                         error = ENOSPC;
1233                         goto get_out;
1234                 }
1235
1236                 error = uiomove(buff, buff_idx, uio);
1237                 if (error) {
1238                         goto get_out;
1239                 }
1240                 buff_idx = 0;
1241         }
1242
1243         // copy out the header (type & size)
1244         memcpy(&buff[buff_idx], &type, sizeof(uint16_t));
1245         buff_idx += sizeof(uint16_t);
1246
1247         tmp = size & 0xffff;
1248         memcpy(&buff[buff_idx], &tmp, sizeof(uint16_t));
1249         buff_idx += sizeof(uint16_t);
1250
1251         // now copy the body of the data, flushing along the way
1252         // if the buffer fills up.
1253         //
1254         while (size > 0) {
1255                 amt = (size < (buff_sz - buff_idx)) ? size : (buff_sz - buff_idx);
1256                 memcpy(&buff[buff_idx], data, amt);
1257
1258                 size -= amt;
1259                 buff_idx += amt;
1260                 data = (const char *)data + amt;
1261                 if (size > (buff_sz - buff_idx)) {
1262                         if (buff_idx > uio_resid(uio)) {
1263                                 error = ENOSPC;
1264                                 goto get_out;
1265                         }
1266                         error = uiomove(buff, buff_idx, uio);
1267                         if (error) {
1268                                 goto get_out;
1269                         }
1270                         buff_idx = 0;
1271                 }
1272
1273                 if (amt == 0) { // just in case...
1274                         break;
1275                 }
1276         }
1277
1278 get_out:
1279         *_buff_idx = buff_idx;
1280
1281         return error;
1282 }
1283
1284
1285 static int copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)  __attribute__((noinline));
1286
1287 static int
1288 copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio)
1289 {
1290         int      error;
1291         uint16_t tmp16;
1292         int32_t  type;
1293         kfs_event *cur;
1294         char     evbuff[512];
1295         int      evbuff_idx = 0;
1296
1297         if (kfse->type == FSE_INVALID) {
1298                 panic("fsevents: copy_out_kfse: asked to copy out an invalid event (kfse %p, refcount %d fref ptr %p)\n", kfse, kfse->refcount, kfse->str);
1299         }
1300
1301         if (kfse->flags & KFSE_BEING_CREATED) {
1302                 return 0;
1303         }
1304
1305         if (((kfse->type == FSE_RENAME) || (kfse->type == FSE_CLONE)) && kfse->dest == NULL) {
1306                 //
1307                 // This can happen if an event gets recycled but we had a
1308                 // pointer to it in our event queue.  The event is the
1309                 // destination of a rename or clone which we'll process separately
1310                 // (that is, another kfse points to this one so it's ok
1311                 // to skip this guy because we'll process it when we process
1312                 // the other one)
1313                 error = 0;
1314                 goto get_out;
1315         }
1316
1317         if (watcher->flags & WATCHER_WANTS_EXTENDED_INFO) {
1318                 type = (kfse->type & 0xfff);
1319
1320                 if (kfse->flags & KFSE_CONTAINS_DROPPED_EVENTS) {
1321                         type |= (FSE_CONTAINS_DROPPED_EVENTS << FSE_FLAG_SHIFT);
1322                 } else if (kfse->flags & KFSE_COMBINED_EVENTS) {
1323                         type |= (FSE_COMBINED_EVENTS << FSE_FLAG_SHIFT);
1324                 }
1325         } else {
1326                 type = (int32_t)kfse->type;
1327         }
1328
1329         // copy out the type of the event
1330         memcpy(evbuff, &type, sizeof(int32_t));
1331         evbuff_idx += sizeof(int32_t);
1332
1333         // copy out the pid of the person that generated the event
1334         memcpy(&evbuff[evbuff_idx], &kfse->pid, sizeof(pid_t));
1335         evbuff_idx += sizeof(pid_t);
1336
1337         cur = kfse;
1338
1339 copy_again:
1340
1341         if (kfse->type == FSE_DOCID_CHANGED || kfse->type == FSE_DOCID_CREATED) {
1342                 dev_t    dev  = cur->dev;
1343                 ino64_t    ino  = cur->ino;
1344                 uint64_t ival;
1345
1346                 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1347                 if (error != 0) {
1348                         goto get_out;
1349                 }
1350
1351                 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1352                 if (error != 0) {
1353                         goto get_out;
1354                 }
1355
1356                 memcpy(&ino, &cur->str, sizeof(ino64_t));
1357                 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1358                 if (error != 0) {
1359                         goto get_out;
1360                 }
1361
1362                 memcpy(&ival, &cur->uid, sizeof(uint64_t)); // the docid gets stuffed into the ino field
1363                 error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &ival, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1364                 if (error != 0) {
1365                         goto get_out;
1366                 }
1367
1368                 goto done;
1369         }
1370
1371         if (kfse->type == FSE_UNMOUNT_PENDING) {
1372                 dev_t    dev  = cur->dev;
1373
1374                 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1375                 if (error != 0) {
1376                         goto get_out;
1377                 }
1378
1379                 goto done;
1380         }
1381
1382         if (cur->str == NULL || cur->str[0] == '\0') {
1383                 printf("copy_out_kfse:2: empty/short path (%s)\n", cur->str);
1384                 error = fill_buff(FSE_ARG_STRING, 2, "/", evbuff, &evbuff_idx, sizeof(evbuff), uio);
1385         } else {
1386                 error = fill_buff(FSE_ARG_STRING, cur->len, cur->str, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1387         }
1388         if (error != 0) {
1389                 goto get_out;
1390         }
1391
1392         if (cur->dev == 0 && cur->ino == 0) {
1393                 // this happens when a rename event happens and the
1394                 // destination of the rename did not previously exist.
1395                 // it thus has no other file info so skip copying out
1396                 // the stuff below since it isn't initialized
1397                 goto done;
1398         }
1399
1400
1401         if (watcher->flags & WATCHER_WANTS_COMPACT_EVENTS) {
1402                 int32_t finfo_size;
1403
1404                 finfo_size = sizeof(dev_t) + sizeof(ino64_t) + sizeof(int32_t) + sizeof(uid_t) + sizeof(gid_t);
1405                 error = fill_buff(FSE_ARG_FINFO, finfo_size, &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1406                 if (error != 0) {
1407                         goto get_out;
1408                 }
1409         } else {
1410                 error = fill_buff(FSE_ARG_DEV, sizeof(dev_t), &cur->dev, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1411                 if (error != 0) {
1412                         goto get_out;
1413                 }
1414
1415                 error = fill_buff(FSE_ARG_INO, sizeof(ino64_t), &cur->ino, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1416                 if (error != 0) {
1417                         goto get_out;
1418                 }
1419
1420                 error = fill_buff(FSE_ARG_MODE, sizeof(int32_t), &cur->mode, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1421                 if (error != 0) {
1422                         goto get_out;
1423                 }
1424
1425                 error = fill_buff(FSE_ARG_UID, sizeof(uid_t), &cur->uid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1426                 if (error != 0) {
1427                         goto get_out;
1428                 }
1429
1430                 error = fill_buff(FSE_ARG_GID, sizeof(gid_t), &cur->gid, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1431                 if (error != 0) {
1432                         goto get_out;
1433                 }
1434         }
1435
1436
1437         if (cur->dest) {
1438                 cur = cur->dest;
1439                 goto copy_again;
1440         }
1441
1442 done:
1443         // very last thing: the time stamp
1444         error = fill_buff(FSE_ARG_INT64, sizeof(uint64_t), &cur->abstime, evbuff, &evbuff_idx, sizeof(evbuff), uio);
1445         if (error != 0) {
1446                 goto get_out;
1447         }
1448
1449         // check if the FSE_ARG_DONE will fit
1450         if (sizeof(uint16_t) > sizeof(evbuff) - evbuff_idx) {
1451                 if (evbuff_idx > uio_resid(uio)) {
1452                         error = ENOSPC;
1453                         goto get_out;
1454                 }
1455                 error = uiomove(evbuff, evbuff_idx, uio);
1456                 if (error) {
1457                         goto get_out;
1458                 }
1459                 evbuff_idx = 0;
1460         }
1461
1462         tmp16 = FSE_ARG_DONE;
1463         memcpy(&evbuff[evbuff_idx], &tmp16, sizeof(uint16_t));
1464         evbuff_idx += sizeof(uint16_t);
1465
1466         // flush any remaining data in the buffer (and hopefully
1467         // in most cases this is the only uiomove we'll do)
1468         if (evbuff_idx > uio_resid(uio)) {
1469                 error = ENOSPC;
1470         } else {
1471                 error = uiomove(evbuff, evbuff_idx, uio);
1472         }
1473
1474 get_out:
1475
1476         return error;
1477 }
1478
1479
1480
1481 static int
1482 fmod_watch(fs_event_watcher *watcher, struct uio *uio)
1483 {
1484         int               error = 0;
1485         user_ssize_t      last_full_event_resid;
1486         kfs_event        *kfse;
1487         uint16_t          tmp16;
1488         int               skipped;
1489
1490         last_full_event_resid = uio_resid(uio);
1491
1492         // need at least 2048 bytes of space (maxpathlen + 1 event buf)
1493         if (uio_resid(uio) < 2048 || watcher == NULL) {
1494                 return EINVAL;
1495         }
1496
1497         if (watcher->flags & WATCHER_CLOSING) {
1498                 return 0;
1499         }
1500
1501         if (OSAddAtomic(1, &watcher->num_readers) != 0) {
1502                 // don't allow multiple threads to read from the fd at the same time
1503                 OSAddAtomic(-1, &watcher->num_readers);
1504                 return EAGAIN;
1505         }
1506
1507 restart_watch:
1508         if (watcher->rd == watcher->wr) {
1509                 if (watcher->flags & WATCHER_CLOSING) {
1510                         OSAddAtomic(-1, &watcher->num_readers);
1511                         return 0;
1512                 }
1513                 OSAddAtomic(1, &watcher->blockers);
1514
1515                 // there's nothing to do, go to sleep
1516                 error = tsleep((caddr_t)watcher, PUSER | PCATCH, "fsevents_empty", 0);
1517
1518                 OSAddAtomic(-1, &watcher->blockers);
1519
1520                 if (error != 0 || (watcher->flags & WATCHER_CLOSING)) {
1521                         OSAddAtomic(-1, &watcher->num_readers);
1522                         return error;
1523                 }
1524         }
1525
1526         // if we dropped events, return that as an event first
1527         if (watcher->flags & WATCHER_DROPPED_EVENTS) {
1528                 int32_t val = FSE_EVENTS_DROPPED;
1529
1530                 error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1531                 if (error == 0) {
1532                         val = 0; // a fake pid
1533                         error = uiomove((caddr_t)&val, sizeof(int32_t), uio);
1534
1535                         tmp16 = FSE_ARG_DONE; // makes it a consistent msg
1536                         error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio);
1537
1538                         last_full_event_resid = uio_resid(uio);
1539                 }
1540
1541                 if (error) {
1542                         OSAddAtomic(-1, &watcher->num_readers);
1543                         return error;
1544                 }
1545
1546                 watcher->flags &= ~WATCHER_DROPPED_EVENTS;
1547         }
1548
1549         skipped = 0;
1550
1551         lck_rw_lock_shared(&event_handling_lock);
1552         while (uio_resid(uio) > 0 && watcher->rd != watcher->wr) {
1553                 if (watcher->flags & WATCHER_CLOSING) {
1554                         break;
1555                 }
1556
1557                 //
1558                 // check if the event is something of interest to us
1559                 // (since it may have been recycled/reused and changed
1560                 // its type or which device it is for)
1561                 //
1562                 kfse = watcher->event_queue[watcher->rd];
1563                 if (!kfse || kfse->type == FSE_INVALID || kfse->type >= watcher->num_events || kfse->refcount < 1) {
1564                         break;
1565                 }
1566
1567                 if (watcher->event_list[kfse->type] == FSE_REPORT) {
1568                         boolean_t watcher_cares;
1569
1570                         if (watcher->devices_not_to_watch == NULL) {
1571                                 watcher_cares = true;
1572                         } else {
1573                                 lock_watch_table();
1574                                 watcher_cares = watcher_cares_about_dev(watcher, kfse->dev);
1575                                 unlock_watch_table();
1576                         }
1577
1578                         if (watcher_cares) {
1579                                 if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
1580                                         // If this is not an Apple System Service, skip specified directories
1581                                         // radar://12034844
1582                                         error = 0;
1583                                         skipped = 1;
1584                                 } else {
1585                                         skipped = 0;
1586                                         if (last_event_ptr == kfse) {
1587                                                 last_event_ptr = NULL;
1588                                                 last_event_type = -1;
1589                                                 last_coalesced_time = 0;
1590                                         }
1591                                         error = copy_out_kfse(watcher, kfse, uio);
1592                                         if (error != 0) {
1593                                                 // if an event won't fit or encountered an error while
1594                                                 // we were copying it out, then backup to the last full
1595                                                 // event and just bail out.  if the error was ENOENT
1596                                                 // then we can continue regular processing, otherwise
1597                                                 // we should unlock things and return.
1598                                                 uio_setresid(uio, last_full_event_resid);
1599                                                 if (error != ENOENT) {
1600                                                         lck_rw_unlock_shared(&event_handling_lock);
1601                                                         error = 0;
1602                                                         goto get_out;
1603                                                 }
1604                                         }
1605
1606                                         last_full_event_resid = uio_resid(uio);
1607                                 }
1608                         }
1609                 }
1610
1611                 watcher->event_queue[watcher->rd] = NULL;
1612                 watcher->rd = (watcher->rd + 1) % watcher->eventq_size;
1613                 OSSynchronizeIO();
1614                 release_event_ref(kfse);
1615         }
1616         lck_rw_unlock_shared(&event_handling_lock);
1617
1618         if (skipped && error == 0) {
1619                 goto restart_watch;
1620         }
1621
1622 get_out:
1623         OSAddAtomic(-1, &watcher->num_readers);
1624
1625         return error;
1626 }
1627
1628
1629 //
1630 // Shoo watchers away from a volume that's about to be unmounted
1631 // (so that it can be cleanly unmounted).
1632 //
1633 void
1634 fsevent_unmount(__unused struct mount *mp, __unused vfs_context_t ctx)
1635 {
1636 #if CONFIG_EMBEDDED
1637         dev_t dev = mp->mnt_vfsstat.f_fsid.val[0];
1638         int error, waitcount = 0;
1639         struct timespec ts = {.tv_sec = 1, .tv_nsec = 0};
1640
1641         // wait for any other pending unmounts to complete
1642         lock_watch_table();
1643         while (fsevent_unmount_dev != 0) {
1644                 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_wait", &ts);
1645                 if (error == EWOULDBLOCK) {
1646                         error = 0;
1647                 }
1648                 if (!error && (++waitcount >= 10)) {
1649                         error = EWOULDBLOCK;
1650                         printf("timeout waiting to signal unmount pending for dev %d (fsevent_unmount_dev %d)\n", dev, fsevent_unmount_dev);
1651                 }
1652                 if (error) {
1653                         // there's a problem, bail out
1654                         unlock_watch_table();
1655                         return;
1656                 }
1657         }
1658         if (fs_event_type_watchers[FSE_UNMOUNT_PENDING] == 0) {
1659                 // nobody watching for unmount pending events
1660                 unlock_watch_table();
1661                 return;
1662         }
1663         // this is now the current unmount pending
1664         fsevent_unmount_dev = dev;
1665         fsevent_unmount_ack_count = fs_event_type_watchers[FSE_UNMOUNT_PENDING];
1666         unlock_watch_table();
1667
1668         // send an event to notify the watcher they need to get off the mount
1669         error = add_fsevent(FSE_UNMOUNT_PENDING, ctx, FSE_ARG_DEV, dev, FSE_ARG_DONE);
1670
1671         // wait for acknowledgment(s) (give up if it takes too long)
1672         lock_watch_table();
1673         waitcount = 0;
1674         while (fsevent_unmount_dev == dev) {
1675                 error = msleep((caddr_t)&fsevent_unmount_dev, &watch_table_lock, PRIBIO, "fsevent_unmount_pending", &ts);
1676                 if (error == EWOULDBLOCK) {
1677                         error = 0;
1678                 }
1679                 if (!error && (++waitcount >= 10)) {
1680                         error = EWOULDBLOCK;
1681                         printf("unmount pending ack timeout for dev %d\n", dev);
1682                 }
1683                 if (error) {
1684                         // there's a problem, bail out
1685                         if (fsevent_unmount_dev == dev) {
1686                                 fsevent_unmount_dev = 0;
1687                                 fsevent_unmount_ack_count = 0;
1688                         }
1689                         wakeup((caddr_t)&fsevent_unmount_dev);
1690                         break;
1691                 }
1692         }
1693         unlock_watch_table();
1694 #endif
1695 }
1696
1697
1698 //
1699 // /dev/fsevents device code
1700 //
1701 static int fsevents_installed = 0;         // NOTE(review): not referenced in the code visible here; presumably set when the device is installed -- confirm
1702
1703 typedef struct fsevent_handle {            // per-descriptor state; the fileops below reach it through fg_data
1704         UInt32            flags;           // FSEH_* bits; fseventsf_close() sets FSEH_CLOSING atomically
1705         SInt32            active;          // count of ioctls in flight; fseventsf_close() waits for it to drop to zero
1706         fs_event_watcher *watcher;         // the watcher whose event queue this descriptor reads
1707         struct klist      knotes;          // knotes attached by fseventsf_kqfilter()
1708         struct selinfo    si;              // select() bookkeeping used by fseventsf_select()
1709 } fsevent_handle;
1710
1711 #define FSEH_CLOSING   0x0001              // fsevent_handle.flags: the descriptor is being closed
1712
1713 static int
1714 fseventsf_read(struct fileproc *fp, struct uio *uio,
1715     __unused int flags, __unused vfs_context_t ctx)
1716 {
1717         fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
1718         int error;
1719
1720         error = fmod_watch(fseh->watcher, uio);
1721
1722         return error;
1723 }
1724
1725
1726 #pragma pack(push, 4)                      // 4-byte packing applies to both argument layouts below
1727 typedef struct fsevent_dev_filter_args32 {         // device-filter ioctl argument as passed by a 32-bit process
1728         uint32_t            num_devices;           // number of dev_t entries at 'devices'
1729         user32_addr_t       devices;               // user-space address of the dev_t array
1730 } fsevent_dev_filter_args32;
1731 typedef struct fsevent_dev_filter_args64 {         // same argument as passed by a 64-bit process; also the form fseventsf_ioctl() works on
1732         uint32_t            num_devices;           // fseventsf_ioctl() rejects values above 256; 0 removes the filter
1733         user64_addr_t       devices;               // user-space address of the dev_t array (read with copyin)
1734 } fsevent_dev_filter_args64;
1735 #pragma pack(pop)
1736
1737 #define FSEVENTS_DEVICE_FILTER_32       _IOW('s', 100, fsevent_dev_filter_args32)      // only accepted from 32-bit callers
1738 #define FSEVENTS_DEVICE_FILTER_64       _IOW('s', 100, fsevent_dev_filter_args64)      // only accepted from 64-bit callers
1739
1740 static int
1741 fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx)
1742 {
1743         fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
1744         int ret = 0;
1745         fsevent_dev_filter_args64 *devfilt_args, _devfilt_args;
1746
1747         OSAddAtomic(1, &fseh->active);
1748         if (fseh->flags & FSEH_CLOSING) {
1749                 OSAddAtomic(-1, &fseh->active);
1750                 return 0;
1751         }
1752
1753         switch (cmd) {
1754         case FIONBIO:
1755         case FIOASYNC:
1756                 break;
1757
1758         case FSEVENTS_WANT_COMPACT_EVENTS: {
1759                 fseh->watcher->flags |= WATCHER_WANTS_COMPACT_EVENTS;
1760                 break;
1761         }
1762
1763         case FSEVENTS_WANT_EXTENDED_INFO: {
1764                 fseh->watcher->flags |= WATCHER_WANTS_EXTENDED_INFO;
1765                 break;
1766         }
1767
1768         case FSEVENTS_GET_CURRENT_ID: {
1769                 *(uint64_t *)data = fseh->watcher->max_event_id;
1770                 ret = 0;
1771                 break;
1772         }
1773
1774         case FSEVENTS_DEVICE_FILTER_32: {
1775                 if (proc_is64bit(vfs_context_proc(ctx))) {
1776                         ret = EINVAL;
1777                         break;
1778                 }
1779                 fsevent_dev_filter_args32 *devfilt_args32 = (fsevent_dev_filter_args32 *)data;
1780
1781                 devfilt_args = &_devfilt_args;
1782                 memset(devfilt_args, 0, sizeof(fsevent_dev_filter_args64));
1783                 devfilt_args->num_devices = devfilt_args32->num_devices;
1784                 devfilt_args->devices     = CAST_USER_ADDR_T(devfilt_args32->devices);
1785                 goto handle_dev_filter;
1786         }
1787
1788         case FSEVENTS_DEVICE_FILTER_64:
1789                 if (!proc_is64bit(vfs_context_proc(ctx))) {
1790                         ret = EINVAL;
1791                         break;
1792                 }
1793                 devfilt_args = (fsevent_dev_filter_args64 *)data;
1794
1795 handle_dev_filter:
1796                 {
1797                         int new_num_devices;
1798                         dev_t *devices_not_to_watch, *tmp = NULL;
1799
1800                         if (devfilt_args->num_devices > 256) {
1801                                 ret = EINVAL;
1802                                 break;
1803                         }
1804
1805                         new_num_devices = devfilt_args->num_devices;
1806                         if (new_num_devices == 0) {
1807                                 lock_watch_table();
1808
1809                                 tmp = fseh->watcher->devices_not_to_watch;
1810                                 fseh->watcher->devices_not_to_watch = NULL;
1811                                 fseh->watcher->num_devices = new_num_devices;
1812
1813                                 unlock_watch_table();
1814                                 if (tmp) {
1815                                         FREE(tmp, M_TEMP);
1816                                 }
1817                                 break;
1818                         }
1819
1820                         MALLOC(devices_not_to_watch, dev_t *,
1821                             new_num_devices * sizeof(dev_t),
1822                             M_TEMP, M_WAITOK);
1823                         if (devices_not_to_watch == NULL) {
1824                                 ret = ENOMEM;
1825                                 break;
1826                         }
1827
1828                         ret = copyin(devfilt_args->devices,
1829                             (void *)devices_not_to_watch,
1830                             new_num_devices * sizeof(dev_t));
1831                         if (ret) {
1832                                 FREE(devices_not_to_watch, M_TEMP);
1833                                 break;
1834                         }
1835
1836                         lock_watch_table();
1837                         fseh->watcher->num_devices = new_num_devices;
1838                         tmp = fseh->watcher->devices_not_to_watch;
1839                         fseh->watcher->devices_not_to_watch = devices_not_to_watch;
1840                         unlock_watch_table();
1841
1842                         if (tmp) {
1843                                 FREE(tmp, M_TEMP);
1844                         }
1845
1846                         break;
1847                 }
1848
1849         case FSEVENTS_UNMOUNT_PENDING_ACK: {
1850                 lock_watch_table();
1851                 dev_t dev = *(dev_t *)data;
1852                 if (fsevent_unmount_dev == dev) {
1853                         if (--fsevent_unmount_ack_count <= 0) {
1854                                 fsevent_unmount_dev = 0;
1855                                 wakeup((caddr_t)&fsevent_unmount_dev);
1856                         }
1857                 } else {
1858                         printf("unexpected unmount pending ack %d (%d)\n", dev, fsevent_unmount_dev);
1859                         ret = EINVAL;
1860                 }
1861                 unlock_watch_table();
1862                 break;
1863         }
1864
1865         default:
1866                 ret = EINVAL;
1867                 break;
1868         }
1869
1870         OSAddAtomic(-1, &fseh->active);
1871         return ret;
1872 }
1873
1874
1875 static int
1876 fseventsf_select(struct fileproc *fp, int which, __unused void *wql, vfs_context_t ctx)
1877 {
1878         fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
1879         int ready = 0;
1880
1881         if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) {
1882                 return 0;
1883         }
1884
1885
1886         // if there's nothing in the queue, we're not ready
1887         if (fseh->watcher->rd != fseh->watcher->wr) {
1888                 ready = 1;
1889         }
1890
1891         if (!ready) {
1892                 selrecord(vfs_context_proc(ctx), &fseh->si, wql);
1893         }
1894
1895         return ready;
1896 }
1897
1898
1899 #if NOTUSED        // compiled out unless NOTUSED is defined non-zero
1900 static int
1901 fseventsf_stat(__unused struct fileproc *fp, __unused struct stat *sb, __unused vfs_context_t ctx)
1902 {
1903         return ENOTSUP;        // stat is not supported on this descriptor type; all arguments are ignored
1904 }
1905 #endif             // NOTUSED
1906
1907 static int
1908 fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx)
1909 {
1910         fsevent_handle *fseh = (struct fsevent_handle *)fg->fg_data;
1911         fs_event_watcher *watcher;
1912
1913         OSBitOrAtomic(FSEH_CLOSING, &fseh->flags);
1914         while (OSAddAtomic(0, &fseh->active) > 0) {
1915                 tsleep((caddr_t)fseh->watcher, PRIBIO, "fsevents-close", 1);
1916         }
1917
1918         watcher = fseh->watcher;
1919         fg->fg_data = NULL;
1920         fseh->watcher = NULL;
1921
1922         remove_watcher(watcher);
1923         FREE(fseh, M_TEMP);
1924
1925         return 0;
1926 }
1927
1928 static void
1929 filt_fsevent_detach(struct knote *kn)
1930 {
1931         fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
1932
1933         lock_watch_table();
1934
1935         KNOTE_DETACH(&fseh->knotes, kn);
1936
1937         unlock_watch_table();
1938 }
1939
1940 /*
1941  * Determine whether this knote should be active
1942  *
1943  * This is kind of subtle.
1944  *      --First, notice if the vnode has been revoked: in so, override hint
1945  *      --EVFILT_READ knotes are checked no matter what the hint is
1946  *      --Other knotes activate based on hint.
1947  *      --If hint is revoke, set special flags and activate
1948  */
1949 static int
1950 filt_fsevent_common(struct knote *kn, struct kevent_qos_s *kev, long hint)
1951 {
1952         fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook;
1953         int activate = 0;
1954         int32_t rd, wr, amt;
1955         int64_t data = 0;
1956
1957         if (NOTE_REVOKE == hint) {
1958                 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1959                 activate = 1;
1960         }
1961
1962         rd = fseh->watcher->rd;
1963         wr = fseh->watcher->wr;
1964         if (rd <= wr) {
1965                 amt = wr - rd;
1966         } else {
1967                 amt = fseh->watcher->eventq_size - (rd - wr);
1968         }
1969
1970         switch (kn->kn_filter) {
1971         case EVFILT_READ:
1972                 data = amt;
1973                 activate = (data != 0);
1974                 break;
1975         case EVFILT_VNODE:
1976                 /* Check events this note matches against the hint */
1977                 if (kn->kn_sfflags & hint) {
1978                         kn->kn_fflags |= hint;         /* Set which event occurred */
1979                 }
1980                 if (kn->kn_fflags != 0) {
1981                         activate = 1;
1982                 }
1983                 break;
1984         default:
1985                 // nothing to do...
1986                 break;
1987         }
1988
1989         if (activate && kev) {
1990                 knote_fill_kevent(kn, kev, data);
1991         }
1992         return activate;
1993 }
1994
1995 static int
1996 filt_fsevent(struct knote *kn, long hint)
1997 {
1998         return filt_fsevent_common(kn, NULL, hint);
1999 }
2000
2001 static int
2002 filt_fsevent_touch(struct knote *kn, struct kevent_qos_s *kev)
2003 {
2004         int res;
2005
2006         lock_watch_table();
2007
2008         /* accept new fflags/data as saved */
2009         kn->kn_sfflags = kev->fflags;
2010         kn->kn_sdata = kev->data;
2011
2012         /* restrict the current results to the (smaller?) set of new interest */
2013         /*
2014          * For compatibility with previous implementations, we leave kn_fflags
2015          * as they were before.
2016          */
2017         //kn->kn_fflags &= kev->fflags;
2018
2019         /* determine if the filter is now fired */
2020         res = filt_fsevent_common(kn, NULL, 0);
2021
2022         unlock_watch_table();
2023
2024         return res;
2025 }
2026
//
// Filter process hook: evaluate the knote with no hint and, if it is
// active, fill in 'kev'.  Runs under the watch table lock.  Returns
// non-zero if the knote is active.
//
static int
filt_fsevent_process(struct knote *kn, struct kevent_qos_s *kev)
{
        int fired;

        lock_watch_table();
        fired = filt_fsevent_common(kn, kev, 0);
        unlock_watch_table();

        return fired;
}
2040
2041 SECURITY_READ_ONLY_EARLY(struct  filterops) fsevent_filtops = {       // kqueue filter operations for fsevents knotes
2042         .f_isfd = 1,
2043         .f_attach = NULL,                      // no attach hook here; fseventsf_kqfilter() performs the KNOTE_ATTACH
2044         .f_detach = filt_fsevent_detach,
2045         .f_event = filt_fsevent,
2046         .f_touch = filt_fsevent_touch,
2047         .f_process = filt_fsevent_process,
2048 };
2049
2050 static int
2051 fseventsf_kqfilter(struct fileproc *fp, struct knote *kn,
2052     __unused struct kevent_qos_s *kev)
2053 {
2054         fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
2055         int res;
2056
2057         kn->kn_hook = (void*)fseh;
2058         kn->kn_filtid = EVFILTID_FSEVENT;
2059
2060         lock_watch_table();
2061
2062         KNOTE_ATTACH(&fseh->knotes, kn);
2063
2064         /* check to see if it is fired already */
2065         res = filt_fsevent_common(kn, NULL, 0);
2066
2067         unlock_watch_table();
2068
2069         return res;
2070 }
2071
2072
2073 static int
2074 fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx)
2075 {
2076         int counter = 0;
2077         fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data;
2078
2079         // if there are people still waiting, sleep for 10ms to
2080         // let them clean up and get out of there.  however we
2081         // also don't want to get stuck forever so if they don't
2082         // exit after 5 seconds we're tearing things down anyway.
2083         while (fseh->watcher->blockers && counter++ < 500) {
2084                 // issue wakeup in case anyone is blocked waiting for an event
2085                 // do this each time we wakeup in case the blocker missed
2086                 // the wakeup due to the unprotected test of WATCHER_CLOSING
2087                 // and decision to tsleep in fmod_watch... this bit of
2088                 // latency is a decent tradeoff against not having to
2089                 // take and drop a lock in fmod_watch
2090                 lock_watch_table();
2091                 fsevents_wakeup(fseh->watcher);
2092                 unlock_watch_table();
2093
2094                 tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1);
2095         }
2096
2097         return 0;
2098 }
2099
2100
2101 static int
2102 fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2103 {
2104         if (!kauth_cred_issuser(kauth_cred_get())) {
2105                 return EPERM;
2106         }
2107
2108         return 0;
2109 }
2110
2111 static int
2112 fseventsclose(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p)
2113 {
2114         return 0;      // device close entry point: all arguments are ignored and it always succeeds
2115 }
2116
2117 static int
2118 fseventsread(__unused dev_t dev, __unused struct uio *uio, __unused int ioflag)
2119 {
2120         return EIO;    // device read entry point: always fails; events are read through fseventsf_read() on the cloned descriptor
2121 }
2122
2123
2124 static int
2125 parse_buffer_and_add_events(const char *buffer, int bufsize, vfs_context_t ctx, long *remainder)
2126 {
2127         const fse_info *finfo, *dest_finfo;
2128         const char *path, *ptr, *dest_path, *event_start = buffer;
2129         int path_len, type, dest_path_len, err = 0;
2130
2131
2132         ptr = buffer;
2133         while ((ptr + sizeof(int) + sizeof(fse_info) + 1) < buffer + bufsize) {
2134                 type = *(const int *)ptr;
2135                 if (type < 0 || type >= FSE_MAX_EVENTS) {
2136                         err = EINVAL;
2137                         break;
2138                 }
2139
2140                 ptr += sizeof(int);
2141
2142                 finfo = (const fse_info *)ptr;
2143                 ptr += sizeof(fse_info);
2144
2145                 path = ptr;
2146                 while (ptr < buffer + bufsize && *ptr != '\0') {
2147                         ptr++;
2148                 }
2149
2150                 if (ptr >= buffer + bufsize) {
2151                         break;
2152                 }
2153
2154                 ptr++; // advance over the trailing '\0'
2155
2156                 path_len = ptr - path;
2157
2158                 if (type != FSE_RENAME && type != FSE_EXCHANGE && type != FSE_CLONE) {
2159                         event_start = ptr; // record where the next event starts
2160
2161                         err = add_fsevent(type, ctx, FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, FSE_ARG_DONE);
2162                         if (err) {
2163                                 break;
2164                         }
2165                         continue;
2166                 }
2167
2168                 //
2169                 // if we're here we have to slurp up the destination finfo
2170                 // and path so that we can pass them to the add_fsevent()
2171                 // call.  basically it's a copy of the above code.
2172                 //
2173                 dest_finfo = (const fse_info *)ptr;
2174                 ptr += sizeof(fse_info);
2175
2176                 dest_path = ptr;
2177                 while (ptr < buffer + bufsize && *ptr != '\0') {
2178                         ptr++;
2179                 }
2180
2181                 if (ptr >= buffer + bufsize) {
2182                         break;
2183                 }
2184
2185                 ptr++;       // advance over the trailing '\0'
2186                 event_start = ptr; // record where the next event starts
2187
2188                 dest_path_len = ptr - dest_path;
2189                 //
2190                 // If the destination inode number is non-zero, generate a rename
2191                 // with both source and destination FSE_ARG_FINFO. Otherwise generate
2192                 // a rename with only one FSE_ARG_FINFO. If you need to inject an
2193                 // exchange with an inode of zero, just make that inode (and its path)
2194                 // come in as the first one, not the second.
2195                 //
2196                 if (dest_finfo->ino) {
2197                         err = add_fsevent(type, ctx,
2198                             FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2199                             FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo,
2200                             FSE_ARG_DONE);
2201                 } else {
2202                         err = add_fsevent(type, ctx,
2203                             FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo,
2204                             FSE_ARG_STRING, dest_path_len, dest_path,
2205                             FSE_ARG_DONE);
2206                 }
2207
2208                 if (err) {
2209                         break;
2210                 }
2211         }
2212
2213         // if the last event wasn't complete, set the remainder
2214         // to be the last event start boundary.
2215         //
2216         *remainder = (long)((buffer + bufsize) - event_start);
2217
2218         return err;
2219 }
2220
2221
2222 //
2223 // Note: this buffer size can not ever be less than
2224 //       2*MAXPATHLEN + 2*sizeof(fse_info) + sizeof(int)
2225 //       because that is the max size for a single event.
2226 //       I made it 4k to be a "nice" size.  making it
2227 //       smaller is not a good idea.
2228 //
2229 #define WRITE_BUFFER_SIZE  4096                 // size in bytes of the staging buffer used by fseventswrite()
2230 char *write_buffer = NULL;                      // allocated on first use by fseventswrite(), which touches it only while holding event_writer_lock
2231
2232 static int
2233 fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag)
2234 {
2235         int error = 0, count;
2236         vfs_context_t ctx = vfs_context_current();
2237         long offset = 0, remainder;
2238
2239         lck_mtx_lock(&event_writer_lock);
2240
2241         if (write_buffer == NULL) {
2242                 if (kmem_alloc(kernel_map, (vm_offset_t *)&write_buffer, WRITE_BUFFER_SIZE, VM_KERN_MEMORY_FILE)) {
2243                         lck_mtx_unlock(&event_writer_lock);
2244                         return ENOMEM;
2245                 }
2246         }
2247
2248         //
2249         // this loop copies in and processes the events written.
2250         // it takes care to copy in reasonable size chunks and
2251         // process them.  if there is an event that spans a chunk
2252         // boundary we're careful to copy those bytes down to the
2253         // beginning of the buffer and read the next chunk in just
2254         // after it.
2255         //
2256         while (uio_resid(uio)) {
2257                 if (uio_resid(uio) > (WRITE_BUFFER_SIZE - offset)) {
2258                         count = WRITE_BUFFER_SIZE - offset;
2259                 } else {
2260                         count = uio_resid(uio);
2261                 }
2262
2263                 error = uiomove(write_buffer + offset, count, uio);
2264                 if (error) {
2265                         break;
2266                 }
2267
2268                 // printf("fsevents: write: copied in %d bytes (offset: %ld)\n", count, offset);
2269                 error = parse_buffer_and_add_events(write_buffer, offset + count, ctx, &remainder);
2270                 if (error) {
2271                         break;
2272                 }
2273
2274                 //
2275                 // if there's any remainder, copy it down to the beginning
2276                 // of the buffer so that it will get processed the next time
2277                 // through the loop.  note that the remainder always starts
2278                 // at an event boundary.
2279                 //
2280                 if (remainder != 0) {
2281                         // printf("fsevents: write: an event spanned a %d byte boundary.  remainder: %ld\n",
2282                         //      WRITE_BUFFER_SIZE, remainder);
2283                         memmove(write_buffer, (write_buffer + count + offset) - remainder, remainder);
2284                         offset = remainder;
2285                 } else {
2286                         offset = 0;
2287                 }
2288         }
2289
2290         lck_mtx_unlock(&event_writer_lock);
2291
2292         return error;
2293 }
2294
2295
2296 static const struct fileops fsevents_fops = {      // file operations for descriptors of type DTYPE_FSEVENTS
2297         .fo_type     = DTYPE_FSEVENTS,
2298         .fo_read     = fseventsf_read,
2299         .fo_write    = fo_no_write,                // NOTE(review): presumably a stock handler that rejects writes -- confirm
2300         .fo_ioctl    = fseventsf_ioctl,
2301         .fo_select   = fseventsf_select,
2302         .fo_close    = fseventsf_close,
2303         .fo_kqfilter = fseventsf_kqfilter,
2304         .fo_drain    = fseventsf_drain,
2305 };
2306
2307 typedef struct fsevent_clone_args32 {              // clone ioctl argument as passed by a 32-bit process
2308         user32_addr_t       event_list;            // user address of an int8_t array with one entry per event type
2309         int32_t             num_events;            // number of entries in event_list
2310         int32_t             event_queue_depth;     // handed to add_watcher() by fseventsioctl()
2311         user32_addr_t       fd;                    // user address of the int32_t "fd" result buffer
2312 } fsevent_clone_args32;
2313
2314 typedef struct fsevent_clone_args64 {              // same argument as passed by a 64-bit process; also the form fseventsioctl() works on
2315         user64_addr_t       event_list;            // copied in with copyin(), num_events bytes
2316         int32_t             num_events;            // fseventsioctl() rejects values below 0 or above 4096
2317         int32_t             event_queue_depth;
2318         user64_addr_t       fd;                    // wired with vslock() for sizeof(int32_t) bytes before the watcher is added
2319 } fsevent_clone_args64;
2320
2321 #define FSEVENTS_CLONE_32       _IOW('s', 1, fsevent_clone_args32)     // only accepted from 32-bit callers
2322 #define FSEVENTS_CLONE_64       _IOW('s', 1, fsevent_clone_args64)     // only accepted from 64-bit callers
2323
2324 static int
2325 fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p)
2326 {
2327         struct fileproc *f;
2328         int fd, error;
2329         fsevent_handle *fseh = NULL;
2330         fsevent_clone_args64 *fse_clone_args, _fse_clone;
2331         int8_t *event_list;
2332         int is64bit = proc_is64bit(p);
2333
2334         switch (cmd) {
2335         case FSEVENTS_CLONE_32: {
2336                 if (is64bit) {
2337                         return EINVAL;
2338                 }
2339                 fsevent_clone_args32 *args32 = (fsevent_clone_args32 *)data;
2340
2341                 fse_clone_args = &_fse_clone;
2342                 memset(fse_clone_args, 0, sizeof(fsevent_clone_args64));
2343
2344                 fse_clone_args->event_list        = CAST_USER_ADDR_T(args32->event_list);
2345                 fse_clone_args->num_events        = args32->num_events;
2346                 fse_clone_args->event_queue_depth = args32->event_queue_depth;
2347                 fse_clone_args->fd                = CAST_USER_ADDR_T(args32->fd);
2348                 goto handle_clone;
2349         }
2350
2351         case FSEVENTS_CLONE_64:
2352                 if (!is64bit) {
2353                         return EINVAL;
2354                 }
2355                 fse_clone_args = (fsevent_clone_args64 *)data;
2356
2357 handle_clone:
2358                 if (fse_clone_args->num_events < 0 || fse_clone_args->num_events > 4096) {
2359                         return EINVAL;
2360                 }
2361
2362                 MALLOC(fseh, fsevent_handle *, sizeof(fsevent_handle),
2363                     M_TEMP, M_WAITOK);
2364                 if (fseh == NULL) {
2365                         return ENOMEM;
2366                 }
2367                 memset(fseh, 0, sizeof(fsevent_handle));
2368
2369                 klist_init(&fseh->knotes);
2370
2371                 MALLOC(event_list, int8_t *,
2372                     fse_clone_args->num_events * sizeof(int8_t),
2373                     M_TEMP, M_WAITOK);
2374                 if (event_list == NULL) {
2375                         FREE(fseh, M_TEMP);
2376                         return ENOMEM;
2377                 }
2378
2379                 error = copyin(fse_clone_args->event_list,
2380                     (void *)event_list,
2381                     fse_clone_args->num_events * sizeof(int8_t));
2382                 if (error) {
2383                         FREE(event_list, M_TEMP);
2384                         FREE(fseh, M_TEMP);
2385                         return error;
2386                 }
2387
2388                 /*
2389                  * Lock down the user's "fd" result buffer so it's safe
2390                  * to hold locks while we copy it out.
2391                  */
2392                 error = vslock((user_addr_t)fse_clone_args->fd,
2393                     sizeof(int32_t));
2394                 if (error) {
2395                         FREE(event_list, M_TEMP);
2396                         FREE(fseh, M_TEMP);
2397                         return error;
2398                 }
2399
2400                 error = add_watcher(event_list,
2401                     fse_clone_args->num_events,
2402                     fse_clone_args->event_queue_depth,
2403                     &fseh->watcher,
2404                     fseh);
2405                 if (error) {
2406                         vsunlock((user_addr_t)fse_clone_args->fd,
2407                             sizeof(int32_t), 0);
2408                         FREE(event_list, M_TEMP);
2409                         FREE(fseh, M_TEMP);
2410                         return error;
2411                 }
2412
2413                 fseh->watcher->fseh = fseh;
2414
2415                 error = falloc(p, &f, &fd, vfs_context_current());
2416                 if (error) {
2417                         remove_watcher(fseh->watcher);
2418                         vsunlock((user_addr_t)fse_clone_args->fd,
2419                             sizeof(int32_t), 0);
2420                         FREE(event_list, M_TEMP);
2421                         FREE(fseh, M_TEMP);
2422                         return error;
2423                 }
2424                 proc_fdlock(p);
2425                 f->f_fglob->fg_flag = FREAD | FWRITE;
2426                 f->f_fglob->fg_ops = &fsevents_fops;
2427                 f->f_fglob->fg_data = (caddr_t) fseh;
2428                 /*
2429                  * We can safely hold the proc_fdlock across this copyout()
2430                  * because of the vslock() call above.  The vslock() call
2431                  * also ensures that we will never get an error, so assert
2432                  * this.
2433                  */
2434                 error = copyout((void *)&fd, fse_clone_args->fd, sizeof(int32_t));
2435                 assert(error == 0);
2436
2437                 procfdtbl_releasefd(p, fd, NULL);
2438                 fp_drop(p, fd, f, 1);
2439                 proc_fdunlock(p);
2440
2441                 vsunlock((user_addr_t)fse_clone_args->fd,
2442                     sizeof(int32_t), 1);
2443                 break;
2444
2445         default:
2446                 error = EINVAL;
2447                 break;
2448         }
2449
2450         return error;
2451 }
2452
2453 static void
2454 fsevents_wakeup(fs_event_watcher *watcher)
2455 {
2456         selwakeup(&watcher->fseh->si);
2457         KNOTE(&watcher->fseh->knotes, NOTE_WRITE | NOTE_NONE);
2458         wakeup((caddr_t)watcher);
2459 }
2460
2461
2462 /*
2463  * A struct describing which functions will get invoked for certain
2464  * actions.
2465  */
2466 static struct cdevsw fsevents_cdevsw =
2467 {
2468         fseventsopen,           /* open */
2469         fseventsclose,          /* close */
2470         fseventsread,           /* read */
2471         fseventswrite,          /* write */
2472         fseventsioctl,          /* ioctl */
2473         (stop_fcn_t *)&nulldev, /* stop */
2474         (reset_fcn_t *)&nulldev, /* reset */
2475         NULL,                   /* tty's */
2476         eno_select,             /* select */
2477         eno_mmap,               /* mmap */
2478         eno_strat,              /* strategy */
2479         eno_getc,               /* getc */
2480         eno_putc,               /* putc */
2481         0                       /* type */
2482 };
2483
2484
2485 /*
2486  * Called to initialize our device,
2487  * and to register ourselves with devfs
2488  */
2489
2490 void
2491 fsevents_init(void)
2492 {
2493         int ret;
2494
2495         if (fsevents_installed) {
2496                 return;
2497         }
2498
2499         fsevents_installed = 1;
2500
2501         ret = cdevsw_add(-1, &fsevents_cdevsw);
2502         if (ret < 0) {
2503                 fsevents_installed = 0;
2504                 return;
2505         }
2506
2507         devfs_make_node(makedev(ret, 0), DEVFS_CHAR,
2508             UID_ROOT, GID_WHEEL, 0644, "fsevents", 0);
2509
2510         fsevents_internal_init();
2511 }
2512
2513
2514 char *
2515 get_pathbuff(void)
2516 {
2517         char *path;
2518
2519         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2520         return path;
2521 }
2522
2523 void
2524 release_pathbuff(char *path)
2525 {
2526         if (path == NULL) {
2527                 return;
2528         }
2529         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2530 }
2531
2532 int
2533 get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
2534 {
2535         struct vnode_attr va;
2536
2537         VATTR_INIT(&va);
2538         VATTR_WANTED(&va, va_fsid);
2539         va.va_vaflags |= VA_REALFSID;
2540         VATTR_WANTED(&va, va_fileid);
2541         VATTR_WANTED(&va, va_mode);
2542         VATTR_WANTED(&va, va_uid);
2543         VATTR_WANTED(&va, va_gid);
2544         if (vp->v_flag & VISHARDLINK) {
2545                 if (vp->v_type == VDIR) {
2546                         VATTR_WANTED(&va, va_dirlinkcount);
2547                 } else {
2548                         VATTR_WANTED(&va, va_nlink);
2549                 }
2550         }
2551
2552         if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
2553                 memset(fse, 0, sizeof(fse_info));
2554                 return -1;
2555         }
2556
2557         return vnode_get_fse_info_from_vap(vp, fse, &va);
2558 }
2559
2560 int
2561 vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap)
2562 {
2563         fse->ino  = (ino64_t)vap->va_fileid;
2564         fse->dev  = (dev_t)vap->va_fsid;
2565         fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
2566         fse->uid  = (uid_t)vap->va_uid;
2567         fse->gid  = (gid_t)vap->va_gid;
2568         if (vp->v_flag & VISHARDLINK) {
2569                 fse->mode |= FSE_MODE_HLINK;
2570                 if (vp->v_type == VDIR) {
2571                         fse->nlink = (uint64_t)vap->va_dirlinkcount;
2572                 } else {
2573                         fse->nlink = (uint64_t)vap->va_nlink;
2574                 }
2575         }
2576
2577         return 0;
2578 }
2579
2580 void
2581 create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap)
2582 {
2583         int fsevent_type = FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic
2584         char pathbuf[MAXPATHLEN];
2585         fse_info fse;
2586
2587
2588         if (kevents & VNODE_EVENT_DELETE) {
2589                 fsevent_type = FSE_DELETE;
2590         } else if (kevents & (VNODE_EVENT_EXTEND | VNODE_EVENT_WRITE)) {
2591                 fsevent_type = FSE_CONTENT_MODIFIED;
2592         } else if (kevents & VNODE_EVENT_LINK) {
2593                 fsevent_type = FSE_CREATE_FILE;
2594         } else if (kevents & VNODE_EVENT_RENAME) {
2595                 fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info;
2596         } else if (kevents & (VNODE_EVENT_FILE_CREATED | VNODE_EVENT_FILE_REMOVED | VNODE_EVENT_DIR_CREATED | VNODE_EVENT_DIR_REMOVED)) {
2597                 fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it
2598         } else { // a catch all for VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else
2599                 fsevent_type = FSE_STAT_CHANGED;
2600         }
2601
2602         // printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)");
2603
2604         fse.dev = vap->va_fsid;
2605         fse.ino = vap->va_fileid;
2606         fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode;
2607         if (vp->v_flag & VISHARDLINK) {
2608                 fse.mode |= FSE_MODE_HLINK;
2609                 if (vp->v_type == VDIR) {
2610                         fse.nlink = vap->va_dirlinkcount;
2611                 } else {
2612                         fse.nlink = vap->va_nlink;
2613                 }
2614         }
2615
2616         if (vp->v_type == VDIR) {
2617                 fse.mode |= FSE_REMOTE_DIR_EVENT;
2618         }
2619
2620
2621         fse.uid = vap->va_uid;
2622         fse.gid = vap->va_gid;
2623
2624         len = sizeof(pathbuf);
2625         if (vn_getpath_no_firmlink(vp, pathbuf, &len) == 0) {
2626                 add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE);
2627         }
2628         return;
2629 }
2630
2631 #else /* CONFIG_FSE */
2632
2633 #include <sys/fsevents.h>
2634
2635 /*
2636  * The get_pathbuff and release_pathbuff routines are used in places not
2637  * related to fsevents, and it's a handy abstraction, so define trivial
2638  * versions that don't cache a pool of buffers.  This way, we don't have
2639  * to conditionalize the callers, and they still get the advantage of the
2640  * pool of buffers if CONFIG_FSE is turned on.
2641  */
2642 char *
2643 get_pathbuff(void)
2644 {
2645         char *path;
2646         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2647         return path;
2648 }
2649
2650 void
2651 release_pathbuff(char *path)
2652 {
2653         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2654 }
2655
2656 int
2657 add_fsevent(__unused int type, __unused vfs_context_t ctx, ...)
2658 {
2659         return 0;
2660 }
2661
2662 int
2663 need_fsevent(__unused int type, __unused vnode_t vp)
2664 {
2665         return 0;
2666 }
2667
2668 #endif /* CONFIG_FSE */