//
//  Machine.impl.hpp
//  KDBG
//
//  Created by James McIlree on 10/30/12.
//  Copyright (c) 2014 Apple. All rights reserved.
//

#include "KDebug.h"

template <typename SIZE>
bool process_by_time_sort(const MachineProcess<SIZE>* left, const MachineProcess<SIZE>* right) {
    return left->timespan().location() < right->timespan().location();
}

template <typename SIZE>
bool thread_by_time_sort(const MachineThread<SIZE>* left, const MachineThread<SIZE>* right) {
    return left->timespan().location() < right->timespan().location();
}

template <typename SIZE>
void Machine<SIZE>::post_initialize() {
    //
    // Post initialization. Sort the various by-time vectors, etc.
    //

    std::sort(_processes_by_time.begin(), _processes_by_time.end(), process_by_time_sort<SIZE>);
    std::sort(_threads_by_time.begin(), _threads_by_time.end(), thread_by_time_sort<SIZE>);

    // naked auto okay here, process is a ptr.
    AbsTime last_machine_timestamp = _events[_event_count-1].timestamp();
    for (auto process : _processes_by_time) {
        process->post_initialize(last_machine_timestamp);
    }

    //
    // Collapse the idle/intr/run queues into a single timeline
    //
    for (auto& cpu : _cpus) {
        cpu.post_initialize(timespan());
    }

    //
    // Flush any outstanding blocked events
    //
    for (auto& thread : _threads_by_tid) {
        thread.second.post_initialize(last_machine_timestamp);
    }

    //
    // Sort the IOActivity events, and build a flattened vector that can be
    // used to find IO ranges for intersecting during interval searches.
    //
    _io_by_uid.clear();
    std::sort(_all_io.begin(), _all_io.end());

    // We cannot use trange_vector_union to flatten _all_io, as _all_io isn't
    // a plain TRange<AbsTime> type, and we want to yield that type.
    // Cut & paste to the rescue! :-)
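    //
    // For example, if _all_io holds the sorted intervals
    //
    //     [ 0, 10 )  [ 5, 20 )  [ 30, 40 )
    //
    // the flattened result pushed into _all_io_active_intervals is
    //
    //     [ 0, 20 )  [ 30, 40 )
    //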
    if (!_all_io.empty()) {
        auto input_it = _all_io.begin();
        _all_io_active_intervals.push_back(*input_it);
        while (++input_it < _all_io.end()) {
            TRange<AbsTime> union_range = _all_io_active_intervals.back();

            if (union_range.intersects(*input_it)) {
                _all_io_active_intervals.pop_back();
                _all_io_active_intervals.push_back(union_range.union_range(*input_it));
            } else {
                ASSERT(union_range < *input_it, "Out of order merging");
                _all_io_active_intervals.push_back(*input_it);
            }
        }
    }

    //
    // Flush any outstanding MachMsg(s) in the nursery (state SEND)
    //
    // NOTE! We do not clear _mach_msg_nursery because its state is
    // forwarded to future Machine(s).
    //
    for (auto& nursery_it : _mach_msg_nursery) {
        auto& nursery_msg = nursery_it.second;
        if (nursery_msg.state() == kNurseryMachMsgState::Send) {
            auto mach_msg_it = _mach_msgs.emplace(_mach_msgs.end(),
                                                  nursery_msg.id(),
                                                  nursery_msg.kmsg_addr(),
                                                  kMachineMachMsgFlag::HasSender,
                                                  nursery_msg.send_time(),
                                                  nursery_msg.send_tid(),
                                                  nursery_msg.send_msgh_bits(),
                                                  nursery_msg.send_voucher(),
                                                  AbsTime(0),
                                                  0,
                                                  0,
                                                  &Machine<SIZE>::UnsetVoucher);
            _mach_msgs_by_event_index[nursery_msg.send_event_index()] = std::distance(_mach_msgs.begin(), mach_msg_it);
        }
    }

    //
    // Flush any outstanding Voucher(s) in the nursery
    //
    for (auto& nursery_it : _voucher_nursery) {
        //
        // First we need to "close" the open end of the live voucher's
        // timespan.
        //
        auto voucher = nursery_it.second.get();
        voucher->set_timespan_to_end_of_time();

        auto address = nursery_it.first;

        // Now find the "row" for this address.
        auto by_addr_it = _vouchers_by_addr.find(address);
        if (by_addr_it == _vouchers_by_addr.end()) {
            // No address entry case
            std::vector<std::unique_ptr<MachineVoucher<SIZE>>> row;
            row.emplace_back(std::move(nursery_it.second));
            _vouchers_by_addr.emplace(address, std::move(row));
        } else {
            auto& row = by_addr_it->second;

            // Make sure these are sorted and non-overlapping
            ASSERT(row.back()->timespan() < voucher->timespan(), "Sanity");
            ASSERT(!row.back()->timespan().intersects(voucher->timespan()), "Sanity");

            row.emplace_back(std::move(nursery_it.second));
        }
    }

    _voucher_nursery.clear();

    DEBUG_ONLY(validate());
}

template <typename SIZE>
void Machine<SIZE>::raw_initialize(const KDCPUMapEntry* cpumaps,
                                   uint32_t cpumap_count,
                                   const KDThreadMapEntry<SIZE>* threadmaps,
                                   uint32_t threadmap_count,
                                   const KDEvent<SIZE>* events,
                                   uintptr_t event_count)
{
    ASSERT(cpumaps || cpumap_count == 0, "Sanity");
    ASSERT(threadmaps || threadmap_count == 0, "Sanity");
    ASSERT(events || event_count == 0, "Sanity");

    for (uint32_t i = 0; i < cpumap_count; ++i) {
        _cpus.emplace_back(i, cpumaps[i].is_iop(), cpumaps[i].name());
    }

    // We cannot create processes / threads unless we have at least one event to give us a timestamp.
    if (event_count) {
        AbsTime now = events[0].timestamp();

        _kernel_task = create_process(0, "kernel_task", now, kMachineProcessFlag::IsKernelProcess);

        // Initial thread state, nothing in the nursery
        for (uint32_t index = 0; index < threadmap_count; ++index) {
            auto& threadmap = threadmaps[index];

            pid_t pid = threadmap.pid();

            // The kernel threadmap often has empty entries; bail out when we hit one.
            if (pid == 0)
                break;

            if (pid == 1 && strncmp(threadmap.name(), "kernel_task", 12) == 0) {
                pid = 0;
            }

            MachineProcess<SIZE>* process = youngest_mutable_process(pid);
            if (!process) {
                process = create_process(pid, threadmap.name(), now, kMachineProcessFlag::CreatedByThreadMap);
                ASSERT(process, "Sanity");
            }
            process->add_thread(create_thread(process, threadmap.tid(), &UnsetVoucher, now, kMachineThreadFlag::CreatedByThreadMap));
        }
    }

    // We need to know what the idle/INTR states of the CPUs are.
    initialize_cpu_idle_intr_states();

    for (uintptr_t index = 0; index < event_count; ++index) {
        if (!process_event(events[index]))
            break;
    }

    post_initialize();
}

template <typename SIZE>
Machine<SIZE>::Machine(KDCPUMapEntry* cpumaps, uint32_t cpumap_count, KDThreadMapEntry<SIZE>* threadmaps, uint32_t threadmap_count, KDEvent<SIZE>* events, uintptr_t event_count) :
    _kernel_task(nullptr),
    _events(events),
    _event_count(event_count),
    _flags(0),
    _unknown_process_pid(-1)
{
    raw_initialize(cpumaps,
                   cpumap_count,
                   threadmaps,
                   threadmap_count,
                   events,
                   event_count);
}

template <typename SIZE>
Machine<SIZE>::Machine(const TraceFile& file) :
    _kernel_task(nullptr),
    _events(file.events<SIZE>()),
    _event_count(file.event_count()),
    _flags(0),
    _unknown_process_pid(-1)
{
    raw_initialize(file.cpumap(),
                   file.cpumap_count(),
                   file.threadmap<SIZE>(),
                   file.threadmap_count(),
                   file.events<SIZE>(),
                   file.event_count());
}

template <typename SIZE>
Machine<SIZE>::Machine(Machine<SIZE>& parent, KDEvent<SIZE>* events, uintptr_t event_count) :
    _kernel_task(nullptr),
    _events(events),
    _event_count(event_count),
    _flags(0),
    _unknown_process_pid(-1)
{
    ASSERT(events || event_count == 0, "Sanity");

    const std::vector<const MachineThread<SIZE>*>& parent_threads = parent.threads();
    const std::vector<const MachineCPU<SIZE>>& parent_cpus = parent.cpus();

    for (const MachineCPU<SIZE>& parent_cpu : parent_cpus) {
        _cpus.emplace_back(parent_cpu.id(), parent_cpu.is_iop(), parent_cpu.name());
    }

    // We cannot create processes / threads unless we have at least one event to give us a timestamp.
    if (event_count) {
        AbsTime now = events[0].timestamp();

        //
        // Forward any live vouchers. This must be done before forwarding
        // threads or MachMsgs from their nurseries, as they hold references
        // to the vouchers.
        //
        for (auto& parent_vouchers_by_addr_it : parent._vouchers_by_addr) {
            std::unique_ptr<MachineVoucher<SIZE>>& voucher = parent_vouchers_by_addr_it.second.back();
            if (voucher->is_live()) {
                // When we flushed these vouchers in the previous machine state,
                // we set their timespans to infinite. We need to reset them in
                // case a close event arrives.
                voucher->set_timespan_to_zero_length();
                _voucher_nursery.emplace(voucher->address(), std::move(voucher));
            }
        }

        _kernel_task = create_process(0, "kernel_task", now, kMachineProcessFlag::IsKernelProcess);

        for (const MachineThread<SIZE>* parent_thread : parent_threads) {
            if (!parent_thread->is_trace_terminated()) {
                const MachineProcess<SIZE>& parent_process = parent_thread->process();
                MachineProcess<SIZE>* new_process = youngest_mutable_process(parent_process.pid());
                if (!new_process) {
                    kMachineProcessFlag new_process_flags = (kMachineProcessFlag)(parent_process.flags() | (uint32_t)kMachineProcessFlag::CreatedByPreviousMachineState);
                    new_process = create_process(parent_process.pid(), parent_process.name(), now, new_process_flags);
                    ASSERT(new_process, "Sanity");
                }
                new_process->add_thread(create_thread(new_process,
                                                      parent_thread->tid(),
                                                      thread_forwarding_voucher_lookup(parent_thread->last_voucher()),
                                                      now,
                                                      (kMachineThreadFlag)(parent_thread->flags() | (uint32_t)kMachineThreadFlag::CreatedByPreviousMachineState)));
            }
        }

        // We need to know what the idle/INTR states of the CPUs are.
        //
        // Start by looking at the existing states.
        uint32_t init_count = 0;
        uint32_t ap_count = 0;
        for (const MachineCPU<SIZE>& parent_cpu : parent_cpus) {
            if (!parent_cpu.is_iop()) {
                ap_count++;
                const std::vector<CPUActivity<SIZE>>& parent_cpu_timeline = parent_cpu.timeline();

                bool intr_initialized = false;
                bool idle_initialized = false;
                bool runq_initialized = false;

                MachineCPU<SIZE>& cpu = _cpus[parent_cpu.id()];

                for (auto reverse_it = parent_cpu_timeline.rbegin(); reverse_it < parent_cpu_timeline.rend(); ++reverse_it) {

                    // We can sometimes split two simultaneous events across two buffer snaps.
                    // IOW, buffer snap 1:
                    //
                    //     event[N].timestamp = 1234;
                    //
                    // buffer snap 2:
                    //
                    //     event[0].timestamp = 1234;
                    ASSERT(!reverse_it->contains(now) || reverse_it->max() - AbsTime(1) == now, "Sanity");
                    ASSERT(reverse_it->location() <= now, "Sanity");

                    switch (reverse_it->type()) {
                        //
                        // The states are separate, and hierarchical.
                        // The order (low -> high) is:
                        //
                        //     Run, Idle, INTR
                        //
                        // Unknown is a special state; we give up on seeing it.
                        // A lower state precludes being in a higher state, but
                        // not vice-versa. You cannot be Idle if you are
                        // currently Run, but you may be INTR during Run or Idle.
                        //

                        case kCPUActivity::Unknown:
                            // Don't actually initialize anything, just force a bailout
                            runq_initialized = idle_initialized = intr_initialized = true;
                            break;

                        // NOTE NOTE NOTE!
                        //
                        // Overly clever here; note the lack of "break" in the Run
                        // and Idle clauses, we fall through to initialize the
                        // higher states.
                        case kCPUActivity::Run:
                            ASSERT(!runq_initialized, "This should always be the last level to initialize");
                            if (MachineThread<SIZE>* on_cpu_thread = youngest_mutable_thread(reverse_it->thread()->tid())) {
                                cpu.initialize_thread_state(on_cpu_thread, now);
                                init_count++;
                            } else {
                                ASSERT(reverse_it->thread()->is_trace_terminated(), "We should find this thread unless it's been removed");
                            }
                            runq_initialized = true;
                            /* FALLTHROUGH */

                        case kCPUActivity::Idle:
                            if (!idle_initialized) {
                                cpu.initialize_idle_state(reverse_it->is_idle(), now);
                                init_count++;
                                idle_initialized = true;
                            }
                            /* FALLTHROUGH */

                        case kCPUActivity::INTR:
                            if (!intr_initialized) {
                                cpu.initialize_intr_state(reverse_it->is_intr(), now);
                                init_count++;
                                intr_initialized = true;
                            }
                            break;
                    }

                    if (runq_initialized) {
                        ASSERT(idle_initialized && intr_initialized, "Sanity");
                        break;
                    }
                }
            }
        }

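        // Each AP CPU contributes at most three initializations above
        // (thread state, idle, and intr). If any CPU's walk bailed out
        // before seeding all three (e.g. on an Unknown activity), fall
        // back to initialize_cpu_idle_intr_states().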
        if (init_count < (ap_count * 3)) {
            initialize_cpu_idle_intr_states();
        }

        //
        // Forward any messages from the nursery
        //

        for (auto& parent_nursery_it : parent._mach_msg_nursery) {
            auto& parent_nursery_msg = parent_nursery_it.second;

            switch (parent_nursery_msg.state()) {
                // We forward send(s) because they can become receives.
                // We forward frees because they stop us from showing bogus
                // kernel message receipts.
                case kNurseryMachMsgState::Send:
                case kNurseryMachMsgState::Free: {
                    ASSERT(_mach_msg_nursery.find(parent_nursery_msg.kmsg_addr()) == _mach_msg_nursery.end(), "Duplicate kmsg address when forwarding mach_msg nursery from parent");

                    auto it = _mach_msg_nursery.emplace(parent_nursery_msg.kmsg_addr(), parent_nursery_msg);

                    // Grr, emplace returns a std::pair<it, bool>, and the it is std::pair<key, value>...

                    // We have to clear this to prevent bogus data being shown
                    // during a receive; the send event index is no longer
                    // available.
                    it.first->second.set_send_event_index(-1);
                    break;
                }

                default:
                    break;
            }
        }
    }

    for (uintptr_t index = 0; index < event_count; ++index) {
        if (!process_event(_events[index]))
            break;
    }

    post_initialize();
}

template <typename SIZE>
const MachineProcess<SIZE>* Machine<SIZE>::process(pid_t pid, AbsTime time) const {
    auto by_pid_range = _processes_by_pid.equal_range(pid);
    for (auto it = by_pid_range.first; it != by_pid_range.second; ++it) {
        const MachineProcess<SIZE>& process = it->second;
        if (process.timespan().contains(time)) {
            return &process;
        }
    }

    return nullptr;
}

template <typename SIZE>
const MachineThread<SIZE>* Machine<SIZE>::thread(typename SIZE::ptr_t tid, AbsTime time) const {
    auto by_tid_range = _threads_by_tid.equal_range(tid);
    for (auto it = by_tid_range.first; it != by_tid_range.second; ++it) {
        const MachineThread<SIZE>& thread = it->second;
        if (thread.timespan().contains(time)) {
            return &thread;
        }
    }

    return nullptr;
}

template <typename SIZE>
struct VoucherVsAbsTimeComparator {
    bool operator()(const std::unique_ptr<MachineVoucher<SIZE>>& voucher, const AbsTime time) const {
        return voucher->timespan().max() < time;
    }

    bool operator()(const AbsTime time, const std::unique_ptr<MachineVoucher<SIZE>>& voucher) const {
        return time < voucher->timespan().max();
    }
};
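
//
// The asymmetric operator()s above let std::upper_bound compare an AbsTime
// key directly against the unique_ptr elements of a voucher row. For
// example, given a row whose timespans are [ 0, 10 ) and [ 10, 20 ), an
// upper_bound search for time 5 yields the first voucher (5 < max() of 10);
// voucher() below then confirms the hit with contains().
//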

template <typename SIZE>
const MachineVoucher<SIZE>* Machine<SIZE>::voucher(typename SIZE::ptr_t address, AbsTime timestamp) const {
    // First find the "row" for this address.
    auto by_addr_it = _vouchers_by_addr.find(address);
    if (by_addr_it != _vouchers_by_addr.end()) {
        auto& row = by_addr_it->second;

        auto by_time_it = std::upper_bound(row.begin(), row.end(), timestamp, VoucherVsAbsTimeComparator<SIZE>());
        // The upper bound will report that 0 is lower than [ 10, 20 ); need to check contains!
        if (by_time_it != row.end()) {
            // The compiler is having trouble seeing through an iterator that reflects unique_ptr methods which reflect MachineVoucher methods.
            auto v = by_time_it->get();
            if (v->timespan().contains(timestamp)) {
                return v;
            }
        }
    }

    return nullptr;
}

template <typename SIZE>
const MachineMachMsg<SIZE>* Machine<SIZE>::mach_msg(uintptr_t event_index) const {
    auto it = _mach_msgs_by_event_index.find(event_index);
    if (it != _mach_msgs_by_event_index.end()) {
        return &_mach_msgs.at(it->second);
    }

    return nullptr;
}

#if !defined(NDEBUG) && !defined(NS_BLOCK_ASSERTIONS)
template <typename SIZE>
void Machine<SIZE>::validate() const {
    ASSERT(_events, "Sanity");
    ASSERT(_event_count, "Sanity");

    //
    // Event timestamp ordering is already pre-checked, no point in retesting it here.
    //

    ASSERT(_threads_by_tid.size() == _threads_by_time.size(), "Container views state not in sync");
    ASSERT(_processes_by_pid.size() == _processes_by_name.size(), "Container views state not in sync");
    ASSERT(_processes_by_pid.size() == _processes_by_time.size(), "Container views state not in sync");

    for (auto& pair : _processes_by_pid) {
        auto& process = pair.second;
        process.validate();
        AbsInterval process_timespan = process.timespan();
        for (auto thread : process.threads()) {
            ASSERT(process_timespan.contains(thread->timespan()), "thread outside process timespan");
        }
    }

    for (auto thread_ptr : _threads_by_time) {
        thread_ptr->validate();
    }

    //
    // Make sure no process with the same pid overlaps in time
    //
    const MachineProcess<SIZE>* last_process = nullptr;
    for (auto& pair : _processes_by_pid) {
        auto& process = pair.second;
        if (last_process && last_process->pid() == process.pid()) {
            // The < operator only checks ordering, it is not strict
            // about overlap. We must check both.
            ASSERT(last_process->timespan() < process.timespan(), "Sanity");
            ASSERT(!last_process->timespan().intersects(process.timespan()), "Sanity");
        }
        last_process = &process;
    }

    //
    // Make sure no thread with the same tid overlaps in time
    //
    const MachineThread<SIZE>* last_thread = nullptr;
    for (auto& pair : _threads_by_tid) {
        auto& thread = pair.second;
        if (last_thread && last_thread->tid() == thread.tid()) {
            // The < operator only checks ordering, it is not strict
            // about overlap. We must check both.
            ASSERT(last_thread->timespan() < thread.timespan(), "Sanity");
            ASSERT(!last_thread->timespan().intersects(thread.timespan()), "Sanity");
        }
        last_thread = &thread;
    }

    ASSERT(is_trange_vector_sorted_and_non_overlapping(_all_io_active_intervals), "all io search/mask vector fails invariant");
}
#endif

template <typename SIZE>
const std::vector<const MachineProcess<SIZE>*>& Machine<SIZE>::processes() const {
    return *reinterpret_cast< const std::vector<const MachineProcess<SIZE>*>* >(&_processes_by_time);
}

template <typename SIZE>
const std::vector<const MachineThread<SIZE>*>& Machine<SIZE>::threads() const {
    return *reinterpret_cast< const std::vector<const MachineThread<SIZE>*>* >(&_threads_by_time);
}

template <typename SIZE>
const std::vector<const MachineCPU<SIZE>>& Machine<SIZE>::cpus() const {
    return *reinterpret_cast< const std::vector<const MachineCPU<SIZE>>* >(&_cpus);
}
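
// Note: the three accessors above reinterpret the mutable containers as
// their const-element counterparts. This presumes the two vector
// instantiations are layout-compatible, which holds for the
// implementations this code targets but is not something the standard
// guarantees.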

template <typename SIZE>
AbsInterval Machine<SIZE>::timespan() const {
    if (_event_count) {
        AbsTime begin(_events[0].timestamp());
        AbsTime end(_events[_event_count-1].timestamp());
        return AbsInterval(begin, end - begin + AbsTime(1));
    }

    return AbsInterval(AbsTime(0), AbsTime(0));
}
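
// For example, a trace whose first event is at t == 10 and whose last
// event is at t == 20 yields AbsInterval(10, 11), i.e. [ 10, 21 ), so the
// returned interval contains the final event's timestamp.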

template <typename SIZE>
CPUSummary<SIZE> Machine<SIZE>::summary_for_timespan(AbsInterval summary_timespan, const MachineCPU<SIZE>* summary_cpu) const {
    ASSERT(summary_timespan.intersects(timespan()), "Sanity");
    CPUSummary<SIZE> summary;

    uint32_t ap_cpu_count = 0;
    for (auto& cpu : _cpus) {
        // We don't know enough about iops to do anything with them.
        // Also skip cpus with no activity.
        if (!cpu.is_iop() && cpu.is_active()) {
            ap_cpu_count++;
        }
    }

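    // Wallclock run time can differ from summed run time only when more
    // than one active AP CPU may run threads concurrently. For a
    // single-CPU summary (or a machine with one active AP CPU) the two
    // are identical, so we skip the extra interval bookkeeping.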
    bool should_calculate_wallclock_run_time = (summary_cpu == NULL && ap_cpu_count > 1);

    summary.begin_cpu_timeline_walks();

    //
    // Lots of optimization possibilities here...
    //
    // We spend a LOT of time doing the set lookups to map from a thread/process to a ThreadSummary / ProcessSummary.
    // If we could somehow map directly from thread/process to the summary, this would speed up considerably.
    //

    for (auto& cpu : _cpus) {
        // We don't know enough about iops to do anything with them.
        // Also skip cpus with no activity.
        if (!cpu.is_iop() && cpu.is_active()) {
            if (summary_cpu == NULL || summary_cpu == &cpu) {

                summary.begin_cpu_timeline_walk(&cpu);

                auto& timeline = cpu.timeline();
                if (!timeline.empty()) {
                    AbsInterval timeline_timespan = AbsInterval(timeline.front().location(), timeline.back().max() - timeline.front().location());
                    AbsInterval trimmed_timespan = summary_timespan.intersection_range(timeline_timespan);

                    summary.incr_active_cpus();

                    auto start = cpu.activity_for_timestamp(trimmed_timespan.location());
                    auto end = cpu.activity_for_timestamp(trimmed_timespan.max() - AbsTime(1));

                    ASSERT(start && start->contains(trimmed_timespan.location()), "Sanity");
                    ASSERT(end && end->contains(trimmed_timespan.max() - AbsTime(1)), "Sanity");

                    ProcessSummary<SIZE>* process_summary = NULL;
                    ThreadSummary<SIZE>* thread_summary = NULL;

                    if (start->is_run() && !start->is_context_switch()) {
                        const MachineThread<SIZE>* thread_on_cpu = start->thread();
                        const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                        process_summary = summary.mutable_process_summary(process_on_cpu);
                        thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);
                    }

                    // NOTE! <=, not <, because end is inclusive of data we want to count!
                    while (start <= end) {
                        // NOTE! summary_timespan, NOT trimmed_timespan!
                        AbsInterval t = start->intersection_range(summary_timespan);

                        switch (start->type()) {
                            case kCPUActivity::Unknown:
                                // Only cpu summaries track unknown time
                                summary.add_unknown_time(t.length());
                                break;

                            case kCPUActivity::Idle:
                                summary.add_idle_time(t.length());
                                summary.add_all_cpus_idle_interval(t);
                                if (process_summary) process_summary->add_idle_time(t.length());
                                if (thread_summary) thread_summary->add_idle_time(t.length());
                                break;

                            case kCPUActivity::INTR:
                                summary.add_intr_time(t.length());
                                // It might seem like we should add INTR time to the wallclock run time
                                // for the top level summary, but the concurrency level is calculated as
                                // Actual / Wallclock, where Actual only counts RUN time. If we add INTR
                                // the results are skewed.
                                if (process_summary) process_summary->add_intr_time(t.length());
                                if (thread_summary) thread_summary->add_intr_time(t.length());
                                break;

                            case kCPUActivity::Run: {
                                // We must reset these each time. Consider the case where we have the following:
                                //
                                //     RRRRRRRRIIIIIIIIIIIIIIIIRRRRRRRRRR
                                //            ^           ^
                                //            CSW         Summary starts here
                                //
                                // The first run seen in the summary will not be a CSW, and yet process/thread summary
                                // are NULL...

                                const MachineThread<SIZE>* thread_on_cpu = start->thread();
                                const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                                process_summary = summary.mutable_process_summary(process_on_cpu);
                                thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);

                                if (start->is_context_switch()) {
                                    summary.incr_context_switches();
                                    process_summary->incr_context_switches();
                                    thread_summary->incr_context_switches();
                                }

                                summary.add_run_time(t.length());
                                process_summary->add_run_time(t.length());
                                thread_summary->add_run_time(t.length());

                                // We only calculate wallclock run time if there is a chance
                                // it might differ from run time.
                                if (should_calculate_wallclock_run_time) {
                                    summary.add_wallclock_run_interval(t);
                                    process_summary->add_wallclock_run_interval(t);
                                }

                                break;
                            }
                        }

                        start++;
                    }
                }

                summary.end_cpu_timeline_walk(&cpu);
            }
        }
    }

    summary.end_cpu_timeline_walks();

    // We only attempt to calculate future summary data in limited circumstances.
    // There must be enough future data to consistently decide if threads would run in the future.
    // If the summary_cpu is not "all", we do not attempt to calculate.

    if (summary_cpu == NULL) {
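        // The future window is five times the length of the summary window,
        // and the prediction is only attempted when that entire window fits
        // inside this machine's timespan.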
        AbsInterval future_timespan(summary_timespan.max(), summary_timespan.length() * AbsTime(5));
        if (future_timespan.intersection_range(timespan()).length() == future_timespan.length()) {
            for (auto& cpu : _cpus) {
                // We don't know enough about iops to do anything with them.
                if (!cpu.is_iop()) {
                    auto& timeline = cpu.timeline();

                    if (!timeline.empty()) {
                        AbsInterval timeline_timespan = AbsInterval(timeline.front().location(), timeline.back().max() - timeline.front().location());
                        AbsInterval trimmed_timespan = future_timespan.intersection_range(timeline_timespan);

                        auto start = cpu.activity_for_timestamp(trimmed_timespan.location());
                        auto end = cpu.activity_for_timestamp(trimmed_timespan.max() - AbsTime(1));

                        ASSERT(start && start->contains(trimmed_timespan.location()), "Sanity");
                        ASSERT(end && end->contains(trimmed_timespan.max() - AbsTime(1)), "Sanity");

                        ProcessSummary<SIZE>* process_summary = NULL;
                        ThreadSummary<SIZE>* thread_summary = NULL;

                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            // NOTE! future_timespan, NOT trimmed_timespan!
                            AbsInterval t = start->intersection_range(future_timespan);

                            switch (start->type()) {
                                case kCPUActivity::Unknown:
                                    break;

                                case kCPUActivity::Idle:
                                    // On idle, we mark the current thread as blocked.
                                    if (thread_summary)
                                        thread_summary->set_is_blocked_in_future();
                                    break;

                                case kCPUActivity::INTR:
                                    break;

                                case kCPUActivity::Run: {
                                    // We must reset these each time. Consider the case where we have the following:
                                    //
                                    //     RRRRRRRRIIIIIIIIIIIIIIIIRRRRRRRRRR
                                    //            ^           ^
                                    //            CSW         Summary starts here
                                    //
                                    // The first run seen in the summary will not be a CSW, and yet process/thread summary
                                    // are NULL...

                                    const MachineThread<SIZE>* thread_on_cpu = start->thread();
                                    const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                                    process_summary = summary.mutable_process_summary(process_on_cpu);
                                    thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);

                                    if (!thread_summary->is_future_initialized()) {
                                        thread_summary->set_future_initialized();
                                        thread_summary->set_total_blocked_in_summary(thread_on_cpu->blocked_in_timespan(summary_timespan));
                                        thread_summary->set_first_block_after_summary(thread_on_cpu->next_blocked_after(summary_timespan.max()));
                                        ASSERT(thread_summary->total_blocked_in_summary() + thread_summary->total_run_time() <= summary_timespan.length(), "More time blocked + running than is possible in summary timespan");
                                        thread_summary->set_max_possible_future_run_time(summary_timespan.length() - (thread_summary->total_blocked_in_summary() + thread_summary->total_run_time()));
                                    }

                                    if (!thread_summary->is_blocked_in_future()) {
                                        // We ONLY block at context_switch locations. But we can
                                        // context switch on any cpu, so we need a strong check!
                                        if (t.max() >= thread_summary->first_block_after_summary()) {
                                            thread_summary->set_is_blocked_in_future();
                                        } else {
                                            ASSERT(t.location() <= thread_summary->first_block_after_summary(), "Sanity");
                                            // Each thread controls how much time it can accumulate in a given window.
                                            // It may be that only a fraction (or none) of the time can be added.
                                            // Make sure to only add the thread-approved amount to the process and total summary.
                                            AbsTime future_time = thread_summary->add_future_run_time(t.length());
                                            summary.add_future_run_time(future_time);
                                            process_summary->add_future_run_time(future_time);
                                        }
                                    }
                                    break;
                                }
                            }
                            start++;
                        }
                    }
                }
            }

            //
            // When we're doing future run predictions, we can create summaries for
            // threads that have no run time, and no future run time.
            //
            // The way this happens is you have 2 or more cpus.
            // On cpu 1, there is a blocking event for Thread T at time x.
            //
            // While walking through cpu 2's activity, you see T
            // scheduled at x + N. You cannot add to T's future run
            // time, and T never ran in the original time window.
            // Thus, T is added and does nothing.
            //

            // Remove inactive threads/processes.
            auto& process_summaries = summary.mutable_process_summaries();
            auto process_it = process_summaries.begin();
            while (process_it != process_summaries.end()) {
                auto next_process_it = process_it;
                ++next_process_it;
                if (process_it->total_run_time() == 0 && process_it->total_future_run_time() == 0) {
                    DEBUG_ONLY({
                        for (auto& thread_summary : process_it->thread_summaries()) {
                            ASSERT(thread_summary.total_run_time() == 0 && thread_summary.total_future_run_time() == 0, "Process with 0 run time && 0 future run time has thread with non zero values");
                        }
                    });
                    process_summaries.erase(process_it);
                } else {
                    // Our evil friend unordered_set returns const iterators...
                    auto& thread_summaries = const_cast<ProcessSummary<SIZE>*>(&*process_it)->mutable_thread_summaries();
                    auto thread_it = thread_summaries.begin();
                    while (thread_it != thread_summaries.end()) {
                        auto next_thread_it = thread_it;
                        ++next_thread_it;
                        if (thread_it->total_run_time() == 0 && thread_it->total_future_run_time() == 0) {
                            thread_summaries.erase(thread_it);
                        }
                        thread_it = next_thread_it;
                    }
                }
                process_it = next_process_it;
            }
        }
    }

    //
    // Calculate vmfault data.
    //
    // We want to calculate this after the future CPU time, because it is possible a time slice might have vmfaults
    // that span the entire timespan. This could result in a process/thread with no run time and no future time, which
    // would be removed as "inactive" during the future CPU time calculation.
    //

    if (summary_cpu == NULL) {
        // vmfault intervals are stored in the MachineThread
        for (MachineThread<SIZE>* machine_thread : _threads_by_time) {
            const MachineProcess<SIZE>* process = &machine_thread->process();

            ProcessSummary<SIZE>* process_summary = NULL;
            ThreadSummary<SIZE>* thread_summary = NULL;

            const auto& vm_faults = machine_thread->vm_faults();
            if (!vm_faults.empty()) {
                AbsInterval vm_faults_timespan = AbsInterval(vm_faults.front().location(), vm_faults.back().max() - vm_faults.front().location());
                AbsInterval trimmed_timespan = summary_timespan.intersection_range(vm_faults_timespan);

                if (trimmed_timespan.length() > 0) {
                    auto start = interval_beginning_timespan(vm_faults, trimmed_timespan);
                    auto end = interval_ending_timespan(vm_faults, trimmed_timespan);

                    ASSERT(!start || start->intersects(trimmed_timespan), "Sanity");
                    ASSERT(!end || end->intersects(trimmed_timespan), "Sanity");
                    ASSERT((!start && !end) || ((start && end) && (start <= end)), "Sanity");

                    if (start && end) {
                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            //
                            // NOTE! summary_timespan, NOT trimmed_timespan!
                            //
                            // 8/25/13 ... Okay, why do we care summary vs trimmed?
                            // It shouldn't be possible for start to lie outside trimmed...
                            // Leaving this for now rather than introducing some bizarre
                            // corner case, but wth...
                            //
                            AbsInterval t = start->intersection_range(summary_timespan);

                            ASSERT(t.length() > 0, "Might be too strong, but expecting this to be non-zero");

                            summary.add_vm_fault_time(t.length());

                            // We must initialize these lazily. If we don't, every process and thread gets
                            // a summary entry. But we don't want to keep looking them up over and over...
                            if (!process_summary) {
                                process_summary = summary.mutable_process_summary(process);
                            }
                            process_summary->add_vm_fault_time(t.length());

                            if (!thread_summary) {
                                thread_summary = process_summary->mutable_thread_summary(machine_thread);
                            }
                            thread_summary->add_vm_fault_time(t.length());

                            start++;
                        }
                    }
                }
            }
        }
    }

    //
    // Calculate IO activity data.
    //
    if (summary_cpu == NULL) {
        //
        // IO activity may overlap, even on individual threads.
        //
        // All IO activity is stored in a single sorted vector, but
        // it may overlap even at the thread level. There isn't an
        // easy way to locate a starting and stopping point that
        // intersects a given range.
        //
        // The solution being used is to flatten the overlapping IO
        // and keep a sorted, non-overlapping list of IO activity. For any
        // given timespan, we find the overlapping intervals of flattened
        // IO activity and then look up the actual matching IOActivity
        // objects.
        //
        if (!_all_io_active_intervals.empty()) {
            AbsInterval io_timespan = AbsInterval(_all_io_active_intervals.front().location(), _all_io_active_intervals.back().max() - _all_io_active_intervals.front().location());
            AbsInterval trimmed_timespan = summary_timespan.intersection_range(io_timespan);
            if (trimmed_timespan.length() > 0) {
                //
                // First find the flattened start point
                //
                if (auto flattened_start = interval_beginning_timespan(_all_io_active_intervals, trimmed_timespan)) {
                    //
                    // Now find the actual start IOActivity
                    //
                    auto it = std::lower_bound(_all_io.begin(), _all_io.end(), flattened_start->location(), AbsIntervalLocationVsAbsTimeComparator());
                    ASSERT(it != _all_io.end(), "If we reach here, we should ALWAYS find a match!");

                    // Walk every IOActivity that begins before the end of the
                    // summary window; entries that don't actually intersect it
                    // are filtered out below.
                    while (it != _all_io.end() && it->location() < summary_timespan.max()) {
                        AbsInterval t = it->intersection_range(summary_timespan);

                        // Some of the ranges will not intersect at all, for example:
                        //
                        // IOActivity
                        //
                        //     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
                        //     XXXXXX
                        //        XXXXXXXX
                        //
                        // Summary Range
                        //
                        //                            SSSSSSSSSSSSSSSSSS
                        //
                        // The flattened_start will point at the oldest IOActivity
                        // which overlaps the summary range, but many of the later
                        // IOActivities will not overlap.

                        if (t.length() > 0) {
                            //
                            // Wait time.
                            //
                            summary.add_io_time(t.length());

                            ProcessSummary<SIZE>* process_summary = summary.mutable_process_summary(&it->thread()->process());
                            process_summary->add_io_time(t.length());

                            ThreadSummary<SIZE>* thread_summary = process_summary->mutable_thread_summary(it->thread());
                            thread_summary->add_io_time(t.length());

                            //
                            // Bytes completed
                            //
                            if (summary_timespan.contains(it->max() - AbsTime(1))) {
                                summary.add_io_bytes_completed(it->size());
                                process_summary->add_io_bytes_completed(it->size());
                                thread_summary->add_io_bytes_completed(it->size());
                            }
                        }
                        it++;
                    }
                }
            }
        }
    }

    //
    // Calculate Jetsam activity data.
    //
    if (summary_cpu == NULL) {
        // jetsam activity is stored in the MachineThread
        for (MachineThread<SIZE>* machine_thread : _threads_by_time) {
            const MachineProcess<SIZE>* process = &machine_thread->process();

            ProcessSummary<SIZE>* process_summary = NULL;
            ThreadSummary<SIZE>* thread_summary = NULL;

            const auto& jetsam_activity = machine_thread->jetsam_activity();
            if (!jetsam_activity.empty()) {
                AbsInterval jetsam_timespan = AbsInterval(jetsam_activity.front().location(), jetsam_activity.back().max() - jetsam_activity.front().location());
                AbsInterval trimmed_timespan = summary_timespan.intersection_range(jetsam_timespan);

                if (trimmed_timespan.length() > 0) {
                    auto start = interval_beginning_timespan(jetsam_activity, trimmed_timespan);
                    auto end = interval_ending_timespan(jetsam_activity, trimmed_timespan);

                    ASSERT(!start || start->intersects(trimmed_timespan), "Sanity");
                    ASSERT(!end || end->intersects(trimmed_timespan), "Sanity");
                    ASSERT((!start && !end) || ((start && end) && (start <= end)), "Sanity");

                    if (start && end) {
                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            //
                            // NOTE! summary_timespan, NOT trimmed_timespan!
                            //
                            // 8/25/13 ... Okay, why do we care summary vs trimmed?
                            // It shouldn't be possible for start to lie outside trimmed...
                            // Leaving this for now rather than introducing some bizarre
                            // corner case, but wth...
                            //
                            AbsInterval t = start->intersection_range(summary_timespan);

                            ASSERT(t.length() > 0, "Might be too strong, but expecting this to be non-zero");

                            summary.add_jetsam_time(t.length());

                            // We must initialize these lazily. If we don't, every process and thread gets
                            // a summary entry. But we don't want to keep looking them up over and over...
                            if (!process_summary) {
                                process_summary = summary.mutable_process_summary(process);
                            }
                            process_summary->add_jetsam_time(t.length());

                            if (!thread_summary) {
                                thread_summary = process_summary->mutable_thread_summary(machine_thread);
                            }
                            thread_summary->add_jetsam_time(t.length());

                            start++;
                        }
                    }
                }
            }
        }

        // Jetsam kill times are stored in the process.
        for (MachineProcess<SIZE>* machine_process : _processes_by_time) {
            if (machine_process->is_exit_by_jetsam()) {
                if (summary_timespan.contains(machine_process->exit_timestamp())) {
                    summary.increment_processes_jetsamed();
                    summary.mutable_process_summary(machine_process)->set_jetsam_killed();
                }
            }
        }
    }

    DEBUG_ONLY(summary.validate());

    return summary;
}
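
//
// Typical usage sketch (the trace path and the Kernel64 SIZE policy below
// are illustrative assumptions, not something this file defines):
//
//     TraceFile file("some_trace.kdbg");
//     Machine<Kernel64> machine(file);
//     CPUSummary<Kernel64> summary = machine.summary_for_timespan(machine.timespan(), NULL);
//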

template <typename SIZE>
uint32_t Machine<SIZE>::active_cpus() const {
    uint32_t cpus = 0;

    for (auto& cpu : _cpus) {
        if (!cpu.timeline().empty()) {
            cpus++;
        }
    }

    return cpus;
}

// This attempts to analyze various pieces of data and guess
// whether the Machine represents an iOS device or not.

template <typename SIZE>
bool Machine<SIZE>::is_ios() const {
    // I looked at avg intr time and min intr time; they were too close for
    // reliable detection of desktop vs device (desktops have intr(s) as
    // short as 60ns).

    // For now, we're just going to do a really gross detection; in any trace
    // from a device we'd expect to see SpringBoard or backboardd.

    for (auto process : _processes_by_time) {
        if (strcmp(process->name(), "SpringBoard") == 0)
            return true;
        if (strcmp(process->name(), "backboardd") == 0)
            return true;
    }

    return false;
}