//
//  Machine.impl.hpp
//  KDBG
//
//  Created by James McIlree on 10/30/12.
//  Copyright (c) 2014 Apple. All rights reserved.
//

#include "KDebug.h"

template <typename SIZE>
bool process_by_time_sort(const MachineProcess<SIZE>* left, const MachineProcess<SIZE>* right) {
    return left->timespan().location() < right->timespan().location();
}

template <typename SIZE>
bool thread_by_time_sort(const MachineThread<SIZE>* left, const MachineThread<SIZE>* right) {
    return left->timespan().location() < right->timespan().location();
}

template <typename SIZE>
void Machine<SIZE>::post_initialize() {
    //
    // Post initialization. Sort the various by-time vectors, etc.
    //

    std::sort(_processes_by_time.begin(), _processes_by_time.end(), process_by_time_sort<SIZE>);
    std::sort(_threads_by_time.begin(), _threads_by_time.end(), thread_by_time_sort<SIZE>);

    // naked auto okay here, process is a ptr.
    AbsTime last_machine_timestamp = _events[_event_count-1].timestamp();
    for (auto process : _processes_by_time) {
        process->post_initialize(last_machine_timestamp);
    }

    //
    // Collapse the idle/intr/run queues into a single timeline
    //
    for (auto& cpu : _cpus) {
        cpu.post_initialize(timespan());
    }

    //
    // Flush any outstanding blocked events
    //
    for (auto& thread : _threads_by_tid) {
        thread.second.post_initialize(last_machine_timestamp);
    }

    //
    // Sort the IOActivity events, and build a flattened vector that can be
    // used to find IO ranges for intersecting during interval searches.
    //
    _io_by_uid.clear();
    std::sort(_all_io.begin(), _all_io.end());

    // We cannot use trange_vector_union to flatten _all_io, as _all_io isn't
    // a plain TRange<AbsTime> type, and we want to yield that type.
    // Cut & paste to the rescue! :-)
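    //
    // For example, if _all_io holds the sorted intervals
    //
    //     [ 0, 10 )  [ 5, 20 )  [ 30, 40 )
    //
    // the flattened result pushed into _all_io_active_intervals is
    //
    //     [ 0, 20 )  [ 30, 40 )
    //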
    if (!_all_io.empty()) {
        auto input_it = _all_io.begin();
        _all_io_active_intervals.push_back(*input_it);
        while (++input_it < _all_io.end()) {
            TRange<AbsTime> union_range = _all_io_active_intervals.back();

            if (union_range.intersects(*input_it)) {
                _all_io_active_intervals.pop_back();
                _all_io_active_intervals.push_back(union_range.union_range(*input_it));
            } else {
                ASSERT(union_range < *input_it, "Out of order merging");
                _all_io_active_intervals.push_back(*input_it);
            }
        }
    }

    //
    // Flush any outstanding MachMsg(s) in the nursery (state SEND)
    //
    // NOTE! We do not clear _mach_msg_nursery because its state is
    // forwarded to future Machine(s).
    //
    for (auto& nursery_it : _mach_msg_nursery) {
        auto& nursery_msg = nursery_it.second;
        if (nursery_msg.state() == kNurseryMachMsgState::Send) {
            auto mach_msg_it = _mach_msgs.emplace(_mach_msgs.end(),
                                                  nursery_msg.id(),
                                                  nursery_msg.kmsg_addr(),
                                                  kMachineMachMsgFlag::HasSender,
                                                  nursery_msg.send_time(),
                                                  nursery_msg.send_tid(),
                                                  nursery_msg.send_msgh_bits(),
                                                  nursery_msg.send_voucher(),
                                                  AbsTime(0),
                                                  0,
                                                  0,
                                                  &Machine<SIZE>::UnsetVoucher);
            _mach_msgs_by_event_index[nursery_msg.send_event_index()] = std::distance(_mach_msgs.begin(), mach_msg_it);
        }
    }

    //
    // Flush any outstanding Voucher(s) in the nursery
    //
    for (auto& nursery_it : _voucher_nursery) {
        //
        // First we need to "close" the open end of the live voucher's
        // timespan.
        //
        auto voucher = nursery_it.second.get();
        voucher->set_timespan_to_end_of_time();

        auto address = nursery_it.first;

        // Now find the "row" for this address.
        auto by_addr_it = _vouchers_by_addr.find(address);
        if (by_addr_it == _vouchers_by_addr.end()) {
            // No address entry case
            std::vector<std::unique_ptr<MachineVoucher<SIZE>>> row;
            row.emplace_back(std::move(nursery_it.second));
            _vouchers_by_addr.emplace(address, std::move(row));
        } else {
            auto& row = by_addr_it->second;

            // Make sure these are sorted and non-overlapping
            ASSERT(row.back()->timespan() < voucher->timespan(), "Sanity");
            ASSERT(!row.back()->timespan().intersects(voucher->timespan()), "Sanity");

            row.emplace_back(std::move(nursery_it.second));
        }
    }

    _voucher_nursery.clear();

    DEBUG_ONLY(validate());
}

template <typename SIZE>
void Machine<SIZE>::raw_initialize(const KDCPUMapEntry* cpumaps,
                                   uint32_t cpumap_count,
                                   const KDThreadMapEntry<SIZE>* threadmaps,
                                   uint32_t threadmap_count,
                                   const KDEvent<SIZE>* events,
                                   uintptr_t event_count)
{
    ASSERT(cpumaps || cpumap_count == 0, "Sanity");
    ASSERT(threadmaps || threadmap_count == 0, "Sanity");
    ASSERT(events || event_count == 0, "Sanity");

    for (uint32_t i = 0; i < cpumap_count; ++i) {
        _cpus.emplace_back(i, cpumaps[i].is_iop(), cpumaps[i].name());
    }

    // We cannot create processes / threads unless we have at least one event to give us a timestamp.
    if (event_count) {
        AbsTime now = events[0].timestamp();

        _kernel_task = create_process(0, "kernel_task", now, kMachineProcessFlag::IsKernelProcess);

        // Initial thread state, nothing in the nursery
        for (uint32_t index = 0; index < threadmap_count; ++index) {
            auto& threadmap = threadmaps[index];

            pid_t pid = threadmap.pid();

            // The kernel threadmap often has empty entries; bail out when we hit one.
            if (pid == 0)
                break;

            if (pid == 1 && strncmp(threadmap.name(), "kernel_task", 12) == 0) {
                pid = 0;
            }

            MachineProcess<SIZE>* process = youngest_mutable_process(pid);
            if (!process) {
                process = create_process(pid, threadmap.name(), now, kMachineProcessFlag::CreatedByThreadMap);
                ASSERT(process, "Sanity");
            }
            process->add_thread(create_thread(process, threadmap.tid(), &UnsetVoucher, now, kMachineThreadFlag::CreatedByThreadMap));
        }
    }

    // We need to know what the idle/INTR states of the CPUs are.
    initialize_cpu_idle_intr_states();

    for (uintptr_t index = 0; index < event_count; ++index) {
        if (!process_event(events[index]))
            break;
    }

    post_initialize();
}

template <typename SIZE>
Machine<SIZE>::Machine(KDCPUMapEntry* cpumaps, uint32_t cpumap_count, KDThreadMapEntry<SIZE>* threadmaps, uint32_t threadmap_count, KDEvent<SIZE>* events, uintptr_t event_count) :
    _kernel_task(nullptr),
    _events(events),
    _event_count(event_count),
    _flags(0),
    _unknown_process_pid(-1)
{
    raw_initialize(cpumaps,
                   cpumap_count,
                   threadmaps,
                   threadmap_count,
                   events,
                   event_count);
}

template <typename SIZE>
Machine<SIZE>::Machine(const TraceFile& file) :
    _kernel_task(nullptr),
    _events(file.events<SIZE>()),
    _event_count(file.event_count()),
    _flags(0),
    _unknown_process_pid(-1)
{
    raw_initialize(file.cpumap(),
                   file.cpumap_count(),
                   file.threadmap<SIZE>(),
                   file.threadmap_count(),
                   file.events<SIZE>(),
                   file.event_count());
}

template <typename SIZE>
Machine<SIZE>::Machine(Machine<SIZE>& parent, KDEvent<SIZE>* events, uintptr_t event_count) :
    _kernel_task(nullptr),
    _events(events),
    _event_count(event_count),
    _flags(0),
    _unknown_process_pid(-1)
{
    ASSERT(events || event_count == 0, "Sanity");

    const std::vector<const MachineThread<SIZE>*>& parent_threads = parent.threads();
    const std::vector<const MachineCPU<SIZE>>& parent_cpus = parent.cpus();

    for (const MachineCPU<SIZE>& parent_cpu : parent_cpus) {
        _cpus.emplace_back(parent_cpu.id(), parent_cpu.is_iop(), parent_cpu.name());
    }

    // We cannot create processes / threads unless we have at least one event to give us a timestamp.
    if (event_count) {
        AbsTime now = events[0].timestamp();

        //
        // Forward any live vouchers. This must be done before forwarding
        // threads or MachMsgs from their nurseries, as they hold references
        // to the vouchers.
        //
        for (auto& parent_vouchers_by_addr_it : parent._vouchers_by_addr) {
            std::unique_ptr<MachineVoucher<SIZE>>& voucher = parent_vouchers_by_addr_it.second.back();
            if (voucher->is_live()) {
                // When we flushed these vouchers in the previous machine state,
                // we set their timespans to infinite. We need to reset them in
                // case a close event arrives.
                voucher->set_timespan_to_zero_length();
                _voucher_nursery.emplace(voucher->address(), std::move(voucher));
            }
        }

        _kernel_task = create_process(0, "kernel_task", now, kMachineProcessFlag::IsKernelProcess);

        for (const MachineThread<SIZE>* parent_thread : parent_threads) {
            if (!parent_thread->is_trace_terminated()) {
                const MachineProcess<SIZE>& parent_process = parent_thread->process();
                MachineProcess<SIZE>* new_process = youngest_mutable_process(parent_process.pid());
                if (!new_process) {
                    kMachineProcessFlag new_process_flags = (kMachineProcessFlag)(parent_process.flags() | (uint32_t)kMachineProcessFlag::CreatedByPreviousMachineState);
                    new_process = create_process(parent_process.pid(), parent_process.name(), now, new_process_flags);
                    ASSERT(new_process, "Sanity");
                }
                new_process->add_thread(create_thread(new_process,
                                                      parent_thread->tid(),
                                                      thread_forwarding_voucher_lookup(parent_thread->last_voucher()),
                                                      now,
                                                      (kMachineThreadFlag)(parent_thread->flags() | (uint32_t)kMachineThreadFlag::CreatedByPreviousMachineState)));
            }
        }

        // We need to know what the idle/INTR states of the CPUs are.
        //
        // Start by looking at the existing states.
        uint32_t init_count = 0;
        uint32_t ap_count = 0;
        for (const MachineCPU<SIZE>& parent_cpu : parent_cpus) {
            if (!parent_cpu.is_iop()) {
                ap_count++;
                const std::vector<CPUActivity<SIZE>>& parent_cpu_timeline = parent_cpu.timeline();

                bool intr_initialized = false;
                bool idle_initialized = false;
                bool runq_initialized = false;

                MachineCPU<SIZE>& cpu = _cpus[parent_cpu.id()];

                for (auto reverse_it = parent_cpu_timeline.rbegin(); reverse_it < parent_cpu_timeline.rend(); ++reverse_it) {

                    // We can sometimes split two simultaneous events across two buffer snaps.
                    // IOW, buffer snap 1:
                    //
                    //     event[N].timestamp = 1234;
                    //
                    // buffer snap 2:
                    //
                    //     event[0].timestamp = 1234;
                    ASSERT(!reverse_it->contains(now) || reverse_it->max() - AbsTime(1) == now, "Sanity");
                    ASSERT(reverse_it->location() <= now, "Sanity");

                    switch (reverse_it->type()) {
                        //
                        // The states are separate, and hierarchical.
                        // The order (low -> high) is:
                        //
                        //     Run, Idle, INTR
                        //
                        // Unknown is a special state; we give up on seeing it.
                        // A lower state precludes being in a higher state, but
                        // not vice-versa. You cannot be Idle if you are
                        // currently Run, but you may be INTR during Run or Idle.
                        //

                        case kCPUActivity::Unknown:
                            // Don't actually initialize anything, just force a bailout
                            runq_initialized = idle_initialized = intr_initialized = true;
                            break;

                        // NOTE NOTE NOTE!
                        //
                        // Overly clever here; note the lack of "break" in the Run
                        // and Idle clauses, we fall through to initialize the
                        // higher states.
                        case kCPUActivity::Run:
                            ASSERT(!runq_initialized, "This should always be the last level to initialize");
                            if (MachineThread<SIZE>* on_cpu_thread = youngest_mutable_thread(reverse_it->thread()->tid())) {
                                cpu.initialize_thread_state(on_cpu_thread, now);
                                init_count++;
                            } else {
                                ASSERT(reverse_it->thread()->is_trace_terminated(), "We should find this thread unless it's been removed");
                            }
                            runq_initialized = true;
                            /* FALLTHROUGH */

                        case kCPUActivity::Idle:
                            if (!idle_initialized) {
                                cpu.initialize_idle_state(reverse_it->is_idle(), now);
                                init_count++;
                                idle_initialized = true;
                            }
                            /* FALLTHROUGH */

                        case kCPUActivity::INTR:
                            if (!intr_initialized) {
                                cpu.initialize_intr_state(reverse_it->is_intr(), now);
                                init_count++;
                                intr_initialized = true;
                            }
                            break;
                    }

                    if (runq_initialized) {
                        ASSERT(idle_initialized && intr_initialized, "Sanity");
                        break;
                    }
                }
            }
        }

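        // Each AP CPU contributes at most three initializations above
        // (thread state, idle, and intr). If any CPU's walk bailed out
        // before seeding all three (e.g. on an Unknown activity), fall
        // back to initialize_cpu_idle_intr_states().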
        if (init_count < (ap_count * 3)) {
            initialize_cpu_idle_intr_states();
        }

        //
        // Forward any messages from the nursery
        //

        for (auto& parent_nursery_it : parent._mach_msg_nursery) {
            auto& parent_nursery_msg = parent_nursery_it.second;

            switch (parent_nursery_msg.state()) {
                // We forward send(s) because they can become receives.
                // We forward frees because they stop us from showing bogus
                // kernel message receipts.
                case kNurseryMachMsgState::Send:
                case kNurseryMachMsgState::Free: {
                    ASSERT(_mach_msg_nursery.find(parent_nursery_msg.kmsg_addr()) == _mach_msg_nursery.end(), "Duplicate kmsg address when forwarding mach_msg nursery from parent");

                    auto it = _mach_msg_nursery.emplace(parent_nursery_msg.kmsg_addr(), parent_nursery_msg);

                    // Grr, emplace returns a std::pair<it, bool>, and the it is std::pair<key, value>...

                    // We have to clear this to prevent bogus data being shown
                    // during a receive; the send event index is no longer
                    // available.
                    it.first->second.set_send_event_index(-1);
                    break;
                }

                default:
                    break;
            }
        }
    }

    for (uintptr_t index = 0; index < event_count; ++index) {
        if (!process_event(_events[index]))
            break;
    }

    post_initialize();
}

template <typename SIZE>
const MachineProcess<SIZE>* Machine<SIZE>::process(pid_t pid, AbsTime time) const {
    auto by_pid_range = _processes_by_pid.equal_range(pid);
    for (auto it = by_pid_range.first; it != by_pid_range.second; ++it) {
        const MachineProcess<SIZE>& process = it->second;
        if (process.timespan().contains(time)) {
            return &process;
        }
    }

    return nullptr;
}

template <typename SIZE>
const MachineThread<SIZE>* Machine<SIZE>::thread(typename SIZE::ptr_t tid, AbsTime time) const {
    auto by_tid_range = _threads_by_tid.equal_range(tid);
    for (auto it = by_tid_range.first; it != by_tid_range.second; ++it) {
        const MachineThread<SIZE>& thread = it->second;
        if (thread.timespan().contains(time)) {
            return &thread;
        }
    }

    return nullptr;
}

template <typename SIZE>
struct VoucherVsAbsTimeComparator {
    bool operator()(const std::unique_ptr<MachineVoucher<SIZE>>& voucher, const AbsTime time) const {
        return voucher->timespan().max() < time;
    }

    bool operator()(const AbsTime time, const std::unique_ptr<MachineVoucher<SIZE>>& voucher) const {
        return time < voucher->timespan().max();
    }
};
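
//
// The asymmetric operator()s above let std::upper_bound compare an AbsTime
// key directly against the unique_ptr elements of a voucher row. For
// example, given a row whose timespans are [ 0, 10 ) and [ 10, 20 ), an
// upper_bound search for time 5 yields the first voucher (5 < max() of 10);
// voucher() below then confirms the hit with contains().
//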

template <typename SIZE>
const MachineVoucher<SIZE>* Machine<SIZE>::voucher(typename SIZE::ptr_t address, AbsTime timestamp) const {
    // First find the "row" for this address.
    auto by_addr_it = _vouchers_by_addr.find(address);
    if (by_addr_it != _vouchers_by_addr.end()) {
        auto& row = by_addr_it->second;

        auto by_time_it = std::upper_bound(row.begin(), row.end(), timestamp, VoucherVsAbsTimeComparator<SIZE>());
        // The upper bound will report that 0 is lower than [ 10, 20 ); need to check contains!
        if (by_time_it != row.end()) {
            // The compiler is having trouble seeing through an iterator that reflects unique_ptr methods which reflect MachineVoucher methods.
            auto v = by_time_it->get();
            if (v->timespan().contains(timestamp)) {
                return v;
            }
        }
    }

    return nullptr;
}

template <typename SIZE>
const MachineMachMsg<SIZE>* Machine<SIZE>::mach_msg(uintptr_t event_index) const {
    auto it = _mach_msgs_by_event_index.find(event_index);
    if (it != _mach_msgs_by_event_index.end()) {
        return &_mach_msgs.at(it->second);
    }

    return nullptr;
}

#if !defined(NDEBUG) && !defined(NS_BLOCK_ASSERTIONS)
template <typename SIZE>
void Machine<SIZE>::validate() const {
    ASSERT(_events, "Sanity");
    ASSERT(_event_count, "Sanity");

    //
    // Event timestamp ordering is already pre-checked, no point in retesting it here.
    //

    ASSERT(_threads_by_tid.size() == _threads_by_time.size(), "Container views state not in sync");
    ASSERT(_processes_by_pid.size() == _processes_by_name.size(), "Container views state not in sync");
    ASSERT(_processes_by_pid.size() == _processes_by_time.size(), "Container views state not in sync");

    for (auto& pair : _processes_by_pid) {
        auto& process = pair.second;
        process.validate();
        AbsInterval process_timespan = process.timespan();
        for (auto thread : process.threads()) {
            ASSERT(process_timespan.contains(thread->timespan()), "thread outside process timespan");
        }
    }

    for (auto thread_ptr : _threads_by_time) {
        thread_ptr->validate();
    }

    //
    // Make sure no process with the same pid overlaps in time
    //
    const MachineProcess<SIZE>* last_process = nullptr;
    for (auto& pair : _processes_by_pid) {
        auto& process = pair.second;
        if (last_process && last_process->pid() == process.pid()) {
            // The < operator only checks ordering, it is not strict
            // about overlap. We must check both.
            ASSERT(last_process->timespan() < process.timespan(), "Sanity");
            ASSERT(!last_process->timespan().intersects(process.timespan()), "Sanity");
        }
        last_process = &process;
    }

    //
    // Make sure no thread with the same tid overlaps in time
    //
    const MachineThread<SIZE>* last_thread = nullptr;
    for (auto& pair : _threads_by_tid) {
        auto& thread = pair.second;
        if (last_thread && last_thread->tid() == thread.tid()) {
            // The < operator only checks ordering, it is not strict
            // about overlap. We must check both.
            ASSERT(last_thread->timespan() < thread.timespan(), "Sanity");
            ASSERT(!last_thread->timespan().intersects(thread.timespan()), "Sanity");
        }
        last_thread = &thread;
    }

    ASSERT(is_trange_vector_sorted_and_non_overlapping(_all_io_active_intervals), "all io search/mask vector fails invariant");
}
#endif

template <typename SIZE>
const std::vector<const MachineProcess<SIZE>*>& Machine<SIZE>::processes() const {
    return *reinterpret_cast< const std::vector<const MachineProcess<SIZE>*>* >(&_processes_by_time);
}

template <typename SIZE>
const std::vector<const MachineThread<SIZE>*>& Machine<SIZE>::threads() const {
    return *reinterpret_cast< const std::vector<const MachineThread<SIZE>*>* >(&_threads_by_time);
}

template <typename SIZE>
const std::vector<const MachineCPU<SIZE>>& Machine<SIZE>::cpus() const {
    return *reinterpret_cast< const std::vector<const MachineCPU<SIZE>>* >(&_cpus);
}
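
// Note: the three accessors above reinterpret the mutable containers as
// their const-element counterparts. This presumes the two vector
// instantiations are layout-compatible, which holds for the
// implementations this code targets but is not something the standard
// guarantees.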

template <typename SIZE>
AbsInterval Machine<SIZE>::timespan() const {
    if (_event_count) {
        AbsTime begin(_events[0].timestamp());
        AbsTime end(_events[_event_count-1].timestamp());
        return AbsInterval(begin, end - begin + AbsTime(1));
    }

    return AbsInterval(AbsTime(0), AbsTime(0));
}
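
// For example, a trace whose first event is at t == 10 and whose last
// event is at t == 20 yields AbsInterval(10, 11), i.e. [ 10, 21 ), so the
// returned interval contains the final event's timestamp.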

template <typename SIZE>
CPUSummary<SIZE> Machine<SIZE>::summary_for_timespan(AbsInterval summary_timespan, const MachineCPU<SIZE>* summary_cpu) const {
    ASSERT(summary_timespan.intersects(timespan()), "Sanity");
    CPUSummary<SIZE> summary;

    uint32_t ap_cpu_count = 0;
    for (auto& cpu : _cpus) {
        // We don't know enough about iops to do anything with them.
        // Also skip cpus with no activity.
        if (!cpu.is_iop() && cpu.is_active()) {
            ap_cpu_count++;
        }
    }

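    // Wallclock run time can differ from summed run time only when more
    // than one active AP CPU may run threads concurrently. For a
    // single-CPU summary (or a machine with one active AP CPU) the two
    // are identical, so we skip the extra interval bookkeeping.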
    bool should_calculate_wallclock_run_time = (summary_cpu == NULL && ap_cpu_count > 1);

    summary.begin_cpu_timeline_walks();

    //
    // Lots of optimization possibilities here...
    //
    // We spend a LOT of time doing the set lookups to map from a thread/process to a ThreadSummary / ProcessSummary.
    // If we could somehow map directly from thread/process to the summary, this would speed up considerably.
    //

    for (auto& cpu : _cpus) {
        // We don't know enough about iops to do anything with them.
        // Also skip cpus with no activity.
        if (!cpu.is_iop() && cpu.is_active()) {
            if (summary_cpu == NULL || summary_cpu == &cpu) {

                summary.begin_cpu_timeline_walk(&cpu);

                auto& timeline = cpu.timeline();
                if (!timeline.empty()) {
                    AbsInterval timeline_timespan = AbsInterval(timeline.front().location(), timeline.back().max() - timeline.front().location());
                    AbsInterval trimmed_timespan = summary_timespan.intersection_range(timeline_timespan);

                    summary.incr_active_cpus();

                    auto start = cpu.activity_for_timestamp(trimmed_timespan.location());
                    auto end = cpu.activity_for_timestamp(trimmed_timespan.max() - AbsTime(1));

                    ASSERT(start && start->contains(trimmed_timespan.location()), "Sanity");
                    ASSERT(end && end->contains(trimmed_timespan.max() - AbsTime(1)), "Sanity");

                    ProcessSummary<SIZE>* process_summary = NULL;
                    ThreadSummary<SIZE>* thread_summary = NULL;

                    if (start->is_run() && !start->is_context_switch()) {
                        const MachineThread<SIZE>* thread_on_cpu = start->thread();
                        const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                        process_summary = summary.mutable_process_summary(process_on_cpu);
                        thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);
                    }

                    // NOTE! <=, not <, because end is inclusive of data we want to count!
                    while (start <= end) {
                        // NOTE! summary_timespan, NOT trimmed_timespan!
                        AbsInterval t = start->intersection_range(summary_timespan);

                        switch (start->type()) {
                            case kCPUActivity::Unknown:
                                // Only cpu summaries track unknown time
                                summary.add_unknown_time(t.length());
                                break;

                            case kCPUActivity::Idle:
                                summary.add_idle_time(t.length());
                                summary.add_all_cpus_idle_interval(t);
                                if (process_summary) process_summary->add_idle_time(t.length());
                                if (thread_summary) thread_summary->add_idle_time(t.length());
                                break;

                            case kCPUActivity::INTR:
                                summary.add_intr_time(t.length());
                                // It might seem like we should add INTR time to the wallclock run time
                                // for the top level summary, but the concurrency level is calculated as
                                // Actual / Wallclock, where Actual only counts RUN time. If we add INTR
                                // the results are skewed.
                                if (process_summary) process_summary->add_intr_time(t.length());
                                if (thread_summary) thread_summary->add_intr_time(t.length());
                                break;

                            case kCPUActivity::Run: {
                                // We must reset these each time. Consider the case where we have the following:
                                //
                                //     RRRRRRRRIIIIIIIIIIIIIIIIRRRRRRRRRR
                                //            ^           ^
                                //            CSW         Summary starts here
                                //
                                // The first run seen in the summary will not be a CSW, and yet process/thread summary
                                // are NULL...

                                const MachineThread<SIZE>* thread_on_cpu = start->thread();
                                const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                                process_summary = summary.mutable_process_summary(process_on_cpu);
                                thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);

                                if (start->is_context_switch()) {
                                    summary.incr_context_switches();
                                    process_summary->incr_context_switches();
                                    thread_summary->incr_context_switches();
                                }

                                summary.add_run_time(t.length());
                                process_summary->add_run_time(t.length());
                                thread_summary->add_run_time(t.length());

                                // We only calculate wallclock run time if there is a chance
                                // it might differ from run time.
                                if (should_calculate_wallclock_run_time) {
                                    summary.add_wallclock_run_interval(t);
                                    process_summary->add_wallclock_run_interval(t);
                                }

                                break;
                            }
                        }

                        start++;
                    }
                }

                summary.end_cpu_timeline_walk(&cpu);
            }
        }
    }

    summary.end_cpu_timeline_walks();

    // We only attempt to calculate future summary data in limited circumstances.
    // There must be enough future data to consistently decide if threads would run in the future.
    // If the summary_cpu is not "all", we do not attempt to calculate.

    if (summary_cpu == NULL) {
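        // The future window is five times the length of the summary window,
        // and the prediction is only attempted when that entire window fits
        // inside this machine's timespan.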
        AbsInterval future_timespan(summary_timespan.max(), summary_timespan.length() * AbsTime(5));
        if (future_timespan.intersection_range(timespan()).length() == future_timespan.length()) {
            for (auto& cpu : _cpus) {
                // We don't know enough about iops to do anything with them.
                if (!cpu.is_iop()) {
                    auto& timeline = cpu.timeline();

                    if (!timeline.empty()) {
                        AbsInterval timeline_timespan = AbsInterval(timeline.front().location(), timeline.back().max() - timeline.front().location());
                        AbsInterval trimmed_timespan = future_timespan.intersection_range(timeline_timespan);

                        auto start = cpu.activity_for_timestamp(trimmed_timespan.location());
                        auto end = cpu.activity_for_timestamp(trimmed_timespan.max() - AbsTime(1));

                        ASSERT(start && start->contains(trimmed_timespan.location()), "Sanity");
                        ASSERT(end && end->contains(trimmed_timespan.max() - AbsTime(1)), "Sanity");

                        ProcessSummary<SIZE>* process_summary = NULL;
                        ThreadSummary<SIZE>* thread_summary = NULL;

                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            // NOTE! future_timespan, NOT trimmed_timespan!
                            AbsInterval t = start->intersection_range(future_timespan);

                            switch (start->type()) {
                                case kCPUActivity::Unknown:
                                    break;

                                case kCPUActivity::Idle:
                                    // On idle, we mark the current thread as blocked.
                                    if (thread_summary)
                                        thread_summary->set_is_blocked_in_future();
                                    break;

                                case kCPUActivity::INTR:
                                    break;

                                case kCPUActivity::Run: {
                                    // We must reset these each time. Consider the case where we have the following:
                                    //
                                    //     RRRRRRRRIIIIIIIIIIIIIIIIRRRRRRRRRR
                                    //            ^           ^
                                    //            CSW         Summary starts here
                                    //
                                    // The first run seen in the summary will not be a CSW, and yet process/thread summary
                                    // are NULL...

                                    const MachineThread<SIZE>* thread_on_cpu = start->thread();
                                    const MachineProcess<SIZE>* process_on_cpu = &thread_on_cpu->process();

                                    process_summary = summary.mutable_process_summary(process_on_cpu);
                                    thread_summary = process_summary->mutable_thread_summary(thread_on_cpu);

                                    if (!thread_summary->is_future_initialized()) {
                                        thread_summary->set_future_initialized();
                                        thread_summary->set_total_blocked_in_summary(thread_on_cpu->blocked_in_timespan(summary_timespan));
                                        thread_summary->set_first_block_after_summary(thread_on_cpu->next_blocked_after(summary_timespan.max()));
                                        ASSERT(thread_summary->total_blocked_in_summary() + thread_summary->total_run_time() <= summary_timespan.length(), "More time blocked + running than is possible in summary timespan");
                                        thread_summary->set_max_possible_future_run_time(summary_timespan.length() - (thread_summary->total_blocked_in_summary() + thread_summary->total_run_time()));
                                    }

                                    if (!thread_summary->is_blocked_in_future()) {
                                        // We ONLY block at context_switch locations. But we can
                                        // context switch on any cpu, so we need a strong check!
                                        if (t.max() >= thread_summary->first_block_after_summary()) {
                                            thread_summary->set_is_blocked_in_future();
                                        } else {
                                            ASSERT(t.location() <= thread_summary->first_block_after_summary(), "Sanity");
                                            // Each thread controls how much time it can accumulate in a given window.
                                            // It may be that only a fraction (or none) of the time can be added.
                                            // Make sure to only add the thread-approved amount to the process and total summary.
                                            AbsTime future_time = thread_summary->add_future_run_time(t.length());
                                            summary.add_future_run_time(future_time);
                                            process_summary->add_future_run_time(future_time);
                                        }
                                    }
                                    break;
                                }
                            }
                            start++;
                        }
                    }
                }
            }

            //
            // When we're doing future run predictions, we can create summaries for
            // threads that have no run time, and no future run time.
            //
            // The way this happens is you have 2 or more cpus.
            // On cpu 1, there is a blocking event for Thread T at time x.
            //
            // While walking through cpu 2's activity, you see T
            // scheduled at x + N. You cannot add to T's future run
            // time, and T never ran in the original time window.
            // Thus, T is added and does nothing.
            //

            // Remove inactive threads/processes.
            auto& process_summaries = summary.mutable_process_summaries();
            auto process_it = process_summaries.begin();
            while (process_it != process_summaries.end()) {
                auto next_process_it = process_it;
                ++next_process_it;
                if (process_it->total_run_time() == 0 && process_it->total_future_run_time() == 0) {
                    DEBUG_ONLY({
                        for (auto& thread_summary : process_it->thread_summaries()) {
                            ASSERT(thread_summary.total_run_time() == 0 && thread_summary.total_future_run_time() == 0, "Process with 0 run time && 0 future run time has thread with non zero values");
                        }
                    });
                    process_summaries.erase(process_it);
                } else {
                    // Our evil friend unordered_set returns const iterators...
                    auto& thread_summaries = const_cast<ProcessSummary<SIZE>*>(&*process_it)->mutable_thread_summaries();
                    auto thread_it = thread_summaries.begin();
                    while (thread_it != thread_summaries.end()) {
                        auto next_thread_it = thread_it;
                        ++next_thread_it;
                        if (thread_it->total_run_time() == 0 && thread_it->total_future_run_time() == 0) {
                            thread_summaries.erase(thread_it);
                        }
                        thread_it = next_thread_it;
                    }
                }
                process_it = next_process_it;
            }
        }
    }

    //
    // Calculate vmfault data.
    //
    // We want to calculate this after the future CPU time, because it is possible a time slice might have vmfaults
    // that span the entire timespan. This could result in a process/thread with no run time and no future time, which
    // would be removed as "inactive" during the future CPU time calculation.
    //

    if (summary_cpu == NULL) {
        // vmfault intervals are stored in the MachineThread
        for (MachineThread<SIZE>* machine_thread : _threads_by_time) {
            const MachineProcess<SIZE>* process = &machine_thread->process();

            ProcessSummary<SIZE>* process_summary = NULL;
            ThreadSummary<SIZE>* thread_summary = NULL;

            const auto& vm_faults = machine_thread->vm_faults();
            if (!vm_faults.empty()) {
                AbsInterval vm_faults_timespan = AbsInterval(vm_faults.front().location(), vm_faults.back().max() - vm_faults.front().location());
                AbsInterval trimmed_timespan = summary_timespan.intersection_range(vm_faults_timespan);

                if (trimmed_timespan.length() > 0) {
                    auto start = interval_beginning_timespan(vm_faults, trimmed_timespan);
                    auto end = interval_ending_timespan(vm_faults, trimmed_timespan);

                    ASSERT(!start || start->intersects(trimmed_timespan), "Sanity");
                    ASSERT(!end || end->intersects(trimmed_timespan), "Sanity");
                    ASSERT((!start && !end) || ((start && end) && (start <= end)), "Sanity");

                    if (start && end) {
                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            //
                            // NOTE! summary_timespan, NOT trimmed_timespan!
                            //
                            // 8/25/13 ... Okay, why do we care summary vs trimmed?
                            // It shouldn't be possible for start to lie outside trimmed...
                            // Leaving this for now rather than introducing some bizarre
                            // corner case, but wth...
                            //
                            AbsInterval t = start->intersection_range(summary_timespan);

                            ASSERT(t.length() > 0, "Might be too strong, but expecting this to be non-zero");

                            summary.add_vm_fault_time(t.length());

                            // We must initialize these lazily. If we don't, every process and thread gets
                            // a summary entry. But we don't want to keep looking them up over and over...
                            if (!process_summary) {
                                process_summary = summary.mutable_process_summary(process);
                            }
                            process_summary->add_vm_fault_time(t.length());

                            if (!thread_summary) {
                                thread_summary = process_summary->mutable_thread_summary(machine_thread);
                            }
                            thread_summary->add_vm_fault_time(t.length());

                            start++;
                        }
                    }
                }
            }
        }
    }

    //
    // Calculate IO activity data.
    //
    if (summary_cpu == NULL) {
        //
        // IO activity may overlap, even on individual threads.
        //
        // All IO activity is stored in a single sorted vector, but
        // it may overlap even at the thread level. There isn't an
        // easy way to locate a starting and stopping point that
        // intersects a given range.
        //
        // The solution being used is to flatten the overlapping IO
        // and keep a sorted, non-overlapping list of IO activity. For any
        // given timespan, we find the overlapping intervals of flattened
        // IO activity and then look up the actual matching IOActivity
        // objects.
        //
        if (!_all_io_active_intervals.empty()) {
            AbsInterval io_timespan = AbsInterval(_all_io_active_intervals.front().location(), _all_io_active_intervals.back().max() - _all_io_active_intervals.front().location());
            AbsInterval trimmed_timespan = summary_timespan.intersection_range(io_timespan);
            if (trimmed_timespan.length() > 0) {
                //
                // First find the flattened start point
                //
                if (auto flattened_start = interval_beginning_timespan(_all_io_active_intervals, trimmed_timespan)) {
                    //
                    // Now find the actual start IOActivity
                    //
                    auto it = std::lower_bound(_all_io.begin(), _all_io.end(), flattened_start->location(), AbsIntervalLocationVsAbsTimeComparator());
                    ASSERT(it != _all_io.end(), "If we reach here, we should ALWAYS find a match!");

                    // Walk every IOActivity that begins before the end of the
                    // summary window; entries that don't actually intersect it
                    // are filtered out below.
                    while (it != _all_io.end() && it->location() < summary_timespan.max()) {
                        AbsInterval t = it->intersection_range(summary_timespan);

                        // Some of the ranges will not intersect at all, for example:
                        //
                        // IOActivity
                        //
                        //     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
                        //     XXXXXX
                        //        XXXXXXXX
                        //
                        // Summary Range
                        //
                        //                            SSSSSSSSSSSSSSSSSS
                        //
                        // The flattened_start will point at the oldest IOActivity
                        // which overlaps the summary range, but many of the later
                        // IOActivities will not overlap.

                        if (t.length() > 0) {
                            //
                            // Wait time.
                            //
                            summary.add_io_time(t.length());

                            ProcessSummary<SIZE>* process_summary = summary.mutable_process_summary(&it->thread()->process());
                            process_summary->add_io_time(t.length());

                            ThreadSummary<SIZE>* thread_summary = process_summary->mutable_thread_summary(it->thread());
                            thread_summary->add_io_time(t.length());

                            //
                            // Bytes completed
                            //
                            if (summary_timespan.contains(it->max() - AbsTime(1))) {
                                summary.add_io_bytes_completed(it->size());
                                process_summary->add_io_bytes_completed(it->size());
                                thread_summary->add_io_bytes_completed(it->size());
                            }
                        }
                        it++;
                    }
                }
            }
        }
    }

    //
    // Calculate Jetsam activity data.
    //
    if (summary_cpu == NULL) {
        // jetsam activity is stored in the MachineThread
        for (MachineThread<SIZE>* machine_thread : _threads_by_time) {
            const MachineProcess<SIZE>* process = &machine_thread->process();

            ProcessSummary<SIZE>* process_summary = NULL;
            ThreadSummary<SIZE>* thread_summary = NULL;

            const auto& jetsam_activity = machine_thread->jetsam_activity();
            if (!jetsam_activity.empty()) {
                AbsInterval jetsam_timespan = AbsInterval(jetsam_activity.front().location(), jetsam_activity.back().max() - jetsam_activity.front().location());
                AbsInterval trimmed_timespan = summary_timespan.intersection_range(jetsam_timespan);

                if (trimmed_timespan.length() > 0) {
                    auto start = interval_beginning_timespan(jetsam_activity, trimmed_timespan);
                    auto end = interval_ending_timespan(jetsam_activity, trimmed_timespan);

                    ASSERT(!start || start->intersects(trimmed_timespan), "Sanity");
                    ASSERT(!end || end->intersects(trimmed_timespan), "Sanity");
                    ASSERT((!start && !end) || ((start && end) && (start <= end)), "Sanity");

                    if (start && end) {
                        // NOTE! <=, not <, because end is inclusive of data we want to count!
                        while (start <= end) {
                            //
                            // NOTE! summary_timespan, NOT trimmed_timespan!
                            //
                            // 8/25/13 ... Okay, why do we care summary vs trimmed?
                            // It shouldn't be possible for start to lie outside trimmed...
                            // Leaving this for now rather than introducing some bizarre
                            // corner case, but wth...
                            //
                            AbsInterval t = start->intersection_range(summary_timespan);

                            ASSERT(t.length() > 0, "Might be too strong, but expecting this to be non-zero");

                            summary.add_jetsam_time(t.length());

                            // We must initialize these lazily. If we don't, every process and thread gets
                            // a summary entry. But we don't want to keep looking them up over and over...
                            if (!process_summary) {
                                process_summary = summary.mutable_process_summary(process);
                            }
                            process_summary->add_jetsam_time(t.length());

                            if (!thread_summary) {
                                thread_summary = process_summary->mutable_thread_summary(machine_thread);
                            }
                            thread_summary->add_jetsam_time(t.length());

                            start++;
                        }
                    }
                }
            }
        }

        // Jetsam kill times are stored in the process.
        for (MachineProcess<SIZE>* machine_process : _processes_by_time) {
            if (machine_process->is_exit_by_jetsam()) {
                if (summary_timespan.contains(machine_process->exit_timestamp())) {
                    summary.increment_processes_jetsamed();
                    summary.mutable_process_summary(machine_process)->set_jetsam_killed();
                }
            }
        }
    }

    DEBUG_ONLY(summary.validate());

    return summary;
}
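
//
// Typical usage sketch (the trace path and the Kernel64 SIZE policy below
// are illustrative assumptions, not something this file defines):
//
//     TraceFile file("some_trace.kdbg");
//     Machine<Kernel64> machine(file);
//     CPUSummary<Kernel64> summary = machine.summary_for_timespan(machine.timespan(), NULL);
//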

template <typename SIZE>
uint32_t Machine<SIZE>::active_cpus() const {
    uint32_t cpus = 0;

    for (auto& cpu : _cpus) {
        if (!cpu.timeline().empty()) {
            cpus++;
        }
    }

    return cpus;
}

// This attempts to analyze various pieces of data and guess
// whether the Machine represents an iOS device or not.

template <typename SIZE>
bool Machine<SIZE>::is_ios() const {
    // I looked at avg intr time and min intr time; they were too close for
    // reliable detection of desktop vs device (desktops have intr(s) as
    // short as 60ns).

    // For now, we're just going to do a really gross detection; in any trace
    // from a device we'd expect to see SpringBoard or backboardd.

    for (auto process : _processes_by_time) {
        if (strcmp(process->name(), "SpringBoard") == 0)
            return true;
        if (strcmp(process->name(), "backboardd") == 0)
            return true;
    }

    return false;
}