system_cmds-643.30.1.tar.gz

[apple/system_cmds.git] / KDBG / CPUSummary.hpp
diff --git a/KDBG/CPUSummary.hpp b/KDBG/CPUSummary.hpp

new file mode 100644 (file)

index 0000000..eaafb65
--- /dev/null
+++ b/KDBG/CPUSummary.hpp
@@ -0,0 +1,353 @@
+//
+//  CPUSummary.hpp
+//  KDBG
+//
+//  Created by James McIlree on 4/22/13.
+//  Copyright (c) 2014 Apple. All rights reserved.
+//
+
+#ifndef kdprof_CPUSummary_hpp
+#define kdprof_CPUSummary_hpp
+
+#include <utility>
+
+// Forward declaration: this header only refers to MachineCPU<SIZE> through
+// pointers, so the full definition is not required here.
+template <typename SIZE>
+class MachineCPU;
+
+template <typename SIZE>
+class CPUSummary {
+    private:
+       // Disallow copy constructor to make sure that the compiler
+       // is moving these, instead of copying them when we pass around
+       CPUSummary(const CPUSummary& that) = delete;
+       CPUSummary& operator=(const CPUSummary& other) = delete;
+
+       friend class Machine<SIZE>;
+
+    public:
+       typedef std::unordered_set<ProcessSummary<SIZE>, ProcessSummaryHash<SIZE>, ProcessSummaryEqualTo<SIZE>> ProcessSummarySet;
+       typedef std::unordered_set<const MachineCPU<SIZE>*> CPUSummaryMachineCPUSet;
+
+    protected:
+       AbsTime                                         _total_unknown_time;
+       AbsTime                                         _total_run_time;
+       AbsTime                                         _total_idle_time;
+       AbsTime                                         _total_intr_time;
+       AbsTime                                         _total_future_run_time;
+       AbsTime                                         _total_wallclock_run_time;
+       AbsTime                                         _total_all_cpus_idle_time;
+       AbsTime                                         _total_vm_fault_time;
+       AbsTime                                         _total_io_time;
+       AbsTime                                         _total_jetsam_time;
+
+       uint32_t                                        _context_switch_count;
+       uint32_t                                        _count_idle_events;
+       uint32_t                                        _count_intr_events;
+       uint32_t                                        _count_vm_fault_events;
+       uint32_t                                        _count_io_events;
+       uint32_t                                        _count_processes_jetsamed;
+       uint32_t                                        _active_cpus;
+       
+       uint64_t                                        _io_bytes_completed;
+
+       CPUSummaryMachineCPUSet                         _cpus;
+       ProcessSummarySet                               _process_summaries;
+
+       std::vector<AbsInterval>                        _wallclock_run_intervals;               // This is the actual wallclock run interval data.
+       std::vector<AbsInterval>                        _per_cpu_wallclock_run_intervals;       // We need to accumulate intervals during summary generation, this is a temp buffer.
+
+       bool                                            _should_merge_all_cpus_idle_intervals;
+       std::vector<AbsInterval>                        _all_cpus_idle_intervals;
+       std::vector<AbsInterval>                        _per_cpu_all_cpus_idle_intervals;
+
+       void add_unknown_time(AbsTime time)             { _total_unknown_time += time; }
+       void add_run_time(AbsTime time)                 { _total_run_time += time; }
+       void add_idle_time(AbsTime time)                { _total_idle_time += time; _count_idle_events++; }
+       void add_intr_time(AbsTime time)                { _total_intr_time += time; _count_intr_events++; }
+       void add_future_run_time(AbsTime time)          { _total_future_run_time += time; }
+       void add_vm_fault_time(AbsTime time)            { _total_vm_fault_time += time; _count_vm_fault_events++; }
+       void add_io_time(AbsTime time)                  { _total_io_time += time; _count_io_events++; } // We want to bump the event count on all IO activity, not just on completion
+       void add_jetsam_time(AbsTime time)              { _total_jetsam_time += time; }
+
+       void add_io_bytes_completed(typename SIZE::ptr_t bytes) { _io_bytes_completed += bytes; }
+
+       void increment_processes_jetsamed()             { _count_processes_jetsamed++; }
+
+       //
+       // NOTE! Why are the various interval(s) accumulated one cpu at a time,
+       // instead of storing them all in a single vector, sorting it and processing
+       // once at the end?
+       //
+       // The single vector, sort and postprocess would work for wallclock time
+       // calculation, because wallclock times involve "union" operations where
+       // the number of cpu(s) don't matter.
+       //
+       // However, for the all-idle and idle-while-wating-on-IO calculations, we
+       // need "intersects" operations, I.E. all 16 cores need to be idle to count
+       // as "all-idle". In this mode, the number of cores matters, an intersection
+       // requires all 16 cores to simultaneously be the same state. This is difficult
+       // to calculate with more than 2 sources. By calculating one at a time,
+       // that is avoided, the state remains sanity-checkable throughout.
+       //
+
+
+       //
+       // Wallclock run intervals are added as each cpu timeline is walked.
+       // Between cpu(s), the results are accumulated to a single buffer
+       // After all cpus have been processed, the single buffer is summarized
+       //
+       // wallclock run time is the *union* of cpu run intervals.
+       //
+       void add_wallclock_run_interval(AbsInterval interval);
+       void accumulate_wallclock_run_intervals();
+       void summarize_wallclock_run_intervals();
+
+       //
+       // all cpus idle intervals are added as each cpu timeline is walked.
+       // Between cpu(s), the results are accumulated to a single buffer
+       // After all cpus have been processed, the single buffer is summarized.
+       //
+       // all cpus idle time is the *intersection* of cpu idle intervals
+       //
+       void add_all_cpus_idle_interval(AbsInterval interval);
+       void accumulate_all_cpus_idle_intervals();
+       void summarize_all_cpus_idle_intervals();
+
+       void incr_context_switches()                            { _context_switch_count++; }
+       void incr_active_cpus()                                 { _active_cpus++; }
+
+       // These bracket individual cpu timeline walks
+       void begin_cpu_timeline_walk(const MachineCPU<SIZE>* cpu);
+       void end_cpu_timeline_walk(const MachineCPU<SIZE>* cpu);
+
+       // These bracket all cpu timeline walks
+       void begin_cpu_timeline_walks(void);
+       void end_cpu_timeline_walks(void);
+
+       ProcessSummary<SIZE>* mutable_process_summary(const MachineProcess<SIZE>* process) {
+               auto it = _process_summaries.find(process);
+               if (it == _process_summaries.end()) {
+                       // We create any process summary that is missing.
+                       auto insert_result = _process_summaries.emplace(process);
+                       ASSERT(insert_result.second, "Sanity");
+                       it = insert_result.first;
+               }
+
+               // NOTE! Because we are using a Set instead of a Map, STL wants
+               // the objects to be immutable. "it" refers to a const Record, to
+               // prevent us from changing the hash or equality of the Set. We
+               // know that the allowed set of mutations will not change these,
+               // and so we evil hack(tm) and cast away the const'ness.
+               return const_cast<ProcessSummary<SIZE>*>(&*it);
+       }
+
+       ProcessSummarySet& mutable_process_summaries()          { return _process_summaries; }
+
+  public:
+       CPUSummary() :
+               _context_switch_count(0),
+               _count_idle_events(0),
+               _count_intr_events(0),
+               _count_vm_fault_events(0),
+               _count_io_events(0),
+               _count_processes_jetsamed(0),
+               _active_cpus(0),
+               _io_bytes_completed(0),
+               _should_merge_all_cpus_idle_intervals(false)
+       {
+       }
+
+       CPUSummary (CPUSummary&& rhs) noexcept :
+               _total_unknown_time(rhs._total_unknown_time),
+               _total_run_time(rhs._total_run_time),
+               _total_idle_time(rhs._total_idle_time),
+               _total_intr_time(rhs._total_intr_time),
+               _total_future_run_time(rhs._total_future_run_time),
+               _total_wallclock_run_time(rhs._total_wallclock_run_time),
+               _total_all_cpus_idle_time(rhs._total_all_cpus_idle_time),
+               _total_vm_fault_time(rhs._total_vm_fault_time),
+               _total_io_time(rhs._total_io_time),
+               _context_switch_count(rhs._context_switch_count),
+               _count_idle_events(rhs._count_idle_events),
+               _count_intr_events(rhs._count_intr_events),
+               _count_vm_fault_events(rhs._count_vm_fault_events),
+               _count_io_events(rhs._count_io_events),
+               _count_processes_jetsamed(rhs._count_processes_jetsamed),
+               _active_cpus(rhs._active_cpus),
+               _io_bytes_completed(rhs._io_bytes_completed),
+               _cpus(rhs._cpus),
+               _process_summaries(rhs._process_summaries),
+               // _wallclock_run_intervals
+               // _per_cpu_wallclock_run_intervals
+               _should_merge_all_cpus_idle_intervals(false)
+               // _all_cpus_idle_intervals
+               // _per_cpu_all_cpus_idle_intervals
+               // _wallclock_vm_fault_intervals
+               // _wallclock_pgin_intervals
+               // _wallclock_disk_read_intervals
+       {
+               ASSERT(rhs._all_cpus_idle_intervals.empty(), "Sanity");
+               ASSERT(rhs._per_cpu_all_cpus_idle_intervals.empty(), "Sanity");
+               ASSERT(rhs._wallclock_run_intervals.empty(), "Sanity");
+               ASSERT(rhs._per_cpu_wallclock_run_intervals.empty(), "Sanity");
+               ASSERT(rhs._should_merge_all_cpus_idle_intervals == false, "Sanity");
+       }
+
+       AbsTime total_time() const                                      { return _total_unknown_time + _total_run_time + _total_idle_time + _total_intr_time; }
+
+       AbsTime total_unknown_time() const                              { return _total_unknown_time; }
+       AbsTime total_run_time() const                                  { return _total_run_time; }
+       AbsTime total_idle_time() const                                 { return _total_idle_time; }
+       AbsTime total_intr_time() const                                 { return _total_intr_time; }
+       AbsTime total_future_run_time() const                           { return _total_future_run_time; }
+       AbsTime total_wallclock_run_time() const                        { return _total_wallclock_run_time; }
+       AbsTime total_all_cpus_idle_time() const                        { return _total_all_cpus_idle_time; }
+       AbsTime total_vm_fault_time() const                             { return _total_vm_fault_time; }
+       AbsTime total_io_time() const                                   { return _total_io_time; }
+       AbsTime total_jetsam_time() const                               { return _total_jetsam_time; }
+       
+       AbsTime avg_on_cpu_time() const                                 { return _total_run_time / _context_switch_count; }
+
+       uint32_t context_switches() const                               { return _context_switch_count; }
+       uint32_t num_idle_events() const                                { return _count_idle_events; }
+       uint32_t num_intr_events() const                                { return _count_intr_events; }
+       uint32_t num_vm_fault_events() const                            { return _count_vm_fault_events; }
+       uint32_t num_io_events() const                                  { return _count_io_events; }
+       uint32_t num_processes_jetsammed() const                        { return _count_processes_jetsamed; }
+
+       uint32_t active_cpus() const                                    { return _active_cpus; }
+
+       uint64_t io_bytes_completed() const                             { return _io_bytes_completed; }
+
+
+       // A CPUSummary may be a summary of one or more CPUs.
+       // The cpus set are the MachineCPU(s) that were used to
+       // construct this summary.
+       const CPUSummaryMachineCPUSet& cpus() const                     { return _cpus; }
+
+       const ProcessSummarySet& process_summaries() const              { return _process_summaries; }
+       const ProcessSummary<SIZE>* process_summary(const MachineProcess<SIZE>* process) const {
+               auto it = _process_summaries.find(process);
+               return (it == _process_summaries.end()) ? NULL : &*it;
+       }
+
+       DEBUG_ONLY(void validate() const;)
+};
+
+// Opens a pass over all cpu timelines. Arms the flag that makes the next
+// accumulate_all_cpus_idle_intervals() call seed the all-cpus-idle result
+// instead of intersecting with it.
+template <typename SIZE>
+void CPUSummary<SIZE>::begin_cpu_timeline_walks(void) {
+       this->_should_merge_all_cpus_idle_intervals = true;
+}
+
+// Opens the walk of a single cpu timeline by recording `cpu` (which must not
+// be null) in the set of cpus that contributed to this summary.
+template <typename SIZE>
+void CPUSummary<SIZE>::begin_cpu_timeline_walk(const MachineCPU<SIZE>* cpu) {
+       ASSERT(cpu, "Sanity");
+
+       // The set holds plain pointers, so inserting the same cpu again is a no-op.
+       this->_cpus.insert(cpu);
+}
+
+// Closes the walk of a single cpu timeline: the intervals gathered for this
+// cpu are folded into the cross-cpu buffers. `cpu` is only sanity checked.
+template <typename SIZE>
+void CPUSummary<SIZE>::end_cpu_timeline_walk(const MachineCPU<SIZE>* cpu) {
+       ASSERT(cpu, "Sanity");
+
+       // Run intervals first, then the all-cpus-idle intervals.
+       this->accumulate_wallclock_run_intervals();
+       this->accumulate_all_cpus_idle_intervals();
+}
+
+// Closes the pass over all cpu timelines by reducing the accumulated interval
+// buffers to their totals.
+template <typename SIZE>
+void CPUSummary<SIZE>::end_cpu_timeline_walks(void) {
+       this->summarize_wallclock_run_intervals();
+       this->summarize_all_cpus_idle_intervals();
+}
+
+// Appends a run interval for the cpu currently being walked. Intervals must
+// arrive in increasing order and must not intersect the previous one.
+template <typename SIZE>
+void CPUSummary<SIZE>::add_wallclock_run_interval(AbsInterval interval) {
+       ASSERT(_per_cpu_wallclock_run_intervals.empty() ||
+              (_per_cpu_wallclock_run_intervals.back() < interval &&
+               !interval.intersects(_per_cpu_wallclock_run_intervals.back())),
+              "Invariant violated");
+
+       _per_cpu_wallclock_run_intervals.push_back(interval);
+}
+
+template <typename SIZE>
+void CPUSummary<SIZE>::accumulate_wallclock_run_intervals() {
+       _wallclock_run_intervals = trange_vector_union(_wallclock_run_intervals, _per_cpu_wallclock_run_intervals);
+       _per_cpu_wallclock_run_intervals.clear();
+       // We don't shrink_to_fit here as its expected another CPU's run intervals will be processed next.
+
+       for (auto& process_summary : _process_summaries) {
+               // NOTE! Because we are using a Set instead of a Map, STL wants
+               // the objects to be immutable. We know that the operations being
+               // invoked will not change the hash, but we still must throw away
+               // the const'ness. Care must be taken to avoid the construction of
+               // temporary objects, thus the use of pointers...
+               const_cast<ProcessSummary<SIZE>*>(&process_summary)->accumulate_wallclock_run_intervals();
+       }
+}
+
+template <typename SIZE>
+void CPUSummary<SIZE>::summarize_wallclock_run_intervals() {
+       ASSERT(_per_cpu_wallclock_run_intervals.empty(), "Sanity");
+       _per_cpu_wallclock_run_intervals.shrink_to_fit();
+
+       ASSERT(_total_wallclock_run_time == 0, "Called more than once");
+
+       ASSERT(is_trange_vector_sorted_and_non_overlapping(_wallclock_run_intervals), "Sanity");
+
+       for (auto& interval : _wallclock_run_intervals) {
+               _total_wallclock_run_time += interval.length();
+       }
+
+       _wallclock_run_intervals.clear();
+       _wallclock_run_intervals.shrink_to_fit();
+
+       for (auto& process_summary : _process_summaries) {
+               // NOTE! Because we are using a Set instead of a Map, STL wants
+               // the objects to be immutable. We know that the operations being
+               // invoked will not change the hash, but we still must throw away
+               // the const'ness. Care must be taken to avoid the construction of
+               // temporary objects, thus the use of pointers...
+               const_cast<ProcessSummary<SIZE>*>(&process_summary)->summarize_wallclock_run_intervals();
+       }
+}
+
+// Appends an idle interval for the cpu currently being walked. Intervals must
+// arrive in increasing order and must not intersect the previous one.
+template <typename SIZE>
+void CPUSummary<SIZE>::add_all_cpus_idle_interval(AbsInterval interval) {
+       ASSERT(_per_cpu_all_cpus_idle_intervals.empty() ||
+              (_per_cpu_all_cpus_idle_intervals.back() < interval &&
+               !interval.intersects(_per_cpu_all_cpus_idle_intervals.back())),
+              "Invariant violated");
+
+       _per_cpu_all_cpus_idle_intervals.push_back(interval);
+}
+
+template <typename SIZE>
+void CPUSummary<SIZE>::accumulate_all_cpus_idle_intervals() {
+       if (_should_merge_all_cpus_idle_intervals) {
+               _should_merge_all_cpus_idle_intervals = false;
+               _all_cpus_idle_intervals = _per_cpu_all_cpus_idle_intervals;
+       } else {
+               _all_cpus_idle_intervals = trange_vector_intersect(_all_cpus_idle_intervals, _per_cpu_all_cpus_idle_intervals);
+       }
+       _per_cpu_all_cpus_idle_intervals.clear();
+}
+
+template <typename SIZE>
+void CPUSummary<SIZE>::summarize_all_cpus_idle_intervals() {
+       ASSERT(!_should_merge_all_cpus_idle_intervals, "Sanity");
+       ASSERT(_per_cpu_all_cpus_idle_intervals.empty(), "Sanity");
+       ASSERT(_total_all_cpus_idle_time == 0, "Called more than once");
+       ASSERT(is_trange_vector_sorted_and_non_overlapping(_all_cpus_idle_intervals), "Sanity");
+
+       _per_cpu_all_cpus_idle_intervals.shrink_to_fit();
+       for (auto& interval : _all_cpus_idle_intervals) {
+               _total_all_cpus_idle_time += interval.length();
+       }
+
+       _all_cpus_idle_intervals.clear();
+       _all_cpus_idle_intervals.shrink_to_fit();
+}
+
+#if !defined(NDEBUG) && !defined(NS_BLOCK_ASSERTIONS)
+// Consistency checks for builds with assertions enabled: the wallclock run
+// total may not exceed the run total, and the all-cpus-idle total may not
+// exceed the idle total. Every process summary is validated in turn.
+// NOTE(review): the in-class declaration is wrapped in DEBUG_ONLY(); confirm
+// that macro is active under exactly the condition tested above.
+template <typename SIZE>
+void CPUSummary<SIZE>::validate() const {
+       ASSERT(_total_wallclock_run_time <= _total_run_time, "Sanity");
+       ASSERT(_total_all_cpus_idle_time <= _total_idle_time, "Sanity");
+
+       for (auto it = _process_summaries.begin(); it != _process_summaries.end(); ++it) {
+               it->validate();
+       }
+}
+#endif
+
+#endif