// Created by James McIlree on 4/20/13.
// Copyright (c) 2013 Apple. All rights reserved.
#ifndef kdprof_ParallelPrinting_hpp
#define kdprof_ParallelPrinting_hpp

void print_event_header(const Globals& globals, bool is_64_bit);
template <typename SIZE>
char* print_event(char* buf, char* buf_end, const Globals& globals, const Machine<SIZE>& machine, const KDEvent<SIZE>& event, uintptr_t event_index)
{
    // Column layout (widths; 8/16 varies with 32/64 bit arg size):
    //
    // [Index] Time  Type  Code  arg1  arg2  arg3  arg4  thread  cpu#  command/IOP-name  pid
    //  8      16    4     34    8/16  8/16  8/16  8/16  10      4     16                6
    //
    // For now, each column is folding up the "after" spacing in a single printf, IOW
    //
    // buf += snprintf(buf, buf_end - buf, "%8s  ", "COL"); /* + 2 spaces */
    //
    // instead of
    //
    // buf += snprintf(buf, buf_end - buf, "%8s", "COL");
    // buf += snprintf(buf, buf_end - buf, "  "); /* 2 spaces */
    ASSERT(event.cpu() > -1 && event.cpu() < machine.cpus().size(), "cpu_id out of range");
    const MachineCPU<SIZE>& cpu = machine.cpus()[event.cpu()];
    //
    // Okay, here is how snprintf works (given char buf[2]):
    //
    // snprintf(buf, 0, "a"); // Returns 1, buf is unchanged.
    // snprintf(buf, 1, "a"); // Returns 1, buf = '\0'
    // snprintf(buf, 2, "a"); // Returns 1, buf = 'a', '\0'
    //
    //
    // If we cannot print successfully, we return the original pointer.
    //
    char* orig_buf = buf;
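    //
    // A minimal sketch of the per-column idiom those semantics imply
    // (illustrative only; the real columns follow below):
    //
    //   buf += snprintf(buf, buf_end - buf, "%8s  ", "COL"); // may advance buf past buf_end
    //   if (buf >= buf_end)
    //       return orig_buf; // truncated; the caller grows the buffer and retries
    //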
    //
    // Event Index
    //
    if (globals.should_print_event_index()) {
        buf += snprintf(buf, buf_end - buf, "%8llu ", (uint64_t)event_index);
    }

    if (buf >= buf_end)
        return orig_buf;

    //
    // Time
    //
    if (globals.should_print_mach_absolute_timestamps()) {
        if (globals.beginning_of_time().value() == 0)
            buf += snprintf(buf, buf_end - buf, "%16llX ", (event.timestamp() - globals.beginning_of_time()).value());
        else
            buf += snprintf(buf, buf_end - buf, "%16llu ", (event.timestamp() - globals.beginning_of_time()).value());
    } else {
        NanoTime ntime = (event.timestamp() - globals.beginning_of_time()).nano_time(globals.timebase());
        buf += snprintf(buf, buf_end - buf, "%16.2f ", (double)ntime.value() / 1000.0); // nanoseconds -> microseconds
    }

    if (buf >= buf_end)
        return orig_buf;

    //
    // Type & Code
    //
    const char* type = event.is_func_start() ? "beg" : (event.is_func_end() ? "end" : "---");
    auto trace_code_it = globals.trace_codes().find(event.dbg_cooked());
    if (cpu.is_iop() || !globals.should_print_symbolic_event_codes() || trace_code_it == globals.trace_codes().end()) {
        buf += snprintf(buf, buf_end - buf, "%4s %-34x ", type, event.dbg_cooked());
    } else {
        buf += snprintf(buf, buf_end - buf, "%4s %-34s ", type, trace_code_it->second.c_str());
    }

    if (buf >= buf_end)
        return orig_buf;

    //
    // arg1
    //
    if (event.dbg_class() == DBG_IOKIT && event.dbg_subclass() == DBG_IOPOWER) {
        // IOPOWER events carry a kext name in arg1, stored reversed;
        // flip it back to readable order.
        std::string kext_name = event.arg1_as_string();
        std::reverse(kext_name.begin(), kext_name.end());

        if (SIZE::is_64_bit)
            buf += snprintf(buf, buf_end - buf, "%-16s ", kext_name.c_str());
        else
            buf += snprintf(buf, buf_end - buf, "%-8s ", kext_name.c_str());
    } else {
        if (SIZE::is_64_bit)
            buf += snprintf(buf, buf_end - buf, "%-16llX ", (uint64_t)event.arg1());
        else
            buf += snprintf(buf, buf_end - buf, "%-8x ", (uint32_t)event.arg1());
    }

    if (buf >= buf_end)
        return orig_buf;
    //
    // Profiling showed the repeated snprintf calls were hot; rolling them up
    // into a single call is worth ~2.5% on a HUGE file.
    //
    // arg2 arg3 arg4 thread cpu
    //
    if (SIZE::is_64_bit)
        buf += snprintf(buf, buf_end - buf, "%-16llX %-16llX %-16llX %10llX %4u ", (uint64_t)event.arg2(), (uint64_t)event.arg3(), (uint64_t)event.arg4(), (uint64_t)event.tid(), event.cpu());
    else
        buf += snprintf(buf, buf_end - buf, "%-8x %-8x %-8x %10llX %4u ", (uint32_t)event.arg2(), (uint32_t)event.arg3(), (uint32_t)event.arg4(), (uint64_t)event.tid(), event.cpu());

    if (buf >= buf_end)
        return orig_buf;
    //
    // command & pid (handled together because an IOP prints a name but no pid)
    //
    if (cpu.is_iop()) {
        // We print the IOP name instead of a command
        buf += snprintf(buf, buf_end - buf, "%-16s\n", cpu.name());
    } else {
        if (const MachineThread<SIZE>* thread = machine.thread(event.tid(), event.timestamp())) {
            buf += snprintf(buf, buf_end - buf, "%-16s %-6d\n", thread->process().name(), thread->process().pid());
        } else {
            buf += snprintf(buf, buf_end - buf, "%-16s %-6s\n", "?????", "???");
        }
    }
    // Still need to check this; it's an error if we overflow on the last print!
    if (buf >= buf_end)
        return orig_buf;

    return buf;
}
template <typename SIZE>
char* print_event_range_to_buffer(const Globals& globals, const Machine<SIZE>& machine, TRange<uintptr_t> range, MemoryBuffer<char>& buffer) {
    char* cursor = buffer.data();
    char* cursor_end = cursor + buffer.capacity();
    if (const KDEvent<SIZE>* events = machine.events()) {
        ASSERT(TRange<uintptr_t>(0, machine.event_count()).contains(range), "Sanity");
        for (uintptr_t index = range.location(); index < range.max(); ++index) {
            char* temp = print_event(cursor, cursor_end, globals, machine, events[index], index);
            if (temp == cursor) {
                // print_event() returned the original cursor: the event did not
                // fit. Changing the capacity will invalidate the cursor, so save
                // the offset, rebase against the new data(), and retry the event.
                ptrdiff_t offset = cursor - buffer.data();
                buffer.set_capacity(buffer.capacity() * 2);
                cursor = buffer.data() + offset;
                cursor_end = buffer.data() + buffer.capacity();
                --index;
            } else {
                cursor = temp;
            }
        }
    }

    return cursor;
}
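// Illustrative sketch of the hazard the offset dance above avoids (a sketch,
// assuming set_capacity() may reallocate and free the old block):
//
//   char* cursor = buffer.data();
//   buffer.set_capacity(buffer.capacity() * 2); // may move the storage
//   *cursor = 'x';                              // dangling write into freed memory
//
// Saving the offset first, then rebasing against the new data(), keeps the
// cursor valid across the reallocation.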
class PrintWorkUnit {
  protected:
    MemoryBuffer<char> _buffer;
    TRange<uintptr_t> _event_range;
    char* _buffer_end;

    // We do not want work units copied.
    PrintWorkUnit(const PrintWorkUnit& that) = delete;
    PrintWorkUnit& operator=(const PrintWorkUnit& other) = delete;

  public:
    PrintWorkUnit(MemoryBuffer<char>&& buffer, TRange<uintptr_t> event_range, char* buffer_end) :
        _buffer(std::move(buffer)),
        _event_range(event_range),
        _buffer_end(buffer_end)
    {
        ASSERT(_buffer.capacity(), "Sanity");
        ASSERT(_buffer.data(), "Sanity");
        ASSERT(!_buffer_end || _buffer_end > _buffer.data(), "Sanity");
        ASSERT(!_buffer_end || (_buffer_end < _buffer.data() + _buffer.capacity()), "Sanity");
    }
    MemoryBuffer<char>& buffer() { return _buffer; }

    TRange<uintptr_t> event_range() { return _event_range; }
    void set_event_range(TRange<uintptr_t> range) { _event_range = range; }

    char* buffer_end() const { return _buffer_end; }
    void set_buffer_end(char* buffer_end) { _buffer_end = buffer_end; }
};
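// Typical lifecycle of a work unit (a sketch; mirrors the worker loop in
// print_machine_events() below):
//
//   PrintWorkUnit unit(MemoryBuffer<char>(64 * 1024), TRange<uintptr_t>(0, 0), (char*)NULL);
//   while (producer.produce(unit))  // claims a range, fills the buffer, sets buffer_end
//       consumer.consume(unit);     // blocks until the range is next, then writes it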
template <typename SIZE>
class PrintProducer {
  protected:
    const Globals& _globals;
    const Machine<SIZE>& _machine;
    uintptr_t _start_index;
    uintptr_t _end_index;
    uintptr_t _chunk_size;

  public:
    PrintProducer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t chunk_size) :
        _globals(globals),
        _machine(machine),
        _start_index(0),
        _chunk_size(chunk_size)
    {
        _end_index = machine.event_count();
        if (globals.is_summary_start_set() || globals.is_summary_stop_set()) {
            AbsInterval machine_timespan = machine.timespan();

            KDEvent<SIZE> start_event(globals.summary_start(machine_timespan));
            auto it = std::lower_bound(machine.events(), machine.events() + _end_index, start_event);
            ASSERT(&*it >= machine.events(), "Returned start index lower than start");
            _start_index = std::distance(machine.events(), it);

            KDEvent<SIZE> end_event(globals.summary_stop(machine_timespan));
            it = std::lower_bound(machine.events(), machine.events() + _end_index, end_event);
            ASSERT(&*it <= machine.events() + _end_index, "Returned end index greater than end");
            _end_index = std::distance(machine.events(), it);

            ASSERT(_start_index <= _end_index, "start index is > end index");
        }
    }
    bool produce(PrintWorkUnit& work_unit) {
        // Claim a chunk of work to do.
        uintptr_t orig_start_index, new_start_index;
        do {
            orig_start_index = _start_index;
            new_start_index = orig_start_index + std::min(_chunk_size, _end_index - orig_start_index);
        } while (orig_start_index < _end_index && !OSAtomicCompareAndSwapPtrBarrier((void*)orig_start_index, (void*)new_start_index, (void* volatile*)&_start_index));
        // Did we claim work?
        if (orig_start_index < _end_index) {
            TRange<uintptr_t> event_range(orig_start_index, new_start_index - orig_start_index);
            char* end = print_event_range_to_buffer(_globals, _machine, event_range, work_unit.buffer());

            work_unit.set_event_range(event_range);
            work_unit.set_buffer_end(end);
            return true;
        }

        return false;
    }

    uintptr_t start_index() const { return _start_index; }
};
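// The compare-and-swap loop in produce() hands out disjoint, front-to-back
// chunks. An illustrative interleaving, assuming _end_index = 5000 and
// _chunk_size = 2000:
//
//   thread A claims [0, 2000)
//   thread B claims [2000, 4000)
//   thread C claims [4000, 5000)   // final, short chunk
//
// A thread that loses the race (its CAS fails because another thread advanced
// _start_index first) simply re-reads _start_index and tries again.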
template <typename SIZE>
class PrintConsumer {
  protected:
    const Globals& _globals;
    uintptr_t _write_index;
    std::mutex _write_mutex;
    std::condition_variable _write_condition;

  public:
    PrintConsumer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t start_index) :
        _globals(globals),
        _write_index(start_index)
    {
    }
    void consume(PrintWorkUnit& work_unit) {
        std::unique_lock<std::mutex> guard(_write_mutex);
        _write_condition.wait(guard, [&](){ return work_unit.event_range().location() == this->_write_index; });

        ASSERT(work_unit.event_range().location() == _write_index, "Sanity");

        char* data = work_unit.buffer().data();
        size_t bytes = work_unit.buffer_end() - data;
        write(_globals.output_fd(), data, bytes);
        _write_index = work_unit.event_range().max();

        _write_condition.notify_all();
    }
};
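// Illustrative ordering (a sketch, assuming 2000-event chunks): workers may
// finish out of order, but output stays sequential.
//
//   worker 1 finishes [2000, 4000) first -> waits; location() != _write_index
//   worker 2 finishes [0, 2000)          -> writes, _write_index = 2000, notifies
//   worker 1 wakes                       -> writes, _write_index = 4000, notifies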
template <typename SIZE>
uintptr_t print_machine_events(const Globals& globals, const Machine<SIZE>& machine) {
    print_event_header(globals, SIZE::is_64_bit);

    if (const KDEvent<SIZE>* events = machine.events()) {
        if (uintptr_t event_count = machine.event_count()) {
            //
            // We want to chunk this up into reasonably sized pieces of work.
            // Because each piece of work can potentially accumulate a large
            // amount of memory, we need to limit the amount of work "in-flight".
            //
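            // Rough in-flight memory arithmetic (a sketch, based on the
            // 160-bytes-per-event buffer sizing below): each work unit starts
            // at 160 * 2000 = ~320KB, and with one work unit per worker thread
            // the transient buffer footprint is roughly active_cpus * 320KB.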
            uint32_t active_cpus = Kernel::active_cpu_count();
            uintptr_t chunk_size = 2000;
            PrintProducer<SIZE> producer(globals, machine, chunk_size);
            PrintConsumer<SIZE> consumer(globals, machine, producer.start_index());
            std::vector<std::thread> threads;
            for (uint32_t i = 0; i < active_cpus; ++i) {
                threads.push_back(std::thread([&]() {
                    PrintWorkUnit work_unit(MemoryBuffer<char>(160 * chunk_size), TRange<uintptr_t>(0, 0), (char*)NULL);
                    while (producer.produce(work_unit)) {
                        consumer.consume(work_unit);
                    }
                }));
            }

            for (auto& thread : threads) {
                thread.join();
            }
            uint32_t totalProcesses = 0;
            uint32_t totalThreads = 0;

            for (auto process : machine.processes()) {
                if (!process->is_created_by_previous_machine_state()) {
                    totalProcesses++;
                }
            }

            for (auto thread : machine.threads()) {
                if (!thread->is_created_by_previous_machine_state()) {
                    totalThreads++;
                }
            }
            dprintf(globals.output_fd(), "Total Events:    %llu\n", (uint64_t)event_count);
            dprintf(globals.output_fd(), "Total Processes: %u\n", totalProcesses);
            dprintf(globals.output_fd(), "Total Threads:   %u\n", totalThreads);

            return event_count;
        }
    }

    return 0;
}

#endif /* kdprof_ParallelPrinting_hpp */