diff --git a/tensorflow/lite/micro/docs/profiling.md b/tensorflow/lite/micro/docs/profiling.md index 16c6b51917b..e76fb474fbe 100644 --- a/tensorflow/lite/micro/docs/profiling.md +++ b/tensorflow/lite/micro/docs/profiling.md @@ -26,7 +26,7 @@ from within operator kernels and other TFLite Micro routines. The MicroInterpreter class constructor contains an optional profiler argument. This profiler must be an instance of the tflite::Profiler class, and should implement the BeginEvent and EndEvent methods. There is a default implementation -in tensorflow/lite/micro/micro_profiler.cc which can be used for most purposes. +in tensorflow/lite/micro/micro_profiler.h which can be used for most purposes. The best practice for profiling across multiple invocations is to reset or call `ClearEvents()` in between invocations. diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc deleted file mode 100644 index ebead51a90d..00000000000 --- a/tensorflow/lite/micro/micro_profiler.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/micro/micro_profiler.h" - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/micro_log.h" -#include "tensorflow/lite/micro/micro_time.h" - -namespace tflite { - -uint32_t MicroProfiler::BeginEvent(const char* tag) { - if (num_events_ == kMaxEvents) { - MicroPrintf( - "MicroProfiler errored out because total number of events exceeded the " - "maximum of %d.", - kMaxEvents); - TFLITE_ASSERT_FALSE; - } - - tags_[num_events_] = tag; - start_ticks_[num_events_] = GetCurrentTimeTicks(); - end_ticks_[num_events_] = start_ticks_[num_events_] - 1; - return num_events_++; -} - -void MicroProfiler::EndEvent(uint32_t event_handle) { - TFLITE_DCHECK(event_handle < kMaxEvents); - end_ticks_[event_handle] = GetCurrentTimeTicks(); -} - -uint32_t MicroProfiler::GetTotalTicks() const { - int32_t ticks = 0; - for (int i = 0; i < num_events_; ++i) { - ticks += end_ticks_[i] - start_ticks_[i]; - } - return ticks; -} - -void MicroProfiler::Log() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); - } -#endif -} - -void MicroProfiler::LogCsv() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); - for (int i = 0; i < num_events_; ++i) { -#if defined(HEXAGON) || defined(CMSIS_NN) - int ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%d", i, tags_[i], ticks); -#else - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); -#endif - } -#endif -} - -void MicroProfiler::LogTicksPerTagCsv() { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf( - "\"Unique Tag\",\"Total ticks across all events with that tag.\""); - int total_ticks = 0; - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - TFLITE_DCHECK(tags_[i] != nullptr); - int position = FindExistingOrNextPosition(tags_[i]); - TFLITE_DCHECK(position >= 0); - total_ticks_per_tag[position].tag = tags_[i]; - total_ticks_per_tag[position].ticks = - total_ticks_per_tag[position].ticks + ticks; - total_ticks += ticks; - } - - for (int i = 0; i < num_events_; ++i) { - TicksPerTag each_tag_entry = total_ticks_per_tag[i]; - if (each_tag_entry.tag == nullptr) { - break; - } - MicroPrintf("%s, %d", each_tag_entry.tag, each_tag_entry.ticks); - } - MicroPrintf("\"total number of ticks\", %d", total_ticks); -#endif -} - -// This method finds a particular array element in the total_ticks_per_tag array -// with the matching tag_name passed in the method. If it can find a -// matching array element that has the same tag_name, then it will return the -// position of the matching element. But if it unable to find a matching element -// with the given tag_name, it will return the next available empty position -// from the array. -int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) { - int pos = 0; - for (; pos < num_events_; pos++) { - TicksPerTag each_tag_entry = total_ticks_per_tag[pos]; - if (each_tag_entry.tag == nullptr || - strcmp(each_tag_entry.tag, tag_name) == 0) { - return pos; - } - } - return pos < num_events_ ? pos : -1; -} -} // namespace tflite diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index b52ebcb4ea9..c44a53e61f1 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -16,15 +16,29 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ #define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ +#include +#include +#include +#include + +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/compatibility.h" +#include "tensorflow/lite/micro/micro_log.h" #include "tensorflow/lite/micro/micro_profiler_interface.h" +#include "tensorflow/lite/micro/micro_time.h" namespace tflite { +enum class MicroProfilerLogFormat { + HumanReadable, + Csv, +}; + // MicroProfiler creates a common way to gain fine-grained insight into runtime -// performance. Bottleck operators can be identified along with slow code +// performance. Bottleneck operators can be identified along with slow code // sections. This can be used in conjunction with running the relevant micro // benchmark to evaluate end-to-end performance. +template class MicroProfiler : public MicroProfilerInterface { public: MicroProfiler() = default; @@ -33,59 +47,168 @@ class MicroProfiler : public MicroProfilerInterface { // Marks the start of a new event and returns an event handle that can be used // to mark the end of the event via EndEvent. The lifetime of the tag // parameter must exceed that of the MicroProfiler. - virtual uint32_t BeginEvent(const char* tag) override; + uint32_t BeginEvent(const char* tag) override { + if (num_events_ == MAX_EVENTS) { + MicroPrintf( + "MicroProfiler errored out because total number of events exceeded " + "the maximum of %d.", + MAX_EVENTS); + TFLITE_ASSERT_FALSE; + } + + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); + end_ticks_[num_events_] = start_ticks_[num_events_] - 1; + return num_events_++; + } // Marks the end of an event associated with event_handle. It is the // responsibility of the caller to ensure than EndEvent is called once and // only once per event_handle. // // If EndEvent is called more than once for the same event_handle, the last - // call will be used as the end of event marker. If EndEvent is called 0 times + // call will be used as the end of event marker.If EndEvent is called 0 times // for a particular event_handle, the duration of that event will be 0 ticks. - virtual void EndEvent(uint32_t event_handle) override; + void EndEvent(uint32_t event_handle) override { + TFLITE_DCHECK(event_handle < MAX_EVENTS); + end_ticks_[event_handle] = GetCurrentTimeTicks(); + } // Clears all the events that have been currently profiled. - void ClearEvents() { num_events_ = 0; } + void ClearEvents() { + num_events_ = 0; + num_tag_groups_ = 0; + } // Returns the sum of the ticks taken across all the events. This number // is only meaningful if all of the events are disjoint (the end time of // event[i] <= start time of event[i+1]). - uint32_t GetTotalTicks() const; + uint32_t GetTotalTicks() const { + int32_t ticks = 0; + for (int i = 0; i < num_events_; ++i) { + ticks += end_ticks_[i] - start_ticks_[i]; + } + return ticks; + } - // Prints the profiling information of each of the events in human readable - // form. - void Log() const; + // Prints the profiling information of each of the events with the + // given format (human readable by default). + void Log(MicroProfilerLogFormat format = + MicroProfilerLogFormat::HumanReadable) const { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + switch (format) { + case MicroProfilerLogFormat::HumanReadable: + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + uint64_t us = TicksToUs(ticks); + MicroPrintf("%s took %" PRIu64 ".%03" PRIu64 " ms (%u ticks)", tags_[i], + us / 1000, us % 1000, ticks); + } + break; - // Prints the profiling information of each of the events in CSV (Comma - // Separated Value) form. - void LogCsv() const; + case MicroProfilerLogFormat::Csv: + MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); + for (int i = 0; i < num_events_; ++i) { +#if defined(HEXAGON) || defined(CMSIS_NN) + int ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%d", i, tags_[i], ticks); +#else + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); +#endif + } + break; + } +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + } - // Prints total ticks for each unique tag in CSV format. - // Output will have one row for each unique tag along with the - // total ticks summed across all events with that particular tag. - void LogTicksPerTagCsv(); + // Prints the profiling information of each of the events, grouped by tag. + void LogGrouped(MicroProfilerLogFormat format) { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + for (int i = 0; i < num_events_; ++i) { + // Find if the tag already exists in uniqueTags + TagGroup& tag_group = GetTagGroup(tags_[i]); + + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + tag_group.tag = tags_[i]; + tag_group.ticks += ticks; + tag_group.tag_count++; + } + + std::sort(tag_groups_, tag_groups_ + num_tag_groups_, + [](const TagGroup& a, const TagGroup& b) { + return a.ticks > b.ticks; // Sort in descending order + }); + + switch (format) { + case MicroProfilerLogFormat::HumanReadable: { + MicroPrintf("Cumulative event times:"); + MicroPrintf("%-8s %-32s %-12s %-12s", "Count", "Tag", "Ticks", "Time"); + uint64_t total_ticks = 0; + uint64_t us; + for (int i = 0; i < num_tag_groups_; ++i) { + total_ticks += tag_groups_[i].ticks; + us = TicksToUs(tag_groups_[i].ticks); + MicroPrintf("%-8d %-32s %-12d %" PRIu64 ".%03" PRIu64 " ms", + tag_groups_[i].tag_count, tag_groups_[i].tag, + tag_groups_[i].ticks, us / 1000, us % 1000); + } + us = TicksToUs(total_ticks); + MicroPrintf("\nTotal time: %" PRIu64 ".%03" PRIu64 " ms (%lld ticks)", + us / 1000, us % 1000, total_ticks); + break; + } + case MicroProfilerLogFormat::Csv: { + MicroPrintf("\"Tag\",\"Total ticks\""); + for (int i = 0; i < num_tag_groups_; ++i) { + MicroPrintf("%s,%u", tag_groups_[i].tag, tag_groups_[i].ticks); + } + break; + } + } +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + } + + // Convenience function to call Log with CSV format. + void LogCsv() const { Log(MicroProfilerLogFormat::Csv); } + + // Convenience function to call LogGrouped with CSV format. + void LogTicksPerTagCsv() const { LogGrouped(MicroProfilerLogFormat::Csv); } private: - // Maximum number of events that this class can keep track of. The - // MicroProfiler will abort if AddEvent is called more than kMaxEvents number - // of times. Increase this number if you need more events. - static constexpr int kMaxEvents = 4096; - - const char* tags_[kMaxEvents]; - uint32_t start_ticks_[kMaxEvents]; - uint32_t end_ticks_[kMaxEvents]; + const char* tags_[MAX_EVENTS]; + uint32_t start_ticks_[MAX_EVENTS]; + uint32_t end_ticks_[MAX_EVENTS]; int num_events_ = 0; + int num_tag_groups_ = 0; - struct TicksPerTag { + struct TagGroup { const char* tag; uint32_t ticks; + uint32_t tag_count; }; + // In practice, the number of tags will be much lower than the number of // events. But it is theoretically possible that each event to be unique and - // hence we allow total_ticks_per_tag to have kMaxEvents entries. - TicksPerTag total_ticks_per_tag[kMaxEvents] = {}; + // hence we allow total_ticks_per_tag to have MAX_EVENTS entries. + TagGroup tag_groups_[MAX_EVENTS] = {}; - int FindExistingOrNextPosition(const char* tag_name); + // Helper function to find the index of a tag in the cumulative array + TagGroup& GetTagGroup(const char* tag) { + for (int i = 0; i < num_tag_groups_; ++i) { + if (strcmp(tag_groups_[i].tag, tag) == 0) { + return tag_groups_[i]; + } + } + + // Tag not found, so we create a new entry + // There should always be space since the array of tag groups + // is just as big as the array of events + tag_groups_[num_tag_groups_].tag = tag; + tag_groups_[num_tag_groups_].ticks = 0; + tag_groups_[num_tag_groups_].tag_count = 0; + return tag_groups_[num_tag_groups_++]; + } TF_LITE_REMOVE_VIRTUAL_DELETE }; @@ -93,7 +216,7 @@ class MicroProfiler : public MicroProfilerInterface { #if defined(TF_LITE_STRIP_ERROR_STRINGS) // For release builds, the ScopedMicroProfiler is a noop. // -// This is done because the ScipedProfiler is used as part of the +// This is done because the ScopedProfiler is used as part of the // MicroInterpreter and we want to ensure zero overhead for the release builds. class ScopedMicroProfiler { public: diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index 5e8ad11519a..bb16d3c20f2 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -34,6 +34,14 @@ inline uint32_t TicksToMs(int32_t ticks) { static_cast(_ticks_per_second)); } +inline uint64_t TicksToUs(int32_t ticks) { + uint64_t _ticks_per_second = ticks_per_second(); + _ticks_per_second = + _ticks_per_second > 0 ? _ticks_per_second : 1; // zero divide prevention + return static_cast(1000000.0f * static_cast(ticks) / + static_cast(_ticks_per_second)); +} + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_