NvElementProfiler.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*
  2. * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of NVIDIA CORPORATION nor the names of its
  13. * contributors may be used to endorse or promote products derived
  14. * from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
  17. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  20. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  21. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  22. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  23. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  24. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. */
  28. #include <iostream>
  29. #include <string.h>
  30. #include <map>
  31. #include <stdint.h>
  32. #include "NvElementProfiler.h"
  /* Acquire / release the mutex that guards the profiler state. Both macros
   * expect a pthread_mutex_t named profiler_lock to be in scope. */
  #define LOCK() pthread_mutex_lock(&profiler_lock)
  #define UNLOCK() pthread_mutex_unlock(&profiler_lock)

  /* Leave the enclosing void function, releasing the lock first, when the
   * in-scope flag `enabled` is false. Wrapped in do/while(0) so the macro
   * behaves as a single statement (safe inside an unbraced if/else). */
  #define RETURN_IF_DISABLED() \
      do { \
          if (!enabled) { \
              UNLOCK(); \
              return; \
          } \
      } while (0)

  /* Store the current wall-clock time in *timeval. No trailing semicolon here:
   * the call site supplies it, so the macro expands to exactly one statement. */
  #define GET_TIME(timeval) gettimeofday((timeval), NULL)

  /* Difference (timespec1 - timespec2) in microseconds. Despite the name the
   * arguments are pointers to struct timeval (tv_sec / tv_usec). The 64-bit
   * multiplier keeps the product from overflowing where long is 32 bits wide. */
  #define TIMESPEC_DIFF_USEC(timespec1, timespec2) \
      (((timespec1)->tv_sec - (timespec2)->tv_sec) * 1000000LL + \
       (timespec1)->tv_usec - (timespec2)->tv_usec)

  using namespace std;
  45. NvElementProfiler::NvElementProfiler(ProfilerField fields)
  46. :valid_fields(fields)
  47. {
  48. enabled = false;
  49. unit_id_counter = 0;
  50. reset();
  51. pthread_mutex_init(&profiler_lock, NULL);
  52. }
  53. NvElementProfiler::~NvElementProfiler()
  54. {
  55. LOCK();
  56. reset();
  57. UNLOCK();
  58. pthread_mutex_destroy(&profiler_lock);
  59. }
  60. void
  61. NvElementProfiler::enableProfiling(bool reset_data)
  62. {
  63. LOCK();
  64. if (enabled)
  65. {
  66. UNLOCK();
  67. return;
  68. }
  69. if(reset_data)
  70. {
  71. reset();
  72. }
  73. enabled = true;
  74. UNLOCK();
  75. }
  76. void
  77. NvElementProfiler::disableProfiling()
  78. {
  79. LOCK();
  80. RETURN_IF_DISABLED();
  81. data_int.accumulated_time.tv_sec +=
  82. (data_int.stop_time.tv_sec - data_int.start_time.tv_sec);
  83. data_int.accumulated_time.tv_usec +=
  84. (data_int.stop_time.tv_usec - data_int.start_time.tv_usec);
  85. data_int.start_time.tv_sec = 0;
  86. data_int.start_time.tv_usec = 0;
  87. data_int.stop_time.tv_sec = 0;
  88. data_int.stop_time.tv_usec = 0;
  89. enabled = false;
  90. UNLOCK();
  91. }
  92. void NvElementProfiler::getProfilerData(NvElementProfiler::NvElementProfilerData &data)
  93. {
  94. uint64_t total_time;
  95. LOCK();
  96. total_time = data_int.accumulated_time.tv_sec * 1000000L +
  97. data_int.accumulated_time.tv_usec +
  98. TIMESPEC_DIFF_USEC(&data_int.stop_time, &data_int.start_time);
  99. if (data_int.total_processed_units == 0 || total_time == 0)
  100. {
  101. data.average_fps = 0;
  102. }
  103. else
  104. {
  105. data.average_fps = ((float) (data_int.total_processed_units - 1)) *
  106. 1000000 / total_time;
  107. }
  108. if (data_int.total_processed_units == 0)
  109. {
  110. data.max_latency_usec = 0;
  111. data.min_latency_usec = 0;
  112. data.average_latency_usec = 0;
  113. }
  114. else
  115. {
  116. data.max_latency_usec = data_int.max_latency_usec;
  117. data.min_latency_usec = data_int.min_latency_usec;
  118. data.average_latency_usec =
  119. data_int.total_latency / data_int.total_processed_units;
  120. }
  121. data.profiling_time.tv_sec =
  122. data_int.accumulated_time.tv_sec + data_int.stop_time.tv_sec -
  123. data_int.start_time.tv_sec;
  124. data.profiling_time.tv_usec =
  125. data_int.accumulated_time.tv_usec + data_int.stop_time.tv_usec -
  126. data_int.start_time.tv_usec;
  127. if (data.profiling_time.tv_usec < 0)
  128. {
  129. data.profiling_time.tv_usec += 1000000;
  130. data.profiling_time.tv_sec--;
  131. }
  132. if (data.profiling_time.tv_usec > 1000000)
  133. {
  134. data.profiling_time.tv_usec -= 1000000;
  135. data.profiling_time.tv_sec++;
  136. }
  137. data.total_processed_units = data_int.total_processed_units;
  138. data.num_late_units = data_int.num_late_units;
  139. data.valid_fields = valid_fields;
  140. UNLOCK();
  141. }
  142. void NvElementProfiler::printProfilerData(ostream &out_stream)
  143. {
  144. NvElementProfilerData data;
  145. getProfilerData(data);
  146. if (data.valid_fields & PROFILER_FIELD_FPS)
  147. {
  148. out_stream << "Total Profiling time = " <<
  149. (data.profiling_time.tv_sec +
  150. (data.profiling_time.tv_usec / 1000000.0)) << endl;
  151. out_stream << "Average FPS = " << data.average_fps << endl;
  152. }
  153. if (data.valid_fields & PROFILER_FIELD_TOTAL_UNITS)
  154. {
  155. out_stream << "Total units processed = " <<
  156. data.total_processed_units << endl;
  157. }
  158. if (data.valid_fields & PROFILER_FIELD_LATE_UNITS)
  159. {
  160. out_stream << "Num. of late units = " <<
  161. data.num_late_units << endl;
  162. }
  163. if (data.valid_fields & PROFILER_FIELD_LATENCIES)
  164. {
  165. out_stream << "Average latency(usec) = " <<
  166. data.average_latency_usec << endl;
  167. out_stream << "Minimum latency(usec) = " <<
  168. data.min_latency_usec << endl;
  169. out_stream << "Maximum latency(usec) = " <<
  170. data.max_latency_usec << endl;
  171. }
  172. }
  173. void
  174. NvElementProfiler::reset()
  175. {
  176. memset(&data_int, 0, sizeof(data_int));
  177. data_int.min_latency_usec = (uint64_t) -1;
  178. unit_start_time_queue.clear();
  179. }
  180. uint64_t
  181. NvElementProfiler::startProcessing()
  182. {
  183. struct timeval time;
  184. uint64_t ret = 0;
  185. LOCK();
  186. if (enabled)
  187. {
  188. std::map<uint64_t,struct timeval>::iterator it =
  189. unit_start_time_queue.end();
  190. unit_id_counter++;
  191. GET_TIME(&time);
  192. unit_start_time_queue.insert(it,
  193. std::pair<uint64_t,struct timeval>(unit_id_counter, time));
  194. ret = unit_id_counter;
  195. }
  196. UNLOCK();
  197. return ret;
  198. }
  199. void
  200. NvElementProfiler::finishProcessing(uint64_t id, bool is_late)
  201. {
  202. struct timeval unit_start_time;
  203. struct timeval stop_time;
  204. uint64_t latency;
  205. LOCK();
  206. RETURN_IF_DISABLED();
  207. if ((valid_fields & PROFILER_FIELD_LATENCIES) &&
  208. unit_start_time_queue.empty())
  209. {
  210. UNLOCK();
  211. return;
  212. }
  213. GET_TIME(&stop_time);
  214. if (valid_fields & PROFILER_FIELD_LATENCIES)
  215. {
  216. std::map<uint64_t, struct timeval>::iterator it;
  217. if (id)
  218. {
  219. it = unit_start_time_queue.find(id);
  220. }
  221. else
  222. {
  223. it = unit_start_time_queue.begin();
  224. }
  225. if (it == unit_start_time_queue.end())
  226. {
  227. UNLOCK();
  228. return;
  229. }
  230. unit_start_time = it->second;
  231. unit_start_time_queue.erase(it);
  232. latency = TIMESPEC_DIFF_USEC(&stop_time, &unit_start_time);
  233. data_int.total_latency += latency;
  234. if (latency < data_int.min_latency_usec)
  235. {
  236. data_int.min_latency_usec = latency;
  237. }
  238. if(latency > data_int.max_latency_usec)
  239. {
  240. data_int.max_latency_usec = latency;
  241. }
  242. }
  243. data_int.stop_time = stop_time;
  244. if (!data_int.start_time.tv_sec && !data_int.start_time.tv_usec)
  245. {
  246. data_int.start_time = data_int.stop_time;
  247. }
  248. if (is_late)
  249. {
  250. data_int.num_late_units++;
  251. }
  252. data_int.total_processed_units++;
  253. UNLOCK();
  254. }