btQuickprof.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. /*
  2. ***************************************************************************************************
  3. **
  4. ** profile.cpp
  5. **
  6. ** Real-Time Hierarchical Profiling for Game Programming Gems 3
  7. **
  8. ** by Greg Hjelstrom & Byon Garrabrant
  9. **
  10. ***************************************************************************************************/
  11. // Credits: The Clock class was inspired by the Timer classes in
  12. // Ogre (www.ogre3d.org).
  13. #include "btQuickprof.h"
  14. #ifndef BT_NO_PROFILE
  15. static btClock gProfileClock;
  16. #ifdef __CELLOS_LV2__
  17. #include <sys/sys_time.h>
  18. #include <sys/time_util.h>
  19. #include <stdio.h>
  20. #endif
  21. #if defined (SUNOS) || defined (__SUNOS__)
  22. #include <stdio.h>
  23. #endif
  24. #if defined(WIN32) || defined(_WIN32)
  25. #define BT_USE_WINDOWS_TIMERS
  26. #define WIN32_LEAN_AND_MEAN
  27. #define NOWINRES
  28. #define NOMCX
  29. #define NOIME
  30. #ifdef WINRT
  31. #define GetTickCount GetTickCount64
  32. #endif
  33. #ifdef _XBOX
  34. #include <Xtl.h>
  35. #else //_XBOX
  36. #include <windows.h>
  37. #endif //_XBOX
  38. #include <time.h>
  39. #else //_WIN32
  40. #include <sys/time.h>
  41. #endif //_WIN32
  42. #define mymin(a,b) (a > b ? a : b)
  43. struct btClockData
  44. {
  45. #ifdef BT_USE_WINDOWS_TIMERS
  46. LARGE_INTEGER mClockFrequency;
  47. #ifdef WINRT
  48. LONGLONG mStartTick;
  49. #else
  50. DWORD mStartTick;
  51. #endif
  52. LONGLONG mPrevElapsedTime;
  53. LARGE_INTEGER mStartTime;
  54. #else
  55. #ifdef __CELLOS_LV2__
  56. uint64_t mStartTime;
  57. #else
  58. struct timeval mStartTime;
  59. #endif
  60. #endif //__CELLOS_LV2__
  61. };
  62. ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
  63. btClock::btClock()
  64. {
  65. m_data = new btClockData;
  66. #ifdef BT_USE_WINDOWS_TIMERS
  67. QueryPerformanceFrequency(&m_data->mClockFrequency);
  68. #endif
  69. reset();
  70. }
  71. btClock::~btClock()
  72. {
  73. delete m_data;
  74. }
  75. btClock::btClock(const btClock& other)
  76. {
  77. m_data = new btClockData;
  78. *m_data = *other.m_data;
  79. }
  80. btClock& btClock::operator=(const btClock& other)
  81. {
  82. *m_data = *other.m_data;
  83. return *this;
  84. }
  85. /// Resets the initial reference time.
  86. void btClock::reset()
  87. {
  88. #ifdef BT_USE_WINDOWS_TIMERS
  89. QueryPerformanceCounter(&m_data->mStartTime);
  90. m_data->mStartTick = GetTickCount();
  91. m_data->mPrevElapsedTime = 0;
  92. #else
  93. #ifdef __CELLOS_LV2__
  94. typedef uint64_t ClockSize;
  95. ClockSize newTime;
  96. //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
  97. SYS_TIMEBASE_GET( newTime );
  98. m_data->mStartTime = newTime;
  99. #else
  100. gettimeofday(&m_data->mStartTime, 0);
  101. #endif
  102. #endif
  103. }
  104. /// Returns the time in ms since the last call to reset or since
  105. /// the btClock was created.
  106. unsigned long int btClock::getTimeMilliseconds()
  107. {
  108. #ifdef BT_USE_WINDOWS_TIMERS
  109. LARGE_INTEGER currentTime;
  110. QueryPerformanceCounter(&currentTime);
  111. LONGLONG elapsedTime = currentTime.QuadPart -
  112. m_data->mStartTime.QuadPart;
  113. // Compute the number of millisecond ticks elapsed.
  114. unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
  115. m_data->mClockFrequency.QuadPart);
  116. // Check for unexpected leaps in the Win32 performance counter.
  117. // (This is caused by unexpected data across the PCI to ISA
  118. // bridge, aka south bridge. See Microsoft KB274323.)
  119. unsigned long elapsedTicks = static_cast<unsigned long>(GetTickCount() - m_data->mStartTick);
  120. signed long msecOff = (signed long)(msecTicks - elapsedTicks);
  121. if (msecOff < -100 || msecOff > 100)
  122. {
  123. // Adjust the starting time forwards.
  124. LONGLONG msecAdjustment = mymin(msecOff *
  125. m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
  126. m_data->mPrevElapsedTime);
  127. m_data->mStartTime.QuadPart += msecAdjustment;
  128. elapsedTime -= msecAdjustment;
  129. // Recompute the number of millisecond ticks elapsed.
  130. msecTicks = (unsigned long)(1000 * elapsedTime /
  131. m_data->mClockFrequency.QuadPart);
  132. }
  133. // Store the current elapsed time for adjustments next time.
  134. m_data->mPrevElapsedTime = elapsedTime;
  135. return msecTicks;
  136. #else
  137. #ifdef __CELLOS_LV2__
  138. uint64_t freq=sys_time_get_timebase_frequency();
  139. double dFreq=((double) freq) / 1000.0;
  140. typedef uint64_t ClockSize;
  141. ClockSize newTime;
  142. SYS_TIMEBASE_GET( newTime );
  143. //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
  144. return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
  145. #else
  146. struct timeval currentTime;
  147. gettimeofday(&currentTime, 0);
  148. return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
  149. (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
  150. #endif //__CELLOS_LV2__
  151. #endif
  152. }
  153. /// Returns the time in us since the last call to reset or since
  154. /// the Clock was created.
  155. unsigned long int btClock::getTimeMicroseconds()
  156. {
  157. #ifdef BT_USE_WINDOWS_TIMERS
  158. LARGE_INTEGER currentTime;
  159. QueryPerformanceCounter(&currentTime);
  160. LONGLONG elapsedTime = currentTime.QuadPart -
  161. m_data->mStartTime.QuadPart;
  162. // Compute the number of millisecond ticks elapsed.
  163. unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
  164. m_data->mClockFrequency.QuadPart);
  165. // Check for unexpected leaps in the Win32 performance counter.
  166. // (This is caused by unexpected data across the PCI to ISA
  167. // bridge, aka south bridge. See Microsoft KB274323.)
  168. unsigned long elapsedTicks = static_cast<unsigned long>(GetTickCount() - m_data->mStartTick);
  169. signed long msecOff = (signed long)(msecTicks - elapsedTicks);
  170. if (msecOff < -100 || msecOff > 100)
  171. {
  172. // Adjust the starting time forwards.
  173. LONGLONG msecAdjustment = mymin(msecOff *
  174. m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
  175. m_data->mPrevElapsedTime);
  176. m_data->mStartTime.QuadPart += msecAdjustment;
  177. elapsedTime -= msecAdjustment;
  178. }
  179. // Store the current elapsed time for adjustments next time.
  180. m_data->mPrevElapsedTime = elapsedTime;
  181. // Convert to microseconds.
  182. unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
  183. m_data->mClockFrequency.QuadPart);
  184. return usecTicks;
  185. #else
  186. #ifdef __CELLOS_LV2__
  187. uint64_t freq=sys_time_get_timebase_frequency();
  188. double dFreq=((double) freq)/ 1000000.0;
  189. typedef uint64_t ClockSize;
  190. ClockSize newTime;
  191. //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
  192. SYS_TIMEBASE_GET( newTime );
  193. return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
  194. #else
  195. struct timeval currentTime;
  196. gettimeofday(&currentTime, 0);
  197. return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
  198. (currentTime.tv_usec - m_data->mStartTime.tv_usec);
  199. #endif//__CELLOS_LV2__
  200. #endif
  201. }
  202. inline void Profile_Get_Ticks(unsigned long int * ticks)
  203. {
  204. *ticks = gProfileClock.getTimeMicroseconds();
  205. }
  206. inline float Profile_Get_Tick_Rate(void)
  207. {
  208. // return 1000000.f;
  209. return 1000.f;
  210. }
  211. /***************************************************************************************************
  212. **
  213. ** CProfileNode
  214. **
  215. ***************************************************************************************************/
  216. /***********************************************************************************************
  217. * INPUT: *
  218. * name - pointer to a static string which is the name of this profile node *
  219. * parent - parent pointer *
  220. * *
  221. * WARNINGS: *
  222. * The name is assumed to be a static pointer, only the pointer is stored and compared for *
  223. * efficiency reasons. *
  224. *=============================================================================================*/
  225. CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
  226. Name( name ),
  227. TotalCalls( 0 ),
  228. TotalTime( 0 ),
  229. StartTime( 0 ),
  230. RecursionCounter( 0 ),
  231. Parent( parent ),
  232. Child( NULL ),
  233. Sibling( NULL ),
  234. m_userPtr(0)
  235. {
  236. Reset();
  237. }
  238. void CProfileNode::CleanupMemory()
  239. {
  240. delete ( Child);
  241. Child = NULL;
  242. delete ( Sibling);
  243. Sibling = NULL;
  244. }
  245. CProfileNode::~CProfileNode( void )
  246. {
  247. delete ( Child);
  248. delete ( Sibling);
  249. }
  250. /***********************************************************************************************
  251. * INPUT: *
  252. * name - static string pointer to the name of the node we are searching for *
  253. * *
  254. * WARNINGS: *
  255. * All profile names are assumed to be static strings so this function uses pointer compares *
  256. * to find the named node. *
  257. *=============================================================================================*/
  258. CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
  259. {
  260. // Try to find this sub node
  261. CProfileNode * child = Child;
  262. while ( child ) {
  263. if ( child->Name == name ) {
  264. return child;
  265. }
  266. child = child->Sibling;
  267. }
  268. // We didn't find it, so add it
  269. CProfileNode * node = new CProfileNode( name, this );
  270. node->Sibling = Child;
  271. Child = node;
  272. return node;
  273. }
  274. void CProfileNode::Reset( void )
  275. {
  276. TotalCalls = 0;
  277. TotalTime = 0.0f;
  278. if ( Child ) {
  279. Child->Reset();
  280. }
  281. if ( Sibling ) {
  282. Sibling->Reset();
  283. }
  284. }
  285. void CProfileNode::Call( void )
  286. {
  287. TotalCalls++;
  288. if (RecursionCounter++ == 0) {
  289. Profile_Get_Ticks(&StartTime);
  290. }
  291. }
  292. bool CProfileNode::Return( void )
  293. {
  294. if ( --RecursionCounter == 0 && TotalCalls != 0 ) {
  295. unsigned long int time;
  296. Profile_Get_Ticks(&time);
  297. time-=StartTime;
  298. TotalTime += (float)time / Profile_Get_Tick_Rate();
  299. }
  300. return ( RecursionCounter == 0 );
  301. }
  302. /***************************************************************************************************
  303. **
  304. ** CProfileIterator
  305. **
  306. ***************************************************************************************************/
  307. CProfileIterator::CProfileIterator( CProfileNode * start )
  308. {
  309. CurrentParent = start;
  310. CurrentChild = CurrentParent->Get_Child();
  311. }
  312. void CProfileIterator::First(void)
  313. {
  314. CurrentChild = CurrentParent->Get_Child();
  315. }
  316. void CProfileIterator::Next(void)
  317. {
  318. CurrentChild = CurrentChild->Get_Sibling();
  319. }
  320. bool CProfileIterator::Is_Done(void)
  321. {
  322. return CurrentChild == NULL;
  323. }
  324. void CProfileIterator::Enter_Child( int index )
  325. {
  326. CurrentChild = CurrentParent->Get_Child();
  327. while ( (CurrentChild != NULL) && (index != 0) ) {
  328. index--;
  329. CurrentChild = CurrentChild->Get_Sibling();
  330. }
  331. if ( CurrentChild != NULL ) {
  332. CurrentParent = CurrentChild;
  333. CurrentChild = CurrentParent->Get_Child();
  334. }
  335. }
  336. void CProfileIterator::Enter_Parent( void )
  337. {
  338. if ( CurrentParent->Get_Parent() != NULL ) {
  339. CurrentParent = CurrentParent->Get_Parent();
  340. }
  341. CurrentChild = CurrentParent->Get_Child();
  342. }
  343. /***************************************************************************************************
  344. **
  345. ** CProfileManager
  346. **
  347. ***************************************************************************************************/
  348. CProfileNode CProfileManager::Root( "Root", NULL );
  349. CProfileNode * CProfileManager::CurrentNode = &CProfileManager::Root;
  350. int CProfileManager::FrameCounter = 0;
  351. unsigned long int CProfileManager::ResetTime = 0;
  352. /***********************************************************************************************
  353. * CProfileManager::Start_Profile -- Begin a named profile *
  354. * *
  355. * Steps one level deeper into the tree, if a child already exists with the specified name *
  356. * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
  357. * *
  358. * INPUT: *
  359. * name - name of this profiling record *
  360. * *
  361. * WARNINGS: *
  362. * The string used is assumed to be a static string; pointer compares are used throughout *
  363. * the profiling code for efficiency. *
  364. *=============================================================================================*/
  365. void CProfileManager::Start_Profile( const char * name )
  366. {
  367. if (name != CurrentNode->Get_Name()) {
  368. CurrentNode = CurrentNode->Get_Sub_Node( name );
  369. }
  370. CurrentNode->Call();
  371. }
  372. /***********************************************************************************************
  373. * CProfileManager::Stop_Profile -- Stop timing and record the results. *
  374. *=============================================================================================*/
  375. void CProfileManager::Stop_Profile( void )
  376. {
  377. // Return will indicate whether we should back up to our parent (we may
  378. // be profiling a recursive function)
  379. if (CurrentNode->Return()) {
  380. CurrentNode = CurrentNode->Get_Parent();
  381. }
  382. }
  383. /***********************************************************************************************
  384. * CProfileManager::Reset -- Reset the contents of the profiling system *
  385. * *
  386. * This resets everything except for the tree structure. All of the timing data is reset. *
  387. *=============================================================================================*/
  388. void CProfileManager::Reset( void )
  389. {
  390. gProfileClock.reset();
  391. Root.Reset();
  392. Root.Call();
  393. FrameCounter = 0;
  394. Profile_Get_Ticks(&ResetTime);
  395. }
  396. /***********************************************************************************************
  397. * CProfileManager::Increment_Frame_Counter -- Increment the frame counter *
  398. *=============================================================================================*/
  399. void CProfileManager::Increment_Frame_Counter( void )
  400. {
  401. FrameCounter++;
  402. }
  403. /***********************************************************************************************
  404. * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset *
  405. *=============================================================================================*/
  406. float CProfileManager::Get_Time_Since_Reset( void )
  407. {
  408. unsigned long int time;
  409. Profile_Get_Ticks(&time);
  410. time -= ResetTime;
  411. return (float)time / Profile_Get_Tick_Rate();
  412. }
  413. #include <stdio.h>
  414. void CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
  415. {
  416. profileIterator->First();
  417. if (profileIterator->Is_Done())
  418. return;
  419. float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
  420. int i;
  421. int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
  422. for (i=0;i<spacing;i++) printf(".");
  423. printf("----------------------------------\n");
  424. for (i=0;i<spacing;i++) printf(".");
  425. printf("Profiling: %s (total running time: %.3f ms) ---\n", profileIterator->Get_Current_Parent_Name(), parent_time );
  426. float totalTime = 0.f;
  427. int numChildren = 0;
  428. for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
  429. {
  430. numChildren++;
  431. float current_total_time = profileIterator->Get_Current_Total_Time();
  432. accumulated_time += current_total_time;
  433. float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
  434. {
  435. int i; for (i=0;i<spacing;i++) printf(".");
  436. }
  437. printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
  438. totalTime += current_total_time;
  439. //recurse into children
  440. }
  441. if (parent_time < accumulated_time)
  442. {
  443. printf("what's wrong\n");
  444. }
  445. for (i=0;i<spacing;i++) printf(".");
  446. printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
  447. for (i=0;i<numChildren;i++)
  448. {
  449. profileIterator->Enter_Child(i);
  450. dumpRecursive(profileIterator,spacing+3);
  451. profileIterator->Enter_Parent();
  452. }
  453. }
  454. void CProfileManager::dumpAll()
  455. {
  456. CProfileIterator* profileIterator = 0;
  457. profileIterator = CProfileManager::Get_Iterator();
  458. dumpRecursive(profileIterator,0);
  459. CProfileManager::Release_Iterator(profileIterator);
  460. }
  461. #endif //BT_NO_PROFILE