|
| 1 | +#ifndef _SYRAH_CYCLE_TIMER_H_ |
| 2 | +#define _SYRAH_CYCLE_TIMER_H_ |
| 3 | + |
| 4 | +#if defined(__APPLE__) |
| 5 | + #if defined(__x86_64__) |
| 6 | + #include <sys/sysctl.h> |
| 7 | + #else |
| 8 | + #include <mach/mach.h> |
| 9 | + #include <mach/mach_time.h> |
| 10 | + #endif // __x86_64__ or not |
| 11 | + |
| 12 | + #include <stdio.h> // fprintf |
| 13 | + #include <stdlib.h> // exit |
| 14 | + |
| 15 | +#elif _WIN32 |
| 16 | +# include <windows.h> |
| 17 | +# include <time.h> |
| 18 | +#else |
| 19 | +# include <stdio.h> |
| 20 | +# include <stdlib.h> |
| 21 | +# include <string.h> |
| 22 | +# include <sys/time.h> |
| 23 | +#endif |
| 24 | + |
| 25 | + |
| 26 | + // This uses the cycle counter of the processor. Different |
| 27 | + // processors in the system will have different values for this. If |
| 28 | + // you process moves across processors, then the delta time you |
| 29 | + // measure will likely be incorrect. This is mostly for fine |
| 30 | + // grained measurements where the process is likely to be on the |
| 31 | + // same processor. For more global things you should use the |
| 32 | + // Time interface. |
| 33 | + |
| 34 | + // Also note that if you processors' speeds change (i.e. processors |
| 35 | + // scaling) or if you are in a heterogenous environment, you will |
| 36 | + // likely get spurious results. |
| 37 | + class CycleTimer { |
| 38 | + public: |
| 39 | + typedef unsigned long long SysClock; |
| 40 | + |
| 41 | + ////////// |
| 42 | + // Return the current CPU time, in terms of clock ticks. |
| 43 | + // Time zero is at some arbitrary point in the past. |
| 44 | + static SysClock currentTicks() { |
| 45 | +#if defined(__APPLE__) && !defined(__x86_64__) |
| 46 | + return mach_absolute_time(); |
| 47 | +#elif defined(_WIN32) |
| 48 | + LARGE_INTEGER qwTime; |
| 49 | + QueryPerformanceCounter(&qwTime); |
| 50 | + return qwTime.QuadPart; |
| 51 | +#elif defined(__x86_64__) |
| 52 | + unsigned int a, d; |
| 53 | + asm volatile("rdtsc" : "=a" (a), "=d" (d)); |
| 54 | + return static_cast<unsigned long long>(a) | |
| 55 | + (static_cast<unsigned long long>(d) << 32); |
| 56 | +#elif defined(__ARM_NEON__) && 0 // mrc requires superuser. |
| 57 | + unsigned int val; |
| 58 | + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); |
| 59 | + return val; |
| 60 | +#else |
| 61 | + timespec spec; |
| 62 | + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); |
| 63 | + return CycleTimer::SysClock(static_cast<float>(spec.tv_sec) * 1e9 + static_cast<float>(spec.tv_nsec)); |
| 64 | +#endif |
| 65 | + } |
| 66 | + |
| 67 | + ////////// |
| 68 | + // Return the current CPU time, in terms of seconds. |
| 69 | + // This is slower than currentTicks(). Time zero is at |
| 70 | + // some arbitrary point in the past. |
| 71 | + static double currentSeconds() { |
| 72 | + return currentTicks() * secondsPerTick(); |
| 73 | + } |
| 74 | + |
| 75 | + ////////// |
| 76 | + // Return the conversion from seconds to ticks. |
| 77 | + static double ticksPerSecond() { |
| 78 | + return 1.0/secondsPerTick(); |
| 79 | + } |
| 80 | + |
| 81 | + static const char* tickUnits() { |
| 82 | +#if defined(__APPLE__) && !defined(__x86_64__) |
| 83 | + return "ns"; |
| 84 | +#elif defined(__WIN32__) || defined(__x86_64__) |
| 85 | + return "cycles"; |
| 86 | +#else |
| 87 | + return "ns"; // clock_gettime |
| 88 | +#endif |
| 89 | + } |
| 90 | + |
| 91 | + ////////// |
| 92 | + // Return the conversion from ticks to seconds. |
| 93 | + static double secondsPerTick() { |
| 94 | + static bool initialized = false; |
| 95 | + static double secondsPerTick_val; |
| 96 | + if (initialized) return secondsPerTick_val; |
| 97 | +#if defined(__APPLE__) |
| 98 | + #ifdef __x86_64__ |
| 99 | + int args[] = {CTL_HW, HW_CPU_FREQ}; |
| 100 | + unsigned int Hz; |
| 101 | + size_t len = sizeof(Hz); |
| 102 | + if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) { |
| 103 | + fprintf(stderr, "Failed to initialize secondsPerTick_val!\n"); |
| 104 | + exit(-1); |
| 105 | + } |
| 106 | + secondsPerTick_val = 1.0 / (double) Hz; |
| 107 | + #else |
| 108 | + mach_timebase_info_data_t time_info; |
| 109 | + mach_timebase_info(&time_info); |
| 110 | + |
| 111 | + // Scales to nanoseconds without 1e-9f |
| 112 | + secondsPerTick_val = (1e-9*static_cast<double>(time_info.numer))/ |
| 113 | + static_cast<double>(time_info.denom); |
| 114 | + #endif // x86_64 or not |
| 115 | +#elif defined(_WIN32) |
| 116 | + LARGE_INTEGER qwTicksPerSec; |
| 117 | + QueryPerformanceFrequency(&qwTicksPerSec); |
| 118 | + secondsPerTick_val = 1.0/static_cast<double>(qwTicksPerSec.QuadPart); |
| 119 | +#else |
| 120 | + FILE *fp = fopen("/proc/cpuinfo","r"); |
| 121 | + char input[1024]; |
| 122 | + if (!fp) { |
| 123 | + fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo."); |
| 124 | + exit(-1); |
| 125 | + } |
| 126 | + // In case we don't find it, e.g. on the N900 |
| 127 | + secondsPerTick_val = 1e-9; |
| 128 | + while (!feof(fp) && fgets(input, 1024, fp)) { |
| 129 | + // NOTE(boulos): Because reading cpuinfo depends on dynamic |
| 130 | + // frequency scaling it's better to read the @ sign first |
| 131 | + float GHz, MHz; |
| 132 | + if (strstr(input, "model name")) { |
| 133 | + char* at_sign = strstr(input, "@"); |
| 134 | + if (at_sign) { |
| 135 | + char* after_at = at_sign + 1; |
| 136 | + char* GHz_str = strstr(after_at, "GHz"); |
| 137 | + char* MHz_str = strstr(after_at, "MHz"); |
| 138 | + if (GHz_str) { |
| 139 | + *GHz_str = '\0'; |
| 140 | + if (1 == sscanf(after_at, "%f", &GHz)) { |
| 141 | + //printf("GHz = %f\n", GHz); |
| 142 | + secondsPerTick_val = 1e-9f / GHz; |
| 143 | + break; |
| 144 | + } |
| 145 | + } else if (MHz_str) { |
| 146 | + *MHz_str = '\0'; |
| 147 | + if (1 == sscanf(after_at, "%f", &MHz)) { |
| 148 | + //printf("MHz = %f\n", MHz); |
| 149 | + secondsPerTick_val = 1e-6f / GHz; |
| 150 | + break; |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) { |
| 155 | + //printf("MHz = %f\n", MHz); |
| 156 | + secondsPerTick_val = 1e-6f / MHz; |
| 157 | + break; |
| 158 | + } |
| 159 | + } |
| 160 | + fclose(fp); |
| 161 | +#endif |
| 162 | + |
| 163 | + initialized = true; |
| 164 | + return secondsPerTick_val; |
| 165 | + } |
| 166 | + |
| 167 | + ////////// |
| 168 | + // Return the conversion from ticks to milliseconds. |
| 169 | + static double msPerTick() { |
| 170 | + return secondsPerTick() * 1000.0; |
| 171 | + } |
| 172 | + |
| 173 | + private: |
| 174 | + CycleTimer(); |
| 175 | + }; |
| 176 | + |
| 177 | +#endif // #ifndef _SYRAH_CYCLE_TIMER_H_ |
0 commit comments