-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtscClock.h
130 lines (107 loc) · 4.59 KB
/
tscClock.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#pragma once
// #include <x86intrin.h>
#include <cmath>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include "util.h"
// Conversion ratios between nanoseconds, microseconds, milliseconds and seconds.
struct TimeConstant {
    // Ratios between adjacent units.
    static constexpr uint64_t skNsPerUs = uint64_t{1000};
    static constexpr uint64_t skUsPerMs = uint64_t{1000};
    // Nanoseconds in one millisecond (ns/us * us/ms).
    static constexpr uint64_t skNsPerMs = skNsPerUs * skUsPerMs;
    static constexpr uint64_t skMsPerSecond = uint64_t{1000};
    // Ratios to a full second, built from the ones above.
    static constexpr uint64_t skUsPerSecond = skUsPerMs * skMsPerSecond;
    static constexpr uint64_t skNsPerSecond = skNsPerMs * skMsPerSecond;
};
// note: CPU migrations cause loss of accuracy or even errors,
// so pin the thread to a single core, set cstate=0, and isolate the core
// Singleton helper built on the x86 time-stamp counter (TSC): reads raw ticks,
// converts them to seconds / nanoseconds, and provides busy-wait delays.
// All conversion factors are 1.0 (and the delay offset 0.0) until calibrate()
// has been run.
struct TscClock {
    // Default and minimum number of calibration rounds used by calibrate().
    static constexpr uint32_t kCalibrateLoopCnt = 71;
    // Each calibration round spins for skNsPerMs * kPauseMultiplier pause iterations.
    static constexpr uint32_t kPauseMultiplier = 17;

    // Returns the single shared instance (function-local static).
    static TscClock &getInstance() {
        static TscClock clockInstance;
        return clockInstance;
    }

    // The only instance lives inside getInstance(); copying it is disabled.
    TscClock(const TscClock &) = delete;
    TscClock &operator=(const TscClock &) = delete;

    // Prints the calibrated factors, one per line; the stream is flushed once at the end.
    friend inline std::ostream &operator<<(std::ostream &out, const TscClock &clock) {
        out << " ticksPerSecond:" << clock.ticksPerSecond_ << '\n'
            << " nsPerTick:" << clock.nsPerTick_ << '\n'
            << " ticksPerNs:" << clock.ticksPerNs_ << '\n'
            << " delayNsOffsetTicks_:" << clock.delayNsOffsetTicks_ << std::endl;
        return out;
    }

    // Measures the tick<->time factors and then the fixed overhead of delayNs().
    // loopCnt values below kCalibrateLoopCnt are raised to kCalibrateLoopCnt.
    void calibrate(uint32_t loopCnt = kCalibrateLoopCnt) {
        loopCnt = (loopCnt < kCalibrateLoopCnt) ? kCalibrateLoopCnt : loopCnt;
        calibrateTsc(loopCnt);
        calibrateDelayNsOffset(loopCnt);
    }

    // Current TSC value converted to nanoseconds with the calibrated factor.
    uint64_t rdNs() const { return tsc2Ns(rdTsc()); }

    // Raw TSC read.
    uint64_t rdTsc() const { return __builtin_ia32_rdtsc(); }

    // Converts a tick count to seconds.
    inline double tsc2Sec(uint64_t tsc) const { return tsc / ticksPerSecond_; }

    // Converts a tick count to nanoseconds (truncated toward zero).
    inline uint64_t tsc2Ns(uint64_t tsc) const { return static_cast<uint64_t>(tsc * nsPerTick_); }

    // Busy-waits until the TSC has advanced by at least `cycles` ticks.
    void delayCycles(uint64_t cycles) {
        const uint64_t endTick = rdTsc() + cycles;
        // Subtract as unsigned, then reinterpret as signed: stays well defined
        // across counter wrap-around, unlike subtracting two signed casts.
        while (static_cast<int64_t>(endTick - rdTsc()) > 0) {
            __builtin_ia32_pause();
        }
    }

    // todo: Implement delayNs using umwait/tpause.
    // Busy-waits for roughly `ns` nanoseconds. The calibrated per-call overhead
    // (delayNsOffsetTicks_) is subtracted from the wait; if nothing remains the
    // function returns immediately.
    NoOptimize void delayNs(uint64_t ns) {
        const uint64_t nowTick = rdTsc();
        // Compute the *relative* wait in floating point and add it to the integer
        // counter afterwards; mixing the absolute TSC value into double arithmetic
        // would drop low-order ticks once the counter exceeds 2^53.
        const double budgetTicks = ns * ticksPerNs_ - delayNsOffsetTicks_;
        if (!(budgetTicks > 0.0)) {  // also rejects NaN
            return;
        }
        const uint64_t endTick = nowTick + static_cast<uint64_t>(budgetTicks);
        while (static_cast<int64_t>(endTick - rdTsc()) > 0) {
            __builtin_ia32_pause();
        }
    }

private:
    TscClock() = default;
    ~TscClock() = default;

    // Derives ticksPerNs_, nsPerTick_ and ticksPerSecond_ by bracketing a pause
    // loop with CLOCK_MONOTONIC_RAW readings. Of all rounds, the one whose two
    // clock_gettime calls took the fewest TSC ticks in total is kept. Rounds with
    // a failed clock read or a non-positive interval are ignored; if every round
    // is ignored the factors keep their previous values.
    NoOptimize void calibrateTsc(uint32_t loopCnt = kCalibrateLoopCnt) {
        const uint64_t billion = TimeConstant::skNsPerSecond;
        std::timespec beginTime = {0, 0}, endTime = {0, 0};
        uint64_t deltaMin = UINT64_MAX;
        for (uint32_t i = 0; i < loopCnt; i++) {
            const uint64_t initialBeginTsc = rdTsc();
            const int beginRc = clock_gettime(CLOCK_MONOTONIC_RAW, &beginTime);
            const uint64_t initialEndTsc = rdTsc();
            for (uint64_t j = 0; j < TimeConstant::skNsPerMs * kPauseMultiplier; j++) {
                __builtin_ia32_pause();
            }
            const uint64_t terminateBeginTsc = rdTsc();
            const int endRc = clock_gettime(CLOCK_MONOTONIC_RAW, &endTime);
            const uint64_t terminateEndTsc = rdTsc();
            if (beginRc != 0 || endRc != 0) {
                continue;  // timestamps are not trustworthy
            }

            // Ticks spent inside the two clock_gettime calls: the smaller this is,
            // the tighter the TSC readings bracket the wall-clock readings.
            const uint64_t deltaInitial = initialEndTsc - initialBeginTsc;
            const uint64_t deltaTerminate = terminateEndTsc - terminateBeginTsc;
            const uint64_t deltaTotal = deltaInitial + deltaTerminate;
            if (deltaTotal >= deltaMin) {
                continue;
            }

            const int64_t elapsedNs =
                static_cast<int64_t>(endTime.tv_sec - beginTime.tv_sec) * static_cast<int64_t>(billion) +
                static_cast<int64_t>(endTime.tv_nsec - beginTime.tv_nsec);
            // A zero/backwards interval would turn the factors into inf/NaN.
            if (elapsedNs <= 0 || terminateBeginTsc <= initialEndTsc) {
                continue;
            }

            deltaMin = deltaTotal;
            const uint64_t intervalTsc = terminateBeginTsc - initialEndTsc;
            const double intervalNs = static_cast<double>(elapsedNs);
            ticksPerNs_ = intervalTsc / intervalNs;
            nsPerTick_ = intervalNs / intervalTsc;
            ticksPerSecond_ = intervalTsc / intervalNs * billion;
        }
    }

    // Measures the average TSC cost of a delayNs(0) call over
    // loopCnt * skNsPerMs iterations and stores it in delayNsOffsetTicks_.
    NoOptimize void calibrateDelayNsOffset(uint32_t loopCnt = kCalibrateLoopCnt) {
        // Reset first so the measured calls take the immediate-return path.
        delayNsOffsetTicks_ = 0.0;
        const uint64_t cnt = loopCnt * TimeConstant::skNsPerMs;
        if (cnt == 0) {
            return;  // nothing to average; avoids 0/0
        }
        const uint64_t beginTick = rdTsc();
        for (uint64_t i = 0; i < cnt; i++) {
            delayNs(0);
        }
        const uint64_t endTick = rdTsc();
        delayNsOffsetTicks_ = static_cast<double>(endTick - beginTick) / cnt;
    }

private:
    alignas(kDefaultCacheLineSize) double ticksPerSecond_ = 1.0;  // TSC ticks per second
    double nsPerTick_ = 1.0;                                      // nanoseconds per TSC tick
    double ticksPerNs_ = 1.0;                                     // TSC ticks per nanosecond
    double delayNsOffsetTicks_ = 0.0;                             // average tick cost of a delayNs(0) call
};