forked from Celebrandil/CudaSift
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcudautils.h
111 lines (99 loc) · 2.76 KB
/
cudautils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#ifndef CUDAUTILS_H
#define CUDAUTILS_H
#include <cstdio>
#include <iostream>
#ifdef WIN32
#include <intrin.h>
#endif
// Convenience wrappers that capture the call site (__FILE__/__LINE__) and
// forward to the matching __-prefixed inline helper defined below.
//
// safeCall(err): passes the CUDA status value `err` to __safeCall.
#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
// safeThreadSync(): invokes __safeThreadSync with the call site location.
#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__)
// checkMsg(msg): passes the caller-supplied message `msg` to __checkMsg.
#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
// Validates the status of a CUDA runtime call.
//
// err  - status value returned by the call being checked
// file - source file of the call site (reported in the diagnostic)
// line - source line of the call site (reported in the diagnostic)
//
// Returns normally when err is cudaSuccess. Otherwise prints a diagnostic to
// stderr and terminates the process with exit(-1).
inline void __safeCall(cudaError err, const char *file, const int line)
{
  // Nothing to report on success.
  if (err == cudaSuccess)
    return;

  const char *description = cudaGetErrorString(err);
  fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i : %s.\n",
          file, line, description);
  exit(-1);
}
inline void __safeThreadSync(const char *file, const int line)
{
cudaError err = cudaThreadSynchronize();
if (cudaSuccess != err) {
fprintf(stderr, "threadSynchronize() Driver API error in file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
exit(-1);
}
}
inline void __checkMsg(const char *errorMessage, const char *file, const int line)
{
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "checkMsg() CUDA error: %s in file <%s>, line %i : %s.\n", errorMessage, file, line, cudaGetErrorString(err));
exit(-1);
}
}
inline bool deviceInit(int dev)
{
int deviceCount;
safeCall(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
return false;
}
if (dev < 0) dev = 0;
if (dev > deviceCount-1) dev = deviceCount - 1;
cudaDeviceProp deviceProp;
safeCall(cudaGetDeviceProperties(&deviceProp, dev));
if (deviceProp.major < 1) {
fprintf(stderr, "error: device does not support CUDA.\n");
return false;
}
safeCall(cudaSetDevice(dev));
return true;
}
class TimerGPU {
public:
cudaEvent_t start, stop;
cudaStream_t stream;
TimerGPU(cudaStream_t stream_ = 0) : stream(stream_) {
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, stream);
}
~TimerGPU() {
cudaEventDestroy(start);
cudaEventDestroy(stop);
}
float read() {
cudaEventRecord(stop, stream);
cudaEventSynchronize(stop);
float time;
cudaEventElapsedTime(&time, start, stop);
return time;
}
};
// Host-side timer based on the x86 time-stamp counter (TSC).
//
// The counter is sampled at construction and again on every read(); the
// difference is converted to time using the clock frequency supplied by the
// caller. Counter values are stored divided by 2^bits.
class TimerCPU
{
  static const int bits = 10;  // counter samples are scaled down by 2^bits
public:
  long long beg_clock;  // scaled TSC sample taken at construction
  float freq;           // clock frequency in MHz, as supplied by the caller

  TimerCPU(float freq_) : freq(freq_) { // freq = clock frequency in MHz
    beg_clock = getTSC(bits);
  }

  // Returns the current time-stamp counter divided by 2^bits.
  long long getTSC(int bits) {
#ifdef WIN32
    return __rdtsc()/(1LL<<bits);
#else
    unsigned int low, high;
    // 0x0f 0x31 is the RDTSC opcode (low word in EAX, high word in EDX).
    // The asm must be volatile: without it the compiler may treat two reads
    // as identical and merge or hoist them, making elapsed time zero/stale.
    __asm__ __volatile__(".byte 0x0f, 0x31" :"=a" (low), "=d" (high));
    return ((long long)high<<(32-bits)) | ((long long)low>>bits);
#endif
  }

  // Returns the time elapsed since construction. With freq given in MHz the
  // result is in milliseconds: cycles / MHz = microseconds, / 1e3 = ms.
  float read() {
    long long end_clock = getTSC(bits);
    long long Kcycles = end_clock - beg_clock;
    float time = (float)(1<<bits)*Kcycles/freq/1e3f;
    return time;
  }
};
#endif