-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
174 lines (111 loc) · 4.72 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
##
import os
import copy
import ctypes
from ctypes import *
import cupy as cp
import numpy as np
import matplotlib.pyplot as plt
##
pnsz = np.asarray([100, 50, 30], dtype=np.int32)
mul = np.random.randn()
add = np.random.randn()
src = np.random.randn(pnsz[0], pnsz[1], pnsz[2]).astype(dtype=np.float32)
## Numpy in CPU
src_np = copy.deepcopy(src)
dst_np = src_np
dst_np = dst_np * mul
dst_np = dst_np + add
dst_np = dst_np * dst_np
dst_np = dst_np + dst_np
## Clang in CPU
clang_file = os.path.join(os.path.dirname(__file__), 'libmath_clang.so')
_math_clang = ctypes.CDLL(clang_file)
__mul_const_clang = _math_clang.mul_const
__add_const_clang = _math_clang.add_const
__mul_mat_clang = _math_clang.mul_mat
__add_mat_clang = _math_clang.add_mat
__mul_const_clang.argtypes = (POINTER(c_float), POINTER(c_float), c_float, POINTER(c_int))
__mul_const_clang.restypes = c_void_p
__add_const_clang.argtypes = (POINTER(c_float), POINTER(c_float), c_float, POINTER(c_int))
__add_const_clang.restypes = c_void_p
__mul_mat_clang.argtypes = (POINTER(c_float), POINTER(c_float), POINTER(c_float), POINTER(c_int))
__mul_mat_clang.restypes = c_void_p
__add_mat_clang.argtypes = (POINTER(c_float), POINTER(c_float), POINTER(c_float), POINTER(c_int))
__add_mat_clang.restypes = c_void_p
c_float_p = lambda x: x.ctypes.data_as(POINTER(c_float))
c_int_p = lambda x: x.ctypes.data_as(POINTER(c_int))
src_clang = copy.deepcopy(src)
dst_clang = src_clang
__mul_const_clang(c_float_p(dst_clang), c_float_p(dst_clang), mul, c_int_p(pnsz))
__add_const_clang(c_float_p(dst_clang), c_float_p(dst_clang), add, c_int_p(pnsz))
__mul_mat_clang(c_float_p(dst_clang), c_float_p(dst_clang), c_float_p(dst_clang), c_int_p(pnsz))
__add_mat_clang(c_float_p(dst_clang), c_float_p(dst_clang), c_float_p(dst_clang), c_int_p(pnsz))
## CU in GPU
cu_file = os.path.join(os.path.dirname(__file__), 'libmath_cu.so')
_math_cu = ctypes.CDLL(cu_file)
__mul_const_cu = _math_cu.mul_const
__add_const_cu = _math_cu.add_const
__mul_mat_cu = _math_cu.mul_mat
__add_mat_cu = _math_cu.add_mat
__mul_const_cu.argtypes = (POINTER(c_float), POINTER(c_float), c_float, POINTER(c_int))
__mul_const_cu.restypes = c_void_p
__add_const_cu.argtypes = (POINTER(c_float), POINTER(c_float), c_float, POINTER(c_int))
__add_const_cu.restypes = c_void_p
__mul_mat_cu.argtypes = (POINTER(c_float), POINTER(c_float), POINTER(c_float), POINTER(c_int))
__mul_mat_cu.restypes = c_void_p
__add_mat_cu.argtypes = (POINTER(c_float), POINTER(c_float), POINTER(c_float), POINTER(c_int))
__add_mat_cu.restypes = c_void_p
c_float_p = lambda x: x.ctypes.data_as(POINTER(c_float))
c_int_p = lambda x: x.ctypes.data_as(POINTER(c_int))
src_cu = copy.deepcopy(src)
dst_cu = src_cu
__mul_const_cu(c_float_p(dst_cu), c_float_p(dst_cu), mul, c_int_p(pnsz))
__add_const_cu(c_float_p(dst_cu), c_float_p(dst_cu), add, c_int_p(pnsz))
__mul_mat_cu(c_float_p(dst_cu), c_float_p(dst_cu), c_float_p(dst_cu), c_int_p(pnsz))
__add_mat_cu(c_float_p(dst_cu), c_float_p(dst_cu), c_float_p(dst_cu), c_int_p(pnsz))
# print(dst_np[:10, 0, 0])
# print(dst_cu[:10, 0, 0])
#
# print(np.sum(dst_np - dst_cu))
## Cupy in GPU
src_cp = cp.asarray(copy.deepcopy(src))
dst_cp = src_cp
mul_cp = cp.float32(mul)
add_cp = cp.float32(add)
dst_cp = dst_cp * mul_cp
dst_cp = dst_cp + add_cp
dst_cp = dst_cp * dst_cp
dst_cp = dst_cp + dst_cp
dst_cp2np = cp.asnumpy(dst_cp)
# print(dst_np[:10, 0, 0])
# print(dst_cp[:10, 0, 0])
# print(np.sum(dst_np - dst_cp2np))
## Cupy with kernel in GPU
cu_file = os.path.join(os.path.dirname(__file__), 'math_cu.cuh')
with open(cu_file, 'r') as f:
code = f.read()
__mul_const_cp = cp.RawKernel(code, 'mul_const_kernel')
__add_const_cp = cp.RawKernel(code, 'add_const_kernel')
__mul_mat_cp = cp.RawKernel(code, 'mul_mat_kernel')
__add_mat_cp = cp.RawKernel(code, 'add_mat_kernel')
nthread = 8
nblock = (nthread, nthread, nthread)
ngrid = (int((pnsz[0] + nthread - 1)/nthread),
int((pnsz[1] + nthread - 1)/nthread),
int((pnsz[2] + nthread - 1)/nthread))
src_cph = cp.asarray(copy.deepcopy(src))
dst_cph = src_cph
pnsz_cph = cp.asarray(copy.deepcopy(pnsz))
mul_cph = cp.float32(mul)
add_cph = cp.float32(add)
__mul_const_cp(ngrid, nblock, args=(dst_cph, dst_cph, mul_cph, pnsz_cph))
__add_const_cp(ngrid, nblock, args=(dst_cph, dst_cph, add_cph, pnsz_cph))
__mul_mat_cp(ngrid, nblock, args=(dst_cph, dst_cph, dst_cph, pnsz_cph))
__add_mat_cp(ngrid, nblock, args=(dst_cph, dst_cph, dst_cph, pnsz_cph))
dst_cph2np = cp.asnumpy(dst_cph)
##
print("Error between numpy and clang: %.8f" % np.sum(dst_np - dst_clang))
print("Error between numpy and cu : %.8f" % np.sum(dst_np - dst_cu))
print("Error between numpy and cupy : %.8f" % np.sum(dst_np - dst_cp2np))
print("Error between numpy and cupyh: %.8f" % np.sum(dst_np - dst_cph2np))