-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathac.cu
127 lines (100 loc) · 3.82 KB
/
ac.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "ac.h"

#include <cstdio>
#include <cstdlib>
// File-scope 1D integer texture references. profanity_filter_parallel binds
// each one to a linear device buffer before the launch and unbinds it after;
// profanity_filter_cuda reads them with tex1Dfetch.
// NOTE(review): the texture-reference API is deprecated in recent CUDA
// toolkits (texture objects replace it) -- confirm the toolkit this builds with.

// Bound to a NUM_ROWS-int buffer holding column 0 of every DFA row (see
// populate_final_states); the kernel treats a non-zero entry as a match.
texture<int, cudaTextureType1D> tex_state_final;
// Bound to the NUM_ROWS*NUM_COLS transition table; the kernel indexes it as
// state*NUM_COLS + symbol, and an entry of 0 is treated as "no transition".
texture<int, cudaTextureType1D> tex_dfa;
// Bound to a NUM_ROWS-int buffer; the kernel replaces the current state with
// the entry at that state's index whenever the current state has no
// transition for the symbol.
texture<int, cudaTextureType1D> tex_fail_state;
// Copies column 0 of each of the NUM_ROWS rows of the row-major table `dfa`
// (NUM_COLS ints per row) into final[0 .. NUM_ROWS-1].
void
populate_final_states(int* final, int* dfa) {
    int* out = final;
    const int* row_head = dfa;
    for(int remaining = NUM_ROWS; remaining > 0; --remaining) {
        *out++ = *row_head;      // first cell of the current row
        row_head += NUM_COLS;    // step to the start of the next row
    }
}
// Scans fixed-width tweet records against the automaton held in the
// file-scope textures and flags every record that reaches a final state.
//
// Launch layout: 1D grid of 1D blocks, any size. Records are distributed with
// a grid-stride loop, so every record in [0, num_tweets) is visited exactly
// once regardless of how num_tweets relates to the thread count.
// Shared memory: none.
// Preconditions: tex_dfa, tex_fail_state and tex_state_final are bound before
// the launch; valid_state is zero-initialised by the caller, because this
// kernel only ever writes `true`.
//
// dfa, fail_state : unused by the texture path (kept for the global-memory
//                   variant shown in the comment at the end of the loop body).
// tweets          : num_tweets records of tweet_length bytes each; a record
//                   ends at its first byte equal to 10 ('\n') or after
//                   tweet_length bytes, whichever comes first.
// valid_state     : one flag per record; set to true when a match is found.
// offset          : unused.
__global__
void
profanity_filter_cuda(int* dfa, int* fail_state, unsigned char* tweets, bool* valid_state, int offset, int num_tweets, int tweet_length) {
    if(num_tweets <= 0 || tweet_length <= 0)
        return;

    // 64-bit indices: record_index * tweet_length can exceed the range of int.
    const size_t total_threads = (size_t)gridDim.x * blockDim.x;
    const size_t first_record = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    for(size_t record = first_record; record < (size_t)num_tweets; record += total_threads) {
        const unsigned char* text = tweets + record * (size_t)tweet_length;
        int curr_state = 0;

        // Never read past this record, even when it has no terminating newline.
        for(int idx = 0; idx < tweet_length; idx++) {
            const unsigned char ch = text[idx];
            if(ch == 10)
                break;  // end of this tweet

            // Map the byte to a DFA column. Column 0 holds the final-state
            // flag, so symbols start at 1. Anything outside the alphabet is
            // skipped without touching the automaton state.
            int ord;
            if(ch >= 'a' && ch <= 'z')
                ord = int(ch) - int('a') + 1;
            else if(ch == ' ')
                ord = 28;
            else if(int(ch) == 39)  // apostrophe
                ord = 29;
            else
                continue;

            // Follow failure links until a transition exists or we are at the root.
            while(curr_state!=0 && tex1Dfetch (tex_dfa, curr_state*NUM_COLS + ord) == 0){
                curr_state = tex1Dfetch (tex_fail_state, curr_state);
            }

            const int next_state = tex1Dfetch (tex_dfa, curr_state*NUM_COLS + ord);
            if(curr_state!=0 || next_state!=0) {
                curr_state = next_state;
                if(tex1Dfetch ( tex_state_final, curr_state )) {
                    valid_state[record] = true;
                    break;  // this tweet is decided; move on to the next record
                }
            }

            /* global-memory variant of the transition step above */
            // while(curr_state!=0 && dfa[curr_state*NUM_COLS + ord] == 0){
            //     curr_state = fail_state[curr_state];
            // }
            // if(curr_state!=0 || dfa[curr_state*NUM_COLS + ord]!=0) {
            //     curr_state = dfa[curr_state*NUM_COLS + ord];
            //     int r = dfa[curr_state*NUM_COLS] ;
            //     if(r) {
            //         valid_state[record] = true;
            //         break;
            //     }
            // }
        }
    }
}
// Reports a failed CUDA runtime call on stderr; returns true when `status`
// is cudaSuccess.
static bool
ac_cuda_ok(cudaError_t status, const char* what) {
    if(status == cudaSuccess)
        return true;
    fprintf(stderr, "profanity_filter_parallel: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host driver: uploads the automaton and the tweet buffer, binds the three
// file-scope textures, launches profanity_filter_cuda and copies the per-tweet
// match flags back.
//
// dfa          : host table of NUM_ROWS*NUM_COLS ints.
// fail_state   : host array of NUM_ROWS ints.
// tweets       : host buffer of num_tweets*tweet_length bytes.
// valid_state  : host output, num_tweets flags; written only when every CUDA
//                step succeeded (or set to all-false when tweet_length <= 0).
// num_threads  : requested threads per block.
// num_blocks   : requested blocks. When there are fewer tweets than requested
//                threads the launch shape is reduced (see below).
//
// Failures are reported on stderr; the function then releases whatever it had
// acquired and returns.
void
profanity_filter_parallel(int* dfa, int* fail_state, char* tweets, bool* valid_state, int num_tweets, int tweet_length, int num_threads, int num_blocks) {
    if(num_tweets <= 0)
        return;
    if(tweet_length <= 0) {
        // Empty records cannot match anything.
        for(int i=0; i<num_tweets; i++)
            valid_state[i] = false;
        return;
    }

    if((long long)num_tweets < (long long)num_blocks*num_threads) {
        // Fewer tweets than requested threads: shrink the launch, but never
        // down to zero threads per block and never above 1024 threads per
        // block (an invalid launch configuration would skip the kernel).
        // NOTE(review): 1024 is the per-block limit on current GPUs -- confirm
        // for the target device.
        num_blocks = (num_tweets < 128) ? num_tweets : 128;
        num_threads = num_tweets/num_blocks;
        if(num_threads > 1024)
            num_threads = 1024;
    }

    // Byte counts in size_t so large inputs do not overflow int arithmetic.
    const size_t states_bytes = (size_t)NUM_ROWS*sizeof(int);
    const size_t dfa_bytes = (size_t)NUM_ROWS*(size_t)NUM_COLS*sizeof(int);
    const size_t flags_bytes = (size_t)num_tweets*sizeof(bool);
    const size_t tweets_bytes = (size_t)num_tweets*(size_t)tweet_length*sizeof(unsigned char);

    int* d_dfa = NULL;
    int* d_fail_state = NULL;
    unsigned char* d_tweets = NULL;
    bool* d_valid_state = NULL;
    int* s_final = NULL;

    int* final = (int *) malloc(states_bytes);
    if(final == NULL) {
        fprintf(stderr, "profanity_filter_parallel: out of host memory\n");
        return;
    }
    populate_final_states(final, dfa);

    // `ok &&` short-circuits, so nothing further is attempted after a failure.
    bool ok = true;
    ok = ok && ac_cuda_ok(cudaMalloc((void **)&d_fail_state, states_bytes), "cudaMalloc(d_fail_state)");
    ok = ok && ac_cuda_ok(cudaMalloc((void **)&d_valid_state, flags_bytes), "cudaMalloc(d_valid_state)");
    ok = ok && ac_cuda_ok(cudaMalloc((void **)&d_dfa, dfa_bytes), "cudaMalloc(d_dfa)");
    ok = ok && ac_cuda_ok(cudaMalloc((void **)&s_final, states_bytes), "cudaMalloc(s_final)");
    ok = ok && ac_cuda_ok(cudaMalloc((void **)&d_tweets, tweets_bytes), "cudaMalloc(d_tweets)");

    ok = ok && ac_cuda_ok(cudaMemcpy(d_fail_state, fail_state, states_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_fail_state)");
    ok = ok && ac_cuda_ok(cudaMemcpy(d_dfa, dfa, dfa_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_dfa)");
    ok = ok && ac_cuda_ok(cudaMemcpy(s_final, final, states_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(s_final)");
    ok = ok && ac_cuda_ok(cudaMemset(d_valid_state, 0, flags_bytes), "cudaMemset(d_valid_state)");
    ok = ok && ac_cuda_ok(cudaMemcpy(d_tweets, tweets, tweets_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_tweets)");

    ok = ok && ac_cuda_ok(cudaBindTexture ( 0, tex_state_final, s_final, states_bytes ), "cudaBindTexture(tex_state_final)");
    ok = ok && ac_cuda_ok(cudaBindTexture ( 0, tex_dfa, d_dfa, dfa_bytes ), "cudaBindTexture(tex_dfa)");
    ok = ok && ac_cuda_ok(cudaBindTexture ( 0, tex_fail_state, d_fail_state, states_bytes ), "cudaBindTexture(tex_fail_state)");

    if(ok) {
        // The cache preference is only a hint, so a failure here is logged but not fatal.
        ac_cuda_ok(cudaFuncSetCacheConfig(profanity_filter_cuda, cudaFuncCachePreferL1), "cudaFuncSetCacheConfig");

        profanity_filter_cuda<<<dim3(num_blocks,1,1), dim3(num_threads,1,1)>>>(d_dfa, d_fail_state, d_tweets, d_valid_state, 0, num_tweets, tweet_length);
        // A bad launch configuration is only visible through cudaGetLastError.
        ok = ac_cuda_ok(cudaGetLastError(), "profanity_filter_cuda launch");
    }

    // The blocking copy also surfaces any fault raised while the kernel ran.
    ok = ok && ac_cuda_ok(cudaMemcpy(valid_state, d_valid_state, flags_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(valid_state)");

    // Best-effort cleanup on every path; cudaFree(NULL) is a no-op.
    cudaUnbindTexture ( tex_state_final );
    cudaUnbindTexture ( tex_dfa );
    cudaUnbindTexture ( tex_fail_state );
    cudaFree(d_dfa);
    cudaFree(s_final);
    cudaFree(d_fail_state);
    cudaFree(d_tweets);
    cudaFree(d_valid_state);
    free(final);
}