Skip to content

Commit

Permalink
Merge pull request #71 from robertu94/master
Browse files Browse the repository at this point in the history
Enable SZ's openmp mode in CMake builds
  • Loading branch information
disheng222 authored Jul 15, 2021
2 parents 9a331d1 + 32c9ae5 commit f937952
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ option(BUILD_RANDOMACCESS "build the random access code" OFF)
# Optional build features. Every switch below defaults to OFF.
option(
  BUILD_DOCKER_CONTAINERS
  "build docker containers for testing"
  OFF
)
option(
  BUILD_FORTRAN
  "build the fortran interface"
  OFF
)
option(
  BUILD_STATS
  "record statistics for prediction"
  OFF
)
option(
  BUILD_OPENMP
  "build OpenMP support"
  OFF
)
if(BUILD_DOCKER_CONTAINERS)

foreach(CONTAINER Centos Fedora Ubuntu Travis CentosPackaged)
Expand Down
3 changes: 3 additions & 0 deletions sz/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ target_compile_options(SZ
PRIVATE $<$<CONFIG:Debug>:-Wall -Wextra -Wpedantic -Wno-unused-parameter>
)

# Link SZ against the OpenMP C runtime when BUILD_OPENMP is enabled.
if(BUILD_OPENMP)
  # OpenMP::OpenMP_C is an imported target created by CMake's FindOpenMP
  # module; it does not exist until find_package(OpenMP) has succeeded.
  # Look it up here unless it is already defined, so that a toolchain
  # without OpenMP support fails at configure time with a clear message
  # rather than with a reference to a missing target.
  if(NOT TARGET OpenMP::OpenMP_C)
    find_package(OpenMP REQUIRED COMPONENTS C)
  endif()
  target_link_libraries(SZ PRIVATE OpenMP::OpenMP_C)
endif()
# Define HAVE_PASTRI when BUILD_PASTRI is enabled. The definition is PUBLIC,
# so it applies to SZ itself and to every target that links against SZ.
if(BUILD_PASTRI)
  target_compile_definitions(
    SZ
    PUBLIC HAVE_PASTRI
  )
endif()
Expand Down
48 changes: 48 additions & 0 deletions sz/src/sz_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,19 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
// quantization_intervals = optimize_intervals_float_3D(oriData, r1, realPrecision);
quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision);
//quantization_intervals = 32768;
#ifdef DEBUG
printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision);
#endif
// exit(0);
updateQuantizationInfo(quantization_intervals);
}
else{
quantization_intervals = exe_params->intvCapacity;
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("opt interval time: %.4f\n", elapsed_time);
#endif

elapsed_time = -sz_wtime();
int thread_num = sz_get_max_threads();
Expand Down Expand Up @@ -102,7 +106,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
}
sz_set_num_threads(thread_num);
// calculate block dims
#ifdef DEBUG
printf("number of blocks: %zu %zu %zu\n", num_x, num_y, num_z);
#endif

size_t split_index_x, split_index_y, split_index_z;
size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
Expand Down Expand Up @@ -171,7 +177,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
// free(P1);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("compression and quantization time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
// printf("unpred count:\n");
// for(int i=0; i<num_blocks; i++){
Expand All @@ -185,7 +193,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
size_t nodeCount = 0;
Huffman_init_openmp(huffmanTree, result_type, num_elements, thread_num, freq);
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Build Huffman: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
for (size_t i = 0; i < stateNum; i++)
if (huffmanTree->code[i]) nodeCount++;
Expand Down Expand Up @@ -243,7 +253,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
result_pos += total_unpred * sizeof(float);

elapsed_time += sz_wtime();
#ifdef DEBUG
printf("write misc time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();

size_t * block_pos = (size_t *) result_pos;
Expand All @@ -269,7 +281,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
block_pos[id] = enCodeSize;
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel Huffman encoding elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
// for(int t=0; t<thread_num; t++){
// memcpy(result_pos, encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);
Expand All @@ -287,7 +301,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
result_pos += block_offset[thread_num - 1] + block_pos[thread_num - 1];

elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Final copy elapsed time: %.4f\n", elapsed_time);
#endif
// {
// int status;
// writeIntData_inBytes(result_type, num_elements, "/Users/LiangXin/github/SZ-develop/example/openmp/comp001_type.dat", &status);
Expand Down Expand Up @@ -373,7 +389,9 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
}
}

#ifdef DEBUG
printf("number of blocks: %zu %zu %zu, thread_num %d\n", num_x, num_y, num_z, thread_num);
#endif
sz_set_num_threads(thread_num);
size_t split_index_x, split_index_y, split_index_z;
size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
Expand Down Expand Up @@ -449,7 +467,9 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
}
int num_yz = num_y * num_z;
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Read data info elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
#pragma omp parallel for
for(int t=0; t<thread_num; t++){
Expand All @@ -468,7 +488,9 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
decode(comp_data_pos + block_offset[id], current_blockcount_x*current_blockcount_y*current_blockcount_z, root, type);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel Huffman decoding elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();

#pragma omp parallel for
Expand Down Expand Up @@ -500,7 +522,9 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
decompressDataSeries_float_3D_RA_block(data_pos, mean, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, realPrecision, type, unpredictable_data);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel decompress elapsed time: %.4f\n", elapsed_time);
#endif

free(block_offset);
free(result_type);
Expand Down Expand Up @@ -529,15 +553,19 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
// quantization_intervals = optimize_intervals_float_3D(oriData, r1, realPrecision);
quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
//quantization_intervals = 32768;
#ifdef DEBUG
printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision);
#endif
// exit(0);
updateQuantizationInfo(quantization_intervals);
}
else{
quantization_intervals = exe_params->intvCapacity;
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("opt interval time: %.4f\n", elapsed_time);
#endif

elapsed_time = -sz_wtime();
int thread_num = sz_get_max_threads();
Expand Down Expand Up @@ -569,7 +597,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
}
sz_set_num_threads(thread_num);
// calculate block dims
#ifdef DEBUG
printf("number of blocks: %zu %zu %zu\n", num_x, num_y, num_z);
#endif

size_t split_index_x, split_index_y, split_index_z;
size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
Expand Down Expand Up @@ -635,7 +665,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
unpredictable_count[id] = SZ_compress_double_3D_MDQ_RA_block(data_pos, mean + id, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, realPrecision, P0, P1, type, unpredictable_data);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("compression and quantization time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
// printf("unpred count:\n");
// for(int i=0; i<num_blocks; i++){
Expand All @@ -649,7 +681,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
size_t nodeCount = 0;
Huffman_init_openmp(huffmanTree, result_type, num_elements, thread_num, freq);
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Build Huffman: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
for (size_t i = 0; i < stateNum; i++)
if (huffmanTree->code[i]) nodeCount++;
Expand Down Expand Up @@ -701,7 +735,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
result_pos += total_unpred * sizeof(double);

elapsed_time += sz_wtime();
#ifdef DEBUG
printf("write misc time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();

size_t * block_pos = (size_t *) result_pos;
Expand All @@ -727,7 +763,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
block_pos[id] = enCodeSize;
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel Huffman encoding elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
// for(int t=0; t<thread_num; t++){
// memcpy(result_pos, encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);
Expand All @@ -745,7 +783,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
result_pos += block_offset[thread_num - 1] + block_pos[thread_num - 1];

elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Final copy elapsed time: %.4f\n", elapsed_time);
#endif
// {
// int status;
// writeIntData_inBytes(result_type, num_elements, "/Users/LiangXin/github/SZ-develop/example/openmp/comp001_type.dat", &status);
Expand Down Expand Up @@ -831,7 +871,9 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
}
}

#ifdef DEBUG
printf("number of blocks: %zu %zu %zu, thread_num %d\n", num_x, num_y, num_z, thread_num);
#endif
sz_set_num_threads(thread_num);
size_t split_index_x, split_index_y, split_index_z;
size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
Expand Down Expand Up @@ -886,7 +928,9 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
}
int num_yz = num_y * num_z;
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Read data info elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();
#pragma omp parallel for
for(int t=0; t<thread_num; t++){
Expand All @@ -905,7 +949,9 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
decode(comp_data_pos + block_offset[id], current_blockcount_x*current_blockcount_y*current_blockcount_z, root, type);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel Huffman decoding elapsed time: %.4f\n", elapsed_time);
#endif
elapsed_time = -sz_wtime();

#pragma omp parallel for
Expand All @@ -932,7 +978,9 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
decompressDataSeries_double_3D_RA_block(data_pos, mean, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, realPrecision, type, unpredictable_data);
}
elapsed_time += sz_wtime();
#ifdef DEBUG
printf("Parallel decompress elapsed time: %.4f\n", elapsed_time);
#endif

free(block_offset);
free(result_type);
Expand Down

0 comments on commit f937952

Please sign in to comment.