Skip to content

Commit

Permalink
Merge pull request #17 from LLNL/develop
Browse files Browse the repository at this point in the history
Merge develop to master for CORAL-2 release
  • Loading branch information
rhornung67 authored Jan 5, 2018
2 parents 801c298 + d6dd551 commit 03b8754
Show file tree
Hide file tree
Showing 169 changed files with 12,496 additions and 4,462 deletions.
102 changes: 102 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
sudo: required
dist: trusty
language: cpp
env:
global:
- secure: xN+lGOH8LzepE1OoOrHelDgu1wf7nL/B7nBWhN7EnCB7S7hZJL/AakruHy4lMfQfF4XkrnPWmBlmc4wdLH+o6jPkUISm4nLRSTMnRV2L+Mjyzg3aIEua0xpO6rLUNgsShB8mfkieTJq+kSj3Yp2CM7GEzm+UNNxeJcY0VdUHy9msRRRbXiLViIrfwBEVC9He7xG9NWfqkpsORfoiPmVDm7YzuXALdB8qkX4AWggysz/BCVj0PwBMr754eEpOodQ9GeKDF2Kwy5vPAqK5f7zwshJtF9VevyA1A2M9y8BHJMymz4wGaSxLNMeUU85AmVIvmzX0weG94JQ7mlUVszNpO5CCIyjwCOF+IDUI8HCDJGOY7+gGnv4H2LhDwAXvFLD65FlMntQQe2e4KRTnFxtJvvghjv5FyxJSHwctLsgeDpr2uZDcAhK1yf8TNsqqMiXQj2yGLByJy8j5PjUyd8oN47uZo0T5DDMd5c3ztUppc5+DisIoqmoYQeom3lYbpeudaf492ZDBWEV4rS9COl1h7CnpanMBpXWLFc2zXyfTpRn3GifutiF8M3rSS2KHcPyb9JLePTrC4+itMkwB4SHo1VYk4H2RQAdPMDFHMKCeVs2Z4sF9pGPJR+JzRekaKFLDm73ihsuE0cnx1oPVQMjSWa0e7A1a9W4UQBvp9xR++i4=
- OMP_NUM_THREADS=3
- DO_BUILD=yes
- DO_TEST=yes
matrix:
include:
- compiler: gcc-4
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } }
env:
- COMPILER=g++-4.9
- CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On"
- compiler: gcc-6
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } }
env:
- COMPILER=g++-6
- CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On"
- compiler: gcc-7
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-7, libtbb-dev ] } }
env:
- COMPILER=g++-7
- CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On"
- compiler: clang-5
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } }
env:
- COMPILER=clang++-5.0.0
- LLVM_VERSION=5.0.0
- DOWNLOAD_URL=http://releases.llvm.org/5.0.0/clang+llvm-5.0.0-linux-x86_64-ubuntu14.04.tar.xz
- CMAKE_EXTRA_FLAGS="-DCMAKE_CXX_FLAGS=-fmodules"
- compiler: clang-3.9
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } }
env:
- COMPILER=clang++-3.9.1
- LLVM_VERSION=3.9.1
- compiler: clang-4.0
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } }
env:
- COMPILER=clang++-4.0.0
- LLVM_VERSION=4.0.0
- compiler: intel-17
env:
- COMPILER=icpc
- TRAVIS_INSTALL_COMPILER="intel"
- compiler: nvcc
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } }
env:
- COMPILER=g++-4.9
- CMAKE_EXTRA_FLAGS="-DENABLE_CUDA=On"
- TRAVIS_INSTALL_COMPILER="nvcc"
- DO_TEST=no
- compiler: gcc-4.9-debug
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } }
env:
- COMPILER=g++-4.9
- CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=On"
- compiler: clang-3.9-debug
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } }
env:
- COMPILER=clang++
- LLVM_VERSION=3.9.1
- CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug"
- compiler: nvcc-debug
addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } }
env:
- COMPILER=g++-4.9
- CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_CUDA=On"
- TRAVIS_INSTALL_COMPILER="nvcc"
- DO_TEST=no
cache:
directories:
- $HOME/llvm
before_install:
- sudo apt-get update -qq
- mkdir -p ${HOME}/download
- if [[ -n "${LLVM_VERSION}" ]]; then . ./scripts/install_llvm.sh ; fi
- CMAKE_URL="https://cmake.org/files/v3.7/cmake-3.7.0-rc2-Linux-x86_64.tar.gz"
- curl -o ${HOME}/cmake-tarball.tar.gz ${CMAKE_URL} &&
mkdir -p ${HOME}/cmake &&
tar xf ${HOME}/cmake-tarball.tar.gz -C ${HOME}/cmake --strip-components=1 &&
export PATH=${HOME}/cmake/bin:${PATH}
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "intel" && -n "$INTEL_SERIAL_NUMBER" ]] ; then wget -q -O /dev/stdout 'https://raw.githubusercontent.com/nemequ/icc-travis/master/install-icc.sh' | /bin/sh; fi
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "intel" && -z "$INTEL_SERIAL_NUMBER" ]] ; then export DO_BUILD=no ; export DO_TEST=no ; fi
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "nvcc" ]]; then export DEBFILE=${HOME}/download/cuda-repo.deb; fi
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "nvcc" ]]; then export DOWNLOAD_URL=http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_8.0.61-1_amd64.deb; fi
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "nvcc" ]]; then if [[ ! -f /usr/local/cuda-8.0/bin/nvcc ]]; then if [[ ! -f ${DEBFILE} ]]; then travis_retry wget -O ${DEBFILE} ${DOWNLOAD_URL}; fi &&
travis_retry sudo dpkg -i ${DEBFILE} &&
travis_retry sudo apt-get update -qq &&
travis_retry sudo apt-get install --no-install-suggests --no-install-recommends -y cuda-drivers cuda-core-8-0 cuda-cudart-dev-8-0 cuda-cufft-dev-8-0 &&
travis_retry sudo apt-get clean; fi &&
export CUDA_HOME=/usr/local/cuda-8.0 &&
export CUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} &&
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} &&
export PATH=${CUDA_HOME}/bin:${PATH}; fi
script:
- ./scripts/travis_build_and_test.sh
after_success:
- if [[ "${CMAKE_EXTRA_FLAGS}" == *"ENABLE_COVERAGE"* ]] ; then bash <(curl -s https://codecov.io/bash) -a "-f"; fi
- if [[ "${TRAVIS_INSTALL_COMPILER}" == "intel" ]] ; then uninstall_intel_software ; fi
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ set(BLT_CXX_STANDARD 11)
#

set(ENABLE_TESTS On CACHE Bool "")
set(ENABLE_EXAMPLES Off CACHE Bool "")
set(ENABLE_EXAMPLES On CACHE Bool "")
set(ENABLE_DOCUMENTATION Off CACHE Bool "")

set(ENABLE_TBB Off CACHE Bool "")
Expand All @@ -62,7 +62,7 @@ include_directories(${RAJA_INCLUDE_DIRS})
#

set(RAJA_PERFSUITE_VERSION_MAJOR 0)
set(RAJA_PERFSUITE_VERSION_MINOR 1)
set(RAJA_PERFSUITE_VERSION_MINOR 2)
set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0)

set(RAJA_PERFSUITE_DEPENDS RAJA)
Expand Down
137 changes: 104 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ source repository. For example,
```
> mkdir RAJA-PERFSUITE
> cd RAJA-PERFSUITE
> git clone --recursive https://github.com/llnl/rajaperf.git
> git clone --recursive https://github.com/llnl/RAJAPerf.git
> ls
raja-perfsuite
RAJAPerf
```

The Performance Suite has [RAJA] and the CMake-based [BLT] build system
Expand All @@ -51,23 +51,23 @@ the Performance Suite source code. Note that if you switch to a different
branch, you will have to update the submodules; e.g.,

```
> cd raja-perfsuite
> cd RAJAPerf
> git checkout <some branch name>
> git submodule init
> git submodule update
```

RAJA and the Performance Suite are built together using the same CMake
configuration. For convenience, we include some scripts in the 'scripts'
directory some associated configuration files in the 'host-configs'
that illustrate how to build the code on various platforms at LLNL. Each
build script will create a descriptively-named build space directory in
the top-level erformance Suite directory, and run CMake with a configuration
appropriate for the platform and compilers used. After CMake completes,
enter the build directory and type 'make' (or 'make -j' for parallel) to
build the code. The provided configurations will build RAJA unit tests by
default. After the code builds, you can type 'make test' to verify that
everything is working properly. For example,
configuration. For convenience, we include scripts in the 'scripts'
directory that invoke associated (CMake cache) configuration files in the
'host-configs' directory that illustrate how to build the code on various
platforms at LLNL. Each build script creates a descriptively-named build
space directory in the top-level Performance Suite directory and runs CMake
with a configuration appropriate for the platform and compilers used. After
CMake completes, enter the build directory and type 'make' (or 'make -j' for
a parallel build) to build the code. The provided configurations will build
RAJA unit tests by default. After the code builds, you can type 'make test' to
verify that the RAJA build is working properly. For example,

```
> ./scripts/blueos_nvcc8.0_clang-coral.sh
Expand Down Expand Up @@ -127,6 +127,21 @@ Lastly, the program will emit a summary of provided input if it is given
something that it does not understand. Hopefully, this will make it easy for
users to understand and correct erroneous usage.

# Important notes

* The kernels that use RAJA 'nested' loop constructs will be replaced
at some point with new RAJA nested capabilities that are being developed.
The new nested constructs are simpler, more flexible, and perform better.

* The OpenMP target variants of the kernels in the Suite are a
work-in-progress. They are incomplete (a few RAJA features must be
filled in to make them comparable to other variants).

* The build system for the Suite needs to be reworked to have the
OpenMP target kernel variants run from the same executable as the CUDA
variants. Currently, a separate executable `./bin/raja-perf-nolibs.exe`
is generated for running OpenMP target variants when they are enabled.

* * *

# Generated output
Expand Down Expand Up @@ -165,7 +180,7 @@ Adding a new kernel to the suite involves three main steps:

1. Add unique kernel ID and unique name to the suite.
2. If the kernel is part of a new kernel group, also add a unique group ID and name for the group.
3. Implement a kernel class that contains all operations needed to run it.
3. Implement a kernel class that contains all operations needed to run it, with source files organized as described below.

These steps are described in the following sections.

Expand All @@ -174,11 +189,11 @@ These steps are described in the following sections.
Two key pieces of information identify a kernel: the group in which it
resides and the name of the kernel itself. For concreteness, we describe
how to add a kernel "Foo" that lives in the kernel group "Bar". The files
`RAJAPerfSuite.hxx` and `RAJAPerfSuite.cxx` define enumeration
`RAJAPerfSuite.hpp` and `RAJAPerfSuite.cpp` define enumeration
values and arrays of string names for the kernels, respectively.

First, add an enumeration value identifier for the kernel that is unique
among all kernels, in the enum 'KerneID' in the header file `RAJAPerfSuite.hxx`:
among all kernels, in the enum 'KernelID' in the header file `RAJAPerfSuite.hpp`:

```cpp
enum KernelID {
Expand All @@ -194,7 +209,7 @@ this convention so that the kernel works with existing performance
suite machinery.

Second, add the kernel name to the array of strings 'KernelNames' in the file
`RAJAPerfSuite.cxx`:
`RAJAPerfSuite.cpp`:

```cpp
static const std::string KernelNames [] =
Expand All @@ -216,8 +231,8 @@ and matching one-to-one).

If a kernel is added as part of a new group of kernels in the suite, a
new value must be added to the 'GroupID' enum in the header file
`RAJAPerfSuite.hxx` and an associated group string name must be added to
the 'GroupNames' array of strings in the file `RAJAPerfSuite.cxx`. Again,
`RAJAPerfSuite.hpp` and an associated group string name must be added to
the 'GroupNames' array of strings in the file `RAJAPerfSuite.cpp`. Again,
the enumeration values and items in the string array must be kept
consistent.

Expand All @@ -231,7 +246,8 @@ all operations needed to execute and record execution timing and result
checksum information for each variant of the kernel.

Continuing with our example, we add 'Foo' class header and implementation
files 'Foo.hxx' and 'Foo.cxx', respectively, to the 'src/bar' directory.
files 'Foo.hpp', 'Foo.cpp' (CPU variants), `Foo-Cuda.cpp` (CUDA variants),
and `Foo-OMPTarget.cpp` (OpenMP target variants) to the 'src/bar' directory.
The class must inherit from the 'KernelBase' base class that defines the
interface for kernels in the suite.

Expand All @@ -243,7 +259,15 @@ Here is what the header file for the Foo kernel object may look like:
#ifndef RAJAPerf_Bar_Foo_HXX
#define RAJAPerf_Bar_Foo_HXX

#include "common/KernelBase.hxx"

///
/// Foo kernel reference implementation:
///
/// Describe it here...
///


#include "common/KernelBase.hpp"

namespace rajaperf
{
Expand All @@ -265,6 +289,9 @@ public:
void updateChecksum(VariantID vid);
void tearDown(VariantID vid);

void runCudaVariant(VariantID vid);
void runOpenMPTargetVariant(VariantID vid);

private:
// Kernel-specific data (pointers, scalars, etc.) used in kernel...
};
Expand Down Expand Up @@ -345,7 +372,8 @@ checksums can be compared at the end of a run.
Note: to simplify these operations and help ensure consistency, there exist
utility methods to allocate, initialize, deallocate, and copy data, and compute
checksums defined in the `DataUtils.hxx` header file in the 'common' directory.
checksums defined in the `DataUtils.hpp`, `CudaDataUtils.hpp`, and
`OpenMPTargetDataUtils.hpp` header files in the 'common' directory.
##### runKernel() method
Expand All @@ -360,18 +388,61 @@ void Foo::runKernel(VariantID vid)
const Index_type run_reps = getRunReps();
// ...
// Declare data for vid variant of kernel...
switch ( vid ) {
startTimer();
for (SampIndex_type irep = 0; irep < run_reps; ++irep) {
// Implementation of vid variant of kernel...
}
stopTimer();
case Base_Seq : {
// ...
// Declare data for baseline sequential variant of kernel...
startTimer();
for (SampIndex_type irep = 0; irep < run_reps; ++irep) {
// Implementation of kernel variant...
}
stopTimer();
// ...
break;
}
// case statements for other CPU kernel variants....
#if defined(RAJA_ENABLE_TARGET_OPENMP)
case Base_OpenMPTarget :
case RAJA_OpenMPTarget :
{
runOpenMPTargetVariant(vid);
break;
}
#endif
#if defined(RAJA_ENABLE_CUDA)
case Base_CUDA :
case RAJA_CUDA :
{
runCudaVariant(vid);
break;
}
#endif
default : {
std::cout << "\n <kernel-name> : Unknown variant id = " << vid << std::endl;
}
}
}
```

All kernel implementation files are organized in this way, so following this
pattern will keep all new additions consistent.

Note: There are three source files for each kernel: 'Foo.cpp' contains CPU
variants, `Foo-Cuda.cpp` contains CUDA variants, and `Foo-OMPTarget.cpp`
contains OpenMP target variants. The reason for this is that it makes it
easier to apply unique compiler flags to different variants and to manage
compilation and linking issues that arise when some kernel variants are
combined in the same translation unit.

Note: for convenience, we make heavy use of macros to define data
declarations and kernel bodies in the suite. This significantly reduces
the amount of redundant code required to implement multiple variants
Expand All @@ -391,7 +462,7 @@ compared to help identify differences, and potentially errors, in
implementations, compiler optimizations, programming model execution, etc.

Note: to simplify checksum computations and help ensure consistency, there
are methods to compute checksums defined in the `DataUtils.hxx` header file
are methods to compute checksums defined in the `DataUtils.hpp` header file
in the 'common' directory.

##### tearDown() method
Expand All @@ -407,7 +478,7 @@ The 'Executor' class object is responsible for creating kernel objects
for the kernels to be run based on the suite input options. To ensure a new
kernel object will be created properly, add a call to its class constructor
based on its 'KernelID' in the 'getKernelObject()' method in the
`RAJAPerfSuite.cxx` file.
`RAJAPerfSuite.cpp` file.


## Adding a variant
Expand All @@ -420,7 +491,7 @@ items similar to adding a kernel as described above.

First, add an enumeration value identifier for the variant that is unique
among all variants, in the enum 'VariantID' in the header file
`RAJAPerfSuite.hxx`:
`RAJAPerfSuite.hpp`:

```cpp
enum VariantID {
Expand All @@ -431,7 +502,7 @@ enum VariantID {
```

Second, add the variant name to the array of strings 'VariantNames' in the file
`RAJAPerfSuite.cxx`:
`RAJAPerfSuite.cpp`:

```cpp
static const std::string VariantNames [] =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "")

set(CMAKE_CXX_COMPILER "/usr/tce/packages/clang/clang-coral-2017.09.06/bin/clang++" CACHE PATH "")
set(CMAKE_C_COMPILER "/usr/tce/packages/clang/clang-coral-2017.09.06/bin/clang" CACHE PATH "")
set(CMAKE_CXX_COMPILER "/usr/tce/packages/clang/clang-coral-2017.09.18/bin/clang++" CACHE PATH "")
set(CMAKE_C_COMPILER "/usr/tce/packages/clang/clang-coral-2017.09.18/bin/clang" CACHE PATH "")

set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "")
Expand Down
Loading

0 comments on commit 03b8754

Please sign in to comment.