Skip to content

Commit

Permalink
build(setup): use OpenMP, work around compiler bug
Browse files · Browse the repository at this point in the history
- Edit setup.sh script to compile with OpenMP support
- Work around a compiler bug triggered by enabling OpenMP

In a future commit, we will research the utility of OpenMP for
GPU offloading.
Loading branch information…
rouson committed Jan 18, 2024
1 parent b061c7d commit 3ed73de
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 13 deletions.
21 changes: 14 additions & 7 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ HDF5_LIB_PATH="`brew --prefix hdf5`/lib"
NETCDFF_LIB_PATH="`brew --prefix netcdf-fortran`/lib"

FPM_LD_FLAG=" -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH -L$NETCDFF_LIB_PATH"
FPM_FLAG="-fcoarray=single -O3 -fallow-argument-mismatch -ffree-line-length-none -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH"
FPM_FLAG="-fopenmp -g -fcoarray=single -O3 -fallow-argument-mismatch -ffree-line-length-none -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH"
FPM_FC=${FC:-"gfortran-13"}
FPM_CC=${CC:-"gcc-13"}

Expand Down Expand Up @@ -125,14 +125,21 @@ if command -v fpm > /dev/null 2>&1; then
brew install fpm
fi

echo "$RUN_FPM_SH test"
$RUN_FPM_SH test
echo "$RUN_FPM_SH test --profile release"
$RUN_FPM_SH test --profile release

echo ""
echo "____________________ Inference-Engine has been set up! _______________________"
echo ""
echo "To run one of the programs in the example subdirectory, enter a command of the"
echo "following form at a shell command prompt after replacing <example-base-name>"
echo "with the base name of a file in the example/ subdirectory:"
echo "The example/ subdirectory contains sample use cases. Execute the following"
echo "command to see a list of examples that you can run:"
echo ""
echo "./build/run-fpm.sh run --example <example-base-name>"
echo "./build/run-fpm.sh run --example"
echo ""
echo "Execute an example with a command of the form"
echo ""
echo "./build/run-fpm.sh run --profile release --example <example-base-name>"
echo ""
echo "where '--profile release' ensures an optimized build, which can greatly reduce"
echo "runtime. The latter command will print usage information if the example"
echo "requires additional arguments."
24 changes: 18 additions & 6 deletions test/trainable_engine_test_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -281,20 +281,31 @@ function xor_gate_with_random_weights() result(test_passes)
integer, parameter :: num_inputs=2, mini_batch_size = 1, num_iterations=500000
!! Depending on where in the random-number sequence the weights start, this test can pass for lower
!! numbers of iterations, e.g., 400000. Using more iterations gives more robust convergence.
integer batch, iter, i
integer batch, iter, i, j

allocate(harvest(num_inputs, mini_batch_size, num_iterations))
call random_number(harvest)

! The following temporary copies are required by gfortran bug 100650 and possibly 49324
! The following temporary copies, tmp and tmp2, are required by gfortran bug 100650 and possibly 49324
! See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100650 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49324
tmp = [([(tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0)), batch=1, mini_batch_size)], iter=1, num_iterations)]

allocate(tmp(mini_batch_size*num_iterations))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp((iter-1)*mini_batch_size + 1) = tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0))
end do
training_inputs = reshape(tmp, [mini_batch_size, num_iterations])

tmp2 = [([(xor(training_inputs(batch, iter)), batch = 1, mini_batch_size)], iter = 1, num_iterations )]
allocate(tmp2(size(tmp)))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp2((iter-1)*mini_batch_size + 1) = xor(training_inputs(batch, iter))
end do
training_outputs = reshape(tmp2, [mini_batch_size, num_iterations])

mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)]
allocate(mini_batches(size(training_inputs,1)*num_iterations))
do concurrent(iter=1:num_iterations)
mini_batches(iter) = mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter)))
end do

trainable_engine = two_random_hidden_layers()

call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5)
Expand All @@ -306,7 +317,7 @@ function xor_gate_with_random_weights() result(test_passes)

contains

function xor(inputs) result(expected_outputs)
pure function xor(inputs) result(expected_outputs)
type(tensor_t), intent(in) :: inputs
type(tensor_t) expected_outputs
associate(sum_inputs => sum(inputs%values()))
Expand Down Expand Up @@ -396,6 +407,7 @@ function perturbed_identity_converges() result(test_passes)
integer, parameter :: num_epochs = 148
integer, parameter :: num_bins = 5
integer i, bin, epoch

trainable_engine = perturbed_identity_network(perturbation_magnitude=0.1)

associate(num_inputs => trainable_engine%num_inputs(), num_outputs => trainable_engine%num_outputs())
Expand Down

0 comments on commit 3ed73de

Please sign in to comment.