Skip to content

Commit

Permalink
build(setup): use OpenMP, work around compiler bug
Browse files · Browse the repository at this point in the history
- Edit setup.sh script to compile with OpenMP support
- Work around a compiler bug triggered by enabling OpenMP

In a future commit, we will research the utility of OpenMP for
GPU offloading.
Loading branch information…
rouson committed Jan 18, 2024
1 parent b061c7d commit 3ed73de
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 13 deletions.
21 changes: 14 additions & 7 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ HDF5_LIB_PATH="`brew --prefix hdf5`/lib"
NETCDFF_LIB_PATH="`brew --prefix netcdf-fortran`/lib"

FPM_LD_FLAG=" -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH -L$NETCDFF_LIB_PATH"
FPM_FLAG="-fcoarray=single -O3 -fallow-argument-mismatch -ffree-line-length-none -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH"
FPM_FLAG="-fopenmp -g -fcoarray=single -O3 -fallow-argument-mismatch -ffree-line-length-none -L$NETCDF_LIB_PATH -L$HDF5_LIB_PATH"
FPM_FC=${FC:-"gfortran-13"}
FPM_CC=${CC:-"gcc-13"}

Expand Down Expand Up @@ -125,14 +125,21 @@ if command -v fpm > /dev/null 2>&1; then
brew install fpm
fi

echo "$RUN_FPM_SH test"
$RUN_FPM_SH test
echo "$RUN_FPM_SH test --profile release"
$RUN_FPM_SH test --profile release

echo ""
echo "____________________ Inference-Engine has been set up! _______________________"
echo ""
echo "To run one of the programs in the example subdirectory, enter a command of the"
echo "following form at a shell command prompt after replacing <example-base-name>"
echo "with the base name of a file in the example/ subdirectory:"
echo "The example/ subdirectory contains sample use cases. Execute the following"
echo "command to see a list of examples that you can run:"
echo ""
echo "./build/run-fpm.sh run --example <example-base-name>"
echo "./build/run-fpm.sh run --example"
echo ""
echo "Execute an example with a command of the form"
echo ""
echo "./build/run-fpm.sh run --profile release --example <example-base-name>"
echo ""
echo "where '--profile release' ensures an optimized build, which can greatly reduce"
echo "runtime. The latter command will print usage information if the example"
echo "requires additional arguments."
24 changes: 18 additions & 6 deletions test/trainable_engine_test_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -281,20 +281,31 @@ function xor_gate_with_random_weights() result(test_passes)
integer, parameter :: num_inputs=2, mini_batch_size = 1, num_iterations=500000
!! Depending on where in the random-number sequence the weights start, this test can pass for lower
!! numbers of iterations, e.g., 400000. Using more iterations gives more robust convergence.
integer batch, iter, i
integer batch, iter, i, j

allocate(harvest(num_inputs, mini_batch_size, num_iterations))
call random_number(harvest)

! The following temporary copies are required by gfortran bug 100650 and possibly 49324
! The following temporary copies, tmp and tmp2, are required by gfortran bug 100650 and possibly 49324
! See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100650 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49324
tmp = [([(tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0)), batch=1, mini_batch_size)], iter=1, num_iterations)]

allocate(tmp(mini_batch_size*num_iterations))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp((iter-1)*mini_batch_size + 1) = tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0))
end do
training_inputs = reshape(tmp, [mini_batch_size, num_iterations])

tmp2 = [([(xor(training_inputs(batch, iter)), batch = 1, mini_batch_size)], iter = 1, num_iterations )]
allocate(tmp2(size(tmp)))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp2((iter-1)*mini_batch_size + 1) = xor(training_inputs(batch, iter))
end do
training_outputs = reshape(tmp2, [mini_batch_size, num_iterations])

mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)]
allocate(mini_batches(size(training_inputs,1)*num_iterations))
do concurrent(iter=1:num_iterations)
mini_batches(iter) = mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter)))
end do

trainable_engine = two_random_hidden_layers()

call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5)
Expand All @@ -306,7 +317,7 @@ function xor_gate_with_random_weights() result(test_passes)

contains

function xor(inputs) result(expected_outputs)
pure function xor(inputs) result(expected_outputs)
type(tensor_t), intent(in) :: inputs
type(tensor_t) expected_outputs
associate(sum_inputs => sum(inputs%values()))
Expand Down Expand Up @@ -396,6 +407,7 @@ function perturbed_identity_converges() result(test_passes)
integer, parameter :: num_epochs = 148
integer, parameter :: num_bins = 5
integer i, bin, epoch

trainable_engine = perturbed_identity_network(perturbation_magnitude=0.1)

associate(num_inputs => trainable_engine%num_inputs(), num_outputs => trainable_engine%num_outputs())
Expand Down

0 comments on commit 3ed73de

Please sign in to comment.