Skip to content

Commit

Permalink
Merge pull request #114 from BerkeleyLab/enable-openmp
Browse files Browse the repository at this point in the history
  • Loading branch information
rouson authored Jan 18, 2024
2 parents a173c71 + 0d0311d commit 0ef31d6
Show file tree
Hide file tree
Showing 11 changed files with 47 additions and 26 deletions.
2 changes: 1 addition & 1 deletion example/concurrent-inferences.f90
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ program concurrent_inferences

if (len(network_file_name%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example concurrent-inferences -- --network "<file-name>"'
'Usage: fpm run --example concurrent-inferences --profile release --flag "-fopenmp" -- --network "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/learn-addition.f90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ program learn_addition

if (len(final_network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example learn-addition -- --output-file "<file-name>"'
'Usage: fpm run --example learn-addition --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/learn-exponentiation.f90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ program learn_exponentiation

if (len(final_network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example train-polynomials -- --output-file "<file-name>"'
'Usage: fpm run --example train-polynomials --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/learn-microphysics-procedures.f90
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ program learn_microphysics_procedures

if (len(network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run learn-microphysics-procedures -- --output-file "<file-name>"'
'Usage: fpm run learn-microphysics-procedures --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

call system_clock(counter_start, clock_rate)
Expand Down
2 changes: 1 addition & 1 deletion example/learn-multiplication.f90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ program learn_multiplication

if (len(final_network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example learn-multiplication -- --output-file "<file-name>"'
'Usage: fpm run --example learn-multiplication --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/learn-power-series.f90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ program learn_power_series

if (len(final_network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example learn-power-series -- --output-file "<file-name>"'
'Usage: fpm run --example learn-power-series --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/learn-saturated-mixing-ratio.f90
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ program train_saturated_mixture_ratio

if (len(network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example learn-saturated-mixing-ratio -- --output-file "<file-name>"'
'Usage: fpm run --example learn-saturated-mixing-ratio --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

call system_clock(counter_start, clock_rate)
Expand Down
2 changes: 1 addition & 1 deletion example/train-and-write.f90
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ program train_and_write

if (len(final_network_file%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example train-and-write -- --output-file "<file-name>"'
'Usage: fpm run --example train-and-write --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

block
Expand Down
2 changes: 1 addition & 1 deletion example/write-read-infer.f90
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ program write_read_infer

if (len(file_name%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example write-read-infer -- --output-file "<file-name>"'
'Usage: fpm run --example write-read-infer --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
end if

call write_read_query_infer(file_name)
Expand Down
21 changes: 13 additions & 8 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ install_fpm_from_source()
fi
}

# if no fpm, install either through homebrew, or gfortran compiling fpm.F90
# if no fpm, install either through homebrew or by compiling fpm.F90 with gfortran
if ! command -v fpm > /dev/null ; then
if ! command -v brew > /dev/null ; then
if ! command -v gfortran > /dev/null ; then
Expand All @@ -77,15 +77,20 @@ fi
FPM_FC=${FC:-"gfortran-13"}
FPM_CC=${CC:-"gcc-13"}

mkdir -p build

fpm test
fpm test --profile release --flag "-fopenmp"

echo ""
echo "____________________ Inference-Engine has been set up! _______________________"
echo ""
echo "To run one of the programs in the example subdirectory, enter a command of the"
echo "following form at a shell command prompt after replacing <example-base-name>"
echo "with the base name of a file in the example/ subdirectory:"
echo "Enter the command below to see the names of example use cases that you can run:"
echo ""
echo "fpm run --example"
echo ""
echo "To run an example, execute the following command after replacing <name> with"
echo "one of the names listed by the above command:"
echo ""
echo "fpm run --profile release --flag \"-fopenmp\" --example <name>"
echo ""
echo "where the '--profile release' and '--flag \"-fopenmp\"' might reduce run times."
echo "Example programs print usage information if additional arguments are required."
echo ""
echo "fpm run --example <example-base-name> --profile release"
34 changes: 25 additions & 9 deletions test/trainable_engine_test_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -281,32 +281,47 @@ function xor_gate_with_random_weights() result(test_passes)
integer, parameter :: num_inputs=2, mini_batch_size = 1, num_iterations=500000
!! Depending on where in the random-number sequence the weights start, this test can pass for lower
!! numbers of iterations, e.g., 400000. Using more iterations gives more robust convergence.
integer batch, iter, i
integer batch, iter

allocate(harvest(num_inputs, mini_batch_size, num_iterations))
call random_number(harvest)

! The following temporary copies are required by gfortran bug 100650 and possibly 49324
! The following temporary copies, tmp and tmp2, are required by gfortran bug 100650 and possibly 49324
! See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100650 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49324
tmp = [([(tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0)), batch=1, mini_batch_size)], iter=1, num_iterations)]

allocate(tmp(mini_batch_size*num_iterations))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp((iter-1)*mini_batch_size + 1) = tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0))
end do
training_inputs = reshape(tmp, [mini_batch_size, num_iterations])

tmp2 = [([(xor(training_inputs(batch, iter)), batch = 1, mini_batch_size)], iter = 1, num_iterations )]
allocate(tmp2(size(tmp)))
do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
tmp2((iter-1)*mini_batch_size + 1) = xor(training_inputs(batch, iter))
end do
training_outputs = reshape(tmp2, [mini_batch_size, num_iterations])

mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)]
allocate(mini_batches(size(training_inputs,1)*num_iterations))
do concurrent(iter=1:num_iterations)
mini_batches(iter) = mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter)))
end do

trainable_engine = two_random_hidden_layers()

call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5)

test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])]
expected_test_outputs = [(xor(test_inputs(i)), i=1, size(test_inputs))]
actual_outputs = trainable_engine%infer(test_inputs)
test_passes = [(abs(actual_outputs(i)%values() - expected_test_outputs(i)%values()) < tolerance, i=1, size(actual_outputs))]
block
integer i

expected_test_outputs = [(xor(test_inputs(i)), i=1, size(test_inputs))]
actual_outputs = trainable_engine%infer(test_inputs)
test_passes = [(abs(actual_outputs(i)%values() - expected_test_outputs(i)%values()) < tolerance, i=1, size(actual_outputs))]
end block

contains

function xor(inputs) result(expected_outputs)
pure function xor(inputs) result(expected_outputs)
type(tensor_t), intent(in) :: inputs
type(tensor_t) expected_outputs
associate(sum_inputs => sum(inputs%values()))
Expand Down Expand Up @@ -396,6 +411,7 @@ function perturbed_identity_converges() result(test_passes)
integer, parameter :: num_epochs = 148
integer, parameter :: num_bins = 5
integer i, bin, epoch

trainable_engine = perturbed_identity_network(perturbation_magnitude=0.1)

associate(num_inputs => trainable_engine%num_inputs(), num_outputs => trainable_engine%num_outputs())
Expand Down

0 comments on commit 0ef31d6

Please sign in to comment.