From cbd9b95b8063c919cf78ba9290bd663b41fe5f3e Mon Sep 17 00:00:00 2001
From: Damian Rouson
Date: Wed, 17 Jan 2024 21:10:53 -0800
Subject: [PATCH 1/2] build(setup): use OpenMP, work around compiler bug

- Edit setup.sh script to compile with OpenMP support
- Work around a compiler bug triggered by enabling OpenMP

In a future commit, we will research the utility of OpenMP for GPU offloading.
---
 setup.sh                         | 21 ++++++++++++--------
 test/trainable_engine_test_m.f90 | 34 +++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/setup.sh b/setup.sh
index 65de6f1ef..f270d4396 100755
--- a/setup.sh
+++ b/setup.sh
@@ -59,7 +59,7 @@ install_fpm_from_source()
   fi
 }
 
-# if no fpm, install either through homebrew, or gfortran compiling fpm.F90
+# if no fpm, install either through homebrew or by compiling fpm.F90 with gfortran
 if ! command -v fpm > /dev/null ; then
   if ! command -v brew > /dev/null ; then
     if ! command -v gfortran > /dev/null ; then
@@ -77,15 +77,20 @@
 FPM_FC=${FC:-"gfortran-13"}
 FPM_CC=${CC:-"gcc-13"}
 
-mkdir -p build
-
-fpm test
+fpm test --profile release --flag "-fopenmp"
 
 echo ""
 echo "____________________ Inference-Engine has been set up! _______________________"
 echo ""
-echo "To run one of the programs in the example subdirectory, enter a command of the"
-echo "following form at a shell command prompt after replacing <example-name>"
-echo "with the base name of a file in the example/ subdirectory:"
+echo "Enter the command below to see the names of example use cases that you can run:"
+echo ""
+echo "fpm run --example"
+echo ""
+echo "To run an example, execute the following command after replacing <example-name> with"
+echo "one of the names listed by the above command:"
+echo ""
+echo "fpm run --profile release --flag \"-fopenmp\" --example <example-name>"
+echo ""
+echo "where the '--profile release' and '--flag \"-fopenmp\"' flags might reduce run times."
+echo "Example programs print usage information if additional arguments are required."
 echo ""
-echo "fpm run --example <example-name> --profile release"

diff --git a/test/trainable_engine_test_m.f90 b/test/trainable_engine_test_m.f90
index bc1409033..ad45735e1 100644
--- a/test/trainable_engine_test_m.f90
+++ b/test/trainable_engine_test_m.f90
@@ -281,32 +281,47 @@ function xor_gate_with_random_weights() result(test_passes)
     integer, parameter :: num_inputs=2, mini_batch_size = 1, num_iterations=500000
       !! Depending on where in the random-number sequence the weights start, this test can pass for lower
       !! numbers of iterations, e.g., 400000. Using more iterations gives more robust convergence.
-    integer batch, iter, i
+    integer batch, iter
 
     allocate(harvest(num_inputs, mini_batch_size, num_iterations))
     call random_number(harvest)
 
-    ! The following temporary copies are required by gfortran bug 100650 and possibly 49324
+    ! The following temporary copies, tmp and tmp2, are required by gfortran bug 100650 and possibly 49324
     ! See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100650 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49324
-    tmp = [([(tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0)), batch=1, mini_batch_size)], iter=1, num_iterations)]
+
+    allocate(tmp(mini_batch_size*num_iterations))
+    do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
+      tmp((iter-1)*mini_batch_size + 1) = tensor_t(merge(true, false, harvest(:,batch,iter) < 0.5E0))
+    end do
     training_inputs = reshape(tmp, [mini_batch_size, num_iterations])
 
-    tmp2 = [([(xor(training_inputs(batch, iter)), batch = 1, mini_batch_size)], iter = 1, num_iterations )]
+    allocate(tmp2(size(tmp)))
+    do concurrent(batch = 1: mini_batch_size, iter = 1:num_iterations)
+      tmp2((iter-1)*mini_batch_size + 1) = xor(training_inputs(batch, iter))
+    end do
     training_outputs = reshape(tmp2, [mini_batch_size, num_iterations])
 
-    mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)]
+    allocate(mini_batches(size(training_inputs,1)*num_iterations))
+    do concurrent(iter=1:num_iterations)
+      mini_batches(iter) = mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter)))
+    end do
+
     trainable_engine = two_random_hidden_layers()
     call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5)
 
     test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])]
 
-    expected_test_outputs = [(xor(test_inputs(i)), i=1, size(test_inputs))]
-    actual_outputs = trainable_engine%infer(test_inputs)
-    test_passes = [(abs(actual_outputs(i)%values() - expected_test_outputs(i)%values()) < tolerance, i=1, size(actual_outputs))]
+    block
+      integer i
+
+      expected_test_outputs = [(xor(test_inputs(i)), i=1, size(test_inputs))]
+      actual_outputs = trainable_engine%infer(test_inputs)
+      test_passes = [(abs(actual_outputs(i)%values() - expected_test_outputs(i)%values()) < tolerance, i=1, size(actual_outputs))]
+    end block
 
   contains
 
-    function xor(inputs) result(expected_outputs)
+    pure function xor(inputs) result(expected_outputs)
       type(tensor_t), intent(in) :: inputs
       type(tensor_t) expected_outputs
       associate(sum_inputs => sum(inputs%values()))
@@ -396,6 +411,7 @@ function perturbed_identity_converges() result(test_passes)
     integer, parameter :: num_epochs = 148
     integer, parameter :: num_bins = 5
     integer i, bin, epoch
+
     trainable_engine = perturbed_identity_network(perturbation_magnitude=0.1)
 
     associate(num_inputs => trainable_engine%num_inputs(), num_outputs => trainable_engine%num_outputs())

From 0d0311d837038642ebb13b8d41838a3d13ea437c Mon Sep 17 00:00:00 2001
From: Damian Rouson
Date: Wed, 17 Jan 2024 21:11:24 -0800
Subject: [PATCH 2/2] doc(example): adjust usage info

Add flags to each example's usage output so that the recommended commands
match the arguments employed in setup.sh. This prevents unnecessary
rebuilding of the software stack.
---
 example/concurrent-inferences.f90         | 2 +-
 example/learn-addition.f90                | 2 +-
 example/learn-exponentiation.f90          | 2 +-
 example/learn-microphysics-procedures.f90 | 2 +-
 example/learn-multiplication.f90          | 2 +-
 example/learn-power-series.f90            | 2 +-
 example/learn-saturated-mixing-ratio.f90  | 2 +-
 example/train-and-write.f90               | 2 +-
 example/write-read-infer.f90              | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/example/concurrent-inferences.f90 b/example/concurrent-inferences.f90
index b2620ac24..65e35d0f8 100644
--- a/example/concurrent-inferences.f90
+++ b/example/concurrent-inferences.f90
@@ -16,7 +16,7 @@ program concurrent_inferences
 
   if (len(network_file_name%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example concurrent-inferences -- --network "<file-name>"'
+      'Usage: fpm run --example concurrent-inferences --profile release --flag "-fopenmp" -- --network "<file-name>"'
   end if
 
   block
diff --git a/example/learn-addition.f90 b/example/learn-addition.f90
index 3358a800f..f94e089e5 100644
--- a/example/learn-addition.f90
+++ b/example/learn-addition.f90
@@ -34,7 +34,7 @@ program learn_addition
 
   if (len(final_network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example learn-addition -- --output-file "<file-name>"'
+      'Usage: fpm run --example learn-addition --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   block
diff --git a/example/learn-exponentiation.f90 b/example/learn-exponentiation.f90
index 404095038..411a42ea5 100644
--- a/example/learn-exponentiation.f90
+++ b/example/learn-exponentiation.f90
@@ -34,7 +34,7 @@ program learn_exponentiation
 
   if (len(final_network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example train-polynomials -- --output-file "<file-name>"'
+      'Usage: fpm run --example train-polynomials --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   block
diff --git a/example/learn-microphysics-procedures.f90 b/example/learn-microphysics-procedures.f90
index eec4cef8f..5a115c690 100644
--- a/example/learn-microphysics-procedures.f90
+++ b/example/learn-microphysics-procedures.f90
@@ -19,7 +19,7 @@ program learn_microphysics_procedures
 
   if (len(network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run learn-microphysics-procedures -- --output-file "<file-name>"'
+      'Usage: fpm run learn-microphysics-procedures --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   call system_clock(counter_start, clock_rate)
diff --git a/example/learn-multiplication.f90 b/example/learn-multiplication.f90
index 18ba510fc..28baca85b 100644
--- a/example/learn-multiplication.f90
+++ b/example/learn-multiplication.f90
@@ -34,7 +34,7 @@ program learn_multiplication
 
   if (len(final_network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example learn-multiplication -- --output-file "<file-name>"'
+      'Usage: fpm run --example learn-multiplication --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   block
diff --git a/example/learn-power-series.f90 b/example/learn-power-series.f90
index b10cfc2c3..c73b97a0c 100644
--- a/example/learn-power-series.f90
+++ b/example/learn-power-series.f90
@@ -34,7 +34,7 @@ program learn_power_series
 
   if (len(final_network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example learn-power-series -- --output-file "<file-name>"'
+      'Usage: fpm run --example learn-power-series --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   block
diff --git a/example/learn-saturated-mixing-ratio.f90 b/example/learn-saturated-mixing-ratio.f90
index fe30d982e..472653cb1 100644
--- a/example/learn-saturated-mixing-ratio.f90
+++ b/example/learn-saturated-mixing-ratio.f90
@@ -18,7 +18,7 @@ program train_saturated_mixture_ratio
 
   if (len(network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example learn-saturated-mixing-ratio -- --output-file "<file-name>"'
+      'Usage: fpm run --example learn-saturated-mixing-ratio --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   call system_clock(counter_start, clock_rate)
diff --git a/example/train-and-write.f90 b/example/train-and-write.f90
index 81de22dac..a2f9ce313 100644
--- a/example/train-and-write.f90
+++ b/example/train-and-write.f90
@@ -21,7 +21,7 @@ program train_and_write
 
   if (len(final_network_file%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example train-and-write -- --output-file "<file-name>"'
+      'Usage: fpm run --example train-and-write --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   block
diff --git a/example/write-read-infer.f90 b/example/write-read-infer.f90
index 1a9854333..cbfd0d686 100644
--- a/example/write-read-infer.f90
+++ b/example/write-read-infer.f90
@@ -19,7 +19,7 @@ program write_read_infer
 
   if (len(file_name%string())==0) then
     error stop new_line('a') // new_line('a') // &
-      'Usage: ./build/run-fpm.sh run --example write-read-infer -- --output-file "<file-name>"'
+      'Usage: fpm run --example write-read-infer --profile release --flag "-fopenmp" -- --output-file "<file-name>"'
   end if
 
   call write_read_query_infer(file_name)
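
A minimal usage sketch, not taken from the patches above: the OpenMP-enabled commands recommended in setup.sh can be combined with the standard OpenMP environment variable OMP_NUM_THREADS to control the thread count at run time. The thread count of 8 is an arbitrary illustration, and <example-name> is a placeholder for a name listed by 'fpm run --example'; whether multithreading helps depends on the compiler's OpenMP runtime and the example being run.

    # run the test suite with OpenMP enabled, using 8 threads
    OMP_NUM_THREADS=8 fpm test --profile release --flag "-fopenmp"

    # run one example program the same way
    OMP_NUM_THREADS=8 fpm run --profile release --flag "-fopenmp" --example <example-name>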