Merge pull request #96 from BerkeleyLab/read-training-configuration
App reads training configuration JSON file
rouson authored Nov 6, 2023
2 parents 7aebf60 + 8f40466 commit dabc96c
Showing 23 changed files with 248 additions and 83 deletions.
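
For context, the training configuration file the app now reads plausibly looks like the sketch below. The "network configuration" object mirrors the to_json procedure in network_configuration_s.f90 further down; the "hyperparameters" key names and the overall nesting are assumptions (the hyperparameters to_json body is not shown in this diff), with values copied from example/print-training-configuration.f90:

{
    "hyperparameters": {
        "mini-batches" : 10,
        "learning rate" : 1.5,
        "optimizer" : "adam"
    },
    "network configuration": {
        "skip connections" : false,
        "nodes per layer" : [2,72,2],
        "activation function" : "sigmoid"
    }
}

Only the "network configuration" keys above are confirmed by the diff; treat the rest as a plausible shape, not the committed format.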
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
# NetCDF
*.nc

# Prerequisites
*.d

42 changes: 28 additions & 14 deletions app/train-cloud-microphysics.f90
@@ -38,15 +38,16 @@ program train_cloud_microphysics

!! Internal dependencies:
use inference_engine_m, only : &
inference_engine_t, mini_batch_t, input_output_pair_t, tensor_t, trainable_engine_t, rkind, NetCDF_file_t, sigmoid_t
inference_engine_t, mini_batch_t, input_output_pair_t, tensor_t, trainable_engine_t, rkind, NetCDF_file_t, sigmoid_t, &
training_configuration_t
use ubounds_m, only : ubounds_t
implicit none

integer(int64) t_start, t_finish, clock_rate
type(command_line_t) command_line
type(file_t) plot_file
type(string_t), allocatable :: lines(:)
character(len=*), parameter :: plot_file_name = "cost.plt"
character(len=*), parameter :: plot_file_name = "cost.plt", training_configuration_json = "training_configuration.json"
character(len=:), allocatable :: base_name, stride_string, epochs_string, last_line
integer plot_unit, stride, num_epochs, previous_epoch
logical preexisting_plot_file
@@ -76,7 +77,7 @@ program train_cloud_microphysics
read(last_line,*) previous_epoch
end if

call read_train_write
call read_train_write(training_configuration_t(file_t(string_t(training_configuration_json))))

close(plot_unit)
call system_clock(t_finish)
@@ -85,7 +86,8 @@

contains

subroutine read_train_write
subroutine read_train_write(training_configuration)
type(training_configuration_t), intent(in) :: training_configuration
real, allocatable, dimension(:,:,:,:) :: &
pressure_in , potential_temperature_in , temperature_in , &
pressure_out, potential_temperature_out, temperature_out, &
@@ -195,7 +197,7 @@ subroutine read_train_write
else
close(network_unit)
print *,"Initializing a new network"
trainable_engine = new_engine(num_hidden_layers=6, nodes_per_hidden_layer=16, num_inputs=8, num_outputs=6, random=.false.)
trainable_engine = new_engine(training_configuration, randomize=.true.)
end if

print *,"Defining tensors from time steps 1 through", t_end, "with strides of", stride
@@ -229,7 +231,12 @@ subroutine read_train_write
end associate
end associate

associate(num_pairs => size(input_output_pairs), n_bins => 1) ! also tried n_bins => size(input_output_pairs)/10000
associate( &
num_pairs => size(input_output_pairs), &
n_bins => training_configuration%mini_batches(), &
adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), &
learning_rate => training_configuration%learning_rate() &
)
bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)]

print *,"Training network"
@@ -239,7 +246,7 @@ subroutine read_train_write

call shuffle(input_output_pairs) ! set up for stochastic gradient descent
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost)
call trainable_engine%train(mini_batches, cost, adam, learning_rate)
print *, epoch, minval(cost), maxval(cost), sum(cost)/size(cost)
write(plot_unit,*) epoch, minval(cost), maxval(cost), sum(cost)/size(cost)

@@ -267,19 +274,26 @@

end subroutine read_train_write

function new_engine(num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_outputs, random) result(trainable_engine)
integer, intent(in) :: num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_outputs
logical, intent(in) :: random
function new_engine(training_configuration, randomize) result(trainable_engine)
logical, intent(in) :: randomize
type(training_configuration_t), intent(in) :: training_configuration
type(trainable_engine_t) trainable_engine
real(rkind), allocatable :: w(:,:,:), b(:,:)
character(len=len('YYYYMMDD')) date
integer l

call date_and_time(date)

associate(nodes => [num_inputs, [(nodes_per_hidden_layer, l = 1, num_hidden_layers)], num_outputs])
associate( &
nodes => training_configuration%nodes_per_layer(), &
activation => training_configuration%differentiable_activation_strategy(), &
residual_network => string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) &
)
associate(max_nodes => maxval(nodes), layers => size(nodes))

allocate(w(max_nodes, max_nodes, layers-1), b(max_nodes, max_nodes))

if (random) then
if (randomize) then
call random_number(b)
call random_number(w)
else
@@ -288,8 +302,8 @@ function new_engine(num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_o
end if

trainable_engine = trainable_engine_t( &
nodes = nodes, weights = w, biases = b, differentiable_activation_strategy = sigmoid_t(), metadata = &
[string_t("Microphysics"), string_t("Damian Rouson"), string_t("2023-08-18"), string_t("sigmoid"), string_t("false")] &
nodes = nodes, weights = w, biases = b, differentiable_activation_strategy = activation, metadata = &
[string_t("Microphysics"), string_t("Inference Engine"), string_t(date), activation%function_name(), residual_network] &
)
end associate
end associate
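The residual_network expression in new_engine above relies on a Fortran subtlety: merge() requires its two string sources to have equal length, so "true" is padded with a trailing blank to match "false" and the blank is trimmed afterward. A minimal self-contained sketch of the idiom:

program merge_string_demo
  !! Sketch of the merge/trim idiom used in new_engine: both string
  !! sources of merge() must have equal length, hence the padded "true ".
  implicit none
  logical :: skip_connections = .false.
  print *, trim(merge("true ", "false", skip_connections)) ! prints "false"
end program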
2 changes: 1 addition & 1 deletion example/fit-polynomials.f90
@@ -58,7 +58,7 @@ program train_polynomials
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
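This and the remaining example programs now pass learning_rate alongside adam=.true.; the library's actual optimizer code lives in files this diff does not show. To make the two arguments concrete, here is a hypothetical sketch of one bias-corrected Adam update for a scalar parameter, assuming the customary defaults for the moment-decay constants:

program adam_step_demo
  !! Hypothetical sketch of a single Adam update step; beta1, beta2, and
  !! epsilon take the customary defaults, which this diff does not confirm.
  implicit none
  real, parameter :: learning_rate = 1.5, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8
  real :: w = 0., gradient = 0.5, m = 0., v = 0.
  integer :: t = 1
  m = beta1*m + (1 - beta1)*gradient     ! first-moment (mean) estimate
  v = beta2*v + (1 - beta2)*gradient**2  ! second-moment (variance) estimate
  w = w - learning_rate*(m/(1 - beta1**t))/(sqrt(v/(1 - beta2**t)) + epsilon)
  print *, w ! parameter after one bias-corrected step
end program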
2 changes: 1 addition & 1 deletion example/learn-addition.f90
@@ -77,7 +77,7 @@ program train_polynomials
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
2 changes: 1 addition & 1 deletion example/learn-exponentiation.f90
@@ -77,7 +77,7 @@ program train_polynomials
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
2 changes: 1 addition & 1 deletion example/learn-microphysics-procedures.f90
@@ -86,7 +86,7 @@ program learn_microphysics_procedures
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
call system_clock(counter_end, clock_rate)

associate( &
2 changes: 1 addition & 1 deletion example/learn-multiplication.f90
@@ -77,7 +77,7 @@ program train_polynomials
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
2 changes: 1 addition & 1 deletion example/learn-power-series.f90
@@ -79,7 +79,7 @@ program train_polynomials
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
2 changes: 1 addition & 1 deletion example/learn-saturated-mixing-ratio.f90
@@ -85,7 +85,7 @@ program train_saturated_mixture_ratio
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
call system_clock(counter_end, clock_rate)

associate( &
8 changes: 5 additions & 3 deletions example/print-training-configuration.f90
@@ -6,10 +6,12 @@ program print_training_configuration

associate(training_configuration => training_configuration_t( &
hyperparameters_t(mini_batches=10, learning_rate=1.5, optimizer = "adam"), &
network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_function="sigmoid") &
network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_name="sigmoid") &
))
associate(json_file => file_t(training_configuration%to_json()))
call json_file%write_lines()
associate(lines => training_configuration%to_json())
associate(json_file => file_t(lines))
call json_file%write_lines()
end associate
end associate
end associate
end program
2 changes: 1 addition & 1 deletion example/train-and-write.f90
@@ -65,7 +65,7 @@ program train_and_write
call random_number(random_numbers)
call shuffle(input_output_pairs, random_numbers)
mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
call trainable_engine%train(mini_batches, cost, adam=.true.)
call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
print *,sum(cost)/size(cost)
end do
end block
4 changes: 2 additions & 2 deletions fpm.toml
@@ -1,10 +1,10 @@
name = "inference-engine"
version = "0.5.0"
license = "license"
author = "Damian Rouson, Tan Nguyen, Jordan Welsman"
author = "Damian Rouson, Tan Nguyen, Jordan Welsman, David Torres"
maintainer = "rouson@lbl.gov"

[dependencies]
assert = {git = "https://github.com/sourceryinstitute/assert", tag = "1.5.0"}
sourcery = {git = "https://github.com/sourceryinstitute/sourcery", tag = "4.4.3"}
sourcery = {git = "https://github.com/sourceryinstitute/sourcery", tag = "4.4.4"}
netcdf-interfaces = {git = "https://github.com/rouson/netcdf-interfaces.git", branch = "implicit-interfaces"}
24 changes: 23 additions & 1 deletion src/inference_engine/hyperparameters_m.f90
@@ -1,5 +1,6 @@
module hyperparameters_m
use sourcery_m, only : string_t
use kind_parameters_m, only : rkind
implicit none

private
@@ -14,7 +15,10 @@ module hyperparameters_m
procedure :: to_json
procedure :: equals
generic :: operator(==) => equals
end type
procedure :: mini_batches
procedure :: optimizer_name
procedure :: learning_rate
end type

interface hyperparameters_t

@@ -48,6 +52,24 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs)
logical lhs_equals_rhs
end function

elemental module function mini_batches(self) result(num_mini_batches)
implicit none
class(hyperparameters_t), intent(in) :: self
integer num_mini_batches
end function

elemental module function optimizer_name(self) result(identifier)
implicit none
class(hyperparameters_t), intent(in) :: self
type(string_t) identifier
end function


elemental module function learning_rate(self) result(rate)
implicit none
class(hyperparameters_t), intent(in) :: self
real(rkind) rate
end function
end interface

end module
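
A hypothetical exercise of the three new accessors, with constructor arguments copied from example/print-training-configuration.f90; the string() accessor on the string_t result comes from the sourcery dependency and is assumed here:

program query_hyperparameters
  !! Hypothetical sketch exercising the accessors added above.
  use hyperparameters_m, only : hyperparameters_t
  implicit none
  type(hyperparameters_t) hyperparameters
  hyperparameters = hyperparameters_t(mini_batches=10, learning_rate=1.5, optimizer="adam")
  print *, hyperparameters%mini_batches()  ! 10
  print *, hyperparameters%learning_rate() ! 1.5
  associate(optimizer => hyperparameters%optimizer_name())
    print *, optimizer%string() ! adam (string() is sourcery string_t's accessor, assumed)
  end associate
end program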
12 changes: 12 additions & 0 deletions src/inference_engine/hyperparameters_s.f90
@@ -63,4 +63,16 @@
]
end procedure

module procedure mini_batches
num_mini_batches = self%mini_batches_
end procedure

module procedure optimizer_name
identifier = string_t(self%optimizer_)
end procedure

module procedure learning_rate
rate = self%learning_rate_
end procedure

end submodule hyperparameters_s
28 changes: 25 additions & 3 deletions src/inference_engine/network_configuration_m.f90
@@ -9,11 +9,14 @@ module network_configuration_m
private
logical :: skip_connections_ = .false.
integer, allocatable :: nodes_per_layer_(:)
character(len=:), allocatable :: activation_function_
character(len=:), allocatable :: activation_name_
contains
procedure :: to_json
procedure :: equals
generic :: operator(==) => equals
procedure :: activation_name
procedure :: nodes_per_layer
procedure :: skip_connections
end type

interface network_configuration_t
@@ -24,11 +27,11 @@ pure module function from_json(lines) result(network_configuration)
type(network_configuration_t) network_configuration
end function

pure module function from_components(skip_connections, nodes_per_layer, activation_function) result(network_configuration)
pure module function from_components(skip_connections, nodes_per_layer, activation_name) result(network_configuration)
implicit none
logical, intent(in) :: skip_connections
integer, intent(in) :: nodes_per_layer(:)
character(len=*), intent(in) :: activation_function
character(len=*), intent(in) :: activation_name
type(network_configuration_t) network_configuration
end function

@@ -48,6 +51,25 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs)
logical lhs_equals_rhs
end function

elemental module function activation_name(self) result(string)
implicit none
class(network_configuration_t), intent(in) :: self
type(string_t) string
end function

pure module function nodes_per_layer(self) result(nodes)
implicit none
class(network_configuration_t), intent(in) :: self
integer, allocatable :: nodes(:)
end function

elemental module function skip_connections(self) result(using_skip)
implicit none
class(network_configuration_t), intent(in) :: self
logical using_skip
end function


end interface

end module
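
Analogously, a hypothetical exercise of the new network_configuration_t accessors, again with constructor arguments copied from example/print-training-configuration.f90:

program query_network_configuration
  !! Hypothetical sketch exercising the accessors declared above.
  use network_configuration_m, only : network_configuration_t
  implicit none
  type(network_configuration_t) network_configuration
  network_configuration = network_configuration_t( &
    skip_connections=.false., nodes_per_layer=[2,72,2], activation_name="sigmoid")
  print *, network_configuration%nodes_per_layer()  ! 2 72 2
  print *, network_configuration%skip_connections() ! F
  associate(activation => network_configuration%activation_name())
    print *, activation%string() ! sigmoid (string() is sourcery string_t's accessor, assumed)
  end associate
end program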
25 changes: 19 additions & 6 deletions src/inference_engine/network_configuration_s.f90
@@ -5,23 +5,24 @@

character(len=*), parameter :: skip_connections_key = "skip connections"
character(len=*), parameter :: nodes_per_layer_key = "nodes per layer"
character(len=*), parameter :: activation_function_key = "activation function"
character(len=*), parameter :: activation_name_key = "activation function"

contains

module procedure from_components
network_configuration%skip_connections_ = skip_connections
network_configuration%nodes_per_layer_ = nodes_per_layer
network_configuration%activation_function_ = activation_function
network_configuration%activation_name_ = activation_name
end procedure

module procedure equals

call assert(allocated(lhs%activation_function_) .and. allocated(rhs%activation_function_), "network_configuration_s(equals): allocated activation_functions")
call assert(allocated(lhs%activation_name_) .and. allocated(rhs%activation_name_), &
"network_configuration_s(equals): allocated({lhs,rhs}%activation_name_)")

lhs_equals_rhs = &
lhs%skip_connections_ .eqv. rhs%skip_connections_ .and. &
lhs%activation_function_ == rhs%activation_function_ .and. &
lhs%activation_name_ == rhs%activation_name_ .and. &
all(lhs%nodes_per_layer_ == rhs%nodes_per_layer_)

end procedure
@@ -37,7 +38,7 @@
network_configuration_key_found = .true.
network_configuration%skip_connections_ = lines(l+1)%get_json_value(string_t(skip_connections_key), mold=.true.)
network_configuration%nodes_per_layer_ = lines(l+2)%get_json_value(string_t(nodes_per_layer_key), mold=[integer::])
network_configuration%activation_function_ = lines(l+3)%get_json_value(string_t(activation_function_key), mold=string_t(""))
network_configuration%activation_name_ = lines(l+3)%get_json_value(string_t(activation_name_key), mold=string_t(""))
return
end if
end do
@@ -60,9 +61,21 @@
string_t(indent // '"network configuration": {'), &
string_t(indent // indent // '"' // skip_connections_key // '" : ' // trim(adjustl(skip_connections_string )) // ','), &
string_t(indent // indent // '"' // nodes_per_layer_key // '" : [' // trim(adjustl(nodes_per_layer_string )) // '],'), &
string_t(indent // indent // '"' // activation_function_key // '" : "' // trim(adjustl(self%activation_function_)) // '"' ), &
string_t(indent // indent // '"' // activation_name_key // '" : "' // trim(adjustl(self%activation_name_)) // '"' ), &
string_t(indent // '}') &
]
end procedure

module procedure activation_name
string = self%activation_name_
end procedure

module procedure nodes_per_layer
nodes = self%nodes_per_layer_
end procedure

module procedure skip_connections
using_skip = self%skip_connections_
end procedure

end submodule network_configuration_s