Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overload elementary operations #139

Merged
merged 51 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 47 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
cc9c76c
Overload =,+,*; consistent variable names
jwallwork23 Jun 21, 2024
366ffda
Update existing examples
jwallwork23 Jun 21, 2024
852b154
Add autograd example
jwallwork23 Jun 21, 2024
f7bca7e
Apply Black
jwallwork23 Jun 21, 2024
5232193
Overload - and test
jwallwork23 Jun 21, 2024
a730a40
Overload / and test
jwallwork23 Jun 21, 2024
7abeba7
Overload ** and test
jwallwork23 Jun 21, 2024
c650342
Overload scalar premultiply and test
jwallwork23 Jun 21, 2024
c0c5507
Overload scalar postmultiply and test
jwallwork23 Jun 21, 2024
6aa3768
Tidy overloading test - FIXME
jwallwork23 Jun 21, 2024
7b9126e
Formatting
jwallwork23 Jun 21, 2024
e6c7675
Reformatting
jwallwork23 Jun 21, 2024
9ac1087
Merge branch 'main' into autograd
jwallwork23 Jun 27, 2024
528b4f9
Fix merge issue
jwallwork23 Jun 27, 2024
dfaba4d
Include example 3 for consistency
jwallwork23 Jun 27, 2024
202f77f
DO NOT MERGE (debugging)
jwallwork23 Jun 27, 2024
1eb6d52
Implement torch_to_blob on C++ side
jwallwork23 Jul 15, 2024
60ce4a6
Implement Fortran interface
jwallwork23 Jul 22, 2024
684a711
Use torch_tensor_to_array in example 1
jwallwork23 Jul 22, 2024
d35218e
Use correct data types; raise errors for unsupported cases
jwallwork23 Jul 23, 2024
b5f53f2
Revert changes to example 1
jwallwork23 Jul 23, 2024
cf9a006
Add beginnings of autograd demo
jwallwork23 Jul 23, 2024
b8b4840
Docs for example 5
jwallwork23 Jul 23, 2024
806730c
More detail on uint8 and float16 not being supported
jwallwork23 Jul 23, 2024
0f6ee05
Add notes on float types
jwallwork23 Jul 23, 2024
e2f6a9d
Handle allocation of pointer array
jwallwork23 Jul 24, 2024
5881a3b
Merge branch 'torch_tensor_to_array' into autograd_toarray
jwallwork23 Jul 29, 2024
cdd4433
Merge fixes
jwallwork23 Jul 29, 2024
f3bf975
Merge branch 'main' into autograd
jwallwork23 Oct 31, 2024
6223f36
Update autograd example
jwallwork23 Oct 31, 2024
e055d8b
Merge branch 'main' into autograd
jwallwork23 Nov 11, 2024
5364800
Use assert_allclose in new code
jwallwork23 Nov 11, 2024
80e5a71
Merge branch 'main' into autograd
jwallwork23 Dec 6, 2024
59ffdc4
Use bare import for autograd
jwallwork23 Dec 6, 2024
f504f5e
Lint
jwallwork23 Dec 6, 2024
8379001
Lint
jwallwork23 Dec 9, 2024
a590955
Apply clang-format
jwallwork23 Dec 9, 2024
c12d5c3
Apply clang-format to header
jwallwork23 Dec 9, 2024
cc884c6
Merge branch 'main' into autograd
jwallwork23 Dec 10, 2024
d9000e1
Post-merge fixes
jwallwork23 Dec 10, 2024
c87fd97
test: make windows CI more robust
TomMelt Dec 16, 2024
98917d3
chore: rename variable to torch_path
TomMelt Dec 16, 2024
f0ed978
Reformulate autograd example to test multiply and divide
jwallwork23 Dec 18, 2024
b804d54
Implement postdivide
jwallwork23 Dec 18, 2024
b239207
Merge branch 'test-simple-change' into autograd
jwallwork23 Dec 18, 2024
c0a3fe4
Point to Torch C++ API
jwallwork23 Dec 18, 2024
1e852b4
Update docs on autograd; add reference to looping example
jwallwork23 Dec 18, 2024
8dfa92c
Revert adding example 3 to build
jwallwork23 Dec 19, 2024
70f28a4
Merge branch 'main' into autograd
jwallwork23 Dec 19, 2024
4406fd8
Write as LibTorch in pages/autograd.md
jwallwork23 Dec 20, 2024
e82a9db
Use better links for Torch C++ docs
jwallwork23 Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/workflows/test_suite_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@ jobs:
shell: cmd
run: |
cd src
rem find torch location
for /f "tokens=2*" %%i in ('pip show torch ^| findstr /R "^Location"') do set torch_path=%%i
cmake ^
-Bbuild ^
-G "NMake Makefiles" ^
-DCMAKE_Fortran_FLAGS="/fpscomp:logicals" ^
-DCMAKE_PREFIX_PATH="C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\site-packages" ^
-DCMAKE_PREFIX_PATH=%torch_path% ^
-DCMAKE_BUILD_TYPE=Release ^
-DCMAKE_Fortran_COMPILER=ifx ^
-DCMAKE_C_COMPILER=icx ^
Expand All @@ -85,7 +87,7 @@ jobs:
- name: Integration tests
shell: cmd
run: |
set PATH=C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\site-packages;%PATH%
for /f "tokens=2*" %%i in ('pip show torch ^| findstr /R "^Location"') do set torch_path=%%i
set PATH=C:\Program Files (x86)\FTorch\bin;%PATH%
set PATH=C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\site-packages\torch\lib;%PATH%
set PATH=%torch_path%\torch\lib;%PATH%
run_integration_tests.bat
79 changes: 57 additions & 22 deletions examples/6_Autograd/autograd.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ program example
use, intrinsic :: iso_fortran_env, only : sp => real32

! Import our library for interfacing with PyTorch's Autograd module
use ftorch, only : torch_tensor, torch_kCPU, &
torch_tensor_from_array, torch_tensor_to_array, torch_tensor_delete
use ftorch, only: assignment(=), operator(+), operator(-), operator(*), &
operator(/), operator(**), torch_kCPU, torch_tensor, torch_tensor_delete, &
torch_tensor_from_array, torch_tensor_to_array

! Import our tools module for testing utils
use ftorch_test_utils, only : assert_allclose
Expand All @@ -16,8 +17,9 @@ program example
integer, parameter :: wp = sp

! Set up Fortran data structures
integer, parameter :: n=2, m=5
real(wp), dimension(n,m), target :: in_data
integer, parameter :: n=2, m=1
real(wp), dimension(n,m), target :: in_data1
real(wp), dimension(n,m), target :: in_data2
real(wp), dimension(:,:), pointer :: out_data
real(wp), dimension(n,m) :: expected
integer :: tensor_layout(2) = [1, 2]
Expand All @@ -27,45 +29,78 @@ program example
logical :: test_pass

! Set up Torch data structures
type(torch_tensor) :: tensor
type(torch_tensor) :: a, b, Q

! initialize in_data with some fake data
do j = 1, m
do i = 1, n
in_data(i,j) = ((i-1)*m + j) * 1.0_wp
end do
end do
! Initialise input arrays as in Python example
in_data1(:,1) = [2.0_wp, 3.0_wp]
in_data2(:,1) = [6.0_wp, 4.0_wp]

! Construct a Torch Tensor from a Fortran array
call torch_tensor_from_array(tensor, in_data, tensor_layout, torch_kCPU)
! TODO: Implement requires_grad=.true.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO won't be addressed in this PR.

call torch_tensor_from_array(a, in_data1, tensor_layout, torch_kCPU)
call torch_tensor_from_array(b, in_data2, tensor_layout, torch_kCPU)

! check tensor rank and shape match those of in_data
if (tensor%get_rank() /= 2) then
if ((a%get_rank() /= 2) .or. (b%get_rank() /= 2)) then
print *, "Error :: rank should be 2"
stop 1
end if
if (any(tensor%get_shape() /= [2, 5])) then
print *, "Error :: shape should be (2, 5)"
if (any(a%get_shape() /= [n, m]) .or. any(b%get_shape() /= [n, m])) then
write(6,"('Error :: shape should be (',i1,', ',i1,')')") n, m
stop 1
end if

! Check arithmetic operations work for torch_tensors
write (*,*) "a = ", in_data1(:,1)
write (*,*) "b = ", in_data2(:,1)
Q = 3 * (a**3 - b * b / 3)

! Extract a Fortran array from a Torch tensor
call torch_tensor_to_array(tensor, out_data, shape(in_data))
call torch_tensor_to_array(Q, out_data, shape(in_data1))
write (*,*) "Q = 3 * (a ** 3 - b * b / 3) =", out_data(:,1)

! Check output tensor matches expected value
expected(:,:) = in_data
expected(:,1) = [-12.0_wp, 65.0_wp]
test_pass = assert_allclose(out_data, expected, test_name="torch_tensor_to_array", rtol=1e-5)
if (.not. test_pass) then
call clean_up()
print *, "Error :: out_data does not match expected value"
stop 999
end if

! Check that the data match
! Check first input array is unchanged by the arithmetic operations
expected(:,1) = [2.0_wp, 3.0_wp]
test_pass = assert_allclose(in_data1, expected, test_name="torch_tensor_to_array", rtol=1e-5)
if (.not. test_pass) then
print *, "Error :: in_data does not match out_data"
call clean_up()
print *, "Error :: in_data1 was changed during arithmetic operations"
stop 999
end if

! Cleanup
nullify(out_data)
call torch_tensor_delete(tensor)
! Check second input array is unchanged by the arithmetic operations
expected(:,1) = [6.0_wp, 4.0_wp]
test_pass = assert_allclose(in_data2, expected, test_name="torch_tensor_to_array", rtol=1e-5)
if (.not. test_pass) then
call clean_up()
print *, "Error :: in_data2 was changed during arithmetic operations"
stop 999
end if

! Back-propagation
! TODO: Requires API extension
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neither will this one. (See #158.)


! Cleanup
call clean_up()
write (*,*) "Autograd example ran successfully"

contains

! Subroutine for freeing memory and nullifying pointers used in the example
subroutine clean_up()
nullify(out_data)
call torch_tensor_delete(a)
call torch_tensor_delete(b)
call torch_tensor_delete(Q)
end subroutine clean_up

jatkinson1000 marked this conversation as resolved.
Show resolved Hide resolved
end program example
2 changes: 1 addition & 1 deletion examples/6_Autograd/autograd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

Q = 3 * a**3 - b**2
Q = 3 * (a**3 - b * b / 3)
print(Q)
expect = torch.tensor([-12.0, 65.0])
if not torch.allclose(Q, expect):
Expand Down
4 changes: 3 additions & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
if(CMAKE_BUILD_TESTS)
add_subdirectory(1_SimpleNet)
add_subdirectory(2_ResNet18)
# add_subdirectory(3_MultiGPU)
if(ENABLE_CUDA)
add_subdirectory(3_MultiGPU)
endif()
TomMelt marked this conversation as resolved.
Show resolved Hide resolved
add_subdirectory(4_MultiIO)
# add_subdirectory(5_Looping)
add_subdirectory(6_Autograd)
Expand Down
42 changes: 42 additions & 0 deletions pages/autograd.md
TomMelt marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
title: Online training

[TOC]

## Current state

FTorch has supported offline training of ML models for some time. We are
currently working on extending its functionality to support online training,
too. This will involve exposing the automatic differentiation and
back-propagation functionality in PyTorch/LibTorch.
jwallwork23 marked this conversation as resolved.
Show resolved Hide resolved

In the following, we document a workplan of the related functionality. Each step
below will be updated upon completion.

### Operator overloading

Mathematical operators involving Tensors are overloaded, so that we can compute
expressions involving outputs from one or more ML models.

Whilst it's possible to import such functionality with a bare
```fortran
use ftorch
```
statement, the best practice is to import specifically the operators that you
wish to use. Note that the assignment operator `=` has a slightly different
notation:
```
use ftorch, only: assignment(=), operator(+), operator(-), operator(*), &
operator(/), operator(**)
```

For a concrete example of how to compute mathematical expressions involving
Torch tensors, see the associated
[worked example](https://github.com/Cambridge-ICCS/FTorch/tree/main/examples/6_Autograd).

### The `requires_grad` property

*Not yet implemented.*

### The `backward` operator

*Not yet implemented.*
8 changes: 8 additions & 0 deletions pages/developer.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ and many of our users wish to _"clone-and-go"_ rather than develop, we provide b
Development should only take place in `ftorch.fypp`, however._


### Torch C++ API

When extending or modifying functionality related to C++ header and/or source
files `src/ctorch.h` and `src/ctorch.cpp`, we refer to the Torch C++
[API documentation](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#exhale-class-classat-1-1-tensor)
jatkinson1000 marked this conversation as resolved.
Show resolved Hide resolved
page on the PyTorch website for details.


### git hook

In order to streamline the process of uploading we provide a pre-commit hook in
Expand Down
13 changes: 10 additions & 3 deletions pages/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,16 @@ data to multiple GPU devices.
considers a variant of the SimpleNet demo, which demonstrates how to account for
multiple input tensors and multiple output tensors.

#### 7) Autograd
#### 5) Looping

[This worked example](https://github.com/Cambridge-ICCS/FTorch/tree/main/examples/5_Autograd)
[This worked example](https://github.com/Cambridge-ICCS/FTorch/tree/main/examples/5_Looping)
demonstrates best practices for performing inference on the same network with
different input multiple times in the same workflow.

jatkinson1000 marked this conversation as resolved.
Show resolved Hide resolved
#### 6) Autograd

[This worked example](https://github.com/Cambridge-ICCS/FTorch/tree/main/examples/6_Autograd)
is currently under development. Eventually, it will demonstrate how to perform
automatic differentiation in FTorch by leveraging PyTorch's Autograd module.
Currently, it just demonstrates how to use `torch_tensor_to_array`.
Currently, it just demonstrates how to use `torch_tensor_to_array` and compute
mathematical expressions involving Torch tensors.
7 changes: 6 additions & 1 deletion run_integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
set -eu

CTEST_ARGS=$@
EXAMPLES="1_SimpleNet 2_ResNet18 4_MultiIO 6_Autograd"
EXAMPLES="
1_SimpleNet
2_ResNet18
4_MultiIO
6_Autograd
"
BUILD_DIR=src/build

for EXAMPLE in ${EXAMPLES}; do
Expand Down
10 changes: 6 additions & 4 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ if(CMAKE_BUILD_TESTS)
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/2_ResNet18
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
# file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/3_MultiGPU DESTINATION
# ${CMAKE_CURRENT_SOURCE_DIR}/test/examples )
if(ENABLE_CUDA)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/3_MultiGPU
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
endif()
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/4_MultiIO
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
# file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/5_Looping DESTINATION
# ${CMAKE_CURRENT_SOURCE_DIR}/test/examples )
# file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/5_Looping
# DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/6_Autograd
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
add_subdirectory(test/examples)
Expand Down
90 changes: 90 additions & 0 deletions src/ctorch.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/*
* See
* https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#exhale-class-classat-1-1-tensor
* for more details on the Torch Tensor C++ API.
*/
jatkinson1000 marked this conversation as resolved.
Show resolved Hide resolved
#include <torch/script.h>
#include <torch/torch.h>

Expand Down Expand Up @@ -233,6 +238,91 @@ void torch_tensor_delete(torch_tensor_t tensor) {
delete t;
}

// Create a new tensor holding a copy of the input's data, for overloading the
// Fortran assignment operator (=).
// Note: the copy is detached from the input's autograd graph; gradient
// tracking is re-enabled on the result according to the input's
// requires_grad state.
torch_tensor_t torch_tensor_assign(const torch_tensor_t input) {
  auto in = reinterpret_cast<torch::Tensor *const>(input);
  // Preserve the gradient-tracking setting of the input tensor.
  torch::AutoGradMode enable_grad(in->requires_grad());
  // Construct the output directly rather than default-constructing and
  // reassigning; clone() gives the copy its own storage.
  torch::Tensor *output = new torch::Tensor(in->detach().clone());
  return output;
}

// Elementwise addition of two tensors, for overloading the Fortran (+)
// operator. Returns a newly allocated tensor holding tensor1 + tensor2;
// the caller owns the result and must free it with torch_tensor_delete.
torch_tensor_t torch_tensor_add(const torch_tensor_t tensor1,
                                const torch_tensor_t tensor2) {
  auto t1 = reinterpret_cast<torch::Tensor *const>(tensor1);
  auto t2 = reinterpret_cast<torch::Tensor *const>(tensor2);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t1 + *t2);
  return output;
}

// Elementwise subtraction of two tensors, for overloading the Fortran (-)
// operator. Returns a newly allocated tensor holding tensor1 - tensor2;
// the caller owns the result and must free it with torch_tensor_delete.
torch_tensor_t torch_tensor_subtract(const torch_tensor_t tensor1,
                                     const torch_tensor_t tensor2) {
  auto t1 = reinterpret_cast<torch::Tensor *const>(tensor1);
  auto t2 = reinterpret_cast<torch::Tensor *const>(tensor2);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t1 - *t2);
  return output;
}

// Elementwise multiplication of two tensors, for overloading the Fortran (*)
// operator. Returns a newly allocated tensor holding tensor1 * tensor2;
// the caller owns the result and must free it with torch_tensor_delete.
torch_tensor_t torch_tensor_multiply(const torch_tensor_t tensor1,
                                     const torch_tensor_t tensor2) {
  auto t1 = reinterpret_cast<torch::Tensor *const>(tensor1);
  auto t2 = reinterpret_cast<torch::Tensor *const>(tensor2);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t1 * *t2);
  return output;
}

// Multiplication of a scalar prefactor with a tensor (scalar * tensor), for
// overloading the Fortran (*) operator with a scalar left operand. Returns a
// newly allocated tensor; the caller owns the result and must free it with
// torch_tensor_delete.
torch_tensor_t torch_tensor_premultiply(const torch_data_t scalar,
                                        const torch_tensor_t tensor) {
  auto t = reinterpret_cast<torch::Tensor *const>(tensor);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(scalar * *t);
  return output;
}

// Multiplication of a tensor with a scalar postfactor (tensor * scalar), for
// overloading the Fortran (*) operator with a scalar right operand. Returns a
// newly allocated tensor; the caller owns the result and must free it with
// torch_tensor_delete.
torch_tensor_t torch_tensor_postmultiply(const torch_tensor_t tensor,
                                         const torch_data_t scalar) {
  auto t = reinterpret_cast<torch::Tensor *const>(tensor);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t * scalar);
  return output;
}

// Elementwise division of two tensors, for overloading the Fortran (/)
// operator. Returns a newly allocated tensor holding tensor1 / tensor2;
// the caller owns the result and must free it with torch_tensor_delete.
torch_tensor_t torch_tensor_divide(const torch_tensor_t tensor1,
                                   const torch_tensor_t tensor2) {
  auto t1 = reinterpret_cast<torch::Tensor *const>(tensor1);
  auto t2 = reinterpret_cast<torch::Tensor *const>(tensor2);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t1 / *t2);
  return output;
}

// Division of a tensor by a scalar (tensor / scalar), for overloading the
// Fortran (/) operator with a scalar right operand. Returns a newly allocated
// tensor; the caller owns the result and must free it with
// torch_tensor_delete.
torch_tensor_t torch_tensor_postdivide(const torch_tensor_t tensor,
                                       const torch_data_t scalar) {
  auto t = reinterpret_cast<torch::Tensor *const>(tensor);
  // Construct the result in place instead of default-constructing then
  // assigning.
  torch::Tensor *output = new torch::Tensor(*t / scalar);
  return output;
}

// Elementwise exponentiation of a tensor by a scalar exponent, for overloading
// the Fortran (**) operator. Returns a newly allocated tensor; the caller owns
// the result and must free it with torch_tensor_delete.
torch_tensor_t torch_tensor_power(const torch_tensor_t tensor,
                                  const torch_data_t exponent) {
  auto t = reinterpret_cast<torch::Tensor *const>(tensor);
  // Qualify the call as torch::pow rather than relying on ADL to resolve an
  // unqualified pow; construct the result in place.
  torch::Tensor *output = new torch::Tensor(torch::pow(*t, exponent));
  return output;
}

torch_jit_script_module_t torch_jit_load(const char *filename,
const torch_device_t device_type = torch_kCPU,
const int device_index = -1,
Expand Down
Loading
Loading