From daae187f1150cc51dbce0dc717b5f46e2ae45259 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 25 Nov 2024 04:54:58 -0800 Subject: [PATCH] fix(concur-infer): omp default(none), collapse(3) This commit adds `default(none)` to the `omp parallel do` directive, so that the data-sharing attribute of every variable referenced in the construct must be declared explicitly, and adds `collapse(3)` to collapse the three nested loops into one. --- example/concurrent-inferences.f90 | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/example/concurrent-inferences.f90 b/example/concurrent-inferences.f90 index 2c2f430fa..3fd996022 100644 --- a/example/concurrent-inferences.f90 +++ b/example/concurrent-inferences.f90 @@ -55,7 +55,7 @@ program concurrent_inferences print *,"Performing",lat*lev*lon," inferences inside `omp parallel do`." call system_clock(t_start, clock_rate) - !$omp parallel do shared(inputs,outputs) + !$omp parallel do default(none) shared(inputs,outputs,neural_network) collapse(3) do j=1,lon do k=1,lev do i=1,lat @@ -63,7 +63,6 @@ program concurrent_inferences end do end do end do - !$omp end parallel do call system_clock(t_finish) print *,"Elapsed system clock: ", real(t_finish - t_start, real64)/real(clock_rate, real64) @@ -89,19 +88,19 @@ program concurrent_inferences neural_network = neural_network_t(double_precision_file_t(network_file_name)) print *,"Defining an array of tensor_t input objects with random normalized components" - allocate(outputs(lat,lon,lev)) - allocate( inputs(lat,lon,lev)) - allocate(input_components(lat,lon,lev,neural_network%num_inputs())) + allocate(outputs(lat,lev,lon)) + allocate( inputs(lat,lev,lon)) + allocate(input_components(lat,lev,lon,neural_network%num_inputs())) call random_number(input_components) - do concurrent(i=1:lat, j=1:lon, k=1:lev) - inputs(i,j,k) = tensor_t(input_components(i,j,k,:)) + do concurrent(i=1:lat, k=1:lev, j=1:lon) + inputs(i,k,j) = tensor_t(input_components(i,k,j,:)) end do print *,"Performing double-precision concurrent inference" call 
system_clock(t_start, clock_rate) - do concurrent(i=1:lat, j=1:lon, k=1:lev) - outputs(i,j,k) = neural_network%infer(inputs(i,j,k)) + do concurrent(i=1:lat, k=1:lev, j=1:lon) + outputs(i,k,j) = neural_network%infer(inputs(i,k,j)) end do call system_clock(t_finish) print *,"Double-precision concurrent inference time: ", real(t_finish - t_start, real64)/real(clock_rate, real64)