Skip to content

Commit

Permalink
fix(concur-infer): omp default(none), collapse(3)
Browse files Browse the repository at this point in the history
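This commit fixes the `omp parallel do` directive by adding `default(none)`,
which requires the data-sharing attribute of every variable accessed in the
region to be declared explicitly, and `collapse(3)`, which merges the three
nested loops into a single iteration space.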
  • Loading branch information
rouson committed Nov 25, 2024
1 parent a57a57a commit daae187
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions example/concurrent-inferences.f90
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,14 @@ program concurrent_inferences

print *,"Performing",lat*lev*lon," inferences inside `omp parallel do`."
call system_clock(t_start, clock_rate)
!$omp parallel do shared(inputs,outputs)
!$omp parallel do default(none) shared(inputs,outputs,neural_network) collapse(3)
do j=1,lon
do k=1,lev
do i=1,lat
outputs(i,k,j) = neural_network%infer(inputs(i,k,j))
end do
end do
end do
!$omp end parallel do
call system_clock(t_finish)
print *,"Elapsed system clock: ", real(t_finish - t_start, real64)/real(clock_rate, real64)

Expand All @@ -89,19 +88,19 @@ program concurrent_inferences
neural_network = neural_network_t(double_precision_file_t(network_file_name))

print *,"Defining an array of tensor_t input objects with random normalized components"
allocate(outputs(lat,lon,lev))
allocate( inputs(lat,lon,lev))
allocate(input_components(lat,lon,lev,neural_network%num_inputs()))
allocate(outputs(lat,lev,lon))
allocate( inputs(lat,lev,lon))
allocate(input_components(lat,lev,lon,neural_network%num_inputs()))
call random_number(input_components)

do concurrent(i=1:lat, j=1:lon, k=1:lev)
inputs(i,j,k) = tensor_t(input_components(i,j,k,:))
do concurrent(i=1:lat, k=1:lev, j=1:lon)
inputs(i,k,j) = tensor_t(input_components(i,k,j,:))
end do

print *,"Performing double-precision concurrent inference"
call system_clock(t_start, clock_rate)
do concurrent(i=1:lat, j=1:lon, k=1:lev)
outputs(i,j,k) = neural_network%infer(inputs(i,j,k))
do concurrent(i=1:lat, k=1:lev, j=1:lon)
outputs(i,k,j) = neural_network%infer(inputs(i,k,j))
end do
call system_clock(t_finish)
print *,"Double-precision concurrent inference time: ", real(t_finish - t_start, real64)/real(clock_rate, real64)
Expand Down

0 comments on commit daae187

Please sign in to comment.