Skip to content

Commit

Permalink
Update raven filter memory estimator
Browse files Browse the repository at this point in the history
This updates the memory estimator to match the raven filter implementation as of the
following commit in `httomolibgpu`:
https://github.com/DiamondLightSource/httomolibgpu/tree/66ce152a91abd474ed64f7a11b2bf59e03bb5b51
  • Loading branch information
yousefmoazzam committed Dec 18, 2024
1 parent 1b57bdb commit 45992e6
Showing 1 changed file with 23 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from typing import Tuple
import numpy as np

from httomo_backends.cufft import CufftType, cufft_estimate_2d
from httomo_backends.cufft import CufftType, cufft_estimate_1d


__all__ = [
Expand Down Expand Up @@ -80,42 +80,51 @@ def _calc_memory_bytes_raven_filter(
pad_x = kwargs["pad_x"]
pad_y = kwargs["pad_y"]

# Unpadded input
input_size = np.prod(non_slice_dims_shape) * dtype.itemsize
output_size = np.prod(non_slice_dims_shape) * dtype.itemsize

# Padded input
padded_non_slice_dims_shape = (
non_slice_dims_shape[0] + 2 * pad_y,
non_slice_dims_shape[1] + 2 * pad_x,
)

in_slice_size_pad = (
(padded_non_slice_dims_shape[0])
* (padded_non_slice_dims_shape[1])
* dtype.itemsize
)
out_slice_size_pad = in_slice_size_pad

# Conversion of padded input data to `complex64` (implicitly done by `fft2()` function)
complex_slice_fft_data = in_slice_size_pad / dtype.itemsize * np.complex64().nbytes
complex_slice_fft_data_shifted = complex_slice_fft_data
data_out_ifft_complex = complex_slice_fft_data

# Plan size for 2D FFT
fftplan_slice = cufft_estimate_2d(
nx=padded_non_slice_dims_shape[1],
ny=padded_non_slice_dims_shape[0],
# 2D FFT becomes two 1D FFTs (possibly due to applying 2D FFT to non-adjacent axes 0 and
# 2), so a plan for a 1D FFT is needed rather than a plan for a 2D FFT
fft_1d_plan = cufft_estimate_1d(
nx=padded_non_slice_dims_shape[0],
fft_type=CufftType.CUFFT_C2C,
batch=non_slice_dims_shape[1],
)

# Copy from applying fftshift to FFT result
complex_slice_fft_data_shifted = complex_slice_fft_data

# Two copies of `complex64` data come from 2D IFFT becoming a loop over two 1D IFFTs, and
# applying 1D IFFT to non-adjacent axes 0 and 2 causes data to not be C contiguous (thus,
# needing to be copied to get a version of the data which is C contiguous)
#
# NOTE: The same copies are generated by the 2D FFT becoming two 1D FFTs, but the order of
# allocations and deallocations of those copies is such that they don't contribute to peak
# GPU memory usage. Thus, they aren't accounted for in the estimated memory, unlike the
# copies generated by the 2D IFFT becoming two 1D IFFTs
ifft_complex64_copies = 2 * complex_slice_fft_data

tot_memory_bytes = int(
input_size
+ output_size
+ in_slice_size_pad
+ out_slice_size_pad
+ complex_slice_fft_data
+ 2 * fft_1d_plan
+ complex_slice_fft_data_shifted
+ data_out_ifft_complex
+ 2.1 * fftplan_slice
+ ifft_complex64_copies
)

return (tot_memory_bytes, 0)

0 comments on commit 45992e6

Please sign in to comment.