diff --git a/.github/workflows/ci-judi.yml b/.github/workflows/ci-judi.yml
index 96b4aeeec..2ab51793a 100644
--- a/.github/workflows/ci-judi.yml
+++ b/.github/workflows/ci-judi.yml
@@ -48,6 +48,7 @@ jobs:
 
       - name: Set julia python
         run: |
+          echo "PYTHON=$(which python3)" >> $GITHUB_ENV
           PYTHON=$(which python3) julia -e 'using Pkg;Pkg.add("PyCall");Pkg.build("PyCall")'
 
       - name: Build JUDI
diff --git a/.github/workflows/ci-op.yml b/.github/workflows/ci-op.yml
index 21caa36d5..6beb24926 100644
--- a/.github/workflows/ci-op.yml
+++ b/.github/workflows/ci-op.yml
@@ -77,6 +77,7 @@ jobs:
 
       - name: Set julia python
         run: |
+          echo "PYTHON=$(which python3)" >> $GITHUB_ENV
           PYTHON=$(which python3) julia -e 'using Pkg;Pkg.add("PyCall");Pkg.build("PyCall")'
 
       - name: Build JUDI
diff --git a/Project.toml b/Project.toml
index 75c75f8eb..d68e277ec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "JUDI"
 uuid = "f3b833dc-6b2e-5b9c-b940-873ed6319979"
 authors = ["Philipp Witte, Mathias Louboutin"]
-version = "3.4.4"
+version = "3.4.5"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -51,4 +51,4 @@ test = ["Aqua", "JLD2", "Printf", "Test", "TimerOutputs", "Flux"]
 [weakdeps]
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
\ No newline at end of file
diff --git a/deps/build.jl b/deps/build.jl
index 42f873c2b..3a357ac6c 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -5,24 +5,22 @@ struct DevitoException <: Exception
     msg::String
 end
 
-python = PyCall.pyprogramname
-
-try
-    pk = pyimport("pkg_resources")
+pk = try
+    pyimport("pkg_resources")
 catch e
-    Cmd([python, "-m", "pip", "install", "--user", "setuptools"])
-    run(cmd)
-    pk = pyimport("pkg_resources")
+    run(PyCall.python_cmd(`-m pip install --user setuptools`))
+    pyimport("pkg_resources")
 end
 
 ################## Devito ##################
 # pip command
-cmd = Cmd([python, "-m", "pip", "install", "-U", "--user", "devito[extras,tests]>=4.4"])
+dvver = "4.8.10"
+cmd = PyCall.python_cmd(`-m pip install --user devito\[extras,tests\]\>\=$(dvver)`)
 
 try
-    dv_ver = split(pk.get_distribution("devito").version, "+")[1]
-    if cmp(dv_ver, "4.8.7") < 0
-        @info "Devito  version too low, updating to >=4.8.7"
+    dv_ver = VersionNumber(split(pk.get_distribution("devito").version, "+")[1])
+    if dv_ver < VersionNumber(dvver)
+        @info "Devito  version too low, updating to >=$(dvver)"
         run(cmd)
     end
 catch e
@@ -30,12 +28,9 @@ catch e
     run(cmd)
 end
 
-
 ################## Matplotlib ##################
-# pip command
-cmd = Cmd([python, "-m", "pip", "install", "--user", "matplotlib"])
 try
     mpl = pyimport("matplotlib")
 catch e
-    run(cmd)
+    run(PyCall.python_cmd(`-m pip install --user matplotlib`))
 end
diff --git a/docs/src/helper.md b/docs/src/helper.md
index a74f23946..31fd8d2bc 100644
--- a/docs/src/helper.md
+++ b/docs/src/helper.md
@@ -104,8 +104,6 @@ remove_out_of_bounds_receivers
 ```@docs
 devito_model
 setup_grid
-pad_sizes
-pad_array
 remove_padding
 convertToCell
 process_input_data
diff --git a/examples/scripts/fwi_example_2D.jl b/examples/scripts/fwi_example_2D.jl
index 6370399fd..c2c168ca3 100644
--- a/examples/scripts/fwi_example_2D.jl
+++ b/examples/scripts/fwi_example_2D.jl
@@ -4,7 +4,7 @@
 #
 
 using Statistics, Random, LinearAlgebra
-using JUDI, SlimOptim, HDF5, SegyIO, PyPlot
+using JUDI, HDF5, SegyIO, SlimOptim, SlimPlotting
 
 # Load starting model
 n,d,o,m0 = read(h5open("$(JUDI.JUDI_DATA)/overthrust_model.h5","r"), "n", "d", "o", "m0")
@@ -66,4 +66,6 @@ for j=1:niterations
     model0.m .= proj(model0.m .+ step .* p)
 end
 
-figure(); imshow(sqrt.(1f0./adjoint(model0.m))); title("FWI with SGD")
+figure()
+plot_velocity(model0.m'.^(-.5))
+title("FWI with SGD")
diff --git a/examples/scripts/modeling_basic_2D.jl b/examples/scripts/modeling_basic_2D.jl
index 7977cfb5e..af1e066f7 100644
--- a/examples/scripts/modeling_basic_2D.jl
+++ b/examples/scripts/modeling_basic_2D.jl
@@ -9,10 +9,12 @@
 #' This example is converted to a markdown file for the documentation.
 
 #' # Import JUDI, Linear algebra utilities and Plotting
-using JUDI, PyPlot, LinearAlgebra
+using JUDI, LinearAlgebra, SlimPlotting
 
 #+ echo = false; results = "hidden"
 close("all")
+imcmap = "cet_CET_L1"
+dcmap = "PuOr"
 
 #' # Create a JUDI model structure
 #' In JUDI, a `Model` structure contains the grid information (origin, spacing, number of gridpoints)
@@ -91,7 +93,7 @@ q = judiVector(srcGeometry, wavelet)
 #' condition for the propagation.
 
 # Setup options
-opt = Options(subsampling_factor=2, space_order=32)
+opt = Options(subsampling_factor=2, space_order=16, free_surface=false)
 
 #' Linear Operators
 #' The core idea behind JUDI is to abstract seismic inverse problems in term of linear algebra. In its simplest form, seismic inversion can be formulated as
@@ -119,10 +121,7 @@ dobs = Pr*F*adjoint(Ps)*q
 
 #' Plot the shot record
 fig = figure()
-imshow(dobs.data[1], vmin=-1, vmax=1, cmap="PuOr", extent=[xrec[1], xrec[end], timeD/1000, 0], aspect="auto")
-xlabel("Receiver position (m)")
-ylabel("Time (s)")
-title("Synthetic data")
+plot_sdata(dobs[1]; new_fig=false, name="Synthetic data", cmap=dcmap)
 display(fig)
 
 #' Because we have abstracted the linear algebra, we can solve the adjoint wave-equation as well 
@@ -152,19 +151,13 @@ rtm = adjoint(J)*dD
 
 #' We show the linearized data.
 fig = figure()
-imshow(dD.data[1], vmin=-1, vmax=1, cmap="PuOr", extent=[xrec[1], xrec[end], timeD/1000, 0], aspect="auto")
-xlabel("Receiver position (m)")
-ylabel("Time (s)")
-title("Linearized data")
+plot_sdata(dobs[1]; new_fig=false, name="Linearized data", cmap=dcmap)
 display(fig)
 
 
 #' And the RTM image
 fig = figure()
-imshow(rtm', vmin=-1e2, vmax=1e2, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("RTM image")
+plot_simage(rtm'; new_fig=false, name="RTM image", cmap=imcmap)
 display(fig)
 
 #' ## Inversion utility functions
@@ -185,10 +178,7 @@ f, g = fwi_objective(model0, q, dobs; options=opt)
 
 #' Plot gradient
 fig = figure()
-imshow(g', vmin=-1e2, vmax=1e2, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("FWI gradient")
+plot_simage(g'; new_fig=false, name="FWI gradient", cmap=imcmap)
 display(fig)
 
 
@@ -199,17 +189,11 @@ fjn, gjn = lsrtm_objective(model0, q, dobs, dm; nlind=true, options=opt)
 
 #' Plot gradients
 fig = figure()
-imshow(gj', vmin=-1, vmax=1, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("LSRTM gradient")
+plot_simage(gj'; new_fig=false, name="LSRTM gradient", cmap=imcmap, cbar=true)
 display(fig)
 
 fig = figure()
-imshow(gjn', vmin=-1, vmax=1, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("LSRTM gradient with background data substracted")
+plot_simage(gjn'; new_fig=false, name="LSRTM gradient with background data substracted", cmap=imcmap, cbar=true)
 display(fig)
 
 #' By extension, lsrtm_objective is the same as fwi_objecive when `dm` is zero
@@ -218,13 +202,10 @@ display(fig)
 #' OMP_NUM_THREADS=1 (no parllelism) produces the exact (difference == 0) same result
 #' gjn2 == g
 fjn2, gjn2 = lsrtm_objective(model0, q, dobs, 0f0.*dm; nlind=true, options=opt)
-fig = figure()
 
 #' Plot gradient
-imshow(gjn2', vmin=-1e2, vmax=1e2, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("LSRTM gradient with zero perturbation")
+fig = figure()
+plot_simage(gjn2'; new_fig=false, name="LSRTM gradient with zero perturbation", cmap=imcmap)
 display(fig)
 
 
@@ -236,15 +217,9 @@ f, gmf = twri_objective(model0, q, dobs, nothing; options=Options(frequencies=[[
 
 #' Plot gradients
 fig = figure()
-imshow(gm', vmin=-1, vmax=1, cmap="Greys", extent=[0, (n[1]-1)*d[1], (n[2]-1)*d[2], 0 ], aspect="auto")
-xlabel("Lateral position(m)")
-ylabel("Depth (m)")
-title("TWRI gradient w.r.t m")
+plot_simage(gm'; new_fig=false, name="TWRI gradient w.r.t m", cmap=imcmap)
 display(fig)
 
 fig = figure()
-imshow(gy.data[1], vmin=-1e2, vmax=1e2, cmap="PuOr", extent=[xrec[1], xrec[end], timeD/1000, 0], aspect="auto")
-xlabel("Receiver position (m)")
-ylabel("Time (s)")
-title("TWRI gradient w.r.t y")
+plot_sdata(gy[1]; new_fig=false, name="TWRI gradient w.r.t y", cmap=dcmap)
 display(fig)
\ No newline at end of file
diff --git a/src/JUDI.jl b/src/JUDI.jl
index cadd66172..11f802bd2 100644
--- a/src/JUDI.jl
+++ b/src/JUDI.jl
@@ -9,7 +9,6 @@ module JUDI
 export JUDIPATH, set_verbosity, ftp_data, get_serial, set_serial, set_parallel
 JUDIPATH = dirname(pathof(JUDI))
 
-
 # Only needed if extension not available (julia < 1.9)
 if !isdefined(Base, :get_extension)
     using Requires
@@ -102,10 +101,12 @@ function _worker_pool()
         return nothing
     end
     p = default_worker_pool()
-    pool = length(p) < 2 ? nothing : p
+    pool = nworkers(p) < 2 ? nothing : p
     return pool
 end
 
+nworkers(::Any) = length(workers())
+
 _TFuture = Future
 _verbose = false
 _devices = []
@@ -178,9 +179,6 @@ function __init__()
     copy!(devito, pyimport("devito"))
     # Initialize lock at session start
     PYLOCK[] = ReentrantLock()
-    
-    # Prevent autopadding to use external allocator
-    set_devito_config("autopadding", false)
 
     # Make sure there is no conflict for the cuda init thread with CUDA.jl
     if get(ENV, "DEVITO_PLATFORM", "") == "nvidiaX"
diff --git a/src/TimeModeling/Modeling/distributed.jl b/src/TimeModeling/Modeling/distributed.jl
index a8a07a351..a55c0ad53 100644
--- a/src/TimeModeling/Modeling/distributed.jl
+++ b/src/TimeModeling/Modeling/distributed.jl
@@ -21,12 +21,6 @@ end
     x
 end
 
-"""
-    safe_gc()
-
-Generic GC, compatible with different julia versions of it.
-"""
-safe_gc() = try Base.GC.gc(); catch; gc() end
 
 """
     local_reduce!(future, other)
@@ -64,9 +58,28 @@ Adapted from `DistributedOperations.jl` (MIT license). Striped from custom types
 with different reduction functions.
 """
 function reduce!(futures::Vector{_TFuture})
+    isnothing(_worker_pool()) && return reduce_all_workers!(futures)
+    # Number of parallel workers
+    nwork = nworkers(_worker_pool())
+    nf = length(futures)
+    # Reduction batch. We want to avoid finished task to hang waiting for the
+    # binary tree reduction to reach their index holding memory.
+    bsize = min(nwork, nf)
+    # First batch
+    res = reduce_all_workers!(futures[1:bsize])
+    # Loop until all reduced
+    for i = bsize+1:bsize:nf
+        last = min(nf, i + bsize - 1)
+        single_reduce!(res, reduce_all_workers!(futures[i:last]))
+    end
+    return res
+end
+
+
+function reduce_all_workers!(futures::Vector{_TFuture})
     # Get length a next power of two for binary reduction
     M = length(futures)
-    L = round(Int,log2(prevpow(2,M)))
+    L = round(Int, log2(prevpow(2,M)))
     m = 2^L
     # remainder
     R = M - m
diff --git a/src/TimeModeling/Modeling/misfit_fg.jl b/src/TimeModeling/Modeling/misfit_fg.jl
index 3d9906a95..429da9b8f 100644
--- a/src/TimeModeling/Modeling/misfit_fg.jl
+++ b/src/TimeModeling/Modeling/misfit_fg.jl
@@ -11,6 +11,7 @@ function _multi_src_fg(model_full::AbstractModel, source::Dtypes, dObs::Dtypes,
                       data_precon=nothing, model_precon=LinearAlgebra.I)
     GC.gc(true)
     devito.clear_cache()
+
     # assert this is for single source LSRTM
     @assert source.nsrc == 1 "Multiple sources are used in a single-source fwi_objective"
     @assert dObs.nsrc == 1 "Multiple-source data is used in a single-source fwi_objective"    
@@ -63,6 +64,7 @@ function _multi_src_fg(model_full::AbstractModel, source::Dtypes, dObs::Dtypes,
 
     length(options.frequencies) == 0 ? freqs = nothing : freqs = options.frequencies
     IT = illum ? (PyArray, PyArray) : (PyObject, PyObject)
+
     @juditime "Python call to J_adjoint" begin
         argout = rlock_pycall(ac."J_adjoint", Tuple{Float32, PyArray, IT...}, modelPy,
                 src_coords, qIn, rec_coords, dObserved, t_sub=options.subsampling_factor,
diff --git a/src/TimeModeling/Modeling/propagation.jl b/src/TimeModeling/Modeling/propagation.jl
index 8283ce305..90ed4b2fb 100644
--- a/src/TimeModeling/Modeling/propagation.jl
+++ b/src/TimeModeling/Modeling/propagation.jl
@@ -21,8 +21,6 @@ the pool is empty, a standard loop and accumulation is ran. If the pool is a jul
 any custom Distributed pool, the loop is distributed via `remotecall` followed by are binary tree remote reduction.
 """
 function run_and_reduce(func, pool, nsrc, arg_func::Function; kw=nothing)
-    # Allocate devices
-    _set_devices!()
     # Run distributed loop
     res = Vector{_TFuture}(undef, nsrc)
     for i = 1:nsrc
@@ -44,21 +42,11 @@ function run_and_reduce(func, ::Nothing, nsrc, arg_func::Function; kw=nothing)
             kw_loc = isnothing(kw) ? Dict() : kw(i)
             next = func(arg_func(i)...; kw_loc...)
         end
-        single_reduce!(out, next)
-    end
-    out
-end
-
-function _set_devices!()
-    ndevices = length(_devices)
-    if ndevices < 2
-        return
-    end
-    asyncmap(enumerate(workers())) do (pi, p)
-        remotecall_wait(p) do
-            pyut.set_device_ids(_devices[pi % ndevices + 1])
+        @juditime "Reducting $(func) for src $(i)" begin
+            single_reduce!(out, next)
         end
     end
+    out
 end
 
 _prop_fw(::judiPropagator{T, O}) where {T, O} = true 
@@ -112,7 +100,7 @@ function multi_src_fg!(G, model, q, dobs, dm; options=Options(), ms_func=multi_s
     kw_func = i -> Dict(:illum=> illum, Dict(k => kw_i(v, i) for (k, v) in kw)...)
     # Distribute source
     res = run_and_reduce(ms_func, pool, nsrc, arg_func; kw=kw_func)
-    f, g = update_illum(res, model, :adjoint_born)
+    res = update_illum(res, model, :adjoint_born)
     f, g = as_vec(res, Val(options.return_array))
     G .+= g
     return f
diff --git a/src/TimeModeling/Modeling/twri_objective.jl b/src/TimeModeling/Modeling/twri_objective.jl
index d92598659..328725e49 100644
--- a/src/TimeModeling/Modeling/twri_objective.jl
+++ b/src/TimeModeling/Modeling/twri_objective.jl
@@ -71,7 +71,7 @@ function _twri_objective(model_full::AbstractModel, source::judiVector, dObs::ju
     dtComp = convert(Float32, modelPy."critical_dt")
 
     # Extrapolate input data to computational grid
-    qIn = time_resample(source.data[1], source.geometry, dtComp)
+    qIn = time_resample(make_input(source), source.geometry, dtComp)
     dObserved = time_resample(make_input(dObs), dObs.geometry, dtComp)
 
     if isnothing(y)
diff --git a/src/TimeModeling/Utils/auxiliaryFunctions.jl b/src/TimeModeling/Utils/auxiliaryFunctions.jl
index c805583df..d25b6b3db 100644
--- a/src/TimeModeling/Utils/auxiliaryFunctions.jl
+++ b/src/TimeModeling/Utils/auxiliaryFunctions.jl
@@ -9,7 +9,7 @@ export ricker_wavelet, get_computational_nt, calculate_dt, setup_grid, setup_3D_
 export convertToCell, limit_model_to_receiver_area, remove_out_of_bounds_receivers, extend_gradient
 export remove_padding, subsample, process_input_data
 export generate_distribution, select_frequencies
-export devito_model, pad_sizes, pad_array
+export devito_model, pad_array
 export transducer, as_vec
 export Gardner
 
@@ -24,10 +24,8 @@ Parameters
 * `dm`: Squared slowness perturbation (optional), Array or PhysicalParameter.
 """
 function devito_model(model::MT, options::JUDIOptions, dm) where {MT<:AbstractModel}
-    pad = pad_sizes(model, options)
     # Set up Python model structure
-    dm = pad_array(dm, pad)
-    physpar = Dict((n, isa(v, PhysicalParameter) ? pad_array(v.data, pad) : v) for (n, v) in _params(model))
+    physpar = Dict((n, isa(v, PhysicalParameter) ? v.data : v) for (n, v) in _params(model))
 
     modelPy = rlock_pycall(pm."Model", PyObject, origin(model), spacing(model), size(model), fs=options.free_surface,
                    nbl=nbl(model), space_order=options.space_order, dt=options.dt_comp, dm=dm;
@@ -40,31 +38,6 @@ devito_model(model::AbstractModel, options::JUDIOptions, dm::PhysicalParameter)
 devito_model(model::AbstractModel, options::JUDIOptions, dm::Vector{T}) where T = devito_model(model, options, reshape(dm, size(model)))
 devito_model(model::AbstractModel, options::JUDIOptions) = devito_model(model, options, nothing)
 
-"""
-    pad_sizes(model, options; so=nothing)
-
-Computes ABC padding sizes according to the model's numbr of abc points and spatial order
-
-Parameters
-* `model`: JUDI or Python side Model.
-* `options`: JUDI Options structure.
-* `so`: Space order (optional) defaults to options.space_order.
-"""
-function pad_sizes(model::PyObject, options; so=nothing)
-    isnothing(so) && (so = options.space_order)
-    N = model.grid.dim
-    return tuple([(nbl + so, nbr + so) for (nbl, nbr)=model.padsizes]...)
-end
-
-function pad_sizes(model::AbstractModel{T, N}, options; so=nothing) where {T, N}
-    isnothing(so) && (so = options.space_order)
-    padsizes = [(nbl(model) + so, nbl(model) + so) for i=1:N]
-    if options.free_surface
-        padsizes[end] = (so, nbl(model) + so)
-    end
-    return tuple(padsizes...)
-end
-
 """
     pad_array(m, nb; mode=:border)
 
diff --git a/src/TimeModeling/Utils/time_utilities.jl b/src/TimeModeling/Utils/time_utilities.jl
index 60ccf15d0..4deaa2812 100644
--- a/src/TimeModeling/Utils/time_utilities.jl
+++ b/src/TimeModeling/Utils/time_utilities.jl
@@ -121,6 +121,15 @@ function _maybe_pad_t0(qIn::Matrix{T}, qGeom::Geometry, dObserved::Matrix{T}, da
         pql = Int(div(get_t0(qGeom, 1) - ts, dt))
         pqr = Int(div(te - get_t(qGeom, 1), dt))
         qIn = vcat(zeros(T, pql, size(qIn, 2)), qIn, zeros(T, pqr, size(qIn, 2)))
+
+        # Pad in case of mismatch
+        leftover = size(qIn, 1) - size(dObserved, 1)
+        if leftover > 0
+            dObserved = vcat(dObserved, zeros(T, leftover, size(dObserved, 2)))
+        elseif leftover < 0
+            qIn = vcat(qIn, zeros(T, -leftover, size(qIn, 2)))
+        end
+        
     else
         throw(judiMultiSourceException("""
             Data and source have different
diff --git a/src/pysource/FD_utils.py b/src/pysource/FD_utils.py
index 2dd7977cf..06edee06f 100644
--- a/src/pysource/FD_utils.py
+++ b/src/pysource/FD_utils.py
@@ -1,6 +1,11 @@
 from sympy import rot_axis2, rot_axis3
+from devito import TensorFunction, Differentiable, div, grad, cos, sin
 
-from devito import TensorFunction, Differentiable, div, grad
+
+trig_mapper = {cos.__sympy_class__: cos, sin.__sympy_class__: sin}
+
+r2 = lambda x: rot_axis2(x).applyfunc(lambda i: trig_mapper.get(i.func, i.func)(*i.args))
+r3 = lambda x: rot_axis3(x).applyfunc(lambda i: trig_mapper.get(i.func, i.func)(*i.args))
 
 
 def laplacian(v, irho):
@@ -27,22 +32,19 @@ def R_mat(model):
     """
     # Rotation matrix
     try:
-        Rt = rot_axis2(model.theta)
+        Rt = r2(model.theta)
     except AttributeError:
-        Rt = rot_axis2(0)
+        Rt = r2(0)
     if model.dim == 3:
         try:
-            Rt *= rot_axis3(model.phi)
+            Rt *= r3(model.phi)
         except AttributeError:
-            Rt *= rot_axis3(0)
+            Rt *= r3(0)
     else:
         Rt = Rt[[0, 2], [0, 2]]
-    R = TensorFunction(name="R", grid=model.grid, components=Rt, symmetric=False)
-    try:
-        R.name == "R"
-        return R
-    except AttributeError:
-        return Rt
+    # Rebuild sin/cos
+
+    return TensorFunction(name="R", grid=model.grid, components=Rt, symmetric=False)
 
 
 def thomsen_mat(model):
diff --git a/src/pysource/fields.py b/src/pysource/fields.py
index e7a935a24..98f5c8870 100644
--- a/src/pysource/fields.py
+++ b/src/pysource/fields.py
@@ -2,8 +2,8 @@
 
 from devito import (TimeFunction, ConditionalDimension, Function,
                     DefaultDimension, Dimension, VectorTimeFunction,
-                    TensorTimeFunction)
-from devito.data.allocators import ExternalAllocator
+                    TensorTimeFunction, Buffer)
+from devito.builtins import initialize_function
 from devito.tools import as_tuple
 
 try:
@@ -11,8 +11,11 @@
 except ImportError:
     import devito as dvp  # noqa
 
+from utils import compression_mode
 
-def wavefield(model, space_order, save=False, nt=None, fw=True, name='', t_sub=1):
+
+def wavefield(model, space_order, save=False, nt=None, fw=True, name='', t_sub=1,
+              tfull=False):
     """
     Create the wavefield for the wave equation
 
@@ -31,24 +34,28 @@ def wavefield(model, space_order, save=False, nt=None, fw=True, name='', t_sub=1
         Forward or backward (for naming)
     name: string
         Custom name attached to default (u+name)
+    tfull: Bool
+        Whether need full buffer for e.g. second time derivative
     """
     name = "u"+name if fw else "v"+name
     save = False if t_sub > 1 else save
+    nsave = Buffer(3 if tfull else 2) if not save else nt
+
     if model.is_tti:
         u = TimeFunction(name="%s1" % name, grid=model.grid, time_order=2,
-                         space_order=space_order, save=None if not save else nt)
+                         space_order=space_order, save=nsave)
         v = TimeFunction(name="%s2" % name, grid=model.grid, time_order=2,
-                         space_order=space_order, save=None if not save else nt)
+                         space_order=space_order, save=nsave)
         return (u, v)
     elif model.is_elastic:
         v = VectorTimeFunction(name="v", grid=model.grid, time_order=1,
-                               space_order=space_order, save=None)
+                               space_order=space_order, save=Buffer(1))
         tau = TensorTimeFunction(name="tau", grid=model.grid, time_order=1,
-                                 space_order=space_order, save=None)
+                                 space_order=space_order, save=Buffer(1))
         return (v, tau)
     else:
         return TimeFunction(name=name, grid=model.grid, time_order=2,
-                            space_order=space_order, save=None if not save else nt)
+                            space_order=space_order, save=nsave)
 
 
 def forward_wavefield(model, space_order, save=True, nt=10, dft=False, t_sub=1, fw=True):
@@ -110,7 +117,7 @@ def memory_field(p):
         Forward wavefield
     """
     return TimeFunction(name='r%s' % p.name, grid=p.grid, time_order=2,
-                        space_order=p.space_order, save=None)
+                        space_order=p.space_order, save=Buffer(2))
 
 
 def wavefield_subsampled(model, u, nt, t_sub, space_order=8):
@@ -139,9 +146,9 @@ def wavefield_subsampled(model, u, nt, t_sub, space_order=8):
         return None
     wf_s = []
     for wf in as_tuple(u):
-        usave = TimeFunction(name='us_%s' % wf.name, grid=model.grid, time_order=2,
-                             space_order=space_order, time_dim=time_subsampled,
-                             save=nsave)
+        usave = dvp.TimeFunction(name='us_%s' % wf.name, grid=model.grid, time_order=2,
+                                 space_order=space_order, time_dim=time_subsampled,
+                                 save=nsave, compression=compression_mode())
         wf_s.append(usave)
     return wf_s
 
@@ -169,14 +176,14 @@ def lr_src_fields(model, weight, wavelet, empty_w=False, rec=False):
     time = model.grid.time_dim
     nt = wavelet.shape[0]
     wn = 'rec' if rec else 'src'
-    wavelett = Function(name='wf_%s' % wn, dimensions=(time,), shape=(nt,))
+    wavelett = TimeFunction(name='wf_%s' % wn, dimensions=(time,), time_dim=time,
+                            shape=(nt,), save=nt, grid=model.grid)
     wavelett.data[:] = np.array(wavelet)[:, 0]
     if empty_w:
         source_weight = Function(name='%s_weight' % wn, grid=model.grid, space_order=0)
     else:
-        source_weight = Function(name='%s_weight' % wn, grid=model.grid, space_order=0,
-                                 allocator=ExternalAllocator(weight),
-                                 initializer=lambda x: None)
+        source_weight = Function(name='%s_weight' % wn, grid=model.grid, space_order=0)
+        initialize_function(source_weight, weight, 0)
     return source_weight, wavelett
 
 
diff --git a/src/pysource/fields_exprs.py b/src/pysource/fields_exprs.py
index ee101f9bd..0f49b110a 100644
--- a/src/pysource/fields_exprs.py
+++ b/src/pysource/fields_exprs.py
@@ -1,8 +1,7 @@
 import numpy as np
 
-from devito import Inc, Eq, ConditionalDimension, cos, sin, sign
+from devito import Inc, Eq, ConditionalDimension, cos, sin
 from devito.tools import as_tuple
-from devito.symbolics import retrieve_functions, INT, retrieve_derivatives
 
 from fields import (wavefield_subsampled, lr_src_fields, fourier_modes, norm_holder,
                     illumination)
@@ -31,7 +30,7 @@ def save_subsampled(model, u, nt, t_sub, space_order=8):
         return []
     eq_save = []
     for (wfs, wf) in zip(wf_s, as_tuple(u)):
-        eq_save.append(Eq(wfs, wf))
+        eq_save.append(Eq(wfs, wf, subdomain=model.physical))
     return eq_save
 
 
@@ -102,7 +101,7 @@ def extended_rec(model, wavelet, v):
     return [Inc(ws, model.grid.time_dim.spacing * wf * wt)]
 
 
-def freesurface(model, eq, mfuncs=None, fd_only=False):
+def freesurface(model, fields):
     """
     Generate the stencil that mirrors the field as a free surface modeling for
     the acoustic wave equation
@@ -113,42 +112,20 @@ def freesurface(model, eq, mfuncs=None, fd_only=False):
         Physical model
     eq: Eq or List of Eq
         Equation to apply mirror to
-    mfuncs: List of Functions
-        List of functions to mirror (default=None). Mirrors all functions if not provided
     """
-    fs_eq = []
-    fsdomain = model.grid.subdomains['fsdomain']
-    zfs = model.grid.subdomains['fsdomain'].dimensions[-1]
-    z = zfs.parent
+    eqs = []
 
-    for eq_i in eq:
-        for p in eq_i._flatten:
-            # Add modulo replacements to to rhs
-            lhs, rhs = p.lhs, p.rhs
+    z = model.grid.dimensions[-1]
+    zfs = model.fs_dim
 
-            Dz = {d for d in retrieve_derivatives(rhs) if z in d.dims}
-            # Remove inner duplicate
-            Dz = Dz - {d for D in Dz for d in retrieve_derivatives(D.expr) if z in d.dims}
-            Dz = {d: d._eval_at(lhs).evaluate for d in Dz}
+    for u in as_tuple(fields):
+        if u == 0:
+            continue
 
-            funcs = {f for f in retrieve_functions(Dz.values())}
+        eqs.extend([Eq(u._subs(z, - zfs), - u._subs(z, zfs)),
+                    Eq(u._subs(z, 0), 0)])
 
-            if mfuncs:
-                funcs = {f for f in funcs if f.function in mfuncs}
-
-            mapper = {}
-            for f in funcs:
-                zind = f.indices[-1]
-                if (zind - z).as_coeff_Mul()[0] < 0:
-                    s = sign(zind._subs(z.spacing, 1))
-                    mapper.update({f: s * f._subs(zind, INT(abs(zind)))})
-
-            dzmapper = {d: v.subs(mapper) for d, v in Dz.items()}
-            fs_eq.append(p.func(lhs, rhs.subs(dzmapper), subdomain=fsdomain))
-            if not fd_only:
-                fs_eq.append(p.func(lhs._subs(z, 0), 0, subdomain=fsdomain))
-
-    return fs_eq
+    return eqs
 
 
 def otf_dft(u, freq, dt, factor=None):
@@ -268,4 +245,5 @@ def illumexpr(u, illum):
         return []
     u0 = as_tuple(u)
     I = illumination(u, illum)
-    return [Inc(I, sum(u0)*sum(u0))]
+    expr = sum([ui**2 for ui in u0])
+    return [Inc(I, u0[0].grid.time_dim.spacing * expr)]
diff --git a/src/pysource/geom_utils.py b/src/pysource/geom_utils.py
index c841acdea..c53677c91 100644
--- a/src/pysource/geom_utils.py
+++ b/src/pysource/geom_utils.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 from devito.tools import as_tuple
 
 from sources import *
@@ -13,7 +15,7 @@ def src_rec(model, u, src_coords=None, rec_coords=None, wavelet=None, nt=None):
         else:
             src = PointSource(name="src%s" % namef, grid=model.grid, ntime=nt,
                               coordinates=src_coords)
-            src.data[:] = wavelet[:] if wavelet is not None else 0.
+            src.data[:] = wavelet.view(np.ndarray) if wavelet is not None else 0.
     rcv = None
     if rec_coords is not None:
         rcv = Receiver(name="rcv%s" % namef, grid=model.grid, ntime=nt,
@@ -56,6 +58,7 @@ def geom_expr(model, u, src_coords=None, rec_coords=None, wavelet=None, fw=True,
     dt = model.grid.time_dim.spacing
     geom_expr = []
     src, rcv = src_rec(model, u, src_coords, rec_coords, wavelet, nt)
+    model.__init_abox__(src, rcv, fw=fw)
     if src is not None:
         # Elastic inject into diagonal of stress
         if model.is_elastic:
diff --git a/src/pysource/interface.py b/src/pysource/interface.py
index 0b00945f4..feae6d6ba 100644
--- a/src/pysource/interface.py
+++ b/src/pysource/interface.py
@@ -466,6 +466,7 @@ def J_adjoint_standard(model, src_coords, wavelet, rec_coords, recin,
 
     if return_obj:
         return f, g.data, getattr(Iu, "data", None), getattr(Iv, "data", None)
+
     return g.data, getattr(Iu, "data", None), getattr(Iv, "data", None)
 
 
@@ -618,6 +619,7 @@ def wri_func(model, src_coords, wavelet, rec_coords, recin, yin,
             grady = c2 * recin - rcv.data[:]
             if norm_y != 0:
                 grady -= np.abs(eps) * ydat / norm_y
+            grady = grady.astype(model.dtype)
 
         # Correcting for reduced gradient
         if not grad_corr:
diff --git a/src/pysource/kernels.py b/src/pysource/kernels.py
index 338313f68..dd471483c 100644
--- a/src/pysource/kernels.py
+++ b/src/pysource/kernels.py
@@ -59,13 +59,13 @@ def acoustic_kernel(model, u, fw=True, q=None):
     damp = model.damp
     stencil = solve(wmr * u.dt2 + damp * udt - ulaplace - q, u_n)
 
-    if 'nofsdomain' in model.grid.subdomains:
-        pde = [Eq(u_n, stencil, subdomain=model.grid.subdomains['nofsdomain'])]
-        pde += freesurface(model, pde, (u,))
+    pde = [Eq(u_n, stencil, subdomain=model.physical)]
+    if model.fs:
+        fseq = freesurface(model, u_n)
     else:
-        pde = [Eq(u_n, stencil)]
+        fseq = []
 
-    return pde
+    return pde, fseq
 
 
 def SLS_2nd_order(model, p, fw=True, q=None, f0=0.015):
@@ -118,7 +118,6 @@ def SLS_2nd_order(model, p, fw=True, q=None, f0=0.015):
             b * r.forward - q + (1 - damp) * p.dt
         u_p = Eq(p.forward, solve(pde_p, p.forward))
 
-        return [u_r, u_p]
     else:
         # Attenuation Memory variable
         pde_r = r.dt.T + b * p + (1 / t_s) * r
@@ -130,7 +129,7 @@ def SLS_2nd_order(model, p, fw=True, q=None, f0=0.015):
 
         u_p = Eq(p.backward, solve(pde_p, p.backward))
 
-        return [u_r, u_p]
+    return [u_r, u_p], []
 
 
 def tti_kernel(model, u1, u2, fw=True, q=None):
@@ -144,7 +143,7 @@ def tti_kernel(model, u1, u2, fw=True, q=None):
     u1 : TimeFunction
         First component (pseudo-P) of the wavefield
     u2 : TimeFunction
-        First component (pseudo-P) of the wavefield
+        Second component (pseudo-S) of the wavefield
     fw: Bool
         Whether forward or backward in time propagation
     q : TimeFunction or Expr
@@ -163,20 +162,15 @@ def tti_kernel(model, u1, u2, fw=True, q=None):
     stencilp = solve(wmr * u1.dt2 + damp * udt1 - H0 - q[0], u1_n)
     stencilr = solve(wmr * u2.dt2 + damp * udt2 - H1 - q[1], u2_n)
 
-    if 'nofsdomain' in model.grid.subdomains:
-        # Water at free surface, no anisotrpy
-        acout_ttip = Eq(u1_n, stencilp.subs(model.zero_thomsen))
-        acout_ttir = Eq(u2_n, stencilr.subs(model.zero_thomsen))
-        pdea = freesurface(model, (acout_ttip, acout_ttir), (u1, u2))
-        # Standard PDE in subsurface
-        first_stencil = Eq(u1_n, stencilp, subdomain=model.grid.subdomains['nofsdomain'])
-        second_stencil = Eq(u2_n, stencilr, subdomain=model.grid.subdomains['nofsdomain'])
+    first_stencil = Eq(u1_n, stencilp, subdomain=model.physical)
+    second_stencil = Eq(u2_n, stencilr, subdomain=model.physical)
+
+    if model.fs:
+        fseq = freesurface(model, (u1_n, u2_n))
     else:
-        pdea = []
-        first_stencil = Eq(u1_n, stencilp)
-        second_stencil = Eq(u2_n, stencilr)
+        fseq = []
 
-    return [first_stencil, second_stencil] + pdea
+    return [first_stencil, second_stencil], fseq
 
 
 def elastic_kernel(model, v, tau, fw=True, q=None):
@@ -196,8 +190,6 @@ def elastic_kernel(model, v, tau, fw=True, q=None):
     q : TimeFunction or Expr
         Full time-space source as a tuple (one value for each component)
     """
-    if 'nofsdomain' in model.grid.subdomains:
-        raise NotImplementedError("Free surface not supported for elastic modelling")
     if not fw:
         raise NotImplementedError("Only forward modeling for the elastic equation")
 
@@ -219,7 +211,14 @@ def elastic_kernel(model, v, tau, fw=True, q=None):
 
     eq_tau = tau.dt - lam * diag(div(v.forward)) - mu * e
 
-    u_v = Eq(v.forward, model.damp * solve(eq_v, v.forward))
-    u_t = Eq(tau.forward, model.damp * solve(eq_tau, tau.forward))
+    u_v = Eq(v.forward, model.damp * solve(eq_v, v.forward),
+             subdomain=model.physical)
+    u_t = Eq(tau.forward, model.damp * solve(eq_tau, tau.forward),
+             subdomain=model.physical)
+
+    if model.fs:
+        fseq = freesurface(model, tau.forward.diagonal())
+    else:
+        fseq = []
 
-    return [u_v, u_t]
+    return [u_v, u_t], fseq
diff --git a/src/pysource/models.py b/src/pysource/models.py
index 3d84d971f..405fe8278 100644
--- a/src/pysource/models.py
+++ b/src/pysource/models.py
@@ -1,16 +1,54 @@
 import numpy as np
 import warnings
+from functools import cached_property
+
 from sympy import finite_diff_weights as fd_w
-from devito import (Grid, Function, SubDomain, SubDimension, Eq, Inc,
-                    Operator, mmin, mmax, initialize_function, switchconfig,
-                    Abs, sqrt, sin, Constant)
-from devito.data.allocators import ExternalAllocator
+from devito import (Grid, Function, SubDimension, Eq, Inc, switchconfig,
+                    Operator, mmin, mmax, initialize_function, MPI,
+                    Abs, sqrt, sin, Constant, CustomDimension)
+
 from devito.tools import as_tuple, memoized_func
 
 try:
     from devitopro import *  # noqa
+    from devitopro.subdomains import ABox
+    AboxBase = ABox
 except ImportError:
-    pass
+    ABox = None
+    AboxBase = object
+
+
+class ABoxSlowness(AboxBase):
+
+    def _1d_cmax(self, vp, eps):
+        cmaxs = []
+
+        if eps is not None:
+            assert vp.shape_allocated == eps.shape_allocated
+
+        for (di, d) in enumerate(vp.grid.dimensions):
+            rdim = tuple(i for (i, dl) in enumerate(vp.grid.dimensions) if dl is not d)
+
+            # Max over other dimensions, 1D array with the max in each plane
+            vpi = vp.data.min(axis=rdim)**(-.5)
+            # THomsen correction
+            if eps is not None:
+                epsi = eps.data.max(axis=rdim)
+                vpi._local[:] *= np.sqrt(1. + 2.*epsi._local[:])
+            # Gather on all ranks if distributed.
+            # Since we have a small-ish 1D vector we avoid the index gymnastic
+            # and create the full 1d vector on al ranks with the local values
+            # at the local indices and simply gather with Max
+            if vp.grid.distributor.is_parallel:
+                out = np.zeros(vp.grid.shape[di], dtype=vpi.dtype)
+                tmp = np.zeros(vp.grid.shape[di], dtype=vpi.dtype)
+                tmp[vp.local_indices[di]] = vpi._local
+                vp.grid.distributor.comm.Allreduce(tmp, out, op=MPI.MAX)
+                cmaxs.append(out)
+            else:
+                cmaxs.append(vpi)
+
+        return cmaxs
 
 
 __all__ = ['Model']
@@ -37,40 +75,6 @@ def getmax(f):
 _thomsen = [('epsilon', 1), ('delta', 1), ('theta', 0), ('phi', 0)]
 
 
-class PhysicalDomain(SubDomain):
-
-    name = 'nofsdomain'
-
-    def __init__(self, so, fs=False):
-        super(PhysicalDomain, self).__init__()
-        self.so = so
-        self.fs = fs
-
-    def define(self, dimensions):
-        map_d = {d: d for d in dimensions}
-        if self.fs:
-            map_d[dimensions[-1]] = ('middle', self.so, 0)
-        return map_d
-
-
-class FSDomain(SubDomain):
-
-    name = 'fsdomain'
-
-    def __init__(self, so):
-        super(FSDomain, self).__init__()
-        self.size = so
-
-    def define(self, dimensions):
-        """
-        Definition of the top part of the domain for wrapped indices FS
-        """
-        z = dimensions[-1]
-        map_d = {d: d for d in dimensions}
-        map_d.update({z: ('left', self.size)})
-        return map_d
-
-
 @memoized_func
 def damp_op(ndim, padsizes, abc_type, fs):
     """
@@ -78,20 +82,21 @@ def damp_op(ndim, padsizes, abc_type, fs):
 
     Parameters
     ----------
-    padsize : List of tuple
+    ndim : int
+        Number of dimensions in the model.
+    padsizes : List of tuple
         Number of points in the damping layer for each dimension and side.
-    spacing :
-        Grid spacing coefficient.
-    mask : bool, optional
+    abc_type : mask or damp
         whether the dampening is a mask or layer.
         mask => 1 inside the domain and decreases in the layer
-        not mask => 0 inside the domain and increase in the layer
+        damp => 0 inside the domain and increase in the layer
+    fs: bool
+        Whether the model is with free surface or not
     """
     damp = Function(name="damp", grid=Grid(tuple([11]*ndim)), space_order=0)
     eqs = [Eq(damp, 1.0 if abc_type == "mask" else 0.0)]
     for (nbl, nbr), d in zip(padsizes, damp.dimensions):
         # 3 Point buffer to avoid weird interaction with abc
-        nbr = nbr - 3
         if not fs or d is not damp.dimensions[-1]:
             nbl = nbl - 3
             dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbl)
@@ -103,6 +108,7 @@ def damp_op(ndim, padsizes, abc_type, fs):
             val = -val if abc_type == "mask" else val
             eqs += [Inc(damp.subs({d: dim_l}), val/d.spacing)]
         # right
+        nbr = nbr - 3
         dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbr)
         dim_r = SubDimension.right(name='abc_%s_r' % d.name, parent=d,
                                    thickness=nbr)
@@ -169,32 +175,31 @@ class Model(object):
         Asymuth angle in radian.
     dt: Float
         User provided computational time-step
+    abox: Float
+        Whether to use the exapanding box, defaults to true
     """
     def __init__(self, origin, spacing, shape, space_order=8, nbl=40, dtype=np.float32,
                  m=None, epsilon=None, delta=None, theta=None, phi=None, rho=None,
-                 b=None, qp=None, lam=None, mu=None, dm=None, fs=False, **kwargs):
+                 b=None, qp=None, lam=None, mu=None, dm=None, fs=False,
+                 **kwargs):
         # Setup devito grid
         self.shape = tuple(shape)
         self.nbl = int(nbl)
         self.origin = tuple([dtype(o) for o in origin])
         abc_type = "mask" if (qp is not None or mu is not None) else "damp"
         self.fs = fs
+        self._abox = None
         # Origin of the computational domain with boundary to inject/interpolate
         # at the correct index
         origin_pml = [dtype(o - s*nbl) for o, s in zip(origin, spacing)]
         shape_pml = np.array(shape) + 2 * self.nbl
         if fs:
-            fsdomain = FSDomain(space_order + 1)
-            physdomain = PhysicalDomain(space_order + 1, fs=fs)
-            subdomains = (physdomain, fsdomain)
             origin_pml[-1] = origin[-1]
             shape_pml[-1] -= self.nbl
-        else:
-            subdomains = ()
         # Physical extent is calculated per cell, so shape - 1
         extent = tuple(np.array(spacing) * (shape_pml - 1))
         self.grid = Grid(extent=extent, shape=shape_pml, origin=tuple(origin_pml),
-                         dtype=dtype, subdomains=subdomains)
+                         dtype=dtype)
 
         # Absorbing boundary layer
         if self.nbl != 0:
@@ -214,6 +219,7 @@ def __init__(self, origin, spacing, shape, space_order=8, nbl=40, dtype=np.float
             self._m = self._gen_phys_param(m, 'm', space_order)
         # density
         self._init_density(rho, b, space_order)
+        # Perturbation for linearized modeling
         self._dm = self._gen_phys_param(dm, 'dm', space_order)
 
         # Model type
@@ -277,6 +283,7 @@ def physical_params(self, **kwargs):
 
         if not kwargs.get('born', False):
             params.pop('dm', None)
+
         return params
 
     @property
@@ -305,8 +312,8 @@ def _gen_phys_param(self, field, name, space_order, is_param=False,
             else:
                 # We take advantage of the external allocator
                 function = Function(name=name, grid=self.grid, space_order=space_order,
-                                    allocator=ExternalAllocator(field),
-                                    initializer=lambda x: None, parameter=is_param)
+                                    parameter=is_param)
+                function.data[:] = field
         else:
             function = Constant(name=name, value=np.amin(field))
         self._physical_parameters.append(name)
@@ -422,12 +429,12 @@ def _cfl_coeff(self):
         """
         # Elasic coefficient (see e.g )
         if self.is_elastic:
-            so = max(self._space_order // 2, 2)
+            so = max(self.space_order // 2, 2)
             coeffs = fd_w(1, range(-so, so), .5)
             c_fd = sum(np.abs(coeffs[-1][-1])) / 2
             return .9 * np.sqrt(self.dim) / self.dim / c_fd
         a1 = 4  # 2nd order in time
-        so = max(self._space_order // 2, 4)
+        so = max(self.space_order // 2, 4)
         coeffs = fd_w(2, range(-so, so), 0)[-1][-1]
         return .9 * np.sqrt(a1/float(self.grid.dim * sum(np.abs(coeffs))))
 
@@ -536,11 +543,36 @@ def spacing_map(self):
         Map between spacing symbols and their values for each `SpaceDimension`.
         """
         sp_map = self.grid.spacing_map
-        sp_map.update({self.grid.time_dim.spacing: self.critical_dt})
         return sp_map
 
+    def abox(self, src, rec, fw=True):
+        if ABox is None or (src is None and rec is None):
+            return {}
+        if not fw:
+            src, rec = rec, src
+        eps = getattr(self, 'epsilon', None)
+        abox = ABoxSlowness(src, rec, self.m, self.space_order, eps=eps)
+        return {'abox': abox}
+
+    def __init_abox__(self, src, rec, fw=True):
+        return
+
+    @cached_property
+    def physical(self):
+        if ABox is None:
+            return None
+        else:
+            return self._abox
+
+    @cached_property
+    def fs_dim(self):
+        so = self.space_order // 2
+        return CustomDimension(name="zfs", symbolic_min=1,
+                               symbolic_max=so,
+                               symbolic_size=so)
 
-class EmptyModel(object):
+
+class EmptyModel():
     """
     An pseudo Model structure that does not contain any physical field
     but only the necessary information to create an operator.
@@ -553,16 +585,11 @@ def __init__(self, tti, visco, elastic, spacing, fs, space_order, p_params):
         self.is_elastic = elastic
         self.spacing = spacing
         self.fs = fs
+        self.space_order = space_order
         N = 2 * space_order + 1
-        if fs:
-            fsdomain = FSDomain(N)
-            physdomain = PhysicalDomain(N, fs=fs)
-            subdomains = (physdomain, fsdomain)
-        else:
-            subdomains = ()
+
         self.grid = Grid(tuple([N]*len(spacing)),
-                         extent=[s*(N-1) for s in spacing],
-                         subdomains=subdomains)
+                         extent=[s*(N-1) for s in spacing])
         self.dimensions = self.grid.dimensions
 
         # Create the function for the physical parameters
@@ -607,3 +634,28 @@ def zero_thomsen(self):
             except AttributeError:
                 pass
         return out
+
+    def __init_abox__(self, src, rec, fw=True):
+        if ABox is None:
+            return
+        if src is None and rec is None:
+            self._abox = None
+            return
+        eps = getattr(self, 'epsilon', None)
+        if not fw:
+            src, rec = rec, src
+        self._abox = ABoxSlowness(src, rec, self.m, self.space_order, eps=eps)
+
+    @cached_property
+    def physical(self):
+        if ABox is None:
+            return None
+        else:
+            return self._abox
+
+    @cached_property
+    def fs_dim(self):
+        so = self.space_order // 2
+        return CustomDimension(name="zfs", symbolic_min=1,
+                               symbolic_max=so,
+                               symbolic_size=so)
diff --git a/src/pysource/operators.py b/src/pysource/operators.py
index 6e37d3cc6..16b464b43 100644
--- a/src/pysource/operators.py
+++ b/src/pysource/operators.py
@@ -98,6 +98,10 @@ def forward_op(p_params, tti, visco, elas, space_order, fw, spacing, save, t_sub
     # Setting forward wavefield
     u = wavefield(model, space_order, save=save, nt=nt, t_sub=t_sub, fw=fw)
 
+    # Setup source and receiver
+    g_expr = geom_expr(model, u, src_coords=scords, nt=nt,
+                       rec_coords=rcords, wavelet=wavelet, fw=fw)
+
     # Expression for saving wavefield if time subsampling is used
     eq_save = save_subsampled(model, u, nt, t_sub, space_order=space_order)
 
@@ -108,11 +112,7 @@ def forward_op(p_params, tti, visco, elas, space_order, fw, spacing, save, t_sub
     wrec = extended_rec(model, wavelet if wr else None, u)
 
     # Set up PDE expression and rearrange
-    pde = wave_kernel(model, u, q=q, f0=Constant('f0'), fw=fw)
-
-    # Setup source and receiver
-    g_expr = geom_expr(model, u, src_coords=scords, nt=nt,
-                       rec_coords=rcords, wavelet=wavelet, fw=fw)
+    pde, extra = wave_kernel(model, u, q=q, f0=Constant('f0'), fw=fw)
 
     # On-the-fly Fourier
     dft = otf_dft(u, freq_list, model.grid.time_dim.spacing, factor=dft_sub)
@@ -126,7 +126,7 @@ def forward_op(p_params, tti, visco, elas, space_order, fw, spacing, save, t_sub
     # Create operator and run
     subs = model.spacing_map
     pname = "forward" if fw else "adjoint"
-    op = Operator(pde + wrec + nv_t + dft + g_expr + eq_save + nv_s + Ieq,
+    op = Operator(pde + wrec + nv_t + dft + g_expr + extra + eq_save + nv_s + Ieq,
                   subs=subs, name=pname+name(model),
                   opt=opt_op(model))
     op.cfunction
@@ -154,9 +154,14 @@ def born_op(p_params, tti, visco, elas, space_order, fw, spacing, save, pt_src,
     f0 = Constant('f0')
 
     # Setting wavefield
-    u = wavefield(model, space_order, save=save, nt=nt, t_sub=t_sub, fw=fw)
+    u = wavefield(model, space_order, save=save, nt=nt, t_sub=t_sub, fw=fw, tfull=True)
     ul = wavefield(model, space_order, name="l", fw=fw)
 
+    # Setup source and receiver
+    g_expr = geom_expr(model, u, rec_coords=rcords if nlind else None,
+                       src_coords=scords, wavelet=wavelet, fw=fw)
+    g_exprl = geom_expr(model, ul, rec_coords=rcords, nt=nt, fw=fw)
+
     # Expression for saving wavefield if time subsampling is used
     eq_save = save_subsampled(model, u, nt, t_sub, space_order=space_order)
 
@@ -164,15 +169,11 @@ def born_op(p_params, tti, visco, elas, space_order, fw, spacing, save, pt_src,
     q = extented_src(model, wsrc, wavelet)
 
     # Set up PDE expression and rearrange
-    pde = wave_kernel(model, u, q=q, f0=f0, fw=fw)
+    pde, extra = wave_kernel(model, u, q=q, f0=f0, fw=fw)
     if getattr(model, 'dm', 0) == 0:
-        pdel = []
+        pdel, extral = [], []
     else:
-        pdel = wave_kernel(model, ul, q=lin_src(model, u, ic=ic), f0=f0, fw=fw)
-    # Setup source and receiver
-    g_expr = geom_expr(model, u, rec_coords=rcords if nlind else None,
-                       src_coords=scords, wavelet=wavelet, fw=fw)
-    g_exprl = geom_expr(model, ul, rec_coords=rcords, nt=nt, fw=fw)
+        pdel, extral = wave_kernel(model, ul, q=lin_src(model, u, ic=ic), f0=f0, fw=fw)
 
     # On-the-fly Fourier
     dft = otf_dft(u, freq_list, model.critical_dt, factor=dft_sub)
@@ -182,7 +183,7 @@ def born_op(p_params, tti, visco, elas, space_order, fw, spacing, save, pt_src,
 
     # Create operator and run
     subs = model.spacing_map
-    op = Operator(pde + g_expr + g_exprl + pdel + dft + eq_save + Ieq,
+    op = Operator(pde + g_expr + extra + g_exprl + pdel + extral + dft + eq_save + Ieq,
                   subs=subs, name="born"+name(model),
                   opt=opt_op(model))
     op.cfunction
@@ -203,17 +204,18 @@ def adjoint_born_op(p_params, tti, visco, elas, space_order, fw, spacing, pt_rec
     residual = np.ones((nt, 1))
     rcords = np.ones((1, ndim)) if pt_rec else None
     freq_list = np.ones((nfreq,)) if nfreq > 0 else None
+
     # Setting adjoint wavefieldgradient
-    v = wavefield(model, space_order, fw=not fw)
+    v = wavefield(model, space_order, fw=not fw, tfull=True)
     u = forward_wavefield(model, space_order, save=save, nt=nt,
                           dft=nfreq > 0, t_sub=t_sub, fw=fw)
 
-    # Set up PDE expression and rearrange
-    pde = wave_kernel(model, v, fw=False, f0=Constant('f0'))
-
     # Setup source and receiver
     r_expr = geom_expr(model, v, src_coords=rcords, wavelet=residual, fw=not fw)
 
+    # Set up PDE expression and rearrange
+    pde, extra = wave_kernel(model, v, fw=False, f0=Constant('f0'))
+
     # Setup gradient wrt m
     gradm = Function(name="gradm", grid=model.grid)
     g_expr = grad_expr(gradm, u, v, model, w=w, freq=freq_list,
@@ -224,7 +226,8 @@ def adjoint_born_op(p_params, tti, visco, elas, space_order, fw, spacing, pt_rec
 
     # Create operator and run
     subs = model.spacing_map
-    op = Operator(pde + r_expr + g_expr + Ieq, subs=subs, name="gradient"+name(model),
+    op = Operator(pde + r_expr + extra + g_expr + Ieq, subs=subs,
+                  name="gradient"+name(model),
                   opt=opt_op(model))
     try:
         op.cfunction
diff --git a/src/pysource/propagators.py b/src/pysource/propagators.py
index dbaf59681..1cdea79ff 100644
--- a/src/pysource/propagators.py
+++ b/src/pysource/propagators.py
@@ -4,7 +4,7 @@
                     wavefield_subsampled, norm_holder, frequencies, memory_field)
 from fields_exprs import extented_src
 from sensitivity import grad_expr
-from utils import weight_fun, opt_op, fields_kwargs, nfreq, base_kwargs
+from utils import weight_fun, opt_op, fields_kwargs, nfreq, base_kwargs, cleanup_wf
 from operators import forward_op, born_op, adjoint_born_op
 
 from devito import Operator, Function, Constant
@@ -68,6 +68,7 @@ def forward(model, src_coords, rcv_coords, wavelet, save=False,
                            wr, wrt, nv2, nvt2, f0q, I)
     kw.update(fields)
     kw.update(model.physical_params())
+    kw.update(model.abox(src, rcv, fw=fw))
 
     # SLS field
     if model.is_viscoacoustic:
@@ -103,7 +104,7 @@ def gradient(model, residual, rcv_coords, u, return_op=False, fw=True,
     # Space order
     space_order = model.space_order
     # Setting adjoint wavefieldgradient
-    v = wavefield(model, space_order, fw=not fw)
+    v = wavefield(model, space_order, fw=not fw, tfull=True)
     try:
         t_sub = as_tuple(u)[0].indices[0]._factor
         if isinstance(t_sub, Constant):
@@ -133,6 +134,7 @@ def gradient(model, residual, rcv_coords, u, return_op=False, fw=True,
 
     kw.update(fields_kwargs(src, u, v, gradm, f0q, f, I))
     kw.update(model.physical_params())
+    kw.update(model.abox(src, None, fw=not fw))
 
     # SLS field
     if model.is_viscoacoustic:
@@ -144,6 +146,8 @@ def gradient(model, residual, rcv_coords, u, return_op=False, fw=True,
 
     summary = op(**kw)
 
+    cleanup_wf(u)
+
     # Output
     return gradm, I, summary
 
@@ -161,7 +165,7 @@ def born(model, src_coords, rcv_coords, wavelet, save=False,
     nt = wavelet.shape[0]
 
     # Wavefields
-    u = wavefield(model, space_order, save=save, nt=nt, t_sub=t_sub, fw=fw)
+    u = wavefield(model, space_order, save=save, nt=nt, t_sub=t_sub, fw=fw, tfull=True)
     ul = wavefield(model, space_order, name="l", fw=fw)
 
     # Illumination
@@ -181,6 +185,7 @@ def born(model, src_coords, rcv_coords, wavelet, save=False,
                                dft_sub=dft_sub, ws=ws, t_sub=t_sub, f0=f0, illum=illum)
 
         kw.update(fields_kwargs(rnl, I))
+        kw.update(model.abox(snl, rnl, fw=True))
 
         if return_op:
             return op, u, outrec, kw
@@ -209,6 +214,7 @@ def born(model, src_coords, rcv_coords, wavelet, save=False,
     # Update kwargs
     kw.update(fields_kwargs(u, ul, snl, rnl, rcvl, u_save, dft_m, fr, ws, wt, f0q, I))
     kw.update(model.physical_params(born=True))
+    kw.update(model.abox(snl, rnl, fw=True))
 
     # SLS field
     if model.is_viscoacoustic:
@@ -244,7 +250,7 @@ def forward_grad(model, src_coords, rcv_coords, wavelet, v,
     q = extented_src(model, ws, wavelet, q=q)
 
     # Set up PDE expression and rearrange
-    pde = wave_kernel(model, u, q=q, f0=f0)
+    pde, extra = wave_kernel(model, u, q=q, f0=f0)
 
     # Setup source and receiver
     rexpr = geom_expr(model, u, src_coords=src_coords, nt=nt,
@@ -257,7 +263,7 @@ def forward_grad(model, src_coords, rcv_coords, wavelet, v,
 
     # Create operator and run
     subs = model.spacing_map
-    op = Operator(pde + rexpr + g_expr,
+    op = Operator(pde + rexpr + extra + g_expr,
                   subs=subs, name="forward_grad",
                   opt=opt_op(model))
 
diff --git a/src/pysource/sensitivity.py b/src/pysource/sensitivity.py
index 5333d4b7b..445bcc212 100644
--- a/src/pysource/sensitivity.py
+++ b/src/pysource/sensitivity.py
@@ -5,9 +5,14 @@
 from devito.tools import as_tuple
 
 from fields import frequencies
-from fields_exprs import sub_time, freesurface
+from fields_exprs import sub_time
 from FD_utils import laplacian
 
+try:
+    import devitopro as dvp
+except ImportError:
+    dvp = None
+
 
 def func_name(freq=None, ic="as"):
     """
@@ -43,12 +48,7 @@ def grad_expr(gradm, u, v, model, w=None, freq=None, dft_sub=None, ic="as"):
     ic_func = ic_dict[func_name(freq=freq, ic=ic)]
     u, v = as_tuple(u), as_tuple(v)
     expr = ic_func(u, v, model, freq=freq, factor=dft_sub, w=w)
-    if model.fs and ic in ["fwi", "isic"]:
-        # Only need `fs` processing for isic for the spatial derivatives.
-        eq_g = [Eq(gradm, gradm - expr, subdomain=model.grid.subdomains['nofsdomain'])]
-        eq_g += freesurface(model, eq_g, (*as_tuple(u), *as_tuple(v)), fd_only=True)
-    else:
-        eq_g = [Eq(gradm, gradm - expr)]
+    eq_g = [Eq(gradm, gradm - expr, subdomain=model.physical)]
     return eq_g
 
 
diff --git a/src/pysource/utils.py b/src/pysource/utils.py
index 1003ea9de..7892d0d2f 100644
--- a/src/pysource/utils.py
+++ b/src/pysource/utils.py
@@ -2,12 +2,21 @@
     from collections.abc import Iterable
 except ImportError:
     from collections import Iterable
+
+import os
 import numpy as np
 from sympy import sqrt
 
 from devito import configuration
+from devito.arch import Device
+from devito.arch.compiler import NvidiaCompiler, CudaCompiler
 from devito.tools import as_tuple
 
+try:
+    import devitopro as dvp
+except ImportError:
+    dvp = None
+
 
 # Weighting
 def weight_fun(w_fun, model, src_coords):
@@ -86,7 +95,7 @@ def opt_op(model):
     model: Model
         Model structure to know if we are in a TTI model
     """
-    if configuration['platform'].name in ['nvidiaX', 'amdgpuX']:
+    if isinstance(configuration['platform'], Device):
         opts = {'openmp': True if configuration['language'] == 'openmp' else None,
                 'mpi': configuration['mpi']}
         mode = 'advanced'
@@ -123,18 +132,31 @@ def fields_kwargs(*args):
     return kw
 
 
-DEVICE = {"id": -1}  # noqa
+def base_kwargs(dt):
+    """
+    Most basic keyword arguments needed by the operator.
+    """
+    return {'dt': dt}
 
 
-def set_device_ids(devid):
-    DEVICE["id"] = devid
+def cleanup_wf(u):
+    """
+    Delete serialized snapshots
+    """
+    for ui in as_tuple(u):
+        try:
+            serialized = ui._parent._fnames
+            basedir = '/'.join(str(serialized[0]).split('/')[:-1])
+            os.system('rm -rf %s' % basedir)
+        except AttributeError:
+            continue
 
 
-def base_kwargs(dt):
+def compression_mode():
     """
-    Most basic keyword arguments needed by the operator.
+    Check compiler used to see if can use bitcomp
     """
-    if configuration['platform'].name == 'nvidiaX':
-        return {'dt': dt, 'deviceid': DEVICE["id"]}
+    if configuration['compiler'] in [NvidiaCompiler, CudaCompiler]:
+        return 'bitcomp'
     else:
-        return {'dt': dt}
+        return 'noop'