Skip to content

Commit

Permalink
integrated ClimateBase
Browse files Browse the repository at this point in the history
  • Loading branch information
Balinus committed Jun 16, 2023
1 parent cdf461b commit 7090912
Show file tree
Hide file tree
Showing 4 changed files with 338 additions and 6 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ name = "ClimateTools"
uuid = "4f4ee721-4970-5af2-8560-6c1d960e3231"
authors = ["Philippe Roy <borghor@yahoo.ca>"]
repo = "https://github.com/JuliaClimate/ClimateTools.jl.git"
version = "0.24.0"
version = "0.24.1"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
ClimateBase = "35604d93-0fb8-4872-9436-495b01d137e2"
#ClimateBase = "35604d93-0fb8-4872-9436-495b01d137e2"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand All @@ -32,7 +32,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[compat]
ArgCheck = "1, 2"
AxisArrays = "0.3, 0.4"
ClimateBase = "0.6"
#ClimateBase = "0.6"
DataFrames = "0.19, 0.20, 0.21, 1"
Distances = "0.7, 0.8, 0.9, 0.10"
Extremes = "0.3"
Expand Down
112 changes: 111 additions & 1 deletion src/ClimateTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ module ClimateTools

# External modules
using Reexport
@reexport using ClimateBase
using NetCDF
@reexport using NCDatasets
using Shapefile
Expand Down Expand Up @@ -49,6 +48,104 @@ function __init__()
copy!(scipy, pyimport_conda("scipy.interpolate", "scipy"))
end


# TYPES

"""
ClimGrid{A <: AxisArray}
In-memory representation of Climate Forecast netCDF files.
struct ClimGrid\n
data::AxisArray # Data\n
longrid::AbstractArray{N,2} where N # the longitude grid\n
latgrid::AbstractArray{N,2} where N # the latitude grid\n
msk::Array{N, 2} where N # Data mask (NaNs and 1.0)\n
grid_mapping::Dict#{String, Any} # bindings for native grid\n
dimension_dict::Dict\n
model::String\n
frequency::String # Day, month, years\n
experiment::String # Historical, RCP4.5, RCP8.5, etc.\n
run::String\n
project::String # CORDEX, CMIP5, etc.\n
institute::String # UQAM, DMI, etc.\n
filename::String # Path of the original file\n
dataunits::String # Celsius, kelvin, etc.\n
latunits::String # latitude coordinate unit\n
lonunits::String # longitude coordinate unit\n
variable::String # Type of variable (i.e. can be the same as "typeofvar", but it is changed when calculating indices)\n
typeofvar::String # Variable type (e.g. tasmax, tasmin, pr)\n
typeofcal::String # Calendar type\n
timeattrib::Dict # Time attributes (e.g. days since ... )\n
varattribs::Dict # Variable attributes dictionary\n
globalattribs::Dict # Global attributes dictionary\n
end\n
"""
struct ClimGrid{A <: AxisArray}
data::A
longrid::Array{N,T} where T where N
latgrid::Array{N,T} where T where N
msk::Array{N,T} where T where N
grid_mapping::Dict # information of native grid
dimension_dict::Dict
timeattrib::Dict
model::String
frequency::String
experiment::String
run::String
project::String
institute::String
filename::String
dataunits::String
latunits::String # of the coordinate variable
lonunits::String # of the coordinate variable
variable::String # Type of variable
typeofvar::String # Variable type (e.g. tasmax, tasmin, pr)
typeofcal::String # Calendar type
varattribs::Dict # Variable attributes
globalattribs::Dict # Global attributes

end

"""
ClimGrid(data; longrid=[], latgrid=[], msk=[], grid_mapping=Dict(), dimension_dict=Dict(), model="NA", frequency="NA", experiment="NA", run="NA", project="NA", institute="NA", filename="NA", dataunits="NA", latunits="NA", lonunits="NA", variable="NA", typeofvar="NA", typeofcal="NA", varattribs=Dict(), globalattribs=Dict())
Constructor of the ClimGrid function. Data is an AxisArray. Everything else is optional, but usually needed for further processing (mapping, interpolation, etc...).
struct ClimGrid\n
data::AxisArray # Data \n
longrid::AbstractArray{N,2} where N # the longitude grid \n
latgrid::AbstractArray{N,2} where N # the latitude grid \n
msk::Array{N, 2} where N # Data mask (NaNs and 1.0) \n
grid_mapping::Dict#{String, Any} # bindings for native grid \n
dimension_dict::Dict\n
model::String\n
frequency::String # Day, month, years\n
experiment::String # Historical, RCP4.5, RCP8.5, etc.\n
run::String\n
project::String # CORDEX, CMIP5, etc.\n
institute::String # UQAM, DMI, etc.\n
filename::String # Path of the original file\n
dataunits::String # Celsius, kelvin, etc.\n
latunits::String # latitude coordinate unit\n
lonunits::String # longitude coordinate unit\n
variable::String # Type of variable (i.e. can be the same as "typeofvar", but it is changed when calculating indices)\n
typeofvar::String # Variable type (e.g. tasmax, tasmin, pr)\n
typeofcal::String # Calendar type\n
timeattrib::Dict # Time attributes (e.g. days since ... )\n
varattribs::Dict # Variable attributes dictionary\n
globalattribs::Dict # Global attributes dictionary\n
end\n
"""
function ClimGrid(data; longrid=[], latgrid=[], msk=[], grid_mapping=Dict(), dimension_dict=Dict(), timeattrib=Dict(), model="NA", frequency="NA", experiment="NA", run="NA", project="NA", institute="NA", filename="NA", dataunits="NA", latunits="NA", lonunits="NA", variable="NA", typeofvar="NA", typeofcal="NA", varattribs=Dict(), globalattribs=Dict())

if isempty(dimension_dict)
dimension_dict = Dict(["lon" => "lon", "lat" => "lat"])
end

if isempty(msk)
msk = Array{Float64}(ones((size(data, 1), size(data, 2))))
end

ClimGrid(data, longrid, latgrid, msk, grid_mapping, dimension_dict, timeattrib, model, frequency, experiment, run, project, institute, filename, dataunits, latunits, lonunits, variable, typeofvar, typeofcal, varattribs, globalattribs)
end

# Included files
include("functions.jl")
include("indices.jl")
Expand All @@ -62,6 +159,18 @@ include("time.jl")
include("spatial.jl")
include("analysis.jl")

export ClimGrid
export periodmean
export finitemean
export temporalsubset
export verticalmean
export get_timevec
export buildtimetype
export timeindex
export buildarray_climato
export buildarrayinterface
export buildarray_annual
export buildarray_resample
export inpoly, inpolygrid, meshgrid, inpolyvec, ndgrid
export findmax, findmin
export frostdays, summerdays, icingdays, tropicalnights
Expand Down Expand Up @@ -96,4 +205,5 @@ export yearmonthdayhour
export write, findmindist



end #module
222 changes: 222 additions & 0 deletions src/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,228 @@ function get_position_clusters(x::Vector{Cluster})

end


"""
finitemean(x,y)
Calculate mean while omitting NaN, Inf, etc.
"""
finitemean(x) = mean(filter(x -> !isnan(x)&isfinite(x),x))
finitemean(x,y) = mapslices(finitemean,x,dims=y)

"""
periodmean(C::ClimGrid; startdate::Tuple, enddate::Tuple)
Mean of array data over a given period.
"""
function periodmean(C::ClimGrid; level=1, start_date::Tuple=(Inf, ), end_date::Tuple=(Inf,))

if start_date != (Inf,) || end_date != (Inf,)
C = temporalsubset(C, start_date, end_date)
end

datain = C.data.data

# Mean and squeeze
if ndims(datain) == 2
dataout = datain
elseif ndims(datain) == 3
if size(datain, 3) == 1 # already an average on single value
dataout = dropdims(datain, dims=3)
else
dataout = dropdims(finitemean(datain, 3), dims=3)
end
elseif ndims(datain) == 4
datain_lev = datain[:,:,level,:] # extract level
if size(datain_lev, 3) == 1
dataout = dropdims(datain_lev, dims=3)
else
dataout = dropdims(finitemean(datain_lev, 3), dims=3)
end
end

# Build output AxisArray
FD = buildarray_climato(C, dataout)

# Return climGrid type containing the indice
return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs)
end

"""
verticalmean(C::ClimGrid; startdate::Tuple, enddate::Tuple)
Mean of array data over all vertical levels.
"""
function verticalmean(C::ClimGrid)

datain = C.data.data

if ndims(datain) < 4
error("There is no vertical levels in the dataset")
end

if size(datain, 3) == 1 # Only one vertical level
dataout = dropdims(datain[:, :, :, :], dims = 3)
else
dataout = dropdims(finitemean(datain, 3), dims=3)
end

# Build output AxisArray
FD = buildarray_verticalmean(C, dataout)

# Return climGrid type containing the indice
return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs)
end

function buildarray_climato(C::ClimGrid, dataout)
lonsymbol = Symbol(C.dimension_dict["lon"])
latsymbol = Symbol(C.dimension_dict["lat"])
if ndims(dataout) == 2 # original was a 3D field
FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val))
elseif ndims(dataout) == 3 # original was a 4D field
levsymbol = Symbol(C.dimension_dict["height"])
FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{levsymbol}(C[1][Axis{levsymbol}].val))
end
return FD
end

function buildarray_verticalmean(C::ClimGrid, dataout)
lonsymbol = Symbol(C.dimension_dict["lon"])
latsymbol = Symbol(C.dimension_dict["lat"])
timesymbol = Symbol(C.dimension_dict["time"])

FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{timesymbol}(C[1][Axis{timesymbol}].val))

return FD
end

function buildarrayinterface(axisArraytmp, A)
latsymbol = Symbol(A.dimension_dict["lat"])
lonsymbol = Symbol(A.dimension_dict["lon"])
if ndims(axisArraytmp) == 2
axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val))
elseif ndims(axisArraytmp) == 3
axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:time}(A[1][Axis{:time}].val))
elseif ndims(axisArraytmp) == 4
axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:plev}(A[1][Axis{:plev}].val), Axis{:time}(A[1][Axis{:time}].val))
end
return axisArray
end

function buildarray_annual(C::ClimGrid, dataout, numYears)
lonsymbol = Symbol(C.dimension_dict["lon"])
latsymbol = Symbol(C.dimension_dict["lat"])
FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(Dates.year.(DateTime.(numYears))))
return FD
end

function buildarray_resample(C::ClimGrid, dataout, newtime)
lonsymbol = Symbol(C.dimension_dict["lon"])
latsymbol = Symbol(C.dimension_dict["lat"])
FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(newtime))
return FD
end


"""
function temporalsubset(C::ClimGrid, startdate::Date, enddate::Date)
Returns the temporal subset of ClimGrid C. The temporal subset is defined by a start and end date.
"""
function temporalsubset(C::ClimGrid, datebeg::Tuple, dateend::Tuple)

T = typeof(get_timevec(C)[1])
timeV = get_timevec(C)
idxtimebeg, idxtimeend = timeindex(timeV, datebeg, dateend, T)

# startdate = buildtimetype(datebeg, T)
# enddate = buildtimetype(dateend, T)

# some checkups
@argcheck idxtimebeg <= idxtimeend

dataOut = C[1][Axis{:time}(idxtimebeg:idxtimeend)]

# The following control ensure that a 1-timestep temporal subset returns a 3D Array with time information on the timestep. i.e. startdate == enddate
if ndims(dataOut) == 2
timeV = startdate
latsymbol = Symbol(C.dimension_dict["lat"])
lonsymbol = Symbol(C.dimension_dict["lon"])
data2 = fill(NaN, (size(dataOut,1), size(dataOut, 2), 1))
data2[:,:,1] = dataOut
dataOut = AxisArray(data2, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(C[1][Axis{:time}].val))

end

return ClimGrid(dataOut, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs)

end

"""
get_timevec(C::ClimGrid)
Returns time vector of ClimGrid C.
"""
get_timevec(C::ClimGrid) = C[1][Axis{:time}][:]

"""
buildtimetype(datetuple, f)
Returns the adequate DateTime for temporal subsetting using DateType *f*
"""
function buildtimetype(date_tuple, f)

if length(date_tuple) == 1
dateout = f(date_tuple[1], 01, 01)
elseif length(date_tuple) == 2
dateout = f(date_tuple[1], date_tuple[2], 01)
elseif length(date_tuple) == 3
dateout = f(date_tuple[1], date_tuple[2], date_tuple[3])
elseif length(date_tuple) == 4
dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], 00, 00)
elseif length(date_tuple) == 5
dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], 00)
elseif length(date_tuple) == 6
dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], date_tuple[6])
end

return dateout
end

"""
timeindex(timeVec, start_date, end_date, T)
Return the index of time vector specified by start_date and end_date. T is the DateTime type (see NCDatasets.jl documentation).
"""
function timeindex(timeV, datebeg::Tuple, dateend::Tuple, T)

# Start Date
if !isinf(datebeg[1])
# Build DateTime type
start_date = buildtimetype(datebeg, T)
# @argcheck start_date >= timeV[1]
idxtimebeg = findfirst(timeV .>= start_date)[1]
else
idxtimebeg = 1
end
# End date
if !isinf(dateend[1])
# Build DateTime type
end_date = buildtimetype(dateend, T)
# @argcheck end_date <= timeV[end]
idxtimeend = findlast(timeV .<= end_date)[1]
else
idxtimeend = length(timeV)
end

if !isinf(datebeg[1]) && !isinf(dateend[1])
@argcheck start_date <= end_date
end
return idxtimebeg, idxtimeend
end


# """
# getunit(C::ClimGrid)
#
Expand Down
Loading

0 comments on commit 7090912

Please sign in to comment.