diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c46759de..f5b13481 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: version: - - '1.9' + - '1.10' os: - ubuntu-latest arch: @@ -55,7 +55,7 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@latest with: - version: '1.9' + version: '1.10' - run: make docs env: PYTHON: "" diff --git a/Project.toml b/Project.toml index 3fd46e26..5806a4a7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ExponentialFamily" uuid = "62312e5e-252a-4322-ace9-a5f4bf9b357b" authors = ["Ismail Senoz ", "Dmitry Bagaev "] -version = "1.2.2" +version = "1.3.0" [deps] BayesBase = "b4ee3484-f114-42fe-b91c-797d54a0c67e" @@ -28,7 +28,7 @@ TinyHugeNumbers = "783c9a47-75a3-44ac-a16b-f1ab7b3acf04" [compat] Aqua = "0.7" -BayesBase = "1.1" +BayesBase = "1.2" Distributions = "0.25" DomainSets = "0.5.2, 0.6, 0.7" FastCholesky = "1.0" diff --git a/README.md b/README.md index fc302e25..b707fa8d 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,17 @@ |:-------------------------------------------------------------------------:|:------------------------------------------------------:|:---------------------------------------:| | [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url] | [![CI][ci-img]][ci-url] [![Aqua][aqua-img]][aqua-url] | [![CodeCov][codecov-img]][codecov-url] | -[ci-img]: https://github.com/biaslab/ExponentialFamily.jl/actions/workflows/CI.yml/badge.svg?branch=main -[ci-url]: https://github.com/biaslab/ExponentialFamily.jl/actions +[ci-img]: https://github.com/reactivebayes/ExponentialFamily.jl/actions/workflows/CI.yml/badge.svg?branch=main +[ci-url]: https://github.com/reactivebayes/ExponentialFamily.jl/actions [docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg -[docs-dev-url]: https://biaslab.github.io/ExponentialFamily.jl/dev +[docs-dev-url]: 
https://reactivebayes.github.io/ExponentialFamily.jl/dev -[codecov-img]: https://codecov.io/gh/biaslab/ExponentialFamily.jl/branch/main/graph/badge.svg -[codecov-url]: https://codecov.io/gh/biaslab/ExponentialFamily.jl?branch=main +[codecov-img]: https://codecov.io/gh/reactivebayes/ExponentialFamily.jl/branch/main/graph/badge.svg +[codecov-url]: https://codecov.io/gh/reactivebayes/ExponentialFamily.jl?branch=main [docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg -[docs-stable-url]: https://biaslab.github.io/ExponentialFamily.jl/stable +[docs-stable-url]: https://reactivebayes.github.io/ExponentialFamily.jl/stable [aqua-img]: https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg [aqua-url]: https://github.com/JuliaTesting/Aqua.jl @@ -32,7 +32,7 @@ ExponentialFamily.jl is a Julia package that extends the functionality of Distri - **Fisher Information**: ExponentialFamily.jl also offers computation of the [Fisher Information (FI)](https://en.wikipedia.org/wiki/Fisher_information) for various distributions. FI is a crucial quantity in statistical inference, providing insights into the sensitivity of a model's parameters to changes in the data. Essentially FI is the hessian of logpartition with respect to vectorized natural parameters. FI allows users to gain a deeper understanding of the behavior and performance of their probabilistic models. -Read more about the package in the [documentation](https://biaslab.github.io/ExponentialFamily.jl/stable/). +Read more about the package in the [documentation](https://reactivebayes.github.io/ExponentialFamily.jl/stable/). ## Installation ExponentialFamily.jl can be installed through the Julia package manager. 
In the Julia REPL, type `]` to enter the package manager mode and run: @@ -42,4 +42,4 @@ pkg> add ExponentialFamily # License -[MIT License](LICENSE) Copyright (c) 2023 BIASlab +[MIT License](LICENSE) Copyright (c) 2023-2024 BIASlab, 2024-present ReactiveBayes diff --git a/docs/make.jl b/docs/make.jl index 96482769..3753ac68 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -19,6 +19,8 @@ makedocs( if get(ENV, "CI", nothing) == "true" deploydocs( - repo = "github.com/biaslab/ExponentialFamily.jl.git" + repo = "github.com/ReactiveBayes/ExponentialFamily.jl.git", + devbranch = "main", + forcepush = true ) -end \ No newline at end of file +end diff --git a/docs/src/examples.md b/docs/src/examples.md index b15c65f7..5782539c 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -50,7 +50,7 @@ prod(PreserveTypeProd(ExponentialFamilyDistribution), prior, likelihood) Note that the result does not correspond to the `Laplace` distribution and returns a generic univariate `ExponentialFamilyDistribution`. This approach ensures consistency and compatibility, especially when dealing with a wide range of probability distributions. -Refer to the [`BayesBase`](https://github.com/biaslab/BayesBase.jl) for the documentation about available product strategies. +Refer to the [`BayesBase`](https://github.com/ReactiveBayes/BayesBase.jl) for the documentation about available product strategies. ## Computing various useful attributes of an exponential family member @@ -130,4 +130,4 @@ fisherinformation_of_gamma_in_natural_space(gamma_parameters_in_natural_space) ## Approximating attributes -Refer to the [`ExpectationApproximations.jl`](https://github.com/biaslab/ExpectationApproximations.jl) package for approximating various attributes of the members of the exponential family. 
\ No newline at end of file +Refer to the [`ExpectationApproximations.jl`](https://github.com/ReactiveBayes/ExpectationApproximations.jl) package for approximating various attributes of the members of the exponential family. diff --git a/docs/src/interface.md b/docs/src/interface.md index d5d9e4ec..c434bb04 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -38,11 +38,13 @@ isproper getbasemeasure getsufficientstatistics getlogpartition +getgradlogpartition getfisherinformation getsupport basemeasure sufficientstatistics logpartition +gradlogpartition fisherinformation isbasemeasureconstant ConstantBaseMeasure diff --git a/src/common.jl b/src/common.jl index be33c46f..2acfa11a 100644 --- a/src/common.jl +++ b/src/common.jl @@ -44,3 +44,5 @@ function binomial_prod(n, p, x) end end end + +mvdigamma(η,p) = sum( digamma(η + (one(d) - d)/2) for d=1:p) \ No newline at end of file diff --git a/src/distributions/bernoulli.jl b/src/distributions/bernoulli.jl index 561365eb..8ba29f91 100644 --- a/src/distributions/bernoulli.jl +++ b/src/distributions/bernoulli.jl @@ -94,6 +94,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Bernoulli}) = (η) -> begin return -log(logistic(-η₁)) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Bernoulli}) = (η) -> begin + (η₁,) = unpack_parameters(Bernoulli, η) + return SA[logistic(η₁)] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Bernoulli}) = (η) -> begin (η₁,) = unpack_parameters(Bernoulli, η) f = logistic(-η₁) diff --git a/src/distributions/beta.jl b/src/distributions/beta.jl index 26ac9807..7e363a9e 100644 --- a/src/distributions/beta.jl +++ b/src/distributions/beta.jl @@ -62,6 +62,16 @@ getlogpartition(::NaturalParametersSpace, ::Type{Beta}) = (η) -> begin return logbeta(η₁ + one(η₁), η₂ + one(η₂)) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Beta}) = (η) -> begin + (η₁, η₂) = unpack_parameters(Beta, η) + η₁p = η₁ + one(η₁) + η₂p = η₂ + one(η₂) + ηsum = η₁p + η₂p + dig = 
digamma(ηsum) + + return SA[digamma(η₁p) - dig, digamma(η₂p) - dig] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Beta}) = (η) -> begin (η₁, η₂) = unpack_parameters(Beta, η) psia = trigamma(η₁ + one(η₁)) diff --git a/src/distributions/binomial.jl b/src/distributions/binomial.jl index 6bd640eb..79a00ad4 100644 --- a/src/distributions/binomial.jl +++ b/src/distributions/binomial.jl @@ -92,6 +92,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Binomial}, ntrials) = (η) -> b return ntrials * log1pexp(η₁) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Binomial}, ntrials) = (η) -> begin + (η₁,) = unpack_parameters(Binomial, η) + return SA[ntrials*exp(η₁) / (one(η₁) + exp(η₁))] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Binomial}, ntrials) = (η) -> begin (η₁,) = unpack_parameters(Binomial, η) aux = logistic(η₁) diff --git a/src/distributions/categorical.jl b/src/distributions/categorical.jl index f1d01b3e..bd256d06 100644 --- a/src/distributions/categorical.jl +++ b/src/distributions/categorical.jl @@ -79,6 +79,19 @@ getlogpartition(::NaturalParametersSpace, ::Type{Categorical}, conditioner) = return logsumexp(η) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Categorical}, conditioner) = + (η) -> begin + if (conditioner !== length(η)) + throw( + DimensionMismatch( + "Cannot evaluate the logparition of the `Categorical` with `conditioner = $(conditioner)` and vector of natural parameters `η = $(η)`" + ) + ) + end + sumη = vmapreduce(exp, +, η) + return vmap(d->exp(d)/sumη ,η) + end + getfisherinformation(::NaturalParametersSpace, ::Type{Categorical}, conditioner) = (η) -> begin if (conditioner !== length(η)) diff --git a/src/distributions/chi_squared.jl b/src/distributions/chi_squared.jl index d6220a8a..209267cc 100644 --- a/src/distributions/chi_squared.jl +++ b/src/distributions/chi_squared.jl @@ -64,6 +64,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Chisq}) = (η) -> begin return loggamma(η1 + o) + 
(η1 + o) * logtwo end +getgradlogpartition(::NaturalParametersSpace, ::Type{Chisq}) = (η) -> begin + (η1,) = unpack_parameters(Chisq, η) + return SA[digamma(η1 + one(η1)) + logtwo] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Chisq}) = (η) -> begin (η1,) = unpack_parameters(Chisq, η) SA[trigamma(η1 + one(η1));;] diff --git a/src/distributions/dirichlet.jl b/src/distributions/dirichlet.jl index bfe81cc3..3dc19268 100644 --- a/src/distributions/dirichlet.jl +++ b/src/distributions/dirichlet.jl @@ -69,6 +69,11 @@ getfisherinformation(::NaturalParametersSpace, ::Type{Dirichlet}) = return Diagonal(map(d -> trigamma(d + 1), η1)) - Ones{Float64}(n, n) * trigamma(sum(η1) + n) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Dirichlet}) = (η) -> begin + (η1,) = unpack_parameters(Dirichlet, η) + return digamma.(η1 .+ 1) .- digamma(sum(η1 .+ 1)) +end + # Mean parametrization getlogpartition(::MeanParametersSpace, ::Type{Dirichlet}) = (θ) -> begin @@ -83,3 +88,8 @@ getfisherinformation(::MeanParametersSpace, ::Type{Dirichlet}) = (θ) -> begin n = length(α) return Diagonal(map(d -> trigamma(d), α)) - Ones{Float64}(n, n) * trigamma(sum(α)) end + +getgradlogpartition(::MeanParametersSpace, ::Type{Dirichlet}) = (θ) -> begin + (α,) = unpack_parameters(Dirichlet, θ) + return digamma.(α) .- digamma(sum(α)) +end \ No newline at end of file diff --git a/src/distributions/erlang.jl b/src/distributions/erlang.jl index 2bcf8d54..e65e344a 100644 --- a/src/distributions/erlang.jl +++ b/src/distributions/erlang.jl @@ -57,6 +57,13 @@ getlogpartition(::NaturalParametersSpace, ::Type{Erlang}) = (η) -> begin return loggamma(η1 + 1) - (η1 + one(η1)) * log(-η2) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Erlang}) = (η) -> begin + (η1, η2) = unpack_parameters(Erlang, η) + dη1 = digamma(η1 + 1) - log(-η2) + dη2 = - (η1 + one(η1))*inv(η2) + return SA[dη1, dη2] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Erlang}) = (η) -> begin (η1, η2) = 
unpack_parameters(Erlang, η) miη2 = -inv(η2) diff --git a/src/distributions/exponential.jl b/src/distributions/exponential.jl index e6820585..1278ee24 100644 --- a/src/distributions/exponential.jl +++ b/src/distributions/exponential.jl @@ -44,6 +44,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Exponential}) = (η) -> begin return -log(-η₁) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Exponential}) = (η) -> begin + (η₁,) = unpack_parameters(Exponential, η) + return SA[-1/η₁] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Exponential}) = (η) -> begin (η₁,) = unpack_parameters(Exponential, η) SA[inv(η₁^2);;] diff --git a/src/distributions/gamma_family/gamma_family.jl b/src/distributions/gamma_family/gamma_family.jl index 2ccf232a..a08976b0 100644 --- a/src/distributions/gamma_family/gamma_family.jl +++ b/src/distributions/gamma_family/gamma_family.jl @@ -100,6 +100,11 @@ getfisherinformation(::NaturalParametersSpace, ::Type{Gamma}) = (η) -> begin SA[trigamma(η₁ + one(η₁)) -inv(η₂); -inv(η₂) (η₁+one(η₁))/(η₂^2)] end +getgradlogpartition(::NaturalParametersSpace, ::Type{Gamma}) = (η) -> begin + (η₁, η₂) = unpack_parameters(Gamma, η) + return SA[digamma(η₁ + one(η₁)) - log(-η₂), - (η₁ + one(η₁)) / η₂] +end + # Mean parametrization getlogpartition(::MeanParametersSpace, ::Type{Gamma}) = (θ) -> begin @@ -114,3 +119,8 @@ getfisherinformation(::MeanParametersSpace, ::Type{Gamma}) = (θ) -> begin inv(scale) shape/abs2(scale) ] end + +getgradlogpartition(::MeanParametersSpace, ::Type{Gamma}) = (θ) -> begin + (shape, scale) = unpack_parameters(Gamma, θ) + return SA[digamma(shape) - log(scale), - shape / scale] +end \ No newline at end of file diff --git a/src/distributions/gamma_family/gamma_shape_rate.jl b/src/distributions/gamma_family/gamma_shape_rate.jl index 08e55922..cf8aa8bd 100644 --- a/src/distributions/gamma_family/gamma_shape_rate.jl +++ b/src/distributions/gamma_family/gamma_shape_rate.jl @@ -26,13 +26,15 @@ GammaShapeRate() = 
GammaShapeRate(1.0, 1.0) Distributions.@distr_support GammaShapeRate 0 Inf -BayesBase.support(dist::GammaShapeRate) = Distributions.RealInterval(minimum(dist), maximum(dist)) -BayesBase.shape(dist::GammaShapeRate) = dist.a -BayesBase.rate(dist::GammaShapeRate) = dist.b -BayesBase.scale(dist::GammaShapeRate) = inv(dist.b) -BayesBase.mean(dist::GammaShapeRate) = shape(dist) / rate(dist) -BayesBase.var(dist::GammaShapeRate) = shape(dist) / abs2(rate(dist)) -BayesBase.params(dist::GammaShapeRate) = (shape(dist), rate(dist)) +BayesBase.support(dist::GammaShapeRate) = Distributions.RealInterval(minimum(dist), maximum(dist)) +BayesBase.shape(dist::GammaShapeRate) = dist.a +BayesBase.rate(dist::GammaShapeRate) = dist.b +BayesBase.scale(dist::GammaShapeRate) = inv(dist.b) +BayesBase.mean(dist::GammaShapeRate) = shape(dist) / rate(dist) +BayesBase.var(dist::GammaShapeRate) = shape(dist) / abs2(rate(dist)) +BayesBase.params(dist::GammaShapeRate) = (shape(dist), rate(dist)) +BayesBase.kurtosis(dist::GammaShapeRate) = kurtosis(convert(Gamma, dist)) +BayesBase.skewness(dist::GammaShapeRate) = skewness(convert(Gamma, dist)) BayesBase.mode(d::GammaShapeRate) = shape(d) >= 1 ? 
mode(Gamma(shape(d), scale(d))) : throw(error("Gamma has no mode when shape < 1")) diff --git a/src/distributions/gamma_inverse.jl b/src/distributions/gamma_inverse.jl index cd8767ca..e521ab60 100644 --- a/src/distributions/gamma_inverse.jl +++ b/src/distributions/gamma_inverse.jl @@ -63,6 +63,13 @@ getlogpartition(::NaturalParametersSpace, ::Type{GammaInverse}) = (η) -> begin return loggamma(-η₁ - one(η₁)) - (-η₁ - one(η₁)) * log(-η₂) end +getgradlogpartition(::NaturalParametersSpace, ::Type{GammaInverse}) = (η) -> begin + (η₁, η₂) = unpack_parameters(GammaInverse, η) + dη1 = -digamma(-η₁ - one(η₁)) + log(-η₂) + dη2 = - (-η₁ - one(η₁))/η₂ + return SA[dη1, dη2] +end + getfisherinformation(::NaturalParametersSpace, ::Type{GammaInverse}) = (η) -> begin (η₁, η₂) = unpack_parameters(GammaInverse, η) diff --git a/src/distributions/geometric.jl b/src/distributions/geometric.jl index 51f33b6e..81e00cdf 100644 --- a/src/distributions/geometric.jl +++ b/src/distributions/geometric.jl @@ -49,6 +49,11 @@ getfisherinformation(::NaturalParametersSpace, ::Type{Geometric}) = (η) -> begi return SA[exp(η1) / (one(η1) - exp(η1))^2;;] end +getgradlogpartition(::NaturalParametersSpace, ::Type{Geometric}) = (η) -> begin + (η1,) = unpack_parameters(Geometric, η) + return SA[exp(η1) / (one(η1) - exp(η1));] +end + # Mean parametrization getlogpartition(::MeanParametersSpace, ::Type{Geometric}) = (θ) -> begin @@ -60,3 +65,8 @@ getfisherinformation(::MeanParametersSpace, ::Type{Geometric}) = (θ) -> begin (p,) = unpack_parameters(Geometric, θ) return SA[one(p) / (p^2 * (one(p) - p));;] end + +getgradlogpartition(::MeanParametersSpace, ::Type{Geometric}) = (θ) -> begin + (p,) = unpack_parameters(Geometric, θ) + return SA[one(p) / (p^2 - p);] +end \ No newline at end of file diff --git a/src/distributions/laplace.jl b/src/distributions/laplace.jl index 4d8a71b4..4d60b1c5 100644 --- a/src/distributions/laplace.jl +++ b/src/distributions/laplace.jl @@ -168,6 +168,11 @@ 
getlogpartition(::NaturalParametersSpace, ::Type{Laplace}, _) = (η) -> begin return log(-2 / η₁) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Laplace}, _) = (η) -> begin + (η₁,) = unpack_parameters(Laplace, η) + return SA[-inv(η₁);] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Laplace}, _) = (η) -> begin (η₁,) = unpack_parameters(Laplace, η) return SA[inv(η₁^2);;] diff --git a/src/distributions/lognormal.jl b/src/distributions/lognormal.jl index 32c1f23b..9c1cf971 100644 --- a/src/distributions/lognormal.jl +++ b/src/distributions/lognormal.jl @@ -53,6 +53,13 @@ getlogpartition(::NaturalParametersSpace, ::Type{LogNormal}) = (η) -> begin return -(η₁ + 1)^2 / (4η₂) - log(-2η₂) / 2 end +getgradlogpartition(::NaturalParametersSpace, ::Type{LogNormal}) = (η) -> begin + (η₁, η₂) = unpack_parameters(LogNormal, η) + dη1 = -(η₁ + 1)/(2η₂) + dη2 = (η₁ + 1)^2/(4η₂^2) - inv(η₂)/2 + return SA[dη1, dη2] +end + getfisherinformation(::NaturalParametersSpace, ::Type{LogNormal}) = (η) -> begin (η₁, η₂) = unpack_parameters(LogNormal, η) @@ -66,6 +73,13 @@ getlogpartition(::MeanParametersSpace, ::Type{LogNormal}) = (θ) -> begin return abs2(μ) / (2abs2(σ)) + log(σ) end +getgradlogpartition(::MeanParametersSpace, ::Type{LogNormal}) = (θ) -> begin + (μ, σ) = unpack_parameters(LogNormal, θ) + dμ = μ / (abs2(σ)) + dσ = -abs2(μ) / (σ^3) + 1/σ + return SA[dμ, dσ] +end + getfisherinformation(::MeanParametersSpace, ::Type{LogNormal}) = (θ) -> begin (μ, σ) = unpack_parameters(LogNormal, θ) invσ² = inv(abs2(σ)) diff --git a/src/distributions/matrix_dirichlet.jl b/src/distributions/matrix_dirichlet.jl index 9f898db2..06010965 100644 --- a/src/distributions/matrix_dirichlet.jl +++ b/src/distributions/matrix_dirichlet.jl @@ -153,6 +153,14 @@ getlogpartition(::NaturalParametersSpace, ::Type{MatrixDirichlet}) = ) end +getgradlogpartition(::NaturalParametersSpace, ::Type{MatrixDirichlet}) = + (η) -> begin + (η1,) = unpack_parameters(MatrixDirichlet, η) + return 
vmapreduce( + d -> getgradlogpartition(NaturalParametersSpace(), Dirichlet)(convert(Vector, d)),vcat, + eachcol(η1)) + end + getfisherinformation(::NaturalParametersSpace, ::Type{MatrixDirichlet}) = (η) -> begin (η1,) = unpack_parameters(MatrixDirichlet, η) @@ -177,6 +185,14 @@ getlogpartition(::MeanParametersSpace, ::Type{MatrixDirichlet}) = ) end +getgradlogpartition(::MeanParametersSpace, ::Type{MatrixDirichlet}) = + (θ) -> begin + (α,) = unpack_parameters(MatrixDirichlet, θ) + return vmapreduce( + d -> getgradlogpartition(NaturalParametersSpace(), Dirichlet)(convert(Vector, d)),vcat, + eachcol(α)) + end + getfisherinformation(::MeanParametersSpace, ::Type{MatrixDirichlet}) = (θ) -> begin (α,) = unpack_parameters(MatrixDirichlet, θ) diff --git a/src/distributions/negative_binomial.jl b/src/distributions/negative_binomial.jl index ebb36bbf..c0d3ddf6 100644 --- a/src/distributions/negative_binomial.jl +++ b/src/distributions/negative_binomial.jl @@ -106,6 +106,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{NegativeBinomial}, conditioner) return -conditioner * log(one(η1) - exp(η1)) end +getgradlogpartition(::NaturalParametersSpace,::Type{NegativeBinomial}, conditioner) = (η) -> begin + (η1,) = unpack_parameters(NegativeBinomial, η) + return SA[-conditioner*(-exp(η1)/(one(η1)-exp(η1)));] +end + getfisherinformation(::NaturalParametersSpace, ::Type{NegativeBinomial}, r) = (η) -> begin (η1,) = unpack_parameters(NegativeBinomial, η) return SA[r * exp(η1) / (one(η1) - exp(η1))^2;;] @@ -118,6 +123,11 @@ getlogpartition(::MeanParametersSpace, ::Type{NegativeBinomial}, conditioner) = return -conditioner * log(one(p) - p) end +getgradlogpartition(::MeanParametersSpace,::Type{NegativeBinomial}, conditioner) = (θ) -> begin + (p,) = unpack_parameters(NegativeBinomial, θ) + return SA[conditioner*inv(one(p) - p);] +end + getfisherinformation(::MeanParametersSpace, ::Type{NegativeBinomial}, r) = (θ) -> begin (p,) = unpack_parameters(NegativeBinomial, θ) return SA[r / 
(p^2 * (one(p) - p));;] end diff --git a/src/distributions/normal_family/normal_family.jl b/src/distributions/normal_family/normal_family.jl index b45f1f89..80e70658 100644 --- a/src/distributions/normal_family/normal_family.jl +++ b/src/distributions/normal_family/normal_family.jl @@ -396,10 +396,18 @@ end # isapprox function Base.isapprox(left::UnivariateNormalDistributionsFamily, right::UnivariateNormalDistributionsFamily; kwargs...) + return all(p -> isapprox(p[1], p[2]; kwargs...), zip(mean_var(left), mean_var(right))) +end + +function Base.isapprox(left::D, right::D; kwargs...) where { D <: UnivariateNormalDistributionsFamily } return all(p -> isapprox(p[1], p[2]; kwargs...), zip(params(left), params(right))) end function Base.isapprox(left::MultivariateNormalDistributionsFamily, right::MultivariateNormalDistributionsFamily; kwargs...) + return all(p -> isapprox(p[1], p[2]; kwargs...), zip(mean_cov(left), mean_cov(right))) +end + +function Base.isapprox(left::D, right::D; kwargs...) where { D <: MultivariateNormalDistributionsFamily } return all(p -> isapprox(p[1], p[2]; kwargs...), zip(params(left), params(right))) end @@ -575,6 +583,12 @@ getlogpartition(::NaturalParametersSpace, ::Type{NormalMeanVariance}) = (η) -> return -abs2(η₁) / 4η₂ - log(-2η₂) / 2 end +getgradlogpartition(::NaturalParametersSpace, ::Type{NormalMeanVariance}) = + (η) -> begin + (η₁, η₂) = unpack_parameters(NormalMeanVariance, η) + return SA[-η₁ * inv(η₂*2), abs2(η₁) / ( 4 * abs2(η₂)) - 1 / (2 * η₂)] + end + getfisherinformation(::NaturalParametersSpace, ::Type{NormalMeanVariance}) = (η) -> begin (η₁, η₂) = unpack_parameters(NormalMeanVariance, η) @@ -591,6 +605,12 @@ getlogpartition(::MeanParametersSpace, ::Type{NormalMeanVariance}) = (θ) -> beg return μ / 2σ² + log(sqrt(σ²)) end +getgradlogpartition(::MeanParametersSpace, ::Type{NormalMeanVariance}) = + (θ) -> begin + (μ, σ²) = unpack_parameters(NormalMeanVariance, θ) + return SA[μ / σ², - abs2(μ) / (2σ²^2) + 1 / (2σ²)] + end + 
getfisherinformation(::MeanParametersSpace, ::Type{NormalMeanVariance}) = (θ) -> begin (_, σ²) = unpack_parameters(NormalMeanVariance, θ) return SA[inv(σ²) 0; 0 inv(2 * (σ²^2))] @@ -678,6 +698,13 @@ getlogpartition(::NaturalParametersSpace, ::Type{MvNormalMeanCovariance}) = (η) return (dot(η₁, Cinv, η₁) / 2 - (k * log(2) + l)) / 2 end +getgradlogpartition(::NaturalParametersSpace, ::Type{MvNormalMeanCovariance}) = + (η) -> begin + (η₁, η₂) = unpack_parameters(MvNormalMeanCovariance, η) + Cinv, _ = cholinv_logdet(-η₂) + return pack_parameters(MvNormalMeanCovariance, (0.5 * Cinv * η₁, 0.25 * Cinv * η₁ * η₁' * Cinv + 0.5 * Cinv)) + end + getfisherinformation(::NaturalParametersSpace, ::Type{MvNormalMeanCovariance}) = (η) -> begin (η₁, η₂) = unpack_parameters(MvNormalMeanCovariance, η) diff --git a/src/distributions/normal_family/normal_mean_precision.jl b/src/distributions/normal_family/normal_mean_precision.jl index bda7c297..72e90303 100644 --- a/src/distributions/normal_family/normal_mean_precision.jl +++ b/src/distributions/normal_family/normal_mean_precision.jl @@ -27,15 +27,17 @@ BayesBase.support(dist::NormalMeanPrecision) = Distributions.RealInterval(minimu BayesBase.weightedmean(dist::NormalMeanPrecision) = precision(dist) * mean(dist) -BayesBase.mean(dist::NormalMeanPrecision) = dist.μ -BayesBase.median(dist::NormalMeanPrecision) = mean(dist) -BayesBase.mode(dist::NormalMeanPrecision) = mean(dist) -BayesBase.var(dist::NormalMeanPrecision) = inv(dist.w) -BayesBase.std(dist::NormalMeanPrecision) = sqrt(var(dist)) -BayesBase.cov(dist::NormalMeanPrecision) = var(dist) -BayesBase.invcov(dist::NormalMeanPrecision) = dist.w +BayesBase.mean(dist::NormalMeanPrecision) = dist.μ +BayesBase.median(dist::NormalMeanPrecision) = mean(dist) +BayesBase.mode(dist::NormalMeanPrecision) = mean(dist) +BayesBase.var(dist::NormalMeanPrecision) = inv(dist.w) +BayesBase.std(dist::NormalMeanPrecision) = sqrt(var(dist)) +BayesBase.cov(dist::NormalMeanPrecision) = var(dist) 
+BayesBase.invcov(dist::NormalMeanPrecision) = dist.w BayesBase.entropy(dist::NormalMeanPrecision) = (1 + log2π - log(precision(dist))) / 2 -BayesBase.params(dist::NormalMeanPrecision) = (mean(dist), precision(dist)) +BayesBase.params(dist::NormalMeanPrecision) = (mean(dist), precision(dist)) +BayesBase.kurtosis(dist::NormalMeanPrecision) = kurtosis(convert(Normal, dist)) +BayesBase.skewness(dist::NormalMeanPrecision) = skewness(convert(Normal, dist)) BayesBase.pdf(dist::NormalMeanPrecision, x::Real) = (invsqrt2π * exp(-abs2(x - mean(dist)) * precision(dist) / 2)) * sqrt(precision(dist)) BayesBase.logpdf(dist::NormalMeanPrecision, x::Real) = -(log2π - log(precision(dist)) + abs2(x - mean(dist)) * precision(dist)) / 2 diff --git a/src/distributions/normal_family/normal_mean_variance.jl b/src/distributions/normal_family/normal_mean_variance.jl index 5e41f58a..1d1becf4 100644 --- a/src/distributions/normal_family/normal_mean_variance.jl +++ b/src/distributions/normal_family/normal_mean_variance.jl @@ -32,15 +32,18 @@ function BayesBase.weightedmean_invcov(dist::NormalMeanVariance) return (xi, w) end -BayesBase.mean(dist::NormalMeanVariance) = dist.μ -BayesBase.median(dist::NormalMeanVariance) = mean(dist) -BayesBase.mode(dist::NormalMeanVariance) = mean(dist) -BayesBase.var(dist::NormalMeanVariance) = dist.v -BayesBase.std(dist::NormalMeanVariance) = sqrt(var(dist)) -BayesBase.cov(dist::NormalMeanVariance) = var(dist) -BayesBase.invcov(dist::NormalMeanVariance) = inv(cov(dist)) -BayesBase.entropy(dist::NormalMeanVariance) = (1 + log2π + log(var(dist))) / 2 -BayesBase.params(dist::NormalMeanVariance) = (dist.μ, dist.v) +BayesBase.mean(dist::NormalMeanVariance) = dist.μ +BayesBase.median(dist::NormalMeanVariance) = mean(dist) +BayesBase.mode(dist::NormalMeanVariance) = mean(dist) +BayesBase.var(dist::NormalMeanVariance) = dist.v +BayesBase.std(dist::NormalMeanVariance) = sqrt(var(dist)) +BayesBase.cov(dist::NormalMeanVariance) = var(dist) 
+BayesBase.invcov(dist::NormalMeanVariance) = inv(cov(dist)) +BayesBase.entropy(dist::NormalMeanVariance) = (1 + log2π + log(var(dist))) / 2 +BayesBase.params(dist::NormalMeanVariance) = (dist.μ, dist.v) +BayesBase.kurtosis(dist::NormalMeanVariance) = kurtosis(convert(Normal, dist)) +BayesBase.skewness(dist::NormalMeanVariance) = skewness(convert(Normal, dist)) + BayesBase.pdf(dist::NormalMeanVariance, x::Real) = (invsqrt2π * exp(-abs2(x - mean(dist)) / 2cov(dist))) / std(dist) BayesBase.logpdf(dist::NormalMeanVariance, x::Real) = -(log2π + log(var(dist)) + abs2(x - mean(dist)) / var(dist)) / 2 diff --git a/src/distributions/normal_family/normal_weighted_mean_precision.jl b/src/distributions/normal_family/normal_weighted_mean_precision.jl index cc15f868..f49bb1fe 100644 --- a/src/distributions/normal_family/normal_weighted_mean_precision.jl +++ b/src/distributions/normal_family/normal_weighted_mean_precision.jl @@ -42,6 +42,8 @@ BayesBase.cov(dist::NormalWeightedMeanPrecision) = var(dist) BayesBase.invcov(dist::NormalWeightedMeanPrecision) = dist.w BayesBase.entropy(dist::NormalWeightedMeanPrecision) = (1 + log2π - log(precision(dist))) / 2 BayesBase.params(dist::NormalWeightedMeanPrecision) = (weightedmean(dist), precision(dist)) +BayesBase.kurtosis(dist::NormalWeightedMeanPrecision) = kurtosis(convert(Normal, dist)) +BayesBase.skewness(dist::NormalWeightedMeanPrecision) = skewness(convert(Normal, dist)) BayesBase.pdf(dist::NormalWeightedMeanPrecision, x::Real) = (invsqrt2π * exp(-abs2(x - mean(dist)) * precision(dist) / 2)) * sqrt(precision(dist)) BayesBase.logpdf(dist::NormalWeightedMeanPrecision, x::Real) = -(log2π - log(precision(dist)) + abs2(x - mean(dist)) * precision(dist)) / 2 diff --git a/src/distributions/normal_gamma.jl b/src/distributions/normal_gamma.jl index a392fac7..a557d50f 100644 --- a/src/distributions/normal_gamma.jl +++ b/src/distributions/normal_gamma.jl @@ -143,6 +143,18 @@ getlogpartition(::NaturalParametersSpace, ::Type{NormalGamma}) = 
(η) -> begin return loggamma(η3half) - log(-2η2) * (1 / 2) - (η3half) * log(-η4 + η1^2 / (4η2)) end +getgradlogpartition(::NaturalParametersSpace,::Type{NormalGamma}) = (η) -> begin + (η1, η2, η3, η4) = unpack_parameters(NormalGamma, η) + η3half = η3 + (1 / 2) + c = (-η4 + η1^2/(4η2)) + dη1 = -η3half*((η1/(2η2)) / c) + dη2 = -inv(η2)/2 - η3half*(-η1^2/(4η2^2) / c) + dη3 = digamma(η3half) - log(c) + dη4 = η3half /c + + return SA[dη1, dη2, dη3, dη4] +end + getfisherinformation(::NaturalParametersSpace, ::Type{NormalGamma}) = (η) -> begin (η1, η2, η3, η4) = unpack_parameters(NormalGamma, η) diff --git a/src/distributions/pareto.jl b/src/distributions/pareto.jl index 4b8d0ed9..da09c42a 100644 --- a/src/distributions/pareto.jl +++ b/src/distributions/pareto.jl @@ -148,6 +148,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Pareto}, conditioner) = (η) -> return log(conditioner^(one(η1) + η1) / (-one(η1) - η1)) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Pareto}, conditioner) = (η) -> begin + (η1,) = unpack_parameters(Pareto, η) + return SA[log(conditioner) - one(η1)/(one(η1)+η1);] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Pareto}, _) = (η) -> begin (η1,) = unpack_parameters(Pareto, η) return SA[1 / (-1 - η1)^2;;] @@ -160,6 +165,11 @@ getlogpartition(::MeanParametersSpace, ::Type{Pareto}, conditioner) = (θ) -> be return -log(shape) - shape * log(conditioner) end +getgradlogpartition(::MeanParametersSpace, ::Type{Pareto}, conditioner) = (θ) -> begin + (shape,) = unpack_parameters(Pareto, θ) + return SA[-inv(shape) - log(conditioner);] +end + getfisherinformation(::MeanParametersSpace, ::Type{Pareto}, conditioner) = (θ) -> begin (α,) = unpack_parameters(Pareto, θ) ### Below fisher information is problematic if α is larger than conditioner as Pareto diff --git a/src/distributions/poisson.jl b/src/distributions/poisson.jl index a24872f6..abdabd6f 100644 --- a/src/distributions/poisson.jl +++ b/src/distributions/poisson.jl @@ -60,6 +60,11 
@@ getlogpartition(::NaturalParametersSpace, ::Type{Poisson}) = (η) -> begin return exp(η1) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Poisson}) = (η) -> begin + (η1,) = unpack_parameters(Poisson, η) + return SA[exp(η1)] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Poisson}) = (η) -> begin (η1,) = unpack_parameters(Poisson, η) SA[exp(η1);;] diff --git a/src/distributions/rayleigh.jl b/src/distributions/rayleigh.jl index a0d76b9e..7f1cf973 100644 --- a/src/distributions/rayleigh.jl +++ b/src/distributions/rayleigh.jl @@ -56,6 +56,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Rayleigh}) = (η) -> begin return -log(-2 * η1) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Rayleigh}) = (η) -> begin + (η1, ) = unpack_parameters(Rayleigh, η) + return SA[-inv(η1);] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Rayleigh}) = (η) -> begin (η1,) = unpack_parameters(Rayleigh, η) SA[inv(η1^2);;] @@ -68,6 +73,11 @@ getlogpartition(::MeanParametersSpace, ::Type{Rayleigh}) = (θ) -> begin return 2 * log(σ) end +getgradlogpartition(::MeanParametersSpace, ::Type{Rayleigh}) = (θ) -> begin + (σ,) = unpack_parameters(Rayleigh, θ) + return SA[2/σ;] +end + getfisherinformation(::MeanParametersSpace, ::Type{Rayleigh}) = (θ) -> begin (σ,) = unpack_parameters(Rayleigh, θ) return SA[4 / σ^2;;] diff --git a/src/distributions/von_mises.jl b/src/distributions/von_mises.jl index 2e43a381..ec4a5bd4 100644 --- a/src/distributions/von_mises.jl +++ b/src/distributions/von_mises.jl @@ -82,7 +82,11 @@ isbasemeasureconstant(::Type{VonMises}) = ConstantBaseMeasure() getbasemeasure(::Type{VonMises}, _) = (x) -> inv(twoπ) getsufficientstatistics(::Type{VonMises}, _) = (cos, sin) - +getgradlogpartition(::NaturalParametersSpace, ::Type{VonMises}, _) = (η) -> begin + u = sqrt(dot(η, η)) + same_part = besseli(1, u) / (u * besseli(0, u)) + return SA[η[1] * same_part, η[2] * same_part] +end getlogpartition(::NaturalParametersSpace, ::Type{VonMises}, 
_) = (η) -> begin return log(besseli(0, sqrt(dot(η, η)))) end diff --git a/src/distributions/von_mises_fisher.jl b/src/distributions/von_mises_fisher.jl index 352a5e34..c2f05445 100644 --- a/src/distributions/von_mises_fisher.jl +++ b/src/distributions/von_mises_fisher.jl @@ -80,6 +80,15 @@ getlogpartition(::NaturalParametersSpace, ::Type{VonMisesFisher}) = (η) -> begi return log(besseli((p / 2) - 1, κ)) - ((p / 2) - 1) * log(κ) end +getgradlogpartition(::NaturalParametersSpace, ::Type{VonMisesFisher}) = (η) -> begin + κ = sqrt(dot(η, η)) + p = length(η) + term1 = - ((p / 2) - 1) / κ + term2 = ((p / 2) - 1)/κ + besseli((p / 2), κ)/besseli((p / 2) - 1, κ) + term3 = (term1 + term2)/(κ) + return term3*η +end + getfisherinformation(::NaturalParametersSpace, ::Type{VonMisesFisher}) = (η) -> begin u = norm(η) p = length(η) diff --git a/src/distributions/weibull.jl b/src/distributions/weibull.jl index 2b1c94be..a25dd0a0 100644 --- a/src/distributions/weibull.jl +++ b/src/distributions/weibull.jl @@ -99,6 +99,11 @@ getlogpartition(::NaturalParametersSpace, ::Type{Weibull}, conditioner) = (η) - return -log(-η1) - log(conditioner) end +getgradlogpartition(::NaturalParametersSpace, ::Type{Weibull},conditioner) = (η) -> begin + (η1,) = unpack_parameters(Weibull, η) + return SA[-inv(η1);] +end + getfisherinformation(::NaturalParametersSpace, ::Type{Weibull}, _) = (η) -> begin (η1,) = unpack_parameters(Weibull, η) SA[inv(η1)^2;;] @@ -108,9 +113,15 @@ end getlogpartition(::MeanParametersSpace, ::Type{Weibull}, k) = (θ) -> begin (λ,) = unpack_parameters(Weibull, θ) - return k * log(λ) - log(k) + return k * log(λ) - log(k) end +getgradlogpartition(::MeanParametersSpace, ::Type{Weibull},conditioner) = (θ) -> begin + (λ,) = unpack_parameters(Weibull, θ) + return SA[conditioner/λ;] +end + + getfisherinformation(::MeanParametersSpace, ::Type{Weibull}, k) = (θ) -> begin (λ,) = unpack_parameters(MeanParametersSpace(), Weibull, θ) γ = -digamma(1) # Euler-Mascheroni constant (see 
https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant) diff --git a/src/distributions/wishart.jl b/src/distributions/wishart.jl index 6c780d7d..4af9dcbe 100644 --- a/src/distributions/wishart.jl +++ b/src/distributions/wishart.jl @@ -261,6 +261,16 @@ getlogpartition(::NaturalParametersSpace, ::Type{WishartFast}) = (η) -> begin return term1 + term2 end + +getgradlogpartition(::NaturalParametersSpace, ::Type{WishartFast}) = (η) -> begin + η1, η2 = unpack_parameters(WishartFast, η) + p = first(size(η2)) + term1 = -logdet(-η2) + mvdigamma(η1 + (p + one(η1)) /2 , p) + term2 = vec(((η1+(p+one(p))/2))*cholinv(η2)) + + return [term1; term2] +end + getfisherinformation(::NaturalParametersSpace, ::Type{WishartFast}) = (η) -> begin η1, η2 = unpack_parameters(WishartFast, η) @@ -280,7 +290,16 @@ getfisherinformation(::NaturalParametersSpace, ::Type{WishartFast}) = getlogpartition(::MeanParametersSpace, ::Type{WishartFast}) = (θ) -> begin (ν, invS) = unpack_parameters(WishartFast, θ) p = first(size(invS)) - return (ν / 2) * (p * log(2.0) - logdet(invS)) + mvtrigamma(p, ν / 2) + return (ν / 2) * (p * log(2.0) - logdet(invS)) + logmvgamma(p, ν / 2) +end + +getgradlogpartition(::MeanParametersSpace, ::Type{WishartFast}) = (θ) -> begin + ν, invS = unpack_parameters(WishartFast, θ) + p = first(size(invS)) + term1 = ((p * log(2.0) - logdet(invS)) + mvdigamma(ν/2,p))/2 + term2 = vec((-ν/2)*cholinv(invS)) + + return [term1; term2] end getfisherinformation(::MeanParametersSpace, ::Type{WishartFast}) = (θ) -> begin diff --git a/src/distributions/wishart_inverse.jl b/src/distributions/wishart_inverse.jl index bc8ee246..8d109bb5 100644 --- a/src/distributions/wishart_inverse.jl +++ b/src/distributions/wishart_inverse.jl @@ -278,11 +278,21 @@ getsufficientstatistics(::Type{InverseWishartFast}) = (chollogdet, cholinv) getlogpartition(::NaturalParametersSpace, ::Type{InverseWishartFast}) = (η) -> begin η1, η2 = unpack_parameters(InverseWishartFast, η) p = first(size(η2)) - term1 = 
(η1 + (p + 1) / 2) * logdet(-η2) - term2 = logmvgamma(p, -(η1 + (p + 1) / 2)) + term1 = (η1 + (p + one(η1)) / 2) * logdet(-η2) + term2 = logmvgamma(p, -(η1 + (p + one(η1)) / 2)) return term1 + term2 end + +getgradlogpartition(::NaturalParametersSpace, ::Type{InverseWishartFast}) = (η) -> begin + η1, η2 = unpack_parameters(InverseWishartFast, η) + p = first(size(η2)) + term1 = logdet(-η2) - mvdigamma(-(η1 + (p + one(η1)) /2) , p) + term2 = vec(-((η1+(p+one(p))/2))*cholinv(-η2)) + + return [term1; term2] +end + getfisherinformation(::NaturalParametersSpace, ::Type{InverseWishartFast}) = (η) -> begin η1, η2 = unpack_parameters(InverseWishartFast, η) diff --git a/src/exponential_family.jl b/src/exponential_family.jl index dccad370..5897dfef 100644 --- a/src/exponential_family.jl +++ b/src/exponential_family.jl @@ -2,8 +2,8 @@ export ExponentialFamilyDistribution export ExponentialFamilyDistribution, ExponentialFamilyDistributionAttributes, getnaturalparameters, getattributes export MeanToNatural, NaturalToMean, MeanParametersSpace, NaturalParametersSpace -export getbasemeasure, getsufficientstatistics, getlogpartition, getfisherinformation, getsupport, getmapping, getconditioner -export basemeasure, sufficientstatistics, logpartition, fisherinformation, insupport, isproper +export getbasemeasure, getsufficientstatistics, getlogpartition, getgradlogpartition, getfisherinformation, getsupport, getmapping, getconditioner +export basemeasure, sufficientstatistics, logpartition, gradlogpartition, fisherinformation, insupport, isproper export isbasemeasureconstant, ConstantBaseMeasure, NonConstantBaseMeasure using LoopVectorization @@ -301,6 +301,18 @@ function logpartition(ef::ExponentialFamilyDistribution, η = getnaturalparamete return getlogpartition(ef)(η) end +""" + gradlogpartition(::ExponentialFamilyDistribution, η) + +Return the computed value of `gradlogpartition` of the exponential family distribution at the point `η`. +By default `η = getnaturalparameters(ef)`. 
+ +See also: [`getgradlogpartition`](@ref) +""" +function gradlogpartition(ef::ExponentialFamilyDistribution, η = getnaturalparameters(ef)) + return getgradlogpartition(ef)(η) +end + """ fisherinformation(distribution, η) @@ -329,11 +341,19 @@ getlogpartition(::Nothing, ef::ExponentialFamilyDistribution{T}) where {T} = get getlogpartition(attributes::ExponentialFamilyDistributionAttributes, ::ExponentialFamilyDistribution) = getlogpartition(attributes) +getgradlogpartition(ef::ExponentialFamilyDistribution) = getgradlogpartition(ef.attributes, ef) +getgradlogpartition(::Nothing, ef::ExponentialFamilyDistribution{T}) where {T} = + getgradlogpartition(T, getconditioner(ef)) +# TODO: Implement Monte Carlo estimation for gradlogpartition. +getgradlogpartition(attributes::ExponentialFamilyDistributionAttributes, ::ExponentialFamilyDistribution) = + error("Generic gradlogpartition is not implemented.") + getfisherinformation(ef::ExponentialFamilyDistribution) = getfisherinformation(ef.attributes, ef) getfisherinformation(::Nothing, ef::ExponentialFamilyDistribution{T}) where {T} = getfisherinformation(T, getconditioner(ef)) +# TODO: Implement a generic fisherinformation. getfisherinformation(attributes::ExponentialFamilyDistributionAttributes, ::ExponentialFamilyDistribution) = - error("TODO: not implemented. Should we call ForwardDiff here?") + error("Generic getfisherinformation is not implemented.") getsupport(ef::ExponentialFamilyDistribution) = getsupport(ef.attributes, ef) getsupport(::Nothing, ef::ExponentialFamilyDistribution{T}) where {T} = getsupport(T) @@ -423,6 +443,22 @@ getlogpartition( ::Nothing ) where {T <: Distribution} = getlogpartition(space, T) +""" + getgradlogpartition([ space = NaturalParametersSpace() ], ::Type{T}, [ conditioner ]) where { T <: Distribution } + +A specific version of `getgradlogpartition` defined particularly for distribution types from `Distributions.jl` package. 
+Does not require an instance of the `ExponentialFamilyDistribution` and can be called directly with a specific distribution type instead. +Optionally, accepts the `space` parameter, which defines the parameters space. +For conditional exponential family distributions requires an extra `conditioner` argument. +""" +getgradlogpartition(::Type{T}, conditioner = nothing) where {T <: Distribution} = + getgradlogpartition(NaturalParametersSpace(), T, conditioner) +getgradlogpartition( + space::Union{MeanParametersSpace, NaturalParametersSpace}, + ::Type{T}, + ::Nothing +) where {T <: Distribution} = getgradlogpartition(space, T) + """ getfisherinformation([ space = NaturalParametersSpace() ], ::Type{T}) where { T <: Distribution } @@ -535,7 +571,7 @@ For details on the dispatch mechanism of `_logpdf`, refer to the `check_logpdf` See also: [`check_logpdf`](@ref) """ function _logpdf(ef::ExponentialFamilyDistribution{T}, x) where {T} - vartype, _x = check_logpdf(variate_form(typeof(ef)), typeof(x), eltype(x), ef, x) + vartype, _x = check_logpdf(ef, x) _logpdf(vartype, ef, _x) end @@ -596,6 +632,8 @@ See also: [`_logpdf`](@ref) [`PointBasedLogpdfCall`](@ref) [`MapBasedLogpdfCall` """ function check_logpdf end +check_logpdf(ef::ExponentialFamilyDistribution, x) = check_logpdf(variate_form(typeof(ef)), typeof(x), eltype(x), ef, x) + check_logpdf(::Type{Univariate}, ::Type{<:Number}, ::Type{<:Number}, ef, x) = (PointBasedLogpdfCall(), x) check_logpdf(::Type{Multivariate}, ::Type{<:AbstractVector}, ::Type{<:Number}, ef, x) = (PointBasedLogpdfCall(), x) check_logpdf(::Type{Matrixvariate}, ::Type{<:AbstractMatrix}, ::Type{<:Number}, ef, x) = (PointBasedLogpdfCall(), x) @@ -830,6 +868,8 @@ BayesBase.mean(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = BayesBase.var(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = var(convert(T, ef)) BayesBase.std(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = std(convert(T, ef)) 
BayesBase.cov(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = cov(convert(T, ef)) +BayesBase.skewness(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = skewness(convert(T, ef)) +BayesBase.kurtosis(ef::ExponentialFamilyDistribution{T}) where {T <: Distribution} = kurtosis(convert(T, ef)) BayesBase.rand(ef::ExponentialFamilyDistribution, args...) = rand(Random.default_rng(), ef, args...) BayesBase.rand!(ef::ExponentialFamilyDistribution, args...) = rand!(Random.default_rng(), ef, args...) diff --git a/test/distributions/beta_tests.jl b/test/distributions/beta_tests.jl index a0cac81a..1a108ab7 100644 --- a/test/distributions/beta_tests.jl +++ b/test/distributions/beta_tests.jl @@ -31,7 +31,7 @@ end @testitem "Beta: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl") - for a in 0.1:0.2:0.9, b in 0.1:0.2:0.9 + for a in 0.1:0.1:0.9, b in 1.1:0.2:2.0 @testset let d = Beta(a, b) ef = test_exponentialfamily_interface(d; option_assume_no_allocations = true) diff --git a/test/distributions/distributions_setuptests.jl b/test/distributions/distributions_setuptests.jl index f1b544c0..d80ecb53 100644 --- a/test/distributions/distributions_setuptests.jl +++ b/test/distributions/distributions_setuptests.jl @@ -54,10 +54,11 @@ function test_exponentialfamily_interface(distribution; test_packing_unpacking = true, test_isproper = true, test_basic_functions = true, + test_gradlogpartition_properties = true, test_fisherinformation_properties = true, test_fisherinformation_against_hessian = true, test_fisherinformation_against_jacobian = true, - option_assume_no_allocations = false, + option_assume_no_allocations = false ) T = ExponentialFamily.exponential_family_typetag(distribution) @@ -71,6 +72,7 @@ function test_exponentialfamily_interface(distribution; test_packing_unpacking && run_test_packing_unpacking(distribution) test_isproper && run_test_isproper(distribution; assume_no_allocations = option_assume_no_allocations) 
test_basic_functions && run_test_basic_functions(distribution; assume_no_allocations = option_assume_no_allocations) + test_gradlogpartition_properties && run_test_gradlogpartition_properties(distribution) test_fisherinformation_properties && run_test_fisherinformation_properties(distribution) test_fisherinformation_against_hessian && run_test_fisherinformation_against_hessian(distribution; assume_no_allocations = option_assume_no_allocations) test_fisherinformation_against_jacobian && run_test_fisherinformation_against_jacobian(distribution; assume_no_allocations = option_assume_no_allocations) @@ -203,6 +205,17 @@ function run_test_basic_functions(distribution; nsamples = 10, test_gradients = # ! do not use fixed RNG samples = [rand(distribution) for _ in 1:nsamples] + # Not all methods are defined for all objects in Distributions.jl + # For this methods we first test if the method is defined for the distribution + # And only then we test the method for the exponential family form + potentially_missing_methods = ( + cov, + skewness, + kurtosis + ) + + argument_type = Tuple{typeof(distribution)} + for x in samples # We believe in the implementation in the `Distributions.jl` @test @inferred(logpdf(ef, x)) ≈ logpdf(distribution, x) @@ -214,6 +227,12 @@ function run_test_basic_functions(distribution; nsamples = 10, test_gradients = @test all(rand(StableRNG(42), ef, 10) .≈ rand(StableRNG(42), distribution, 10)) @test all(rand!(StableRNG(42), ef, [deepcopy(x) for _ in 1:10]) .≈ rand!(StableRNG(42), distribution, [deepcopy(x) for _ in 1:10])) + for method in potentially_missing_methods + if hasmethod(method, argument_type) + @test @inferred(method(ef)) ≈ method(distribution) + end + end + @test @inferred(isbasemeasureconstant(ef)) === isbasemeasureconstant(T) @test @inferred(basemeasure(ef, x)) == getbasemeasure(T, conditioner)(x) @test all(@inferred(sufficientstatistics(ef, x)) .== map(f -> f(x), getsufficientstatistics(T, conditioner))) @@ -302,6 +321,27 @@ function 
run_test_fisherinformation_properties(distribution; test_properties_in_ end end +function run_test_gradlogpartition_properties(distribution; nsamples = 6000, test_against_forwardiff = true) + ef = @inferred(convert(ExponentialFamilyDistribution, distribution)) + + (η, conditioner) = (getnaturalparameters(ef), getconditioner(ef)) + + rng = StableRNG(42) + # Some distributions do not use a vector to store a collection of samples (e.g. matrix for MvGaussian) + collection_of_samples = rand(rng, distribution, nsamples) + # The `check_logpdf` here converts the collection to a vector like iterable + _, samples = ExponentialFamily.check_logpdf(ef, collection_of_samples) + expectation_of_sufficient_statistics = mean((s) -> ExponentialFamily.pack_parameters(ExponentialFamily.sufficientstatistics(ef, s)), samples) + gradient = gradlogpartition(ef) + inverse_fisher = cholinv(fisherinformation(ef)) + @test length(gradient) === length(η) + @test dot(gradient - expectation_of_sufficient_statistics, inverse_fisher, gradient - expectation_of_sufficient_statistics) ≈ 0 atol = 0.01 + + if test_against_forwardiff + @test gradient ≈ ForwardDiff.gradient((η) -> getlogpartition(ef)(η), getnaturalparameters(ef)) + end +end + function run_test_fisherinformation_against_hessian(distribution; assume_ours_faster = true, assume_no_allocations = true) T = ExponentialFamily.exponential_family_typetag(distribution) diff --git a/test/distributions/gamma_family/gamma_shape_rate_tests.jl b/test/distributions/gamma_family/gamma_shape_rate_tests.jl index 5c4c54aa..d538101c 100644 --- a/test/distributions/gamma_family/gamma_shape_rate_tests.jl +++ b/test/distributions/gamma_family/gamma_shape_rate_tests.jl @@ -78,7 +78,7 @@ end @test pdf(dist3, 1.0) ≈ 0.5413411329464508 @test logpdf(dist3, 1.0) ≈ -0.6137056388801094 - # see https://github.com/biaslab/ReactiveMP.jl/issues/314 + # see https://github.com/ReactiveBayes/ReactiveMP.jl/issues/314 dist = GammaShapeRate(257.37489915581654, 3.0) @test pdf(dist, 
86.2027941354432) == 0.07400338986721687 end diff --git a/test/distributions/gamma_inverse_tests.jl b/test/distributions/gamma_inverse_tests.jl index 590507cc..7e3a72c8 100644 --- a/test/distributions/gamma_inverse_tests.jl +++ b/test/distributions/gamma_inverse_tests.jl @@ -45,7 +45,7 @@ end @testitem "GammaInverse: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl") - for α in 10rand(4), θ in 10rand(4) + for α in (10rand(4) .+ 4.0), θ in 10rand(4) @testset let d = InverseGamma(α, θ) ef = test_exponentialfamily_interface(d; option_assume_no_allocations = true) diff --git a/test/distributions/matrix_dirichlet_tests.jl b/test/distributions/matrix_dirichlet_tests.jl index a67991aa..6021933c 100644 --- a/test/distributions/matrix_dirichlet_tests.jl +++ b/test/distributions/matrix_dirichlet_tests.jl @@ -70,12 +70,12 @@ end include("distributions_setuptests.jl") for len in 3:5 - α = rand(len, len) + α = rand(1.0:2.0, len, len) @testset let d = MatrixDirichlet(α) ef = test_exponentialfamily_interface(d; test_basic_functions = true, option_assume_no_allocations = false) η1 = getnaturalparameters(ef) - for x in [rand(len, len) for _ in 1:3] + for x in [rand(1.0:2.0, len, len) for _ in 1:3] x = x ./ sum(x) @test @inferred(isbasemeasureconstant(ef)) === ConstantBaseMeasure() @test @inferred(basemeasure(ef, x)) === 1.0 diff --git a/test/distributions/mv_normal_wishart_tests.jl b/test/distributions/mv_normal_wishart_tests.jl index 6a8bf3e6..5fa03c57 100644 --- a/test/distributions/mv_normal_wishart_tests.jl +++ b/test/distributions/mv_normal_wishart_tests.jl @@ -14,7 +14,7 @@ end @testitem "MvNormalWishart: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl") - for dim in (3), invS in rand(Wishart(10, Array(Eye(dim))), 4) + for dim in (3,), invS in rand(Wishart(10, Array(Eye(dim))), 4) ν = dim + 2 @testset let (d = MvNormalWishart(rand(dim), invS, rand(), ν)) ef = test_exponentialfamily_interface( @@ -22,10 +22,11 @@ end 
option_assume_no_allocations = false, test_basic_functions = false, test_fisherinformation_against_hessian = false, - test_fisherinformation_against_jacobian = false + test_fisherinformation_against_jacobian = false, + test_gradlogpartition_properties = false ) - run_test_basic_functions(ef; assume_no_allocations = false, test_samples_logpdf = false) + run_test_basic_functions(d; assume_no_allocations = false, test_samples_logpdf = false) end end end diff --git a/test/distributions/normal_family/mv_normal_mean_covariance_tests.jl b/test/distributions/normal_family/mv_normal_mean_covariance_tests.jl index e3e51467..e36786c8 100644 --- a/test/distributions/normal_family/mv_normal_mean_covariance_tests.jl +++ b/test/distributions/normal_family/mv_normal_mean_covariance_tests.jl @@ -66,6 +66,12 @@ end @test ndims(MvNormalMeanCovariance([0.0, 0.0, 0.0])) === 3 @test size(MvNormalMeanCovariance([0.0, 0.0])) === (2,) @test size(MvNormalMeanCovariance([0.0, 0.0, 0.0])) === (3,) + + distribution = MvNormalMeanCovariance([0.0, 0.0], [2.0 0.0; 0.0 3.0]) + + @test distribution ≈ distribution + @test distribution ≈ convert(MvNormalMeanPrecision, distribution) + @test distribution ≈ convert(MvNormalWeightedMeanPrecision, distribution) end @testitem "MvNormalMeanCovariance: vague" begin diff --git a/test/distributions/normal_family/mv_normal_mean_precision_tests.jl b/test/distributions/normal_family/mv_normal_mean_precision_tests.jl index 58a78b6d..b713f509 100644 --- a/test/distributions/normal_family/mv_normal_mean_precision_tests.jl +++ b/test/distributions/normal_family/mv_normal_mean_precision_tests.jl @@ -66,6 +66,12 @@ end @test ndims(MvNormalMeanPrecision([0.0, 0.0, 0.0])) === 3 @test size(MvNormalMeanPrecision([0.0, 0.0])) === (2,) @test size(MvNormalMeanPrecision([0.0, 0.0, 0.0])) === (3,) + + distribution = MvNormalMeanPrecision([0.0, 0.0], [2.0 0.0; 0.0 3.0]) + + @test distribution ≈ distribution + @test distribution ≈ convert(MvNormalMeanCovariance, distribution) + 
@test distribution ≈ convert(MvNormalWeightedMeanPrecision, distribution) end @testitem "MvNormalMeanPrecision: vague" begin diff --git a/test/distributions/normal_family/mv_normal_weighted_mean_precision_tests.jl b/test/distributions/normal_family/mv_normal_weighted_mean_precision_tests.jl index e181f2e9..5d375ff0 100644 --- a/test/distributions/normal_family/mv_normal_weighted_mean_precision_tests.jl +++ b/test/distributions/normal_family/mv_normal_weighted_mean_precision_tests.jl @@ -67,6 +67,12 @@ end @test ndims(MvNormalWeightedMeanPrecision([0.0, 0.0, 0.0])) === 3 @test size(MvNormalWeightedMeanPrecision([0.0, 0.0])) === (2,) @test size(MvNormalWeightedMeanPrecision([0.0, 0.0, 0.0])) === (3,) + + distribution = MvNormalWeightedMeanPrecision([0.0, 0.0], [2.0 0.0; 0.0 3.0]) + + @test distribution ≈ distribution + @test distribution ≈ convert(MvNormalMeanCovariance, distribution) + @test distribution ≈ convert(MvNormalMeanPrecision, distribution) end @testitem "MvNormalWeightedMeanPrecision: vague" begin diff --git a/test/distributions/normal_family/normal_family_tests.jl b/test/distributions/normal_family/normal_family_tests.jl index c46b1538..6aa580c4 100644 --- a/test/distributions/normal_family/normal_family_tests.jl +++ b/test/distributions/normal_family/normal_family_tests.jl @@ -250,7 +250,8 @@ end d; # These are handled differently below test_fisherinformation_against_hessian = false, - test_fisherinformation_against_jacobian = false + test_fisherinformation_against_jacobian = false, + test_gradlogpartition_properties = false ) (η₁, η₂) = (inv(Σ) * mean(d), -inv(Σ) / 2) @@ -262,6 +263,12 @@ end @test @inferred(logpartition(ef)) ≈ -1 / 4 * (η₁' * inv(η₂) * η₁) - 1 / 2 * logdet(-2η₂) @test @inferred(insupport(ef, x)) end + + run_test_gradlogpartition_properties(d, test_against_forwardiff = false) + + # Extra test with AD-friendly logpartition function + lp_ag = ForwardDiff.gradient(getlogpartitionfortest(NaturalParametersSpace(), MvNormalMeanCovariance), 
getnaturalparameters(ef)) + @test gradlogpartition(ef) ≈ lp_ag end end diff --git a/test/distributions/normal_family/normal_mean_precision_tests.jl b/test/distributions/normal_family/normal_mean_precision_tests.jl index 73c85ca2..6fb0e5a6 100644 --- a/test/distributions/normal_family/normal_mean_precision_tests.jl +++ b/test/distributions/normal_family/normal_mean_precision_tests.jl @@ -104,6 +104,12 @@ end @test convert(NormalMeanPrecision, 0, 1) == NormalMeanPrecision{Float64}(0.0, 1.0) @test convert(NormalMeanPrecision, 0, 10) == NormalMeanPrecision{Float64}(0.0, 10.0) @test convert(NormalMeanPrecision, 0, 0.1) == NormalMeanPrecision{Float64}(0.0, 0.1) + + distribution = NormalMeanPrecision(-2.0, 3.0) + + @test distribution ≈ distribution + @test distribution ≈ convert(NormalMeanVariance, distribution) + @test distribution ≈ convert(NormalWeightedMeanPrecision, distribution) end @testitem "NormalMeanPrecision: vague" begin diff --git a/test/distributions/normal_family/normal_mean_variance_tests.jl b/test/distributions/normal_family/normal_mean_variance_tests.jl index fb56665e..a93fde4a 100644 --- a/test/distributions/normal_family/normal_mean_variance_tests.jl +++ b/test/distributions/normal_family/normal_mean_variance_tests.jl @@ -104,6 +104,12 @@ end @test convert(NormalMeanVariance, 0, 1) == NormalMeanVariance{Float64}(0.0, 1.0) @test convert(NormalMeanVariance, 0, 10) == NormalMeanVariance{Float64}(0.0, 10.0) @test convert(NormalMeanVariance, 0, 0.1) == NormalMeanVariance{Float64}(0.0, 0.1) + + distribution = NormalMeanVariance(-2.0, 3.0) + + @test distribution ≈ distribution + @test distribution ≈ convert(NormalMeanPrecision, distribution) + @test distribution ≈ convert(NormalWeightedMeanPrecision, distribution) end @testitem "NormalMeanVariance: vague" begin diff --git a/test/distributions/normal_family/normal_weighted_mean_precision_tests.jl b/test/distributions/normal_family/normal_weighted_mean_precision_tests.jl index e0671905..835f9e92 100644 --- 
a/test/distributions/normal_family/normal_weighted_mean_precision_tests.jl +++ b/test/distributions/normal_family/normal_weighted_mean_precision_tests.jl @@ -104,6 +104,12 @@ end @test convert(NormalWeightedMeanPrecision, 0, 1) == NormalWeightedMeanPrecision{Float64}(0.0, 1.0) @test convert(NormalWeightedMeanPrecision, 0, 10) == NormalWeightedMeanPrecision{Float64}(0.0, 10.0) @test convert(NormalWeightedMeanPrecision, 0, 0.1) == NormalWeightedMeanPrecision{Float64}(0.0, 0.1) + + distribution = NormalWeightedMeanPrecision(-2.0, 3.0) + + @test distribution ≈ distribution + @test distribution ≈ convert(NormalMeanPrecision, distribution) + @test distribution ≈ convert(NormalMeanVariance, distribution) end @testitem "NormalWeightedMeanPrecision: vague" begin diff --git a/test/distributions/pareto_tests.jl b/test/distributions/pareto_tests.jl index 2d3660a8..21ddc7a2 100644 --- a/test/distributions/pareto_tests.jl +++ b/test/distributions/pareto_tests.jl @@ -5,7 +5,7 @@ @testitem "Pareto: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl") - for shape in (1.0, 2.0, 3.0), scale in (0.25, 0.5, 2.0) + for shape in (5.0, 6.0, 7.0), scale in (0.25, 0.5, 2.0) @testset let d = Pareto(shape, scale) ef = test_exponentialfamily_interface(d; option_assume_no_allocations = false) η1 = -shape - 1 diff --git a/test/distributions/von_mises_fisher_tests.jl b/test/distributions/von_mises_fisher_tests.jl index d0c8e9aa..7c0ac82b 100644 --- a/test/distributions/von_mises_fisher_tests.jl +++ b/test/distributions/von_mises_fisher_tests.jl @@ -15,7 +15,7 @@ end @testitem "VonMisesFisher: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl") - for len in 3, b in (0.5) + for len in 3:5, b in (0.5) a_unnormalized = rand(len) a = a_unnormalized ./ norm(a_unnormalized) @testset let d = VonMisesFisher(a, b) diff --git a/test/distributions/wishart_tests.jl b/test/distributions/wishart_tests.jl index 10b4e79e..6bc3c14b 100644 --- 
a/test/distributions/wishart_tests.jl +++ b/test/distributions/wishart_tests.jl @@ -68,6 +68,7 @@ end end end + @testitem "Wishart: ExponentialFamilyDistribution" begin include("distributions_setuptests.jl")