Skip to content

Commit

Permalink
Merge pull request #21 from francescoalemanno/type
Browse files Browse the repository at this point in the history
enhance usability
- add BHist
- add to_pdf, Significance
- revamp plots
- update readme
- remove useless deps
  • Loading branch information
francescoalemanno authored Oct 28, 2022
2 parents dd5213a + ba889f3 commit 1925099
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 70 deletions.
4 changes: 0 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@ uuid = "000d9b38-65fe-4c81-bdb9-69f01f102479"
authors = ["Francesco Alemanno <francescoalemanno710[at]gmail.com>", "Moelf <proton[at]jling.dev>"]
version = "1.0.4"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
julia = "^1.6"

Expand Down
33 changes: 30 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,34 @@
# BayesHistogram.jl
optimal histogram binning based on piecewise constant model
Optimal histogram binning based on piecewise constant model.

![plot.png](plot.png "")
Paper: _Studies in Astronomical Time Series Analysis. VI. Bayesian Block Representations_ [https://arxiv.org/abs/1207.5578]

for more worked examples read `make_plot.jl`.
for a showcase look below.

## Installation
```julia
using Pkg
Pkg.add("BayesHistogram")
```

## Simple usage
```julia
using Plots, BayesHistogram
X = exp.(randn(5000)./3)
bl = bayesian_blocks(X)
# plot using "to_pdf"
support, density = to_pdf(bl)
plot(support, density)
# or using "edges" parameter and an histogramming procedure
histogram(X, bins=bl.edges, normalize = :pdf)
```

## Showcase
### it handles weighted data and errors correctly
![plot2.png](plot2.png "")
### bins are determined automatically & optimally
![plot3.png](plot3.png "")
### it routinely outperforms common binning rules
![plot.png](plot.png "")

for worked examples read `make_plot.jl`.
141 changes: 87 additions & 54 deletions make_plot.jl
Original file line number Diff line number Diff line change
@@ -1,52 +1,90 @@
#has been run only with julia 1.8+
using Plots, Random, Statistics
using BayesHistogram
let
gr()
rng = Xoshiro(123514)
gr()


function fake_data_pdf(v)
# a fake statistical model with peaks and exponential background
N1 = 3000
N2 = 1000
tN = N1
x = N1*exp(-v)
for i in 1:5
N = round(Int64,0.4*N1/sqrt(i))
tN += N
std = i*0.02
mu = i*0.7
x += N*exp(-abs2(v-mu)/(2*std^2))/sqrt(2*pi*std^2)
end
return x/tN
end

x = shuffle(
rng,
[
randexp(rng, N1)
randn(rng, N2) .* 0.02 .+ sqrt(1)
randn(rng, N2) .* 0.02 .+ sqrt(2)
randn(rng, N2) .* 0.04 .+ sqrt(4)
randn(rng, N2) .* 0.08 .+ sqrt(8)
],
)
@. x = round(x * 100) / 100
b = bayesian_blocks(x, prior = BIC())
function fake_data()
# sampler for the fake model above
rng = Xoshiro(123513)
N1 = 3000
x = randexp(rng, N1)
for i in 1:5
N = round(Int64,0.4*N1/sqrt(i))
std = i*0.02
mu = i*0.7
append!(x, randn(rng, N) .* std .+ mu)
end
@. x = round(x * 200) / 200
return x
end

let plot_mode = true
x = fake_data()
stephist(x, normalize = :pdf, lw=1.7, label = "classical", legend=:topright)

if plot_mode
# this uses BayesHistogram.jl "to_pdf"
b1 = bayesian_blocks(x, prior=BIC())
xvals, epdf = to_pdf(b1)
plot!(xvals, epdf, lw=2, label = "bayes-hist")
else
# this uses Plots.jl "stephist"
b1 = bayesian_blocks(x)
stephist!(x, bin = b1.edges, normalize = :pdf,lw=2, label = "bayes-hist")
end
lx = 0:0.001:8
pdf = fake_data_pdf.(lx)
plot!(lx,pdf, lw = 1.5,color="black", linestyle = :dash, label = "density")
xlims!(0,4.5)
savefig("plot3.png")
plot!()
end


let
gr()
x = fake_data()
b = bayesian_blocks(x)
P = []

edg_equi_area = quantile(x, range(0, 1, length = ceil(Int, 2 * length(x)^(1.88 / 5))))
Hs = [
("Bayes Histogram", b.edges),
("EquiArea", edg_equi_area),
("Bayes Histogram (def. mode)", 0),
("Sturges", :sturges),
("Rice", :rice),
("Sqrt", :sqrt),
]
i = 1
for (lab, bin) in Hs
pl = stephist(
x,
label = :none,
bins = bin,
yaxis = :log,
normalize = :pdf,
lw = 1,
grid = false,
)
i == 1 && scatter!(pl,
b.centers,
b.heights,
yerr = b.error_heights,
label = :none,
lw = 2.0,
markersize= 2.0
)
for (lab, bin) in Hs
pl = plot(yaxis = :log,grid = false,)
if i == 1
X,Y = to_pdf(b)
plot!(pl, X, Y,label = :none)
else
stephist!(pl,
x,
label = :none,
bins = bin,
yaxis = :log,
normalize = :pdf,
lw = 1,
)
end
title!(pl, lab, titlefont = font(12))
push!(P, pl)
i += 1
Expand All @@ -58,34 +96,29 @@ let
end


using Trapz
let
gr()
rng = Xoshiro(12351)
x = collect(range(-4, 4, length = 300))
w = ceil.((rand(rng, length(x)) .* 3 .+ @. (exp(-x^2 / 2) * 15)))
pdf = w ./ trapz(x, w)
rng = Xoshiro(1231)
x = collect(-4:0.01:4)
w = round.(Int,exp.(.-x.^2).*30 .+ rand(rng, length(x))) .+ 0.5
b = bayesian_blocks(x, weights = w, prior=BIC())
scatter(x, shuffle(rng,LinRange(0,0.5, length(x))), ms=sqrt.(w),alpha=0.5, label="noisy weighted data",markercolor="red",markerstrokewidth=0)
dx, dy = to_pdf(b)

b = bayesian_blocks(x, weights = w, prior = AIC())

plot(x, pdf, color = "red", label = "noisy weighted obs", lw = 0.5)
stephist!(
x,
bins = b.edges,
weights = w,
normalize = :pdf,
plot!(
dx, dy,
color = "black",
label = "bayeshist",
lw = 2,
yaxis=:log,
label = "bayes-hist",
lw = 2.5,
)

scatter!(
b.centers,
b.heights,
yerr = b.error_heights,
label = :none,
color = "black",
lw = 2,
lw = 2.5,
)

savefig("plot2.png")
Expand Down
Binary file modified plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified plot2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added plot3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 29 additions & 4 deletions src/BayesHistogram.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,34 @@
) where {T<:Real,W<:Real}
"""
module BayesHistogram

include("priors.jl")

struct BHist{T<:Real}
edges::Vector{T}
counts::Vector{T}
centers::Vector{T}
widths::Vector{T}
heights::Vector{T}
error_counts::Vector{T}
error_heights::Vector{T}
end

function to_pdf(h::BHist{T}; lb = minimum(h.heights)/3) where T
X = T[]
Y = T[]
push!(X, h.edges[begin])
push!(Y, lb)
for i in eachindex(h.heights)
push!(X, h.edges[i])
push!(X, h.edges[i+1])
push!(Y, h.heights[i])
push!(Y, h.heights[i])
end
push!(X, h.edges[end])
push!(Y, lb)
return X, Y
end

function sort_sane(raw_x, raw_w, raw_w2)
p = sortperm(raw_x)
tmp_x = raw_x[p]
Expand Down Expand Up @@ -63,7 +88,7 @@ function build_blocks(t, edges, weights, weights2)
widths = diff(edges)
heights = counts ./ (total .* widths)
error_heights = error_counts ./ (total .* widths)
return (; edges, counts, centers, widths, heights, error_counts, error_heights)
return BHist(edges, counts, centers, widths, heights, error_counts, error_heights)
end

"""
Expand All @@ -81,7 +106,7 @@ end
- `sumw2`: sum of weight^2 in each observation, this is particularly useful
when this algorihtm is used for re-binning of already made histograms where
the `sumw2` for each bin is different from `weight^2` of each bin.
- `prior`: choose from `NoPrior`, `Pearson`, `Geometric`, `BIC`, `AIC`, `HQIC`
- `prior`: choose from `NoPrior`, `Pearson`, `Geometric`, `BIC`, `AIC`, `HQIC`, `Significance`, `Scargle` (identical to `Significance`)
- resolution: handles on how fine we count along the `datas axis
- min_counts: minimum sum of weights of a block that can be splitted.
Expand Down Expand Up @@ -173,5 +198,5 @@ function bayesian_blocks(
return build_blocks(t, edges, weights, sumw2)
end

export bayesian_blocks, Pearson, Geometric, Scargle, NoPrior, BIC, AIC, HQIC
export BHist, bayesian_blocks, to_pdf, Pearson, Geometric, Significance, Scargle, NoPrior, BIC, AIC, HQIC
end
7 changes: 4 additions & 3 deletions src/priors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ function (w::Pearson)(max_blocks, cnt_total, cnt_single)
end


struct Scargle{T<:Real}
struct Significance{T<:Real}
p0::T
Scargle(x::T) where {T} =
Significance(x::T) where {T} =
0 < x < 1 ? new{T}(x) :
error("false positive rate parameter must be between 0 and 1.")
end
const Scargle = Significance

function (w::Scargle)(max_blocks, cnt_total, cnt_single)
function (w::Significance)(max_blocks, cnt_total, cnt_single)
# unused unused
C0 = 73.53
C1 = -0.478
Expand Down
12 changes: 10 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ end
@testset "Integer inputs" begin
input_int = round.(Int, randn(1000) * 100)
input_float = float.(input_int)
@test bayesian_blocks(input_int) == bayesian_blocks(input_float)
@test bayesian_blocks(input_int).counts == bayesian_blocks(input_float).counts
end

@testset "sanitize" begin
Expand Down Expand Up @@ -224,4 +224,12 @@ end
bl_round = bayesian_blocks(data, prior = Pearson(0.2))
@test sum(abs2,(bl_noround.edges .- bl_round.edges)) < 0.0005
@test sum(abs2,(bl_noround.error_counts .- bl_round.error_counts)) < 0.2
end
end

@testset "to_pdf" begin
bl = bayesian_blocks(x, prior=BIC())
res = to_pdf(bl)
@test res[1] == [-3.1578070050937224, -3.1578070050937224, -2.494702398745103, -2.494702398745103, -1.8298532494637834, -1.8298532494637834, -1.4561209721822812, -1.4561209721822812, -0.9536217995699808, -0.9536217995699808, 0.8030403893027767, 0.8030403893027767, 1.1970293913971268, 1.1970293913971268, 1.6398624165469307, 1.6398624165469307, 2.1005843440399232, 2.1005843440399232, 2.498821309397118, 2.498821309397118, 2.92140180959945, 2.92140180959945, 3.8845391427592286, 3.8845391427592286]
@test res[2] == [0.0004845276479270787, 0.008143511518846264, 0.008143511518846264, 0.03399267341235206, 0.03399267341235206, 0.10970419867994981, 0.10970419867994981, 0.18666697402180743, 0.18666697402180743, 0.3594316562395907, 0.3594316562395907, 0.23909296832971377, 0.23909296832971377, 0.14316908721645752, 0.14316908721645752, 0.06511519901656145, 0.06511519901656145, 0.032141667181797555, 0.032141667181797555, 0.009465652101989552, 0.009465652101989552, 0.001453582943781236, 0.001453582943781236, 0.0004845276479270787]
end

0 comments on commit 1925099

Please sign in to comment.