fixes DE and refactor all algorithms (#6)

* refactor all algorithms * tune tolerances * finally fixed bias in ABCDE * tune examples * regen plot * change version
francescoalemanno · Jun 12, 2020 · 05a3c55 · 05a3c55
1 parent 2652cd3
commit 05a3c55
Show file tree

Hide file tree

Showing 10 changed files with 426 additions and 425 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "KissABC"
 uuid = "9c9dad79-530a-4643-a18b-2704674d4108"
 authors = ["Francesco Alemanno <francescoalemanno710@gmail.com>"]
-version = "1.1.0"
+version = "1.2.0"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"

diff --git a/docs/literate/example_1.jl b/docs/literate/example_1.jl
@@ -33,30 +33,30 @@ prior=Factored(
             Uniform(-1,1), #there is a smeared distribution centered around 0
             Uniform(0,1), # the peak has surely a width below 1
             Uniform(0,4), # the smeared distribution surely has a width less than 4
-            Beta(4,4) # the number of total events from both distributions look about the same, so we will favor 0.5 just a bit
+            Beta(2,2) # the number of total events from both distributions look about the same, so we will favor 0.5 just a bit
         );
 
 # let's look at a sample from the prior, to see that it works
 rand(prior)
 # now we need a distance function to compare datasets, this is not the best distance we could use, but it will work out anyway
 function D(x,y)
-    r=LinRange(0,1,length(x)+length(y))
+    r=(0.1,0.2,0.5,0.8,0.9)
     mean(abs,quantile(x,r).-quantile(y,r))
 end
 
 # we can now run ABCDE to get the posterior distribution of our parameters given the dataset `data`
 plan=ABCplan(prior,model,data,D,params=5000)
-res,Δ,converged=ABCDE(plan,0.05,parallel=true,verbose=false);
+res,Δ,converged=ABCDE(plan,0.02,parallel=true,generations=500,verbose=false);
 
 # Has it converged to the target tolerance?
 print("Converged = ",converged)
 
-# let's see the median and 90% confidence interval for the inferred parameters and let's compare them with the true values
+# let's see the median and 95% confidence interval for the inferred parameters and let's compare them with the true values
 function getstats(V)
     (
         median=median(V),
-        lowerbound=quantile(V,0.05),
-        upperbound=quantile(V,0.95)
+        lowerbound=quantile(V,0.025),
+        upperbound=quantile(V,0.975)
     )
 end
 
@@ -68,4 +68,4 @@ for is in eachindex(stats)
     println(labels[is], " ≡ " ,parameters[is], " → ", stats[is])
 end
 
-# we can see that the true values lie inside the confidence interval.
+# The inferred parameters are close to nominal values
diff --git a/docs/literate/index.jl b/docs/literate/index.jl
@@ -38,7 +38,7 @@ end
 # Now we are all set, we can use `ABCDE` which is sequential Monte Carlo algorithm with an adaptive proposal, to simulate the posterior distribution for this model, inferring μ and σ
 
 plan=ABCplan(prior, sim, tdata, ksdist)
-res,Δ = ABCDE(plan, 0.1, nparticles=200,parallel=true, verbose=false);
+res,Δ = ABCDE(plan, 0.1, nparticles=2000,generations=150,parallel=true, verbose=false);
 
 # the parameters we chose are: a tolerance on distances equal to `0.1`, a number of simulated particles equal to `200`, we enabled Threaded parallelism, and ofcourse the first four parameters are the ingredients we set in the previous steps, the simulated posterior results are in `res`, while in `Δ` we can find the distances calculated for each sample.
 # We can now extract the inference results:

diff --git a/images/inf_normaldist.png b/images/inf_normaldist.png
diff --git a/src/ABCREJ.jl b/src/ABCREJ.jl
@@ -0,0 +1,24 @@
+
+
+"""
+    ABC(plan, α_target; nparticles = 100, parallel = false)
+
+Classical ABC rejection algorithm.
+
+# Arguments:
+- `plan`: a plan built using the function ABCplan.
+- `α_target`: target acceptance rate for ABC rejection algorithm, `nparticles/α` will be sampled and only the best `nparticles` will be retained.
+- `nparticles`:  number of samples from the approximate posterior that will be returned
+- `parallel`: when set to `true` multithreaded parallelism is enabled
+"""
+function ABC(plan::ABCplan, α_target;
+             nparticles=100, parallel=false)
+    @extract_params plan prior distance simulation data params
+    @assert 0<α_target<=1 "α_target is the acceptance rate, and must be properly set between 0 - 1."
+    simparticles=ceil(Int,nparticles/α_target)
+    particles,distances=sample_plan(plan,simparticles,parallel)
+    idx=sortperm(distances)[1:nparticles]
+    (particles=particles[idx],
+     distances=distances[idx],
+     ϵ=distances[idx[end]])
+end
diff --git a/src/DE.jl b/src/DE.jl
@@ -0,0 +1,98 @@
+"""
+    deperturb(prior::Distribution, sample, r1, r2, γ)
+
+Function for `ABCDE` whose purpose is computing `sample + γ (r1 - r2) + ϵ` (the perturbation function of differential evolution) in a way suited to the prior.
+
+# Arguments:
+- `prior`
+- `sample`
+- `r1`
+- `r2`
+"""
+deperturb
+
+function deperturb(prior::Factored,sample,r1,r2,γ)
+    deperturb.(prior.p,sample,r1,r2,γ)
+end
+
+function de_ϵ(sample,r1,r2,γ)
+    γ*(abs(r1-r2)+abs(sample-r2)+abs(r1-sample))/100
+end
+
+function deperturb(prior::ContinuousUnivariateDistribution,sample,r1,r2,γ)
+    p = (r2-r1)*γ + randn()*de_ϵ(sample,r1,r2,γ)
+    sample + p
+end
+
+function deperturb(prior::DiscreteUnivariateDistribution,sample::T,r1,r2,γ) where T
+    p = (r2-r1)*γ + randn()*max(de_ϵ(sample,r1,r2,γ),0.5)
+    sp=sign(p)
+    ap=abs(p)
+    intp=floor(ap)
+    floatp=ap-intp
+    pprob=(intp+ifelse(rand()>floatp,oftype(p,0),oftype(p,1)))*sp
+    sample + round(T,pprob)
+end
+
+"""
+    ABCDE(plan, ϵ_target; nparticles=100, generations=500, parallel=false, verbose=true)
+
+A sequential monte carlo algorithm inspired by differential evolution, very efficient (simpler version of B.M.Turner 2012, https://doi.org/10.1016/j.jmp.2012.06.004)
+
+# Arguments:
+- `plan`: a plan built using the function ABCplan.
+- `ϵ_target`: maximum acceptable distance between simulated datasets and the target dataset
+- `nparticles`: number of samples from the approximate posterior that will be returned
+- `generations`: total of simulations per particle
+- `parallel`: when set to `true` multithreaded parallelism is enabled
+- `verbose`: when set to `true` verbosity is enabled
+"""
+function ABCDE(plan::ABCplan, ϵ_target; nparticles=100, generations=500, parallel=false, verbose=true)
+    @extract_params plan prior distance simulation data params
+    θs,Δs=sample_plan(plan,nparticles,parallel)
+    γ = 2.38/sqrt(2*length(prior))
+    iters=0
+    complete=1-sum(Δs.>ϵ_target)/nparticles
+    while iters<generations
+        iters+=1
+        nθs=identity.(θs)
+        nΔs=identity.(Δs)
+        @cthreads parallel for i in 1:nparticles
+            s=i
+            if Δs[i]>ϵ_target
+                s=rand(1:nparticles)
+            end
+            a=s
+            while a==s
+                a=rand(1:nparticles)
+            end
+            b=a
+            while b==a || b==s
+                b=rand(1:nparticles)
+            end
+            θp=deperturb(prior,θs[s],θs[a],θs[b],γ)
+            w_prior=pdf(prior,θp)/pdf(prior,θs[i])
+            rand() > min(1,w_prior) && continue
+            xp=simulation(θp,params)
+            dp=distance(xp,data)
+            if dp <= max(ϵ_target, Δs[i])
+                nΔs[i]=dp
+                nθs[i]=θp
+            end
+        end
+        θs=nθs
+        Δs=nΔs
+        ncomplete=1-sum(Δs.>ϵ_target)/nparticles
+        if verbose && (ncomplete!=complete || complete>=(nparticles-1)/nparticles)
+            @info "Finished run:" completion=ncomplete nsim=iters*nparticles range_ϵ=extrema(Δs)
+        end
+        complete=ncomplete
+    end
+    conv=maximum(Δs)<=ϵ_target
+    if verbose
+        @info "End:" completion=complete converged=conv nsim=generations*nparticles range_ϵ=extrema(Δs)
+    end
+    θs,Δs,conv
+end
+
+export ABCDE