using DataFrames
using Distributions
using Empirikos
using Hypatia
using Plots
using LaTeXStrings
using IntervalSets
using JuMP
using Random
# Set up pgfplotsx and custom preamble
gr()
# Set plot theme
theme(:default;
= :transparent,
background_color_legend = :transparent,
foreground_color_legend =nothing,
grid=:box,
frame= :left,
legendfonthalign =1.3) thickness_scaling
Identifying genes associated with prostate cancer
F-localization interval analysis
Setup and data loading
# Load the prostate data
= Prostate.ebayes_samples() Zs
6033-element Vector{StandardNormalSample{Float64}}:
N(1.47236666651029; μ, σ=1.0)
N(3.57291516191986; μ, σ=1.0)
N(-0.0277536813375331; μ, σ=1.0)
N(-1.1320516549926; μ, σ=1.0)
N(-0.140220986707002; μ, σ=1.0)
N(0.958809840576886; μ, σ=1.0)
N(1.0681232949828; μ, σ=1.0)
N(-1.2914820655344; μ, σ=1.0)
N(-1.22750497688098; μ, σ=1.0)
N(1.17605516919472; μ, σ=1.0)
N(3.34583078878689; μ, σ=1.0)
N(0.188740669060314; μ, σ=1.0)
N(-0.851333492603207; μ, σ=1.0)
⋮
N(0.55060618889427; μ, σ=1.0)
N(0.265461705731843; μ, σ=1.0)
N(-1.19340060387477; μ, σ=1.0)
N(-1.83473655530782; μ, σ=1.0)
N(0.0707678142839226; μ, σ=1.0)
N(0.0243971545443552; μ, σ=1.0)
N(-0.207896831967974; μ, σ=1.0)
N(0.287356609464891; μ, σ=1.0)
N(-0.777423419217064; μ, σ=1.0)
N(-1.18366655761165; μ, σ=1.0)
N(0.103544128661099; μ, σ=1.0)
N(-0.909192514625776; μ, σ=1.0)
Marginal distribution of z-scores
DKW-F-Localization
# Create and fit the DKW F-Localization
= DvoretzkyKieferWolfowitz(0.05)
dkw_floc = fit(dkw_floc, Zs)
fitted_dkw
# Plot the DKW band
= plot(fitted_dkw, label="DKW band",
dkw_plot =L"z", ylab=L"\widehat{F}_n(z)", size=(380,280)) xlab
KDE-F-Localization
# Create and fit the KDE-based F-Localization
= Empirikos.InfinityNormDensityBand(α=0.05)
infty_floc = fit(infty_floc, Zs)
fitted_infty_floc
# Plot the KDE band with histogram
= histogram([response(Z) for Z in Zs],
prostate_marginal_plot =50, normalize=true,
bins="Histogram", fillalpha=0.4, linealpha=0.4, fillcolor=:lightgray,
label=(380,280), xlims=(-4.5,4.5))
size
plot!(prostate_marginal_plot, fitted_infty_floc,
="KDE band", xlims=(-4.5,4.5),
label=L"\widehat{f}_n(z)", xguide=L"z")
yguide
plot!(prostate_marginal_plot, [fitted_infty_floc.a_min; fitted_infty_floc.a_max], seriestype=:vline,
=:dot, label=nothing, color=:lightgrey) linestyle
Confidence intervals
# Define prior classes
= Empirikos.autoconvexclass(GaussianScaleMixtureClass(), Zs; :grid_scaling => 1.1) gcal_scalemix
GaussianScaleMixtureClass | σs = [0.1, 0.11, 0.121, 0.1331, 0.14641, 0.161051, 0.177156, 0.194872, 0.214359, 0.235795 … 4.52593, 4.97852, 5.47637, 6.02401, 6.62641, 7.28905, 8.01795, 8.81975, 9.70172, 10.6719]
F-Localization methods setup
= FLocalizationInterval(
floc_method_dkw_scalemix = dkw_floc,
flocalization = gcal_scalemix,
convexclass = Hypatia.Optimizer
solver
)
= FLocalizationInterval(
floc_method_kde_scalemix = infty_floc,
flocalization = gcal_scalemix,
convexclass = Hypatia.Optimizer
solver )
EB intervals with F-Localization: ∞-density band [α: 0.05] [Kernel: FlatTopKernel | bandwidth = nothing] [Bootstrap: Multinomial(1000)]
𝒢: GaussianScaleMixtureClass | σs = [0.1, 0.11, 0.121, 0.1331, 0.14641, 0.161051, 0.177156, 0.194872, 0.214359, 0.235795 … 4.52593, 4.97852, 5.47637, 6.02401, 6.62641, 7.28905, 8.01795, 8.81975, 9.70172, 10.6719]
Compute confidence intervals
# Define values for which to compute confidence intervals
= -3:0.25:3
ts
# Define posterior targets
= Empirikos.PosteriorMean.(StandardNormalSample.(ts)) postmean_targets
25-element Vector{PosteriorMean{StandardNormalSample{Float64}}}:
𝔼[μ | N(-3.0; μ, σ=1.0)]
𝔼[μ | N(-2.75; μ, σ=1.0)]
𝔼[μ | N(-2.5; μ, σ=1.0)]
𝔼[μ | N(-2.25; μ, σ=1.0)]
𝔼[μ | N(-2.0; μ, σ=1.0)]
𝔼[μ | N(-1.75; μ, σ=1.0)]
𝔼[μ | N(-1.5; μ, σ=1.0)]
𝔼[μ | N(-1.25; μ, σ=1.0)]
𝔼[μ | N(-1.0; μ, σ=1.0)]
𝔼[μ | N(-0.75; μ, σ=1.0)]
𝔼[μ | N(-0.5; μ, σ=1.0)]
𝔼[μ | N(-0.25; μ, σ=1.0)]
𝔼[μ | N(0.0; μ, σ=1.0)]
𝔼[μ | N(0.25; μ, σ=1.0)]
𝔼[μ | N(0.5; μ, σ=1.0)]
𝔼[μ | N(0.75; μ, σ=1.0)]
𝔼[μ | N(1.0; μ, σ=1.0)]
𝔼[μ | N(1.25; μ, σ=1.0)]
𝔼[μ | N(1.5; μ, σ=1.0)]
𝔼[μ | N(1.75; μ, σ=1.0)]
𝔼[μ | N(2.0; μ, σ=1.0)]
𝔼[μ | N(2.25; μ, σ=1.0)]
𝔼[μ | N(2.5; μ, σ=1.0)]
𝔼[μ | N(2.75; μ, σ=1.0)]
𝔼[μ | N(3.0; μ, σ=1.0)]
Compute confidence intervals for scale mixture priors
= confint.(floc_method_dkw_scalemix, postmean_targets, Zs) postmean_ci_dkw_scalemix
= confint.(floc_method_kde_scalemix, postmean_targets, Zs) postmean_ci_kde_scalemix
Visualize confidence intervals for scale mixture priors
= plot(ts, postmean_ci_kde_scalemix,
postmean_scalemix_plot ="Gauss-F-Loc", fillcolor=:darkorange, fillalpha=0.5, ylim=(-2.55,2.55),
label= L"z", yguide=L"E[\mu \mid Z=z]",
xguide =(380,280))
size
plot!(postmean_scalemix_plot, ts, postmean_ci_dkw_scalemix,
="DKW-F-Loc", show_ribbon=false, alpha=0.9, color=:black)
label
plot!(postmean_scalemix_plot, [-3.0;3.0], [-3.0; 3.0], seriestype=:line,
=:dot, label=nothing, color=:lightgrey) linestyle