Identifying genes associated with prostate cancer

F-localization interval analysis

Setup and data loading

using DataFrames
using Distributions
using Empirikos
using Hypatia
using Plots
using LaTeXStrings
using IntervalSets
using JuMP
using Random

# Set up pgfplotsx and custom preamble
gr()

# Set plot theme
theme(:default;
      background_color_legend = :transparent,
      foreground_color_legend = :transparent,
      grid=nothing,
      frame=:box,
      legendfonthalign = :left,
      thickness_scaling=1.3)

# Load the prostate data
Zs = Prostate.ebayes_samples()

6033-element Vector{StandardNormalSample{Float64}}:
 N(1.47236666651029; μ, σ=1.0)
 N(3.57291516191986; μ, σ=1.0)
 N(-0.0277536813375331; μ, σ=1.0)
 N(-1.1320516549926; μ, σ=1.0)
 N(-0.140220986707002; μ, σ=1.0)
 N(0.958809840576886; μ, σ=1.0)
 N(1.0681232949828; μ, σ=1.0)
 N(-1.2914820655344; μ, σ=1.0)
 N(-1.22750497688098; μ, σ=1.0)
 N(1.17605516919472; μ, σ=1.0)
 N(3.34583078878689; μ, σ=1.0)
 N(0.188740669060314; μ, σ=1.0)
 N(-0.851333492603207; μ, σ=1.0)
 ⋮
 N(0.55060618889427; μ, σ=1.0)
 N(0.265461705731843; μ, σ=1.0)
 N(-1.19340060387477; μ, σ=1.0)
 N(-1.83473655530782; μ, σ=1.0)
 N(0.0707678142839226; μ, σ=1.0)
 N(0.0243971545443552; μ, σ=1.0)
 N(-0.207896831967974; μ, σ=1.0)
 N(0.287356609464891; μ, σ=1.0)
 N(-0.777423419217064; μ, σ=1.0)
 N(-1.18366655761165; μ, σ=1.0)
 N(0.103544128661099; μ, σ=1.0)
 N(-0.909192514625776; μ, σ=1.0)

Marginal distribution of z-scores

DKW-F-Localization

# Create and fit the DKW F-Localization
dkw_floc = DvoretzkyKieferWolfowitz(0.05)
fitted_dkw = fit(dkw_floc, Zs)

# Plot the DKW band
dkw_plot = plot(fitted_dkw, label="DKW band",
     xlab=L"z", ylab=L"\widehat{F}_n(z)",  size=(380,280))

KDE-F-Localization

# Create and fit the KDE-based F-Localization
infty_floc = Empirikos.InfinityNormDensityBand(α=0.05)
fitted_infty_floc = fit(infty_floc, Zs)

# Plot the KDE band with histogram
prostate_marginal_plot = histogram([response(Z) for Z in Zs],
    bins=50, normalize=true,
    label="Histogram", fillalpha=0.4, linealpha=0.4, fillcolor=:lightgray,
    size=(380,280), xlims=(-4.5,4.5))

plot!(prostate_marginal_plot, fitted_infty_floc,
      label="KDE band", xlims=(-4.5,4.5),
      yguide=L"\widehat{f}_n(z)", xguide=L"z")

plot!(prostate_marginal_plot, [fitted_infty_floc.a_min; fitted_infty_floc.a_max], seriestype=:vline,
      linestyle=:dot, label=nothing, color=:lightgrey)

Confidence intervals

# Define prior classes
gcal_scalemix = Empirikos.autoconvexclass(GaussianScaleMixtureClass(), Zs; :grid_scaling => 1.1)

GaussianScaleMixtureClass | σs = [0.1, 0.11, 0.121, 0.1331, 0.14641, 0.161051, 0.177156, 0.194872, 0.214359, 0.235795  …  4.52593, 4.97852, 5.47637, 6.02401, 6.62641, 7.28905, 8.01795, 8.81975, 9.70172, 10.6719]

F-Localization methods setup

floc_method_dkw_scalemix = FLocalizationInterval(
    flocalization = dkw_floc,
    convexclass = gcal_scalemix, 
    solver = Hypatia.Optimizer
)

floc_method_kde_scalemix = FLocalizationInterval(
    flocalization = infty_floc,
    convexclass = gcal_scalemix, 
    solver = Hypatia.Optimizer
)

EB intervals with F-Localization: ∞-density band [α: 0.05] [Kernel: FlatTopKernel | bandwidth = nothing] [Bootstrap: Multinomial(1000)]
                  𝒢: GaussianScaleMixtureClass | σs = [0.1, 0.11, 0.121, 0.1331, 0.14641, 0.161051, 0.177156, 0.194872, 0.214359, 0.235795  …  4.52593, 4.97852, 5.47637, 6.02401, 6.62641, 7.28905, 8.01795, 8.81975, 9.70172, 10.6719]

Compute confidence intervals

# Define values for which to compute confidence intervals
ts = -3:0.25:3

# Define posterior targets
postmean_targets = Empirikos.PosteriorMean.(StandardNormalSample.(ts))

25-element Vector{PosteriorMean{StandardNormalSample{Float64}}}:
 𝔼[μ | N(-3.0; μ, σ=1.0)]
 𝔼[μ | N(-2.75; μ, σ=1.0)]
 𝔼[μ | N(-2.5; μ, σ=1.0)]
 𝔼[μ | N(-2.25; μ, σ=1.0)]
 𝔼[μ | N(-2.0; μ, σ=1.0)]
 𝔼[μ | N(-1.75; μ, σ=1.0)]
 𝔼[μ | N(-1.5; μ, σ=1.0)]
 𝔼[μ | N(-1.25; μ, σ=1.0)]
 𝔼[μ | N(-1.0; μ, σ=1.0)]
 𝔼[μ | N(-0.75; μ, σ=1.0)]
 𝔼[μ | N(-0.5; μ, σ=1.0)]
 𝔼[μ | N(-0.25; μ, σ=1.0)]
 𝔼[μ | N(0.0; μ, σ=1.0)]
 𝔼[μ | N(0.25; μ, σ=1.0)]
 𝔼[μ | N(0.5; μ, σ=1.0)]
 𝔼[μ | N(0.75; μ, σ=1.0)]
 𝔼[μ | N(1.0; μ, σ=1.0)]
 𝔼[μ | N(1.25; μ, σ=1.0)]
 𝔼[μ | N(1.5; μ, σ=1.0)]
 𝔼[μ | N(1.75; μ, σ=1.0)]
 𝔼[μ | N(2.0; μ, σ=1.0)]
 𝔼[μ | N(2.25; μ, σ=1.0)]
 𝔼[μ | N(2.5; μ, σ=1.0)]
 𝔼[μ | N(2.75; μ, σ=1.0)]
 𝔼[μ | N(3.0; μ, σ=1.0)]

Compute confidence intervals for scale mixture priors

postmean_ci_dkw_scalemix = confint.(floc_method_dkw_scalemix, postmean_targets, Zs)

postmean_ci_kde_scalemix = confint.(floc_method_kde_scalemix, postmean_targets, Zs)

Visualize confidence intervals for scale mixture priors

postmean_scalemix_plot = plot(ts, postmean_ci_kde_scalemix,
    label="Gauss-F-Loc", fillcolor=:darkorange, fillalpha=0.5, ylim=(-2.55,2.55),
    xguide = L"z", yguide=L"E[\mu \mid Z=z]",
    size=(380,280))

plot!(postmean_scalemix_plot, ts, postmean_ci_dkw_scalemix,
    label="DKW-F-Loc", show_ribbon=false, alpha=0.9, color=:black)

plot!(postmean_scalemix_plot, [-3.0;3.0], [-3.0; 3.0], seriestype=:line,
    linestyle=:dot, label=nothing, color=:lightgrey)