Code
library(statsr)
library(ggplot2)
library(tidyverse)
data(tapwater)
glimpse(tapwater)Rows: 28
Columns: 6
$ date <fct> 2009-02-25, 2008-12-22, 2008-09-25, 2008-05-14, 2008-04-14,…
$ tthm <dbl> 34.38, 39.33, 108.63, 88.00, 81.00, 49.25, 75.00, 82.86, 85…
$ samples <int> 8, 9, 8, 8, 2, 8, 6, 7, 8, 4, 4, 4, 4, 6, 4, 8, 10, 10, 10,…
$ nondetects <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ min <dbl> 32.00, 31.00, 85.00, 75.00, 81.00, 26.00, 70.00, 70.00, 80.…
$ max <dbl> 39.00, 46.00, 120.00, 94.00, 81.00, 68.00, 80.00, 90.00, 90…
Code
ggplot(tapwater, aes(x = tthm)) +
geom_histogram(bins = 10, fill = "#FF6B6B", alpha = 0.7) +
labs(
title = "Histogramme de pph",
x = "pph",
y = "Fréquence"
) +
theme_minimal()Code
set.seed(42)
# Hyperparamètres du prior
m_0 = 35
n_0 = 25
s2_0 = 156.25
v_0 = n_0 - 1
# Statistiques de l'échantillon
Y = tapwater$tthm
ybar = mean(Y)
s2 = var(Y)
n = length(Y)
# Hyperparamètres du posterior
n_n = n_0 + n
m_n = (n*ybar + n_0*m_0) / n_n
v_n = v_0 + n
s2_n = ((n-1)*s2 + v_0*s2_0 + n_0*n*(m_0 - ybar)^2/n_n) / v_n
# Inference statsr
bayes_inference(
tthm, data = tapwater, prior = "NG",
mu_0 = m_0, n_0 = n_0, s_0 = sqrt(s2_0), v_0 = v_0,
stat = "mean", type = "ci", method = "theoretical",
show_res = TRUE, show_summ = TRUE, show_plot = FALSE
)Single numerical variable
n = 28, y-bar = 55.5239, s = 23.254
(Assuming proper prior: mu | sigma^2 ~ N(35, *sigma^2/25)
(Assuming proper prior: 1/sigma^2 ~ G(24/2,156.25*24/2)
Joint Posterior Distribution for mu and 1/sigma^2:
N(45.8428, sigma^2/53) G(52/2, 8.6769*52/2)
Marginal Posterior for mu:
Student t with posterior mean = 45.8428, posterior scale = 2.9457 on 52 df
95% CI: (39.9319 , 51.7537)
Code
# -----------------------------
# 1. Distribution de sigma (prior & posterior)
# -----------------------------
# Prior : phi = 1/sigma^2 ~ Gamma(v0/2, v0*s2_0/2)
phi_prior = rgamma(5000, shape = v_0/2, rate = v_0*s2_0/2)
sigma_prior = 1/sqrt(phi_prior)
# Posterior : phi_n = 1/sigma^2 ~ Gamma(vn/2, vn*s2_n/2)
phi_post = rgamma(5000, shape = v_n/2, rate = v_n*s2_n/2)
sigma_post = 1/sqrt(phi_post)
df_sigma = tibble(
sigma = c(sigma_prior, sigma_post),
type = rep(c("Prior", "Posterior"), each = 5000)
)
ggplot(df_sigma, aes(x = sigma, fill = type, color = type)) +
geom_density(alpha = 0.3, linewidth = 1) +
labs(
title = "Distribution a priori et a posteriori de σ",
x = "σ",
y = "Densité"
) +
theme_minimal()Code
# -----------------------------
# 2. Distribution de mu (prior & posterior)
# -----------------------------
# Prior : mu ~ Normal(m0, sigma/sqrt(n0))
mu_prior = rnorm(5000, mean = m_0, sd = sigma_prior / sqrt(n_0))
# Posterior : mu_n ~ Normal(mn, sigma/sqrt(nn))
mu_post = rnorm(5000, mean = m_n, sd = sigma_post / sqrt(n_n))
df_mu = tibble(
mu = c(mu_prior, mu_post),
type = rep(c("Prior", "Posterior"), each = 5000)
)
ggplot(df_mu, aes(x = mu, fill = type, color = type)) +
geom_density(alpha = 0.3, linewidth = 1) +
labs(
title = "Distribution a priori et a posteriori de μ",
x = "μ",
y = "Densité"
) +
theme_minimal()