5
p-Hacking: Garden of Forking Paths
drag Y to set number of sub-tests K (1 = honest, 20 = pure fishing) · click to burst-sample · [R] reset
idle
224 lines · vanilla
view source
// p-Hacking sandbox — "I just keep resampling until significant"
//
// Two side-by-side histograms of p-values, both generated from TRULY-NULL data
// (no real effect anywhere). Left panel: an honest analyst who runs ONE pre-
// specified t-test per study. Their p-value distribution is uniform on [0,1],
// because under H0 a p-value is itself a Uniform(0,1) random variable.
//
// Right panel: a "fishing" investigator who runs K sub-analyses per study
// (different subgroup splits, transformations, exclusions) and reports the
// MINIMUM p-value. The min of K i.i.d. Uniform(0,1) is Beta(1, K), heavily
// skewed toward 0, so the fishing histogram piles up on the left.
//
// User scrubs mouseY to set K from 1 (matches honest) to 20 (false-positive
// rate near 1 - (1-0.05)^20 ≈ 64%).
// Canvas size in pixels; written by init() and refreshed by tick() on resize.
let W = 0;
let H = 0;

// --- simulation parameters ---
const BINS = 20;              // histogram resolution: 20 equal-width bins on [0, 1], each 0.05 wide
const ALPHA = 0.05;           // significance threshold
const N_PER_GROUP = 12;       // observations per arm in every t-test
const STUDIES_PER_FRAME = 6;  // studies appended to each histogram on every frame

// --- accumulated results: honest analyst ---
let honestHist = new Int32Array(BINS);
let honestTotal = 0;
let honestSig = 0;            // number of honest p-values below 0.05

// --- accumulated results: fishing investigator ---
let fishHist = new Int32Array(BINS);
let fishTotal = 0;
let fishSig = 0;

// How many sub-tests the fishing investigator runs per study.
let K = 5;

// Short-lived "p-value" droplets giving visual feedback for the newest studies.
let drops = [];
// Setup hook: stores the canvas dimensions from the `width` and `height`
// properties of the argument into the module-level W and H.
function init(size) {
  ({ width: W, height: H } = size);
}
// --- statistics helpers ---
// Draws one standard-normal variate with the Box-Muller transform.
// Two uniforms are consumed per call: the first sets the radius, the second
// the angle. The first is floored at 1e-12 so Math.log never sees 0.
function randn() {
  const firstDraw = Math.random();
  const secondDraw = Math.random();
  const u = firstDraw < 1e-12 ? 1e-12 : firstDraw;
  const radius = Math.sqrt(-2 * Math.log(u));
  const angle = 2 * Math.PI * secondDraw;
  return radius * Math.cos(angle);
}
// Natural log of the gamma function for x >= 0.5, using the Lanczos
// approximation (g = 7, 9 coefficients).
function logGamma(x) {
  const LANCZOS = [
    0.99999999999980993, 676.5203681218851, -1259.1392167224028,
    771.32342877765313, -176.61502916214059, 12.507343278686905,
    -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
  ];
  const z = x - 1;
  let series = LANCZOS[0];
  for (let i = 1; i < LANCZOS.length; i++) series += LANCZOS[i] / (z + i);
  const t = z + 7.5;
  return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(series);
}

// Continued-fraction factor of the incomplete beta function, evaluated with
// the modified Lentz algorithm. Converges quickly when x < (a + 1) / (a + b + 2).
function betaContinuedFraction(x, a, b) {
  const MAX_ITER = 200;
  const EPS = 3e-16;
  const TINY = 1e-300; // keeps denominators away from exact zero
  const qab = a + b;
  const qap = a + 1;
  const qam = a - 1;
  let c = 1;
  let d = 1 - (qab * x) / qap;
  if (Math.abs(d) < TINY) d = TINY;
  d = 1 / d;
  let h = d;
  for (let m = 1; m <= MAX_ITER; m++) {
    const m2 = 2 * m;
    // even step of the recurrence
    let coef = (m * (b - m) * x) / ((qam + m2) * (a + m2));
    d = 1 + coef * d;
    if (Math.abs(d) < TINY) d = TINY;
    c = 1 + coef / c;
    if (Math.abs(c) < TINY) c = TINY;
    d = 1 / d;
    h *= d * c;
    // odd step of the recurrence
    coef = (-(a + m) * (qab + m) * x) / ((a + m2) * (qap + m2));
    d = 1 + coef * d;
    if (Math.abs(d) < TINY) d = TINY;
    c = 1 + coef / c;
    if (Math.abs(c) < TINY) c = TINY;
    d = 1 / d;
    const delta = d * c;
    h *= delta;
    if (Math.abs(delta - 1) < EPS) break;
  }
  return h;
}

// Regularized incomplete beta function I_x(a, b) for a, b >= 0.5.
function regularizedIncompleteBeta(x, a, b) {
  if (x <= 0) return 0;
  if (x >= 1) return 1;
  const front = Math.exp(
    logGamma(a + b) - logGamma(a) - logGamma(b) +
    a * Math.log(x) + b * Math.log1p(-x)
  );
  // Use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a) to stay in the region
  // where the continued fraction converges fast.
  if (x < (a + 1) / (a + b + 2)) {
    return (front * betaContinuedFraction(x, a, b)) / a;
  }
  return 1 - (front * betaContinuedFraction(1 - x, b, a)) / b;
}

// Two-sided tail probability P(|T| >= |t|) for a Student-t variable with
// `df` degrees of freedom: I_{df / (df + t^2)}(df / 2, 1 / 2).
function studentTTwoSidedP(t, df) {
  return regularizedIncompleteBeta(df / (df + t * t), df / 2, 0.5);
}

// Welch two-sample t-test; returns the two-sided p-value in [0, 1].
//
// a, b: array-likes of numbers (one per observation in each arm).
//
// The statistic uses the unpooled standard error and is referred to a
// Student-t distribution with Welch–Satterthwaite degrees of freedom.
// The exact t tail matters here: with ~22 degrees of freedom a normal
// approximation reports p < 0.05 about 6.3% of the time under H0, so the
// "honest" false-positive rate would never settle at the nominal 5%.
//
// Returns 1 (no evidence against H0) when the test cannot be computed:
// fewer than two observations in an arm, or essentially zero variance.
function tTestTwoSidedP(a, b) {
  const na = a.length;
  const nb = b.length;

  let ma = 0;
  let mb = 0;
  for (let i = 0; i < na; i++) ma += a[i];
  for (let i = 0; i < nb; i++) mb += b[i];
  ma /= na;
  mb /= nb;

  let va = 0;
  let vb = 0;
  for (let i = 0; i < na; i++) { const d = a[i] - ma; va += d * d; }
  for (let i = 0; i < nb; i++) { const d = b[i] - mb; vb += d * d; }
  va /= (na - 1);
  vb /= (nb - 1);

  // Each arm's contribution to the squared standard error of the mean difference.
  const qa = va / na;
  const qb = vb / nb;
  const se = Math.sqrt(qa + qb);
  // Written as !(>=) so a NaN standard error (n < 2) is caught as well.
  if (!(se >= 1e-15)) return 1;

  const t = (ma - mb) / se;
  // Welch–Satterthwaite approximation to the degrees of freedom.
  const df = ((qa + qb) * (qa + qb)) /
    ((qa * qa) / (na - 1) + (qb * qb) / (nb - 1));

  const p = studentTTwoSidedP(t, df);
  return Math.min(1, Math.max(0, p));
}
// Cumulative distribution function of the standard normal, expressed through
// erf(): Phi(x) = (1 + erf(x / sqrt(2))) / 2. Accuracy is limited by the erf()
// approximation (Abramowitz & Stegun 7.1.26).
function phi(x) {
  const scaled = x / Math.SQRT2;
  return 0.5 * (1 + erf(scaled));
}
// Error function via the Abramowitz & Stegun 7.1.26 rational approximation.
// The polynomial is evaluated for |x| and the sign restored afterwards
// (erf is odd); inputs that are not negative take the positive branch.
function erf(x) {
  // Coefficients a5 .. a1, listed in Horner order.
  const HORNER = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
  const P = 0.3275911;

  const magnitude = Math.abs(x);
  const t = 1 / (1 + P * magnitude);

  let poly = HORNER[0];
  for (let i = 1; i < HORNER.length; i++) {
    poly = poly * t + HORNER[i];
  }

  const value = 1 - poly * t * Math.exp(-magnitude * magnitude);
  return x < 0 ? -value : value;
}
// Builds one study arm: a Float64Array of n draws from randn().
// Every arm comes from the same N(0,1) source, so H0 is true by construction.
function sampleNullGroup(n) {
  const sample = new Float64Array(n);
  let idx = 0;
  while (idx < n) {
    sample[idx] = randn();
    idx += 1;
  }
  return sample;
}
// The honest analyst: draw two null arms of N_PER_GROUP observations each and
// run exactly one test on them. Returns that single two-sided p-value.
function honestStudy() {
  return tTestTwoSidedP(
    sampleNullGroup(N_PER_GROUP),
    sampleNullGroup(N_PER_GROUP)
  );
}
// The fishing investigator: perform k sub-analyses and keep only the smallest
// p-value. Every sub-analysis draws two fresh, independent null arms, so the
// result is distributed as the minimum of k independent t-test p-values —
// the plainest illustration of the multiple-comparisons problem.
// Returns 1 when k is not positive (no test is run).
function fishStudy(k) {
  let smallest = 1;
  let remaining = k;
  while (remaining > 0) {
    const p = tTestTwoSidedP(
      sampleNullGroup(N_PER_GROUP),
      sampleNullGroup(N_PER_GROUP)
    );
    smallest = p < smallest ? p : smallest;
    remaining -= 1;
  }
  return smallest;
}
// Maps a p-value to its histogram bin index, clamped to [0, BINS - 1] so that
// p = 1 lands in the last bin and negative inputs land in the first.
function binOf(p) {
  const raw = Math.floor(p * BINS);
  if (raw < 0) return 0;
  return raw >= BINS ? BINS - 1 : raw;
}
// Per-frame hook: reads input, simulates a batch of studies, and redraws the
// whole scene.
//
// ctx           - 2D canvas context to draw on
// dt            - time since the previous frame; used to age the droplets
//                 (presumably seconds, given the 0.9 lifetime — confirm with host)
// width, height - current canvas size; copied into W / H when they change
// input         - host input object; this function reads `mouseY` and, when
//                 present, calls `consumeClicks()` and `justPressed(key)`
//
// Whenever K changes, the fishing histogram and counters are cleared: they
// must only contain studies run with the K shown in the panel title, or the
// observed rate cannot be compared with the "expected" rate for the current K.
function tick({ ctx, dt, width, height, input }) {
  const K_MIN = 1;
  const K_MAX = 20;
  const BURST_STUDIES = 200;  // extra studies simulated per click
  const DROP_LIFETIME = 0.9;  // droplet lifetime, in the same units as dt

  if (width !== W || height !== H) { W = width; H = height; }

  // mouseY scrubs K from K_MIN..K_MAX. When the mouse is outside the vertical
  // range of the canvas (or the canvas has no height), hold the current K.
  const insideY = H > 0 && input.mouseY >= 0 && input.mouseY <= H;
  if (insideY) {
    const frac = 1 - input.mouseY / H; // top = high K, bottom = K_MIN
    const newK = Math.max(K_MIN, Math.min(K_MAX,
      Math.round(K_MIN + frac * (K_MAX - K_MIN))));
    if (newK !== K) {
      K = newK;
      // Discard fishing results gathered under the previous K.
      fishHist = new Int32Array(BINS);
      fishTotal = 0;
      fishSig = 0;
    }
  }

  // Clicks burst a chunk of studies. consumeClicks() returns an array of clicks.
  const clickArr = input.consumeClicks ? input.consumeClicks() : null;
  const clicks = clickArr && clickArr.length ? clickArr.length : 0;

  // [R] clears everything accumulated so far.
  if (input.justPressed && (input.justPressed("r") || input.justPressed("R"))) {
    honestHist = new Int32Array(BINS); honestTotal = 0; honestSig = 0;
    fishHist = new Int32Array(BINS); fishTotal = 0; fishSig = 0;
    drops = [];
  }

  // ---- simulate ----
  const studies = STUDIES_PER_FRAME + clicks * BURST_STUDIES;
  for (let i = 0; i < studies; i++) {
    const ph = honestStudy();
    honestHist[binOf(ph)]++;
    honestTotal++;
    if (ph < ALPHA) honestSig++;

    const pf = fishStudy(K);
    fishHist[binOf(pf)]++;
    fishTotal++;
    if (pf < ALPHA) fishSig++;

    // Only the first study of each frame spawns droplets, one per panel.
    if (i === 0) {
      drops.push({ side: 0, p: ph, age: 0 });
      drops.push({ side: 1, p: pf, age: 0 });
    }
  }

  // ---- layout ----
  ctx.fillStyle = "#0a0a10";
  ctx.fillRect(0, 0, W, H);
  const padL = 18, padR = 18, padTop = 56, padBot = 92;
  const gap = 14;
  const panelW = (W - padL - padR - gap) / 2;
  const panelH = H - padTop - padBot;
  const leftX = padL;
  const rightX = padL + panelW + gap;
  const topY = padTop;
  const botY = padTop + panelH;

  drawPanel(ctx, leftX, topY, panelW, panelH, honestHist, honestTotal,
    "Honest test (1 pre-specified t-test)",
    "rgba(120,200,255,0.75)", "rgba(120,200,255,1)");
  drawPanel(ctx, rightX, topY, panelW, panelH, fishHist, fishTotal,
    `Fishing expedition (min of K=${K} sub-tests)`,
    "rgba(255,120,140,0.75)", "rgba(255,120,140,1)");

  // ---- animated drops falling into the histograms ----
  // Iterate backwards so splice() does not skip entries.
  for (let i = drops.length - 1; i >= 0; i--) {
    const d = drops[i];
    d.age += dt;
    if (d.age > DROP_LIFETIME) { drops.splice(i, 1); continue; }
    const x0 = d.side === 0 ? leftX : rightX;
    const x = x0 + 10 + (d.p) * (panelW - 20);
    const y = topY + 6 + d.age * 80;
    if (y > botY - 4) continue; // past the bottom of the panel: keep it, but do not draw
    const a = 1 - d.age / DROP_LIFETIME; // fade out linearly with age
    const color = d.p < ALPHA
      ? `rgba(255,210,80,${(0.9 * a).toFixed(3)})`
      : `rgba(255,255,255,${(0.6 * a).toFixed(3)})`;
    ctx.fillStyle = color;
    ctx.beginPath();
    ctx.arc(x, y, 2.5, 0, Math.PI * 2);
    ctx.fill();
  }

  // ---- title + HUD ----
  ctx.fillStyle = "#e8e8f0";
  ctx.font = "bold 16px monospace";
  ctx.fillText("p-Hacking: the Garden of Forking Paths", padL, 24);
  ctx.font = "11px monospace";
  ctx.fillStyle = "#9aa";
  ctx.fillText(`null data: two groups of N(0,1), n=${N_PER_GROUP} per arm · ${(honestTotal + fishTotal).toLocaleString()} studies simulated`, padL, 42);

  // bottom stats
  const hRate = honestTotal > 0 ? honestSig / honestTotal : 0;
  const fRate = fishTotal > 0 ? fishSig / fishTotal : 0;
  // expected fishing FP rate when K independent sub-tests: 1 - (1 - alpha)^K
  const expectedFishRate = 1 - Math.pow(1 - ALPHA, K);
  ctx.font = "12px monospace";
  ctx.fillStyle = "#9cf";
  ctx.fillText(`honest false-positive rate (p<${ALPHA}): ${(hRate * 100).toFixed(2)}% (expected: ${(ALPHA * 100).toFixed(2)}%)`,
    padL, botY + 24);
  ctx.fillStyle = "#fca";
  ctx.fillText(`fishing false-positive rate (p<${ALPHA}): ${(fRate * 100).toFixed(2)}% (expected: ${(expectedFishRate * 100).toFixed(2)}%)`,
    padL, botY + 42);

  // ---- K-slider visualization on the right edge ----
  const sliderX = W - 14;
  ctx.fillStyle = "rgba(255,255,255,0.07)";
  ctx.fillRect(sliderX - 4, padTop, 6, panelH);
  const knobY = padTop + (1 - (K - K_MIN) / (K_MAX - K_MIN)) * panelH;
  ctx.fillStyle = "rgba(255,210,80,1)";
  ctx.beginPath();
  ctx.arc(sliderX - 1, knobY, 5, 0, Math.PI * 2);
  ctx.fill();
  ctx.fillStyle = "#fc8";
  ctx.font = "bold 11px monospace";
  ctx.fillText(`K=${K}`, sliderX - 38, knobY + 4);

  // ---- footer ----
  ctx.fillStyle = "#667";
  ctx.font = "10px monospace";
  ctx.fillText("drag mouseY to scrub K · click to burst-sample · [R] reset · Simmons, Nelson & Simonsohn (2011)",
    padL, H - 8);
}
// Renders one histogram panel.
//
// ctx        - 2D canvas context
// x, y, w, h - panel rectangle
// hist       - per-bin counts (the first BINS entries are read)
// total      - number of studies in the histogram
// label      - panel title
// barColor   - fill for bars that are not in the significant region
// lineColor  - accepted for the caller's convenience; not used by the drawing code
//
// Bars are scaled so the tallest bin fills the plot height. A dashed line marks
// the count every bin would hold if p-values were uniform (total / BINS); it is
// drawn on whichever panel is being rendered. A solid line marks p = ALPHA.
function drawPanel(ctx, x, y, w, h, hist, total, label, barColor, lineColor) {
  // panel background
  ctx.fillStyle = "#13131c";
  ctx.fillRect(x, y, w, h);

  // plot area, inset from the panel edges
  const plotLeft = x + 10;
  const plotRight = x + w - 10;
  const plotTop = y + 26;
  const plotBottom = y + h - 26;
  const plotW = plotRight - plotLeft;
  const plotH = plotBottom - plotTop;

  // tallest bin, used as the vertical scale
  let peak = 0;
  for (let b = 0; b < BINS; b++) {
    if (hist[b] > peak) peak = hist[b];
  }

  // bars; bins whose midpoint lies below ALPHA get the highlight colour
  const barW = plotW / BINS - 1;
  for (let b = 0; b < BINS; b++) {
    const barH = peak > 0 ? (hist[b] / peak) * plotH : 0;
    const isSignificantBin = (b + 0.5) / BINS < ALPHA;
    ctx.fillStyle = isSignificantBin ? "rgba(255,210,80,0.95)" : barColor;
    ctx.fillRect(plotLeft + (b / BINS) * plotW, plotBottom - barH, barW, barH);
  }

  // dashed line at the count expected per bin under a uniform distribution
  if (total > 0 && peak > 0) {
    const uniformCount = total / BINS;
    const uniformY = plotBottom - (uniformCount / peak) * plotH;
    ctx.strokeStyle = "rgba(255,255,255,0.35)";
    ctx.setLineDash([4, 4]);
    ctx.beginPath();
    ctx.moveTo(plotLeft, uniformY);
    ctx.lineTo(plotRight, uniformY);
    ctx.stroke();
    ctx.setLineDash([]);
  }

  // vertical marker at p = ALPHA, with its caption
  const alphaX = plotLeft + ALPHA * plotW;
  ctx.strokeStyle = "rgba(255,210,80,0.6)";
  ctx.beginPath();
  ctx.moveTo(alphaX, plotTop);
  ctx.lineTo(alphaX, plotBottom);
  ctx.stroke();
  ctx.fillStyle = "rgba(255,210,80,0.85)";
  ctx.font = "10px monospace";
  ctx.fillText("α=0.05", alphaX + 3, plotTop + 10);

  // x-axis ticks and axis title
  const tickY = plotBottom + 12;
  const midX = plotLeft + plotW / 2;
  ctx.fillStyle = "#778";
  ctx.font = "10px monospace";
  ctx.fillText("0", plotLeft - 3, tickY);
  ctx.fillText("0.5", midX - 6, tickY);
  ctx.fillText("1", plotRight - 4, tickY);
  ctx.fillText("p-value", midX - 22, plotBottom + 22);

  // panel title
  ctx.fillStyle = "#e8e8f0";
  ctx.font = "bold 12px monospace";
  ctx.fillText(label, x + 8, y + 16);

  // study count, top-right of the plot
  ctx.fillStyle = "#789";
  ctx.font = "10px monospace";
  ctx.fillText(`n=${total.toLocaleString()}`, plotRight - 60, y + 16);
}
Comments (0)
Log in to comment.