2

Bootstrap Distribution Builder

click to add a data point · [M] toggle mean/median · [R] reset

We have a single fixed sample shown as the strip on top. The **bootstrap** repeatedly draws a new sample of size $n$ **with replacement** from those same $n$ points and computes a statistic $\hat\theta^*$ — here the sample mean by default, or the sample median if you toggle. Each frame, one resample is taken (the chosen points pulse gold), and its statistic $\hat\theta^*$ is added to the growing histogram below. The shaded yellow band is the empirical 2.5/97.5 percentile interval — a bootstrap 95% confidence interval for the underlying parameter. For the mean, classical theory predicts $\bar{x}^* \approx \mathcal{N}(\bar{x},\, s^2/n)$, and that Normal curve is overlaid in orange once enough resamples accumulate; for the median there is no clean closed form, so we let the bootstrap *be* the answer. **Try this:** click far to the right of the data to add a single outlier. In `mean` mode the histogram shifts and widens dramatically; switch to `median` mode and the distribution barely budges — a live demonstration of why the median is a robust estimator.

idle
390 lines · vanilla
view source
// Bootstrap Distribution Builder.
// A fixed original sample is drawn as a strip on top. Each frame we resample
// WITH REPLACEMENT, pulse the selected points, compute the sample statistic
// (mean by default, median if toggled), and add it to a growing histogram
// of bootstrap statistics. Overlay the analytic CLT Normal N(xbar, s^2/n)
// once enough samples accumulate, plus a shaded 2.5/97.5 percentile band.
//
// Click on the strip to add a new data point (x-position becomes the value).
// Press [M] / [m] or click the toggle button to switch mean <-> median.
// Press [R] to reset (re-randomize the original sample).

// canvas size in pixels; set by init() and refreshed by tick() when it changes
let W = 0, H = 0;

// data
let originalSample = [];          // user-mutable: the "data we have"
let bootStats = [];               // bootstrap statistics; once BOOT_CAP is reached, old slots are overwritten
const BOOT_CAP = 50000;           // maximum number of statistics kept in bootStats
let histBins;                     // Int32Array of counts per bin (allocated by resetHistogram)
const BINS = 80;                  // number of histogram bins across the statRange() window
let total = 0;                    // resamples drawn since the last reset (keeps counting past BOOT_CAP)
let sumStat = 0;                  // running sum of the bootstrap statistics
let sumStatSq = 0;                // running sum of their squares (used for the SD shown in the HUD)

// pulse animation for which indices got picked last
let lastPicks = null;             // Int32Array of indices last drawn
let pulseAge = 0;                 // seconds since last pulse

// statistic mode
let mode = "mean"; // or "median"

// pacing
let accum = 0;                    // frame time (seconds) not yet converted into resamples

// cached sort for percentile band — sorting bootStats every frame is O(n log n)
// across up to 50k items at 60fps. Refresh at ~4Hz, which is plenty visually.
let sortedCache = null;           // ascending copy of bootStats, or null when there is none
let sortedAtTotal = -1;           // bootStats.length at the time sortedCache was built
let sortedAge = 0;                // seconds since sortedCache was built

// data domain (fixed so things don't jitter when user adds outliers)
const DATA_LO = 0;
const DATA_HI = 10;

// layout cache (computed each frame from W,H)
// holds the strip, hist, toggleBtn and resetBtn rectangles as {x, y, w, h}
let layout = null;

// Build a fresh random "default" original sample: 18 values forming a single
// cluster centred on 4 with sd ~1.2, each kept at least 0.1 inside
// [DATA_LO, DATA_HI]. Every call consumes new Math.random() draws, so two
// calls give different samples.
// Returns a plain array of numbers.
function defaultSample() {
  const COUNT = 18;
  const UNIFORMS_PER_DRAW = 6;
  const floor = DATA_LO + 0.1;
  const ceiling = DATA_HI - 0.1;
  const values = [];
  while (values.length < COUNT) {
    // The sum of 6 uniforms has mean 3 and variance 6/12 = 0.5, so
    // standardising it gives an approximately N(0, 1) draw.
    let uniformSum = 0;
    for (let k = 0; k < UNIFORMS_PER_DRAW; k++) uniformSum += Math.random();
    const z = (uniformSum - 3) / Math.sqrt(0.5);
    const raw = 4 + 1.2 * z;
    values.push(Math.min(ceiling, Math.max(floor, raw)));
  }
  return values;
}

// Throw away everything accumulated from resampling: histogram counts, stored
// statistics, running sums, the pulse highlight and the sorted cache.
// originalSample and mode are left untouched.
function resetHistogram() {
  // stored statistics — truncate in place so the same array object is reused
  bootStats.length = 0;
  histBins = new Int32Array(BINS);
  total = sumStat = sumStatSq = 0;

  // pulse highlight
  lastPicks = null;
  pulseAge = 0;

  // percentile-band cache
  sortedCache = null;
  sortedAtTotal = -1;
  sortedAge = 0;
}

// Arithmetic mean of a numeric array or typed array.
// An empty input yields NaN (0 / 0); callers guard against that themselves.
function sampleMean(arr) {
  let sum = 0;
  for (const value of arr) sum += value;
  return sum / arr.length;
}

// Sample standard deviation with the n - 1 denominator.
// Returns 0 when there are fewer than two values.
function sampleSD(arr) {
  const count = arr.length;
  if (count < 2) return 0;

  const center = sampleMean(arr);
  let squaredDeviations = 0;
  for (const value of arr) {
    const deviation = value - center;
    squaredDeviations += deviation * deviation;
  }
  return Math.sqrt(squaredDeviations / (count - 1));
}

// Median of a numeric array or typed array; the input itself is not modified
// (the values are copied into a Float64Array and that copy is sorted).
// Returns 0 for an empty input. For an even count the two middle values are
// averaged.
function sampleMedian(arr) {
  const n = arr.length;
  if (n === 0) return 0;
  // Float64Array#sort with no comparator orders numerically (NaN last), so it
  // replaces the generic Array.prototype.sort previously borrowed via .call().
  const sorted = Float64Array.from(arr).sort();
  const mid = n >> 1;
  return n % 2 ? sorted[mid] : 0.5 * (sorted[mid - 1] + sorted[mid]);
}

// Statistic for the current mode: the mean when mode is "mean", otherwise the
// median.
function statOf(values) {
  if (mode === "mean") {
    return sampleMean(values);
  }
  return sampleMedian(values);
}

// Display window {lo, hi} for the histogram's x-axis. It is derived from
// originalSample and mode on every call (nothing is cached here).
// With an empty sample the whole data domain is returned.
function statRange() {
  const n = originalSample.length;
  if (n === 0) {
    return { lo: DATA_LO, hi: DATA_HI };
  }

  // Centre the window on the statistic currently being bootstrapped.
  let center;
  if (mode === "mean") {
    center = sampleMean(originalSample);
  } else {
    center = sampleMedian(originalSample);
  }

  // For both mean and median, SE ~ s/sqrt(n) is a decent display width;
  // 4 SEs (never less than 0.6) leaves room for an outlier-driven mean.
  const spread = sampleSD(originalSample);
  const halfWidth = Math.max(4 * spread / Math.sqrt(Math.max(1, n)), 0.6);

  // Keep the window within one unit of the data domain so it never pans
  // off-screen, and never let it collapse below 0.5 wide.
  const lo = Math.max(center - halfWidth, DATA_LO - 1);
  let hi = Math.min(center + halfWidth, DATA_HI + 1);
  if (hi - lo < 0.5) {
    hi = lo + 0.5;
  }
  return { lo, hi };
}

// Draw one bootstrap resample (same size as originalSample, with
// replacement), compute its statistic and fold it into the accumulated state:
// lastPicks/pulseAge for the highlight, bootStats, histBins and the running
// totals. Does nothing when the original sample is empty.
function pushBoot() {
  const n = originalSample.length;
  if (n === 0) return;

  // One Math.random() call per slot, in index order.
  const picks = Int32Array.from({ length: n }, () => Math.floor(Math.random() * n));
  const drawn = Float64Array.from(picks, (k) => originalSample[k]);
  const stat = statOf(drawn);

  lastPicks = picks;
  pulseAge = 0;

  // Append until the cap is reached, then overwrite slots in rotation.
  const slot = bootStats.length < BOOT_CAP ? bootStats.length : total % BOOT_CAP;
  bootStats[slot] = stat;

  // Statistics that fall outside the display window are not binned, but they
  // still count towards the totals below.
  const { lo, hi } = statRange();
  const bin = Math.floor(((stat - lo) / (hi - lo)) * BINS);
  if (bin >= 0 && bin < BINS) histBins[bin] += 1;

  total += 1;
  sumStat += stat;
  sumStatSq += stat * stat;
}

// p-th quantile (p in [0, 1]) of an ascending array, using linear
// interpolation between the two neighbouring order statistics.
// Returns 0 for an empty array.
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;

  const position = (count - 1) * p;
  const below = Math.floor(position);
  const above = Math.ceil(position);
  const base = sorted[below];
  if (below === above) return base;

  const fraction = position - below;
  return base + (sorted[above] - base) * fraction;
}

// One-time setup: record the canvas size, generate the starting sample and
// clear all bootstrap state.
function init({ width, height }) {
  [W, H] = [width, height];
  originalSample = defaultSample();
  resetHistogram();
}

// ---- click region helpers ----
// True when point (x, y) lies inside rectangle r = {x, y, w, h}; points on
// the edges count as inside.
function inRect(x, y, r) {
  const withinX = x >= r.x && x <= r.x + r.w;
  const withinY = y >= r.y && y <= r.y + r.h;
  return withinX && withinY;
}

// Drain the pending clicks and route each one, in order, to the first region
// it hits: the mean/median toggle, the reset button, or the sample strip
// (which adds a data point at the clicked x-position).
// consumeClicks() returns an array of {x,y,button}; each click's own
// coordinates are used (falling back to input.mouseX / input.mouseY when a
// click carries none) so rapid clicks across separate UI regions don't all
// hit one rect.
function handleClicks(input) {
  if (!input.consumeClicks) return;

  // Clicks are consumed even when there is no layout yet to test them against.
  const pending = input.consumeClicks();
  if (!pending.length || !layout) return;

  for (let i = 0; i < pending.length; i++) {
    const click = pending[i];
    const px = (click && click.x != null) ? click.x : input.mouseX;
    const py = (click && click.y != null) ? click.y : input.mouseY;

    if (inRect(px, py, layout.toggleBtn)) {
      // toggle button: flip the statistic and start the histogram over
      mode = (mode === "mean") ? "median" : "mean";
      resetHistogram();
    } else if (inRect(px, py, layout.resetBtn)) {
      // reset button: new random sample
      originalSample = defaultSample();
      resetHistogram();
    } else if (inRect(px, py, layout.strip)) {
      // strip: map the x pixel to a data value; values on the domain edges
      // are rejected and the sample is capped at 200 points
      const fraction = (px - layout.strip.x) / layout.strip.w;
      const value = DATA_LO + fraction * (DATA_HI - DATA_LO);
      const hasRoom = originalSample.length < 200;
      if (hasRoom && value > DATA_LO && value < DATA_HI) {
        originalSample.push(value);
        resetHistogram();
      }
    }
  }
}

// Per-frame entry point: recompute the layout, process keyboard and click
// input, draw whatever resamples are due, then repaint the whole canvas
// (title bar + buttons, sample strip, HUD text, histogram with the percentile
// band, the Normal overlay and the sample-statistic marker).
//
// Parameters (one destructured object):
//   ctx           - drawing context; only canvas-2D style members are used
//                   (fillRect, strokeRect, fillText, arc, setLineDash, ...)
//   dt            - seconds elapsed since the previous frame
//   width, height - current canvas size in pixels
//   input         - object exposing justPressed(key), consumeClicks(),
//                   mouseX and mouseY; presumably supplied by the host page,
//                   whose exact contract is not visible in this file
function tick({ ctx, dt, width, height, input }) {
  if (width !== W || height !== H) { W = width; H = height; }

  // ---- layout ----
  const pad = 18;
  const titleH = 30;
  const hudH = 56;
  const stripH = 78;
  const stripY = titleH + 6;
  const histY = stripY + stripH + hudH + 8;
  const histH = H - histY - 50;

  // Rebuilt every frame so handleClicks() always tests against current sizes.
  layout = {
    strip: { x: pad, y: stripY, w: W - 2 * pad, h: stripH },
    hist:  { x: pad, y: histY,  w: W - 2 * pad, h: Math.max(60, histH) },
    toggleBtn: { x: W - pad - 120, y: 4, w: 120, h: 22 },
    resetBtn:  { x: W - pad - 200, y: 4, w: 72, h: 22 },
  };

  // ---- keys ----
  if (input.justPressed && (input.justPressed("m") || input.justPressed("M"))) {
    mode = (mode === "mean") ? "median" : "mean";
    resetHistogram();
  }
  if (input.justPressed && (input.justPressed("r") || input.justPressed("R"))) {
    originalSample = defaultSample();
    resetHistogram();
  }

  // ---- clicks ----
  handleClicks(input);

  // ---- pacing: draw a steady stream of bootstrap resamples ----
  accum += dt;
  const STEP = 0.05; // 20 resamples/sec baseline
  let toDraw = 0;
  while (accum > STEP) { accum -= STEP; toDraw++; }
  // hover-over-strip slows the rate so the user can aim a click
  if (layout && inRect(input.mouseX, input.mouseY, layout.strip)) {
    toDraw = Math.min(toDraw, 1);
  }
  for (let i = 0; i < toDraw; i++) pushBoot();

  pulseAge += dt;

  // ---- bg ----
  ctx.fillStyle = "#0a0a10";
  ctx.fillRect(0, 0, W, H);

  // ---- title + buttons ----
  ctx.fillStyle = "#e8e8f0";
  ctx.font = "bold 15px monospace";
  ctx.fillText("Bootstrap Distribution Builder", pad, 20);

  // toggle button
  const tb = layout.toggleBtn;
  ctx.fillStyle = mode === "mean" ? "#1e3a5f" : "#5a2a3a";
  ctx.fillRect(tb.x, tb.y, tb.w, tb.h);
  ctx.strokeStyle = "#88a";
  ctx.strokeRect(tb.x + 0.5, tb.y + 0.5, tb.w - 1, tb.h - 1);
  ctx.fillStyle = "#e8e8f0";
  ctx.font = "11px monospace";
  ctx.fillText(`stat: ${mode}  [M]`, tb.x + 10, tb.y + 15);

  // reset button
  const rb = layout.resetBtn;
  ctx.fillStyle = "#2a2a36";
  ctx.fillRect(rb.x, rb.y, rb.w, rb.h);
  ctx.strokeStyle = "#88a";
  ctx.strokeRect(rb.x + 0.5, rb.y + 0.5, rb.w - 1, rb.h - 1);
  ctx.fillStyle = "#e8e8f0";
  ctx.fillText("reset [R]", rb.x + 8, rb.y + 15);

  // ---- top: original-sample strip ----
  const s = layout.strip;
  ctx.fillStyle = "#13131c";
  ctx.fillRect(s.x, s.y, s.w, s.h);

  // x-axis ticks
  ctx.strokeStyle = "rgba(120,140,180,0.35)";
  ctx.fillStyle = "#778";
  ctx.font = "9px monospace";
  for (let v = DATA_LO; v <= DATA_HI + 1e-6; v++) {
    const x = s.x + ((v - DATA_LO) / (DATA_HI - DATA_LO)) * s.w;
    ctx.beginPath();
    ctx.moveTo(x, s.y + s.h - 12);
    ctx.lineTo(x, s.y + s.h - 6);
    ctx.stroke();
    ctx.fillText(v.toFixed(0), x - 3, s.y + s.h - 1);
  }

  // pick lookup for pulse: how many times each point was drawn in the most
  // recent resample
  const pickedCount = new Int32Array(originalSample.length);
  if (lastPicks) {
    for (let i = 0; i < lastPicks.length; i++) pickedCount[lastPicks[i]]++;
  }
  // fades from 1 to 0 over the 0.35 s after a resample
  const pulseAlpha = Math.max(0, 1 - pulseAge / 0.35);

  // points
  const yCenter = s.y + s.h * 0.45;
  for (let i = 0; i < originalSample.length; i++) {
    const v = originalSample[i];
    const x = s.x + ((v - DATA_LO) / (DATA_HI - DATA_LO)) * s.w;
    // stagger y a tiny bit so overlapping points are visible
    const y = yCenter + ((i * 37) % 11 - 5) * 1.2;
    const c = pickedCount[i] || 0;
    const baseR = 4;
    // picked points grow with their multiplicity (capped at 4 picks)
    const radius = baseR + (c > 0 ? 1.6 + Math.min(c, 4) * 0.6 : 0);
    if (c > 0) {
      // pulse halo
      ctx.fillStyle = `rgba(255,210,120,${(0.45 * pulseAlpha).toFixed(3)})`;
      ctx.beginPath();
      ctx.arc(x, y, radius + 4, 0, Math.PI * 2);
      ctx.fill();
    }
    ctx.fillStyle = c > 0
      ? `rgba(255,220,150,${(0.7 + 0.3 * pulseAlpha).toFixed(3)})`
      : "rgba(180,200,255,0.85)";
    ctx.beginPath();
    ctx.arc(x, y, radius, 0, Math.PI * 2);
    ctx.fill();
  }

  // mean / median markers on the strip
  if (originalSample.length > 0) {
    const stripMean = sampleMean(originalSample);
    const stripMedian = sampleMedian(originalSample);
    // dashed vertical marker with a label; skipped when it would fall
    // outside the strip
    const vline = (v, color, label) => {
      const x = s.x + ((v - DATA_LO) / (DATA_HI - DATA_LO)) * s.w;
      if (x < s.x || x > s.x + s.w) return;
      ctx.strokeStyle = color;
      ctx.setLineDash([3, 3]);
      ctx.beginPath();
      ctx.moveTo(x, s.y + 4);
      ctx.lineTo(x, s.y + s.h - 14);
      ctx.stroke();
      ctx.setLineDash([]);
      ctx.fillStyle = color;
      ctx.font = "10px monospace";
      ctx.fillText(label, x + 3, s.y + 12);
    };
    vline(stripMean, "rgba(120,220,255,0.95)", `x̄=${stripMean.toFixed(2)}`);
    vline(stripMedian, "rgba(255,160,200,0.95)", `med=${stripMedian.toFixed(2)}`);
  }

  // strip caption
  ctx.fillStyle = "#aab";
  ctx.font = "10px monospace";
  ctx.fillText(`original sample (n = ${originalSample.length}) — click strip to add a point`,
    s.x + 4, s.y - 2);

  // ---- HUD between strip and histogram ----
  const hudY = s.y + s.h + 8;
  ctx.fillStyle = "#9cf";
  ctx.font = "11px monospace";
  const xbar = originalSample.length ? sampleMean(originalSample) : 0;
  const sd = sampleSD(originalSample);
  const n = originalSample.length;
  const se = n > 0 ? sd / Math.sqrt(n) : 0;
  ctx.fillText(
    `x̄ = ${xbar.toFixed(3)}   s = ${sd.toFixed(3)}   n = ${n}   SE = s/√n = ${se.toFixed(3)}`,
    pad, hudY + 12);

  // bootstrap summary from the running sums (covers every resample drawn,
  // including those no longer held in bootStats)
  const empMean = total > 0 ? sumStat / total : 0;
  const empVar = total > 1 ? sumStatSq / total - empMean * empMean : 0;
  const empSE = Math.sqrt(Math.max(0, empVar));

  // percentile band — re-sort only when there is no cached copy or the cached
  // copy is older than 0.25 s (~4Hz). Newly arrived resamples alone do not
  // force a re-sort; resetHistogram() clears the cache, so a reset is still
  // picked up immediately.
  sortedAge += dt;
  let p025 = 0, p975 = 0, sorted = null;
  if (bootStats.length >= 40) {
    if (!sortedCache || sortedAge > 0.25) {
      sortedCache = bootStats.slice().sort((a, b) => a - b);
      sortedAtTotal = bootStats.length; // size of the snapshot that was sorted
      sortedAge = 0;
    }
    sorted = sortedCache;
    p025 = percentile(sorted, 0.025);
    p975 = percentile(sorted, 0.975);
  } else {
    // too few resamples for a meaningful interval
    sortedCache = null;
  }
  ctx.fillStyle = "#cfc";
  ctx.fillText(
    `boot stat (${mode}): mean ≈ ${empMean.toFixed(3)}   SD ≈ ${empSE.toFixed(3)}   ` +
    `95% CI ≈ [${p025.toFixed(3)}, ${p975.toFixed(3)}]   resamples = ${total}`,
    pad, hudY + 28);

  // ---- histogram panel ----
  const hp = layout.hist;
  ctx.fillStyle = "#13131c";
  ctx.fillRect(hp.x, hp.y, hp.w, hp.h);

  const r = statRange();
  const binW = (r.hi - r.lo) / BINS;

  // density max
  let histMax = 0;
  for (let i = 0; i < BINS; i++) if (histBins[i] > histMax) histMax = histBins[i];
  const densMax = total > 0 ? histMax / total / binW : 1;

  // Normal overlay (only for mean): N(xbar, s^2/n)
  const showNormal = (mode === "mean" && n >= 2 && total >= 40);
  const normMax = se > 0 ? 1 / (Math.sqrt(2 * Math.PI) * se) : 0;

  // shared vertical scale so both the bars and the Normal peak fit
  const yMax = Math.max(densMax * 1.1, normMax * 1.1, 1e-6);

  // shaded 95% CI band
  if (sorted) {
    const xL = hp.x + ((p025 - r.lo) / (r.hi - r.lo)) * hp.w;
    const xR = hp.x + ((p975 - r.lo) / (r.hi - r.lo)) * hp.w;
    // clip the band to the panel
    const xa = Math.max(hp.x, Math.min(xL, xR));
    const xb = Math.min(hp.x + hp.w, Math.max(xL, xR));
    if (xb > xa) {
      ctx.fillStyle = "rgba(255,220,120,0.10)";
      ctx.fillRect(xa, hp.y, xb - xa, hp.h);
      // edges
      ctx.strokeStyle = "rgba(255,220,120,0.55)";
      ctx.setLineDash([2, 3]);
      ctx.beginPath();
      ctx.moveTo(xa, hp.y); ctx.lineTo(xa, hp.y + hp.h);
      ctx.moveTo(xb, hp.y); ctx.lineTo(xb, hp.y + hp.h);
      ctx.stroke();
      ctx.setLineDash([]);
    }
  }

  // bars (drawn as densities: count / total / bin width)
  ctx.fillStyle = mode === "mean"
    ? "rgba(120,220,255,0.55)"
    : "rgba(255,160,200,0.55)";
  for (let i = 0; i < BINS; i++) {
    if (histBins[i] === 0) continue;
    const xCenter = r.lo + (i + 0.5) * binW;
    const dens = histBins[i] / Math.max(1, total) / binW;
    const x = hp.x + ((xCenter - r.lo) / (r.hi - r.lo)) * hp.w;
    const wpx = (binW / (r.hi - r.lo)) * hp.w;
    const hpx = (dens / yMax) * hp.h;
    ctx.fillRect(x - wpx / 2, hp.y + hp.h - hpx, Math.max(1, wpx - 0.5), hpx);
  }

  // Normal overlay, sampled once per horizontal pixel
  if (showNormal) {
    ctx.strokeStyle = "rgba(255,210,120,0.95)";
    ctx.lineWidth = 2;
    ctx.beginPath();
    for (let px = 0; px <= hp.w; px += 1) {
      const xv = r.lo + (px / hp.w) * (r.hi - r.lo);
      const z = (xv - xbar) / se;
      const p = Math.exp(-0.5 * z * z) / (Math.sqrt(2 * Math.PI) * se);
      const yp = hp.y + hp.h - (p / yMax) * hp.h;
      if (px === 0) ctx.moveTo(hp.x + px, yp);
      else ctx.lineTo(hp.x + px, yp);
    }
    ctx.stroke();
    ctx.lineWidth = 1;
  }

  // marker for sample statistic on the histogram (xbar or median of orig sample)
  if (n > 0) {
    const sv = mode === "mean" ? xbar : sampleMedian(originalSample);
    const xv = hp.x + ((sv - r.lo) / (r.hi - r.lo)) * hp.w;
    if (xv >= hp.x && xv <= hp.x + hp.w) {
      ctx.strokeStyle = "rgba(255,255,255,0.7)";
      ctx.setLineDash([4, 3]);
      ctx.beginPath();
      ctx.moveTo(xv, hp.y);
      ctx.lineTo(xv, hp.y + hp.h);
      ctx.stroke();
      ctx.setLineDash([]);
    }
  }

  // x-axis labels on hist
  ctx.fillStyle = "#778";
  ctx.font = "9px monospace";
  for (let k = 0; k <= 4; k++) {
    const t = k / 4;
    const xv = r.lo + t * (r.hi - r.lo);
    const x = hp.x + t * hp.w;
    ctx.fillText(xv.toFixed(2), x - 10, hp.y + hp.h + 11);
  }

  // panel caption
  ctx.fillStyle = "#aab";
  ctx.font = "10px monospace";
  const capLabel = mode === "mean"
    ? "bootstrap distribution of x̄*   (orange: CLT N(x̄, s²/n))"
    : "bootstrap distribution of median*   (no closed-form normal overlay)";
  ctx.fillText(capLabel, hp.x + 4, hp.y - 2);

  // footer
  ctx.fillStyle = "#778";
  ctx.font = "10px monospace";
  ctx.fillText(
    "click strip = add data point   [M] toggle mean/median   [R] reset sample   " +
    "tip: add a far-right outlier to see mean shift while median holds",
    pad, H - 8);
}

Comments (0)

Log in to comment.