librelad 6346d76a92 feat(system): binary ring history with 7-day retention + fullscreen detail UI
Replaces the JSON history file behind /api/system/history with a fixed-size
binary ring buffer on disk and adds a second, downsampled tier so the chart
can now span seven days, not just twenty-four hours.

Two on-disk rings under frontend/data/system/:
  metrics_ring_1m.bin  1440 pts @ 1 min  ( 24 h)
  metrics_ring_5m.bin  2016 pts @ 5 min  (  7 d)

Each point is 32 bytes (uint32 timestamp + 7 float32 metrics — cpu / mem /
swap / disk / load1 / net_rx / net_tx); files carry a 32-byte header with
magic, version, capacity, head, count, bucket seconds, and last bucket time
so they're self-describing and torn-write recoverable.

A persistent 1-minute ticker inside the backend (independent of whether
anyone's subscribed to /api/system/stream) composes points from /proc plus
the bash generator's latest snapshots and appends to the 1m ring; every
five minutes it averages the last five 1m points into the 5m ring. On
first run, the writer backfills the 1m ring from the legacy
metrics_history.json so first paint already has 24 h.

/api/system/history?range=N auto-selects the tier (≤1440 → 1m, else 5m),
keeps the existing { points, updated } shape, and additionally returns
`tier` for clients that care. Falls back to the legacy JSON on cold start.

Admin → System: 7d added to the range picker (now 1h / 6h / 24h / 7d),
swap + load1 promoted to their own trend cards, and every gauge / chart
card grows an Expand affordance that opens a fullscreen single-metric
deep-dive overlay:
  - Big themed chart with grid, gradient area, peak/min/now markers, and
    a live-pulsing "now" dot
  - Hover crosshair + tooltip scrubs the series with formatted time +
    value
  - now / peak / avg / min stat strip with deltas
  - Range picker (1h / 6h / 24h / 7d) re-fetches and re-themes per metric
  - 1 Hz live SSE feed updates the overlay's now-stat in real time
  - Escape / backdrop / close button all dismiss
  - Per-metric accent colour (cpu=accent, mem=info, disk/swap=warning,
    net_rx=success, net_tx=accent, load=accent) flows through gradient,
    border, dot, and stats card

Zero new dependencies — hand-rolled SVG and pointer events throughout.

Signed-off-by: librelad <librelad@digitalangels.vip>
2026-05-27 21:04:27 +01:00

300 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Live system metrics — the fast path behind the Admin → System gauges and the
// dashboard "pulse" tiles.
//
// Periodic host-side data (disks, network, docker, per-app, 24 h history) is
// produced by the webui_system_metrics generator into frontend/data/system/.
// This file serves the *live* path: CPU / memory / load read straight from
// /proc, optionally fused with the latest host JSON snapshot so a single SSE
// message carries everything a client needs to draw a frame.
//
// Endpoints:
// GET /live — single-shot JSON snapshot (kept for callers that still poll)
// GET /stream — Server-Sent Events; pushes a fused sample once per second.
// One /proc read per second across all subscribers (shared
// ticker), so 100 open tabs still cost one read/sec.
//
// Namespace note: this runs *inside* the libreportal container. /proc/stat,
// /proc/meminfo and /proc/loadavg are not namespaced, so they report host-wide
// values that match the generator's numbers. /proc/net/dev IS per-netns (it
// would show only this container's traffic), so the host generator owns
// network/disk and we splice its latest snapshot into each SSE message.
const express = require('express');
const fs = require('fs').promises;
const path = require('path');
const os = require('os');
const metricsWriter = require('../utils/metrics-writer.js');
const router = express.Router();
const CORES = os.cpus().length || 1;
const MIN_INTERVAL_MS = 750; // serve cache to anything faster than this
const STREAM_TICK_MS = 1000; // SSE push cadence — 1 Hz live feel
const HEARTBEAT_MS = 25000; // SSE comment frame to keep proxies from idling out
const HOST_JSON_DIR = path.join(__dirname, '..', '..', 'frontend', 'data', 'system');
let prevCpu = null; // { total, idle } from the last read
let cache = null; // { sample, at }
let inflight = null; // dedupe concurrent cache-miss reads
async function readCpu() {
const data = await fs.readFile('/proc/stat', 'utf8');
const first = data.split('\n', 1)[0]; // "cpu u n s i io irq sirq steal ..."
const n = first.trim().split(/\s+/).slice(1).map(Number);
const idle = (n[3] || 0) + (n[4] || 0); // idle + iowait
const total = n.reduce((a, b) => a + (b || 0), 0);
return { total, idle };
}
async function readMem() {
const data = await fs.readFile('/proc/meminfo', 'utf8');
const m = {};
for (const line of data.split('\n')) {
const mm = line.match(/^(\w+):\s+(\d+)/);
if (mm) m[mm[1]] = parseInt(mm[2], 10) * 1024; // kB -> bytes
}
const total = m.MemTotal || 0;
const available = m.MemAvailable || 0;
const used = Math.max(0, total - available);
const swapTotal = m.SwapTotal || 0;
const swapUsed = Math.max(0, swapTotal - (m.SwapFree || 0));
return {
total, used, available,
percent: total ? +(used / total * 100).toFixed(1) : 0,
swap_total: swapTotal, swap_used: swapUsed,
swap_percent: swapTotal ? +(swapUsed / swapTotal * 100).toFixed(1) : 0
};
}
async function readLoad() {
const data = await fs.readFile('/proc/loadavg', 'utf8');
const [l1, l5, l15] = data.trim().split(/\s+/).map(Number);
return { load1: l1 || 0, load5: l5 || 0, load15: l15 || 0 };
}
async function sample() {
const [cpuNow, memory, load] = await Promise.all([readCpu(), readMem(), readLoad()]);
let percent = 0;
if (prevCpu) {
const dt = cpuNow.total - prevCpu.total;
const di = cpuNow.idle - prevCpu.idle;
if (dt > 0) percent = +Math.max(0, Math.min(100, (1 - di / dt) * 100)).toFixed(1);
}
prevCpu = cpuNow;
return {
cpu: {
percent,
cores: CORES,
load1: load.load1, load5: load.load5, load15: load.load15,
load1_percent: +Math.min(100, load.load1 / CORES * 100).toFixed(1)
},
memory,
t: Date.now()
};
}
router.get('/live', async (req, res) => {
const now = Date.now();
if (cache && (now - cache.at) < MIN_INTERVAL_MS) {
res.set('Cache-Control', 'no-store');
return res.json(cache.sample);
}
try {
if (!inflight) {
inflight = sample()
.then((s) => { cache = { sample: s, at: Date.now() }; return s; })
.finally(() => { inflight = null; });
}
const s = await inflight;
res.set('Cache-Control', 'no-store');
res.json(s);
} catch (err) {
res.status(500).json({ error: 'metrics_unavailable' });
}
});
// ---------------------------------------------------------------------------
// SSE live stream
// ---------------------------------------------------------------------------
// One ticker for the whole process. Subscribers join/leave; the ticker only
// runs while at least one is connected, so an idle WebUI costs nothing.
const subscribers = new Set();
let tickHandle = null;
let heartbeatHandle = null;
let lastSample = null;
let hostJson = { metrics: null, disk: null, memory: null, apps: null };
let hostJsonLoadedAt = 0;
const HOST_JSON_REFRESH_MS = 5000; // re-read host snapshots every 5 s (they regen at most 1×/min)
// Read a JSON file but never throw — missing/invalid → previous value.
async function readJsonSafe(file, fallback = null) {
try {
const txt = await fs.readFile(file, 'utf8');
return JSON.parse(txt);
} catch (_) {
return fallback;
}
}
// Refresh the cached host-side JSON if it's been at least HOST_JSON_REFRESH_MS
// since the last read. Cheap when the files haven't changed because the OS
// page cache makes the read essentially free.
async function refreshHostJson(now) {
if (now - hostJsonLoadedAt < HOST_JSON_REFRESH_MS) return;
hostJsonLoadedAt = now;
const [metrics, disk, memory, apps] = await Promise.all([
readJsonSafe(path.join(HOST_JSON_DIR, 'metrics.json'), hostJson.metrics),
readJsonSafe(path.join(HOST_JSON_DIR, 'disk_usage.json'), hostJson.disk),
readJsonSafe(path.join(HOST_JSON_DIR, 'memory_usage.json'), hostJson.memory),
readJsonSafe(path.join(HOST_JSON_DIR, 'metrics_apps.json'), hostJson.apps)
]);
hostJson = { metrics, disk, memory, apps };
}
function ssePayload(s) {
// Fuse the live in-container sample with the latest host-side snapshot so
// a client gets everything it needs from one stream. The host fields tick
// slowly (≤ 1/min) but live alongside the 1 Hz CPU/mem feed.
const m = hostJson.metrics || {};
return {
t: s.t,
cpu: s.cpu,
memory: s.memory,
disks: Array.isArray(m.disks) ? m.disks : [],
network: m.network || { rx_rate: 0, tx_rate: 0 },
docker: m.docker || null,
apps: (hostJson.apps && Array.isArray(hostJson.apps.apps)) ? hostJson.apps.apps : []
};
}
async function tick() {
if (subscribers.size === 0) { // nothing to do — defensive
stopTicker();
return;
}
try {
const s = await sample();
const now = Date.now();
cache = { sample: s, at: now };
await refreshHostJson(now);
const payload = ssePayload(s);
lastSample = payload;
const frame = `data: ${JSON.stringify(payload)}\n\n`;
for (const res of subscribers) {
try { res.write(frame); } catch (_) { /* will be reaped on close */ }
}
} catch (_) { /* swallow — try again next tick */ }
}
function startTicker() {
if (tickHandle) return;
tick(); // fire immediately so the first frame is fresh
tickHandle = setInterval(tick, STREAM_TICK_MS);
// Heartbeat keeps proxies (Traefik/nginx) from idling the connection out;
// SSE comments start with ":" and are ignored by EventSource.
heartbeatHandle = setInterval(() => {
for (const res of subscribers) {
try { res.write(': hb\n\n'); } catch (_) {}
}
}, HEARTBEAT_MS);
}
function stopTicker() {
if (tickHandle) { clearInterval(tickHandle); tickHandle = null; }
if (heartbeatHandle) { clearInterval(heartbeatHandle); heartbeatHandle = null; }
}
router.get('/stream', async (req, res) => {
// SSE handshake. `no-transform` tells the compression middleware not to
// gzip this response (gzip buffers and would break streaming). `X-Accel-
// Buffering: no` tells nginx/Traefik to flush each event immediately.
res.set({
'Content-Type': 'text/event-stream; charset=utf-8',
'Cache-Control': 'no-store, no-transform',
Connection: 'keep-alive',
'X-Accel-Buffering': 'no'
});
res.flushHeaders?.();
// Initial "retry" hint — if the connection dies the browser will reopen
// after this many ms (default 3000 is fine but explicit is clearer).
res.write('retry: 3000\n\n');
subscribers.add(res);
startTicker();
// If we already have a fresh sample, ship it right now so the client doesn't
// have to wait STREAM_TICK_MS for its first frame.
if (lastSample) {
try { res.write(`data: ${JSON.stringify(lastSample)}\n\n`); } catch (_) {}
}
const cleanup = () => {
subscribers.delete(res);
if (subscribers.size === 0) stopTicker();
};
req.on('close', cleanup);
req.on('error', cleanup);
});
// ---------------------------------------------------------------------------
// History range query
// ---------------------------------------------------------------------------
// `range` is minutes back from now (1..10080 = 7 days). `keys` is an optional
// comma-list of metric names to project (defaults to the whole point).
//
// Two on-disk binary rings back this:
// 1m tier — 1440 pts @ 1-min (24 h)
// 5m tier — 2016 pts @ 5-min ( 7 d)
//
// Tier auto-selects from `range`: ≤ 1440 reads the 1m ring point-for-point;
// > 1440 reads the 5m ring (range/5 points). Caller can override with
// `?tier=1m|5m`. Falls back to the legacy JSON only if the binary ring is
// completely empty (e.g. fresh container, writer hasn't filled it yet).
const HISTORY_MAX_MIN = 10080; // 7 days
router.get('/history', async (req, res) => {
const range = Math.max(1, Math.min(HISTORY_MAX_MIN, parseInt(req.query.range, 10) || 60));
const tier = req.query.tier === '5m' || req.query.tier === '1m'
? req.query.tier
: (range > 1440 ? '5m' : '1m');
const wantPoints = tier === '5m' ? Math.ceil(range / 5) : range;
const keys = typeof req.query.keys === 'string' && req.query.keys.length
? req.query.keys.split(',').map((s) => s.trim()).filter(Boolean)
: null;
try {
let pts = await metricsWriter.read(wantPoints, tier);
let updated = null;
if (pts.length) {
updated = new Date(pts[pts.length - 1].t * 1000).toISOString();
} else {
// Cold start: writer hasn't filled the ring yet. Serve from the
// legacy JSON so the UI still has something to draw.
const file = path.join(HOST_JSON_DIR, 'metrics_history.json');
try {
const parsed = JSON.parse(await fs.readFile(file, 'utf8'));
pts = (Array.isArray(parsed?.points) ? parsed.points : []).slice(-wantPoints);
updated = parsed?.updated || null;
} catch (_) { /* leave pts empty */ }
}
const points = keys
? pts.map((p) => {
const out = { t: p.t };
for (const k of keys) if (k in p) out[k] = p[k];
return out;
})
: pts;
res.set('Cache-Control', 'no-store');
res.json({ range, tier, points, updated });
} catch (_) {
res.status(500).json({ error: 'history_unavailable', points: [] });
}
});
// Kick the persistent 1-min writer. It needs the same `sample()` we use for
// the SSE stream — passed in to avoid a circular require.
metricsWriter.start({
sampleFn: sample,
hostJsonFn: () => hostJson,
});
module.exports = router;