fix(tasks): reap orphaned running tasks immediately at processor startup

Holding the singleton flock at startup proves no other processor is alive
to heartbeat or complete anything, so every task still marked running is
a corpse from a killed predecessor. Recover them all before the first
dispatch (recoverOrphans now takes an 'all' mode) instead of waiting out
the 60s heartbeat-staleness window — which used to leave a dead task
showing 'running' alongside the genuinely-running next task for a minute
whenever the service was restarted mid-task (e.g. by the deploy chain
during initial setup). The idle-loop pass keeps the stale-only gate.

refactor(dashboard): slim the storage card back to chart + percentage

The disk card was only ever meant to be the donut and the % figure; drop
the Apps/Docker/Other/Free legend rows and signal the deeper view with a
corner expand glyph instead (the System page's chart-expand icon) — the
card already opens /admin/system/storage on click.

Signed-off-by: librelad <librelad@digitalangels.vip>
This commit is contained in:
librelad 2026-06-12 23:39:28 +01:00
parent 0d10284203
commit 168924757e
4 changed files with 57 additions and 60 deletions

View File

@ -10,11 +10,13 @@
<div class="stat-label">Installed Apps</div>
</div>
<div class="stat-card disk-stat-card" id="disk-stat-card" role="button" tabindex="0" title="View storage breakdown">
<span class="disk-expand" aria-hidden="true">
<svg viewBox="0 0 24 24" width="14" height="14"><path d="M3 9V3h6M21 9V3h-6M3 15v6h6M21 15v6h-6" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/></svg>
</span>
<div class="disk-donut-wrap">
<div class="disk-donut" id="disk-donut"></div>
<div class="disk-percentage" id="disk-percent">0%</div>
</div>
<div class="disk-legend" id="disk-legend"></div>
<div class="chart-label">
<div class="chart-text">Disk Used</div>
</div>

View File

@ -531,14 +531,6 @@ function waitForDashboardElements() {
let _diskBreakdown = { apps: 0, docker: 0 };
let _lastDisk = null; // { used, total } in bytes, last value we drew
// Format a byte count as a short human-readable string, e.g. 1536 -> "1.5 KB".
// Input is coerced with Number(); anything that coerces to NaN or 0 is shown
// as "0 B". Units are 1024-based and stop at TB, so larger values stay in TB.
// Plain bytes are printed with no decimals, every larger unit with one.
function _fmtBytes(n) {
  n = Number(n) || 0;
  const u = ['B', 'KB', 'MB', 'GB', 'TB'];
  const last = u.length - 1;
  let i = 0;
  while (n >= 1024 && i < last) { n /= 1024; i++; }
  // toFixed() rounds AFTER the unit was picked, so a value just below the
  // boundary (e.g. 1023.97 KB) would print as "1024.0 KB". Promote it to the
  // next unit so it reads "1.0 MB" instead.
  if (i < last && Number(n.toFixed(i ? 1 : 0)) >= 1024) { n /= 1024; i++; }
  return `${n.toFixed(i ? 1 : 0)} ${u[i]}`;
}
// Hand-rolled SVG donut. Segments: [{ color, value }]; the ring fills
// proportionally and any remainder shows as the track.
function _diskDonutSvg(segments) {
@ -610,14 +602,6 @@ function updateDiskChart(data) {
{ color: 'rgba(var(--text-rgb), 0.35)', value: other },
{ color: 'rgba(var(--text-rgb), 0.12)', value: free },
]);
const legendEl = document.getElementById('disk-legend');
if (legendEl) {
const row = (cls, label, val) =>
`<div class="disk-leg-row"><span class="disk-leg-dot ${cls}"></span><span class="disk-leg-k">${label}</span><span class="disk-leg-v">${_fmtBytes(val)}</span></div>`;
legendEl.innerHTML = row('apps', 'Apps', apps) + row('docker', 'Docker', docker)
+ row('other', 'Other', other) + row('free', 'Free', free);
}
}
// Minimal data loading (fallback)

View File

@ -2502,8 +2502,11 @@ html[data-theme="nebula"]::after {
}
}
/* Disk donut (frontpage) — apps / docker / other / free, % used in centre. */
/* Disk donut (frontpage) — segmented ring with % used in the centre. The whole
card opens the full Storage breakdown; the corner glyph (same as the System
page's chart-expand button) is the visual hint that there's more behind it. */
.disk-stat-card {
position: relative;
cursor: pointer;
gap: 4px;
transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease;
@ -2512,6 +2515,21 @@ html[data-theme="nebula"]::after {
border-color: rgba(var(--accent-rgb), 0.4);
box-shadow: 0 6px 22px rgba(var(--accent-rgb), 0.10);
}
.disk-expand {
position: absolute;
top: 10px;
right: 10px;
display: inline-flex;
align-items: center;
justify-content: center;
width: 26px;
height: 26px;
border-radius: 8px;
color: rgba(var(--text-rgb), 0.45);
background: rgba(var(--text-rgb), 0.06);
transition: color 0.15s ease, background 0.15s ease;
}
.disk-stat-card:hover .disk-expand { color: var(--accent); background: rgba(var(--accent-rgb), 0.16); }
.disk-donut-wrap {
position: relative;
width: 104px;
@ -2524,32 +2542,6 @@ html[data-theme="nebula"]::after {
display: block;
}
.disk-stat-card .disk-percentage { font-size: 20px; }
.disk-legend {
display: grid;
grid-template-columns: auto 1fr auto;
gap: 3px 8px;
align-items: center;
max-width: 200px;
margin: 12px auto 0;
text-align: left;
font-size: 0.78rem;
}
.disk-leg-row { display: contents; }
.disk-leg-dot {
width: 10px;
height: 10px;
border-radius: 3px;
}
.disk-leg-dot.apps { background: var(--accent); }
.disk-leg-dot.docker { background: var(--status-info); }
.disk-leg-dot.other { background: rgba(var(--text-rgb), 0.35); }
.disk-leg-dot.free { background: rgba(var(--text-rgb), 0.18); }
.disk-leg-k { color: rgba(var(--text-rgb), 0.7); }
.disk-leg-v {
color: var(--text-primary);
font-variant-numeric: tabular-nums;
text-align: right;
}
/* New disk circle chart styles */
.disk-chart {

View File

@ -16,7 +16,9 @@
# see a half-written file.
# 6. Heartbeat: the processor stamps `heartbeat_at` every 5s while a task
# runs. Stale heartbeats (>60s on a `running` task) are recovered to
# `failed` on the next idle cycle.
# `failed` on the next idle cycle; at startup, holding the singleton
# lock proves every `running` task is orphaned, so they're recovered
# immediately regardless of heartbeat age.
# 7. Cancellable: a `<id>.cancel` marker file triggers SIGTERM → SIGKILL
# to the task's process group.
# ============================================================================
@ -193,11 +195,22 @@ acquireSingletonLock() {
# ============================================================================
# ORPHAN RECOVERY
# ============================================================================
# `running` tasks whose heartbeat is older than HEARTBEAT_STALE_SECS, or
# tasks that have no heartbeat and a started_at older than the same threshold,
# are treated as dead (the processor that owned them is gone).
# A `running` task is orphaned when the processor that owned it is gone. Two
# detection modes:
#
# * "all" (startup) — we hold the singleton flock, which PROVES no other
# processor is alive to heartbeat or complete anything. Every `running`
# task on disk is therefore a corpse from a killed predecessor; reap them
# immediately, regardless of heartbeat age. Waiting out the stale window
# here is what used to leave a dead task showing "running" alongside the
# genuinely-running next task for a minute after a service restart.
#
# * default (idle loop) — only reap heartbeats older than
# HEARTBEAT_STALE_SECS. Belt-and-braces for anything the startup pass
# couldn't see (e.g. a task file landing with status=running mid-flight).
recoverOrphans() {
local mode="$1" # "all" = reap every running task; empty = stale-only
command -v jq >/dev/null 2>&1 || return 0
local now; now=$(date +%s)
@ -212,17 +225,20 @@ recoverOrphans() {
while IFS=$'\t' read -r file stamp; do
[[ -n "$file" ]] || continue
[[ -n "$stamp" ]] || continue
local stampEpoch; stampEpoch=$(date -d "$stamp" +%s 2>/dev/null) || continue
if (( now - stampEpoch > HEARTBEAT_STALE_SECS )); then
local id; id=$(basename "$file" .json)
local id; id=$(basename "$file" .json)
if [[ "$mode" == "all" ]]; then
logInfo "Orphan recovery (startup): $id was left running by a previous processor -> failed"
else
[[ -n "$stamp" ]] || continue
local stampEpoch; stampEpoch=$(date -d "$stamp" +%s 2>/dev/null) || continue
(( now - stampEpoch > HEARTBEAT_STALE_SECS )) || continue
logInfo "Orphan recovery: $id (heartbeat age $((now - stampEpoch))s) -> failed"
updateTaskFields "$file" \
status failed \
error_message "Task interrupted — processor died mid-run." \
completed_at "$(date -Iseconds)" \
updated_at "$(date -Iseconds)"
fi
updateTaskFields "$file" \
status failed \
error_message "Task interrupted — the task processor restarted mid-run." \
completed_at "$(date -Iseconds)" \
updated_at "$(date -Iseconds)"
done < <(jq -r 'select(.status == "running") | "\(input_filename)\t\(.heartbeat_at // .started_at // "")"' "${files[@]}" 2>/dev/null)
}
@ -495,8 +511,11 @@ mainLoop() {
if openFifoReader; then fifoReady=1; fi
# One full housekeeping pass at startup so anything left behind from a
# previous processor gets noticed before we enter the fast path.
recoverOrphans
# previous processor gets noticed before we enter the fast path. "all" —
# nothing can legitimately be running yet (we hold the flock and haven't
# dispatched), so reap leftovers BEFORE the first dispatch can put a real
# running task next to a stale one.
recoverOrphans all
dispatchPending
cleanupZeroByteFiles
@ -538,7 +557,7 @@ run_task_processor() {
setupTaskDir
acquireSingletonLock
cleanupZeroByteFiles
recoverOrphans
recoverOrphans all
mainLoop
}