From 168924757e819deb29703df6d85537da4e5911a9 Mon Sep 17 00:00:00 2001 From: librelad Date: Fri, 12 Jun 2026 23:39:28 +0100 Subject: [PATCH] fix(tasks): reap orphaned running tasks immediately at processor startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holding the singleton flock at startup proves no other processor is alive to heartbeat or complete anything, so every task still marked running is a corpse from a killed predecessor. Recover them all before the first dispatch (recoverOrphans now takes an 'all' mode) instead of waiting out the 60s heartbeat-staleness window — which used to leave a dead task showing 'running' alongside the genuinely-running next task for a minute whenever the service was restarted mid-task (e.g. by the deploy chain during initial setup). The idle-loop pass keeps the stale-only gate. refactor(dashboard): slim the storage card back to chart + percentage The disk card was only ever meant to be the donut and the % figure; drop the Apps/Docker/Other/Free legend rows and signal the deeper view with a corner expand glyph instead (the System page's chart-expand icon) — the card already opens /admin/system/storage on click. Signed-off-by: librelad --- .../dashboard/html/dashboard-content.html | 4 +- .../core/data-loader/js/data-loader.js | 16 ------ .../frontend/core/theme/css/base.css | 46 +++++++---------- scripts/task/crontab_task_processor.sh | 51 +++++++++++++------ 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/containers/libreportal/frontend/components/dashboard/html/dashboard-content.html b/containers/libreportal/frontend/components/dashboard/html/dashboard-content.html index 7900cd6..37cab08 100755 --- a/containers/libreportal/frontend/components/dashboard/html/dashboard-content.html +++ b/containers/libreportal/frontend/components/dashboard/html/dashboard-content.html @@ -10,11 +10,13 @@
Installed Apps
+
0%
-
Disk Used
diff --git a/containers/libreportal/frontend/core/data-loader/js/data-loader.js b/containers/libreportal/frontend/core/data-loader/js/data-loader.js index c8c8587..5bf6147 100755 --- a/containers/libreportal/frontend/core/data-loader/js/data-loader.js +++ b/containers/libreportal/frontend/core/data-loader/js/data-loader.js @@ -531,14 +531,6 @@ function waitForDashboardElements() { let _diskBreakdown = { apps: 0, docker: 0 }; let _lastDisk = null; // { used, total } in bytes, last value we drew -function _fmtBytes(n) { - n = Number(n) || 0; - const u = ['B', 'KB', 'MB', 'GB', 'TB']; - let i = 0; - while (n >= 1024 && i < u.length - 1) { n /= 1024; i++; } - return `${n.toFixed(i ? 1 : 0)} ${u[i]}`; -} - // Hand-rolled SVG donut. Segments: [{ color, value }]; the ring fills // proportionally and any remainder shows as the track. function _diskDonutSvg(segments) { @@ -610,14 +602,6 @@ function updateDiskChart(data) { { color: 'rgba(var(--text-rgb), 0.35)', value: other }, { color: 'rgba(var(--text-rgb), 0.12)', value: free }, ]); - - const legendEl = document.getElementById('disk-legend'); - if (legendEl) { - const row = (cls, label, val) => - `
${label}${_fmtBytes(val)}
`; - legendEl.innerHTML = row('apps', 'Apps', apps) + row('docker', 'Docker', docker) - + row('other', 'Other', other) + row('free', 'Free', free); - } } // Minimal data loading (fallback) diff --git a/containers/libreportal/frontend/core/theme/css/base.css b/containers/libreportal/frontend/core/theme/css/base.css index 5026085..6f11de2 100755 --- a/containers/libreportal/frontend/core/theme/css/base.css +++ b/containers/libreportal/frontend/core/theme/css/base.css @@ -2502,8 +2502,11 @@ html[data-theme="nebula"]::after { } } -/* Disk donut (frontpage) — apps / docker / other / free, % used in centre. */ +/* Disk donut (frontpage) — segmented ring with % used in the centre. The whole + card opens the full Storage breakdown; the corner glyph (same as the System + page's chart-expand button) is the visual hint that there's more behind it. */ .disk-stat-card { + position: relative; cursor: pointer; gap: 4px; transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease; @@ -2512,6 +2515,21 @@ html[data-theme="nebula"]::after { border-color: rgba(var(--accent-rgb), 0.4); box-shadow: 0 6px 22px rgba(var(--accent-rgb), 0.10); } +.disk-expand { + position: absolute; + top: 10px; + right: 10px; + display: inline-flex; + align-items: center; + justify-content: center; + width: 26px; + height: 26px; + border-radius: 8px; + color: rgba(var(--text-rgb), 0.45); + background: rgba(var(--text-rgb), 0.06); + transition: color 0.15s ease, background 0.15s ease; +} +.disk-stat-card:hover .disk-expand { color: var(--accent); background: rgba(var(--accent-rgb), 0.16); } .disk-donut-wrap { position: relative; width: 104px; @@ -2524,32 +2542,6 @@ html[data-theme="nebula"]::after { display: block; } .disk-stat-card .disk-percentage { font-size: 20px; } -.disk-legend { - display: grid; - grid-template-columns: auto 1fr auto; - gap: 3px 8px; - align-items: center; - max-width: 200px; - margin: 12px auto 0; - text-align: left; - font-size: 0.78rem; -} -.disk-leg-row { display: contents; } -.disk-leg-dot { - width: 10px; - height: 10px; - border-radius: 3px; -} -.disk-leg-dot.apps { background: var(--accent); } -.disk-leg-dot.docker { background: var(--status-info); } -.disk-leg-dot.other { background: rgba(var(--text-rgb), 0.35); } -.disk-leg-dot.free { background: rgba(var(--text-rgb), 0.18); } -.disk-leg-k { color: rgba(var(--text-rgb), 0.7); } -.disk-leg-v { - color: var(--text-primary); - font-variant-numeric: tabular-nums; - text-align: right; -} /* New disk circle chart styles */ .disk-chart { diff --git a/scripts/task/crontab_task_processor.sh b/scripts/task/crontab_task_processor.sh index ee7992b..a868b61 100755 --- a/scripts/task/crontab_task_processor.sh +++ b/scripts/task/crontab_task_processor.sh @@ -16,7 +16,9 @@ # see a half-written file. # 6. Heartbeat: the processor stamps `heartbeat_at` every 5s while a task # runs. Stale heartbeats (>60s on a `running` task) are recovered to -# `failed` on the next idle cycle. +# `failed` on the next idle cycle; at startup, holding the singleton +# lock proves every `running` task is orphaned, so they're recovered +# immediately regardless of heartbeat age. # 7. Cancellable: a `.cancel` marker file triggers SIGTERM → SIGKILL # to the task's process group. # ============================================================================ @@ -193,11 +195,22 @@ acquireSingletonLock() { # ============================================================================ # ORPHAN RECOVERY # ============================================================================ -# `running` tasks whose heartbeat is older than HEARTBEAT_STALE_SECS, or -# tasks that have no heartbeat and a started_at older than the same threshold, -# are treated as dead (the processor that owned them is gone). +# A `running` task is orphaned when the processor that owned it is gone. Two +# detection modes: +# +# * "all" (startup) — we hold the singleton flock, which PROVES no other +# processor is alive to heartbeat or complete anything. Every `running` +# task on disk is therefore a corpse from a killed predecessor; reap them +# immediately, regardless of heartbeat age. Waiting out the stale window +# here is what used to leave a dead task showing "running" alongside the +# genuinely-running next task for a minute after a service restart. +# +# * default (idle loop) — only reap heartbeats older than +# HEARTBEAT_STALE_SECS. Belt-and-braces for anything the startup pass +# couldn't see (e.g. a task file landing with status=running mid-flight). recoverOrphans() { + local mode="$1" # "all" = reap every running task; empty = stale-only command -v jq >/dev/null 2>&1 || return 0 local now; now=$(date +%s) @@ -212,17 +225,20 @@ recoverOrphans() { while IFS=$'\t' read -r file stamp; do [[ -n "$file" ]] || continue - [[ -n "$stamp" ]] || continue - local stampEpoch; stampEpoch=$(date -d "$stamp" +%s 2>/dev/null) || continue - if (( now - stampEpoch > HEARTBEAT_STALE_SECS )); then - local id; id=$(basename "$file" .json) + local id; id=$(basename "$file" .json) + if [[ "$mode" == "all" ]]; then + logInfo "Orphan recovery (startup): $id was left running by a previous processor -> failed" + else + [[ -n "$stamp" ]] || continue + local stampEpoch; stampEpoch=$(date -d "$stamp" +%s 2>/dev/null) || continue + (( now - stampEpoch > HEARTBEAT_STALE_SECS )) || continue logInfo "Orphan recovery: $id (heartbeat age $((now - stampEpoch))s) -> failed" - updateTaskFields "$file" \ - status failed \ - error_message "Task interrupted — processor died mid-run." \ - completed_at "$(date -Iseconds)" \ - updated_at "$(date -Iseconds)" fi + updateTaskFields "$file" \ + status failed \ + error_message "Task interrupted — the task processor restarted mid-run." \ + completed_at "$(date -Iseconds)" \ + updated_at "$(date -Iseconds)" done < <(jq -r 'select(.status == "running") | "\(input_filename)\t\(.heartbeat_at // .started_at // "")"' "${files[@]}" 2>/dev/null) } @@ -495,8 +511,11 @@ mainLoop() { if openFifoReader; then fifoReady=1; fi # One full housekeeping pass at startup so anything left behind from a - # previous processor gets noticed before we enter the fast path. - recoverOrphans + # previous processor gets noticed before we enter the fast path. "all" — + # nothing can legitimately be running yet (we hold the flock and haven't + # dispatched), so reap leftovers BEFORE the first dispatch can put a real + # running task next to a stale one. + recoverOrphans all dispatchPending cleanupZeroByteFiles @@ -538,7 +557,7 @@ run_task_processor() { setupTaskDir acquireSingletonLock cleanupZeroByteFiles - recoverOrphans + recoverOrphans all mainLoop }