fix(tasks): reap orphaned running tasks immediately at processor startup
Holding the singleton flock at startup proves no other processor is alive to heartbeat or complete anything, so every task still marked running is a corpse from a killed predecessor. Recover them all before the first dispatch (recoverOrphans now takes an 'all' mode) instead of waiting out the 60s heartbeat-staleness window — which used to leave a dead task showing 'running' alongside the genuinely-running next task for a minute whenever the service was restarted mid-task (e.g. by the deploy chain during initial setup). The idle-loop pass keeps the stale-only gate.

refactor(dashboard): slim the storage card back to chart + percentage

The disk card was only ever meant to be the donut and the % figure; drop the Apps/Docker/Other/Free legend rows and signal the deeper view with a corner expand glyph instead (the System page's chart-expand icon) — the card already opens /admin/system/storage on click.

Signed-off-by: librelad <librelad@digitalangels.vip>
This commit is contained in:
parent
0d10284203
commit
168924757e
@ -10,11 +10,13 @@
|
||||
<div class="stat-label">Installed Apps</div>
|
||||
</div>
|
||||
<div class="stat-card disk-stat-card" id="disk-stat-card" role="button" tabindex="0" title="View storage breakdown">
|
||||
<span class="disk-expand" aria-hidden="true">
|
||||
<svg viewBox="0 0 24 24" width="14" height="14"><path d="M3 9V3h6M21 9V3h-6M3 15v6h6M21 15v6h-6" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/></svg>
|
||||
</span>
|
||||
<div class="disk-donut-wrap">
|
||||
<div class="disk-donut" id="disk-donut"></div>
|
||||
<div class="disk-percentage" id="disk-percent">0%</div>
|
||||
</div>
|
||||
<div class="disk-legend" id="disk-legend"></div>
|
||||
<div class="chart-label">
|
||||
<div class="chart-text">Disk Used</div>
|
||||
</div>
|
||||
|
||||
@ -531,14 +531,6 @@ function waitForDashboardElements() {
|
||||
let _diskBreakdown = { apps: 0, docker: 0 };
|
||||
let _lastDisk = null; // { used, total } in bytes, last value we drew
|
||||
|
||||
// Format a byte count as a short human-readable string, e.g. "512 B" or
// "1.5 KB". Uses binary (1024-based) steps and tops out at TB.
//   n: byte count; anything Number() turns into NaN or 0 is treated as 0.
//   returns: "<value> <unit>" — no decimals for plain bytes, one decimal
//            for KB and above.
function _fmtBytes(n) {
|
||||
// Coerce to a number; `|| 0` maps NaN (and 0) to 0.
n = Number(n) || 0;
|
||||
const u = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||||
let i = 0;
|
||||
// Step up one unit per division by 1024, stopping at the last unit so
// very large values stay expressed in TB.
while (n >= 1024 && i < u.length - 1) { n /= 1024; i++; }
|
||||
// i === 0 means the value is still in bytes, so print it without decimals.
return `${n.toFixed(i ? 1 : 0)} ${u[i]}`;
|
||||
}
|
||||
|
||||
// Hand-rolled SVG donut. Segments: [{ color, value }]; the ring fills
|
||||
// proportionally and any remainder shows as the track.
|
||||
function _diskDonutSvg(segments) {
|
||||
@ -610,14 +602,6 @@ function updateDiskChart(data) {
|
||||
{ color: 'rgba(var(--text-rgb), 0.35)', value: other },
|
||||
{ color: 'rgba(var(--text-rgb), 0.12)', value: free },
|
||||
]);
|
||||
|
||||
const legendEl = document.getElementById('disk-legend');
|
||||
if (legendEl) {
|
||||
const row = (cls, label, val) =>
|
||||
`<div class="disk-leg-row"><span class="disk-leg-dot ${cls}"></span><span class="disk-leg-k">${label}</span><span class="disk-leg-v">${_fmtBytes(val)}</span></div>`;
|
||||
legendEl.innerHTML = row('apps', 'Apps', apps) + row('docker', 'Docker', docker)
|
||||
+ row('other', 'Other', other) + row('free', 'Free', free);
|
||||
}
|
||||
}
|
||||
|
||||
// Minimal data loading (fallback)
|
||||
|
||||
@ -2502,8 +2502,11 @@ html[data-theme="nebula"]::after {
|
||||
}
|
||||
}
|
||||
|
||||
/* Disk donut (frontpage) — apps / docker / other / free, % used in centre. */
|
||||
/* Disk donut (frontpage) — segmented ring with % used in the centre. The whole
|
||||
card opens the full Storage breakdown; the corner glyph (same as the System
|
||||
page's chart-expand button) is the visual hint that there's more behind it. */
|
||||
.disk-stat-card {
|
||||
position: relative;
|
||||
cursor: pointer;
|
||||
gap: 4px;
|
||||
transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease;
|
||||
@ -2512,6 +2515,21 @@ html[data-theme="nebula"]::after {
|
||||
border-color: rgba(var(--accent-rgb), 0.4);
|
||||
box-shadow: 0 6px 22px rgba(var(--accent-rgb), 0.10);
|
||||
}
|
||||
.disk-expand {
|
||||
position: absolute;
|
||||
top: 10px;
|
||||
right: 10px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 26px;
|
||||
height: 26px;
|
||||
border-radius: 8px;
|
||||
color: rgba(var(--text-rgb), 0.45);
|
||||
background: rgba(var(--text-rgb), 0.06);
|
||||
transition: color 0.15s ease, background 0.15s ease;
|
||||
}
|
||||
.disk-stat-card:hover .disk-expand { color: var(--accent); background: rgba(var(--accent-rgb), 0.16); }
|
||||
.disk-donut-wrap {
|
||||
position: relative;
|
||||
width: 104px;
|
||||
@ -2524,32 +2542,6 @@ html[data-theme="nebula"]::after {
|
||||
display: block;
|
||||
}
|
||||
.disk-stat-card .disk-percentage { font-size: 20px; }
|
||||
.disk-legend {
|
||||
display: grid;
|
||||
grid-template-columns: auto 1fr auto;
|
||||
gap: 3px 8px;
|
||||
align-items: center;
|
||||
max-width: 200px;
|
||||
margin: 12px auto 0;
|
||||
text-align: left;
|
||||
font-size: 0.78rem;
|
||||
}
|
||||
.disk-leg-row { display: contents; }
|
||||
.disk-leg-dot {
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
border-radius: 3px;
|
||||
}
|
||||
.disk-leg-dot.apps { background: var(--accent); }
|
||||
.disk-leg-dot.docker { background: var(--status-info); }
|
||||
.disk-leg-dot.other { background: rgba(var(--text-rgb), 0.35); }
|
||||
.disk-leg-dot.free { background: rgba(var(--text-rgb), 0.18); }
|
||||
.disk-leg-k { color: rgba(var(--text-rgb), 0.7); }
|
||||
.disk-leg-v {
|
||||
color: var(--text-primary);
|
||||
font-variant-numeric: tabular-nums;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
/* New disk circle chart styles */
|
||||
.disk-chart {
|
||||
|
||||
@ -16,7 +16,9 @@
|
||||
# see a half-written file.
|
||||
# 6. Heartbeat: the processor stamps `heartbeat_at` every 5s while a task
|
||||
# runs. Stale heartbeats (>60s on a `running` task) are recovered to
|
||||
# `failed` on the next idle cycle.
|
||||
# `failed` on the next idle cycle; at startup, holding the singleton
|
||||
# lock proves every `running` task is orphaned, so they're recovered
|
||||
# immediately regardless of heartbeat age.
|
||||
# 7. Cancellable: a `<id>.cancel` marker file triggers SIGTERM → SIGKILL
|
||||
# to the task's process group.
|
||||
# ============================================================================
|
||||
@ -193,11 +195,22 @@ acquireSingletonLock() {
|
||||
# ============================================================================
|
||||
# ORPHAN RECOVERY
|
||||
# ============================================================================
|
||||
# `running` tasks whose heartbeat is older than HEARTBEAT_STALE_SECS, or
|
||||
# tasks that have no heartbeat and a started_at older than the same threshold,
|
||||
# are treated as dead (the processor that owned them is gone).
|
||||
# A `running` task is orphaned when the processor that owned it is gone. Two
|
||||
# detection modes:
|
||||
#
|
||||
# * "all" (startup) — we hold the singleton flock, which PROVES no other
|
||||
# processor is alive to heartbeat or complete anything. Every `running`
|
||||
# task on disk is therefore a corpse from a killed predecessor; reap them
|
||||
# immediately, regardless of heartbeat age. Waiting out the stale window
|
||||
# here is what used to leave a dead task showing "running" alongside the
|
||||
# genuinely-running next task for a minute after a service restart.
|
||||
#
|
||||
# * default (idle loop) — only reap heartbeats older than
|
||||
# HEARTBEAT_STALE_SECS. Belt-and-braces for anything the startup pass
|
||||
# couldn't see (e.g. a task file landing with status=running mid-flight).
|
||||
|
||||
recoverOrphans() {
|
||||
local mode="$1" # "all" = reap every running task; empty = stale-only
|
||||
command -v jq >/dev/null 2>&1 || return 0
|
||||
local now; now=$(date +%s)
|
||||
|
||||
@ -212,17 +225,20 @@ recoverOrphans() {
|
||||
|
||||
while IFS=$'\t' read -r file stamp; do
|
||||
[[ -n "$file" ]] || continue
|
||||
local id; id=$(basename "$file" .json)
|
||||
if [[ "$mode" == "all" ]]; then
|
||||
logInfo "Orphan recovery (startup): $id was left running by a previous processor -> failed"
|
||||
else
|
||||
[[ -n "$stamp" ]] || continue
|
||||
local stampEpoch; stampEpoch=$(date -d "$stamp" +%s 2>/dev/null) || continue
|
||||
if (( now - stampEpoch > HEARTBEAT_STALE_SECS )); then
|
||||
local id; id=$(basename "$file" .json)
|
||||
(( now - stampEpoch > HEARTBEAT_STALE_SECS )) || continue
|
||||
logInfo "Orphan recovery: $id (heartbeat age $((now - stampEpoch))s) -> failed"
|
||||
fi
|
||||
updateTaskFields "$file" \
|
||||
status failed \
|
||||
error_message "Task interrupted — processor died mid-run." \
|
||||
error_message "Task interrupted — the task processor restarted mid-run." \
|
||||
completed_at "$(date -Iseconds)" \
|
||||
updated_at "$(date -Iseconds)"
|
||||
fi
|
||||
done < <(jq -r 'select(.status == "running") | "\(input_filename)\t\(.heartbeat_at // .started_at // "")"' "${files[@]}" 2>/dev/null)
|
||||
}
|
||||
|
||||
@ -495,8 +511,11 @@ mainLoop() {
|
||||
if openFifoReader; then fifoReady=1; fi
|
||||
|
||||
# One full housekeeping pass at startup so anything left behind from a
|
||||
# previous processor gets noticed before we enter the fast path.
|
||||
recoverOrphans
|
||||
# previous processor gets noticed before we enter the fast path. "all" —
|
||||
# nothing can legitimately be running yet (we hold the flock and haven't
|
||||
# dispatched), so reap leftovers BEFORE the first dispatch can put a real
|
||||
# running task next to a stale one.
|
||||
recoverOrphans all
|
||||
dispatchPending
|
||||
cleanupZeroByteFiles
|
||||
|
||||
@ -538,7 +557,7 @@ run_task_processor() {
|
||||
setupTaskDir
|
||||
acquireSingletonLock
|
||||
cleanupZeroByteFiles
|
||||
recoverOrphans
|
||||
recoverOrphans all
|
||||
mainLoop
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user