From eecc5d29ba68d493c26a87e3103295e9e3d274d3 Mon Sep 17 00:00:00 2001 From: librelad Date: Sun, 31 May 2026 03:05:37 +0100 Subject: [PATCH] feat(reliability): continue-on-error config + honest checkSuccess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit checkSuccess silently reported '✓ Success' for failed commands, which is how the de-sudo write gaps (throttle stamp, passwords, updater) hid. Rework it: - Capture the real exit code up front; success path unchanged. - On failure, ALWAYS append to a greppable $logs_dir/error_report.log tagged with the caller's script:line + exit code — a failure can't hide behind a green check anymore. - New CFG_REQUIREMENT_CONTINUE_ON_ERROR (default true): log + continue so one failure doesn't abort the run and we surface EVERY issue in a single pass. Flip it off later for strict abort/prompt (the prior behaviour, preserved). Documents the 'local VAR=$(cmd); checkSuccess' footgun (local resets $?), which the next commit fixes across the tree. Co-Authored-By: Claude Opus 4.8 Signed-off-by: librelad --- configs/general/general_terminal | 1 + scripts/function/checks/check_success.sh | 95 ++++++++++++++---------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/configs/general/general_terminal b/configs/general/general_terminal index 4f6821d..e28bb85 100755 --- a/configs/general/general_terminal +++ b/configs/general/general_terminal @@ -13,5 +13,6 @@ CFG_REQUIREMENT_CONFIGS_AUTO_UPDATE=true # Auto Config Updates - CFG_REQUIREMENT_CONFIGS_AUTO_DELETE=true # Auto Config Deletes - Remove config options no longer present in the template CFG_REQUIREMENT_MISSING_IPS=false # IP Configuration Check - Check for and alert about missing IP configurations CFG_REQUIREMENT_CONTINUE_PROMPT=false # Continue Prompts - Show continue prompts during installation for user confirmation +CFG_REQUIREMENT_CONTINUE_ON_ERROR=true # Continue On Error - Log failures to error_report.log and continue instead of aborting (on by default to surface issues; turn off for strict abort once clean) CFG_REQUIREMENT_SUGGEST_INSTALLS=false # Install Suggestions - Enable application suggestions and recommendations during installation CFG_REQUIREMENT_SUGGEST_METRICS=true # Metrics Suggestions - Offer Prometheus and Grafana during first install (requires Install Suggestions enabled) diff --git a/scripts/function/checks/check_success.sh b/scripts/function/checks/check_success.sh index 0accb5e..3b6bcf8 100755 --- a/scripts/function/checks/check_success.sh +++ b/scripts/function/checks/check_success.sh @@ -1,53 +1,68 @@ #!/bin/bash +# checkSuccess "message" — report on the exit status of the PRECEDING command. +# +# IMPORTANT for callers: $? must still be the command's exit when this is called. +# `local VAR=$(cmd); checkSuccess ...` is a BUG — the `local`/`declare` builtin +# resets $? to 0, masking the command's real failure. Use `local VAR; VAR=$(cmd)` +# (split declaration from assignment) so $? survives. +# +# On failure this ALWAYS records the failure to a greppable error report +# ($logs_dir/error_report.log) with the caller's script:line + exit code, so a +# failure can never hide behind a green check again. Then it either continues +# (CFG_REQUIREMENT_CONTINUE_ON_ERROR=true, the default — surface every issue in a +# single pass) or falls back to the strict abort/prompt behaviour when that's off. function checkSuccess() { - if [ $? -eq 0 ]; then - isSuccessful "$1" + local rc=$? + local msg="$1" + + if [ "$rc" -eq 0 ]; then + isSuccessful "$msg" if [ -f "$logs_dir/$docker_log_file" ]; then - echo "✓ Success $1" | runInstallWrite -a "$logs_dir/$docker_log_file" >/dev/null + echo "✓ Success $msg" | runInstallWrite -a "$logs_dir/$docker_log_file" >/dev/null fi - else - isError "$1" + return 0 + fi - # Non-interactive (task processor / cron / piped): bail instead of - # blocking on read. - if [[ "$LIBREPORTAL_NONINTERACTIVE" == "1" ]] || [ ! -t 0 ]; then - if [ -f "$logs_dir/$docker_log_file" ]; then - isError " $1" | runInstallWrite -a "$logs_dir/$docker_log_file" >/dev/null - echo "===================================" | runInstallWrite -a "$logs_dir/$docker_log_file" >/dev/null - fi - isNotice "Non-interactive mode: aborting on error." - exit 1 - fi + # ---- failure ---- + isError "$msg" - while true; do - isQuestion "An error has occurred. Do you want to continue, exit or go to back to the Menu? (c/x/m) " - read -rp "" error_occurred - if [[ -n "$error_occurred" ]]; then - break - fi - isNotice "Please provide a valid input." - done + # Record EVERY failure to a dedicated, greppable report (manager-owned logs), + # tagged with the caller's script:line + exit code. Best-effort; never aborts. + local _where="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}" + local _stamp; _stamp="$(date '+%F %T' 2>/dev/null || echo now)" + printf '%s\t[exit %s]\t%s\t(%s)\n' "$_stamp" "$rc" "$msg" "$_where" \ + | runInstallWrite -a "$logs_dir/error_report.log" 2>/dev/null || true + if [ -f "$logs_dir/$docker_log_file" ]; then + isError " $msg (exit $rc, $_where)" | runInstallWrite -a "$logs_dir/$docker_log_file" >/dev/null 2>&1 || true + fi - if [[ "$error_occurred" == [cC] ]]; then - isNotice "Continuing after error has occurred." - fi + # Continue-on-error (default true): log and carry on so a single failure + # doesn't abort the whole run and we surface EVERY issue in one pass. Turn + # CFG_REQUIREMENT_CONTINUE_ON_ERROR off for strict abort once things are clean. + if [[ "${CFG_REQUIREMENT_CONTINUE_ON_ERROR:-true}" == "true" ]]; then + isNotice "continue-on-error: logged to $logs_dir/error_report.log — continuing." + return 0 + fi - if [[ "$error_occurred" == [xX] ]]; then - # Log the error output to the log file - isError " $1" | runInstallWrite -a "$logs_dir/$docker_log_file" - echo "===================================" | runInstallWrite -a "$logs_dir/$docker_log_file" - exit 1 # Exit the script with a non-zero status to stop the current action - fi + # ---- strict mode (continue-on-error off) ---- + # Non-interactive (task processor / cron / piped): bail instead of blocking. + if [[ "$LIBREPORTAL_NONINTERACTIVE" == "1" ]] || [ ! -t 0 ]; then + isNotice "Non-interactive mode: aborting on error." + exit 1 + fi - if [[ "$error_occurred" == [mM] ]]; then - # Log the error output to the log file - isError " $1" | runInstallWrite -a "$logs_dir/$docker_log_file" - echo "===================================" | runInstallWrite -a "$logs_dir/$docker_log_file" - if [[ "$initial_command2" == "terminal" ]]; then - resetToMenu; - fi - fi + while true; do + isQuestion "An error has occurred. Do you want to continue, exit or go to back to the Menu? (c/x/m) " + read -rp "" error_occurred + [[ -n "$error_occurred" ]] && break + isNotice "Please provide a valid input." + done + + [[ "$error_occurred" == [cC] ]] && isNotice "Continuing after error has occurred." + [[ "$error_occurred" == [xX] ]] && exit 1 + if [[ "$error_occurred" == [mM] && "$initial_command2" == "terminal" ]]; then + resetToMenu fi }