Compare commits

...

2 Commits

Author SHA1 Message Date
librelad
a517bd9889 Merge claude/2 2026-05-23 16:55:58 +01:00
librelad
1e6eb628ce fix(backup): survive DB engine first-init restart on restore
Live-restore of a server DB (MariaDB/Postgres) raced the engine's first-init:
it starts a throwaway temp server, runs setup, then restarts the real one. The
old ping-based readiness passed against the temp server and the load hit the
restart, failing once.

- _backupDbWaitReady now requires a real query to succeed on two consecutive
  checks, so the restart breaks the streak and we only proceed once the real
  server is stably up.
- The dump load is retried (idempotent — the dump drops+recreates each object)
  to ride past a final init bounce.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Signed-off-by: librelad <librelad@digitalangels.vip>
2026-05-23 16:55:58 +01:00

View File

@ -133,24 +133,50 @@ _backupDbDumpName()
esac
}
# Wait until a server database is genuinely ready for a load. On a fresh init
# (the restore case) the engine starts a throwaway temp server, runs its setup,
# then stops it and starts the real one — a simple ping passes against the temp
# server and the load then races the restart. So require a real query to
# succeed on two consecutive checks: the restart drops the streak, so we only
# return once the real server is stably up.
#
# Arguments:
#   $1 - engine kind: "postgres" probes with psql; any other value probes with
#        the mariadb client, falling back to mysql
#   $2 - container name handed to `docker exec`
#   $3 - maximum number of probes (default 45); probes are 2 seconds apart
# Returns: 0 after two consecutive successful probes, 1 if the probes run out.
_backupDbWaitReady()
{
    local kind="$1" container="$2" tries="${3:-45}"
    local i ok=0 good
    for ((i = 0; i < tries; i++)); do
        good=0
        # Run a real "SELECT 1" rather than a ping: credentials come from the
        # container's own environment, so nothing secret is passed from here.
        case "$kind" in
            postgres)
                docker exec "$container" sh -c 'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}" -tAc "SELECT 1"' >/dev/null 2>&1 && good=1 ;;
            *)
                docker exec "$container" sh -c 'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; mariadb -uroot -p"$RP" -N -e "SELECT 1" 2>/dev/null || mysql -uroot -p"$RP" -N -e "SELECT 1"' >/dev/null 2>&1 && good=1 ;;
        esac
        if [[ $good -eq 1 ]]; then
            ok=$((ok + 1))
            [[ $ok -ge 2 ]] && return 0
        else
            # Any failed probe resets the streak, so a mid-init restart forces
            # two fresh consecutive successes.
            ok=0
        fi
        sleep 2
    done
    return 1
}
# Load one server dump into its (freshly initialised) container. The gzipped
# dump is decompressed and piped to the database client's stdin inside the
# container; all client output is discarded.
#
# Arguments:
#   $1 - engine kind: "postgres" loads with psql (ON_ERROR_STOP=1); any other
#        value loads with the mariadb client, or mysql when mariadb is absent
#   $2 - container name handed to `docker exec`
#   $3 - path to the gzip-compressed dump file
# Returns: 0 only if both the decompression and the load succeeded, non-zero
#          otherwise, so the caller can retry.
_backupDbImport()
{
    local kind="$1" container="$2" dump="$3"
    local loader
    local -a status
    case "$kind" in
        postgres)
            loader='export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -v ON_ERROR_STOP=1 -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"' ;;
        *)
            # Pick ONE client up front. A "mariadb || mysql" chain would run
            # mysql against already-consumed (empty) stdin after a mid-load
            # mariadb failure, and its exit 0 would hide that failure.
            loader='RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; if command -v mariadb >/dev/null 2>&1; then mariadb -uroot -p"$RP"; else mysql -uroot -p"$RP"; fi' ;;
    esac
    sudo gzip -dc "$dump" | docker exec -i "$container" sh -c "$loader" >/dev/null 2>&1
    # Capture both stages immediately: without pipefail the pipeline's own
    # status is only docker's, so a corrupt/unreadable dump would look loaded.
    status=("${PIPESTATUS[@]}")
    [[ ${status[0]} -eq 0 && ${status[1]} -eq 0 ]]
}
# Dump every declared database for an app to consistent files inside the app
# dir, while the containers keep running. Called on the live path only.
backupDbDump()
@ -309,23 +335,22 @@ restoreDbReplayPostStart()
continue
fi
case "$kind" in
postgres)
if sudo gzip -dc "$dump" | docker exec -i "$container" sh -c \
'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"' >/dev/null 2>&1; then
isSuccessful "postgres dump loaded into $container"
else
isError "Loading postgres dump into $container failed"
fi
;;
mysql|mariadb)
if sudo gzip -dc "$dump" | docker exec -i "$container" sh -c \
'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; (mariadb -uroot -p"$RP" 2>/dev/null || mysql -uroot -p"$RP")' >/dev/null 2>&1; then
isSuccessful "$kind dump loaded into $container"
else
isError "Loading $kind dump into $container failed"
fi
;;
esac
# Retry the load: even after the readiness streak, a fresh engine can
# bounce once more as it finishes init. The dump drops+recreates each
# object, so re-running is idempotent.
local attempt loaded=1
for attempt in 1 2 3 4 5; do
if _backupDbImport "$kind" "$container" "$dump"; then
loaded=0; break
fi
isNotice "$container not ready for load yet (attempt $attempt) — retrying"
sleep 5
_backupDbWaitReady "$kind" "$container" >/dev/null 2>&1
done
if [[ $loaded -eq 0 ]]; then
isSuccessful "$kind dump loaded into $container"
else
isError "Loading $kind dump into $container failed after retries; data left for manual recovery"
fi
done < <(backupDbDescriptors "$app")
}