From d9f2feef052da4b20307d49e7e2423dc24f1049d Mon Sep 17 00:00:00 2001 From: librelad Date: Sat, 23 May 2026 15:09:14 +0100 Subject: [PATCH] feat(backup): consistent live database backups with auto strategy Adds a logical-dump path so apps with a database can be backed up with zero downtime and full consistency, instead of stopping the container. - backup_db.sh: dump each declared DB live (mysqldump --single-transaction / pg_dump / sqlite3 .backup), exclude the raw data dir from the snapshot, and replay the dump on restore (pre-start rehydrate for sqlite, post-start load for server engines). - Databases are declared via a 'libreportal.backup.db' compose label so the metadata travels with the app in the snapshot. - New 'auto' strategy (now the default): live where a DB is dumpable or the app is marked live-safe, stop-snapshot-start otherwise. Explicit stop/pause/live remain as overrides. - restic/borg/kopia adapters honour an exclude list on the live path. - Manifest records the resolved per-app strategy and dumped databases. 
Co-Authored-By: Claude Opus 4.7 Signed-off-by: librelad --- configs/backup/backup_engine | 2 +- scripts/backup/app/backup_app_start.sh | 23 +- scripts/backup/db/backup_db.sh | 291 ++++++++++++++++++ scripts/backup/engine/borg_backup.sh | 8 + scripts/backup/engine/kopia_backup.sh | 20 ++ scripts/backup/engine/restic_backup.sh | 9 + scripts/backup/manifest/manifest_collect.sh | 17 +- scripts/restore/restore_app_start.sh | 12 + scripts/source/files/arrays/files_backup.sh | 1 + .../backup/webui_backup_dashboard.sh | 2 +- 10 files changed, 376 insertions(+), 9 deletions(-) create mode 100644 scripts/backup/db/backup_db.sh diff --git a/configs/backup/backup_engine b/configs/backup/backup_engine index af9d866..d8f0484 100644 --- a/configs/backup/backup_engine +++ b/configs/backup/backup_engine @@ -3,7 +3,7 @@ # ================================================================================ CFG_BACKUP_ENGINE=restic # Default Backup Engine - Fallback engine for new locations (each location can override) [restic:Restic|borg:BorgBackup|kopia:Kopia] CFG_BACKUP_DEFAULT_PATH=/docker/backups # Default Backup Location - Base directory for locations set to Automatic path mode; each location lives in its own numbered subfolder (/) -CFG_BACKUP_STRATEGY=stop-snapshot-start # Backup Strategy - How containers are quiesced before snapshotting [stop-snapshot-start:Stop → snapshot → start (safe default)|pause-snapshot-unpause:Pause → snapshot → unpause (less downtime)|live:Live — snapshot while running (only with DB dump hooks)] +CFG_BACKUP_STRATEGY=auto # Backup Strategy - How containers are quiesced before snapshotting [auto:Automatic — live where safe, stop otherwise (recommended)|stop-snapshot-start:Stop → snapshot → start (always safe)|pause-snapshot-unpause:Pause → snapshot → unpause (less downtime)|live:Live — snapshot while running (force)] CFG_BACKUP_VERIFY_AFTER=true # Verify After Backup - Run integrity check after each backup CFG_BACKUP_VERIFY_DATA_PERCENT=5 # Verify Data 
Sample % - Percentage of repo data to checksum-verify weekly CFG_BACKUP_PARALLEL_REPOS=true # Parallel Repos - Push to all enabled locations in parallel diff --git a/scripts/backup/app/backup_app_start.sh b/scripts/backup/app/backup_app_start.sh index 625c90d..2607a52 100755 --- a/scripts/backup/app/backup_app_start.sh +++ b/scripts/backup/app/backup_app_start.sh @@ -30,14 +30,18 @@ backupAppStart() echo "" backupAppRunHook "$stored_app_name" pre + local strategy + strategy=$(backupResolveStrategy "$stored_app_name") + ((menu_number++)) echo "" - echo "---- $menu_number. Quiescing container(s) for $stored_app_name" + echo "---- $menu_number. Quiescing container(s) for $stored_app_name (strategy: $strategy)" echo "" - if [[ "$CFG_BACKUP_STRATEGY" == "pause-snapshot-unpause" ]]; then + if [[ "$strategy" == "pause-snapshot-unpause" ]]; then dockerComposePause "$stored_app_name" 2>/dev/null || dockerComposeDown "$stored_app_name" - elif [[ "$CFG_BACKUP_STRATEGY" == "live" ]]; then - isNotice "Live strategy — containers stay running (only use with logical-dump pre-hooks)" + elif [[ "$strategy" == "live" ]]; then + isNotice "Live strategy — containers stay running; databases dumped consistently" + backupDbDump "$stored_app_name" else dockerComposeDown "$stored_app_name" fi @@ -54,6 +58,13 @@ backupAppStart() echo "" echo "---- $menu_number. Snapshotting to all enabled locations" echo "" + # On the live path the raw DB data dirs are torn and superseded by the + # dumps written above — exclude them so the snapshot carries only the + # consistent copy. Other strategies quiesced the DB, so keep everything. + backup_exclude_paths="" + if [[ "$strategy" == "live" ]]; then + backup_exclude_paths=$(backupDbExcludePaths "$stored_app_name") + fi local primary_snapshot_id="" local primary_idx="" local first_loc=true @@ -73,9 +84,9 @@ backupAppStart() echo "" echo "---- $menu_number. 
Restarting container(s) for $stored_app_name" echo "" - if [[ "$CFG_BACKUP_STRATEGY" == "pause-snapshot-unpause" ]]; then + if [[ "$strategy" == "pause-snapshot-unpause" ]]; then dockerComposeUnpause "$stored_app_name" 2>/dev/null || dockerComposeUp "$stored_app_name" - elif [[ "$CFG_BACKUP_STRATEGY" != "live" ]]; then + elif [[ "$strategy" != "live" ]]; then dockerComposeUp "$stored_app_name" fi diff --git a/scripts/backup/db/backup_db.sh b/scripts/backup/db/backup_db.sh new file mode 100644 index 0000000..eea222a --- /dev/null +++ b/scripts/backup/db/backup_db.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# Live, consistent database backups. +# +# A file-level snapshot of a running database is "torn" — pages can be +# half-written when restic reads them, so the restored copy may not even +# mount. The fix is a *logical* dump taken while the service keeps running: +# mysqldump --single-transaction / pg_dump / sqlite3 .backup all produce a +# transactionally-consistent file with zero downtime. We snapshot that dump +# (and exclude the raw data dir, which is now redundant and unreliable), then +# replay it on restore. +# +# Apps declare their databases as compose labels so the metadata travels with +# the app (the compose is always copied to the install dir and always lives in +# the snapshot). 
# One label per database:
#
#   labels:
#     libreportal.backup.db: "<kind>:<container>:<datadir>:<path>"
#
#   kind       mysql | mariadb | postgres | sqlite
#   container  service container_name to `docker exec` into (server engines)
#   datadir    app-dir-relative folder holding raw DB files, excluded on live
#   path       app-dir-relative path to the sqlite file (sqlite only)
#
# Examples:
#   "mysql:nextcloud-db:db_data:"          MariaDB/MySQL in nextcloud-db, raw db_data/ excluded
#   "postgres:mastodon-db:postgres_data:"  Postgres in mastodon-db
#   "sqlite:::data/gitea.db"               sqlite file at data/gitea.db
#
# An app with no database can still opt into live snapshots (its files are
# static enough to capture safely) with:
#   labels:
#     libreportal.backup.live: "true"

# Subdir (relative to the app dir) where consistent dumps are written. It sits
# at the app root so it is never inside an excluded datadir, and rides along in
# the snapshot.
backup_db_dump_subdir=".lp-backup/db"

# Emit one "kind:container:datadir:path" line per declared database, read from
# the *installed* compose so it reflects what is actually deployed.
#
# Globals:   containers_dir (read) — prefix the app name is appended to
# Arguments: $1 - app name
# Outputs:   one descriptor per line on stdout; nothing when the compose file
#            is missing or declares no database
# Returns:   0 when the compose file is absent; otherwise the pipeline status
#
# Only mapping-style label lines ("libreportal.backup.db: value") are matched.
backupDbDescriptors()
{
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"
    [[ -f "$compose" ]] || return 0

    # Order matters: drop the key, then a trailing YAML comment, then trailing
    # whitespace (including a CR from CRLF files), and only then the quotes.
    # Stripping quotes first would leave the closing quote behind whenever the
    # line ends in whitespace.
    grep -E '^[[:space:]]*libreportal\.backup\.db[[:space:]]*:' "$compose" 2>/dev/null \
        | sed -E \
            -e 's/^[[:space:]]*libreportal\.backup\.db[[:space:]]*:[[:space:]]*//' \
            -e 's/[[:space:]]+#.*$//' \
            -e 's/[[:space:]]+$//' \
            -e 's/^["'\'']//' \
            -e 's/["'\'']$//'
}

# Succeeds when the app declares at least one database descriptor.
# Arguments: $1 - app name
backupDbHasDescriptors()
{
    local app="$1"
    [[ -n "$(backupDbDescriptors "$app")" ]]
}

# True when the app carries `libreportal.backup.live: "true"` — i.e. its data is
# safe to snapshot while running even though it has no database to dump.
backupAppIsLiveSafe()
{
    # Globals:   containers_dir (read)
    # Arguments: $1 - app name
    # Returns:   0 when the installed compose carries the live-safe label set
    #            to true, 1 otherwise (including a missing compose file)
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"
    [[ -f "$compose" ]] || return 1

    # Anchor the value: optionally quoted `true`, then only whitespace and an
    # optional comment. A bare prefix match would treat e.g. "truest" as an
    # opt-in to snapshotting a running app.
    grep -qE '^[[:space:]]*libreportal\.backup\.live[[:space:]]*:[[:space:]]*["'\'']?true["'\'']?[[:space:]]*(#.*)?$' "$compose" 2>/dev/null
}

# Resolve the effective strategy for one app. Explicit settings are honoured as
# power-user overrides; the default "auto" goes live only where we can guarantee
# consistency (a dumpable database, or an app blessed live-safe) and otherwise
# falls back to the always-safe stop-snapshot-start.
#
# Globals:   CFG_BACKUP_STRATEGY (read; unset/empty is treated as "auto")
# Arguments: $1 - app name
# Outputs:   live | pause-snapshot-unpause | stop-snapshot-start on stdout
#
# Any value other than the three explicit strategies is resolved the same way
# as "auto".
backupResolveStrategy()
{
    local app="$1"
    local configured="${CFG_BACKUP_STRATEGY:-auto}"

    case "$configured" in
        live|pause-snapshot-unpause|stop-snapshot-start)
            echo "$configured"
            return 0
            ;;
    esac

    if backupDbHasDescriptors "$app" || backupAppIsLiveSafe "$app"; then
        echo "live"
    else
        echo "stop-snapshot-start"
    fi
}

# Deterministic dump filename for a descriptor — backup writes it, restore reads
# it, both deriving the same name from the descriptor with no side metadata.
#
# Arguments: $1 - kind, $2 - container, $3 - path (used for sqlite only)
# Outputs:   the dump file name (no directory) on stdout
_backupDbDumpName()
{
    local kind="$1" container="$2" path="$3"
    case "$kind" in
        # Flatten the relative path so the name is a single path component.
        sqlite) printf '%s\n' "sqlite-${path//\//_}.sqlite.gz" ;;
        *)      printf '%s\n' "db-${container}.sql.gz" ;;
    esac
}

# Wait until a server database accepts connections (it has just been started
# fresh on restore, or is mid-load on a busy host).
_backupDbWaitReady()
{
    # Arguments: $1 - kind (postgres, anything else is probed as MySQL/MariaDB)
    #            $2 - container to `docker exec` into
    #            $3 - optional number of attempts (default 30)
    #            $4 - optional delay between attempts in seconds (default 2)
    # Returns:   0 as soon as a probe succeeds, 1 when every attempt failed
    local kind="$1" container="$2"
    local tries="${3:-30}" delay="${4:-2}"
    local attempt probe

    case "$kind" in
        postgres)
            probe='pg_isready -U "${POSTGRES_USER:-postgres}" -q' ;;
        *)
            probe='mariadb-admin ping -uroot -p"${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}" 2>/dev/null || mysqladmin ping -uroot -p"${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"' ;;
    esac

    for ((attempt = 1; attempt <= tries; attempt++)); do
        docker exec "$container" sh -c "$probe" >/dev/null 2>&1 && return 0
        # Sleeping after the final failed attempt would only delay the error.
        if ((attempt < tries)); then
            sleep "$delay"
        fi
    done
    return 1
}

# Run a dump producer and store its gzip-compressed stdout at a destination.
#
# The pipeline runs under `pipefail` so a failing producer (or gzip) is not
# masked by a successful `tee`, and the output is staged next to the target and
# only moved into place on success. On failure both the staged file and any
# previous dump at the destination are removed, so a stale or truncated dump is
# never snapshotted as if it were current.
#
# Arguments: $1 - destination file; remaining args - producer command
# Returns:   0 when the dump was written, non-zero otherwise
_backupDbStoreDump()
{
    local dest="$1"
    shift
    local staged="$dest.partial"

    # stdin is detached so the producer can never eat the caller's input.
    if ( set -o pipefail; "$@" </dev/null 2>/dev/null | gzip | sudo tee "$staged" >/dev/null ); then
        sudo mv -f "$staged" "$dest"
    else
        sudo rm -f "$staged" "$dest"
        return 1
    fi
}

# Dump every declared database for an app to consistent files inside the app
# dir, while the containers keep running. Called on the live path only.
#
# Globals:   containers_dir, backup_db_dump_subdir, docker_install_user (read)
# Arguments: $1 - app name
# Returns:   0 when every declared database was dumped (or none is declared),
#            1 when at least one dump failed or its kind is unknown
backupDbDump()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump src tmp rc=0

    backupDbHasDescriptors "$app" || return 0

    sudo mkdir -p "$dump_dir"

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"

        case "$kind" in
            postgres)
                isNotice "Dumping postgres ($container) — live, consistent"
                if _backupDbStoreDump "$dump" docker exec "$container" sh -c \
                    'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; pg_dump --clean --if-exists -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"'; then
                    isSuccessful "postgres dump written ($container)"
                else
                    isError "postgres dump failed ($container)"; rc=1
                fi
                ;;
            mysql|mariadb)
                isNotice "Dumping $kind ($container) — live, consistent"
                # Pick the client up front instead of chaining `a || b`: with a
                # fallback, a first tool that dies mid-stream would have its
                # partial output followed by a second complete dump.
                if _backupDbStoreDump "$dump" docker exec "$container" sh -c \
                    'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; DB="${MARIADB_DATABASE:-$MYSQL_DATABASE}"; if command -v mariadb-dump >/dev/null 2>&1; then BIN=mariadb-dump; else BIN=mysqldump; fi; exec "$BIN" -uroot -p"$RP" --single-transaction --routines --triggers --databases "$DB"'; then
                    isSuccessful "$kind dump written ($container)"
                else
                    isError "$kind dump failed ($container)"; rc=1
                fi
                ;;
            sqlite)
                isNotice "Dumping sqlite ($path) — live, consistent"
                src="$app_dir/$path"
                if [[ ! -f "$src" ]]; then
                    isNotice "sqlite file $path not present yet — skipping"
                    continue
                fi
                # .backup takes a consistent copy even while the app writes.
                tmp="$dump_dir/.$(basename "$path").tmp"
                if sudo sqlite3 "$src" ".backup '$tmp'" 2>/dev/null \
                    && _backupDbStoreDump "$dump" sudo cat "$tmp"; then
                    isSuccessful "sqlite dump written ($path)"
                else
                    # Also covers a failed .backup, where the store step never ran.
                    sudo rm -f "$dump"
                    isError "sqlite dump failed ($path)"; rc=1
                fi
                sudo rm -f "$tmp"
                ;;
            *)
                isError "Unknown db kind '$kind' for $app — skipping"; rc=1 ;;
        esac
    done < <(backupDbDescriptors "$app")

    sudo chown -R "$docker_install_user":"$docker_install_user" "$dump_dir" 2>/dev/null
    return $rc
}

# Absolute paths to exclude from a live snapshot: the raw data dirs / sqlite
# files the dumps supersede. Echoed one per line for the engine adapters.
#
# A path is only emitted when the matching dump file exists and is non-empty,
# so a database whose dump failed keeps its raw files in the snapshot instead
# of being left out of the backup entirely.
#
# Globals:   containers_dir, backup_db_dump_subdir (read)
# Arguments: $1 - app name
backupDbExcludePaths()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"
        [[ -s "$dump" ]] || continue

        case "$kind" in
            sqlite)
                [[ -n "$path" ]] || continue
                echo "$app_dir/$path"
                echo "$app_dir/$path-wal"
                echo "$app_dir/$path-shm"
                ;;
            *)
                [[ -n "$datadir" ]] || continue
                echo "$app_dir/$datadir"
                ;;
        esac
    done < <(backupDbDescriptors "$app")
}

# Pre-start restore step. Runs after the snapshot is laid down but before the
# containers come up:
#   server  remove the (absent or stale) raw data dir so the engine first-run
#           init builds a clean, empty database for us to load into.
#   sqlite  put the consistent dump back at the real path so the app opens it.
+restoreDbRehydratePreStart() +{ + local app="$1" + local app_dir="$containers_dir$app" + local dump_dir="$app_dir/$backup_db_dump_subdir" + local desc kind container datadir path dump + + backupDbHasDescriptors "$app" || return 0 + + while IFS= read -r desc; do + [[ -z "$desc" ]] && continue + IFS=':' read -r kind container datadir path <<< "$desc" + dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")" + + case "$kind" in + sqlite) + [[ -f "$dump" ]] || { isNotice "No sqlite dump for $path — leaving app to initialise"; continue; } + sudo rm -f "$app_dir/$path" "$app_dir/$path-wal" "$app_dir/$path-shm" + sudo mkdir -p "$(dirname "$app_dir/$path")" + sudo gzip -dc "$dump" | sudo tee "$app_dir/$path" >/dev/null + sudo chown -R "$docker_install_user":"$docker_install_user" "$(dirname "$app_dir/$path")" + isSuccessful "sqlite $path rehydrated from dump" + ;; + *) + [[ -f "$dump" ]] || { isNotice "No dump for $container — keeping restored data dir as-is"; continue; } + [[ -n "$datadir" ]] && sudo rm -rf "${app_dir:?}/$datadir" + isNotice "Cleared $datadir — $container will init fresh, then load the dump" + ;; + esac + done < <(backupDbDescriptors "$app") +} + +# Post-start restore step. Server engines load their dump into the freshly +# initialised database once it is accepting connections. sqlite is already in +# place from the pre-start step, so it is a no-op here. +restoreDbReplayPostStart() +{ + local app="$1" + local app_dir="$containers_dir$app" + local dump_dir="$app_dir/$backup_db_dump_subdir" + local desc kind container datadir path dump + + backupDbHasDescriptors "$app" || return 0 + + while IFS= read -r desc; do + [[ -z "$desc" ]] && continue + IFS=':' read -r kind container datadir path <<< "$desc" + dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")" + [[ "$kind" == "sqlite" ]] && continue + [[ -f "$dump" ]] || continue + + isNotice "Waiting for $container to accept connections" + if ! 
_backupDbWaitReady "$kind" "$container"; then + isError "$container never became ready — dump not loaded; data dir left for manual recovery" + continue + fi + + case "$kind" in + postgres) + if sudo gzip -dc "$dump" | docker exec -i "$container" sh -c \ + 'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"' >/dev/null 2>&1; then + isSuccessful "postgres dump loaded into $container" + else + isError "Loading postgres dump into $container failed" + fi + ;; + mysql|mariadb) + if sudo gzip -dc "$dump" | docker exec -i "$container" sh -c \ + 'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; (mariadb -uroot -p"$RP" 2>/dev/null || mysql -uroot -p"$RP")' >/dev/null 2>&1; then + isSuccessful "$kind dump loaded into $container" + else + isError "Loading $kind dump into $container failed" + fi + ;; + esac + done < <(backupDbDescriptors "$app") +} diff --git a/scripts/backup/engine/borg_backup.sh b/scripts/backup/engine/borg_backup.sh index b295a13..d0282be 100644 --- a/scripts/backup/engine/borg_backup.sh +++ b/scripts/backup/engine/borg_backup.sh @@ -20,6 +20,13 @@ borgBackupAppToLocation() local comment="app=$app_name host=$host_tag engine=libreportal" [[ -n "$manifest_sha" ]] && comment+=" manifest=$manifest_sha" + # Exclude the raw DB data dirs on the live path (see backup_db.sh). + local exclude_args=() + local p + while IFS= read -r p; do + [[ -n "$p" ]] && exclude_args+=(--exclude "$p") + done <<< "${backup_exclude_paths:-}" + local loc_name loc_name=$(resticLocationName "$idx") isNotice "Snapshotting $app_name → $loc_name (archive: $archive)" @@ -27,6 +34,7 @@ borgBackupAppToLocation() sudo -E -u "$docker_install_user" borg create \ --comment "$comment" \ --compression auto,zstd \ + "${exclude_args[@]}" \ "::$archive" \ "$source_path" local rc=$? 
diff --git a/scripts/backup/engine/kopia_backup.sh b/scripts/backup/engine/kopia_backup.sh index 09865ae..8555839 100644 --- a/scripts/backup/engine/kopia_backup.sh +++ b/scripts/backup/engine/kopia_backup.sh @@ -22,10 +22,30 @@ kopiaBackupAppToLocation() loc_name=$(resticLocationName "$idx") isNotice "Snapshotting $app_name → $loc_name (kopia)" + # Kopia has no per-run --exclude; it reads .kopiaignore from the source + # tree. On the live path write the raw DB data dirs (made relative to the + # source) as ignore patterns, snapshot, then remove it so the rule never + # leaks into a later non-live backup of the same app. + local ignore_file="$source_path/.kopiaignore" + local wrote_ignore=false + if [[ -n "${backup_exclude_paths:-}" ]]; then + local rel + : | sudo tee "$ignore_file" >/dev/null + while IFS= read -r p; do + [[ -z "$p" ]] && continue + rel="/${p#"$source_path"/}" + echo "$rel" | sudo tee -a "$ignore_file" >/dev/null + done <<< "$backup_exclude_paths" + sudo chown "$docker_install_user":"$docker_install_user" "$ignore_file" 2>/dev/null + wrote_ignore=true + fi + local output output=$(sudo -E -u "$docker_install_user" kopia snapshot create "$source_path" "${tags[@]}" --json 2>&1) local rc=$? + [[ "$wrote_ignore" == true ]] && sudo rm -f "$ignore_file" + local snapshot_id snapshot_id=$(echo "$output" | grep -oE '"id":\s*"[^"]+"' | head -1 | cut -d'"' -f4) diff --git a/scripts/backup/engine/restic_backup.sh b/scripts/backup/engine/restic_backup.sh index fad6de9..422223f 100644 --- a/scripts/backup/engine/restic_backup.sh +++ b/scripts/backup/engine/restic_backup.sh @@ -22,6 +22,14 @@ resticBackupAppToLocation() ) [[ -n "$manifest_sha" ]] && extra_tags+=(--tag "manifest=$manifest_sha") + # On the live path backup_app_start sets $backup_exclude_paths to the raw + # DB data dirs the dumps replace; keep them out of the snapshot. 
+ local exclude_args=() + local p + while IFS= read -r p; do + [[ -n "$p" ]] && exclude_args+=(--exclude "$p") + done <<< "${backup_exclude_paths:-}" + local loc_name loc_name=$(resticLocationName "$idx") isNotice "Snapshotting $app_name → $loc_name" @@ -29,6 +37,7 @@ resticBackupAppToLocation() output=$(sudo -E -u "$docker_install_user" restic backup \ --host "$host_tag" \ "${extra_tags[@]}" \ + "${exclude_args[@]}" \ --exclude-caches \ --json \ "$source_path" 2>&1) diff --git a/scripts/backup/manifest/manifest_collect.sh b/scripts/backup/manifest/manifest_collect.sh index 289abc3..b275b1f 100644 --- a/scripts/backup/manifest/manifest_collect.sh +++ b/scripts/backup/manifest/manifest_collect.sh @@ -47,6 +47,20 @@ manifestCollect() local file_count file_count=$(sudo find "$app_dir" -type f 2>/dev/null | wc -l | tr -d ' ') + local strategy="${CFG_BACKUP_STRATEGY:-auto}" + declare -f backupResolveStrategy >/dev/null 2>&1 && strategy=$(backupResolveStrategy "$app_name") + + local databases_json="[]" + if declare -f backupDbDescriptors >/dev/null 2>&1; then + local dbs=() desc kind container datadir path + while IFS= read -r desc; do + [[ -z "$desc" ]] && continue + IFS=':' read -r kind container datadir path <<< "$desc" + dbs+=("{\"kind\":\"$kind\",\"container\":\"$container\",\"path\":\"$path\"}") + done < <(backupDbDescriptors "$app_name") + [[ ${#dbs[@]} -gt 0 ]] && databases_json="[$(IFS=,; echo "${dbs[*]}")]" + fi + cat <