feat(backup): consistent live database backups with auto strategy

Adds a logical-dump path so apps with a database can be backed up with zero
downtime and full consistency, instead of stopping the container.

- backup_db.sh: dump each declared DB live (mysqldump --single-transaction /
  pg_dump / sqlite3 .backup), exclude the raw data dir from the snapshot, and
  replay the dump on restore (pre-start rehydrate for sqlite, post-start load
  for server engines).
- Databases are declared via a 'libreportal.backup.db' compose label so the
  metadata travels with the app in the snapshot.
- New 'auto' strategy (now the default): live where a DB is dumpable or the app
  is marked live-safe, stop-snapshot-start otherwise. Explicit stop/pause/live
  remain as overrides.
- restic/borg/kopia adapters honour an exclude list on the live path.
- Manifest records the resolved per-app strategy and dumped databases.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Signed-off-by: librelad <librelad@digitalangels.vip>
This commit is contained in:
librelad 2026-05-23 15:09:14 +01:00
parent ab5b70129f
commit d9f2feef05
10 changed files with 376 additions and 9 deletions

View File

@ -3,7 +3,7 @@
# ================================================================================
CFG_BACKUP_ENGINE=restic # Default Backup Engine - Fallback engine for new locations (each location can override) [restic:Restic|borg:BorgBackup|kopia:Kopia]
CFG_BACKUP_DEFAULT_PATH=/docker/backups # Default Backup Location - Base directory for locations set to Automatic path mode; each location lives in its own numbered subfolder (<path>/<id>)
CFG_BACKUP_STRATEGY=stop-snapshot-start # Backup Strategy - How containers are quiesced before snapshotting [stop-snapshot-start:Stop → snapshot → start (safe default)|pause-snapshot-unpause:Pause → snapshot → unpause (less downtime)|live:Live — snapshot while running (only with DB dump hooks)]
CFG_BACKUP_STRATEGY=auto # Backup Strategy - How containers are quiesced before snapshotting [auto:Automatic — live where safe, stop otherwise (recommended)|stop-snapshot-start:Stop → snapshot → start (always safe)|pause-snapshot-unpause:Pause → snapshot → unpause (less downtime)|live:Live — snapshot while running (force)]
CFG_BACKUP_VERIFY_AFTER=true # Verify After Backup - Run integrity check after each backup
CFG_BACKUP_VERIFY_DATA_PERCENT=5 # Verify Data Sample % - Percentage of repo data to checksum-verify weekly
CFG_BACKUP_PARALLEL_REPOS=true # Parallel Repos - Push to all enabled locations in parallel

View File

@ -30,14 +30,18 @@ backupAppStart()
echo ""
backupAppRunHook "$stored_app_name" pre
local strategy
strategy=$(backupResolveStrategy "$stored_app_name")
((menu_number++))
echo ""
echo "---- $menu_number. Quiescing container(s) for $stored_app_name"
echo "---- $menu_number. Quiescing container(s) for $stored_app_name (strategy: $strategy)"
echo ""
if [[ "$CFG_BACKUP_STRATEGY" == "pause-snapshot-unpause" ]]; then
if [[ "$strategy" == "pause-snapshot-unpause" ]]; then
dockerComposePause "$stored_app_name" 2>/dev/null || dockerComposeDown "$stored_app_name"
elif [[ "$CFG_BACKUP_STRATEGY" == "live" ]]; then
isNotice "Live strategy — containers stay running (only use with logical-dump pre-hooks)"
elif [[ "$strategy" == "live" ]]; then
isNotice "Live strategy — containers stay running; databases dumped consistently"
backupDbDump "$stored_app_name"
else
dockerComposeDown "$stored_app_name"
fi
@ -54,6 +58,13 @@ backupAppStart()
echo ""
echo "---- $menu_number. Snapshotting to all enabled locations"
echo ""
# On the live path the raw DB data dirs are torn and superseded by the
# dumps written above — exclude them so the snapshot carries only the
# consistent copy. Other strategies quiesced the DB, so keep everything.
backup_exclude_paths=""
if [[ "$strategy" == "live" ]]; then
backup_exclude_paths=$(backupDbExcludePaths "$stored_app_name")
fi
local primary_snapshot_id=""
local primary_idx=""
local first_loc=true
@ -73,9 +84,9 @@ backupAppStart()
echo ""
echo "---- $menu_number. Restarting container(s) for $stored_app_name"
echo ""
if [[ "$CFG_BACKUP_STRATEGY" == "pause-snapshot-unpause" ]]; then
if [[ "$strategy" == "pause-snapshot-unpause" ]]; then
dockerComposeUnpause "$stored_app_name" 2>/dev/null || dockerComposeUp "$stored_app_name"
elif [[ "$CFG_BACKUP_STRATEGY" != "live" ]]; then
elif [[ "$strategy" != "live" ]]; then
dockerComposeUp "$stored_app_name"
fi

View File

@ -0,0 +1,291 @@
#!/bin/bash
# Live, consistent database backups.
#
# A file-level snapshot of a running database is "torn" — pages can be
# half-written when the backup engine reads them, so the restored copy may not
# even start. The fix is a *logical* dump taken while the service keeps running:
# mysqldump --single-transaction / pg_dump / sqlite3 .backup all produce a
# transactionally-consistent file with zero downtime. We snapshot that dump
# (and exclude the raw data dir, which is now redundant and unreliable), then
# replay it on restore.
#
# Apps declare their databases as compose labels so the metadata travels with
# the app (the compose is always copied to the install dir and always lives in
# the snapshot). One label per database:
#
# labels:
# libreportal.backup.db: "<kind>:<container>:<datadir>:<path>"
#
# kind mysql | mariadb | postgres | sqlite
# container service container_name to `docker exec` into (server engines)
# datadir app-dir-relative folder holding raw DB files, excluded on live
# path app-dir-relative path to the sqlite file (sqlite only)
#
# Examples:
# "mysql:nextcloud-db:db_data:" MariaDB/MySQL in nextcloud-db, raw db_data/ excluded
# "postgres:mastodon-db:postgres_data:" Postgres in mastodon-db
# "sqlite:::data/gitea.db" sqlite file at data/gitea.db
#
# An app with no database can still opt into live snapshots (its files are
# static enough to capture safely) with:
# labels:
# libreportal.backup.live: "true"
# Sub-directory, relative to the app dir, that receives the consistent dumps.
# It hangs directly off the app root, so it is not inside any declared datadir
# (unless a descriptor names it explicitly) and is captured by the snapshot
# together with the rest of the app dir. backupDbDump writes into it; the
# restore functions read the dumps back from the same place.
backup_db_dump_subdir=".lp-backup/db"
# Emit one "kind:container:datadir:path" line per database declared in the
# app's *installed* compose file ($containers_dir<app>/docker-compose.yml), so
# the result reflects what is actually deployed.
#
# Only the mapping form of the label is recognised:
#     libreportal.backup.db: "<kind>:<container>:<datadir>:<path>"
#
# Arguments: $1 - app name
# Globals:   containers_dir (read)
# Outputs:   descriptors on stdout, one per line, with the label key, any
#            trailing "# comment", surrounding whitespace and one pair of
#            surrounding quotes removed
# Returns:   0 when the compose file is missing; otherwise sed's exit status
backupDbDescriptors()
{
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"
    [[ -f "$compose" ]] || return 0
    # Trailing whitespace (including the CR of a CRLF file) is trimmed *before*
    # the quotes are stripped; otherwise a line such as `"a:b:c:"   ` would
    # keep its closing quote and corrupt the last descriptor field.
    grep -E '^[[:space:]]*libreportal\.backup\.db[[:space:]]*:' "$compose" 2>/dev/null \
        | sed -E \
            -e 's/^[[:space:]]*libreportal\.backup\.db[[:space:]]*:[[:space:]]*//' \
            -e 's/[[:space:]]*#.*$//' \
            -e 's/[[:space:]]+$//' \
            -e 's/^["'\'']//' \
            -e 's/["'\'']$//'
}
# Succeeds (status 0) when the app declares at least one database descriptor,
# i.e. when backupDbDescriptors prints anything for it; fails (status 1)
# otherwise.
# Arguments: $1 - app name
backupDbHasDescriptors()
{
    local app="$1"
    local declared
    declared=$(backupDbDescriptors "$app")
    if [[ -z "$declared" ]]; then
        return 1
    fi
    return 0
}
# True when the installed compose carries `libreportal.backup.live: "true"` —
# the app's opt-in saying its data is safe to snapshot while running even
# though it has no database to dump.
#
# The value must be exactly true (bare, single- or double-quoted), optionally
# followed by whitespace and a "# comment". A value that merely *starts* with
# "true" (e.g. "true-after-migration") is not treated as an opt-in.
#
# Arguments: $1 - app name
# Globals:   containers_dir (read)
# Returns:   0 when the label is present and true; 1 when the compose file is
#            missing or the label is absent / not true
backupAppIsLiveSafe()
{
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"
    [[ -f "$compose" ]] || return 1
    grep -qE '^[[:space:]]*libreportal\.backup\.live[[:space:]]*:[[:space:]]*("true"|'\''true'\''|true)[[:space:]]*(#.*)?$' "$compose" 2>/dev/null
}
# Print the strategy that will actually be used for one app.
#
# An explicit CFG_BACKUP_STRATEGY of live, pause-snapshot-unpause or
# stop-snapshot-start is echoed back unchanged (power-user override). Any
# other value — including the default "auto" — resolves per app: "live" when
# the app declares a dumpable database or is marked live-safe, and
# "stop-snapshot-start" otherwise.
#
# Arguments: $1 - app name
# Globals:   CFG_BACKUP_STRATEGY (read; defaults to auto)
# Outputs:   the resolved strategy name on stdout
backupResolveStrategy()
{
    local app="$1"
    local configured="${CFG_BACKUP_STRATEGY:-auto}"

    if [[ "$configured" == "live" \
        || "$configured" == "pause-snapshot-unpause" \
        || "$configured" == "stop-snapshot-start" ]]; then
        echo "$configured"
        return 0
    fi

    local resolved="stop-snapshot-start"
    if backupDbHasDescriptors "$app" || backupAppIsLiveSafe "$app"; then
        resolved="live"
    fi
    echo "$resolved"
}
# Derive the dump filename for a descriptor. Backup writes this file and
# restore reads it; both compute the name from the descriptor alone, so no
# side metadata is needed.
#
# Arguments: $1 - kind, $2 - container, $3 - sqlite path
# Outputs:   "sqlite-<path with / replaced by _>.sqlite.gz" for sqlite,
#            "db-<container>.sql.gz" for every other kind
_backupDbDumpName()
{
    local db_kind="$1" db_container="$2" db_path="$3"

    if [[ "$db_kind" == "sqlite" ]]; then
        local flattened
        flattened=$(echo "$db_path" | tr '/' '_')
        printf '%s\n' "sqlite-${flattened}.sqlite.gz"
        return
    fi

    printf '%s\n' "db-${db_container}.sql.gz"
}
# Block until a server database inside a container accepts connections (it
# may have only just been started on restore, or be slow on a busy host).
#
# Probes up to 30 times and sleeps 2 seconds after every failed probe.
# postgres is probed with pg_isready; every other kind is probed with
# mariadb-admin ping, falling back to mysqladmin ping.
#
# Arguments: $1 - kind, $2 - container name to `docker exec` into
# Returns:   0 as soon as a probe succeeds, 1 when all probes failed
_backupDbWaitReady()
{
    local kind="$1" container="$2" tries=30
    local remaining=$tries
    local probe

    # Pick the in-container readiness probe once, up front.
    if [[ "$kind" == "postgres" ]]; then
        probe='pg_isready -U "${POSTGRES_USER:-postgres}" -q'
    else
        probe='mariadb-admin ping -uroot -p"${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}" 2>/dev/null || mysqladmin ping -uroot -p"${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"'
    fi

    while (( remaining > 0 )); do
        if docker exec "$container" sh -c "$probe" >/dev/null 2>&1; then
            return 0
        fi
        sleep 2
        remaining=$(( remaining - 1 ))
    done
    return 1
}
# Run a dump command, gzip its stdout and write the result to a file via sudo,
# failing when ANY stage of the pipeline fails (a plain `if a | b | c` only
# sees the status of the last stage). On failure the partial output file is
# removed so it can never be mistaken for a usable dump.
#
# Arguments: $1 - output file, $2.. - dump command and its arguments
# Returns:   0 when every stage succeeded, 1 otherwise
_backupDbGzipTo()
{
    local out="$1"
    shift
    local -a stages
    local st

    "$@" 2>/dev/null | gzip | sudo tee "$out" >/dev/null
    stages=("${PIPESTATUS[@]}")   # must be captured immediately after the pipeline

    for st in "${stages[@]}"; do
        if (( st != 0 )); then
            sudo rm -f -- "$out"
            return 1
        fi
    done
    return 0
}

# Dump every declared database for an app to consistent files inside the app
# dir ($backup_db_dump_subdir) while the containers keep running. Intended for
# the live path only.
#
#   postgres       pg_dump --clean --if-exists inside the container
#   mysql|mariadb  mariadb-dump when the container has it, else mysqldump,
#                  with --single-transaction
#   sqlite         sqlite3 .backup of the file on the host, then gzip
#
# A dump only counts as written when the dump command itself exited 0; a
# failed dump leaves no file behind instead of an empty or truncated .gz.
#
# Arguments: $1 - app name
# Globals:   containers_dir, backup_db_dump_subdir, docker_install_user (read)
# Returns:   0 when every declared database was dumped (or none is declared),
#            1 when at least one dump failed or a descriptor kind is unknown.
#            Callers that exclude the raw data dirs from the snapshot should
#            treat a non-zero status as "the dumps do not cover the data".
backupDbDump()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump src tmp rc=0

    backupDbHasDescriptors "$app" || return 0

    if ! sudo mkdir -p "$dump_dir"; then
        isError "Could not create dump dir $dump_dir"
        return 1
    fi

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"

        case "$kind" in
            postgres)
                isNotice "Dumping postgres ($container) — live, consistent"
                if _backupDbGzipTo "$dump" docker exec "$container" sh -c \
                    'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; pg_dump --clean --if-exists -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"'; then
                    isSuccessful "postgres dump written ($container)"
                else
                    isError "postgres dump failed ($container)"; rc=1
                fi
                ;;
            mysql|mariadb)
                isNotice "Dumping $kind ($container) — live, consistent"
                # Select the client by availability rather than `a || b`: with
                # `||` a dump that fails part-way would be followed by a second
                # dump appended to the same stream.
                if _backupDbGzipTo "$dump" docker exec "$container" sh -c \
                    'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; DB="${MARIADB_DATABASE:-$MYSQL_DATABASE}"; if command -v mariadb-dump >/dev/null 2>&1; then BIN=mariadb-dump; else BIN=mysqldump; fi; "$BIN" -uroot -p"$RP" --single-transaction --routines --triggers --databases "$DB"'; then
                    isSuccessful "$kind dump written ($container)"
                else
                    isError "$kind dump failed ($container)"; rc=1
                fi
                ;;
            sqlite)
                isNotice "Dumping sqlite ($path) — live, consistent"
                src="$app_dir/$path"
                if [[ ! -f "$src" ]]; then
                    isNotice "sqlite file $path not present yet — skipping"
                    continue
                fi
                # .backup takes a consistent copy even while the app writes.
                tmp="$dump_dir/.$(basename "$path").tmp"
                if sudo sqlite3 "$src" ".backup '$tmp'" 2>/dev/null \
                    && _backupDbGzipTo "$dump" sudo cat -- "$tmp"; then
                    sudo rm -f -- "$tmp"
                    isSuccessful "sqlite dump written ($path)"
                else
                    # Drop the scratch copy and any dump left from an earlier
                    # run so the snapshot cannot carry an out-of-date dump.
                    sudo rm -f -- "$tmp" "$dump"
                    isError "sqlite dump failed ($path)"; rc=1
                fi
                ;;
            *)
                isError "Unknown db kind '$kind' for $app — skipping"; rc=1 ;;
        esac
    done < <(backupDbDescriptors "$app")

    # Best effort: hand the dumps to the install user so the backup engine,
    # which runs as that user, can read them.
    sudo chown -R "$docker_install_user":"$docker_install_user" "$dump_dir" 2>/dev/null
    return $rc
}
# List the paths a live snapshot should leave out because the logical dumps
# supersede them, one per line, for the engine adapters to consume.
#
#   sqlite         the database file plus its -wal and -shm companions
#   anything else  the declared raw data dir
#
# Descriptors with an empty sqlite path / empty datadir contribute nothing.
# Each path is "$containers_dir<app>/<relative path>".
#
# Arguments: $1 - app name
# Globals:   containers_dir (read)
backupDbExcludePaths()
{
    local app="$1"
    local base="$containers_dir$app"
    local line db_kind db_container raw_dir db_file

    while IFS= read -r line; do
        if [[ -z "$line" ]]; then
            continue
        fi
        IFS=':' read -r db_kind db_container raw_dir db_file <<< "$line"

        if [[ "$db_kind" == "sqlite" ]]; then
            if [[ -n "$db_file" ]]; then
                printf '%s\n' "$base/$db_file" "$base/$db_file-wal" "$base/$db_file-shm"
            fi
        elif [[ -n "$raw_dir" ]]; then
            printf '%s\n' "$base/$raw_dir"
        fi
    done < <(backupDbDescriptors "$app")
}
# Pre-start restore step. Runs after the snapshot is laid down but before the
# containers come up:
#   server  when a dump exists, remove the (absent or stale) raw data dir so
#           the engine's first-run init builds a clean, empty database for
#           the post-start step to load into.
#   sqlite  put the consistent dump back at the real path so the app opens it.
#
# The sqlite dump is decompressed to a scratch file first and only moved into
# place when decompression succeeded, so an unreadable dump never destroys the
# database file that came out of the snapshot. A datadir containing a ".."
# component is refused, because it is passed to `rm -rf`.
#
# NOTE(review): a dump file left in the dump dir by an earlier live backup
# would presumably also be picked up when restoring a snapshot taken with a
# non-live strategy — confirm the backup path clears stale dumps.
#
# Arguments: $1 - app name
# Globals:   containers_dir, backup_db_dump_subdir, docker_install_user (read)
restoreDbRehydratePreStart()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump target tmp
    local -a stages

    backupDbHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"

        case "$kind" in
            sqlite)
                [[ -n "$path" ]] || continue
                [[ -f "$dump" ]] || { isNotice "No sqlite dump for $path — leaving app to initialise"; continue; }
                target="$app_dir/$path"
                tmp="$target.lp-restore.tmp"
                sudo mkdir -p "$(dirname "$target")"

                # Decompress beside the target; check both pipeline stages.
                sudo gzip -dc "$dump" 2>/dev/null | sudo tee "$tmp" >/dev/null
                stages=("${PIPESTATUS[@]}")
                if (( stages[0] != 0 || stages[1] != 0 )); then
                    sudo rm -f -- "$tmp"
                    isError "sqlite dump for $path could not be read — existing file left untouched"
                    continue
                fi

                # Stale WAL/SHM files belong to the old database and must not
                # be replayed onto the restored one.
                sudo rm -f -- "$target-wal" "$target-shm"
                sudo mv -f -- "$tmp" "$target"
                sudo chown -R "$docker_install_user":"$docker_install_user" "$(dirname "$target")"
                isSuccessful "sqlite $path rehydrated from dump"
                ;;
            *)
                [[ -f "$dump" ]] || { isNotice "No dump for $container — keeping restored data dir as-is"; continue; }
                if [[ -z "$datadir" ]]; then
                    isNotice "No data dir declared for $container — nothing to clear before loading the dump"
                    continue
                fi
                case "/$datadir/" in
                    */../*)
                        isError "Refusing to clear '$datadir' for $container — path escapes the app dir"
                        continue
                        ;;
                esac
                sudo rm -rf -- "${app_dir:?}/$datadir"
                isNotice "Cleared $datadir — $container will init fresh, then load the dump"
                ;;
        esac
    done < <(backupDbDescriptors "$app")
}
# Post-start restore step. Server engines load their dump into the freshly
# initialised database once it is accepting connections. sqlite was already
# put in place by the pre-start step, so it is skipped here.
#
# The MySQL/MariaDB client is selected by availability instead of
# `mariadb || mysql`: with `||`, a failed mariadb load has already consumed
# the dump from stdin, so the fallback client would run on an empty stream,
# exit 0 and make the failed load look successful. The exit status of the
# decompression stage is checked as well.
#
# NOTE(review): psql is run without ON_ERROR_STOP, so it presumably still
# exits 0 when individual statements of the dump fail — confirm whether a
# strict (abort-on-first-error) load is wanted here.
#
# Arguments: $1 - app name
# Globals:   containers_dir, backup_db_dump_subdir (read)
restoreDbReplayPostStart()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump loader
    local -a stages

    backupDbHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"
        [[ "$kind" == "sqlite" ]] && continue
        [[ -f "$dump" ]] || continue

        # In-container command that reads the SQL stream on stdin.
        case "$kind" in
            postgres)
                loader='export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"'
                ;;
            mysql|mariadb)
                loader='RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; if command -v mariadb >/dev/null 2>&1; then BIN=mariadb; else BIN=mysql; fi; "$BIN" -uroot -p"$RP"'
                ;;
            *)
                isError "Unknown db kind '$kind' for $app — dump not loaded"
                continue
                ;;
        esac

        isNotice "Waiting for $container to accept connections"
        if ! _backupDbWaitReady "$kind" "$container"; then
            isError "$container never became ready — dump not loaded; data dir left for manual recovery"
            continue
        fi

        sudo gzip -dc "$dump" 2>/dev/null \
            | docker exec -i "$container" sh -c "$loader" >/dev/null 2>&1
        stages=("${PIPESTATUS[@]}")   # [0] = gzip, [1] = docker exec
        if (( stages[0] == 0 && stages[1] == 0 )); then
            isSuccessful "$kind dump loaded into $container"
        else
            isError "Loading $kind dump into $container failed"
        fi
    done < <(backupDbDescriptors "$app")
}

View File

@ -20,6 +20,13 @@ borgBackupAppToLocation()
local comment="app=$app_name host=$host_tag engine=libreportal"
[[ -n "$manifest_sha" ]] && comment+=" manifest=$manifest_sha"
# Exclude the raw DB data dirs on the live path (see backup_db.sh).
local exclude_args=()
local p
while IFS= read -r p; do
[[ -n "$p" ]] && exclude_args+=(--exclude "$p")
done <<< "${backup_exclude_paths:-}"
local loc_name
loc_name=$(resticLocationName "$idx")
isNotice "Snapshotting $app_name$loc_name (archive: $archive)"
@ -27,6 +34,7 @@ borgBackupAppToLocation()
sudo -E -u "$docker_install_user" borg create \
--comment "$comment" \
--compression auto,zstd \
"${exclude_args[@]}" \
"::$archive" \
"$source_path"
local rc=$?

View File

@ -22,10 +22,30 @@ kopiaBackupAppToLocation()
loc_name=$(resticLocationName "$idx")
isNotice "Snapshotting $app_name$loc_name (kopia)"
# Kopia has no per-run --exclude; it reads .kopiaignore from the source
# tree. On the live path write the raw DB data dirs (made relative to the
# source) as ignore patterns, snapshot, then remove it so the rule never
# leaks into a later non-live backup of the same app.
local ignore_file="$source_path/.kopiaignore"
local wrote_ignore=false
if [[ -n "${backup_exclude_paths:-}" ]]; then
local rel
: | sudo tee "$ignore_file" >/dev/null
while IFS= read -r p; do
[[ -z "$p" ]] && continue
rel="/${p#"$source_path"/}"
echo "$rel" | sudo tee -a "$ignore_file" >/dev/null
done <<< "$backup_exclude_paths"
sudo chown "$docker_install_user":"$docker_install_user" "$ignore_file" 2>/dev/null
wrote_ignore=true
fi
local output
output=$(sudo -E -u "$docker_install_user" kopia snapshot create "$source_path" "${tags[@]}" --json 2>&1)
local rc=$?
[[ "$wrote_ignore" == true ]] && sudo rm -f "$ignore_file"
local snapshot_id
snapshot_id=$(echo "$output" | grep -oE '"id":\s*"[^"]+"' | head -1 | cut -d'"' -f4)

View File

@ -22,6 +22,14 @@ resticBackupAppToLocation()
)
[[ -n "$manifest_sha" ]] && extra_tags+=(--tag "manifest=$manifest_sha")
# On the live path backup_app_start sets $backup_exclude_paths to the raw
# DB data dirs the dumps replace; keep them out of the snapshot.
local exclude_args=()
local p
while IFS= read -r p; do
[[ -n "$p" ]] && exclude_args+=(--exclude "$p")
done <<< "${backup_exclude_paths:-}"
local loc_name
loc_name=$(resticLocationName "$idx")
isNotice "Snapshotting $app_name$loc_name"
@ -29,6 +37,7 @@ resticBackupAppToLocation()
output=$(sudo -E -u "$docker_install_user" restic backup \
--host "$host_tag" \
"${extra_tags[@]}" \
"${exclude_args[@]}" \
--exclude-caches \
--json \
"$source_path" 2>&1)

View File

@ -47,6 +47,20 @@ manifestCollect()
local file_count
file_count=$(sudo find "$app_dir" -type f 2>/dev/null | wc -l | tr -d ' ')
local strategy="${CFG_BACKUP_STRATEGY:-auto}"
declare -f backupResolveStrategy >/dev/null 2>&1 && strategy=$(backupResolveStrategy "$app_name")
local databases_json="[]"
if declare -f backupDbDescriptors >/dev/null 2>&1; then
local dbs=() desc kind container datadir path
while IFS= read -r desc; do
[[ -z "$desc" ]] && continue
IFS=':' read -r kind container datadir path <<< "$desc"
dbs+=("{\"kind\":\"$kind\",\"container\":\"$container\",\"path\":\"$path\"}")
done < <(backupDbDescriptors "$app_name")
[[ ${#dbs[@]} -gt 0 ]] && databases_json="[$(IFS=,; echo "${dbs[*]}")]"
fi
cat <<EOF
{
"version": 2,
@ -60,7 +74,8 @@ manifestCollect()
"volumes": $volumes_json,
"size_bytes": $size_bytes,
"file_count": $file_count,
"strategy": "${CFG_BACKUP_STRATEGY:-stop-snapshot-start}"
"strategy": "$strategy",
"databases": $databases_json
}
EOF
}

View File

@ -77,6 +77,12 @@ restoreAppStart()
fi
sudo chown -R "$docker_install_user":"$docker_install_user" "$containers_dir$stored_app_name"
((menu_number++))
echo ""
echo "---- $menu_number. Rehydrating databases (pre-start)"
echo ""
restoreDbRehydratePreStart "$stored_app_name"
((menu_number++))
echo ""
echo "---- $menu_number. Updating docker compose file(s)"
@ -101,6 +107,12 @@ restoreAppStart()
echo ""
restoreAppRunHook "$stored_app_name" post
((menu_number++))
echo ""
echo "---- $menu_number. Loading database dumps (post-start)"
echo ""
restoreDbReplayPostStart "$stored_app_name"
((menu_number++))
echo ""
echo "---- $menu_number. Logging restore into database"

View File

@ -10,6 +10,7 @@ backup_scripts=(
"backup/app/backup_app_schedule.sh"
"backup/app/backup_app_start.sh"
"backup/app/backup_schedule_all.sh"
"backup/db/backup_db.sh"
"backup/engine/backup_ssh.sh"
"backup/engine/borg_backup.sh"
"backup/engine/borg_check.sh"

View File

@ -74,7 +74,7 @@ webuiGenerateBackupDashboard()
content+="\"install_name\":\"${CFG_INSTALL_NAME:-libreportal}\","
content+="\"engine\":\"restic\","
content+="\"verify_after\":${CFG_BACKUP_VERIFY_AFTER:-false},"
content+="\"strategy\":\"${CFG_BACKUP_STRATEGY:-stop-snapshot-start}\","
content+="\"strategy\":\"${CFG_BACKUP_STRATEGY:-auto}\","
content+="\"locations\":$locations_json,"
content+="\"apps\":$apps_json"
content+="}"