#!/bin/bash
# Live, consistent database backups.
#
# A file-level snapshot of a running database is "torn" — pages can be
# half-written when restic reads them, so the restored copy may not even
# mount. The fix is a *logical* dump taken while the service keeps running:
# mysqldump --single-transaction / pg_dump / sqlite3 .backup all produce a
# transactionally-consistent file with zero downtime. We snapshot that dump
# (and exclude the raw data dir, which is now redundant and unreliable), then
# replay it on restore.
#
# Apps declare their databases as compose labels so the metadata travels with
# the app (the compose is always copied to the install dir and always lives in
# the snapshot). One label per database:
#
#   labels:
#     libreportal.backup.db: "<kind>:<container>:<datadir>:<path>"
#
#   kind       mysql | mariadb | postgres | sqlite
#   container  service container_name to `docker exec` into (server engines)
#   datadir    app-dir-relative folder holding raw DB files, excluded on live
#   path       app-dir-relative path to the sqlite file (sqlite only)
#
# Examples:
#   "mysql:nextcloud-db:db_data:"          MariaDB/MySQL in nextcloud-db, raw db_data/ excluded
#   "postgres:mastodon-db:postgres_data:"  Postgres in mastodon-db
#   "sqlite:::data/gitea.db"               sqlite file at data/gitea.db
#
# An app with no database can still opt into live snapshots (its files are
# static enough to capture safely) with:
#   labels:
#     libreportal.backup.live: "true"

# Subdir (relative to the app dir) where consistent dumps are written. It sits
# at the app root so it is never inside an excluded datadir, and rides along in
# the snapshot.
backup_db_dump_subdir=".lp-backup/db"

# Emit one "kind:container:datadir:path" line per declared database, read from
# the *installed* compose so it reflects what is actually deployed.
backupDbDescriptors() {
    # Arguments: $1 - app name (folder under $containers_dir)
    # Outputs:   one descriptor per line on stdout; nothing when the app has no
    #            compose file or declares no database.
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"

    [[ -f "$compose" ]] || return 0

    # Order matters: drop the key, then a trailing "# comment", then trailing
    # whitespace (this also removes a CR from CRLF files), and only then the
    # surrounding quotes. Stripping quotes before trailing whitespace would
    # leave the closing quote behind on lines that end in spaces or a CR.
    grep -E '^[[:space:]]*libreportal\.backup\.db[[:space:]]*:' "$compose" 2>/dev/null \
        | sed -E \
            -e 's/^[[:space:]]*libreportal\.backup\.db[[:space:]]*:[[:space:]]*//' \
            -e 's/[[:space:]]*#.*$//' \
            -e 's/[[:space:]]+$//' \
            -e 's/^["'\'']//' \
            -e 's/["'\'']$//' \
            -e 's/[[:space:]]+$//'
}

# True (0) when the app declares at least one database descriptor.
backupDbHasDescriptors() {
    local app="$1"
    if [[ -n "$(backupDbDescriptors "$app")" ]]; then
        return 0
    fi
    return 1
}

# True when the app carries `libreportal.backup.live: "true"` — i.e. its data is
# safe to snapshot while running even though it has no database to dump.
backupAppIsLiveSafe() {
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"

    [[ -f "$compose" ]] || return 1

    if grep -qE '^[[:space:]]*libreportal\.backup\.live[[:space:]]*:[[:space:]]*["'\'']?true' "$compose" 2>/dev/null; then
        return 0
    fi
    return 1
}

# An app can be backed up live without downtime when we can make it consistent:
# it has a dumpable database, or it is explicitly blessed live-safe.
backupAppLiveCapable() {
    local app="$1"

    if backupDbHasDescriptors "$app"; then
        return 0
    fi
    # backupFilesHasDescriptors is not defined in this file; it is consulted
    # only when some other sourced file provides it.
    if declare -f backupFilesHasDescriptors >/dev/null 2>&1 && backupFilesHasDescriptors "$app"; then
        return 0
    fi
    if backupAppIsLiveSafe "$app"; then
        return 0
    fi
    return 1
}

# Strategy options valid for one app, in .config "[a:b|c:d]" syntax. live is
# offered only where the app can actually do it, so the UI never shows a choice
# that would just fall back to stop.
backupAppStrategyOptions() {
    local app="$1"
    local opts="auto:Automatic — recommended|stop-snapshot-start:Stop → snapshot → start|pause-snapshot-unpause:Pause → snapshot → unpause"

    if backupAppLiveCapable "$app"; then
        opts="$opts|live:Live — no downtime"
    fi
    echo "$opts"
    return 0
}

# Resolve the effective strategy for one app. Order of precedence:
#   1. per-app override  CFG_<APP>_BACKUP_STRATEGY  (advanced, defaults to auto)
#   2. global default    CFG_BACKUP_STRATEGY        (auto)
# An explicit stop/pause/live is honoured as-is; "auto" goes live only where the
# app is live-capable and otherwise uses the always-safe stop-snapshot-start.
backupResolveStrategy() {
    local app="$1"

    # The override variable name is derived from the app name. Anything that
    # cannot appear in a shell identifier (e.g. the "-" in "my-app") is mapped
    # to "_"; otherwise the indirect expansion below aborts with "invalid
    # variable name". The character list is spelled out so the match does not
    # depend on locale collation.
    # NOTE(review): mapping to "_" is assumed to match how per-app CFG_ keys
    # are named elsewhere in the project — confirm.
    local app_key="${app^^}"
    app_key="${app_key//[!ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_]/_}"
    local override_key="CFG_${app_key}_BACKUP_STRATEGY"
    # ":-" keeps an unset override from tripping `set -u`.
    local s="${!override_key:-}"

    if [[ -z "$s" || "$s" == "auto" ]]; then
        s="${CFG_BACKUP_STRATEGY:-auto}"
    fi

    case "$s" in
        live|pause-snapshot-unpause|stop-snapshot-start)
            echo "$s"
            return 0
            ;;
    esac

    # "auto" (or any unrecognised value): live only where it is safe.
    if backupAppLiveCapable "$app"; then
        echo "live"
    else
        echo "stop-snapshot-start"
    fi
    return 0
}

# Deterministic dump filename for a descriptor — backup writes it, restore reads
# it, both deriving the same name from the descriptor with no side metadata.
# Arguments: $1 - kind, $2 - container, $3 - sqlite path (sqlite only)
_backupDbDumpName() {
    local kind="$1" container="$2" path="$3"

    case "$kind" in
        sqlite)
            # Flatten the relative path: every "/" becomes "_".
            echo "sqlite-${path//\//_}.sqlite.gz"
            ;;
        *)
            echo "db-${container}.sql.gz"
            ;;
    esac
}

# Wait until a server database is genuinely ready for a load. On a fresh init
# (the restore case) the engine starts a throwaway temp server, runs its setup,
# then stops it and starts the real one — a simple ping passes against the temp
# server and the load then races the restart. So require a real query to
# succeed on two consecutive checks: the restart drops the streak, so we only
# return once the real server is stably up.
_backupDbWaitReady() {
    # Arguments: $1 - kind (postgres; anything else is probed as MySQL/MariaDB)
    #            $2 - container name to `docker exec` into
    #            $3 - max number of probes (default 45), 2 s apart
    # Returns:   0 once two consecutive probes succeed, 1 if tries run out.
    local kind="$1" container="$2" tries="${3:-45}"
    local i ok=0 good

    for ((i = 0; i < tries; i++)); do
        good=0
        case "$kind" in
            postgres)
                runFileOp docker exec "$container" sh -c 'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}" -tAc "SELECT 1"' >/dev/null 2>&1 && good=1
                ;;
            *)
                runFileOp docker exec "$container" sh -c 'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; mariadb -uroot -p"$RP" -N -e "SELECT 1" 2>/dev/null || mysql -uroot -p"$RP" -N -e "SELECT 1"' >/dev/null 2>&1 && good=1
                ;;
        esac

        if [[ $good -eq 1 ]]; then
            ok=$((ok + 1))
            [[ $ok -ge 2 ]] && return 0
        else
            # Any failure resets the streak.
            ok=0
        fi

        # No point sleeping after the final probe — we are about to give up.
        if ((i + 1 < tries)); then
            sleep 2
        fi
    done
    return 1
}

# Load one server dump into its (freshly initialised) container. The gunzipped
# dump is piped to the client's stdin; returns non-zero if decompression OR the
# load fails so the caller can retry.
# Arguments: $1 - kind, $2 - container, $3 - path to the gzip-compressed dump
_backupDbImport() {
    local kind="$1" container="$2" dump="$3"
    local load_cmd

    # The scripts below are expanded by `sh` inside the container, hence the
    # single quotes.
    case "$kind" in
        postgres)
            load_cmd='export PGPASSWORD="${POSTGRES_PASSWORD:-}"; psql -v ON_ERROR_STOP=1 -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"'
            ;;
        *)
            # Pick the client up front instead of "mariadb || mysql": a mariadb
            # load that fails has already consumed stdin, so the mysql fallback
            # would run on empty input, exit 0, and mask the failure.
            load_cmd='RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; if command -v mariadb >/dev/null 2>&1; then exec mariadb -uroot -p"$RP"; else exec mysql -uroot -p"$RP"; fi'
            ;;
    esac

    # pipefail, scoped to a subshell, makes a gzip failure (corrupt or
    # unreadable dump) fail the import instead of being hidden behind the
    # loader's exit status.
    # NOTE(review): unlike the readiness probe, `docker exec` here is not
    # wrapped in runFileOp; kept that way because runFileOp's stdin handling is
    # not visible from this file — confirm.
    (
        set -o pipefail
        runFileOp gzip -dc "$dump" | docker exec -i "$container" sh -c "$load_cmd"
    ) >/dev/null 2>&1
}

# Dump every declared database for an app to consistent files inside the app
# dir, while the containers keep running. Called on the live path only.
backupDbDump() {
    # Arguments: $1 - app name (folder under $containers_dir)
    # Returns:   0 when every declared database was dumped (or none is
    #            declared), 1 if any dump failed. A failed dump leaves no dump
    #            file behind, so a later restore cannot load a truncated one.
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump src tmp rc=0

    backupDbHasDescriptors "$app" || return 0
    runFileOp mkdir -p "$dump_dir"

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"

        case "$kind" in
            postgres)
                isNotice "Dumping postgres ($container) — live, consistent"
                if _backupDbStreamToDump "$dump" runFileOp docker exec "$container" sh -c \
                    'export PGPASSWORD="${POSTGRES_PASSWORD:-}"; pg_dump --clean --if-exists -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-${POSTGRES_USER:-postgres}}"'; then
                    isSuccessful "postgres dump written ($container)"
                else
                    runFileOp rm -f "$dump"
                    isError "postgres dump failed ($container)"; rc=1
                fi
                ;;
            mysql|mariadb)
                isNotice "Dumping $kind ($container) — live, consistent"
                # The dump tool is chosen up front rather than with
                # "mariadb-dump || mysqldump": a mariadb-dump that fails
                # part-way would otherwise be followed by a second, full dump
                # appended to the same stream.
                if _backupDbStreamToDump "$dump" runFileOp docker exec "$container" sh -c \
                    'RP="${MARIADB_ROOT_PASSWORD:-$MYSQL_ROOT_PASSWORD}"; DB="${MARIADB_DATABASE:-$MYSQL_DATABASE}"; DUMP=mysqldump; if command -v mariadb-dump >/dev/null 2>&1; then DUMP=mariadb-dump; fi; exec "$DUMP" -uroot -p"$RP" --single-transaction --routines --triggers --databases "$DB"'; then
                    isSuccessful "$kind dump written ($container)"
                else
                    runFileOp rm -f "$dump"
                    isError "$kind dump failed ($container)"; rc=1
                fi
                ;;
            sqlite)
                isNotice "Dumping sqlite ($path) — live, consistent"
                src="$app_dir/$path"
                if [[ ! -f "$src" ]]; then
                    # Declared but not found — could be a fresh app, or a wrong
                    # path. Treat as a dump failure so the caller falls back to
                    # the safe stop-snapshot-start rather than snapshotting a
                    # torn live sqlite file.
                    isError "sqlite file $path not found — cannot dump"
                    rc=1
                    continue
                fi
                # .backup takes a consistent copy even while the app writes.
                tmp="$dump_dir/.$(basename "$path").tmp"
                if runFileOp sqlite3 "$src" ".backup '$tmp'" 2>/dev/null \
                    && _backupDbStreamToDump "$dump" runFileOp cat "$tmp"; then
                    runFileOp rm -f "$tmp"
                    isSuccessful "sqlite dump written ($path)"
                else
                    runFileOp rm -f "$tmp" "$dump"
                    isError "sqlite dump failed ($path)"; rc=1
                fi
                ;;
            *)
                isError "Unknown db kind '$kind' for $app — skipping"; rc=1
                ;;
        esac
    done < <(backupDbDescriptors "$app")

    runFileOp chown -R "$docker_install_user":"$docker_install_user" "$dump_dir" 2>/dev/null
    return "$rc"
}

# Run the producer command given after the dump path and stream its stdout,
# gzip-compressed, into the dump file via runFileWrite. pipefail (scoped to a
# subshell) makes a failing producer or gzip fail the whole step; without it
# only runFileWrite's status would be seen and a failed dump would be reported
# as written.
# Arguments: $1 - destination dump file, $2.. - producer command and arguments
_backupDbStreamToDump() {
    local dump="$1"
    shift
    (
        set -o pipefail
        "$@" 2>/dev/null | gzip | runFileWrite "$dump"
    )
}

# Absolute paths to exclude from a live snapshot: the raw data dirs / sqlite
# files the dumps supersede. Echoed one per line for the engine adapters.
backupDbExcludePaths() {
    local app="$1"
    local app_dir="$containers_dir$app"
    local desc kind container datadir path

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        case "$kind" in
            sqlite)
                [[ -n "$path" ]] || continue
                # The database file plus its -wal and -shm companion files.
                printf '%s\n' "$app_dir/$path" "$app_dir/$path-wal" "$app_dir/$path-shm"
                ;;
            *)
                [[ -n "$datadir" ]] || continue
                printf '%s\n' "$app_dir/$datadir"
                ;;
        esac
    done < <(backupDbDescriptors "$app")
}

# Pre-start restore step. Runs after the snapshot is laid down but before the
# containers come up:
#   server  remove the (absent or stale) raw data dir so the engine first-run
#           init builds a clean, empty database for us to load into.
#   sqlite  put the consistent dump back at the real path so the app opens it.
restoreDbRehydratePreStart() {
    # Arguments: $1 - app name (folder under $containers_dir)
    # Returns:   always 0; problems are reported through isError/isNotice.
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump target write_rc

    backupDbHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"

        case "$kind" in
            sqlite)
                [[ -f "$dump" ]] || { isNotice "No sqlite dump for $path — leaving app to initialise"; continue; }
                target="$app_dir/$path"

                # Verify the dump decompresses BEFORE deleting anything, so a
                # corrupt dump never costs us the file the snapshot restored.
                if ! runFileOp gzip -t "$dump" 2>/dev/null; then
                    isError "sqlite dump for $path is corrupt — keeping restored file as-is"
                    continue
                fi

                runFileOp rm -f "$target" "$target-wal" "$target-shm"
                runFileOp mkdir -p "$(dirname "$target")"
                write_rc=0
                (
                    set -o pipefail
                    runFileOp gzip -dc "$dump" | runFileWrite "$target"
                ) || write_rc=1
                # NOTE(review): this chowns the whole parent directory
                # recursively (the app dir itself for a root-level sqlite
                # file); kept as the original did — confirm that is intended.
                runFileOp chown -R "$docker_install_user":"$docker_install_user" "$(dirname "$target")"
                if [[ $write_rc -eq 0 ]]; then
                    isSuccessful "sqlite $path rehydrated from dump"
                else
                    isError "sqlite $path could not be rehydrated from dump"
                fi
                ;;
            *)
                [[ -f "$dump" ]] || { isNotice "No dump for $container — keeping restored data dir as-is"; continue; }
                if [[ -z "$datadir" ]]; then
                    isNotice "No data dir declared for $container — nothing to clear before loading the dump"
                    continue
                fi
                # datadir comes from a compose label and feeds `rm -rf`; a
                # ".." component would let it escape the app dir.
                case "/$datadir/" in
                    */../*)
                        isError "Refusing to clear data dir '$datadir' for $container — it escapes the app dir"
                        continue
                        ;;
                esac
                runFileOp rm -rf "${app_dir:?}/$datadir"
                isNotice "Cleared $datadir — $container will init fresh, then load the dump"
                ;;
        esac
    done < <(backupDbDescriptors "$app")
    return 0
}

# Post-start restore step. Server engines load their dump into the freshly
# initialised database once it is accepting connections. sqlite is already in
# place from the pre-start step, so it is a no-op here.
# Arguments: $1 - app name (folder under $containers_dir)
# Returns:   always 0; load failures are reported through isError.
restoreDbReplayPostStart() {
    local app="$1"
    local app_dir="$containers_dir$app"
    local dump_dir="$app_dir/$backup_db_dump_subdir"
    local desc kind container datadir path dump
    local attempt loaded
    local max_attempts=5

    backupDbHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r kind container datadir path <<< "$desc"
        dump="$dump_dir/$(_backupDbDumpName "$kind" "$container" "$path")"
        [[ "$kind" == "sqlite" ]] && continue
        [[ -f "$dump" ]] || continue

        isNotice "Waiting for $container to accept connections"
        if ! _backupDbWaitReady "$kind" "$container"; then
            isError "$container never became ready — dump not loaded; data dir left for manual recovery"
            continue
        fi

        # Retry the load: even after the readiness streak, a fresh engine can
        # bounce once more as it finishes init. The dump drops+recreates each
        # object, so re-running is idempotent.
        loaded=1
        for ((attempt = 1; attempt <= max_attempts; attempt++)); do
            if _backupDbImport "$kind" "$container" "$dump"; then
                loaded=0
                break
            fi
            # After the final attempt there is nothing left to wait for.
            if ((attempt == max_attempts)); then
                break
            fi
            isNotice "$container not ready for load yet (attempt $attempt) — retrying"
            sleep 5
            # Best effort: the next import attempt is the real check.
            _backupDbWaitReady "$kind" "$container" >/dev/null 2>&1 || true
        done

        if [[ $loaded -eq 0 ]]; then
            isSuccessful "$kind dump loaded into $container"
        else
            isError "Loading $kind dump into $container failed after retries; data left for manual recovery"
        fi
    done < <(backupDbDescriptors "$app")
    return 0
}