LibrePortal/scripts/backup/files/backup_files.sh

#!/bin/bash

# Container-side file capture.
#
# Some apps store private data the backup user can't read from the host: in
# rooted Docker the files are owned by the container's real UIDs (e.g. Nextcloud
# data is www-data 0750), and in rootless they're owned by mapped sub-UIDs — in
# both cases restic, running as the unprivileged backup user, gets "permission
# denied" and the snapshot silently drops that data.
#
# So we read those paths the same way we dump databases: THROUGH the container.
# `docker exec <c> tar` runs in the container's namespace and sees every file as
# the app's own user, needing no host root and no host read access (works
# identically rooted and rootless). We extract the stream to a staging dir as
# PLAIN FILES (not a piped tar blob) so restic keeps full per-file dedup and
# per-file restore; the live path is then excluded from the snapshot. On restore
# we stream the staging copy back through a throwaway container that recreates
# the tree with the app's ownership in-namespace — again no host root.
#
# Declared per app as a compose label (multiple allowed):
#
#   labels:
#     libreportal.backup.files: "<container>:<container_path>:<host_subdir>"
#
#   container       service to exec/read through
#   container_path  path inside the container to capture (a bind-mount target)
#   host_subdir     app-dir-relative dir it maps to (excluded from the snapshot)
#
# Ownership for restore is **auto-discovered** at capture time: the engine runs
# `stat -c '%u:%g'` inside the container and writes the result to a sidecar
# `<host_subdir>.lp-owner` in staging. Restore reads it back — so a PUID/UID
# change on the running container is reflected on the next backup with no label
# edit needed.
#
# Legacy 5-field form is still supported as an explicit override:
#   "<container>:<container_path>:<host_subdir>:<uid>:<gid>"
# The override applies only when BOTH uid and gid are present; if either is
# missing, the owner is auto-discovered as described above.
#
# Example (Nextcloud):
#   "nextcloud-service:/var/www/html:html"

# Staging directory, relative to the app root. It is kept out of every excluded
# path so the captured copies travel inside the snapshot, next to the DB dumps
# under .lp-backup/.
backup_files_stage_subdir='.lp-backup/files'

# Small image started as a disposable, in-namespace extractor during restore.
backup_files_helper_image='busybox'

# Print the backup.files descriptors declared for an app, one per line.
#
# Scans the app's docker-compose.yml for lines of the form
# `libreportal.backup.files: <value>` and prints each value with the key, a
# trailing `# comment`, trailing whitespace and one pair of wrapping quotes
# removed.
#
# Globals:   containers_dir (read)
# Arguments: $1 - app name; the compose file is $containers_dir$1/docker-compose.yml
# Outputs:   descriptors on stdout; nothing if the compose file is missing or
#            declares none
# Returns:   0 when the compose file is missing, otherwise the pipeline status
backupFilesDescriptors()
{
    local app="$1"
    local compose="$containers_dir$app/docker-compose.yml"
    local key_re='^[[:space:]]*libreportal\.backup\.files[[:space:]]*:'

    [[ -f "$compose" ]] || return 0

    # Order matters: trailing whitespace (including the CR of a CRLF file) is
    # trimmed BEFORE the closing quote is stripped. Doing it the other way
    # round leaves the closing quote glued to the last field whenever the
    # line ends in whitespace.
    # NOTE: everything from the first `#` onward is treated as a comment, so a
    # value that itself contains `#` is truncated.
    grep -E "$key_re" "$compose" 2>/dev/null \
        | sed -E \
            -e "s/${key_re}[[:space:]]*//" \
            -e 's/[[:space:]]*#.*$//' \
            -e 's/[[:space:]]+$//' \
            -e 's/^["'\'']//' \
            -e 's/["'\'']$//'
}

# Tell whether an app declares at least one backup.files descriptor.
#
# Arguments: $1 - app name, passed straight to backupFilesDescriptors
# Returns:   0 if backupFilesDescriptors printed anything, 1 otherwise
backupFilesHasDescriptors()
{
    local app="$1"
    local listing

    # Only the output matters here; the lister's exit status is ignored.
    listing="$(backupFilesDescriptors "$app")" || :
    [[ -n "$listing" ]]
}

# Capture each declared path live, through its container, into staging. No host
# root, no host read perms.
#
# Globals:   containers_dir, backup_files_stage_subdir, docker_install_user (read)
# Arguments: $1 - app name
# Returns:   0 if every descriptor was captured (or none are declared),
#            1 if any descriptor was malformed or its capture failed
#
# runFileOp / runFileWrite / isNotice / isError / isSuccessful are project
# helpers defined outside this file section.
backupFilesCapture()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local desc container cpath subdir uid gid stage meta_file discovered rc=0

    backupFilesHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r container cpath subdir uid gid <<< "$desc"
        # All three leading fields are mandatory. A `..` segment in subdir is
        # refused as well: the staging path is rm -rf'd below, and `..` would
        # let it point outside the staging directory.
        if [[ -z "$container" || -z "$cpath" || -z "$subdir" || "/$subdir/" == *"/../"* ]]; then
            isError "Bad backup.files descriptor: $desc"
            rc=1
            continue
        fi
        stage="$app_dir/$backup_files_stage_subdir/$subdir"

        isNotice "Capturing $subdir from $container — live, via container"
        runFileOp rm -rf "$stage" 2>/dev/null
        runFileOp mkdir -p "$stage"
        # Read in the container's namespace, write the plain tree to staging.
        # pipefail (scoped to this subshell so the caller's shell options are
        # untouched) makes a failure on the `docker exec` side count. Without
        # it only the extracting tar's status is seen, and a stopped container
        # that sends nothing may be accepted as an empty archive — reported as
        # "captured" while the live path is still excluded from the snapshot.
        if (
            set -o pipefail
            docker exec "$container" tar -C "$cpath" -cf - . 2>/dev/null \
                | runFileOp tar -xf - -C "$stage" 2>/dev/null
        ); then
            # The capture preserves the app's ownership (e.g. www-data, 0640),
            # which the backup user still couldn't read. Hand the staging tree to
            # the backup user so restic can read it; modes are unchanged, so the
            # owner can now read everything. Real ownership is reapplied from the
            # discovered-or-overridden uid:gid on restore.
            runFileOp chown -R "$docker_install_user":"$docker_install_user" "$stage" 2>/dev/null

            # Auto-discover the in-container uid:gid and write a sidecar that
            # rides in the snapshot beside the staging dir. Restore reads this
            # back, so a PUID/UID change is picked up on the next backup without
            # any label edit. An explicit uid:gid in the descriptor wins; this
            # only writes when the descriptor didn't pin both of them.
            if [[ -z "$uid" || -z "$gid" ]]; then
                meta_file="$app_dir/$backup_files_stage_subdir/$subdir.lp-owner"
                discovered=$(docker exec "$container" stat -c '%u:%g' "$cpath" 2>/dev/null)
                # Only a well-formed numeric pair is recorded; anything else
                # (empty output, stray text) is dropped rather than stored.
                if [[ "$discovered" =~ ^[0-9]+:[0-9]+$ ]]; then
                    printf '%s\n' "$discovered" | runFileWrite "$meta_file" 2>/dev/null
                    runFileOp chown "$docker_install_user":"$docker_install_user" "$meta_file" 2>/dev/null
                fi
            fi

            isSuccessful "captured $subdir ($(du -sh "$stage" 2>/dev/null | cut -f1))"
        else
            isError "capture of $subdir from $container failed"
            rc=1
        fi
    done < <(backupFilesDescriptors "$app")

    return $rc
}

# Live paths the staging copies supersede — excluded from the snapshot.
#
# Prints one absolute path ($containers_dir$app/<host_subdir>) per usable
# descriptor.
#
# Globals:   containers_dir (read)
# Arguments: $1 - app name
# Outputs:   exclude paths on stdout, one per line
# Returns:   always 0
backupFilesExcludePaths()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local desc container cpath subdir uid gid

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r container cpath subdir uid gid <<< "$desc"
        # A descriptor missing its container, path or subdir is rejected by
        # backupFilesCapture, so no staging copy exists for it; excluding its
        # live path anyway would drop that data from the snapshot entirely.
        # A `..` segment is refused too, since it would point the exclusion
        # outside the app directory.
        if [[ -z "$container" || -z "$cpath" || -z "$subdir" || "/$subdir/" == *"/../"* ]]; then
            continue
        fi
        printf '%s\n' "$app_dir/$subdir"
    done < <(backupFilesDescriptors "$app")

    # Explicit status: the loop's own status would otherwise depend on the
    # last descriptor examined.
    return 0
}

# Pre-start restore: rebuild each captured tree at its host path with the app's
# ownership, by extracting through a throwaway container running in-namespace.
# Runs after the snapshot is laid down (staging present, live path absent) and
# before the app starts. No host root.
#
# Globals:   containers_dir, backup_files_stage_subdir,
#            backup_files_helper_image (read)
# Arguments: $1 - app name
# Returns:   0 if every staged tree was restored (descriptors with no staging
#            copy are skipped with a notice), 1 if any descriptor was unusable
#            or its restore failed
restoreFilesRehydratePreStart()
{
    local app="$1"
    local app_dir="$containers_dir$app"
    local desc container cpath subdir uid gid stage meta_file meta rc=0

    backupFilesHasDescriptors "$app" || return 0

    while IFS= read -r desc; do
        [[ -z "$desc" ]] && continue
        IFS=':' read -r container cpath subdir uid gid <<< "$desc"
        # subdir names the directory the helper wipes and recreates. With an
        # empty subdir the staging root itself passes the -d test below and the
        # helper would run `rm -rf /parent/`, i.e. clear the whole app dir.
        # `..` segments are refused for the same reason.
        if [[ -z "$subdir" || "/$subdir/" == *"/../"* ]]; then
            isError "Bad backup.files descriptor: $desc"
            rc=1
            continue
        fi
        stage="$app_dir/$backup_files_stage_subdir/$subdir"
        [[ -d "$stage" ]] || { isNotice "No captured files for $subdir — skipping"; continue; }

        # If the descriptor didn't pin uid:gid, read the auto-discovered sidecar
        # written at capture time. Falls back to 0:0 (container root) if the
        # sidecar is missing — safe but worth a notice so misconfig is visible.
        if [[ -z "$uid" || -z "$gid" ]]; then
            meta_file="$app_dir/$backup_files_stage_subdir/$subdir.lp-owner"
            if [[ -f "$meta_file" ]]; then
                meta=$(cat "$meta_file" 2>/dev/null)
                IFS=':' read -r uid gid <<< "$meta"
            else
                isNotice "No owner sidecar for $subdir — restoring as 0:0 (set uid:gid explicitly or re-capture to fix)"
            fi
        fi
        uid="${uid:-0}"; gid="${gid:-0}"

        isNotice "Restoring $subdir as $uid:$gid — via container"
        # Helper runs as in-namespace root: it can clear/create the dir under the
        # app dir, extract the streamed tree, and chown to the app's uid:gid
        # (which maps to the right owner in rooted and rootless alike).
        #
        # subdir and the owner are handed to the helper shell as positional
        # parameters ($1, $2) instead of being spliced into the script text, so
        # quotes or shell metacharacters in them cannot alter the command.
        # pipefail (scoped to this subshell) makes a failure of the sending tar
        # count as a failed restore rather than only the helper's status.
        if (
            set -o pipefail
            runFileOp tar -C "$stage" -cf - . 2>/dev/null | docker run --rm -i \
                -v "$app_dir:/parent" "$backup_files_helper_image" \
                sh -c 'rm -rf "/parent/$1" && mkdir -p "/parent/$1" && tar -C "/parent/$1" -xf - && chown -R "$2" "/parent/$1"' \
                sh "$subdir" "$uid:$gid" 2>/dev/null
        ); then
            isSuccessful "restored $subdir"
        else
            isError "restoring $subdir failed"
            rc=1
        fi
    done < <(backupFilesDescriptors "$app")

    return $rc
}