Compare commits

..

2 Commits

Author SHA1 Message Date
librelad
a16c93721e Merge claude/1 2026-06-02 16:03:53 +01:00
librelad
20f8ca2eb5 feat(network): detect + heal apps stranded off the docker subnet
Closes the gap behind the vpn-recreate bug: when the shared network is
recreated with a different /24, every app's stored static IP is left
outside it and adoptDockerSubnet only realigns CFG, not the apps.

- networkScanConflicts (network_conflicts.sh): read-only scan diffing each
  active network_resources IP against docker's real subnet (via ipInSubnet).
  Per-service routing-aware — skips gateway-routed services whose ipv4 is
  commented out in the deployed compose, so gluetun apps don't false-positive.
  Distinguishes 'daemon down' (benign) from 'network missing' (real).

- webuiSystemNetworkCheck (webui_system_network.sh): self-throttled generator
  that writes frontend/data/system/network_status.json (modelled on
  verify_status.json). Wired into webuiSystemUpdate AND run unconditionally
  every ~60s from the task-processor poll (regen webui is mtime-gated and
  would never fire on drift, which touches no source file).

- networkHealConflicts (network_heal.sh) + 'libreportal system network
  check|heal [app]': the heal adopts docker's subnet in-process, then re-IPs
  stranded apps with reset_network=ip (ports preserved), gluetun first.
  Mutating path runs only through the task system (dual-mode, like update
  apply); read-only check runs inline.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-02 16:03:53 +01:00
11 changed files with 292 additions and 0 deletions

View File

@ -85,6 +85,30 @@ cliHandleSystemCommands()
reclaimDockerSpace
;;
"network")
# libreportal system network check [force] (read-only, rewrites
# network_status.json — used by the task-processor poll + WebUI)
# libreportal system network heal [<app>] (mutating — re-IPs
# stranded apps from the corrected subnet, ports preserved; routes
# through the task system like update apply)
case "$initial_command3" in
"check")
webuiSystemNetworkCheck "${initial_command4:-force}"
;;
"heal")
if [[ "$LIBREPORTAL_TASK_EXEC" == "1" ]]; then
networkHealConflicts "$initial_command4"
else
cliTaskRun "libreportal system network heal${initial_command4:+ $initial_command4}" "system_network_heal" "" ""
fi
;;
*)
isNotice "Invalid network command: $initial_command3"
cliShowSystemHelp
;;
esac
;;
"image")
# libreportal system image rm [--force] <comma-separated ids>
case "$initial_command3" in

View File

@ -13,5 +13,7 @@ cliShowSystemHelp()
echo " libreportal system reset - Reinstall LibrePortal install files"
echo " libreportal system reclaim - Reclaim Docker space (build cache + dangling images)"
echo " libreportal system image rm [--force] <ids> - Remove specific images (comma-separated ids)"
echo " libreportal system network check - Re-scan for apps stranded off the docker subnet"
echo " libreportal system network heal [app] - Re-IP stranded apps from the current subnet (ports kept)"
echo ""
}

View File

@ -0,0 +1,67 @@
#!/bin/bash
# Read-only network-drift scan — the shared detection used by both the WebUI
# status generator (webuiSystemNetworkCheck) and the heal verb (network heal),
# so the two never diverge.
#
# networkScanConflicts sets these globals (call it DIRECTLY, never in $(...) —
# a subshell would drop the globals):
# NET_DAEMON_OK "true"/"false" — docker daemon reachable
# NET_PRESENT "true"/"false" — the shared network ($CFG_NETWORK_NAME) exists
# NET_DOCKER_SUBNET — its real subnet CIDR (e.g. 10.123.154.0/24)
# NET_SCAN_ERROR — human note when the daemon/network is off (else "")
# NET_CONFLICTS (array) — one "app|service|ip" entry per active IP that
# no longer falls inside the docker network's
# real subnet (the "network recreated with a
# different /24, app stranded" drift).
# Gateway-routed services (no live shared-net ipv4 in their deployed compose,
# e.g. gluetun-routed service-1) are skipped, so they don't false-positive.
#
# Nothing here mutates state.
# Is this app/service NOT live on the shared network? Routed via a gateway, or
# its ipv4 simply isn't present uncommented in the deployed compose -> skip it.
# We key on the IP (unique per service): a routed service has its whole
# `ipv4_address:` block commented out (GLUETUN_OFF region), so an uncommented
# assignment carrying this exact IP means it IS live on the shared net.
# _netServiceIsRouted <app> <ip>
#   Decide whether the service holding <ip> should be skipped by the drift scan
#   because it is not attached to the shared network with that address.
# Globals:
#   containers_dir (read) — prefix of the per-app directory holding
#                           docker-compose.yml
# Returns:
#   0  skip it: the compose file has no uncommented `ipv4_address: <ip>` line
#   1  do not skip: the address is assigned live, or no compose file exists
# The address may be written bare or in YAML quotes ("10.0.0.5" / '10.0.0.5');
# both count as a live assignment.
_netServiceIsRouted() {
  local app="$1" ip="$2"
  local compose="${containers_dir}${app}/docker-compose.yml"
  [[ -f "$compose" ]] || return 1   # no compose to consult -> don't skip

  # Escape the dots so they match literally inside the ERE.
  local esc_ip="${ip//./\\.}"
  # Anchored at line start (after indentation) so a commented-out line never
  # matches; the trailing group stops 10.0.0.5 from matching 10.0.0.50.
  local pattern="^[[:space:]]*ipv4_address:[[:space:]]*[\"']?${esc_ip}[\"']?([[:space:]]|#|\$)"
  if grep -Eq -- "$pattern" "$compose"; then
    return 1
  fi
  return 0
}
# networkScanConflicts
#   Read-only drift scan. Resets and then fills the NET_* result globals:
#     NET_DAEMON_OK, NET_PRESENT, NET_DOCKER_SUBNET, NET_SCAN_ERROR and the
#     NET_CONFLICTS array ("app|service|ip" per stranded address).
#   Must be called directly (not inside $(...)) so the globals survive.
# Globals read: CFG_NETWORK_NAME, docker_dir, db_file
# Returns: 0 on every path, including "daemon unreachable" and "network not
#          found" — those are reported via NET_SCAN_ERROR instead.
networkScanConflicts() {
  # Start from a clean slate so a previous scan never leaks into this one.
  NET_CONFLICTS=()
  NET_SCAN_ERROR=""
  NET_DOCKER_SUBNET=""
  NET_PRESENT="false"
  NET_DAEMON_OK="false"

  # Daemon unreachable: nothing can be verified, so report it and stop
  # without flagging any conflict.
  if dockerCommandRun "docker info" >/dev/null 2>&1; then
    NET_DAEMON_OK="true"
  else
    NET_SCAN_ERROR="docker daemon unreachable"
    return 0
  fi

  # Daemon is up: ask it for the shared network's subnet. An empty answer
  # means the network itself is missing.
  NET_DOCKER_SUBNET=$(dockerCommandRun "docker network inspect $CFG_NETWORK_NAME --format '{{range .IPAM.Config}}{{.Subnet}}{{end}}'" 2>/dev/null | tr -d '[:space:]')
  if [[ -n "$NET_DOCKER_SUBNET" ]]; then
    NET_PRESENT="true"
  else
    NET_SCAN_ERROR="network '$CFG_NETWORK_NAME' not found"
    return 0
  fi

  # Every active IP row recorded in the resources table, '|'-separated.
  local db_rows
  db_rows=$(runInstallOp sqlite3 "$docker_dir/$db_file" \
    "SELECT app_name, service_name, resource_value FROM network_resources WHERE resource_type='ip' AND status='active';" 2>/dev/null)
  if [[ -z "$db_rows" ]]; then
    return 0
  fi

  local entry_app entry_service entry_ip
  while IFS='|' read -r entry_app entry_service entry_ip; do
    # Incomplete rows carry nothing to check.
    if [[ -z "$entry_app" || -z "$entry_ip" ]]; then
      continue
    fi
    # Services not live on the shared network are not drift candidates.
    if _netServiceIsRouted "$entry_app" "$entry_ip"; then
      continue
    fi
    # Record the address when it falls outside docker's subnet.
    ipInSubnet "$entry_ip" "$NET_DOCKER_SUBNET" \
      || NET_CONFLICTS+=("${entry_app}|${entry_service}|${entry_ip}")
  done <<< "$db_rows"
}

View File

@ -0,0 +1,88 @@
#!/bin/bash
# Network-drift heal — the mutating half of the detector. Runs ONLY through the
# task system (see cli_system_commands.sh `network heal`, which enqueues unless
# LIBREPORTAL_TASK_EXEC=1), never a direct API.
#
# It (1) realigns CFG to docker's real subnet IN-PROCESS so re-IP draws from the
# corrected /24, then (2) re-IPs each stranded app with ports PRESERVED
# (reset_network="ip"), healing a gateway provider (gluetun) first so recreating
# it doesn't orphan the apps routed through it. Re-IP runs sequentially, and a
# fresh scan afterwards rewrites network_status.json so the WebUI badge clears
# (or stays, if anything failed to heal).
#
# networkHealConflicts [app] # heal one app, or all detected conflicts
# networkHealConflicts [app]
#   Mutating heal for network drift: realign the configured subnet to docker's
#   real one, then re-IP stranded apps (IP only) via dockerInstallApp.
# Arguments:
#   $1  optional app slug; when empty, every app reported by
#       networkScanConflicts is healed.
# Globals:
#   CFG_NETWORK_NAME, CFG_NETWORK_SUBNET (read)
#   DOCKER_NETWORK_SETUP_NEEDED (written when the network is missing)
#   NET_* (written indirectly, by networkScanConflicts)
# Returns:
#   0  nothing needed healing, or the post-heal scan found no conflicts
#   1  conflicts remain, or the scan could not verify the network (docker
#      unreachable / network missing) — an unverifiable result is never
#      reported as success.
networkHealConflicts() {
  local target_app="$1"
  isHeader "Healing network drift"

  # 1) Realign CFG to docker's real subnet in this process, before any re-IP.
  local inspect_cmd="docker network inspect $CFG_NETWORK_NAME --format '{{range .IPAM.Config}}{{.Subnet}}{{end}}'"
  local docker_subnet
  docker_subnet=$(dockerCommandRun "$inspect_cmd" 2>/dev/null | tr -d '[:space:]')
  if [[ -z "$docker_subnet" ]]; then
    isNotice "Network '$CFG_NETWORK_NAME' not present — (re)creating it."
    DOCKER_NETWORK_SETUP_NEEDED="true"
    declare -f installDockerNetwork >/dev/null 2>&1 && installDockerNetwork
    docker_subnet=$(dockerCommandRun "$inspect_cmd" 2>/dev/null | tr -d '[:space:]')
  fi
  if [[ -n "$docker_subnet" && "$docker_subnet" != "$CFG_NETWORK_SUBNET" ]]; then
    declare -f adoptDockerSubnet >/dev/null 2>&1 && adoptDockerSubnet "$docker_subnet"
  fi

  # 2) Build the app set (unique). Either the requested app, or a fresh scan.
  local -a apps=()
  local a
  if [[ -n "$target_app" ]]; then
    apps=("$target_app")
  else
    networkScanConflicts   # populates NET_* (call direct, not $(...))
    if [[ -n "$NET_SCAN_ERROR" ]]; then
      # The scan could not look at the network at all, so an empty conflict
      # list here means "unknown", not "healthy".
      isError "Network heal aborted — ${NET_SCAN_ERROR}; nothing could be verified or healed."
      if declare -f webuiSystemNetworkCheck >/dev/null 2>&1; then
        webuiSystemNetworkCheck "force" >/dev/null 2>&1
      fi
      return 1
    fi
    local row seen=""
    for row in "${NET_CONFLICTS[@]}"; do
      a="${row%%|*}"
      [[ -n "$a" ]] || continue
      # de-dupe app names (an app can have several drifted services)
      [[ " $seen " == *" $a "* ]] && continue
      seen+=" $a"
      apps+=("$a")
    done
  fi
  if [[ ${#apps[@]} -eq 0 ]]; then
    isSuccessful "No network conflicts to heal."
    declare -f webuiSystemNetworkCheck >/dev/null 2>&1 && webuiSystemNetworkCheck "force" >/dev/null 2>&1
    return 0
  fi

  # 3) Heal the gateway provider (gluetun) first, then everything else in the
  #    order it was discovered.
  local -a ordered=()
  for a in "${apps[@]}"; do [[ "$a" == "gluetun" ]] && ordered+=("$a"); done
  for a in "${apps[@]}"; do [[ "$a" != "gluetun" ]] && ordered+=("$a"); done

  # 4) Re-IP each (IP-only — ports preserved), sequentially.
  local attempted=0
  for a in "${ordered[@]}"; do
    if [[ ! "$a" =~ ^[a-z0-9][a-z0-9_-]*$ ]]; then
      isError "Skipping invalid app slug: $a"
      continue
    fi
    isNotice "Re-IPing '$a' into ${CFG_NETWORK_SUBNET} (ports preserved)…"
    dockerInstallApp "$a" "" ip
    # Plain assignment: ((attempted++)) yields exit status 1 when the old
    # value is 0, which would trip errexit/ERR traps if they are active.
    attempted=$((attempted + 1))
  done

  # 5) Reconcile gateway-routed apps onto the (possibly recreated) provider.
  #    Best effort: a failure here is deliberately ignored.
  if declare -f appGluetunRecreateRouted >/dev/null 2>&1; then
    appGluetunRecreateRouted >/dev/null 2>&1 || true
  fi

  # 6) Fresh scan rewrites the status file; report what (if anything) remains.
  if declare -f webuiSystemNetworkCheck >/dev/null 2>&1; then
    webuiSystemNetworkCheck "force" >/dev/null 2>&1
  fi
  networkScanConflicts
  if [[ -n "$NET_SCAN_ERROR" ]]; then
    # Zero conflicts from a scan that could not run proves nothing.
    isError "Network heal attempted ${attempted} app(s) but the result could not be verified — ${NET_SCAN_ERROR}."
    return 1
  fi
  local remaining=${#NET_CONFLICTS[@]}
  if (( remaining > 0 )); then
    isError "Network heal attempted ${attempted} app(s); ${remaining} conflict(s) still detected — re-run or inspect manually."
    return 1
  fi
  isSuccessful "Network heal complete — re-IP'd ${attempted} app(s); no conflicts remain."
  return 0
}

View File

@ -44,6 +44,8 @@ docker_scripts=(
"docker/install/rootless/rootless_start_setup.sh"
"docker/install/rootless/rootless_uninstall.sh"
"docker/install/rootless/rootless_user.sh"
"docker/network/network_conflicts.sh"
"docker/network/network_heal.sh"
"docker/network/network_prune.sh"
"docker/network/network_randomize_subnet.sh"
"docker/network/network_setup.sh"

View File

@ -23,6 +23,7 @@ network_scripts=(
"network/firewall/rules/firewall_refresh_all.sh"
"network/ip/ip_allocation.sh"
"network/ip/ip_find_available.sh"
"network/ip/ip_in_subnet.sh"
"network/ip/ip_is_available.sh"
"network/ip/ip_remove_from_db.sh"
"network/ip/ip_replace_tags.sh"

View File

@ -32,6 +32,7 @@ webui_scripts=(
"webui/data/generators/system/webui_system_info.sh"
"webui/data/generators/system/webui_system_memory.sh"
"webui/data/generators/system/webui_system_metrics.sh"
"webui/data/generators/system/webui_system_network.sh"
"webui/data/generators/system/webui_system_update.sh"
"webui/data/generators/updater/webui_artifact_scan.sh"
"webui/data/generators/updater/webui_updater_scan.sh"

View File

@ -524,6 +524,7 @@ declare -gA LP_FN_MAP=(
[invidiousToolsMenu]="menu/tools/manage_invidious.sh"
[ipAllocation]="network/ip/ip_allocation.sh"
[ipFindAvailable]="network/ip/ip_find_available.sh"
[ipInSubnet]="network/ip/ip_in_subnet.sh"
[ipIsAvailable]="network/ip/ip_is_available.sh"
[ipRemoveFromDatabase]="network/ip/ip_remove_from_db.sh"
[ip_scan_all_network_services]="network/display/show_all_network_services_detailed.sh"
@ -647,6 +648,9 @@ declare -gA LP_FN_MAP=(
[monitoringResolveScrapeTags]="network/monitoring/monitoring.sh"
[monitoringToggleAppConfig]="network/monitoring/monitoring.sh"
[moveFile]="function/file/move_file.sh"
[_netServiceIsRouted]="docker/network/network_conflicts.sh"
[networkHealConflicts]="docker/network/network_heal.sh"
[networkScanConflicts]="docker/network/network_conflicts.sh"
[_nextcloudOcc]="nextcloud/scripts/nextcloud_auth.sh"
[_nextcloudOccWithPass]="nextcloud/scripts/nextcloud_auth.sh"
[onlyoffice_install_message_data]="onlyoffice/scripts/onlyoffice_install_hooks.sh"
@ -933,6 +937,7 @@ declare -gA LP_FN_MAP=(
[webuiSystemInfo]="webui/data/generators/system/webui_system_info.sh"
[webuiSystemMemory]="webui/data/generators/system/webui_system_memory.sh"
[webuiSystemMetrics]="webui/data/generators/system/webui_system_metrics.sh"
[webuiSystemNetworkCheck]="webui/data/generators/system/webui_system_network.sh"
[webuiSystemUpdate]="webui/data/generators/system/webui_system_update.sh"
[webuiSystemUpdateCheck]="webui/data/generators/system/webui_system_update.sh"
[webuiSystemVerify]="webui/data/generators/system/webui_system_update.sh"
@ -1466,6 +1471,7 @@ declare -gA LP_FN_ROOT=(
[invidiousToolsMenu]="scripts"
[ipAllocation]="scripts"
[ipFindAvailable]="scripts"
[ipInSubnet]="scripts"
[ipIsAvailable]="scripts"
[ipRemoveFromDatabase]="scripts"
[ip_scan_all_network_services]="scripts"
@ -1589,6 +1595,9 @@ declare -gA LP_FN_ROOT=(
[monitoringResolveScrapeTags]="scripts"
[monitoringToggleAppConfig]="scripts"
[moveFile]="scripts"
[_netServiceIsRouted]="scripts"
[networkHealConflicts]="scripts"
[networkScanConflicts]="scripts"
[_nextcloudOcc]="containers"
[_nextcloudOccWithPass]="containers"
[onlyoffice_install_message_data]="containers"
@ -1875,6 +1884,7 @@ declare -gA LP_FN_ROOT=(
[webuiSystemInfo]="scripts"
[webuiSystemMemory]="scripts"
[webuiSystemMetrics]="scripts"
[webuiSystemNetworkCheck]="scripts"
[webuiSystemUpdate]="scripts"
[webuiSystemUpdateCheck]="scripts"
[webuiSystemVerify]="scripts"
@ -2429,6 +2439,7 @@ _invidiousPsql() { source "${install_containers_dir}invidious/scripts/invidious_
# Loader stubs: each one sources the script under ${install_scripts_dir} that
# implements the function, then calls the same name again with the original
# arguments.
# NOTE(review): this presumably relies on the sourced file redefining the
# function; if it did not, the stub would call itself again — confirm.
invidiousToolsMenu() { source "${install_scripts_dir}menu/tools/manage_invidious.sh"; invidiousToolsMenu "$@"; }
ipAllocation() { source "${install_scripts_dir}network/ip/ip_allocation.sh"; ipAllocation "$@"; }
ipFindAvailable() { source "${install_scripts_dir}network/ip/ip_find_available.sh"; ipFindAvailable "$@"; }
ipInSubnet() { source "${install_scripts_dir}network/ip/ip_in_subnet.sh"; ipInSubnet "$@"; }
ipIsAvailable() { source "${install_scripts_dir}network/ip/ip_is_available.sh"; ipIsAvailable "$@"; }
ipRemoveFromDatabase() { source "${install_scripts_dir}network/ip/ip_remove_from_db.sh"; ipRemoveFromDatabase "$@"; }
ip_scan_all_network_services() { source "${install_scripts_dir}network/display/show_all_network_services_detailed.sh"; ip_scan_all_network_services "$@"; }
@ -2552,6 +2563,9 @@ monitoringRefreshPrometheus() { source "${install_scripts_dir}network/monitoring
# Loader stubs: each one sources its implementing script, then calls the same
# name again with the original arguments. Stubs for scripts shipped with an app
# resolve against ${install_containers_dir}; the rest use ${install_scripts_dir}.
# _netServiceIsRouted and networkScanConflicts load the same file
# (docker/network/network_conflicts.sh).
# NOTE(review): this presumably relies on the sourced file redefining the
# function; if it did not, the stub would call itself again — confirm.
monitoringResolveScrapeTags() { source "${install_scripts_dir}network/monitoring/monitoring.sh"; monitoringResolveScrapeTags "$@"; }
monitoringToggleAppConfig() { source "${install_scripts_dir}network/monitoring/monitoring.sh"; monitoringToggleAppConfig "$@"; }
moveFile() { source "${install_scripts_dir}function/file/move_file.sh"; moveFile "$@"; }
_netServiceIsRouted() { source "${install_scripts_dir}docker/network/network_conflicts.sh"; _netServiceIsRouted "$@"; }
networkHealConflicts() { source "${install_scripts_dir}docker/network/network_heal.sh"; networkHealConflicts "$@"; }
networkScanConflicts() { source "${install_scripts_dir}docker/network/network_conflicts.sh"; networkScanConflicts "$@"; }
_nextcloudOcc() { source "${install_containers_dir}nextcloud/scripts/nextcloud_auth.sh"; _nextcloudOcc "$@"; }
_nextcloudOccWithPass() { source "${install_containers_dir}nextcloud/scripts/nextcloud_auth.sh"; _nextcloudOccWithPass "$@"; }
onlyoffice_install_message_data() { source "${install_containers_dir}onlyoffice/scripts/onlyoffice_install_hooks.sh"; onlyoffice_install_message_data "$@"; }
@ -2838,6 +2852,7 @@ webuiSystemDisk() { source "${install_scripts_dir}webui/data/generators/system/w
# Loader stubs for the WebUI system generators: each one sources its
# implementing script under ${install_scripts_dir}, then calls the same name
# again with the original arguments. webuiSystemUpdate, webuiSystemUpdateCheck
# and webuiSystemVerify all load webui_system_update.sh.
# NOTE(review): this presumably relies on the sourced file redefining the
# function; if it did not, the stub would call itself again — confirm.
webuiSystemInfo() { source "${install_scripts_dir}webui/data/generators/system/webui_system_info.sh"; webuiSystemInfo "$@"; }
webuiSystemMemory() { source "${install_scripts_dir}webui/data/generators/system/webui_system_memory.sh"; webuiSystemMemory "$@"; }
webuiSystemMetrics() { source "${install_scripts_dir}webui/data/generators/system/webui_system_metrics.sh"; webuiSystemMetrics "$@"; }
webuiSystemNetworkCheck() { source "${install_scripts_dir}webui/data/generators/system/webui_system_network.sh"; webuiSystemNetworkCheck "$@"; }
webuiSystemUpdate() { source "${install_scripts_dir}webui/data/generators/system/webui_system_update.sh"; webuiSystemUpdate "$@"; }
webuiSystemUpdateCheck() { source "${install_scripts_dir}webui/data/generators/system/webui_system_update.sh"; webuiSystemUpdateCheck "$@"; }
webuiSystemVerify() { source "${install_scripts_dir}webui/data/generators/system/webui_system_update.sh"; webuiSystemVerify "$@"; }

View File

@ -454,6 +454,11 @@ maybeRegenPoll() {
(( now - last < REGEN_POLL_INTERVAL )) && return 0
: > "$REGEN_POLL_STAMP" 2>/dev/null || true
command -v libreportal >/dev/null 2>&1 && libreportal regen webui >/dev/null 2>&1 || true
# Network-drift detector (read-only). Unlike `regen webui` it is NOT gated on
# source-file mtimes — drift is an external/runtime event that touches no
# config file — so it runs every poll, self-throttled to its own interval
# (cheap no-op most ticks), writing network_status.json when a scan is due.
command -v libreportal >/dev/null 2>&1 && libreportal system network check >/dev/null 2>&1 || true
}
# ============================================================================

View File

@ -0,0 +1,86 @@
#!/bin/bash
# WebUI network-drift detector.
# Writes frontend/data/system/network_status.json so the dashboard + topbar can
# surface apps whose allocated static IP no longer falls inside the docker
# network's real subnet (the "network recreated with a different /24, apps
# stranded" case — see scripts/checks/requirements/check_docker_network.sh).
#
# Read-only: it inspects docker + the network_resources DB and writes a status
# file. The actual fix is the locked-down `libreportal system network heal`.
#
# Scheduling: invoked from the task processor's idle poll (~60s) via
# `libreportal system network check`, and from webuiSystemUpdate on full
# refreshes. Self-throttled to CFG_NETWORK_CHECK_INTERVAL so most calls no-op.
# Pass "force" to bypass the throttle (the manual re-check button / post-heal).
# _webuiNetJsonEscape <string>
#   Print <string> escaped for use inside a JSON string literal: backslash and
#   double quote are backslash-escaped, newline/CR/tab become \n, \r, \t.
#   The replacements are quoted so they are always taken literally.
_webuiNetJsonEscape() {
  local s="$1" bs='\' dq='"'
  s=${s//"$bs"/"$bs$bs"}   # backslashes first, so later escapes aren't doubled
  s=${s//"$dq"/"$bs$dq"}
  s=${s//$'\n'/"${bs}n"}
  s=${s//$'\r'/"${bs}r"}
  s=${s//$'\t'/"${bs}t"}
  printf '%s' "$s"
}

# webuiSystemNetworkCheck [force]
#   Run the read-only drift scan and write network_status.json.
# Arguments:
#   $1  "force" bypasses the throttle; any other value (or none) honours it.
# Globals read:
#   containers_dir, sudo_user_name, docker_dir, db_file, CFG_NETWORK_NAME,
#   CFG_NETWORK_SUBNET, CFG_NETWORK_CHECK_INTERVAL (seconds, default 300),
#   plus the NET_* results set by networkScanConflicts.
# Returns:
#   0 when throttled or when the status file was written; non-zero when the
#   temp file could not be created or the final write failed.
# NOTE(review): the CLI arm for `system network check` passes
# "${initial_command4:-force}", so a bare `check` (as issued by the
# task-processor poll) appears to arrive here as "force" and skip the throttle
# — confirm which side is intended.
webuiSystemNetworkCheck() {
  local force_flag="$1"
  local system_dir="$containers_dir/libreportal/frontend/data/system"
  local final_file="${system_dir}/network_status.json"
  local stamp_file="${system_dir}/.network_check_stamp"
  local interval="${CFG_NETWORK_CHECK_INTERVAL:-300}"
  # A non-numeric interval would be evaluated as an expression below.
  [[ "$interval" =~ ^[0-9]+$ ]] || interval=300

  createFolders "quiet" "$sudo_user_name" "$system_dir"

  # Throttle: skip unless forced, never run before, or the stamp is older than
  # the interval.
  if [[ "$force_flag" != "force" && -f "$final_file" && -f "$stamp_file" ]]; then
    local _now _last
    _now=$(date +%s)
    _last=$(stat -c '%Y' "$stamp_file" 2>/dev/null || echo 0)
    (( _now - _last >= interval )) || return 0
  fi
  runFileOp touch "$stamp_file" 2>/dev/null || true

  # Read-only scan — call directly (NOT in $(...)): it sets NET_* globals + the
  # NET_CONFLICTS array, which a subshell would discard.
  networkScanConflicts

  local conflicts_found="false" can_auto_heal="false" conflict_count=0
  local apps_json="" error_json="null"
  if [[ "$NET_DAEMON_OK" != "true" ]]; then
    # Daemon unreachable — neutral status; never alarm on what we can't check.
    error_json="\"$(_webuiNetJsonEscape "$NET_SCAN_ERROR")\""
  elif [[ "$NET_PRESENT" != "true" ]]; then
    # Network gone — a conflict only if something is installed to use it.
    error_json="\"$(_webuiNetJsonEscape "$NET_SCAN_ERROR")\""
    local installed
    installed=$(runInstallOp sqlite3 "$docker_dir/$db_file" "SELECT COUNT(*) FROM apps WHERE status=1;" 2>/dev/null)
    # The value is compared numerically and emitted as a bare JSON number, so
    # anything that is not a plain integer is treated as 0.
    [[ "$installed" =~ ^[0-9]+$ ]] || installed=0
    if (( installed > 0 )); then
      conflicts_found="true"; can_auto_heal="true"; conflict_count="$installed"
    fi
  else
    local row app service ip _a _s _i _subnet
    _subnet=$(_webuiNetJsonEscape "$NET_DOCKER_SUBNET")
    for row in "${NET_CONFLICTS[@]}"; do
      IFS='|' read -r app service ip <<< "$row"
      [[ -z "$app" || -z "$ip" ]] && continue
      _a=$(_webuiNetJsonEscape "$app")
      _s=$(_webuiNetJsonEscape "$service")
      _i=$(_webuiNetJsonEscape "$ip")
      apps_json+="${apps_json:+,}"$'\n'" {\"app\": \"${_a}\", \"service\": \"${_s}\", \"reason\": \"ip_out_of_subnet\", \"stored_ip\": \"${_i}\", \"expected_subnet\": \"${_subnet}\", \"heal\": \"reset-ip\"}"
      conflict_count=$((conflict_count + 1))
    done
    if (( conflict_count > 0 )); then
      conflicts_found="true"; can_auto_heal="true"
    fi
  fi

  local apps_arr="[]"
  [[ -n "$apps_json" ]] && apps_arr="[${apps_json}"$'\n'" ]"

  local temp_file
  temp_file="$(mktemp)" || return 1
  cat > "$temp_file" << EOF
{
"conflicts_found": ${conflicts_found},
"conflict_count": ${conflict_count},
"can_auto_heal": ${can_auto_heal},
"network_name": "$(_webuiNetJsonEscape "$CFG_NETWORK_NAME")",
"docker_subnet": "$(_webuiNetJsonEscape "$NET_DOCKER_SUBNET")",
"config_subnet": "$(_webuiNetJsonEscape "$CFG_NETWORK_SUBNET")",
"apps": ${apps_arr},
"error": ${error_json},
"checked_at": "$(date -Iseconds)"
}
EOF
  # Keep the write's status: the temp-file cleanup must not mask a failure.
  local rc=0
  runFileWrite "$final_file" < "$temp_file" || rc=$?
  rm -f -- "$temp_file"
  return "$rc"
}

View File

@ -10,6 +10,7 @@ webuiSystemUpdate() {
webuiSystemMetrics
webuiSystemUpdateCheck
webuiSystemVerify
webuiSystemNetworkCheck
isSuccessful "System information updated!"
}