librelad a8248ccf7f harden(desudo): convert monitoring subsystem + global log-append idiom
- Global uniform pass: the $logs_dir/$docker_log_file log-append idiom
  (always /docker/logs, data-plane) -> runFileWrite -a across runtime
  files (check_success.sh logging backbone + several app scripts).
- monitoring.sh fully converted: containers_dir/docker_dir file ops
  (sqlite3/sed/mkdir/cp/rm/chmod/find, grafana tee-heredocs) -> runFileOp/
  runFileWrite; prometheus/grafana docker ps/kill/restart -> dockerCommandRun.
Byte-identical in rooted (all helpers reduce to sudo there).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Signed-off-by: librelad <librelad@digitalangels.vip>
2026-05-23 23:33:51 +01:00

225 lines
9.0 KiB
Bash

#!/bin/bash
# LibrePortal monitoring integration — aggregator helpers.
#
# Apps that export metrics opt in with CFG_<APP>_MONITORING=true and ship
# fragments under containers/<app>/resources/monitoring/:
# prometheus-scrape.yml — a YAML list of Prometheus scrape configs
# grafana-dashboards/*.json — Grafana dashboard definitions
#
# These helpers scan every installed app, gather the fragments of the ones
# with monitoring enabled, and refresh the Prometheus / Grafana installs so
# they pick them up. Prometheus's and Grafana's own installs call these too,
# so reinstalling either re-gathers every app's contribution — the config is
# always generated from "installed apps with monitoring on", never hand-edited.
# List every installed app (docker + host) recorded in the apps DB, one name
# per line, ordered by name. Only rows with status = 1 are returned.
# Prints nothing and returns 0 when the DB file is absent; otherwise the return
# status is whatever the sqlite3 call made through runFileOp returns.
monitoringInstalledApps()
{
    local apps_db="$docker_dir/$db_file"
    if [[ ! -f "$apps_db" ]]; then
        return 0
    fi

    local query="SELECT name FROM apps WHERE status = 1 ORDER BY name;"
    runFileOp sqlite3 "$apps_db" "$query" 2>/dev/null
}
# True (status 0) if app $1 is installed, i.e. the apps DB holds a row with
# that name and status = 1. Returns 1 when the DB file is missing, when the
# query yields no usable count, or when the count is zero.
#   $1 - app name to look up
monitoringIsInstalled()
{
    local apps_db="$docker_dir/$db_file"
    [[ -f "$apps_db" ]] || return 1

    # The name is embedded in a SQL string literal, so double any single quote
    # to keep it from terminating the literal and altering the query.
    local sq="'"
    local safe_name="${1//$sq/$sq$sq}"

    local n
    n="$(runFileOp sqlite3 "$apps_db" \
        "SELECT COUNT(*) FROM apps WHERE name = '${safe_name}' AND status = 1;" 2>/dev/null)"

    # Accept plain digits only: anything else (empty output, an error message)
    # would otherwise be evaluated as an arithmetic expression by -gt.
    [[ "$n" =~ ^[0-9]+$ ]] || return 1
    (( 10#$n > 0 ))
}
# True (status 0) when app $1 has CFG_<APP>_MONITORING=true in its deployed
# config at <containers_dir><app>/<app>.config. <APP> is the app name
# upper-cased with '-' mapped to '_'. Only the first matching line counts;
# carriage returns, double quotes and spaces are stripped from its value.
# Returns 1 when the config file is missing or the value is not "true".
monitoringAppEnabled()
{
    local app_name="$1"
    local config_path="${containers_dir}${app_name}/${app_name}.config"
    if [[ ! -f "$config_path" ]]; then
        return 1
    fi

    local key_part="${app_name^^}"
    key_part="${key_part//-/_}"

    # First line that defines the toggle.
    local first_match
    first_match="$(grep -m 1 -E "^CFG_${key_part}_MONITORING=" "$config_path" 2>/dev/null)"

    # Keep what sits between the first and second '=' (the second field).
    local flag
    flag=${first_match#*=}
    flag=${flag%%=*}

    # Drop CRs, double quotes and spaces before comparing.
    flag=${flag//$'\r'/}
    flag=${flag//\"/}
    flag=${flag// /}

    [[ "$flag" == "true" ]]
}
# Toggle the "# >>> libreportal-monitoring >>>" marker block(s) in an app's
# deployed config file: uncomment the lines between the markers when
# monitoringAppEnabled reports the app as enabled, re-comment them otherwise.
# The metrics-enabling config ships commented-out in each app's resource file;
# the app's install calls this after deploying the resource so an app only
# runs a metrics endpoint when the user opted in.
#   $1 - app name
#   $2 - config path relative to the app's container dir
# Returns 0 when the file is missing (skipped with a notice), 1 when the sed
# edit fails, otherwise the status of the final message helper.
monitoringToggleAppConfig()
{
    local app_name="$1"
    local rel_path="$2"
    local file="${containers_dir}${app_name}/${rel_path}"

    # CFG key fragment as it appears in config files: upper-case, '-' -> '_'.
    local upper="${app_name^^}"
    upper="${upper//-/_}"

    if [[ ! -f "$file" ]]; then
        isNotice "monitoringToggleAppConfig: $file not found — skipping."
        return 0
    fi

    if monitoringAppEnabled "$app_name"; then
        # Uncomment: strip the leading # from every non-marker line in range.
        if ! runFileOp sed -i '/# >>> libreportal-monitoring >>>/,/# <<< libreportal-monitoring <<</ {
            /libreportal-monitoring/! s/^\([[:space:]]*\)#/\1/
        }' "$file"; then
            isError "Monitoring config could not be enabled in $rel_path"
            return 1
        fi
        isSuccessful "Monitoring config enabled in $rel_path"
    else
        # Comment: prefix # to every non-marker line in range not already so.
        if ! runFileOp sed -i '/# >>> libreportal-monitoring >>>/,/# <<< libreportal-monitoring <<</ {
            /libreportal-monitoring/! { /^[[:space:]]*#/! s/^\([[:space:]]*\)/\1#/ }
        }' "$file"; then
            isError "Monitoring config could not be disabled in $rel_path"
            return 1
        fi
        isNotice "Monitoring config left disabled in $rel_path (CFG_${upper}_MONITORING not true)."
    fi
}
# Apply the framework's tag substitution to a copied scrape fragment, sourcing
# resolved values from the app's deployed compose. Every distinct
# `PORT_INTERNAL_TAG_<n>|<value>` pair found in the compose file is passed to
# tagsManagerUpdateUniversalTag for the scrape file, except pairs whose value
# is still the `PORT_INTERNAL_DATA_<n>` placeholder: those are collected and
# reported with a notice. Afterwards any `PORT_INTERNAL_DATA_<n>` left in the
# scrape file is reported as an error.
#   $1 - scrape fragment to rewrite
#   $2 - the app's deployed compose file
#   $3 - app label for messages (default: name of the compose file's directory)
# Returns 0 without doing anything when either file is missing.
# NOTE(review): repeat calls are presumably no-ops once values are resolved;
# that depends on tagsManagerUpdateUniversalTag, which is defined elsewhere.
monitoringResolveScrapeTags()
{
    local scrape_file="$1"
    local compose_file="$2"
    local app_label="${3:-$(basename "$(dirname "$compose_file")")}"
    [[ -f "$scrape_file" && -f "$compose_file" ]] || return 0

    local entry tag_name value skipped=()
    while read -r entry; do
        tag_name="${entry%%|*}"
        value="${entry#*|}"
        [[ -z "$tag_name" || -z "$value" ]] && continue
        # Compose still carries the placeholder: nothing to substitute with.
        if [[ "$value" == PORT_INTERNAL_DATA_* ]]; then
            skipped+=("$tag_name")
            continue
        fi
        tagsManagerUpdateUniversalTag "$scrape_file" "$tag_name" "$value"
    done < <(grep -oE 'PORT_INTERNAL_TAG_[0-9]+\|[^ #]*' "$compose_file" 2>/dev/null | sort -u)

    if [[ ${#skipped[@]} -gt 0 ]]; then
        isNotice "Monitoring: ${app_label}'s deployed compose has unresolved ${skipped[*]} — reinstall ${app_label} to flush latest CFG values."
    fi

    # One pass over the scrape file: distinct placeholders that survived.
    local leftovers
    leftovers=$(grep -oE 'PORT_INTERNAL_DATA_[0-9]+' "$scrape_file" 2>/dev/null | sort -u)
    if [[ -n "$leftovers" ]]; then
        # Join on single spaces (no trailing separator inside the parentheses).
        leftovers=${leftovers//$'\n'/ }
        # CFG keys use the upper-cased label with '-' mapped to '_'.
        local cfg_prefix="${app_label^^}"
        cfg_prefix="${cfg_prefix//-/_}"
        isError "Monitoring: ${app_label} scrape config still has unresolved placeholders (${leftovers}) — check CFG_${cfg_prefix}_PORT_<n> entries."
    fi
}
# Rebuild Prometheus's scrape.d/ from every monitoring-enabled app's
# prometheus-scrape.yml fragment, then SIGHUP Prometheus to reload. The
# prometheus.yml resource globs scrape.d/*.yml via scrape_config_files.
#
# For each installed app: when monitoring is enabled and the fragment exists it
# is copied to scrape.d/<app>.yml and its port tags are resolved against the
# app's docker-compose.yml; otherwise scrape.d/<app>.yml is removed.
# Returns 0 early (with a notice) when Prometheus is not installed.
# NOTE(review): only apps in the installed list are visited, so a fragment left
# behind by an app that is no longer installed is not removed here — confirm
# the uninstall path cleans it up.
monitoringRefreshPrometheus()
{
    if ! monitoringIsInstalled "prometheus"; then
        isNotice "Prometheus not installed — skipping scrape-config refresh."
        return 0
    fi

    local scrape_dir="${containers_dir}prometheus/prometheus/scrape.d"
    runFileOp mkdir -p "$scrape_dir"

    # Read names line by line so they are never word-split or glob-expanded.
    local -a installed_apps=()
    mapfile -t installed_apps < <(monitoringInstalledApps)

    local count=0 app_name fragment
    for app_name in "${installed_apps[@]}"; do
        [[ -n "$app_name" ]] || continue
        fragment="${containers_dir}${app_name}/resources/monitoring/prometheus-scrape.yml"
        if monitoringAppEnabled "$app_name" && [[ -f "$fragment" ]]; then
            runFileOp cp "$fragment" "$scrape_dir/${app_name}.yml"
            monitoringResolveScrapeTags "$scrape_dir/${app_name}.yml" "${containers_dir}${app_name}/docker-compose.yml" "$app_name"
            count=$((count + 1))
        else
            runFileOp rm -f "$scrape_dir/${app_name}.yml"
        fi
    done

    runFileOp chmod -R a+rX "$scrape_dir" 2>/dev/null

    if dockerCommandRun "docker ps --format '{{.Names}}'" "sudo" 2>/dev/null | grep -q '^prometheus-service$'; then
        # Declare first, assign second: `local v=$(cmd)` would replace cmd's
        # exit status with that of `local`, hiding a failed reload from
        # checkSuccess, which must run immediately after the assignment.
        local result
        result=$(dockerCommandRun "docker kill --signal=HUP prometheus-service" "sudo" 2>&1)
        checkSuccess "Reloaded Prometheus ($count monitored app(s))"
    else
        isNotice "Prometheus container not running — scrape.d updated, applied on next start ($count app(s))."
    fi
}
# Rebuild Grafana's provisioning/ — a Prometheus datasource plus every
# monitoring-enabled app's dashboards — then restart Grafana to load it.
#
# Writes provisioning/datasources/libreportal-prometheus.yml and
# provisioning/dashboards/libreportal.yml, deletes the *.json files already in
# provisioning/dashboards/libreportal, then copies each enabled app's
# resources/monitoring/grafana-dashboards/*.json there as <app>-<file>.json.
# Returns 0 early (with a notice) when Grafana is not installed.
monitoringRefreshGrafana()
{
    if ! monitoringIsInstalled "grafana"; then
        isNotice "Grafana not installed — skipping dashboard refresh."
        return 0
    fi

    local prov="${containers_dir}grafana/provisioning"
    local ds_dir="$prov/datasources"
    local dash_provider_dir="$prov/dashboards"
    local dash_dir="$prov/dashboards/libreportal"
    # dash_dir lives under dash_provider_dir, so -p creates both.
    runFileOp mkdir -p "$ds_dir" "$dash_dir"

    # Prometheus datasource — reachable from the grafana container by the
    # prometheus service name on the shared libreportal docker network.
    # The settings must be nested under the list item to belong to it.
    runFileWrite "$ds_dir/libreportal-prometheus.yml" <<'EOF'
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus-service:9090
    isDefault: true
    editable: true
EOF

    # Dashboard provider — points Grafana at the gathered dashboards dir.
    runFileWrite "$dash_provider_dir/libreportal.yml" <<'EOF'
apiVersion: 1
providers:
  - name: LibrePortal
    orgId: 1
    folder: LibrePortal
    type: file
    disableDeletion: false
    updateIntervalSeconds: 30
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards/libreportal
EOF

    # Gather each monitoring-enabled app's dashboard JSONs (prefixed with the
    # app name to avoid filename clashes). Clear stale ones first.
    runFileOp find "$dash_dir" -type f -name '*.json' -delete 2>/dev/null

    # Read names line by line so they are never word-split or glob-expanded.
    local -a installed_apps=()
    mapfile -t installed_apps < <(monitoringInstalledApps)

    local count=0 app_name app_dash f
    for app_name in "${installed_apps[@]}"; do
        [[ -n "$app_name" ]] || continue
        app_dash="${containers_dir}${app_name}/resources/monitoring/grafana-dashboards"
        if monitoringAppEnabled "$app_name" && [[ -d "$app_dash" ]]; then
            for f in "$app_dash"/*.json; do
                # Skips the literal pattern left behind when nothing matches.
                [[ -f "$f" ]] || continue
                runFileOp cp "$f" "$dash_dir/${app_name}-$(basename "$f")"
                count=$((count + 1))
            done
        fi
    done

    runFileOp chmod -R a+rX "$prov" 2>/dev/null

    if dockerCommandRun "docker ps --format '{{.Names}}'" "sudo" 2>/dev/null | grep -q '^grafana-service$'; then
        # Declare first, assign second: `local v=$(cmd)` would replace cmd's
        # exit status with that of `local`, hiding a failed restart from
        # checkSuccess, which must run immediately after the assignment.
        local result
        result=$(dockerCommandRun "docker restart grafana-service" "sudo" 2>&1)
        checkSuccess "Restarted Grafana ($count dashboard(s) provisioned)"
    else
        isNotice "Grafana container not running — provisioning updated, applied on next start ($count dashboard(s))."
    fi
}
# Refresh Prometheus first, then Grafana. Called by an app's install when its
# MONITORING toggle is on, and by the Prometheus / Grafana installs themselves.
# Both refreshes always run; the return status is that of the Grafana refresh.
monitoringRefreshAll()
{
    local _refresh_step
    for _refresh_step in monitoringRefreshPrometheus monitoringRefreshGrafana; do
        "$_refresh_step"
    done
}