LibrePortal/scripts/source/files/generate_function_manifest.sh
librelad 77342c8047 feat(lazy-load): extend manifest to containers/ + skip container scan (Phase 5)
Containers used to be eager-loaded via `sourceScanFiles "containers"`
even under LP_LAZY=1 — sourcing all ~160 installer functions up front.
Phase 5 brings them into the autoload-stub mechanism.

generate_function_manifest.sh now scans BOTH scripts/ AND containers/
(maxdepth 3, matching sourceScanFiles' existing prune), with a per-entry
root selector so stub emission uses the right base directory:

  scripts/peer/peer_add.sh    →  source "${install_scripts_dir}peer/peer_add.sh"
  containers/linkding/linkding.sh →  source "${install_containers_dir}linkding/linkding.sh"

New manifest exports:
  LP_FN_MAP             funcname → relpath        (existing)
  LP_FN_ROOT            funcname → scripts|containers   NEW
  LP_EAGER_FILES        "<root>:<relpath>" entries     NEW format
  ~860 autoload stubs   (was ~700; +160 from containers)

Loader changes (initilize_files.sh):
  - Parses LP_EAGER_FILES entries as `root:path`, dispatches to the
    right install_*_dir. Pre-Phase-5 entries without a colon default to
    scripts (backwards-compatible).
  - sourceScanFiles "containers" is skipped when LP_LAZY=1 AND
    LP_FN_MAP is loaded (manifest-driven autoload covers it).
    Eager mode and lazy-with-missing-manifest both still run the scan.

Measurement target: ~70 ms saved on top of Phase 4. Verified separately
in the commit message of the next deploy.

Signed-off-by: librelad <librelad@digitalangels.vip>
2026-05-26 21:30:36 +01:00

301 lines
14 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Function manifest generator — sidekick to generate_arrays.sh, supports the
# lazy-load path. Scans every script under scripts/ (and under the sibling
# containers/ directory, when it exists) for top-level function definitions
# and writes scripts/source/files/arrays/function_manifest.sh:
#
# declare -gA LP_FN_MAP=(
# [funcname]="rel/path/to/file.sh"
# ...
# )
# declare -gA LP_FN_ROOT=(
# [funcname]="scripts" (or "containers")
# ...
# )
# LP_EAGER_FILES=( "scripts:rel/path1.sh" "containers:rel/path2.sh" ... )
#
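# LP_FN_MAP maps each function to the file that defines it, for the lazy
# loader. The autoload stubs themselves are written into the manifest with the
# path baked in, one per function:
# funcname() { source "${install_scripts_dir}rel/path/to/file.sh"; funcname "$@"; }
# (functions found under containers/ use ${install_containers_dir} instead).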
#
# LP_EAGER_FILES are files with side effects at source time (set vars, run
# commands, etc.) that the lazy loader MUST source unconditionally — skipping
# them would skip the side effect, not just defer a function definition.
#
# Heuristic for eager detection (pragmatic, not a real bash parser):
# - Walk the file line-by-line, tracking { } depth to know "inside function".
# - A file is LAZY-SAFE iff every non-blank/non-comment line outside
# functions is either (a) a function header `funcname() [{`, or (b) the
# bare `{` / `}` that opens or closes a function body. `local`/`declare`
# are only tolerated inside functions.
# - Anything else at depth 0 (assignments, source calls, bare commands) →
# mark file EAGER. False positives are harmless (file just stays eager-
# loaded, same as today). False negatives WOULD be bugs, so the heuristic
# errs on the safe side.
#
# Collisions: if two files define the same function name, the LAST scan wins
# in LP_FN_MAP (matches what eager loading does — last source wins). All
# collisions are reported to stderr so they can be audited.
#
# Usage: ./generate_function_manifest.sh run
#
# SAFETY: only runs when executed directly with 'run' (mirrors generate_arrays.sh).
if [[ "${BASH_SOURCE[0]}" == "${0}" && "$1" == "run" ]]; then
# Resolve every path from this script's own location, so the generator works
# no matter which directory it is invoked from.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"      # absolute directory of this script
ARRAYS_DIR="${SCRIPT_DIR}/arrays"
OUTPUT="${ARRAYS_DIR}/function_manifest.sh"  # the generated manifest
# Two levels up from this script's directory is the scripts/ root …
SCRIPTS_DIR="$(dirname "$SCRIPT_DIR")"
SCRIPTS_DIR="$(dirname "$SCRIPTS_DIR")"      # …/scripts
# … and containers/ is a sibling of scripts/ under the install directory.
INSTALL_DIR="$(dirname "$SCRIPTS_DIR")"      # …/install (parent of scripts)
CONTAINERS_DIR="${INSTALL_DIR}/containers"
# ANSI colour escapes. Stored as literal backslash sequences; `echo -e` in the
# helpers below turns them into real escape characters at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'

# isSuccessful MSG — green "✓ Success" line on stdout.
isSuccessful() {
  local msg="$1"
  echo -e "${GREEN}✓ Success${NC} ${msg}"
}

# isNotice MSG — yellow "! Notice" line on stdout.
isNotice() {
  local msg="$1"
  echo -e "${YELLOW}! Notice${NC} ${msg}"
}

# isError MSG — red "✗ Error" line on stderr.
isError() {
  local msg="$1"
  echo -e "${RED}✗ Error${NC} ${msg}" >&2
}
# should_skip REL
#
# Decide whether a file under scripts/ is excluded from the scan. REL is the
# path relative to the scripts/ root. Returns 0 when the file must be skipped,
# 1 otherwise. The list mirrors generate_arrays.sh: entries are deployment
# targets or build infrastructure that the loader bootstraps separately.
should_skip() {
  local candidate="$1" pattern
  local -a skip_patterns=(
    'source/files/app_files.sh'
    'source/files/cli_files.sh'
    'source/files/generate_arrays.sh'
    'source/files/generate_function_manifest.sh'
    # The manifest is sourced explicitly by the lazy loader; flagging it
    # eager as well would source it twice.
    'source/files/arrays/function_manifest.sh'
    # files_*.sh arrays only matter to the eager loader, which sources them
    # through its own path. Under lazy mode they would be pure overhead.
    'source/files/arrays/files_*.sh'
    'source/loading/check_files.sh'
    'source/loading/initilize_files.sh'
    'source/loading/scan_files.sh'
    'source/load_sources.sh'
    'source/paths.sh'
    'webui/data/generators/webui_test_generate.sh'
    # Runs standalone via sshd's forced-command, never sourced.
    'peer/peer_shell.sh'
    'unused/*'
    'system/*'
    'release/*'
  )
  for pattern in "${skip_patterns[@]}"; do
    # Right-hand side is deliberately unquoted so it is matched as a glob,
    # exactly like a `case` pattern would be.
    # shellcheck disable=SC2053
    if [[ "$candidate" == $pattern ]]; then
      return 0
    fi
  done
  return 1
}
# analyze_file FILE
#
# Classify one shell file for the manifest. Prints one tag per line on stdout:
#   fn:<funcname>   a function definition was found at brace depth 0
#   eager:          a line at brace depth 0 that is not part of a function
#                   definition was found, i.e. the file has source-time side
#                   effects (may be printed several times per file)
#
# This is a line-oriented heuristic, not a shell parser. Brace depth moves on:
#   - the net `{`/`}` balance of a function header line,
#   - a bare `{` directly after a brace-less header,
#   - a line ending in `{` inside a function (+1),
#   - a line starting with `}` inside a function (-1). This covers closers
#     that carry trailing tokens, such as `} > file`, `} 2>/dev/null`,
#     `} || {` or a closing brace followed by a quote.
# Closers with trailing tokens have to be counted: otherwise depth stays above
# 0 after the function ends and every later function and side effect in the
# file is silently dropped. When the heuristic is unsure it returns to depth 0
# too early rather than too late, which at worst marks a lazy-safe file eager.
#
# NOTE: the awk program is single-quoted, so it must not contain apostrophes.
analyze_file() {
  local file="$1"
  awk '
    # Shebang, comment-only and blank lines have no effect at source time.
    /^#!/ { next }
    /^[[:space:]]*#/ { next }
    /^[[:space:]]*$/ { next }
    {
      stripped = $0
      sub(/^[[:space:]]+/, "", stripped)
      sub(/[[:space:]]+$/, "", stripped)

      # Function header: `funcname()` with EMPTY parens, or the `function`
      # keyword form. Empty parens are required so that `if (...)`,
      # `for (...)`, `while (...)` in embedded awk/perl code are not
      # misread as function definitions.
      is_fn_paren = (stripped ~ /^[A-Za-z_][A-Za-z0-9_]*[[:space:]]*\([[:space:]]*\)/)
      is_fn_kw = (stripped ~ /^function[[:space:]]+[A-Za-z_][A-Za-z0-9_]*/)
      if (depth == 0 && (is_fn_paren || is_fn_kw)) {
        # Name = header text minus the `function` keyword, the parens and
        # anything after them.
        name = stripped
        sub(/[[:space:]]*\(.*$/, "", name)
        sub(/^function[[:space:]]+/, "", name)
        sub(/[[:space:]]+.*$/, "", name)
        print "fn:" name

        # Net brace balance on the header line:
        #   one-liner `name() { body; }`  -> balanced, depth stays 0
        #   opener    `name() {`          -> +1
        #   brace on the next line        -> no brace here, wait for `{`
        tmp = stripped
        n_open = gsub(/\{/, "{", tmp)
        tmp = stripped
        n_close = gsub(/\}/, "}", tmp)
        delta = n_open - n_close
        if (delta > 0) depth += delta
        # Assign (not just set) so a stale flag from an earlier brace-less
        # header cannot survive past this header.
        expecting_open = (n_open == 0)
        next
      }

      # Bare `{` right after a brace-less header opens that function.
      if (depth == 0 && expecting_open && stripped == "{") {
        depth++
        expecting_open = 0
        next
      }

      # Bare `}` closes one level.
      if (depth > 0 && stripped == "}") { depth--; next }

      # Any other line inside a function: only the brace at the very start
      # or very end of the line counts, so `${var}` and one-line `{ ...; }`
      # groups in the middle of a line are ignored.
      if (depth > 0) {
        if (stripped ~ /^\}/) depth--
        if (stripped ~ /\{[[:space:]]*$/) depth++
        next
      }

      # Depth 0 and not a function header -> source-time side effect.
      # Keep scanning so later function definitions are still reported.
      expecting_open = 0
      print "eager:"
    }
  ' "$file"
}
# The manifest is written into ARRAYS_DIR; create it up front.
mkdir -p "$ARRAYS_DIR"

# Scan results, filled in by scan_root:
#   fn_to_file     funcname -> path of the defining file, relative to its root
#   fn_to_root     funcname -> "scripts" or "containers", so stub emission can
#                  pick the matching ${install_*_dir} prefix
#   fn_collisions  funcname -> tab-separated list of files defining that name
#   eager_root     relpath  -> root label, the same idea for LP_EAGER_FILES
#   eager_files    relpaths of files flagged as having source-time side effects
declare -A fn_to_file fn_to_root fn_collisions eager_root
declare -a eager_files

# Counters for the summary printed at the end.
total_files=0
total_fns=0
# scan_root DIR LABEL MAXDEPTH
#
# Scan every *.sh file under DIR (find -maxdepth MAXDEPTH) and record the
# results in the global tables:
#   fn_to_file / fn_to_root   for every function analyze_file reports
#   fn_collisions             when a name was already seen in a different file
#   eager_files / eager_root  for every file analyze_file flags as eager
#   total_files / total_fns   running counters
# LABEL is "scripts" or "containers". Skip rules are scoped per root and apply
# to the path relative to DIR: scripts/ uses should_skip(), containers/ skips
# anything under a resources/ directory.
scan_root() {
  local root_dir="$1" root_label="$2" depth="$3"
  # Loop variables are declared local so nothing leaks into the caller.
  local file rel tag name is_eager

  while IFS= read -r -d '' file; do
    # find prefixes every result with the start directory, so stripping that
    # prefix yields the root-relative path without forking realpath per file.
    rel="${file#"${root_dir%/}/"}"

    if [[ "$root_label" == "scripts" ]]; then
      should_skip "$rel" && continue
    else
      # Container-side skips: anything under a resources/ subdir is data,
      # not code.
      case "$rel" in
        */resources/*) continue ;;
      esac
    fi

    total_files=$((total_files + 1))
    is_eager=0
    while IFS= read -r tag; do
      case "$tag" in
        fn:*)
          name="${tag#fn:}"
          # Same name already seen in another file: remember both paths.
          # The first collision seeds the list with the earlier file.
          if [[ -n "${fn_to_file[$name]:-}" && "${fn_to_file[$name]}" != "$rel" ]]; then
            fn_collisions[$name]="${fn_collisions[$name]:-${fn_to_file[$name]}}"$'\t'"$rel"
          fi
          # Last file scanned wins.
          fn_to_file[$name]="$rel"
          fn_to_root[$name]="$root_label"
          total_fns=$((total_fns + 1))
          ;;
        eager:)
          is_eager=1
          ;;
      esac
    done < <(analyze_file "$file")

    if (( is_eager )); then
      eager_files+=("$rel")
      eager_root["$rel"]="$root_label"
    fi
  done < <(find "$root_dir" -maxdepth "$depth" -type f -name '*.sh' -print0)
}
# scripts/ gets an effectively unbounded walk (depth 99).
scan_root "$SCRIPTS_DIR" scripts 99
# containers/ is optional; when present it is walked to depth 3, matching the
# sourceScanFiles "containers" limit.
[[ ! -d "$CONTAINERS_DIR" ]] || scan_root "$CONTAINERS_DIR" containers 3
# Emit the manifest: header, LP_FN_MAP, LP_FN_ROOT, LP_EAGER_FILES and one
# autoload stub per function, all redirected into $OUTPUT in one go.
#
# Every loop skips empty input lines. `printf '%s\n'` with an empty array
# still prints one blank line, which would otherwise turn into a `[]=""`
# entry (a bad subscript when the manifest is sourced), a nameless stub and a
# bogus "scripts:" eager entry whenever a scan finds nothing.
{
  printf '#!/bin/bash\n\n'
  printf '# This file is auto-generated by generate_function_manifest.sh\n'
  printf '# Do not edit manually — run\n'
  printf '# ./scripts/source/files/generate_function_manifest.sh run\n\n'
  printf '# Function name → relative path. Used by the lazy loader (LP_LAZY=1)\n'
  printf '# to install an autoload stub for each public function. First call to a\n'
  printf '# stub sources the real file, which redefines the function with the real\n'
  printf '# body; subsequent calls hit the real one directly. Path is relative to\n'
  printf '# install_scripts_dir for "scripts" entries, install_containers_dir for\n'
  printf '# "containers" entries — see LP_FN_ROOT below.\n'
  printf 'declare -gA LP_FN_MAP=(\n'
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    printf ' [%s]="%s"\n' "$name" "${fn_to_file[$name]}"
  done < <(printf '%s\n' "${!fn_to_file[@]}" | sort)
  printf ')\n\n'
  printf '# Per-function root selector — "scripts" or "containers". Mirrors\n'
  printf '# LP_FN_MAP; used by the loader / debugging tools.\n'
  printf 'declare -gA LP_FN_ROOT=(\n'
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    printf ' [%s]="%s"\n' "$name" "${fn_to_root[$name]}"
  done < <(printf '%s\n' "${!fn_to_root[@]}" | sort)
  printf ')\n\n'
  printf '# Files with top-level side effects. Lazy mode MUST source these\n'
  printf '# unconditionally — deferring them would skip the side effect, not just\n'
  printf '# defer a function definition. Stored as "<root>:<relpath>" so the\n'
  printf '# loader picks the right base dir; existing entries without a prefix\n'
  printf '# (pre-Phase-5 manifests) default to scripts/.\n'
  printf 'LP_EAGER_FILES=(\n'
  while IFS= read -r f; do
    [[ -n "$f" ]] || continue
    # No `local` here — we are inside a `{ … } > FILE` command group, not
    # a function. `local` errors out at this scope in bash.
    root="${eager_root[$f]:-scripts}"
    printf ' "%s:%s"\n' "$root" "$f"
  done < <(printf '%s\n' "${eager_files[@]}" | sort -u)
  printf ')\n\n'
  printf '# Autoload stubs — one per public function. First call sources the\n'
  printf '# real file (which redefines this stub with the real body), then\n'
  printf '# re-invokes. Sourced inline instead of eval-in-loop because bash\n'
  printf '# parses one large file faster than it evals snippets at startup.\n'
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    # Pick the base-directory variable that matches the function's root.
    root="${fn_to_root[$name]}"
    case "$root" in
      containers) base_var='install_containers_dir' ;;
      *) base_var='install_scripts_dir' ;;
    esac
    printf '%s() { source "${%s}%s"; %s "$@"; }\n' \
      "$name" "$base_var" "${fn_to_file[$name]}" "$name"
  done < <(printf '%s\n' "${!fn_to_file[@]}" | sort)
} > "$OUTPUT"
# Summary of the run, on stdout.
isSuccessful "Wrote $(realpath --relative-to="$SCRIPTS_DIR" "$OUTPUT")"
isNotice "Scanned $total_files files, indexed $total_fns function definitions"
isNotice "${#eager_files[@]} files flagged eager (will always source)"
# Collisions: report so they can be audited. The manifest reflects last-write-
# wins, which matches the existing eager-load semantics, so behaviour is
# identical — the warnings are about *avoidable* fragility, not bugs.
# The whole report goes to stderr, as the file header documents, so it stays
# visible when stdout is redirected or discarded.
if (( ${#fn_collisions[@]} > 0 )); then
  {
    isNotice "Function name collisions (last write wins, matches eager-load behaviour):"
    while IFS= read -r name; do
      # Each entry is a tab-separated list of the files defining this name.
      IFS=$'\t' read -ra files <<< "${fn_collisions[$name]}"
      printf ' %s\n' "$name"
      for f in "${files[@]}"; do printf ' - %s\n' "$f"; done
    done < <(printf '%s\n' "${!fn_collisions[@]}" | sort)
  } >&2
fi
fi