#!/bin/bash

# Function manifest generator — sidekick to generate_arrays.sh, supports the
# lazy-load path. Scans every script under scripts/ for top-level function
# definitions and writes scripts/source/files/arrays/function_manifest.sh:
#
#   declare -gA LP_FN_MAP=(
#       [funcname]="rel/path/to/file.sh"
#       ...
#   )
#   LP_EAGER_FILES=( "rel/path1.sh" "rel/path2.sh" ... )
#
# LP_FN_MAP is what the lazy loader uses to install autoload stubs:
#   funcname() { source "$install_scripts_dir${LP_FN_MAP[funcname]}"; funcname "$@"; }
#
# LP_EAGER_FILES are files with side effects at source time (set vars, run
# commands, etc.) that the lazy loader MUST source unconditionally — skipping
# them would skip the side effect, not just defer a function definition.
#
# Heuristic for eager detection (pragmatic, not a real bash parser):
#   - Walk the file line-by-line, tracking { } depth to know "inside function".
#   - A file is LAZY-SAFE iff every non-blank/non-comment line outside
#     functions is either: (a) a function header `funcname() [{`, (b) `}`
#     closing a function, or (c) a `local`/`declare` only inside functions.
#   - Anything else at depth 0 (assignments, source calls, bare commands) →
#     mark file EAGER. False positives are harmless (file just stays eager-
#     loaded, same as today). False negatives WOULD be bugs, so the heuristic
#     errs on the safe side.
#
# Collisions: if two files define the same function name, the LAST scan wins
# in LP_FN_MAP (matches what eager loading does — last source wins). All
# collisions are reported to stderr so they can be audited.
#
# Usage: ./generate_function_manifest.sh run
#
# SAFETY: only runs when executed directly with 'run' (mirrors generate_arrays.sh).
if [[ "${BASH_SOURCE[0]}" == "${0}" && "$1" == "run" ]]; then
    # Layout assumption: this script lives two directories below the scripts
    # root (<root>/source/files/), with the generated arrays in ./arrays.
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    ARRAYS_DIR="$SCRIPT_DIR/arrays"
    SCRIPTS_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
    OUTPUT="$ARRAYS_DIR/function_manifest.sh"

    RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[0;33m'; NC='\033[0m'

    # Status printers. $1 is the message; only isError writes to stderr.
    isSuccessful() { echo -e "${GREEN}✓ Success${NC} $1"; }
    isNotice() { echo -e "${YELLOW}! Notice${NC} $1"; }
    isError() { echo -e "${RED}✗ Error${NC} $1" >&2; }

    # should_skip REL
    # Returns 0 when REL (a path relative to the scripts root) must be left out
    # of the scan, 1 otherwise.
    #
    # Skip-list mirrors generate_arrays.sh — these are either deployment targets
    # (peer/peer_shell.sh runs standalone via sshd's forced-command, never sourced),
    # or build infrastructure that the loader bootstraps separately.
    should_skip() {
        local rel="$1"
        case "$rel" in
            source/files/app_files.sh|source/files/cli_files.sh) return 0 ;;
            source/files/generate_arrays.sh|source/files/generate_function_manifest.sh) return 0 ;;
            # The manifest itself is sourced explicitly by the lazy loader — don't
            # also flag it eager, that would double-source (28ms × 2 ≈ wasted 55ms).
            source/files/arrays/function_manifest.sh) return 0 ;;
            # The files_*.sh arrays are only meaningful for the eager loader's
            # iteration. Lazy mode doesn't iterate them, so eager-sourcing them
            # under lazy mode is pure overhead. Skip from the scan; the eager
            # loader still sources them via its own existing path.
            source/files/arrays/files_*.sh) return 0 ;;
            source/loading/check_files.sh|source/loading/initilize_files.sh|source/loading/scan_files.sh) return 0 ;;
            source/load_sources.sh|source/paths.sh) return 0 ;;
            webui/data/generators/webui_test_generate.sh) return 0 ;;
            peer/peer_shell.sh) return 0 ;;
            unused/*|system/*|release/*) return 0 ;;
        esac
        return 1
    }

    # analyze_file FILE
    # Walks FILE line by line and prints one tag per finding on stdout:
    #   fn:<name>  — a function definition was found at depth 0
    #   eager:     — a depth-0 line that is not a function header, i.e. the file
    #                has a top-level side effect (may be printed more than once)
    #
    # Depth tracking is anchored: inside a function a line that STARTS with `}`
    # closes one level and a line that ENDS with `{` opens one. Anchoring avoids
    # most false positives from `${var}` expansions, strings and heredocs. This
    # is a heuristic, not a parser; when it gets confused it leaves the function
    # early, which can only produce extra `eager:` tags (harmless), never hide a
    # top-level side effect behind a function that appears to stay open.
    analyze_file() {
        local file="$1"
        awk '
        # Shebang, pure comments and blank lines have no effect at source time.
        /^#!/ { next }
        /^[[:space:]]*#/ { next }
        /^[[:space:]]*$/ { next }
        {
            stripped = $0
            sub(/^[[:space:]]+/, "", stripped)
            sub(/[[:space:]]+$/, "", stripped)

            # Function header: POSIX `funcname()` with EMPTY parens, or the
            # `function name` keyword form. Empty parens are required so that
            # `if (...)`, `for (...)`, `while (...)` in embedded awk/perl code
            # are not misread as function definitions.
            is_fn_paren = (stripped ~ /^[A-Za-z_][A-Za-z0-9_]*[[:space:]]*\([[:space:]]*\)/)
            is_fn_kw = (stripped ~ /^function[[:space:]]+[A-Za-z_][A-Za-z0-9_]*/)
            if (depth == 0 && (is_fn_paren || is_fn_kw)) {
                # Name = header minus the `function` keyword, the parens and
                # whatever follows them.
                name = stripped
                sub(/[[:space:]]*\(.*$/, "", name)
                sub(/^function[[:space:]]+/, "", name)
                sub(/[[:space:]]+.*$/, "", name)
                print "fn:" name

                # Net brace balance on the header line:
                #   one-liner `name() { body; }` -> balanced, depth stays 0
                #   opener    `name() {`         -> +1
                #   bare      `name()`           -> brace expected on next line
                tmp = stripped
                n_open = gsub(/\{/, "{", tmp)
                tmp = stripped
                n_close = gsub(/\}/, "}", tmp)
                delta = n_open - n_close
                if (delta > 0) depth += delta
                else if (n_open == 0) expecting_open = 1
                next
            }

            # Bare `{` right after a brace-less header opens that function.
            if (depth == 0 && expecting_open && stripped == "{") {
                depth++
                expecting_open = 0
                next
            }

            if (depth > 0) {
                # A leading `}` closes a level even when something follows it
                # (`} > file`, `} else {`, `},`). Matching only a bare `}` would
                # leave depth stuck above 0 for the rest of the file, hiding
                # later functions and later top-level side effects.
                if (stripped ~ /^\}/) depth--
                if (stripped ~ /\{[[:space:]]*$/) depth++
                next
            }

            # Depth 0 and not a function header -> top-level side effect.
            # Drop any pending brace expectation so a later top-level `{ ...; }`
            # group is not mistaken for a function body.
            expecting_open = 0
            print "eager:"
        }
        ' "$file"
    }

    mkdir -p "$ARRAYS_DIR" || { isError "Could not create $ARRAYS_DIR"; exit 1; }

    declare -A fn_to_file=()
    declare -A fn_collisions=()   # name -> "file1\tfile2..."
    declare -a eager_files=()
    total_files=0
    total_fns=0

    # Scan every regular *.sh file under the scripts root (-type f, so symlinks
    # are not picked up). The list is byte-sorted so that "last scan wins" for a
    # colliding name is the same on every machine instead of depending on
    # directory order.
    while IFS= read -r -d '' file; do
        # find prefixes every hit with "$SCRIPTS_DIR/", so stripping that prefix
        # yields the relative path without forking realpath per file.
        rel="${file#"${SCRIPTS_DIR%/}"/}"
        should_skip "$rel" && continue
        total_files=$((total_files + 1))

        is_eager=0
        while IFS= read -r tag; do
            case "$tag" in
                fn:*)
                    name="${tag#fn:}"
                    # Same name already seen in a DIFFERENT file -> collision.
                    if [[ -n "${fn_to_file[$name]:-}" && "${fn_to_file[$name]}" != "$rel" ]]; then
                        fn_collisions[$name]="${fn_collisions[$name]:-${fn_to_file[$name]}}"$'\t'"$rel"
                    fi
                    fn_to_file[$name]="$rel"
                    total_fns=$((total_fns + 1))
                    ;;
                eager:)
                    is_eager=1
                    ;;
            esac
        done < <(analyze_file "$file")

        (( is_eager )) && eager_files+=("$rel")
    done < <(find "$SCRIPTS_DIR" -type f -name '*.sh' -print0 | LC_ALL=C sort -z)

    # Sorted views for stable diff output. Built only when there is something
    # to sort: printf '%s\n' with an empty array still prints one blank line,
    # which would otherwise end up in the manifest as `[]=""`, `""` and a
    # nameless `() { ... }` stub — a file that fails when sourced.
    sorted_fns=()
    if (( ${#fn_to_file[@]} > 0 )); then
        mapfile -t sorted_fns < <(printf '%s\n' "${!fn_to_file[@]}" | sort)
    fi
    sorted_eager=()
    if (( ${#eager_files[@]} > 0 )); then
        mapfile -t sorted_eager < <(printf '%s\n' "${eager_files[@]}" | sort -u)
    fi

    # Emit the manifest.
    {
        printf '#!/bin/bash\n\n'
        printf '# This file is auto-generated by generate_function_manifest.sh\n'
        printf '# Do not edit manually — run\n'
        printf '# ./scripts/source/files/generate_function_manifest.sh run\n\n'

        printf '# Function name → relative path. Used by the lazy loader (LP_LAZY=1)\n'
        printf '# to install an autoload stub for each public function. First call to a\n'
        printf '# stub sources the real file, which redefines the function with the real\n'
        printf '# body; subsequent calls hit the real one directly.\n'
        printf 'declare -gA LP_FN_MAP=(\n'
        for name in "${sorted_fns[@]}"; do
            printf ' [%s]="%s"\n' "$name" "${fn_to_file[$name]}"
        done
        printf ')\n\n'

        printf '# Files with top-level side effects (variable assignments, source calls,\n'
        printf '# command invocations outside any function). Lazy mode MUST source these\n'
        printf '# unconditionally — deferring them would skip the side effect, not just\n'
        printf '# defer a function definition.\n'
        printf 'LP_EAGER_FILES=(\n'
        for f in "${sorted_eager[@]}"; do
            printf ' "%s"\n' "$f"
        done
        printf ')\n\n'

        printf '# Autoload stubs — one per public function. First call sources the\n'
        printf '# real file (which redefines this stub with the real body), then\n'
        printf '# re-invokes. Sourced inline instead of eval-in-loop because bash\n'
        printf '# parses one large file faster than it evals 700 small snippets.\n'
        printf '# Only emitted when the manifest is read; behaviour-neutral when the\n'
        printf '# loader does not flip into LP_LAZY=1 mode.\n'
        for name in "${sorted_fns[@]}"; do
            printf '%s() { source "${install_scripts_dir}%s"; %s "$@"; }\n' \
                "$name" "${fn_to_file[$name]}" "$name"
        done
    } > "$OUTPUT" || { isError "Could not write $OUTPUT"; exit 1; }

    isSuccessful "Wrote ${OUTPUT#"${SCRIPTS_DIR%/}"/}"
    isNotice "Scanned $total_files files, indexed $total_fns function definitions"
    isNotice "${#eager_files[@]} files flagged eager (will always source)"

    # Collisions: report so they can be audited. The manifest reflects last-write-
    # wins, which matches the existing eager-load semantics, so behaviour is
    # identical — the warnings are about *avoidable* fragility, not bugs.
    # The whole report goes to stderr so it can be separated from the normal
    # progress output.
    if (( ${#fn_collisions[@]} > 0 )); then
        {
            isNotice "Function name collisions (last write wins, matches eager-load behaviour):"
            while IFS= read -r name; do
                IFS=$'\t' read -ra files <<< "${fn_collisions[$name]}"
                printf ' %s\n' "$name"
                for f in "${files[@]}"; do printf ' - %s\n' "$f"; done
            done < <(printf '%s\n' "${!fn_collisions[@]}" | sort)
        } >&2
    fi
fi