scripts/source/loading/initilize_files.sh gains an LP_LAZY=1 branch:
- Sources scripts/source/files/arrays/function_manifest.sh once. The
manifest defines LP_FN_MAP, LP_EAGER_FILES, AND ~700 autoload stubs
(precompiled by the generator — one parse cost vs evaluating 700
snippets at startup).
- Eager-sources every file listed in LP_EAGER_FILES (top-level side
effects: variable assignments, source calls, bare commands). These
can't safely be deferred — they'd skip the side effect, not just the
function definition.
- Skips the bulk loop that sources every files_to_source[@] entry.
Default behaviour (LP_LAZY unset or 0) is byte-identical to the previous
loader — every file gets eager-sourced up front. Long-running processes
(WebUI service, task processor) leave LP_LAZY unset because their first
call to anything wants the function already hot.
Each autoload stub looks like:
funcname() {
source "${install_scripts_dir}path/to/file.sh"
funcname "$@"
}
First call sources the real file, which redefines the function with the
real body; the stub's trailing `funcname "$@"` then calls the freshly-
defined real implementation. Sourcing the file also redefines stubs for
any sibling functions the same file declares, so they don't re-source.
Safety nets:
- Missing manifest → fall back to eager loading (`export LP_LAZY=0`).
No regression risk if someone enables LP_LAZY=1 on a stale install
whose regen never ran.
- LP_LOAD_TRACE=1 still works in lazy mode — it records the manifest
parse + each eager file (tagged LAZY-manifest / LAZY-EAGER) so Phase 4
can measure the actual saving.
No automatic flip yet — this commit only adds the path. Phase 4 will set
LP_LAZY=1 by default for the CLI entrypoint (and re-measure with the
trace tool from Phase 1).
Signed-off-by: librelad <librelad@digitalangels.vip>
242 lines
11 KiB
Bash
242 lines
11 KiB
Bash
#!/bin/bash
|
||
|
||
# Function manifest generator — sidekick to generate_arrays.sh, supports the
|
||
# lazy-load path. Scans every script under scripts/ for top-level function
|
||
# definitions and writes scripts/source/files/arrays/function_manifest.sh:
|
||
#
|
||
# declare -gA LP_FN_MAP=(
|
||
# [funcname]="rel/path/to/file.sh"
|
||
# ...
|
||
# )
|
||
# LP_EAGER_FILES=( "rel/path1.sh" "rel/path2.sh" ... )
|
||
#
|
||
# LP_FN_MAP is what the lazy loader uses to install autoload stubs:
|
||
# funcname() { source "$install_scripts_dir${LP_FN_MAP[funcname]}"; funcname "$@"; }
|
||
#
|
||
# LP_EAGER_FILES are files with side effects at source time (set vars, run
|
||
# commands, etc.) that the lazy loader MUST source unconditionally — skipping
|
||
# them would skip the side effect, not just defer a function definition.
|
||
#
|
||
# Heuristic for eager detection (pragmatic, not a real bash parser):
|
||
# - Walk the file line-by-line, tracking { } depth to know "inside function".
|
||
# - A file is LAZY-SAFE iff every non-blank/non-comment line outside
# functions is either: (a) a function header `funcname() [{`, or (b) the
# `{` / `}` that opens or closes a function body. `local`/`declare` are
# only tolerated inside functions; at top level they count as side effects.
|
||
# - Anything else at depth 0 (assignments, source calls, bare commands) →
|
||
# mark file EAGER. False positives are harmless (file just stays eager-
|
||
# loaded, same as today). False negatives WOULD be bugs, so the heuristic
|
||
# errs on the safe side.
|
||
#
|
||
# Collisions: if two files define the same function name, the LAST scan wins
|
||
# in LP_FN_MAP (matches what eager loading does — last source wins). All
|
||
# collisions are reported to stderr so they can be audited.
|
||
#
|
||
# Usage: ./generate_function_manifest.sh run
|
||
#
|
||
# SAFETY: only runs when executed directly with 'run' (mirrors generate_arrays.sh).
|
||
|
||
# Direct-execution guard: the body below runs only when this file is the script
# being executed (BASH_SOURCE[0] equals $0, i.e. it was not sourced) AND the
# first argument is the literal word "run". Otherwise nothing happens.
if [[ "${BASH_SOURCE[0]}" == "${0}" && "$1" == "run" ]]; then
|
||
|
||
# Resolve the directory layout from this script's own location.
#   SCRIPT_DIR  - directory containing this generator
#   ARRAYS_DIR  - "arrays" directory next to it (manifest destination)
#   SCRIPTS_DIR - two levels above SCRIPT_DIR (root that gets scanned)
#   OUTPUT      - manifest file that is (re)written
#
# `cd` output is discarded because `cd` echoes the target directory when it
# resolves a relative path through CDPATH, which would otherwise end up inside
# SCRIPT_DIR. `--` protects against paths that begin with a dash. If the
# directory cannot be resolved we stop, because every path below derives from it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)" || {
  printf 'Error: cannot resolve the directory of %s\n' "${BASH_SOURCE[0]}" >&2
  exit 1
}
ARRAYS_DIR="$SCRIPT_DIR/arrays"
SCRIPTS_DIR="$(dirname -- "$(dirname -- "$SCRIPT_DIR")")"
OUTPUT="$ARRAYS_DIR/function_manifest.sh"
|
||
|
||
# ANSI escape sequences used by the status helpers. They are stored as literal
# backslash sequences and only become real escapes when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'
|
||
# Print a green "✓ Success" marker followed by the message in $1 to stdout.
isSuccessful() {
  local message="$1"
  echo -e "${GREEN}✓ Success${NC} ${message}"
}
|
||
# Print a yellow "! Notice" marker followed by the message in $1 to stdout.
isNotice() {
  local message="$1"
  echo -e "${YELLOW}! Notice${NC} ${message}"
}
|
||
# Print a red "✗ Error" marker followed by the message in $1 to stderr.
isError() {
  local message="$1"
  echo -e "${RED}✗ Error${NC} ${message}" >&2
}
|
||
|
||
# Skip-list mirrors generate_arrays.sh — these are either deployment targets
|
||
# (peer/peer_shell.sh runs standalone via sshd's forced-command, never sourced),
|
||
# or build infrastructure that the loader bootstraps separately.
|
||
# Decide whether a script must be left out of the manifest.
# Arguments: $1 - path of the script relative to the scripts/ root
# Returns:   0 when the file must be skipped, 1 when it should be analysed
should_skip() {
  local rel="$1"
  case "$rel" in
    # The manifest this generator writes. It consists of one autoload stub per
    # function, so scanning the copy left by a previous run would index every
    # stub as a definition and could map functions back onto the manifest.
    source/files/arrays/function_manifest.sh) return 0 ;;
    source/files/app_files.sh | source/files/cli_files.sh) return 0 ;;
    source/files/generate_arrays.sh | source/files/generate_function_manifest.sh) return 0 ;;
    source/loading/check_files.sh | source/loading/initilize_files.sh | source/loading/scan_files.sh) return 0 ;;
    source/load_sources.sh | source/paths.sh) return 0 ;;
    webui/data/generators/webui_test_generate.sh) return 0 ;;
    peer/peer_shell.sh) return 0 ;;
    unused/* | system/* | release/*) return 0 ;;
  esac
  return 1
}
|
||
|
||
# Scan one shell file and describe it as tagged lines on stdout:
#   fn:<funcname>  - a function definition was found at brace depth 0
#   eager:         - a line outside any function is neither a function header
#                    nor the brace that opens a function body (printed once per
#                    such line, so it may repeat)
#
# Arguments: $1 - path of the file to analyse
#
# This is a line-based heuristic, not a shell parser. Brace depth is tracked
# only through braces in anchored positions: a `{` that ends a line opens a
# level, a `}` that starts a line closes one. Braces in the middle of a line
# (parameter expansions such as ${var}, one-line groups) are ignored. Closers
# that carry trailing text (`} > file`, `} else {`, `} # note`, or the end of
# an embedded program followed by its arguments) are counted, so depth returns
# to 0 at the end of the function and later definitions and top-level
# statements in the same file are still seen.
analyze_file() {
  local file="$1"
  awk '
    # Shebang, comment-only and blank lines have no effect at source time.
    /^#!/ { next }
    /^[[:space:]]*#/ { next }
    /^[[:space:]]*$/ { next }

    {
      stripped = $0
      sub(/^[[:space:]]+/, "", stripped)
      sub(/[[:space:]]+$/, "", stripped)

      # Function header: `name()` with EMPTY parens, or `function name`.
      # Requiring empty parens keeps `if (...)`, `for (...)`, `while (...)`
      # in embedded awk/perl code from being read as definitions.
      is_fn_paren = (stripped ~ /^[A-Za-z_][A-Za-z0-9_]*[[:space:]]*\([[:space:]]*\)/)
      is_fn_kw = (stripped ~ /^function[[:space:]]+[A-Za-z_][A-Za-z0-9_]*/)

      if (depth == 0 && (is_fn_paren || is_fn_kw)) {
        # Name = text before the first `(`, minus the `function` keyword,
        # cut at the first remaining whitespace.
        name = stripped
        sub(/[[:space:]]*\(.*$/, "", name)
        sub(/^function[[:space:]]+/, "", name)
        sub(/[[:space:]]+.*$/, "", name)
        print "fn:" name

        # Net brace balance of the header line itself:
        #   `name() { body; }`  equal counts -> depth stays 0
        #   `name() {`          +1           -> depth becomes 1
        #   `name()`            no brace     -> expect `{` on the next line
        tmp = stripped
        n_open = gsub(/\{/, "{", tmp)
        tmp = stripped
        n_close = gsub(/\}/, "}", tmp)
        delta = n_open - n_close

        expecting_open = 0
        if (delta > 0) depth += delta
        else if (n_open == 0) expecting_open = 1
        next
      }

      # Bare `{` right after a brace-less header opens that function body.
      if (depth == 0 && expecting_open && stripped == "{") {
        depth++
        expecting_open = 0
        next
      }

      # Inside a function: only anchored braces move the depth. A line such
      # as `} else {` closes one level and opens another, for a net of zero.
      if (depth > 0) {
        if (stripped ~ /^\}/) depth--
        if (stripped ~ /\{[[:space:]]*$/) depth++
        next
      }

      # Depth 0 and not a recognised header: a top-level side effect. Any
      # pending brace-on-next-line expectation is stale by now, so drop it;
      # otherwise a later top-level `{ ...; }` group would be taken for a
      # function body. Scanning continues to find further definitions.
      expecting_open = 0
      print "eager:"
    }
  ' "$file"
}
|
||
|
||
# The manifest is written into ARRAYS_DIR later on; stop now if it cannot exist.
mkdir -p -- "$ARRAYS_DIR" || {
  isError "Cannot create $ARRAYS_DIR"
  exit 1
}

declare -A fn_to_file=()     # function name -> relative path of the defining file
declare -A fn_collisions=()  # function name -> tab-separated list of every defining file
declare -a eager_files=()    # relative paths of files with top-level side effects

total_files=0
total_fns=0

# Visit every regular *.sh file under SCRIPTS_DIR. `-type f` leaves out
# symlinks; hidden files and directories are NOT filtered by this find
# (NOTE(review): the earlier comment claimed they were - confirm the intent).
# The list is sorted bytewise so that "last scan wins" picks the same file on
# every machine instead of depending on directory traversal order.
while IFS= read -r -d '' file; do
  rel=$(realpath --relative-to="$SCRIPTS_DIR" "$file")
  should_skip "$rel" && continue

  total_files=$((total_files + 1))

  is_eager=0
  while IFS= read -r tag; do
    case "$tag" in
      fn:*)
        name="${tag#fn:}"
        # Same name already seen in a different file: remember both paths.
        # The first collision seeds the list with the earlier file.
        if [[ -n "${fn_to_file[$name]:-}" && "${fn_to_file[$name]}" != "$rel" ]]; then
          fn_collisions[$name]="${fn_collisions[$name]:-${fn_to_file[$name]}}"$'\t'"$rel"
        fi
        fn_to_file[$name]="$rel"
        total_fns=$((total_fns + 1))
        ;;
      eager:)
        is_eager=1
        ;;
    esac
  done < <(analyze_file "$file")

  if ((is_eager)); then
    eager_files+=("$rel")
  fi
done < <(find "$SCRIPTS_DIR" -type f -name '*.sh' -print0 | LC_ALL=C sort -z)
|
||
|
||
# Emit the manifest.
|
||
# Print the complete manifest to stdout: the LP_FN_MAP associative array, the
# LP_EAGER_FILES list and one autoload stub per indexed function.
# Globals:  fn_to_file (read), eager_files (read)
# Outputs:  manifest text on stdout
#
# Names and paths are sorted so regenerated manifests diff cleanly. Empty
# inputs are handled explicitly: `printf "%s\n"` with no arguments still prints
# one empty line, which would otherwise produce an empty-key map entry, an
# empty eager path and a nameless stub.
emit_manifest() {
  local name path
  local -a sorted_names=() sorted_eager=()

  if ((${#fn_to_file[@]} > 0)); then
    mapfile -t sorted_names < <(printf '%s\n' "${!fn_to_file[@]}" | sort)
  fi
  if ((${#eager_files[@]} > 0)); then
    mapfile -t sorted_eager < <(printf '%s\n' "${eager_files[@]}" | sort -u)
  fi

  printf '%s\n' \
    '#!/bin/bash' \
    '' \
    '# This file is auto-generated by generate_function_manifest.sh' \
    '# Do not edit manually — run' \
    '# ./scripts/source/files/generate_function_manifest.sh run' \
    '' \
    '# Function name → relative path. Used by the lazy loader (LP_LAZY=1)' \
    '# to install an autoload stub for each public function. First call to a' \
    '# stub sources the real file, which redefines the function with the real' \
    '# body; subsequent calls hit the real one directly.' \
    'declare -gA LP_FN_MAP=('
  for name in "${sorted_names[@]}"; do
    printf ' [%s]="%s"\n' "$name" "${fn_to_file[$name]}"
  done
  printf ')\n\n'

  printf '%s\n' \
    '# Files with top-level side effects (variable assignments, source calls,' \
    '# command invocations outside any function). Lazy mode MUST source these' \
    '# unconditionally — deferring them would skip the side effect, not just' \
    '# defer a function definition.' \
    'LP_EAGER_FILES=('
  for path in "${sorted_eager[@]}"; do
    printf ' "%s"\n' "$path"
  done
  printf ')\n\n'

  printf '%s\n' \
    '# Autoload stubs — one per public function. First call sources the' \
    '# real file (which redefines this stub with the real body), then' \
    '# re-invokes. Sourced inline instead of eval-in-loop because bash' \
    '# parses one large file faster than it evals 700 small snippets.' \
    '# Only emitted when the manifest is read; behaviour-neutral when the' \
    '# loader does not flip into LP_LAZY=1 mode.'
  # ${install_scripts_dir} and "$@" are written literally; they are expanded
  # when the stub runs, not now.
  for name in "${sorted_names[@]}"; do
    printf '%s() { source "${install_scripts_dir}%s"; %s "$@"; }\n' \
      "$name" "${fn_to_file[$name]}" "$name"
  done
}

# Emit the manifest.
emit_manifest > "$OUTPUT"
|
||
|
||
# Summary of the run: where the manifest went and how much was indexed.
manifest_rel="$(realpath --relative-to="$SCRIPTS_DIR" "$OUTPUT")"
isSuccessful "Wrote ${manifest_rel}"
isNotice "Scanned $total_files files, indexed $total_fns function definitions"
isNotice "${#eager_files[@]} files flagged eager (will always source)"

# Collision report. The manifest keeps the last file scanned for each name;
# every file that defines a colliding name is listed so the duplicates can be
# audited.
collision_count=${#fn_collisions[@]}
if ((collision_count > 0)); then
  isNotice "Function name collisions (last write wins, matches eager-load behaviour):"
  mapfile -t collision_names < <(printf '%s\n' "${!fn_collisions[@]}" | sort)
  for name in "${collision_names[@]}"; do
    # Each value is a tab-separated list of the files defining this name.
    IFS=$'\t' read -ra files <<< "${fn_collisions[$name]}"
    printf ' %s\n' "$name"
    for f in "${files[@]}"; do
      printf ' - %s\n' "$f"
    done
  done
fi
|
||
|
||
# Closes the guard that limits this script to direct execution with "run".
fi
|