LibrePortal/scripts/crontab/task/crontab_check_processor.sh
librelad 875a60f90f LibrePortal v0.1.0 — initial release
A free, open, self-hosted app platform (GNU AGPLv3): one-click app deploys,
Traefik reverse proxy with automatic SSL, rootless Docker support, gluetun
VPN routing, and a web dashboard to manage it all.

Free & open forever to self-host; optional paid hosted services fund it.
See PROMISE.md.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Signed-off-by: librelad <librelad@digitalangels.vip>
2026-05-21 20:37:54 +01:00

742 lines
21 KiB
Bash
Executable File

#!/bin/bash
# ========================================
# LIBREPORTAL SYSTEM HEALTH CHECKER
# Comprehensive system validation and repair tool
# ========================================
# First positional argument. It is compared against "start_script" by the
# guard below and is also the selector of the dispatch `case` at the bottom
# of the file.
script_check_processor_flag="$1"
# Only run when executed directly, not when sourced
# NOTE(review): the guard admits only the literal "start_script", and the
# dispatch `case` sits inside this `if`. As written, its check, repair,
# report, maintenance and usage arms therefore look unreachable -- confirm
# whether that is intended.
if [[ "$script_check_processor_flag" == "start_script" ]]; then
# ========================================
# HEALTH CHECK CONFIGURATION
# ========================================
# Directory holding the task queue, its state files and the health logs.
# The value is assigned unconditionally, so any TASK_DIR inherited from the
# environment is replaced.
TASK_DIR="/docker/containers/libreportal/frontend/data/tasks"
LOCK_FILE="${TASK_DIR}/task_processor.lock"

# Files and sub-directories the validators look for underneath TASK_DIR.
CRITICAL_FILES=("queue.json" "current.json")
REQUIRED_DIRECTORIES=("tasks")

# systemd unit queried by the service checks.
SERVICE_NAME="libreportal"

# Thresholds and limits.
MIN_DISK_SPACE_KB=10240   # 10MB minimum
MAX_LOG_SIZE_MB=50        # 50MB max log size
HEALTH_CHECK_TIMEOUT=30   # 30 seconds max per check

# Health status codes.
HEALTH_STATUS_HEALTHY=0
HEALTH_STATUS_WARNING=1
HEALTH_STATUS_CRITICAL=2
HEALTH_STATUS_ERROR=3

# Where log lines and the JSON report are written.
HEALTH_LOG_FILE="${TASK_DIR}/health_check.log"
HEALTH_REPORT_FILE="${TASK_DIR}/health_report.json"
# ========================================
# HEALTH LOGGING SYSTEM
# ========================================
# Print one timestamped INFO record on stdout and append the same line to
# $HEALTH_LOG_FILE.
# Arguments: $1 - message text
# Returns:   exit status of tee
healthLogInfo() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [INFO] [HEALTH_CHECK] %s\n' "$stamp" "$1" | tee -a "$HEALTH_LOG_FILE"
}
# Print one timestamped ERROR record on stdout and append the same line to
# $HEALTH_LOG_FILE.
# Arguments: $1 - message text
# Returns:   exit status of tee
healthLogError() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [ERROR] [HEALTH_CHECK] %s\n' "$stamp" "$1" | tee -a "$HEALTH_LOG_FILE"
}
# Print one timestamped WARN record on stdout and append the same line to
# $HEALTH_LOG_FILE.
# Arguments: $1 - message text
# Returns:   exit status of tee
healthLogWarning() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [WARN] [HEALTH_CHECK] %s\n' "$stamp" "$1" | tee -a "$HEALTH_LOG_FILE"
}
# Print one timestamped SUCCESS record on stdout and append the same line to
# $HEALTH_LOG_FILE.
# Arguments: $1 - message text
# Returns:   exit status of tee
healthLogSuccess() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s [SUCCESS] [HEALTH_CHECK] %s\n' "$stamp" "$1" | tee -a "$HEALTH_LOG_FILE"
}
# ========================================
# COMPREHENSIVE SYSTEM VALIDATION
# ========================================
# Run every validator in order, bucket the failures by severity and report
# the overall result both in the log and as the return status.
#
# Globals:  HEALTH_STATUS_HEALTHY, HEALTH_STATUS_WARNING,
#           HEALTH_STATUS_CRITICAL, HEALTH_STATUS_ERROR (read; the numeric
#           defaults 0/1/2/3 are used when they are unset)
# Returns:  HEALTH_STATUS_CRITICAL if any critical check failed,
#           HEALTH_STATUS_ERROR    if any ordinary issue was found,
#           HEALTH_STATUS_WARNING  if only warnings were found,
#           HEALTH_STATUS_HEALTHY  otherwise.
#           Previously the function returned the status of its last log
#           call, so a caller switching on $? always saw "healthy".
validateSystemHealth() {
    healthLogInfo "Starting comprehensive system health check..."

    local issues=()
    local warnings=()
    local critical_issues=()

    # One row per check: severity|progress message|validator|failure tag
    local -a checks=(
        "critical|Checking LibrePortal installation...|validateLibrePortalInstallation|libreportal_not_installed"
        "critical|Checking Docker service...|validateDockerService|docker_service_not_running"
        "issue|Checking LibrePortal container...|validateContainerHealth|container_health_issues"
        "issue|Checking WebUI readiness...|validateWebUIReadiness|webui_not_ready"
        "warning|Checking directory structure...|validateDirectoryStructure|directory_structure_issues"
        "issue|Checking file system...|validateFileSystem|file_system_issues"
        "issue|Checking file permissions...|validatePermissions|permission_issues"
        "critical|Checking disk space...|validateDiskSpace|insufficient_disk_space"
        "issue|Checking task system...|validateTaskSystem|task_system_issues"
        "warning|Checking systemd service...|validateSystemService|service_not_optimal"
        "warning|Checking log health...|validateLogHealth|log_health_issues"
    )

    local check severity progress validator tag
    for check in "${checks[@]}"; do
        IFS='|' read -r severity progress validator tag <<<"$check"
        healthLogInfo "$progress"
        if ! "$validator"; then
            case "$severity" in
                critical) critical_issues+=("$tag") ;;
                issue)    issues+=("$tag") ;;
                warning)  warnings+=("$tag") ;;
            esac
        fi
    done

    # Report the most severe non-empty bucket and map it to a status code.
    if [ "${#critical_issues[@]}" -gt 0 ]; then
        healthLogError "❌ CRITICAL ISSUES FOUND: ${critical_issues[*]}"
        return "${HEALTH_STATUS_CRITICAL:-2}"
    elif [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "⚠️ ISSUES FOUND: ${issues[*]}"
        return "${HEALTH_STATUS_ERROR:-3}"
    elif [ "${#warnings[@]}" -gt 0 ]; then
        healthLogWarning "⚠️ WARNINGS: ${warnings[*]}"
        return "${HEALTH_STATUS_WARNING:-1}"
    fi

    healthLogSuccess "✅ All health checks passed"
    return "${HEALTH_STATUS_HEALTHY:-0}"
}
# Check that the `libreportal` command can be resolved and log its version.
# Returns: 0 when the command is found, 1 otherwise. The failure path used
#          to fall through with the status of the log call, so callers
#          never saw a failure.
validateLibrePortalInstallation() {
    if ! command -v libreportal >/dev/null 2>&1; then
        healthLogError "LibrePortal command not found"
        return 1
    fi

    # The version string is informational only; an empty value is tolerated.
    local version
    version=$(libreportal --version 2>/dev/null)
    healthLogSuccess "LibrePortal installed: $version"
    return 0
}
# Check that the docker CLI exists and that systemd reports the `docker`
# unit as active.
# Returns: 0 when both hold, 1 otherwise. Both failure paths used to fall
#          through with the status of the log call, i.e. success.
validateDockerService() {
    if ! command -v docker >/dev/null 2>&1; then
        healthLogError "Docker command not found"
        return 1
    fi

    if ! systemctl is-active --quiet docker; then
        healthLogError "Docker service is not running"
        return 1
    fi

    healthLogSuccess "Docker service is running"
    return 0
}
# Inspect the libreportal-service container: it must exist, report an "Up"
# status and have a process matching "node.*webui" inside it.
# Returns: 0 when all three hold, 1 otherwise. Previously a missing
#          container fell through and logged an empty "Container status:"
#          line, and no path ever returned non-zero.
validateContainerHealth() {
    local containerInfo
    containerInfo=$(docker ps -a --filter "name=libreportal-service" --format "{{.Status}}|{{.Names}}" 2>/dev/null)
    if [ -z "$containerInfo" ]; then
        healthLogError "LibrePortal WebUI container not found"
        return 1
    fi

    # First field of every "status|name" line is the human-readable status.
    local status
    status=$(printf '%s\n' "$containerInfo" | cut -d'|' -f1)
    if ! grep -q "Up" <<<"$status"; then
        healthLogWarning "Container status: $status"
        return 1
    fi

    # Check if container is responsive
    if docker exec libreportal-service pgrep -f "node.*webui" >/dev/null 2>&1; then
        healthLogSuccess "Container is running and responsive"
        return 0
    fi

    healthLogWarning "Container is running but WebUI process not found"
    return 1
}
# Probe the WebUI over HTTP on the host port published by the
# libreportal-service container.
# Returns: 0 when an endpoint answers and the main page looks like the
#          LibrePortal WebUI, 1 otherwise.
#          The original had no return statements: it probed with an empty
#          port, kept looping after a success and always finished with the
#          "WebUI not ready" warning.
validateWebUIReadiness() {
    # Get the actual port mapping from the container
    local portMapping
    portMapping=$(docker port libreportal-service 2>/dev/null | head -n 1)
    if [ -z "$portMapping" ]; then
        healthLogWarning "No port mapping found for libreportal-service container"
        return 1
    fi

    # Extract the host port: keep what follows the last ':' and drop every
    # non-digit character.
    local hostPort="${portMapping##*:}"
    hostPort="${hostPort//[^0-9]/}"
    if [ -z "$hostPort" ]; then
        healthLogWarning "Could not extract host port from: $portMapping"
        return 1
    fi

    healthLogInfo "Checking WebUI on port: $hostPort"

    # Try multiple endpoints to check if WebUI is ready
    local endpoints=(
        "/"                     # Main page
        "/api/health"           # Health endpoint (if exists)
        "/js/system-loader.js"  # A known JavaScript file
        "/css/main.css"         # A known CSS file
    )

    local endpoint
    for endpoint in "${endpoints[@]}"; do
        if ! curl -s --max-time 3 "http://localhost:$hostPort$endpoint" >/dev/null 2>&1; then
            continue
        fi
        healthLogInfo "WebUI is responding on port $hostPort (endpoint: $endpoint)"

        # Additional check: verify it's actually the LibrePortal WebUI
        if curl -s --max-time 3 "http://localhost:$hostPort/" 2>/dev/null \
            | grep -qiE 'libreportal|webui|dashboard'; then
            healthLogSuccess "WebUI is ready on port $hostPort"
            return 0
        fi

        # The identity check always fetches "/", so trying further endpoints
        # would only repeat the same warning.
        healthLogWarning "Port $hostPort is responding but doesn't appear to be LibrePortal WebUI"
        return 1
    done

    healthLogWarning "WebUI not ready on port $hostPort"
    return 1
}
# Verify that $TASK_DIR and every entry of REQUIRED_DIRECTORIES beneath it
# exist as directories.
# Globals: TASK_DIR, REQUIRED_DIRECTORIES (read)
# Returns: 0 when everything exists, 1 otherwise (the original logged the
#          problem but still reported success to its caller).
validateDirectoryStructure() {
    local issues=()

    # Check main tasks directory
    if [ ! -d "$TASK_DIR" ]; then
        issues+=("tasks_directory_missing")
    fi

    # Check subdirectories
    local dir
    for dir in "${REQUIRED_DIRECTORIES[@]}"; do
        if [ ! -d "$TASK_DIR/$dir" ]; then
            issues+=("directory_missing:$dir")
        fi
    done

    if [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "Directory issues: ${issues[*]}"
        return 1
    fi

    healthLogSuccess "Directory structure is valid"
    return 0
}
# Verify that every file listed in CRITICAL_FILES exists under $TASK_DIR and
# is both readable and writable by the current user.
# Globals: TASK_DIR, CRITICAL_FILES (read)
# Returns: 0 when all files pass, 1 otherwise (the original logged the
#          problem but still reported success to its caller).
validateFileSystem() {
    local issues=()

    # Check critical files; only the first failing property is recorded.
    local file filePath
    for file in "${CRITICAL_FILES[@]}"; do
        filePath="$TASK_DIR/$file"
        if [ ! -f "$filePath" ]; then
            issues+=("missing_file:$file")
        elif [ ! -r "$filePath" ]; then
            issues+=("file_not_readable:$file")
        elif [ ! -w "$filePath" ]; then
            issues+=("file_not_writable:$file")
        fi
    done

    if [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "File system issues: ${issues[*]}"
        return 1
    fi

    healthLogSuccess "File system is valid"
    return 0
}
# Verify that $TASK_DIR is writable and that each existing critical file is
# readable and writable by the current user. Missing files are skipped here.
# Globals: TASK_DIR, CRITICAL_FILES (read)
# Returns: 0 when no permission problem is found, 1 otherwise (the original
#          logged the problem but still reported success to its caller).
validatePermissions() {
    local issues=()

    # Check directory permissions
    if [ ! -w "$TASK_DIR" ]; then
        issues+=("directory_not_writable")
    fi

    # Check file permissions for critical files
    local file filePath
    for file in "${CRITICAL_FILES[@]}"; do
        filePath="$TASK_DIR/$file"
        [ -f "$filePath" ] || continue
        if [ ! -r "$filePath" ] || [ ! -w "$filePath" ]; then
            issues+=("permission_issues:$file")
        fi
    done

    if [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "Permission issues: ${issues[*]}"
        return 1
    fi

    healthLogSuccess "Permissions are valid"
    return 0
}
# Compare the free space of the file system holding $TASK_DIR with the
# configured minimum.
# Globals: TASK_DIR (read), MIN_DISK_SPACE_KB (read, defaults to 10240)
# Returns: 0 when enough space is available; 1 when there is too little or
#          the value cannot be determined.
#          Previously an empty df result reached `[ "" -lt N ]`, whose error
#          selected the "sufficient" branch, and no path returned non-zero.
validateDiskSpace() {
    local minKb="${MIN_DISK_SPACE_KB:-10240}"

    # -P keeps each file system on a single line and -k forces 1024-byte
    # units, so field 4 of line 2 is the available space in KB.
    local availableSpace
    availableSpace=$(df -P -k "$TASK_DIR" 2>/dev/null | awk 'NR==2 {print $4}')

    if [[ ! "$availableSpace" =~ ^[0-9]+$ ]]; then
        healthLogError "Could not determine disk space"
        return 1
    fi

    if [ "$availableSpace" -lt "$minKb" ]; then
        healthLogError "Insufficient disk space: ${availableSpace}KB available, ${minKb}KB required"
        return 1
    fi

    healthLogSuccess "Sufficient disk space: ${availableSpace}KB available"
    return 0
}
# Validate the task queue state: queue.json and current.json must parse as
# JSON, and every task_*.json file must be mentioned in one of them.
# Globals: TASK_DIR (read)
# Returns: 0 when no problem is found, 1 otherwise (the original logged the
#          problems but still reported success to its caller).
validateTaskSystem() {
    local issues=()

    # Validate queue.json / current.json format. The path is handed to
    # Python as an argument rather than pasted into the program text, so a
    # quote in the path cannot break the check. Without python3 the format
    # check is skipped instead of reporting every file as invalid.
    local jsonName jsonPath
    if command -v python3 >/dev/null 2>&1; then
        for jsonName in queue current; do
            jsonPath="$TASK_DIR/$jsonName.json"
            [ -f "$jsonPath" ] || continue
            if ! python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$jsonPath" 2>/dev/null; then
                issues+=("${jsonName}_json_invalid")
            fi
        done
    else
        healthLogWarning "python3 not found; skipping JSON format validation"
    fi

    # Check for orphaned task files
    local queueContent=""
    local currentContent=""
    if [ -f "$TASK_DIR/queue.json" ]; then
        queueContent=$(cat "$TASK_DIR/queue.json" 2>/dev/null)
    fi
    if [ -f "$TASK_DIR/current.json" ]; then
        currentContent=$(cat "$TASK_DIR/current.json" 2>/dev/null)
    fi

    # Find task files not referenced in queue or current. The id is matched
    # as a fixed string so regex metacharacters in a file name are literal.
    local taskFile taskId
    for taskFile in "$TASK_DIR"/task_*.json; do
        [ -f "$taskFile" ] || continue
        taskId=$(basename "$taskFile" .json)
        if ! grep -qF -- "$taskId" <<<"$queueContent $currentContent"; then
            issues+=("orphaned_task_file:$taskId")
        fi
    done

    if [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "Task system issues: ${issues[*]}"
        return 1
    fi

    healthLogSuccess "Task system is valid"
    return 0
}
# Check that the systemd unit named by $SERVICE_NAME is enabled and active.
# Globals: SERVICE_NAME (read)
# Returns: 0 when the unit is enabled and active, 1 otherwise (the original
#          logged a warning but still reported success to its caller).
validateSystemService() {
    if ! systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
        healthLogWarning "Systemd service is not enabled"
        return 1
    fi

    if ! systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
        healthLogWarning "Systemd service is enabled but not active"
        return 1
    fi

    healthLogSuccess "Systemd service is enabled and active"
    return 0
}
# Flag every *.log file directly under $TASK_DIR whose size, in whole MiB,
# exceeds the configured maximum.
# Globals: TASK_DIR (read), MAX_LOG_SIZE_MB (read, defaults to 50)
# Returns: 0 when no log is oversized, 1 otherwise (the original logged the
#          problem but still reported success to its caller).
validateLogHealth() {
    local maxMb="${MAX_LOG_SIZE_MB:-50}"
    local issues=()

    # Check log file sizes
    local logFile fileSize fileSizeMB
    for logFile in "$TASK_DIR"/*.log; do
        [ -f "$logFile" ] || continue

        # GNU stat syntax first, BSD syntax as the fallback.
        fileSize=$(stat -c%s "$logFile" 2>/dev/null || stat -f%z "$logFile" 2>/dev/null)
        [ -n "$fileSize" ] || continue

        fileSizeMB=$((fileSize / 1024 / 1024))
        if [ "$fileSizeMB" -gt "$maxMb" ]; then
            # ${logFile##*/} is the file name; unlike an unquoted basename
            # call it is safe for paths containing whitespace.
            issues+=("large_log_file:${logFile##*/}:${fileSizeMB}MB")
        fi
    done

    if [ "${#issues[@]}" -gt 0 ]; then
        healthLogWarning "Log health issues: ${issues[*]}"
        return 1
    fi

    healthLogSuccess "Log files are healthy"
    return 0
}
# ========================================
# ADVANCED REPAIR TOOLS
# ========================================
# Run every repair step in a fixed order, count how many succeeded and how
# many failed, and log a summary.
# Returns: exit status of the final log call.
repairSystemIssues() {
    healthLogInfo "Starting system repair process..."

    local okCount=0
    local failCount=0
    local repairStep

    # Order: directory structure, file system, permissions, task system,
    # systemd service.
    for repairStep in \
        repairDirectoryStructure \
        repairFileSystem \
        repairPermissions \
        repairTaskSystem \
        repairSystemService; do
        if "$repairStep"; then
            okCount=$((okCount + 1))
        else
            failCount=$((failCount + 1))
        fi
    done

    healthLogInfo "Repair completed: $okCount repaired, $failCount failed"

    if [ "$failCount" -ne 0 ]; then
        healthLogWarning "Some repairs failed"
    else
        healthLogSuccess "All repairs completed successfully"
    fi
}
# Create $TASK_DIR and every REQUIRED_DIRECTORIES entry beneath it when
# missing.
# Globals: TASK_DIR, REQUIRED_DIRECTORIES (read)
# Returns: 0 when every directory exists afterwards, 1 as soon as one
#          cannot be created. The original called `exit 1` here, which
#          terminated the whole script so the caller could never record the
#          failure or run the remaining repair steps.
repairDirectoryStructure() {
    healthLogInfo "Repairing directory structure..."

    # Create main tasks directory
    if [ ! -d "$TASK_DIR" ]; then
        healthLogInfo "Creating tasks directory: $TASK_DIR"
        if ! sudo mkdir -p "$TASK_DIR"; then
            healthLogError "Failed to create tasks directory"
            return 1
        fi
    fi

    # Create required subdirectories
    local dir dirPath
    for dir in "${REQUIRED_DIRECTORIES[@]}"; do
        dirPath="$TASK_DIR/$dir"
        [ -d "$dirPath" ] && continue
        healthLogInfo "Creating directory: $dirPath"
        if ! sudo mkdir -p "$dirPath"; then
            healthLogError "Failed to create directory: $dirPath"
            return 1
        fi
    done

    healthLogSuccess "Directory structure repaired"
    return 0
}
# Create any missing critical file with an empty JSON document: "[]" for
# queue.json and "{}" for current.json.
# Globals: TASK_DIR, CRITICAL_FILES (read)
# Returns: 0 when every missing file was created, 1 when at least one
#          creation failed. Failures used to be ignored and success was
#          always logged.
repairFileSystem() {
    healthLogInfo "Repairing file system..."

    local repaired=0
    local failed=0
    local file filePath emptyDoc

    # Create missing critical files
    for file in "${CRITICAL_FILES[@]}"; do
        filePath="$TASK_DIR/$file"
        [ -f "$filePath" ] && continue

        healthLogInfo "Creating missing file: $file"
        case "$file" in
            "queue.json")   emptyDoc="[]" ;;
            "current.json") emptyDoc="{}" ;;
            *)
                # No known initial content for this name; nothing is written.
                healthLogWarning "No template for critical file: $file"
                continue
                ;;
        esac

        if printf '%s\n' "$emptyDoc" | sudo tee "$filePath" >/dev/null; then
            repaired=$((repaired + 1))
        else
            healthLogError "Failed to create file: $filePath"
            failed=$((failed + 1))
        fi
    done

    if [ "$failed" -gt 0 ]; then
        healthLogWarning "File system repair incomplete: $repaired files created, $failed failed"
        return 1
    fi

    healthLogSuccess "File system repaired: $repaired files created"
    return 0
}
# Reset permissions under $TASK_DIR: 755 on the directory, 644 on each
# existing critical file, and ownership to $docker_install_user when that
# variable is set.
# Globals: TASK_DIR, CRITICAL_FILES, docker_install_user (read)
# Returns: 0 when every attempted change succeeded, 1 otherwise. Every step
#          is still attempted; failures used to be discarded and success was
#          always logged.
repairPermissions() {
    healthLogInfo "Repairing permissions..."
    local failed=0

    # Fix directory permissions
    sudo chmod 755 "$TASK_DIR" 2>/dev/null || failed=1

    # Fix file permissions
    local file filePath
    for file in "${CRITICAL_FILES[@]}"; do
        filePath="$TASK_DIR/$file"
        if [ -f "$filePath" ]; then
            sudo chmod 644 "$filePath" 2>/dev/null || failed=1
        fi
    done

    # Set ownership (if docker_install_user is available). The owner spec is
    # quoted so it is always passed to chown as a single argument.
    if [ -n "${docker_install_user:-}" ]; then
        sudo chown -R "${docker_install_user}:${docker_install_user}" "$TASK_DIR" 2>/dev/null || failed=1
    fi

    if [ "$failed" -ne 0 ]; then
        healthLogWarning "Some permissions could not be repaired"
        return 1
    fi

    healthLogSuccess "Permissions repaired"
    return 0
}
# Repair the task queue state: reset unparsable queue.json / current.json to
# an empty document and delete task_*.json files that neither of them
# mentions.
# Globals: TASK_DIR (read)
# Returns: 0.
#
# The JSON check passes the path to Python as an argument and is skipped
# when python3 is unavailable. Previously a missing interpreter, or a quote
# in the path, made valid files look corrupt and they were overwritten.
# NOTE(review): resetting a corrupt queue.json to "[]" makes every task file
# look orphaned, so the cleanup below then removes them -- confirm that this
# is the intended recovery behaviour.
repairTaskSystem() {
    healthLogInfo "Repairing task system..."
    local repaired=0

    # Validate and repair queue.json / current.json
    local jsonName jsonPath emptyDoc
    if command -v python3 >/dev/null 2>&1; then
        for jsonName in queue current; do
            jsonPath="$TASK_DIR/$jsonName.json"
            [ -f "$jsonPath" ] || continue
            if python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$jsonPath" 2>/dev/null; then
                continue
            fi
            healthLogInfo "Repairing corrupted $jsonName.json"
            if [ "$jsonName" = "queue" ]; then
                emptyDoc="[]"
            else
                emptyDoc="{}"
            fi
            if printf '%s\n' "$emptyDoc" | sudo tee "$jsonPath" >/dev/null; then
                repaired=$((repaired + 1))
            fi
        done
    else
        healthLogWarning "python3 not found; skipping JSON repair"
    fi

    # Clean up orphaned task files
    local queueContent=""
    local currentContent=""
    if [ -f "$TASK_DIR/queue.json" ]; then
        queueContent=$(cat "$TASK_DIR/queue.json" 2>/dev/null)
    fi
    if [ -f "$TASK_DIR/current.json" ]; then
        currentContent=$(cat "$TASK_DIR/current.json" 2>/dev/null)
    fi

    # The id is matched as a fixed string so regex metacharacters in a file
    # name are taken literally.
    local taskFile taskId
    for taskFile in "$TASK_DIR"/task_*.json; do
        [ -f "$taskFile" ] || continue
        taskId=$(basename "$taskFile" .json)
        if ! grep -qF -- "$taskId" <<<"$queueContent $currentContent"; then
            healthLogInfo "Removing orphaned task file: $taskId"
            if sudo rm -f -- "$taskFile"; then
                repaired=$((repaired + 1))
            fi
        fi
    done

    healthLogSuccess "Task system repaired: $repaired issues fixed"
    return 0
}
# Enable and start the systemd unit named by $SERVICE_NAME when it is not
# already enabled / active.
# Globals: SERVICE_NAME (read)
# Returns: 0 when every needed action succeeded, 1 otherwise. The results of
#          `systemctl enable` and `systemctl start` used to be ignored and
#          success was always logged.
repairSystemService() {
    healthLogInfo "Repairing systemd service..."
    local failed=0

    # Enable service if not enabled
    if ! systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
        healthLogInfo "Enabling systemd service: $SERVICE_NAME"
        if ! sudo systemctl enable "$SERVICE_NAME" 2>/dev/null; then
            healthLogError "Failed to enable systemd service: $SERVICE_NAME"
            failed=1
        fi
    fi

    # Start service if not active
    if ! systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
        healthLogInfo "Starting systemd service: $SERVICE_NAME"
        if ! sudo systemctl start "$SERVICE_NAME" 2>/dev/null; then
            healthLogError "Failed to start systemd service: $SERVICE_NAME"
            failed=1
        fi
    fi

    if [ "$failed" -ne 0 ]; then
        return 1
    fi

    healthLogSuccess "Systemd service repaired"
    return 0
}
# ========================================
# HEALTH REPORTING
# ========================================
# Run every validator and write a one-line JSON health report to
# $HEALTH_REPORT_FILE.
# Arguments: $1 - value stored in the report's "status" field
# Globals:   TASK_DIR, SERVICE_NAME, HEALTH_REPORT_FILE (read)
# Returns:   0 when the report was written, 1 when writing failed.
#
# Two defects of the previous version are fixed here:
#   * the validators ran inside $(...) and their log lines were captured
#     into the JSON next to true/false; their output is now discarded and
#     only the exit status is used;
#   * `df \"$TASK_DIR\"` passed literal quote characters to df, so
#     disk_available_kb was always empty.
# NOTE(review): values are inserted without JSON escaping, as before.
generateHealthReport() {
    local reportStatus="$1"
    healthLogInfo "Generating comprehensive health report..."

    # JSON key and validator function, in report order.
    local -a reportChecks=(
        "libreportal_installation:validateLibrePortalInstallation"
        "docker_service:validateDockerService"
        "container_health:validateContainerHealth"
        "webui_readiness:validateWebUIReadiness"
        "directory_structure:validateDirectoryStructure"
        "file_system:validateFileSystem"
        "permissions:validatePermissions"
        "disk_space:validateDiskSpace"
        "task_system:validateTaskSystem"
        "system_service:validateSystemService"
        "log_health:validateLogHealth"
    )

    # Add individual check results
    local reportEntry reportKey reportFn reportValue checksJson=""
    for reportEntry in "${reportChecks[@]}"; do
        reportKey="${reportEntry%%:*}"
        reportFn="${reportEntry#*:}"
        if "$reportFn" >/dev/null 2>&1; then
            reportValue="true"
        else
            reportValue="false"
        fi
        checksJson+="${checksJson:+,}\"${reportKey}\":${reportValue}"
    done

    local reportTimestamp diskAvailableKb
    reportTimestamp=$(date -Iseconds)
    diskAvailableKb=$(df -P -k "$TASK_DIR" 2>/dev/null | awk 'NR==2 {print $4}')

    local report
    printf -v report \
        '{"timestamp":"%s","status":"%s","checks":{%s},"system_info":{"task_dir":"%s","service_name":"%s","disk_available_kb":"%s"}}' \
        "$reportTimestamp" "$reportStatus" "$checksJson" \
        "$TASK_DIR" "$SERVICE_NAME" "$diskAvailableKb"

    if ! printf '%s\n' "$report" | sudo tee "$HEALTH_REPORT_FILE" >/dev/null; then
        healthLogError "Failed to write health report: $HEALTH_REPORT_FILE"
        return 1
    fi

    healthLogSuccess "Health report generated: $HEALTH_REPORT_FILE"
    return 0
}
# ========================================
# MAINTENANCE OPERATIONS
# ========================================
# Housekeeping for $TASK_DIR: delete *.log files not modified for more than
# 7 days, delete *.tmp and .queue.* files, and rotate oversized logs to
# "<name>.old".
# Globals: TASK_DIR (read), MAX_LOG_SIZE_MB (read, defaults to 50)
# Returns: 1 when $TASK_DIR is not a directory, otherwise 0.
performMaintenance() {
    healthLogInfo "Performing system maintenance..."
    local maxMb="${MAX_LOG_SIZE_MB:-50}"

    # Without the directory there is nothing to clean; say so instead of
    # reporting a completed run.
    if [ ! -d "$TASK_DIR" ]; then
        healthLogWarning "Maintenance skipped: task directory not found: $TASK_DIR"
        return 1
    fi

    # Clean up old logs
    find "$TASK_DIR" -name "*.log" -mtime +7 -delete 2>/dev/null

    # Clean up temporary files
    find "$TASK_DIR" -name "*.tmp" -delete 2>/dev/null
    find "$TASK_DIR" -name ".queue.*" -delete 2>/dev/null

    # Rotate large logs
    local logFile fileSize fileSizeMB
    for logFile in "$TASK_DIR"/*.log; do
        [ -f "$logFile" ] || continue

        # GNU stat syntax first, BSD syntax as the fallback.
        fileSize=$(stat -c%s "$logFile" 2>/dev/null || stat -f%z "$logFile" 2>/dev/null)
        [ -n "$fileSize" ] || continue

        fileSizeMB=$((fileSize / 1024 / 1024))
        if [ "$fileSizeMB" -gt "$maxMb" ]; then
            # ${logFile##*/} replaces an unquoted basename call that printed
            # the wrong name for paths containing whitespace.
            healthLogInfo "Rotating large log: ${logFile##*/} (${fileSizeMB}MB)"
            sudo mv "$logFile" "${logFile}.old"
            sudo touch "$logFile"
        fi
    done

    healthLogSuccess "Maintenance completed"
    return 0
}
# ========================================
# MAIN HEALTH CHECK FUNCTION
# ========================================
# Entry point of the health check: run validateSystemHealth, translate its
# return status into a report status and generate the report.
# Globals: HEALTH_STATUS_HEALTHY, HEALTH_STATUS_WARNING,
#          HEALTH_STATUS_CRITICAL, HEALTH_STATUS_ERROR (read; the numeric
#          defaults 0/1/2/3 are used when they are unset)
# Returns: exit status of the final log call.
check_task_processor_health() {
    healthLogInfo "=== LibrePortal System Health Check Starting ==="

    # Run comprehensive health check
    validateSystemHealth
    local healthStatus=$?

    # Generate health report
    case "$healthStatus" in
        "${HEALTH_STATUS_HEALTHY:-0}")
            generateHealthReport "healthy"
            healthLogSuccess "✅ System is healthy"
            ;;
        "${HEALTH_STATUS_WARNING:-1}")
            generateHealthReport "warning"
            healthLogWarning "⚠️ System has warnings"
            ;;
        "${HEALTH_STATUS_CRITICAL:-2}")
            generateHealthReport "critical"
            healthLogError "❌ System has critical issues"
            ;;
        "${HEALTH_STATUS_ERROR:-3}")
            generateHealthReport "error"
            healthLogError "❌ Health check failed with errors"
            ;;
        *)
            # Any other status previously matched no arm, so no report was
            # produced at all; it is now recorded as an error.
            generateHealthReport "error"
            healthLogError "❌ Health check returned unexpected status: $healthStatus"
            ;;
    esac

    healthLogInfo "=== Health Check Complete ==="
}
# ========================================
# COMMAND LINE INTERFACE
# ========================================
# Dispatch on the first positional argument captured at the top of the
# script. "check" and "start_script" both run the health check; anything
# unrecognised prints the usage text and exits with status 1.
case "$script_check_processor_flag" in
    "check" | "start_script")
        check_task_processor_health
        ;;
    "repair")
        healthLogInfo "Starting system repair..."
        repairSystemIssues
        ;;
    "report")
        generateHealthReport "manual"
        echo "Health report generated: $HEALTH_REPORT_FILE"
        ;;
    "maintenance")
        performMaintenance
        ;;
    *)
        printf '%s\n' \
            "Usage: $0 {check|repair|report|maintenance|start_script}" \
            " check - Run comprehensive health check" \
            " repair - Attempt to repair detected issues" \
            " report - Generate detailed health report" \
            " maintenance- Perform system maintenance" \
            " start_script- Run health check (for cron compatibility)"
        exit 1
        ;;
esac
fi