#!/bin/bash

# ========================================
# LIBREPORTAL SYSTEM HEALTH CHECKER
# Comprehensive system validation and repair tool
# ========================================
#
# Usage:
#   <script> start_script
#
# Everything below is gated on the first argument being "start_script", so
# sourcing this file (or running it with any other first argument) defines no
# functions and runs no checks.
#
# Helpers this script calls but does not define (they must already exist in
# the calling environment):
#   runFileOp <command...>  - presumably runs a file operation; TODO confirm
#   runFileWrite <path>     - receives content on stdin; presumably writes it
#                             to <path>; TODO confirm
#   runSystem <command...>  - presumably runs a system command; TODO confirm
# Optional variable: docker_install_user (owner applied by repairPermissions).

script_check_processor_flag="${1:-}"

# Only run when executed directly, not when sourced
if [[ "$script_check_processor_flag" == "start_script" ]]; then

    # ========================================
    # HEALTH CHECK CONFIGURATION
    # ========================================

    # Essential configuration
    TASK_DIR="/docker/containers/libreportal/frontend/data/tasks"
    LOCK_FILE="$TASK_DIR/task_processor.lock"

    # Health thresholds and limits
    CRITICAL_FILES=("queue.json" "current.json")
    # NOTE(review): entries are resolved relative to TASK_DIR, so "tasks" means
    # "$TASK_DIR/tasks" - confirm that nested directory is really intended.
    REQUIRED_DIRECTORIES=("tasks")
    SERVICE_NAME="libreportal"
    MIN_DISK_SPACE_KB=10240  # 10MB minimum
    MAX_LOG_SIZE_MB=50       # 50MB max log size
    HEALTH_CHECK_TIMEOUT=30  # 30 seconds max per check

    # Health status codes (returned by validateSystemHealth)
    HEALTH_STATUS_HEALTHY=0
    HEALTH_STATUS_WARNING=1
    HEALTH_STATUS_CRITICAL=2
    HEALTH_STATUS_ERROR=3

    # Logging
    HEALTH_LOG_FILE="$TASK_DIR/health_check.log"
    HEALTH_REPORT_FILE="$TASK_DIR/health_report.json"

    # ========================================
    # HEALTH LOGGING SYSTEM
    # ========================================

    # Writes one timestamped line to stdout and appends it to HEALTH_LOG_FILE.
    # Arguments: $1 - level tag (INFO, ERROR, ...), $2 - message
    # Returns:   exit status of tee (non-zero when the log file is not writable)
    _healthLog() {
        local level="$1"
        local message="$2"
        local timestamp
        timestamp=$(date '+%Y-%m-%d %H:%M:%S')
        printf '%s\n' "$timestamp [$level] [HEALTH_CHECK] $message" | tee -a "$HEALTH_LOG_FILE"
    }

    # Level-specific wrappers. Arguments: $1 - message
    healthLogInfo() { _healthLog "INFO" "$1"; }
    healthLogError() { _healthLog "ERROR" "$1"; }
    healthLogWarning() { _healthLog "WARN" "$1"; }
    healthLogSuccess() { _healthLog "SUCCESS" "$1"; }

    # ========================================
    # INTERNAL HELPERS
    # ========================================

    # Checks that a file parses as JSON.
    # Arguments: $1 - path to the file
    # Returns:   0 valid, 1 invalid/unreadable, 2 python3 is not available
    _healthJsonValid() {
        command -v python3 >/dev/null 2>&1 || return 2
        # The path is passed as an argument instead of being spliced into the
        # Python source, so quotes in the path cannot break or inject code.
        python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$1" 2>/dev/null || return 1
        return 0
    }

    # Prints the size of a file in whole megabytes.
    # Arguments: $1 - path to the file
    # Returns:   0 on success, 1 when the size cannot be determined
    _healthFileSizeMB() {
        local bytes
        bytes=$(wc -c 2>/dev/null <"$1") || return 1
        [[ "$bytes" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]] || return 1
        printf '%s\n' "$((BASH_REMATCH[1] / 1024 / 1024))"
    }

    # Prints the space available to TASK_DIR's filesystem in KB (empty on error).
    _healthAvailableKB() {
        # -P keeps each filesystem on one line; -k forces 1024-byte units.
        df -Pk "$TASK_DIR" 2>/dev/null | awk 'NR==2 {print $4}'
    }

    # Prints, one per line, the ID (file name without .json) of every
    # "$TASK_DIR"/task_*.json file whose ID does not appear anywhere in the
    # text of queue.json or current.json.
    _healthOrphanedTaskIds() {
        local referenced=""
        local taskFile taskId

        if [[ -f "$TASK_DIR/queue.json" ]]; then
            referenced+=$(cat "$TASK_DIR/queue.json" 2>/dev/null)
        fi
        referenced+=" "
        if [[ -f "$TASK_DIR/current.json" ]]; then
            referenced+=$(cat "$TASK_DIR/current.json" 2>/dev/null)
        fi

        for taskFile in "$TASK_DIR"/task_*.json; do
            # An unmatched glob stays literal; -f filters that case out.
            [[ -f "$taskFile" ]] || continue
            taskId=$(basename "$taskFile" .json)
            # -F: the ID is literal text, not a regular expression.
            if ! grep -qF -- "$taskId" <<<"$referenced"; then
                printf '%s\n' "$taskId"
            fi
        done
    }

    # Prints "true" when check function $1 succeeds, otherwise "false".
    # The check's stdout is discarded so its log lines cannot leak into JSON
    # that is being assembled through command substitution.
    _healthCheckJsonBool() {
        if "$1" >/dev/null; then
            printf 'true'
        else
            printf 'false'
        fi
    }

    # ========================================
    # COMPREHENSIVE SYSTEM VALIDATION
    # ========================================

    # Runs every validate* check and logs a summary.
    # Returns: HEALTH_STATUS_CRITICAL when a critical check failed,
    #          HEALTH_STATUS_WARNING when only issues/warnings were found,
    #          HEALTH_STATUS_HEALTHY otherwise.
    validateSystemHealth() {
        healthLogInfo "Starting comprehensive system health check..."

        local issues=()
        local warnings=()
        local critical_issues=()

        # 1. LibrePortal Installation Check
        healthLogInfo "Checking LibrePortal installation..."
        validateLibrePortalInstallation || critical_issues+=("libreportal_not_installed")

        # 2. Docker Service Check
        healthLogInfo "Checking Docker service..."
        validateDockerService || critical_issues+=("docker_service_not_running")

        # 3. Container Health Check
        healthLogInfo "Checking LibrePortal container..."
        validateContainerHealth || issues+=("container_health_issues")

        # 4. WebUI Readiness Check
        healthLogInfo "Checking WebUI readiness..."
        validateWebUIReadiness || issues+=("webui_not_ready")

        # 5. Directory Structure Check
        healthLogInfo "Checking directory structure..."
        validateDirectoryStructure || warnings+=("directory_structure_issues")

        # 6. File System Check
        healthLogInfo "Checking file system..."
        validateFileSystem || issues+=("file_system_issues")

        # 7. Permissions Check
        healthLogInfo "Checking file permissions..."
        validatePermissions || issues+=("permission_issues")

        # 8. Disk Space Check
        healthLogInfo "Checking disk space..."
        validateDiskSpace || critical_issues+=("insufficient_disk_space")

        # 9. Task System Check
        healthLogInfo "Checking task system..."
        validateTaskSystem || issues+=("task_system_issues")

        # 10. Service Status Check
        healthLogInfo "Checking systemd service..."
        validateSystemService || warnings+=("service_not_optimal")

        # 11. Log Health Check
        healthLogInfo "Checking log health..."
        validateLogHealth || warnings+=("log_health_issues")

        # Report every non-empty category, then return the worst severity.
        local status=$HEALTH_STATUS_HEALTHY
        if ((${#warnings[@]} > 0)); then
            healthLogWarning "⚠️ WARNINGS: ${warnings[*]}"
            status=$HEALTH_STATUS_WARNING
        fi
        if ((${#issues[@]} > 0)); then
            healthLogWarning "⚠️ ISSUES FOUND: ${issues[*]}"
            status=$HEALTH_STATUS_WARNING
        fi
        if ((${#critical_issues[@]} > 0)); then
            healthLogError "❌ CRITICAL ISSUES FOUND: ${critical_issues[*]}"
            status=$HEALTH_STATUS_CRITICAL
        fi
        if ((status == HEALTH_STATUS_HEALTHY)); then
            healthLogSuccess "✅ All health checks passed"
        fi
        return "$status"
    }

    # Returns 0 when the `libreportal` command is on PATH, 1 otherwise.
    validateLibrePortalInstallation() {
        if ! command -v libreportal >/dev/null 2>&1; then
            healthLogError "LibrePortal command not found"
            return 1
        fi
        local version
        version=$(libreportal --version 2>/dev/null)
        healthLogSuccess "LibrePortal installed: $version"
        return 0
    }

    # Returns 0 when docker is installed and systemd reports it active.
    validateDockerService() {
        if ! command -v docker >/dev/null 2>&1; then
            healthLogError "Docker command not found"
            return 1
        fi
        if ! systemctl is-active --quiet docker; then
            healthLogError "Docker service is not running"
            return 1
        fi
        healthLogSuccess "Docker service is running"
        return 0
    }

    # Returns 0 when the libreportal-service container is Up and a process
    # matching "node.*webui" is found inside it; 1 otherwise.
    validateContainerHealth() {
        local containerInfo
        containerInfo=$(docker ps -a --filter "name=libreportal-service" --format "{{.Status}}|{{.Names}}" 2>/dev/null)
        if [[ -z "$containerInfo" ]]; then
            healthLogError "LibrePortal WebUI container not found"
            return 1
        fi

        local status
        status=$(printf '%s\n' "$containerInfo" | cut -d'|' -f1)
        if ! grep -q "Up" <<<"$status"; then
            healthLogWarning "Container status: $status"
            return 1
        fi

        # Check if container is responsive
        if docker exec libreportal-service pgrep -f "node.*webui" >/dev/null 2>&1; then
            healthLogSuccess "Container is running and responsive"
            return 0
        fi
        healthLogWarning "Container is running but WebUI process not found"
        return 1
    }

    # Returns 0 when the WebUI answers on the container's first published host
    # port and the main page looks like LibrePortal; 1 otherwise.
    validateWebUIReadiness() {
        # Get the actual port mapping from the container
        local portMapping
        portMapping=$(docker port libreportal-service 2>/dev/null | head -1)
        if [[ -z "$portMapping" ]]; then
            healthLogWarning "No port mapping found for libreportal-service container"
            return 1
        fi

        # Extract the host port: text after the last ':' with non-digits removed
        local hostPort="${portMapping##*:}"
        hostPort="${hostPort//[^0-9]/}"
        if [[ -z "$hostPort" ]]; then
            healthLogWarning "Could not extract host port from: $portMapping"
            return 1
        fi

        healthLogInfo "Checking WebUI on port: $hostPort"

        # Try multiple endpoints to check if WebUI is ready
        local endpoints=(
            "/"                     # Main page
            "/api/health"           # Health endpoint (if exists)
            "/js/system-loader.js"  # A known JavaScript file
            "/css/main.css"         # A known CSS file
        )

        local endpoint
        for endpoint in "${endpoints[@]}"; do
            if curl -s --max-time 3 "http://localhost:$hostPort$endpoint" >/dev/null 2>&1; then
                healthLogInfo "WebUI is responding on port $hostPort (endpoint: $endpoint)"

                # Additional check: verify it's actually the LibrePortal WebUI.
                # This always probes "/", so the answer would be the same for
                # any later endpoint - decide here instead of looping on.
                if curl -s --max-time 3 "http://localhost:$hostPort/" 2>/dev/null |
                    grep -qiE 'libreportal|webui|dashboard'; then
                    healthLogSuccess "WebUI is ready on port $hostPort"
                    return 0
                fi
                healthLogWarning "Port $hostPort is responding but doesn't appear to be LibrePortal WebUI"
                return 1
            fi
        done

        healthLogWarning "WebUI not ready on port $hostPort"
        return 1
    }

    # Returns 0 when TASK_DIR and every REQUIRED_DIRECTORIES entry under it exist.
    validateDirectoryStructure() {
        local issues=()
        local dir

        # Check main tasks directory
        if [[ ! -d "$TASK_DIR" ]]; then
            issues+=("tasks_directory_missing")
        fi

        # Check subdirectories
        for dir in "${REQUIRED_DIRECTORIES[@]}"; do
            if [[ ! -d "$TASK_DIR/$dir" ]]; then
                issues+=("directory_missing:$dir")
            fi
        done

        if ((${#issues[@]} > 0)); then
            healthLogWarning "Directory issues: ${issues[*]}"
            return 1
        fi
        healthLogSuccess "Directory structure is valid"
        return 0
    }

    # Returns 0 when every CRITICAL_FILES entry exists and is readable and writable.
    validateFileSystem() {
        local issues=()
        local file filePath

        # Check critical files
        for file in "${CRITICAL_FILES[@]}"; do
            filePath="$TASK_DIR/$file"
            if [[ ! -f "$filePath" ]]; then
                issues+=("missing_file:$file")
            elif [[ ! -r "$filePath" ]]; then
                issues+=("file_not_readable:$file")
            elif [[ ! -w "$filePath" ]]; then
                issues+=("file_not_writable:$file")
            fi
        done

        if ((${#issues[@]} > 0)); then
            healthLogWarning "File system issues: ${issues[*]}"
            return 1
        fi
        healthLogSuccess "File system is valid"
        return 0
    }

    # Returns 0 when TASK_DIR is writable and each existing critical file is
    # both readable and writable.
    validatePermissions() {
        local issues=()
        local file filePath

        # Check directory permissions
        if [[ ! -w "$TASK_DIR" ]]; then
            issues+=("directory_not_writable")
        fi

        # Check file permissions for critical files
        for file in "${CRITICAL_FILES[@]}"; do
            filePath="$TASK_DIR/$file"
            if [[ -f "$filePath" ]] && { [[ ! -r "$filePath" ]] || [[ ! -w "$filePath" ]]; }; then
                issues+=("permission_issues:$file")
            fi
        done

        if ((${#issues[@]} > 0)); then
            healthLogWarning "Permission issues: ${issues[*]}"
            return 1
        fi
        healthLogSuccess "Permissions are valid"
        return 0
    }

    # Returns 0 when at least MIN_DISK_SPACE_KB is available for TASK_DIR.
    validateDiskSpace() {
        local availableSpace
        availableSpace=$(_healthAvailableKB)

        # Empty or non-numeric output means df failed (e.g. TASK_DIR missing).
        if [[ ! "$availableSpace" =~ ^[0-9]+$ ]]; then
            healthLogError "Could not determine disk space"
            return 1
        fi

        if ((availableSpace < MIN_DISK_SPACE_KB)); then
            healthLogError "Insufficient disk space: ${availableSpace}KB available, ${MIN_DISK_SPACE_KB}KB required"
            return 1
        fi
        healthLogSuccess "Sufficient disk space: ${availableSpace}KB available"
        return 0
    }

    # Returns 0 when queue.json/current.json (if present) are valid JSON and no
    # orphaned task_*.json files exist.
    validateTaskSystem() {
        local issues=()
        local name taskId
        local orphans=()

        # Validate queue.json and current.json format
        for name in queue current; do
            [[ -f "$TASK_DIR/$name.json" ]] || continue
            _healthJsonValid "$TASK_DIR/$name.json"
            case $? in
                0) ;;
                2) healthLogWarning "python3 not available; skipping JSON validation of $name.json" ;;
                *) issues+=("${name}_json_invalid") ;;
            esac
        done

        # Find task files not referenced in queue or current
        mapfile -t orphans < <(_healthOrphanedTaskIds)
        for taskId in "${orphans[@]}"; do
            [[ -n "$taskId" ]] && issues+=("orphaned_task_file:$taskId")
        done

        if ((${#issues[@]} > 0)); then
            healthLogWarning "Task system issues: ${issues[*]}"
            return 1
        fi
        healthLogSuccess "Task system is valid"
        return 0
    }

    # Returns 0 when the SERVICE_NAME systemd unit is both enabled and active.
    validateSystemService() {
        if ! systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
            healthLogWarning "Systemd service is not enabled"
            return 1
        fi
        if ! systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
            healthLogWarning "Systemd service is enabled but not active"
            return 1
        fi
        healthLogSuccess "Systemd service is enabled and active"
        return 0
    }

    # Returns 0 when no "$TASK_DIR"/*.log file is larger than MAX_LOG_SIZE_MB.
    validateLogHealth() {
        local issues=()
        local logFile fileSizeMB

        # Check log file sizes
        for logFile in "$TASK_DIR"/*.log; do
            [[ -f "$logFile" ]] || continue
            fileSizeMB=$(_healthFileSizeMB "$logFile") || continue
            if ((fileSizeMB > MAX_LOG_SIZE_MB)); then
                issues+=("large_log_file:$(basename "$logFile"):${fileSizeMB}MB")
            fi
        done

        if ((${#issues[@]} > 0)); then
            healthLogWarning "Log health issues: ${issues[*]}"
            return 1
        fi
        healthLogSuccess "Log files are healthy"
        return 0
    }

    # ========================================
    # ADVANCED REPAIR TOOLS
    # ========================================

    # Runs every repair step and logs how many succeeded.
    # Returns: 0 when all steps succeeded, 1 when at least one failed.
    repairSystemIssues() {
        healthLogInfo "Starting system repair process..."

        local repaired=0
        local failed=0
        local step
        local steps=(
            repairDirectoryStructure  # 1. Repair directory structure
            repairFileSystem          # 2. Repair file system
            repairPermissions         # 3. Repair permissions
            repairTaskSystem          # 4. Repair task system
            repairSystemService       # 5. Repair service
        )

        for step in "${steps[@]}"; do
            if "$step"; then
                repaired=$((repaired + 1))
            else
                failed=$((failed + 1))
            fi
        done

        healthLogInfo "Repair completed: $repaired repaired, $failed failed"

        if ((failed == 0)); then
            healthLogSuccess "All repairs completed successfully"
            return 0
        fi
        healthLogWarning "Some repairs failed"
        return 1
    }

    # Creates TASK_DIR and the REQUIRED_DIRECTORIES under it when missing.
    # Returns: 0 on success, 1 as soon as a directory cannot be created
    #          (returns instead of exiting so the caller can count the failure).
    repairDirectoryStructure() {
        healthLogInfo "Repairing directory structure..."

        # Create main tasks directory
        if [[ ! -d "$TASK_DIR" ]]; then
            healthLogInfo "Creating tasks directory: $TASK_DIR"
            if ! runFileOp mkdir -p "$TASK_DIR"; then
                healthLogError "Failed to create tasks directory"
                return 1
            fi
        fi

        # Create required subdirectories
        local dir dirPath
        for dir in "${REQUIRED_DIRECTORIES[@]}"; do
            dirPath="$TASK_DIR/$dir"
            if [[ ! -d "$dirPath" ]]; then
                healthLogInfo "Creating directory: $dirPath"
                if ! runFileOp mkdir -p "$dirPath"; then
                    healthLogError "Failed to create directory: $dirPath"
                    return 1
                fi
            fi
        done

        healthLogSuccess "Directory structure repaired"
        return 0
    }

    # Creates missing critical files with empty JSON content
    # ("[]" for queue.json, "{}" for current.json).
    # Returns: 0 when every missing file was created, 1 otherwise.
    repairFileSystem() {
        healthLogInfo "Repairing file system..."

        local repaired=0
        local failed=0
        local file filePath content

        # Create missing critical files
        for file in "${CRITICAL_FILES[@]}"; do
            filePath="$TASK_DIR/$file"
            [[ -f "$filePath" ]] && continue

            healthLogInfo "Creating missing file: $file"
            case "$file" in
                "queue.json") content="[]" ;;
                "current.json") content="{}" ;;
                *) continue ;; # no default content known for this file
            esac

            if printf '%s\n' "$content" | runFileWrite "$filePath"; then
                repaired=$((repaired + 1))
            else
                healthLogError "Failed to create file: $file"
                failed=$((failed + 1))
            fi
        done

        if ((failed > 0)); then
            healthLogWarning "File system repair incomplete: $repaired files created, $failed failed"
            return 1
        fi
        healthLogSuccess "File system repaired: $repaired files created"
        return 0
    }

    # Sets TASK_DIR to 755, existing critical files to 644 and, when
    # docker_install_user is set, chowns TASK_DIR recursively to that user.
    # Returns: 0 when every operation succeeded, 1 otherwise.
    repairPermissions() {
        healthLogInfo "Repairing permissions..."

        local failed=0
        local file filePath

        # Fix directory permissions
        runFileOp chmod 755 "$TASK_DIR" 2>/dev/null || failed=1

        # Fix file permissions
        for file in "${CRITICAL_FILES[@]}"; do
            filePath="$TASK_DIR/$file"
            if [[ -f "$filePath" ]]; then
                runFileOp chmod 644 "$filePath" 2>/dev/null || failed=1
            fi
        done

        # Set ownership (if docker_install_user is available)
        if [[ -n "${docker_install_user:-}" ]]; then
            runFileOp chown -R "$docker_install_user:$docker_install_user" "$TASK_DIR" 2>/dev/null || failed=1
        fi

        if ((failed != 0)); then
            healthLogWarning "Some permissions could not be repaired"
            return 1
        fi
        healthLogSuccess "Permissions repaired"
        return 0
    }

    # Resets corrupted queue.json/current.json to empty JSON and removes
    # orphaned task_*.json files.
    # Returns: 0 when every attempted fix succeeded, 1 otherwise.
    repairTaskSystem() {
        healthLogInfo "Repairing task system..."

        local repaired=0
        local failed=0
        local name path content taskId
        local orphans=()

        # Validate and repair queue.json / current.json
        for name in queue current; do
            path="$TASK_DIR/$name.json"
            [[ -f "$path" ]] || continue

            _healthJsonValid "$path"
            case $? in
                0) continue ;;
                2)
                    # Without python3 the file cannot be judged; never
                    # overwrite data on the strength of a missing tool.
                    healthLogWarning "python3 not available; skipping JSON validation of $name.json"
                    continue
                    ;;
            esac

            if [[ "$name" == "queue" ]]; then
                content="[]"
            else
                content="{}"
            fi
            healthLogInfo "Repairing corrupted $name.json"
            if printf '%s\n' "$content" | runFileWrite "$path"; then
                repaired=$((repaired + 1))
            else
                failed=$((failed + 1))
            fi
        done

        # Clean up orphaned task files.
        # NOTE(review): when queue.json was just reset above, every task file
        # counts as orphaned and is removed - confirm that is intended.
        mapfile -t orphans < <(_healthOrphanedTaskIds)
        for taskId in "${orphans[@]}"; do
            [[ -n "$taskId" ]] || continue
            healthLogInfo "Removing orphaned task file: $taskId"
            if runFileOp rm -f "$TASK_DIR/$taskId.json"; then
                repaired=$((repaired + 1))
            else
                failed=$((failed + 1))
            fi
        done

        if ((failed > 0)); then
            healthLogWarning "Task system repair incomplete: $repaired issues fixed, $failed failed"
            return 1
        fi
        healthLogSuccess "Task system repaired: $repaired issues fixed"
        return 0
    }

    # Enables and starts the SERVICE_NAME systemd unit when needed.
    # Returns: 0 when the unit ends up enabled and started, 1 otherwise.
    repairSystemService() {
        healthLogInfo "Repairing systemd service..."

        local failed=0

        # Enable service if not enabled
        if ! systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
            healthLogInfo "Enabling systemd service: $SERVICE_NAME"
            runSystem systemctl enable "$SERVICE_NAME" 2>/dev/null || failed=1
        fi

        # Start service if not active
        if ! systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
            healthLogInfo "Starting systemd service: $SERVICE_NAME"
            runSystem systemctl start "$SERVICE_NAME" 2>/dev/null || failed=1
        fi

        if ((failed != 0)); then
            healthLogWarning "Systemd service could not be fully repaired"
            return 1
        fi
        healthLogSuccess "Systemd service repaired"
        return 0
    }

    # ========================================
    # HEALTH REPORTING
    # ========================================

    # Re-runs every check and writes a single-line JSON report to
    # HEALTH_REPORT_FILE through runFileWrite.
    # Arguments: $1 - overall status label stored in the report
    # Returns:   0 when the report was written, 1 otherwise
    generateHealthReport() {
        healthLogInfo "Generating comprehensive health report..."

        # "json_key:check_function" pairs, in report order.
        local checks=(
            "libreportal_installation:validateLibrePortalInstallation"
            "docker_service:validateDockerService"
            "container_health:validateContainerHealth"
            "webui_readiness:validateWebUIReadiness"
            "directory_structure:validateDirectoryStructure"
            "file_system:validateFileSystem"
            "permissions:validatePermissions"
            "disk_space:validateDiskSpace"
            "task_system:validateTaskSystem"
            "system_service:validateSystemService"
            "log_health:validateLogHealth"
        )

        # Add individual check results
        local entry
        local checksJson=""
        for entry in "${checks[@]}"; do
            checksJson+="${checksJson:+,}\"${entry%%:*}\":$(_healthCheckJsonBool "${entry#*:}")"
        done

        local report="{"
        report+="\"timestamp\":\"$(date -Iseconds)\","
        report+="\"status\":\"$1\","
        report+="\"checks\":{${checksJson}},"
        report+="\"system_info\":{"
        report+="\"task_dir\":\"$TASK_DIR\","
        report+="\"service_name\":\"$SERVICE_NAME\","
        report+="\"disk_available_kb\":\"$(_healthAvailableKB)\""
        report+="}"
        report+="}"

        if printf '%s\n' "$report" | runFileWrite "$HEALTH_REPORT_FILE"; then
            healthLogSuccess "Health report generated: $HEALTH_REPORT_FILE"
            return 0
        fi
        healthLogError "Failed to write health report: $HEALTH_REPORT_FILE"
        return 1
    }

    # ========================================
    # MAINTENANCE OPERATIONS
    # ========================================

    # Deletes logs older than 7 days and temporary files under TASK_DIR, then
    # rotates any remaining log larger than MAX_LOG_SIZE_MB to "<name>.old".
    performMaintenance() {
        healthLogInfo "Performing system maintenance..."

        # Clean up old logs
        find "$TASK_DIR" -type f -name "*.log" -mtime +7 -delete 2>/dev/null

        # Clean up temporary files
        find "$TASK_DIR" -type f -name "*.tmp" -delete 2>/dev/null
        find "$TASK_DIR" -type f -name ".queue.*" -delete 2>/dev/null

        # Rotate large logs
        local logFile fileSizeMB
        for logFile in "$TASK_DIR"/*.log; do
            [[ -f "$logFile" ]] || continue
            fileSizeMB=$(_healthFileSizeMB "$logFile") || continue
            if ((fileSizeMB > MAX_LOG_SIZE_MB)); then
                healthLogInfo "Rotating large log: $(basename "$logFile") (${fileSizeMB}MB)"
                runFileOp mv "$logFile" "${logFile}.old"
                runFileOp touch "$logFile"
            fi
        done

        healthLogSuccess "Maintenance completed"
    }

    # ========================================
    # MAIN HEALTH CHECK FUNCTION
    # ========================================

    # Runs the full validation, writes the report labelled with the resulting
    # status and logs a one-line verdict. The status is not propagated as the
    # function's return value.
    check_task_processor_health() {
        healthLogInfo "=== LibrePortal System Health Check Starting ==="

        # Run comprehensive health check
        local healthStatus
        validateSystemHealth
        healthStatus=$?

        # Generate health report
        case "$healthStatus" in
            "$HEALTH_STATUS_HEALTHY")
                generateHealthReport "healthy"
                healthLogSuccess "✅ System is healthy"
                ;;
            "$HEALTH_STATUS_WARNING")
                generateHealthReport "warning"
                healthLogWarning "⚠️ System has warnings"
                ;;
            "$HEALTH_STATUS_CRITICAL")
                generateHealthReport "critical"
                healthLogError "❌ System has critical issues"
                ;;
            *)
                # HEALTH_STATUS_ERROR or any unexpected status
                generateHealthReport "error"
                healthLogError "❌ Health check failed with errors"
                ;;
        esac

        healthLogInfo "=== Health Check Complete ==="
    }

    # ========================================
    # COMMAND LINE INTERFACE
    # ========================================

    # NOTE(review): because of the "start_script" gate at the top of the file,
    # only the "start_script" branch can currently be reached; the other
    # sub-commands stay here for when that gate is relaxed.
    case "$script_check_processor_flag" in
        "check")
            check_task_processor_health
            ;;
        "repair")
            healthLogInfo "Starting system repair..."
            repairSystemIssues
            ;;
        "report")
            generateHealthReport "manual"
            echo "Health report generated: $HEALTH_REPORT_FILE"
            ;;
        "maintenance")
            performMaintenance
            ;;
        "start_script")
            check_task_processor_health
            ;;
        *)
            echo "Usage: $0 {check|repair|report|maintenance|start_script}"
            echo " check - Run comprehensive health check"
            echo " repair - Attempt to repair detected issues"
            echo " report - Generate detailed health report"
            echo " maintenance- Perform system maintenance"
            echo " start_script- Run health check (for cron compatibility)"
            exit 1
            ;;
    esac
fi