#!/bin/bash
###############################################################################
# NUT UPS Monitoring Script with Advanced Communication Loss Protection
# Version: 2.1
#
# Features:
# - Prevents shutdown on communication loss with all UPS units
# - Respects MINSUPPLIES parameter from upsmon.conf
# - Differentiates between power loss and communication issues
# - Comprehensive logging and status tracking
# - Supports calibration mode (CAL) as safe state
#
# Usage:
#   <script> ups-on-battery
#
# Exit codes:
#   0 - event handled (shutdown initiated or power judged stable)
#   1 - unknown / unconfigured event name
#   2 - communication problem detected; shutdown deliberately NOT initiated
###############################################################################

### Configuration Section - Adjust these parameters as needed ###
UPSMON_CONF="/etc/nut/upsmon.conf"  # Path to NUT main configuration
LOG_IDENT="NUT-UPS-Monitor"         # Syslog identification tag
SAFE_UPS_STATES="OL CAL"            # States considered operational (space-separated)
# NOTE(review): COMM_RETRY_INTERVAL is not referenced anywhere in this script;
# it is kept so existing deployments that edit this section keep working.
COMM_RETRY_INTERVAL=30              # Seconds between communication retries
MAX_COMM_FAILURES=3                 # Allowed consecutive communication failures

### Initialization Section ###
# Read MINSUPPLIES from config with fallback to default value 1.
# Leading whitespace before the directive is tolerated and only the first
# match is used, so the result is always a single token.
MINSUPPLIES=$(grep -m1 -oP '^\s*MINSUPPLIES\s+\K\d+' "$UPSMON_CONF" 2>/dev/null || echo 1)
# Guard against anything non-numeric reaching the numeric comparison below.
[[ "$MINSUPPLIES" =~ ^[0-9]+$ ]] || MINSUPPLIES=1

# Get list of all configured UPS devices (filtering out SSL messages)
UPS_LIST=$(upsc -l 2>/dev/null | grep -v '^Init SSL')

# Split the list into an array on whitespace/newlines without glob expansion.
# `read -d ''` returns non-zero at end of input; that is expected here.
declare -a UPS_NAMES=()
read -r -d '' -a UPS_NAMES <<<"$UPS_LIST"
TOTAL_UPS=${#UPS_NAMES[@]}

### Status Tracking Variables ###
SAFE_UPS_COUNT=0       # Count of UPS in safe states
COMM_FAILURE_COUNT=0   # Count of consecutive communication failures
NOCOMM_UPS_COUNT=0     # Total count of UPS units that could not be queried
declare -a UPS_STATUSES=()  # Array to store detailed UPS states ("id:status")

###############################################################################
# Function: is_safe_status
# Description: Tests whether any flag in a UPS status string is listed in
#              SAFE_UPS_STATES.
# Globals:
#   SAFE_UPS_STATES (read)
# Arguments:
#   $1 - status string, one or more space-separated flags (e.g. "OL CHRG")
# Returns:
#   0 if at least one flag is a safe state, 1 otherwise
###############################################################################
is_safe_status() {
  local -a status_flags=() safe_states=()
  local flag safe_state

  read -r -a status_flags <<<"$1"
  read -r -a safe_states <<<"$SAFE_UPS_STATES"

  # Every flag is inspected, not just the first one: the status value is a
  # flag list and the position of the power flag is not guaranteed.
  for flag in "${status_flags[@]}"; do
    for safe_state in "${safe_states[@]}"; do
      if [[ "$flag" == "$safe_state" ]]; then
        return 0
      fi
    done
  done
  return 1
}

###############################################################################
# Function: check_ups_status
# Description: Checks and logs the status of a specific UPS unit
# Globals:
#   LOG_IDENT, MAX_COMM_FAILURES (read)
#   COMM_FAILURE_COUNT, NOCOMM_UPS_COUNT, SAFE_UPS_COUNT, UPS_STATUSES (written)
# Arguments:
#   $1 - UPS identifier
###############################################################################
check_ups_status() {
  local ups_id=$1
  local status

  # Get UPS status with error handling. The pattern is anchored and the dot
  # escaped so only the real "ups.status:" line can match; the full flag list
  # is captured rather than only its first word.
  status=$(upsc "$ups_id" 2>/dev/null | grep -m1 -oP '^ups\.status:\s*\K.*')
  # Strip trailing whitespace so log output stays tidy.
  status=${status%"${status##*[![:space:]]}"}

  # Handle communication failures
  if [[ -z "$status" ]]; then
    status="NOCOMM"
    ((COMM_FAILURE_COUNT++))
    ((NOCOMM_UPS_COUNT++))
    logger -t "$LOG_IDENT" -p warn "Communication failed with UPS $ups_id (Attempt $COMM_FAILURE_COUNT/$MAX_COMM_FAILURES)"
  else
    # Reset communication failure counter on successful contact
    COMM_FAILURE_COUNT=0
  fi

  # Check against safe states
  if is_safe_status "$status"; then
    ((SAFE_UPS_COUNT++))
    logger -t "$LOG_IDENT" "UPS $ups_id operational (Status: $status)"
  fi

  # Store detailed status for reporting
  UPS_STATUSES+=("$ups_id:$status")
}

###############################################################################
# Main Event Handler
# Processes ups-on-battery events and makes shutdown decisions
###############################################################################
case "$1" in
  ups-on-battery)
    ### Check all UPS units
    for ups in "${UPS_NAMES[@]}"; do
      check_ups_status "$ups"
    done

    ### Communication Failure Protection ###
    # No UPS could even be enumerated: this is a communication problem, not
    # evidence of a power failure, so do not shut down.
    if (( TOTAL_UPS == 0 )); then
      logger -t "$LOG_IDENT" -p err \
        "CRITICAL: No UPS units could be listed. NOT initiating shutdown."
      exit 2
    fi

    # Abort when every unit is unreachable (regardless of how many units
    # exist) or when the consecutive-failure threshold has been reached.
    if (( NOCOMM_UPS_COUNT == TOTAL_UPS || COMM_FAILURE_COUNT >= MAX_COMM_FAILURES )); then
      logger -t "$LOG_IDENT" -p err \
        "CRITICAL: Persistent communication failures with $COMM_FAILURE_COUNT UPS units"
      logger -t "$LOG_IDENT" -p err \
        "Possible network/SNMP issue. NOT initiating shutdown."
      exit 2
    fi

    ### Power State Evaluation ###
    # `[ ... -lt ... ]` is used on purpose: it compares in decimal even if the
    # configured value has a leading zero.
    if [ "$SAFE_UPS_COUNT" -lt "$MINSUPPLIES" ]; then
      # Genuine power failure condition
      logger -t "$LOG_IDENT" -p crit \
        "POWER FAILURE: Only $SAFE_UPS_COUNT/$TOTAL_UPS UPS operational (Required: $MINSUPPLIES)"
      logger -t "$LOG_IDENT" -p crit \
        "Detailed status: ${UPS_STATUSES[*]}"
      logger -t "$LOG_IDENT" "Initiating controlled shutdown..."

      # Create powerdown flag file
      touch /etc/nut/killpower 2>/dev/null

      # Execute shutdown command
      /usr/sbin/upsmon -c fsd
    else
      # Adequate power protection available
      logger -t "$LOG_IDENT" \
        "Power stable: $SAFE_UPS_COUNT/$TOTAL_UPS UPS operational (Required: $MINSUPPLIES)"
    fi
    ;;
  *)
    # Unknown event type handler
    logger -t "$LOG_IDENT" -p warn "Received unconfigured event: $1"
    exit 1
    ;;
esac

exit 0