K8 Monitoring

#!/bin/bash
# =============================================================
# Script: Cronjob_monitorpod.sh
# Purpose:
# 1. Trigger a Kubernetes CronJob manually.
# 2. Monitor the created Job & Pod lifecycle.
# 3. Collect pod logs and period…


This content originally appeared on DEV Community and was authored by ByteLedger

#!/bin/bash
# =============================================================
# Script: Cronjob_monitorpod.sh
# Purpose:
#   1. Trigger a Kubernetes CronJob manually.
#   2. Monitor the created Job & Pod lifecycle.
#   3. Collect pod logs and periodic CPU/memory metrics.
#   4. Additionally, detect "/app/ShieldCache" activity in logs
#      and correlate it with real-time pod resource usage.
#
# Author: ByteLedger
# Compatible OS: Linux (RHEL/CentOS)
# =============================================================

# --- 1. Basic Setup ---
# Absolute path of this script (realpath -s: symlinks are not resolved) and
# the directory that contains it; every output folder is created beside it.
SCRIPT=$(realpath -s "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# Kubernetes targets
NAMESPACE='onprem'         # namespace that holds the CronJob and its pods
CRONJOB_NAME='teams-chat'  # CronJob the manual Job is created from
JOB_NAME_BASE='teams-chat' # prefix of the generated Job name

# One timestamp per run; it makes the Job name and all file names unique.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
JOB_NAME="${JOB_NAME_BASE}-${TIMESTAMP}"

# Output folders (creation is best-effort: failures are silenced and ignored)
LOG_DIR="${SCRIPTPATH}/Logs"
LASTLOG_DIR="${SCRIPTPATH}/Lastlog"
OUTPUT_DIR="${SCRIPTPATH}/output"
mkdir -p "$LOG_DIR" "$LASTLOG_DIR" "$OUTPUT_DIR" &>/dev/null || true

# Per-run files, all sharing the "<timestamp>-<job base>" prefix
run_prefix="${LOG_DIR}/${TIMESTAMP}-${JOB_NAME_BASE}"
LOG_PATH="${run_prefix}.log"
RESOURCE_CSV="${run_prefix}_pod_resources.csv"
CACHE_MONITOR_CSV="${run_prefix}_shieldcache_activity.csv"

# Strings searched for in the collected log once the pod has finished
ERROR_KEYWORDS=(
  "java.io.EOFException"
  "NoSuchMethodError"
  "Error"
)
SUCCESS_KEYWORDS=(
  "TGM Policy Ended"
  "Job completed"
)

# =============================================================
# 2. Validate the Namespace
# =============================================================
# Stop with exit code 1 when `kubectl get namespace` fails. The failure line
# is appended to the run log, which is then mirrored to Lastlog (best-effort).
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Checking if namespace '$NAMESPACE' exists..."
kubectl get namespace "$NAMESPACE" &>/dev/null || {
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Namespace '$NAMESPACE' not found. Exiting." | tee -a "$LOG_PATH"
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
}

# =============================================================
# 3. Trigger the CronJob manually (create a Job)
# =============================================================
# Creates a one-off Job named $JOB_NAME from CronJob $CRONJOB_NAME.
# kubectl's stdout is discarded; its stderr is left untouched so the reason
# for a failure stays visible to whoever runs the script.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Creating Job '$JOB_NAME' from CronJob '$CRONJOB_NAME' in namespace '$NAMESPACE'..." | tee -a "$LOG_PATH"
if ! kubectl create job --from=cronjob/"$CRONJOB_NAME" -n "$NAMESPACE" "$JOB_NAME" >/dev/null; then
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Failed to create job from CronJob." | tee -a "$LOG_PATH"
  # Refresh the "last run" snapshot here too, as the namespace and keyword
  # failure paths do; otherwise Lastlog keeps showing a previous run.
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi

# =============================================================
# 4. Wait for the Pod to appear for this Job (pick newest)
# =============================================================
#######################################
# Print the name of the most recently created pod of the current Job.
# Globals:   NAMESPACE (read), JOB_NAME (read)
# Arguments: none
# Outputs:   the last pod name returned by kubectl (sorted by creation
#            timestamp) on stdout; nothing when kubectl prints no pods.
#            kubectl's stderr is discarded.
# Returns:   exit status of `tail` (last stage of the pipeline)
#######################################
get_newest_pod() {
  local -a pod_query=(
    -n "$NAMESPACE" get pods
    -l "job-name=${JOB_NAME}"
    --sort-by=.metadata.creationTimestamp
    -o 'jsonpath={range .items[*]}{.metadata.name}{"\n"}{end}'
  )
  # One name per line, oldest first: the newest pod is the last line.
  kubectl "${pod_query[@]}" 2>/dev/null | tail -n 1
}

echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Waiting for Pod associated with Job '$JOB_NAME'..." | tee -a "$LOG_PATH"

# Upper bound, in seconds, on how long to wait for the Job's pod object to
# appear. Override through the environment; 0 waits forever. Anything that
# is not a plain non-negative integer falls back to the default.
POD_WAIT_TIMEOUT="${POD_WAIT_TIMEOUT:-600}"
[[ "$POD_WAIT_TIMEOUT" =~ ^[0-9]+$ ]] || POD_WAIT_TIMEOUT=600
pod_wait_started=$SECONDS

POD_NAME=""
while true; do
  POD_NAME="$(get_newest_pod)"
  if [[ -n "$POD_NAME" ]]; then
    # .spec.nodeName stays empty until the pod has been scheduled, so show
    # a placeholder instead of printing node ''.
    NODE_NAME="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}')"
    echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' created on node '${NODE_NAME:-<not yet scheduled>}'." | tee -a "$LOG_PATH"
    break
  fi
  # Without this guard a Job that never produces a pod would keep the
  # script polling forever.
  if (( POD_WAIT_TIMEOUT > 0 && SECONDS - pod_wait_started >= POD_WAIT_TIMEOUT )); then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] No pod appeared for Job '$JOB_NAME' within ${POD_WAIT_TIMEOUT}s. Exiting." | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
    exit 1
  fi
  echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod not yet created... retrying in 10s." | tee -a "$LOG_PATH"
  sleep 10
done

# =============================================================
# 5. Prepare CSV headers
# =============================================================
# Both files are (re)created here and start with a single header row;
# the monitoring sections below only ever append to them.
printf '%s\n' 'timestamp,pod,cpu,mem' > "$RESOURCE_CSV"
printf '%s\n' 'timestamp,pod,cpu,mem,log_snippet' > "$CACHE_MONITOR_CSV"

# =============================================================
# 6. Start ShieldCache-specific monitor (background)
# =============================================================
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Starting background monitor for /app/ShieldCache activity..." | tee -a "$LOG_PATH"

#######################################
# Format one row of the ShieldCache activity CSV.
# Arguments:
#   $1 - timestamp
#   $2 - pod name
#   $3 - "cpu,mem" pair; may be empty when no metrics were returned
#   $4 - raw log line
# Outputs:
#   One row on stdout: timestamp,pod,cpu,mem,"snippet". The snippet is the
#   first 150 characters of the log line with embedded double quotes
#   doubled, so the quoted field stays intact.
#######################################
shieldcache_csv_row() {
  local stamp="$1" pod="$2" usage="$3" snippet="$4"
  local dq='"'
  # An empty usage value would drop a column and shift the snippet under
  # "mem"; keep cpu and mem as two empty fields instead.
  [[ -n "$usage" ]] || usage=','
  snippet="${snippet:0:150}"
  snippet="${snippet//$dq/$dq$dq}"
  printf '%s,%s,%s,"%s"\n' "$stamp" "$pod" "$usage" "$snippet"
}

# Follow the pod log and, for every line mentioning /app/ShieldCache, record
# the pod's CPU/memory usage at that moment next to the log snippet.
# IFS= keeps the line exactly as kubectl printed it.
kubectl logs -f "$POD_NAME" -n "$NAMESPACE" --timestamps=true 2>/dev/null | while IFS= read -r line; do
  [[ "$line" == *"/app/ShieldCache"* ]] || continue
  ts="$(date +'%Y-%m-%d %H:%M:%S')"
  usage_line="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk '{print $2","$3}')"
  shieldcache_csv_row "$ts" "$POD_NAME" "$usage_line" "$line" >> "$CACHE_MONITOR_CSV"
done &
# PID of the background pipeline; later sections use it to stop the monitor.
CACHE_MON_PID=$!

# =============================================================
# 7. Main pod monitoring loop
# =============================================================
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Monitoring pod '$POD_NAME' resource usage..." | tee -a "$LOG_PATH"

# Number of consecutive polls with an unrecognised or empty phase (for
# example because the pod was deleted) after which monitoring gives up
# instead of looping forever. Override through the environment; 0 disables.
MAX_UNKNOWN_POLLS="${MAX_UNKNOWN_POLLS:-15}"
[[ "$MAX_UNKNOWN_POLLS" =~ ^[0-9]+$ ]] || MAX_UNKNOWN_POLLS=15
unknown_polls=0

while true; do
  # Empty when kubectl fails; that case lands in the catch-all branch below.
  POD_STATUS="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null)"

  # Periodic CPU/MEM usage collection (skipped when kubectl top yields nothing)
  USAGE_LINE="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" '{print ts","$1","$2","$3}')"
  if [[ -n "$USAGE_LINE" ]]; then
    echo "$USAGE_LINE" >> "$RESOURCE_CSV"
  fi

  # NOTE(review): Kubernetes documents the pod phases as Pending, Running,
  # Succeeded, Failed and Unknown; the extra patterns (Completed, Error,
  # CrashLoopBackOff) are kept from the original as harmless aliases.
  case "$POD_STATUS" in
    Running|Pending)
      unknown_polls=0
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' is $POD_STATUS. Sleeping 30s..." | tee -a "$LOG_PATH"
      sleep 30
      ;;
    Succeeded|Completed)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' completed successfully." | tee -a "$LOG_PATH"
      break
      ;;
    Failed|Error|CrashLoopBackOff)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Pod '$POD_NAME' failed with status $POD_STATUS." | tee -a "$LOG_PATH"
      # A failed run is when the pod output matters most: collect it
      # (best-effort) and refresh the Lastlog snapshot before leaving.
      kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true >> "$LOG_PATH" 2>&1 || true
      cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
      kill "$CACHE_MON_PID" >/dev/null 2>&1
      exit 2
      ;;
    *)
      unknown_polls=$((unknown_polls + 1))
      if (( MAX_UNKNOWN_POLLS > 0 && unknown_polls >= MAX_UNKNOWN_POLLS )); then
        echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Pod '$POD_NAME' status could not be determined for $unknown_polls consecutive checks. Giving up." | tee -a "$LOG_PATH"
        cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
        kill "$CACHE_MON_PID" >/dev/null 2>&1
        exit 2
      fi
      echo "$(date +'%Y-%m-%d %H:%M:%S') [WARN] Unknown pod status '$POD_STATUS'. Waiting 20s..." | tee -a "$LOG_PATH"
      sleep 20
      ;;
  esac
done

# =============================================================
# 8. Fetch final logs (after completion)
# =============================================================
# Append the pod's complete timestamped output to the run log. kubectl's
# stderr goes to the same file, so a failure to fetch is recorded there too.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Fetching final logs for pod '$POD_NAME'..." | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true &>> "$LOG_PATH"

# =============================================================
# 9. Check for error & success keywords
# =============================================================

#######################################
# Test whether a file contains a keyword, ignoring case.
# The keyword is matched as a fixed string (grep -F): entries such as
# "java.io.EOFException" contain dots that a regex match would treat as
# "any character".
# Arguments: $1 - file to search, $2 - literal keyword
# Returns:   grep's exit status (0 when the keyword is present)
#######################################
log_has_keyword() {
  grep -Fiq -- "$2" "$1"
}

#######################################
# Shared exit path for a failed keyword check: refresh the Lastlog
# snapshot, stop the background ShieldCache monitor, then exit.
# Globals:   LOG_PATH, LASTLOG_DIR, CACHE_MON_PID (all read)
# Arguments: $1 - exit code
#######################################
abort_keyword_check() {
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  kill "$CACHE_MON_PID" >/dev/null 2>&1
  exit "$1"
}

# Any single error keyword fails the run (exit 3).
for keyword in "${ERROR_KEYWORDS[@]}"; do
  if log_has_keyword "$LOG_PATH" "$keyword"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Found error keyword: '$keyword'" | tee -a "$LOG_PATH"
    abort_keyword_check 3
  fi
done

# Every success keyword must be present; the first missing one fails the
# run (exit 4).
for keyword in "${SUCCESS_KEYWORDS[@]}"; do
  if ! log_has_keyword "$LOG_PATH" "$keyword"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [WARN] Success keyword '$keyword' not found." | tee -a "$LOG_PATH"
    abort_keyword_check 4
  fi
done

# =============================================================
# 10. Wrap up
# =============================================================
# Stop the background ShieldCache monitor; it may already have ended.
kill "$CACHE_MON_PID" >/dev/null 2>&1 || true

echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Job '$JOB_NAME' completed successfully." | tee -a "$LOG_PATH"
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Output saved to: $LOG_DIR" | tee -a "$LOG_PATH"

# Take the Lastlog snapshot only after the final status lines were written,
# so lastpodlog.txt also shows that the run ended successfully.
cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
exit 0


This content originally appeared on DEV Community and was authored by ByteLedger


Print Share Comment Cite Upload Translate Updates
APA

ByteLedger | Sciencx (2025-10-12T15:52:27+00:00) K8 Monitoring. Retrieved from https://www.scien.cx/2025/10/12/k8-monitoring/

MLA
" » K8 Monitoring." ByteLedger | Sciencx - Sunday October 12, 2025, https://www.scien.cx/2025/10/12/k8-monitoring/
HARVARD
ByteLedger | Sciencx Sunday October 12, 2025 » K8 Monitoring., viewed ,<https://www.scien.cx/2025/10/12/k8-monitoring/>
VANCOUVER
ByteLedger | Sciencx - » K8 Monitoring. [Internet]. [Accessed ]. Available from: https://www.scien.cx/2025/10/12/k8-monitoring/
CHICAGO
" » K8 Monitoring." ByteLedger | Sciencx - Accessed . https://www.scien.cx/2025/10/12/k8-monitoring/
IEEE
" » K8 Monitoring." ByteLedger | Sciencx [Online]. Available: https://www.scien.cx/2025/10/12/k8-monitoring/. [Accessed: ]
rf:citation
» K8 Monitoring | ByteLedger | Sciencx | https://www.scien.cx/2025/10/12/k8-monitoring/ |

Please log in to upload a file.




There are no updates yet.
Click the Upload button above to add an update.

You must be logged in to translate posts. Please log in or register.