# This content originally appeared on DEV Community and was authored by ByteLedger
#!/bin/bash
# =============================================================
# Script: Cronjob_monitorpod.sh
# Purpose:
# 1. Trigger a Kubernetes CronJob manually.
# 2. Monitor the created Job & Pod lifecycle.
# 3. Collect pod logs and periodic CPU/memory metrics.
# 4. Additionally, detect "/app/ShieldCache" activity in logs
# and correlate it with real-time pod resource usage.
#
# Author:
# Compatible OS: Linux (RHEL/CentOS)
# =============================================================
# --- 1. Basic Setup ---
# Resolve the directory this script lives in; every output folder below is
# created relative to it rather than to the caller's working directory.
SCRIPT="$(realpath -s "$0")"
SCRIPTPATH="$(dirname "$SCRIPT")"
NAMESPACE="onprem" # Namespace where the pods are located
CRONJOB_NAME="teams-chat" # Name of the CronJob you want to trigger the job from
JOB_NAME_BASE="teams-chat" # Base job name (will add timestamp)
TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" # Run timestamp, used in local file names
# Kubernetes object names must follow RFC 1123 (lowercase alphanumerics, '-'
# or '.'), so the '_' of TIMESTAMP is swapped for '-' in the Job name only.
JOB_NAME="${JOB_NAME_BASE}-${TIMESTAMP//_/-}" # Unique job name per run
# Folder setup
LOG_DIR="$SCRIPTPATH/Logs"
LASTLOG_DIR="$SCRIPTPATH/Lastlog"
OUTPUT_DIR="$SCRIPTPATH/output"
# Best effort: a failure to create the folders is deliberately ignored here.
mkdir -p "$LOG_DIR" "$LASTLOG_DIR" "$OUTPUT_DIR" >/dev/null 2>&1 || true
# File paths (all per-run files share the TIMESTAMP prefix)
LOG_PATH="$LOG_DIR/${TIMESTAMP}-${JOB_NAME_BASE}.log"
RESOURCE_CSV="$LOG_DIR/${TIMESTAMP}-${JOB_NAME_BASE}_pod_resources.csv"
CACHE_MONITOR_CSV="$LOG_DIR/${TIMESTAMP}-${JOB_NAME_BASE}_shieldcache_activity.csv"
# Keywords searched for in the collected log once the pod has finished
ERROR_KEYWORDS=("java.io.EOFException" "NoSuchMethodError" "Error")
SUCCESS_KEYWORDS=("TGM Policy Ended" "Job completed")
# =============================================================
# 2. Validate the Namespace
# =============================================================
# Stops the run with exit code 1 when `kubectl get namespace` fails for
# $NAMESPACE. Both status lines go to the console and to $LOG_PATH, the same
# way every other step of this script reports.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Checking if namespace '$NAMESPACE' exists..." | tee -a "$LOG_PATH"
if ! kubectl get namespace "$NAMESPACE" &>/dev/null; then
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Namespace '$NAMESPACE' not found. Exiting." | tee -a "$LOG_PATH"
  # Keep a copy of this run's log under the fixed "last log" name.
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi
# =============================================================
# 3. Trigger the CronJob manually (create a Job)
# =============================================================
# Creates Job $JOB_NAME from CronJob $CRONJOB_NAME. On failure the script
# exits with code 1 after recording kubectl's own explanation in the log and
# refreshing lastpodlog.txt, as the other failure paths do.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Creating Job '$JOB_NAME' from CronJob '$CRONJOB_NAME' in namespace '$NAMESPACE'..." | tee -a "$LOG_PATH"
if ! create_output="$(kubectl create job --from=cronjob/"$CRONJOB_NAME" -n "$NAMESPACE" "$JOB_NAME" 2>&1)"; then
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Failed to create job from CronJob." | tee -a "$LOG_PATH"
  # Without kubectl's message the log would not say why creation failed.
  if [[ -n "$create_output" ]]; then
    printf '%s\n' "$create_output" | tee -a "$LOG_PATH" >&2
  fi
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi
# =============================================================
# 4. Wait for the Pod to appear for this Job (pick newest)
# =============================================================
# Print the name of the most recently created pod labelled
# "job-name=$JOB_NAME" in $NAMESPACE. Pods are listed sorted by creation
# timestamp and only the last name is kept; nothing is printed when kubectl
# returns no names (its stderr is discarded).
get_newest_pod() {
  local -a pod_query=(
    -n "$NAMESPACE" get pods
    -l "job-name=${JOB_NAME}"
    --sort-by=.metadata.creationTimestamp
    -o 'jsonpath={range .items[*]}{.metadata.name}{"\n"}{end}'
  )
  kubectl "${pod_query[@]}" 2>/dev/null | tail -n 1
}
# Poll every 10s until the Job has a pod, then record its name in POD_NAME.
# The wait is bounded: if no pod shows up within POD_WAIT_TIMEOUT seconds
# (default 600, overridable from the environment) the script exits with code 1
# instead of polling forever.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Waiting for Pod associated with Job '$JOB_NAME'..." | tee -a "$LOG_PATH"
POD_NAME=""
POD_WAIT_TIMEOUT="${POD_WAIT_TIMEOUT:-600}"
# SECONDS is bash's built-in counter of seconds since the shell started.
pod_wait_deadline=$(( SECONDS + POD_WAIT_TIMEOUT ))
while true; do
  POD_NAME="$(get_newest_pod)"
  if [[ -n "$POD_NAME" ]]; then
    # nodeName can still be empty here if the pod has not been scheduled yet.
    NODE_NAME="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}')"
    echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' created on node '${NODE_NAME:-<unscheduled>}'." | tee -a "$LOG_PATH"
    break
  fi
  if (( SECONDS >= pod_wait_deadline )); then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] No Pod appeared for Job '$JOB_NAME' within ${POD_WAIT_TIMEOUT}s. Exiting." | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
    exit 1
  fi
  echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod not yet created... retrying in 10s." | tee -a "$LOG_PATH"
  sleep 10
done
# =============================================================
# 5. Prepare CSV headers
# =============================================================
# Each CSV is (re)created containing only its column-header row; the
# monitoring steps append data rows afterwards.
printf '%s\n' "timestamp,pod,cpu,mem" > "$RESOURCE_CSV"
printf '%s\n' "timestamp,pod,cpu,mem,log_snippet" > "$CACHE_MONITOR_CSV"
# =============================================================
# 6. Start ShieldCache-specific monitor (background)
# =============================================================
# Follows the pod log in a background pipeline. For every log line that
# mentions /app/ShieldCache it appends one row to $CACHE_MONITOR_CSV holding
# the current time, pod name, the cpu/mem columns of `kubectl top pod`, and
# the first 150 characters of the line. CACHE_MON_PID is the PID of the
# reader loop (last stage of the pipeline) so later steps can stop it.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Starting background monitor for /app/ShieldCache activity..." | tee -a "$LOG_PATH"
{
  # The pod was only just created; following logs before its container has
  # started makes `kubectl logs -f` fail right away and the monitor would
  # record nothing. Wait until the pod has left the Pending phase first.
  while true; do
    cache_phase="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null)"
    if [[ -n "$cache_phase" && "$cache_phase" != "Pending" ]]; then
      break
    fi
    sleep 2
  done
  # exec: this pipeline stage becomes kubectl itself (no extra shell lingers).
  exec kubectl logs -f "$POD_NAME" -n "$NAMESPACE" --timestamps=true 2>/dev/null
} | while IFS= read -r line; do
  # IFS= keeps leading/trailing whitespace of the log line intact.
  [[ "$line" == *"/app/ShieldCache"* ]] || continue
  ts="$(date +'%Y-%m-%d %H:%M:%S')"
  usage_line="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk '{print $2","$3}')"
  # No metrics available: keep the cpu and mem columns present but empty.
  [[ -n "$usage_line" ]] || usage_line=","
  short_msg="${line:0:150}" # truncate long lines
  # CSV quoting: a double quote inside a quoted field is written as "".
  dq='"'
  short_msg="${short_msg//$dq/$dq$dq}"
  printf '%s,%s,%s,"%s"\n' "$ts" "$POD_NAME" "$usage_line" "$short_msg" >> "$CACHE_MONITOR_CSV"
done &
CACHE_MON_PID=$!
# =============================================================
# 7. Main pod monitoring loop
# =============================================================
# Polls the pod phase until the pod finishes. Every iteration also appends a
# "timestamp,pod,cpu,mem" row from `kubectl top pod` to $RESOURCE_CSV (skipped
# when kubectl top prints nothing).
#   Running/Pending -> wait 30s and poll again
#   Succeeded       -> leave the loop and continue with the log checks
#   Failed          -> save the pod log, refresh lastpodlog.txt, exit 2
#   anything else   -> wait 20s; after MAX_UNKNOWN_POLLS consecutive
#                      unreadable statuses (default 30, overridable from the
#                      environment) give up with exit 2 instead of looping
#                      forever, e.g. when the pod has been deleted
# NOTE(review): per the Kubernetes pod lifecycle docs .status.phase is one of
# Pending/Running/Succeeded/Failed/Unknown; the extra names in the patterns
# below are kept as they were — confirm whether they are still wanted.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Monitoring pod '$POD_NAME' resource usage..." | tee -a "$LOG_PATH"
MAX_UNKNOWN_POLLS="${MAX_UNKNOWN_POLLS:-30}"
unknown_polls=0
while true; do
  POD_STATUS="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "")"
  # Periodic CPU/MEM usage collection
  USAGE_LINE="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" '{print ts","$1","$2","$3}')"
  [[ -n "$USAGE_LINE" ]] && echo "$USAGE_LINE" >> "$RESOURCE_CSV"
  case "$POD_STATUS" in
    Running|Pending)
      unknown_polls=0
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' is $POD_STATUS. Sleeping 30s..." | tee -a "$LOG_PATH"
      sleep 30
      ;;
    Succeeded|Completed)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod '$POD_NAME' completed successfully." | tee -a "$LOG_PATH"
      break
      ;;
    Failed|Error|CrashLoopBackOff)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Pod '$POD_NAME' failed with status $POD_STATUS." | tee -a "$LOG_PATH"
      # A failed run is when the pod output matters most: keep it in the run
      # log and refresh lastpodlog.txt like the other failure paths do.
      kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true >> "$LOG_PATH" 2>&1
      cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
      kill "$CACHE_MON_PID" >/dev/null 2>&1
      exit 2
      ;;
    *)
      unknown_polls=$(( unknown_polls + 1 ))
      if (( unknown_polls >= MAX_UNKNOWN_POLLS )); then
        echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Pod '$POD_NAME' status unavailable for $unknown_polls consecutive checks. Giving up." | tee -a "$LOG_PATH"
        cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
        kill "$CACHE_MON_PID" >/dev/null 2>&1
        exit 2
      fi
      echo "$(date +'%Y-%m-%d %H:%M:%S') [WARN] Unknown pod status '$POD_STATUS'. Waiting 20s..." | tee -a "$LOG_PATH"
      sleep 20
      ;;
  esac
done
# =============================================================
# 8. Fetch final logs (after completion)
# =============================================================
# Announce the fetch on console and in the run log, then append the pod's
# timestamped log (kubectl's stdout and stderr together) to the run log.
printf "%s [INFO] Fetching final logs for pod '%s'...\n" "$(date +'%Y-%m-%d %H:%M:%S')" "$POD_NAME" | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true &>> "$LOG_PATH"
# =============================================================
# 9. Check for error & success keywords
# =============================================================
# log_has_keyword KEYWORD FILE
# Returns 0 when FILE contains KEYWORD (case-insensitive), non-zero otherwise.
# The keyword is matched as a literal string (-F): as a regular expression
# the dots in e.g. "java.io.EOFException" would match any character.
log_has_keyword() {
  grep -Fiq -- "$1" "$2"
}
# Any single error keyword in the run log ends the run with exit code 3.
for keyword in "${ERROR_KEYWORDS[@]}"; do
  if log_has_keyword "$keyword" "$LOG_PATH"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Found error keyword: '$keyword'" | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
    kill "$CACHE_MON_PID" >/dev/null 2>&1
    exit 3
  fi
done
# Every success keyword must be present; the first missing one ends the run
# with exit code 4.
for keyword in "${SUCCESS_KEYWORDS[@]}"; do
  if ! log_has_keyword "$keyword" "$LOG_PATH"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [WARN] Success keyword '$keyword' not found." | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
    kill "$CACHE_MON_PID" >/dev/null 2>&1
    exit 4
  fi
done
# =============================================================
# 10. Wrap up
# =============================================================
# Stop the background ShieldCache monitor (ignoring the case where it has
# already ended), write the closing status lines, and only then refresh
# lastpodlog.txt so that the copy also contains those closing lines.
kill "$CACHE_MON_PID" >/dev/null 2>&1 || true
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Job '$JOB_NAME' completed successfully." | tee -a "$LOG_PATH"
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Output saved to: $LOG_DIR" | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
exit 0
This content originally appeared on DEV Community and was authored by ByteLedger
Print
Share
Comment
Cite
Upload
Translate
Updates
There are no updates yet.
Click the Upload button above to add an update.

APA
MLA
ByteLedger | Sciencx (2025-10-12T15:52:27+00:00) K8 Monitoring. Retrieved from https://www.scien.cx/2025/10/12/k8-monitoring/
" » K8 Monitoring." ByteLedger | Sciencx - Sunday October 12, 2025, https://www.scien.cx/2025/10/12/k8-monitoring/
HARVARDByteLedger | Sciencx Sunday October 12, 2025 » K8 Monitoring., viewed ,<https://www.scien.cx/2025/10/12/k8-monitoring/>
VANCOUVERByteLedger | Sciencx - » K8 Monitoring. [Internet]. [Accessed ]. Available from: https://www.scien.cx/2025/10/12/k8-monitoring/
CHICAGO" » K8 Monitoring." ByteLedger | Sciencx - Accessed . https://www.scien.cx/2025/10/12/k8-monitoring/
IEEE" » K8 Monitoring." ByteLedger | Sciencx [Online]. Available: https://www.scien.cx/2025/10/12/k8-monitoring/. [Accessed: ]
rf:citation » K8 Monitoring | ByteLedger | Sciencx | https://www.scien.cx/2025/10/12/k8-monitoring/ |
Please log in to upload a file.
There are no updates yet.
Click the Upload button above to add an update.