From fa29bb4489a750d20a6dd58f95e24e309fa8cf3b Mon Sep 17 00:00:00 2001 From: yzapf <108809174+yzapf@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:45:42 +0200 Subject: [PATCH 1/5] Update check_rancher2.sh added cron check --- check_rancher2.sh | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/check_rancher2.sh b/check_rancher2.sh index 005a92b..5f24aad 100755 --- a/check_rancher2.sh +++ b/check_rancher2.sh @@ -178,6 +178,7 @@ Check Types: \tproject -> Checks the current status of all projects or of a specific project (defined with -p projectid) \tworkload -> Checks the current status of all or a specific (-w workloadname) workload within a project (-p projectid must be set!) \tpod -> Checks the current status of all or a specific (-o podname -n namespace) pod within a project (-p projectid must be set!) +\tcron -> Checks the current status of a single (-w workloadname) cronjob within a project (-p projectid and -w workloadname must be set!) " exit ${STATE_UNKNOWN} } @@ -1146,6 +1147,77 @@ else fi ;; +# --- cronjob status check (requires project, namespace and workload name)--- # + +cron) +if [ -z $projectname ] || [ -z $workloadname ] || [ -z $namespacename ]; then + echo -e "CHECK_RANCHER2 UNKNOWN - To check a cronjob you must define the project (-p), workloadname (-w) and the namespace (-n)." + exit ${STATE_UNKNOWN} +fi + +if [[ -n $namespacename && $namespacename != "" ]]; then + nsappend="&namespaceId=$namespacename" + nsoutputappend="in namespace $namespacename " +fi + +api_out_single_cronjob=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${clustername}:${projectname}/workloads/cronjob:${namespacename}:${workloadname}") + +# Check if cluster is available +if [[ -n $(echo "$api_out_single_cronjob" | grep -i "ClusterUnavailable") ]]; then + clustername=$(echo ${projectname} | awk -F':' '{print $1}') + echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."; exit ${STATE_CRITICAL} +fi + +# Check if that given project name exists +if [[ -z $(echo "$api_out_single_cronjob" | grep -i "containers") ]]; then + echo "CHECK_RANCHER2 CRITICAL - Cronjob $workloadname ${nsoutputappend}not found."; exit ${STATE_CRITICAL} +fi + +# Path to cron schedule converter +cronconverter=./convert_cron_schedule.py + +# Check healthstatus of cronjob +healthstatus=$(echo "$api_out_single_cronjob" | jq -r '.state') +suspended=$(echo "$api_out_single_cronjob" | jq -r '.cronJobConfig.suspend') + +# Check if cronjob is active and runs in defined schedule +if test -f "$cronconverter"; then + + if [[ ${healthstatus} = active ]]; then + cronschedule=$(echo "$api_out_single_cronjob" | jq -r '.cronJobConfig.schedule') + crondiff=$(python3 convert_cron_schedule.py "$cronschedule" difftimestamp) + currenttime=$(date +%s%N | cut -b1-13) + lastruntime=$(echo "$api_out_single_cronjob" | jq -r '.cronJobStatus.lastScheduleTimeTS') + currentdiff=$((currenttime-lastruntime)) + + if [[ ${suspended} = true]]; then + echo "CHECK_RANCHER2 WARNING - Cronjob $workloadname ${nsoutputappend}is suspended.|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;" + exit ${STATE_WARNING} + + elif [[ $currentdiff -gt $crondiff ]]; then + echo "CHECK_RANCHER2 CRITICAL - Cronjob $workloadname ${nsoutputappend}is not running in schedule time." + exit ${STATE_CRITICAL} + else + echo "CHECK_RANCHER2 OK - Cronjob $workloadname ${nsoutputappend}is running properly." + exit ${STATE_OK} + fi + + elif [[ ${healthstatus} = updating ]]; then + echo "CHECK_RANCHER2 WARNING - Cronjob $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;" + exit ${STATE_WARNING} + + else + echo "CHECK_RANCHER2 CRITICAL - Cronjob $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;;" + exit ${STATE_CRITICAL} + fi + +else + echo "CHECK_RANCHER2 UNKNOWN - Resource not found: convert_cron_schedule.py" + exit ${STATE_UNKNOWN} +fi +;; + + esac echo "UNKNOWN: should never reach this part" exit ${STATE_UNKNOWN} From 068e944d7cd2a151e5b7c73481ee5c11cad3538f Mon Sep 17 00:00:00 2001 From: yzapf <108809174+yzapf@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:46:29 +0200 Subject: [PATCH 2/5] Create convert_cron_schedule.py --- convert_cron_schedule.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 convert_cron_schedule.py diff --git a/convert_cron_schedule.py b/convert_cron_schedule.py new file mode 100644 index 0000000..2a6aa84 --- /dev/null +++ b/convert_cron_schedule.py @@ -0,0 +1,28 @@ +import croniter +import datetime +import sys + +# Variables +now = datetime.datetime.now() +sched = sys.argv[1] +argument = sys.argv[2] +cron = croniter.croniter(sched, now) + +# Dates +nextdate = cron.get_next(datetime.datetime) +nextdatets_int = int(round(nextdate.timestamp() * 1000)) +prevdate = cron.get_prev(datetime.datetime) +prevdatets_int = int(round(prevdate.timestamp() * 1000)) + +schedulediff = nextdatets_int - prevdatets_int + +# Get next cron run +if argument == "next_run": + print(nextdatets_int) + +# Get difference between cron runs +elif argument == "difftimestamp": + print(schedulediff) + +else: + print("Unknown arguments.") From 2e825dc7e40d109485a8854f75bbbb3b5c201f8b Mon Sep 17 00:00:00 2001 From: yzapf <108809174+yzapf@users.noreply.github.com> Date: Fri, 19 Aug 2022 14:51:54 +0200 Subject: [PATCH 3/5] fix: updated syntax --- check_rancher2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_rancher2.sh b/check_rancher2.sh index 5f24aad..f57d170 100755 --- a/check_rancher2.sh +++ b/check_rancher2.sh @@ -1190,7 +1190,7 @@ if test -f "$cronconverter"; then lastruntime=$(echo "$api_out_single_cronjob" | jq -r '.cronJobStatus.lastScheduleTimeTS') currentdiff=$((currenttime-lastruntime)) - if [[ ${suspended} = true]]; then + if [[ ${suspended} = true ]]; then echo "CHECK_RANCHER2 WARNING - Cronjob $workloadname ${nsoutputappend}is suspended.|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;" exit ${STATE_WARNING} From c77f62db3b8d3e356c10a7f6e0d1c2b2a5b7e883 Mon Sep 17 00:00:00 2001 From: yzapf <108809174+yzapf@users.noreply.github.com> Date: Fri, 19 Aug 2022 15:00:28 +0200 Subject: [PATCH 4/5] fix: updated api call --- check_rancher2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_rancher2.sh b/check_rancher2.sh index f57d170..f0b9f02 100755 --- a/check_rancher2.sh +++ b/check_rancher2.sh @@ -1160,7 +1160,7 @@ if [[ -n $namespacename && $namespacename != "" ]]; then nsoutputappend="in namespace $namespacename " fi -api_out_single_cronjob=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${clustername}:${projectname}/workloads/cronjob:${namespacename}:${workloadname}") +api_out_single_cronjob=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/workloads/cronjob:${namespacename}:${workloadname}") # Check if cluster is available if [[ -n $(echo "$api_out_single_cronjob" | grep -i "ClusterUnavailable") ]]; then From a2d2a0957c5f96afd3019889b1b2033fac45dd67 Mon Sep 17 00:00:00 2001 From: yzapf <108809174+yzapf@users.noreply.github.com> Date: Tue, 18 Oct 2022 15:21:12 +0200 Subject: [PATCH 5/5] added: check for cronjob added an additional check to monitor whether a cronjob ran inside of its defined timeframe. --- check_rancher2.sh | 69 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/check_rancher2.sh b/check_rancher2.sh index f0b9f02..f8cb29d 100755 --- a/check_rancher2.sh +++ b/check_rancher2.sh @@ -49,6 +49,8 @@ # 20211201 1.7.1 Fix cluster state detection (#26) # # 20220610 1.8.0 More performance data, long parameters, other improvements (#31) # # 20220729 1.9.0 Output improvements (#32), show workload namespace (#33) # +# 20220909 1.10.0 Fix ComponentStatus (#35), show K8s version in single cluster check # +# 20220909 1.10.0 Allow ignoring statuses on workload checks (#29) # ########################################################################################## # (Pre-)Define some fixed variables STATE_OK=0 # define the exit code if status is OK @@ -57,7 +59,7 @@ STATE_CRITICAL=2 # define the exit code if status is Critical STATE_UNKNOWN=3 # define the exit code if status is Unknown export PATH=/usr/local/bin:/usr/bin:/bin:$PATH # Set path proto=http # Protocol to use, default is http, can be overwritten with -S parameter -version=1.9.0 +version=1.10.0 ########################################################################################## # functions @@ -178,7 +180,7 @@ Check Types: \tproject -> Checks the current status of all projects or of a specific project (defined with -p projectid) \tworkload -> Checks the current status of all or a specific (-w workloadname) workload within a project (-p projectid must be set!) \tpod -> Checks the current status of all or a specific (-o podname -n namespace) pod within a project (-p projectid must be set!) -\tcron -> Checks the current status of a single (-w workloadname) cronjob within a project (-p projectid and -w workloadname must be set!) +\tcron -> Checks the current status of a single (-w workloadname) cronjob within a project (-p projectid and -w workloadname must be set!) " exit ${STATE_UNKNOWN} } @@ -339,8 +341,8 @@ if [[ -z $clustername ]]; then #echo $cluster # For Debug clusteralias=$(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'")|.name') declare -a clusterstate=( $(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'") | .state') ) - declare -a component=( $(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'") | .componentStatuses[].name') ) - declare -a healthstatus=( $(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'") | .componentStatuses[].conditions[].status') ) + declare -a component=( $(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'") | .componentStatuses[]?.name') ) + declare -a healthstatus=( $(echo "$api_out_clusters" | jq -r '.data[] | select(.id == "'${cluster}'") | .componentStatuses[]?.conditions[].status') ) if [[ "${clusterstate}" != "active" ]]; then componenterrors[$e]="cluster ${clusteralias} is in ${clusterstate} state -" @@ -382,8 +384,9 @@ else clusteralias=$(echo "$api_out_single_cluster" | jq -r '.name') clusterstate=$(echo "$api_out_single_cluster" | jq -r '.state') - declare -a component=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[].name') ) - declare -a healthstatus=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[].conditions[].status') ) + k8sversion=$(echo "$api_out_single_cluster" | jq -r '.version.gitVersion') + declare -a component=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[]?.name') ) + declare -a healthstatus=( $(echo "$api_out_single_cluster" | jq -r '.componentStatuses[]?.conditions[].status') ) # capacity declare -a capacity_cpu=( $(echo "$api_out_single_cluster" | jq -r '.capacity.cpu') ) @@ -492,7 +495,7 @@ else echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems: ${resourceerrors}|'cluster_healthy'=0;;;; ${perf_output}" exit ${STATE_CRITICAL} else - echo "CHECK_RANCHER2 OK - Cluster $clusteralias is healthy|'cluster_healthy'=1;;;; ${perf_output}" + echo "CHECK_RANCHER2 OK - Cluster $clusteralias ($k8sversion) is healthy|'cluster_healthy'=1;;;; ${perf_output}" exit ${STATE_OK} fi @@ -993,9 +996,17 @@ if [[ -z $workloadname ]]; then for workload in ${workload_names[*]}; do for status in ${healthstatus[$i]}; do if [[ ${status} = updating ]]; then - workloadwarnings[$i]="Workload ${workload} is ${status} -" + if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then + workloadignored[$i]="Workload ${workload} is ${status} but ignored -" + else + workloadwarnings[$i]="Workload ${workload} is ${status} -" + fi elif [[ ${status} != active ]]; then - workloaderrors[$i]="Workload ${workload} is ${status} -" + if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then + workloadignored[$i]="Workload ${workload} is ${status} but ignored -" + else + workloaderrors[$i]="Workload ${workload} is ${status} -" + fi fi done for paused in ${pausedstatus[$i]}; do @@ -1006,17 +1017,21 @@ if [[ -z $workloadname ]]; then let i++ done + if [[ ${#workloadignored[*]} -gt 0 ]]; then + ignoreoutput="- ${workloadignored[*]}" + fi + if [[ ${#workloaderrors[*]} -gt 0 ]]; then - echo "CHECK_RANCHER2 CRITICAL - ${#workloaderrors[*]} workload(s) in error state: ${workloaderrors[*]}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;" + echo "CHECK_RANCHER2 CRITICAL - ${#workloaderrors[*]} workload(s) in error state: ${workloaderrors[*]} ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;" exit ${STATE_CRITICAL} elif [[ ${#workloadwarnings[*]} -gt 0 ]]; then - echo "CHECK_RANCHER2 WARNING - ${#workloadwarnings[*]} workload(s) in warning state: ${workloadwarnings[*]}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;" + echo "CHECK_RANCHER2 WARNING - ${#workloadwarnings[*]} workload(s) in warning state: ${workloadwarnings[*]} ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;" exit ${STATE_WARNING} else if [[ ${#workloadpaused[*]} -gt 0 ]]; then - echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active ( Note: ${#workloadpaused[*]} workloads currently paused: ${workloadpaused[*]})|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;" + echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active ( Note: ${#workloadpaused[*]} workloads currently paused: ${workloadpaused[*]}) ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;" else - echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;" + echo "CHECK_RANCHER2 OK - All workloads (${#workload_names[*]}) in project ${projectname} are healthy/active ${ignoreoutput}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;; 'workloads_ignored'=${#workloadignored[*]};;;;" fi exit ${STATE_OK} fi @@ -1051,13 +1066,23 @@ else healthstatus=$(echo "$api_out_single_workload" | jq -r '.data[].state') if [[ ${healthstatus} = updating ]]; then - echo "CHECK_RANCHER2 WARNING - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;" - exit ${STATE_WARNING} + if [[ -n $(echo ${ignore} | grep -i ${healthstatus}) ]]; then + echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is ${healthstatus} but ignored|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;; 'workload_ignored'=1;;;;" + exit ${STATE_WARNING} + else + echo "CHECK_RANCHER2 WARNING - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;; 'workload_ignored'=0;;;;" + exit ${STATE_WARNING} + fi elif [[ ${healthstatus} != active ]]; then - echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;;" - exit ${STATE_CRITICAL} + if [[ -n $(echo ${ignore} | grep -i ${healthstatus}) ]]; then + echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus} but ignored|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;; 'workload_ignored'=1;;;;" + exit ${STATE_CRITICAL} + else + echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;; 'workload_ignored'=0;;;;" + exit ${STATE_CRITICAL} + fi else - echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is active|'workload_active'=1;;;; 'workload_error'=0;;;; 'workload_warning'=0;;;;" + echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is active|'workload_active'=1;;;; 'workload_error'=0;;;; 'workload_warning'=0;;;; 'workload_ignored'=0;;;;" exit ${STATE_OK} fi @@ -1165,12 +1190,14 @@ api_out_single_cronjob=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${pro # Check if cluster is available if [[ -n $(echo "$api_out_single_cronjob" | grep -i "ClusterUnavailable") ]]; then clustername=$(echo ${projectname} | awk -F':' '{print $1}') - echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."; exit ${STATE_CRITICAL} + echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names." + exit ${STATE_CRITICAL} fi # Check if that given project name exists if [[ -z $(echo "$api_out_single_cronjob" | grep -i "containers") ]]; then - echo "CHECK_RANCHER2 CRITICAL - Cronjob $workloadname ${nsoutputappend}not found."; exit ${STATE_CRITICAL} + echo "CHECK_RANCHER2 CRITICAL - Cronjob $workloadname ${nsoutputappend}not found." + exit ${STATE_CRITICAL} fi # Path to cron schedule converter @@ -1185,7 +1212,7 @@ if test -f "$cronconverter"; then if [[ ${healthstatus} = active ]]; then cronschedule=$(echo "$api_out_single_cronjob" | jq -r '.cronJobConfig.schedule') - crondiff=$(python3 convert_cron_schedule.py "$cronschedule" difftimestamp) + crondiff=$(python3 $cronconverter "$cronschedule" difftimestamp) currenttime=$(date +%s%N | cut -b1-13) lastruntime=$(echo "$api_out_single_cronjob" | jq -r '.cronJobStatus.lastScheduleTimeTS') currentdiff=$((currenttime-lastruntime))