Skip to content

Commit 4d19d1d

Browse files
authored
Add ignore parameter (#17)
* Add ignore parameter
1 parent b06ae45 commit 4d19d1d

File tree

3 files changed

+41
-13
lines changed

3 files changed

+41
-13
lines changed

check_rancher2.sh

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
# 20190913 1.2.1 Detect additional redirect (308) #
3939
# 20200129 1.2.2 Fix typos in workload perfdata (#11) and single cluster health (#12) #
4040
# 20200523 1.2.3 Handle 403 forbidden error (#15) #
41+
# 20200617 1.3.0 Added ignore parameter (-i) #
4142
##########################################################################################
4243
# (Pre-)Define some fixed variables
4344
STATE_OK=0 # define the exit code if status is OK
@@ -46,7 +47,7 @@ STATE_CRITICAL=2 # define the exit code if status is Critical
4647
STATE_UNKNOWN=3 # define the exit code if status is Unknown
4748
export PATH=/usr/local/bin:/usr/bin:/bin:$PATH # Set path
4849
proto=http # Protocol to use, default is http, can be overwritten with -S parameter
49-
version=1.2.3
50+
version=1.3.0
5051

5152
# Check for necessary commands
5253
for cmd in jshon curl [
@@ -73,11 +74,12 @@ Usage: $0 -H Rancher2Address -U user-token -P password [-S] -t checktype [-c clu
7374
\t-n Namespace name (needed for specific pod checks)\n
7475
\t-w Workload name (for specific workload check)\n
7576
\t-o Pod name (for specific pod check, this makes only sense if you use static pods)\n
77+
\t-i Comma-separated list of status(es) to ignore (currently only supported in node check type)\n
7678
\t-h Help. I need somebody. Help. Not just anybody. Heeeeeelp!\n
7779
\nCheck Types:\n
7880
\tinfo -> Informs about available clusters and projects and their API ID's. These ID's are needed for specific checks.\n
7981
\tcluster -> Checks the current status of all clusters or of a specific cluster (defined with -c clusterid)\n
80-
\tnode -> Checks the current status of all nodes or of nodes in a specific cluster (defined with -c clusterid)\n
82+
\tnode -> Checks the current status of nodes in all clusters or of nodes in a specific cluster (defined with -c clusterid)\n
8183
\tproject -> Checks the current status of all projects or of a specific project (defined with -p projectid)\n
8284
\tworkload -> Checks the current status of all or a specific (-w workloadname) workload within a project (-p projectid must be set!)\n
8385
\tpod -> Checks the current status of all or a specific (-o podname -n namespace) pod within a project (-p projectid must be set!)\n
@@ -88,7 +90,7 @@ if [ "${1}" = "--help" -o "${#}" = "0" ];
8890
fi
8991
#########################################################################
9092
# Get user-given variables
91-
while getopts "H:U:P:t:c:p:n:w:o:Ssh" Input;
93+
while getopts "H:U:P:t:c:p:n:w:o:Ssi:h" Input;
9294
do
9395
case ${Input} in
9496
H) apihost=${OPTARG};;
@@ -102,6 +104,7 @@ do
102104
o) podname=${OPTARG};;
103105
S) proto=https;;
104106
s) selfsigned="-k";;
107+
i) ignore=${OPTARG};;
105108
h) echo -e ${help}; exit ${STATE_UNKNOWN};;
106109
*) echo -e ${help}; exit ${STATE_UNKNOWN};;
107110
esac
@@ -251,18 +254,24 @@ if [[ -z $clustername ]]; then
251254
for status in ${node_status[$i]}
252255
do
253256
if [[ ${status} != active ]]; then
254-
nodeerrors[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} -"
257+
if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
258+
nodeignored[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} but ignored -"
259+
else
260+
nodeerrors[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} -"
261+
fi
255262
fi
256263
done
257264
let i++
258265
done
259266

260-
if [[ ${#nodeerrors[*]} -gt 0 ]]
261-
then
262-
echo "CHECK_RANCHER2 CRITICAL - ${nodeerrors[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;;"
267+
if [[ ${#nodeerrors[*]} -gt 0 ]]; then
268+
echo "CHECK_RANCHER2 CRITICAL - ${nodeerrors[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
263269
exit ${STATE_CRITICAL}
270+
elif [[ ${#nodeignored[*]} -gt 0 ]]; then
271+
echo "CHECK_RANCHER2 OK - All nodes OK - Info: ${nodeignored[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
272+
exit ${STATE_OK}
264273
else
265-
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;;"
274+
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
266275
exit ${STATE_OK}
267276
fi
268277

@@ -285,18 +294,24 @@ else
285294
for status in ${node_status[$i]}
286295
do
287296
if [[ ${status} != active ]]; then
288-
nodeerrors[$i]="${node} in cluster ${clustername} is ${node_status[$i]} -"
297+
if [[ -n $(echo ${ignore} | grep -i ${status}) ]]; then
298+
nodeignored[$i]="${node} in cluster ${node_cluster_member[$i]} is ${node_status[$i]} but ignored -"
299+
else
300+
nodeerrors[$i]="${node} in cluster ${clustername} is ${node_status[$i]} -"
301+
fi
289302
fi
290303
done
291304
let i++
292305
done
293306

294-
if [[ ${#nodeerrors[*]} -gt 0 ]]
295-
then
296-
echo "CHECK_RANCHER2 CRITICAL - ${nodeerrors[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;;"
307+
if [[ ${#nodeerrors[*]} -gt 0 ]]; then
308+
echo "CHECK_RANCHER2 CRITICAL - ${nodeerrors[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
297309
exit ${STATE_CRITICAL}
310+
elif [[ ${#nodeignored[*]} -gt 0 ]]; then
311+
echo "CHECK_RANCHER2 OK - All nodes OK - Info: ${nodeignored[*]}|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
312+
exit ${STATE_OK}
298313
else
299-
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;;"
314+
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;;"
300315
exit ${STATE_OK}
301316
fi
302317

icinga2/command_check_rancher2.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ object CheckCommand "check_rancher2" {
1515
"-n" = "$rancher2_namespace$"
1616
"-w" = "$rancher2_workload$"
1717
"-o" = "$rancher2_pod$"
18+
"-i" = "$rancher2_ignore_status$"
1819
}
1920

2021
vars.rancher2_address = "$address$"

icinga2/example_service_checks.conf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@ object Service "Rancher2 Cluster Test" {
4242
vars.rancher2_cluster = "c-4kd22"
4343
}
4444

45+
# Check nodes in all clusters for their status but ignore cordoned and drained nodes
46+
object Service "Rancher2 Nodes" {
47+
import "generic-service"
48+
host_name = "my-rancher2-host"
49+
check_command = "check_rancher2"
50+
vars.rancher2_username = "token-XXXXX"
51+
vars.rancher2_password = "iWahca3ohngeiReedeingaiiWahca3ohngeiReedeingai432k1dda"
52+
vars.rancher2_ssl = true
53+
vars.rancher2_type = "node"
54+
vars.rancher2_ignore_status = "cordoned,drained"
55+
}
56+
4557
# Check all available/found projects (across all clusters) for their health
4658
object Service "Rancher2 All Projects" {
4759
import "generic-service"

0 commit comments

Comments
 (0)