Skip to content

Commit 3774696

Browse files
authored
Adding "local-certs" check type (#43)
Added local-certs check type
1 parent f36626b commit 3774696

File tree

2 files changed

+69
-13
lines changed

2 files changed

+69
-13
lines changed

check_rancher2.sh

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
# Official repo: https://github.com/Napsty/check_rancher2 #
66
# Documentation: https://www.claudiokuenzler.com/monitoring-plugins/check_rancher2.php #
77
# Purpose: Monitor Rancher 2.x Kubernetes cluster and their containers #
8-
# Description: Checks status of resources within the Kubernetes cluster(s) using #
9-
# Rancher 2.x API #
8+
# Description: Checks status of resources within the Rancher managed Kubernetes #
9+
# cluster(s) using Rancher 2.x API #
1010
# #
1111
# License : GNU General Public Licence (GPL) http://www.gnu.org/ #
1212
# This program is free software; you can redistribute it and/or modify it under the #
@@ -52,6 +52,7 @@
5252
# 20220909 1.10.0 Fix ComponentStatus (#35), show K8s version in single cluster check #
5353
# 20220909 1.10.0 Allow ignoring statuses on workload checks (#29) #
5454
# 20230110 1.11.0 Allow ignoring workload names, provisioning cluster not critical (#39) #
55+
# 20230202 1.12.0 Add local-certs check type #
5556
##########################################################################################
5657
# (Pre-)Define some fixed variables
5758
STATE_OK=0 # define the exit code if status is OK
@@ -60,7 +61,7 @@ STATE_CRITICAL=2 # define the exit code if status is Critical
6061
STATE_UNKNOWN=3 # define the exit code if status is Unknown
6162
export PATH=/usr/local/bin:/usr/bin:/bin:$PATH # Set path
6263
proto=http # Protocol to use, default is http, can be overwritten with -S parameter
63-
version=1.11.0
64+
version=1.12.0
6465
##########################################################################################
6566
# functions
6667

@@ -150,8 +151,8 @@ function convertPods()
150151
# We all need help from time to time
151152
usage ()
152153
{
153-
printf "check_rancher2 v ${version} (c) 2018-2022 Claudio Kuenzler and contributers (published under GPLv2)
154-
Usage: $0 -H Rancher2Address -U user-token -P password [-S] -t checktype [-c cluster] [-p project] [-w workload]
154+
printf "check_rancher2 v ${version} (c) 2018-2023 Claudio Kuenzler and contributers (published under GPLv2)
155+
Usage: $0 -H Rancher2Address -U user-token -P password [-S] -t checktype [-c cluster] [-p project] [-n namespace] [-w workload] [-o pod]
155156
156157
Options:
157158
\t[ -H | --apihost ] Address of Rancher 2 API (e.g. rancher.example.com)
@@ -165,13 +166,14 @@ Options:
165166
\t[ -n | --namespacename ] Namespace name (needed for specific workload or pod checks)
166167
\t[ -w | --workloadname ] Workload name (for specific workload check)
167168
\t[ -o | --podname ] Pod name (for specific pod check, this makes only sense if you use static pods)
168-
\t[ -i | --ignore ] Comma-separated list of status(es) to ignore (on node and workload check type) or list of workload name(s) to ignore (on workload check type)
169-
\t[ --cpu-warn ] Exit with WARNING status if more than PERCENT of cpu capacity is used (currently only supported in cluster specific node and cluster check type)
170-
\t[ --cpu-crit ] Exit with CRITICAL status if more than PERCENT of cpu capacity is used (currently only supported in cluster specific node and cluster check type)
171-
\t[ --memory-warn ] Exit with WARNING status if more than PERCENT of mem capacity is used (currently only supported in cluster specific node and cluster check type)
172-
\t[ --memory-crit ] Exit with CRITICAL status if more than PERCENT of mem capacity is used (currently only supported in cluster specific node and cluster check type)
173-
\t[ --pods-warn ] Exit with WARNING status if more than PERCENT of pod capacity is used (currently only supported in cluster specific node and cluster check type)
174-
\t[ --pods-crit ] Exit with CRITICAL status if more than PERCENT of pod capacity is used (currently only supported in cluster specific node and cluster check type)
169+
\t[ -i | --ignore ] Comma-separated list of status(es) to ignore (node and workload check types), list of workload name(s) to ignore (workload check type) or certificate to ignore (local-certs check type)
170+
\t[ --cpu-warn ] Exit with WARNING status if more than PERCENT of cpu capacity is used (supported check types: node, cluster)
171+
\t[ --cpu-crit ] Exit with CRITICAL status if more than PERCENT of cpu capacity is used (supported check types: node, cluster)
172+
\t[ --memory-warn ] Exit with WARNING status if more than PERCENT of mem capacity is used (supported check types: node, cluster)
173+
\t[ --memory-crit ] Exit with CRITICAL status if more than PERCENT of mem capacity is used (supported check types: node, cluster)
174+
\t[ --pods-warn ] Exit with WARNING status if more than PERCENT of pod capacity is used (supported check types: node, cluster)
175+
\t[ --pods-crit ] Exit with CRITICAL status if more than PERCENT of pod capacity is used (supported check types: node, cluster)
176+
\t[ --cert-warn ] Warning threshold in days to warn before a certificate expires (supported check types: local-certs)
175177
\t[ -h | --help ] Help. I need somebody. Help. Not just anybody. Heeeeeelp!
176178
177179
Check Types:
@@ -181,6 +183,7 @@ Check Types:
181183
\tproject -> Checks the current status of all projects or of a specific project (defined with -p projectid)
182184
\tworkload -> Checks the current status of all or a specific (-w workloadname) workload within a project (-p projectid must be set!)
183185
\tpod -> Checks the current status of all or a specific (-o podname -n namespace) pod within a project (-p projectid must be set!)
186+
\tlocal-certs -> Checks the current status of all internal Rancher certificates (e.g. rancher-webhook) in local cluster under the System project (namespace: cattle-system)
184187
"
185188
exit ${STATE_UNKNOWN}
186189
}
@@ -193,7 +196,7 @@ for cmd in jq curl; do
193196
fi
194197
done
195198
#########################################################################
196-
PARSED_ARGUMENTS=$(getopt -a -n check_rancher2 -o H:U:P:t:c:p:n:w:o:Ssi:h --long apihost:,apiuser:,apipass:,type:,clustername:,projectname:,namespacename:,workloadname:,podname:,secure,selfsigned,ignore:,cpu-warn:,cpu-crit:,memory-warn:,memory-crit:,pods-warn:,pods-crit: -- "$@")
199+
PARSED_ARGUMENTS=$(getopt -a -n check_rancher2 -o H:U:P:t:c:p:n:w:o:Ssi:h --long apihost:,apiuser:,apipass:,type:,clustername:,projectname:,namespacename:,workloadname:,podname:,secure,selfsigned,ignore:,cpu-warn:,cpu-crit:,memory-warn:,memory-crit:,pods-warn:,pods-crit:,cert-warn: -- "$@")
197200
VALID_ARGUMENTS=$?
198201
if [ "$VALID_ARGUMENTS" != "0" ]; then
199202
usage
@@ -221,6 +224,7 @@ while :; do
221224
--memory-crit) memory_crit=${2} ; shift 2 ;;
222225
--pods-warn) pods_warn=${2} ; shift 2 ;;
223226
--pods-crit) pods_crit=${2} ; shift 2 ;;
227+
--cert-warn) cert_warn=${2} ; shift 2 ;;
224228
--) shift; break ;;
225229
-h | --help) usage;;
226230
*) echo "Unexpected option: $1 - this should not happen. Please consult --help for valid options."
@@ -1183,6 +1187,54 @@ else
11831187
fi
11841188
;;
11851189

1190+
# --- local-certs --- #
# Checks the expiry date of the Rancher-internal certificates stored in the
# cattle-system namespace of the System project in the "local" cluster.
#
# Reads (globals):  selfsigned, apiuser, apipass, proto, apihost,
#                   ignore (comma-separated certificate names to skip),
#                   cert_warn (warning threshold in days, optional)
# Output:           exactly one status line with performance data on stdout
# Exit codes:       STATE_CRITICAL if at least one certificate is expired,
#                   STATE_WARNING  if at least one certificate expires within
#                                  cert_warn days,
#                   STATE_UNKNOWN  if the System project cannot be determined,
#                   STATE_OK       otherwise
local-certs)
  rightnow=$(date +%s)

  # Epoch timestamp of the warning horizon. Stays 0 (warnings disabled)
  # unless --cert-warn was given a positive number of days.
  warning=0
  if [[ ${cert_warn} -gt 0 ]]; then
    warning=$(( rightnow + cert_warn * 86400 ))
  fi

  projectid=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/cluster/local/projects" | jq -r '.data[] | select(.name == "System").id')

  # Without a project id the certificate query below cannot return anything
  # meaningful; report that instead of a misleading "all 0 certificates valid".
  if [[ -z ${projectid} ]]; then
    echo "CHECK_RANCHER2 UNKNOWN - Unable to determine the System project of the local cluster"
    exit ${STATE_UNKNOWN}
  fi

  api_out_certs=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/projects/${projectid}/namespacedcertificates?namespaceId=cattle-system")

  # -i accepts a comma-separated list; each entry is compared literally
  # against the certificate name.
  ignore_list=()
  IFS=',' read -r -a ignore_list <<< "${ignore}"

  cert_names=()
  cert_ignored=()
  cert_expired=()
  cert_warning=()
  ignoreoutput=""

  # jq emits one "name<TAB>expiresAt" record per certificate, so a name can
  # never be paired with the expiry date of a different certificate.
  while IFS=$'\t' read -r cert_name cert_expires; do
    cert_names+=("${cert_name}")
    #echo "${cert_name} ${cert_expires}" # Enable for debugging

    is_ignored=0
    for ignored_name in "${ignore_list[@]}"; do
      if [[ "${ignored_name}" == "${cert_name}" ]]; then
        is_ignored=1
        break
      fi
    done
    if (( is_ignored )); then
      cert_ignored+=("${cert_name}")
      continue
    fi

    expiry=$(date --date="${cert_expires}" +%s)
    if (( rightnow > expiry )); then
      diff=$(( (rightnow - expiry) / 86400 ))
      cert_expired+=("${cert_name} expired ${diff} days ago -")
    elif (( warning > expiry )); then
      # Remaining lifetime is measured from now, not from the warning horizon.
      diff=$(( (expiry - rightnow) / 86400 ))
      cert_warning+=("${cert_name} will expire in ${diff} days -")
    fi
  done < <(echo "$api_out_certs" | jq -r '.data[] | select(.type == "namespacedCertificate") | "\(.name)\t\(.expiresAt)"')

  if [[ ${#cert_ignored[*]} -gt 0 ]]; then
    ignoreoutput="- ${#cert_ignored[*]} certificate(s) ignored: ${cert_ignored[*]}"
  fi

  # Performance data is identical for every exit state.
  perfdata="'total_certs'=${#cert_names[*]};;;; 'expired_certs'=${#cert_expired[*]};;;; 'warning_certs'=${#cert_warning[*]};;;; 'ignored_certs'=${#cert_ignored[*]};;;;"

  if [[ ${#cert_expired[*]} -gt 0 ]]; then
    echo "CHECK_RANCHER2 CRITICAL - ${#cert_expired[*]} certificate(s) expired (${cert_expired[*]}) ${ignoreoutput}|${perfdata}"
    exit ${STATE_CRITICAL}
  elif [[ ${#cert_warning[*]} -gt 0 ]]; then
    echo "CHECK_RANCHER2 WARNING - ${#cert_warning[*]} certificate(s) will expire soon (${cert_warning[*]}) ${ignoreoutput}|${perfdata}"
    exit ${STATE_WARNING}
  else
    echo "CHECK_RANCHER2 OK - All ${#cert_names[*]} certificates are valid ${ignoreoutput}|${perfdata}"
    exit ${STATE_OK}
  fi

  ;;
1237+
11861238
esac
11871239
echo "UNKNOWN: should never reach this part"
11881240
exit ${STATE_UNKNOWN}

icinga2/command_check_rancher2.conf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ object CheckCommand "check_rancher2" {
7777
description = "Exit with CRITICAL status if more than PERCENT of pod capacity is used (currently only supported in cluster specific node and cluster check type)"
7878
value = "$rancher2_pods_crit$"
7979
}
80+
"--cert-warn" = {
81+
description = "Warning threshold in days to warn before a certificate expires (supported check types: local-certs)"
82+
value = "$rancher2_cert_warn$"
83+
}
8084
"-h" = {
8185
description = "Help. I need somebody. Help. Not just anybody. Heeeeeelp!"
8286
set_if = "$rancher2_help$"

0 commit comments

Comments
 (0)