diff --git a/truenas-agent.py b/truenas_checkmk_agent.py similarity index 59% rename from truenas-agent.py rename to truenas_checkmk_agent.py index f2e814b..d382411 100644 --- a/truenas-agent.py +++ b/truenas_checkmk_agent.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # vim: set fileencoding=utf-8:noet -## Copyright 2022 Bashclub +## Copyright 2024 Bashclub https://github.com/bashclub ## BSD-2-Clause ## ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -21,7 +21,7 @@ ## save the file in any Datastore in a subfolder named check_mk_agent and start it Task -> Init/Shutdown Scripts as POSTINIT ## optional create a folder local in the check_mk_agent folder and execute local checks (subdirs for caching data supported) -__VERSION__ = "0.87" +__VERSION__ = "0.64" import sys import os @@ -39,16 +39,21 @@ import threading import ipaddress import base64 import traceback +import syslog +import requests +import hashlib from cryptography import x509 from cryptography.hazmat.backends import default_backend as crypto_default_backend from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +from datetime import datetime from xml.etree import cElementTree as ELementTree from collections import Counter,defaultdict from pprint import pprint from socketserver import TCPServer,StreamRequestHandler -SCRIPTPATH = os.path.abspath(os.path.basename(__file__)) +ISLINUX=sys.platform == "linux" +SCRIPTPATH = os.path.abspath(__file__) if os.path.islink(SCRIPTPATH): SCRIPTPATH = os.path.realpath(os.readlink(SCRIPTPATH)) BASEDIR = os.path.dirname(SCRIPTPATH) @@ -57,8 +62,20 @@ if BASEDIR.endswith("/bin"): MK_CONFDIR = os.path.join(BASEDIR,"etc") CHECKMK_CONFIG = os.path.join(MK_CONFDIR,"checkmk.conf") LOCALDIR = os.path.join(BASEDIR,"local") +PLUGINSDIR = os.path.join(BASEDIR,"plugins") SPOOLDIR = os.path.join(BASEDIR,"spool") +for _dir in (LOCALDIR, PLUGINSDIR, SPOOLDIR): + if not os.path.exists(_dir): + try: + os.mkdir(_dir) + except: + pass + +os.environ["MK_CONFDIR"] = MK_CONFDIR +os.environ["MK_LIBDIR"] = BASEDIR +os.environ["MK_VARDIR"] = BASEDIR + class object_dict(defaultdict): def __getattr__(self,name): return self[name] if name in self else "" @@ -83,6 +100,17 @@ def etree_to_dict(t): d[t.tag] = text return d +def log(message,prio="notice"): + priority = { + "crit" :syslog.LOG_CRIT, + "err" :syslog.LOG_ERR, + "warning" :syslog.LOG_WARNING, + "notice" :syslog.LOG_NOTICE, + "info" :syslog.LOG_INFO, + }.get(str(prio).lower(),syslog.LOG_DEBUG) + syslog.openlog(ident="checkmk_agent",logoption=syslog.LOG_PID | syslog.LOG_NDELAY,facility=syslog.LOG_DAEMON) + syslog.syslog(priority,message) + def pad_pkcs7(message,size=16): _pad = size - (len(message) % size) if type(message) == str: @@ -111,16 +139,19 @@ class checkmk_handler(StreamRequestHandler): pass class checkmk_checker(object): + _available_sysctl_list = [] + _available_sysctl_temperature_list = [] _check_cache = {} - def encrypt(self,message,password='secretpassword'): + def encrypt_msg(self,message,password='secretpassword'): SALT_LENGTH = 8 KEY_LENGTH = 32 IV_LENGTH = 16 PBKDF2_CYCLES = 10_000 - SALT = b"Salted__" + #SALT = b"Salted__" + SALT = os.urandom(SALT_LENGTH) _backend = crypto_default_backend() _kdf_key = PBKDF2HMAC( - algorithm = hashes.SHA256, + algorithm = hashes.SHA256(), length = KEY_LENGTH + IV_LENGTH, salt = SALT, iterations = PBKDF2_CYCLES, @@ -132,16 +163,38 @@ class checkmk_checker(object): modes.CBC(_iv), backend = _backend ).encryptor() - message = pad_pkcs7(message) message = message.encode("utf-8") + message = pad_pkcs7(message) _encrypted_message = _encryptor.update(message) + _encryptor.finalize() return pad_pkcs7(b"03",10) + SALT + _encrypted_message - def _encrypt(self,message): ## openssl ## todo ## remove - _cmd = shlex.split('openssl enc -aes-256-cbc -md sha256 -iter 10000 -k "secretpassword"',posix=True) - _proc = subprocess.Popen(_cmd,stderr=subprocess.DEVNULL,stdout=subprocess.PIPE,stdin=subprocess.PIPE) - _out,_err = _proc.communicate(input=message.encode("utf-8")) - return b"03" + _out + def decrypt_msg(self,message,password='secretpassword'): + SALT_LENGTH = 8 + KEY_LENGTH = 32 + IV_LENGTH = 16 + PBKDF2_CYCLES = 10_000 + message = message[10:] # strip header + SALT = message[:SALT_LENGTH] + message = message[SALT_LENGTH:] + _backend = crypto_default_backend() + _kdf_key = PBKDF2HMAC( + algorithm = hashes.SHA256(), + length = KEY_LENGTH + IV_LENGTH, + salt = SALT, + iterations = PBKDF2_CYCLES, + backend = _backend + ).derive(password.encode("utf-8")) + _key, _iv = _kdf_key[:KEY_LENGTH],_kdf_key[KEY_LENGTH:] + _decryptor = Cipher( + algorithms.AES(_key), + modes.CBC(_iv), + backend = _backend + ).decryptor() + _decrypted_message = _decryptor.update(message) + try: + return _decrypted_message.decode("utf-8").strip() + except UnicodeDecodeError: + return ("invalid key") def do_checks(self,debug=False,remote_ip=None,**kwargs): self._getosinfo() @@ -156,6 +209,7 @@ class checkmk_checker(object): _lines.append("OnlyFrom: {0}".format(",".join(self.onlyfrom))) _lines.append(f"LocalDirectory: {LOCALDIR}") + _lines.append(f"PluginsDirectory: {PLUGINSDIR}") _lines.append(f"AgentDirectory: {MK_CONFDIR}") _lines.append(f"SpoolDirectory: {SPOOLDIR}") @@ -170,6 +224,21 @@ class checkmk_checker(object): _failed_sections.append(_name) _errors.append(traceback.format_exc()) + if os.path.isdir(PLUGINSDIR): + for _plugin_file in glob.glob(f"{PLUGINSDIR}/**",recursive=True): + if os.path.isfile(_plugin_file) and os.access(_plugin_file,os.X_OK): + try: + _cachetime = int(_plugin_file.split(os.path.sep)[-2]) + except: + _cachetime = 0 + try: + if _cachetime > 0: + _lines.append(self._run_cache_prog(_plugin_file,_cachetime)) + else: + _lines.append(self._run_prog(_plugin_file)) + except: + _errors.append(traceback.format_exc()) + _lines.append("<<>>") for _check in dir(self): if _check.startswith("checklocal_"): @@ -190,17 +259,20 @@ class checkmk_checker(object): except: _cachetime = 0 try: - _lines.append(self._run_cache_prog(_local_file,_cachetime)) + if _cachetime > 0: + _lines.append(self._run_cache_prog(_local_file,_cachetime)) + else: + _lines.append(self._run_prog(_local_file)) except: _errors.append(traceback.format_exc()) if os.path.isdir(SPOOLDIR): _now = time.time() for _filename in glob.glob(f"{SPOOLDIR}/*"): - _maxage = re.search("^\d+",_filename) + _maxage = re.search("^(\d+)_",_filename) if _maxage: - _maxage = int(_maxage.group()) + _maxage = int(_maxage.group(1)) _mtime = os.stat(_filename).st_mtime if _now - _mtime > _maxage: continue @@ -209,43 +281,67 @@ class checkmk_checker(object): _lines.append("") if debug: - sys.stderr.write("\n".join(_errors)) - sys.stderr.flush() + sys.stdout.write("\n".join(_errors)) + sys.stdout.flush() if _failed_sections: _lines.append("<<>>") _lines.append("FailedPythonPlugins: {0}".format(",".join(_failed_sections))) - if self.encryptionkey: - return self.encrypt("\n".join(_lines),password=self.encryptionkey) + if self.encrypt and not debug: + return self.encrypt_msg("\n".join(_lines),password=self.encrypt) return "\n".join(_lines).encode("utf-8") def _getosinfo(self): - _version_file = "/conf/base/etc/version" - _version_modified = os.stat(_version_file).st_mtime - _os,_version = open(_version_file,"rt").read().strip().split("-",1) + _data = self._midclt("system.info") + _truenas_version = re.findall(".?(SCALE)?-(\S+)",_data.get("version",""))[0] + + #_version_file = "/conf/base/etc/version" + #_version_modified = os.stat(_version_file).st_mtime + #_os,_version = open(_version_file,"rt").read().strip().split("-",1) self._info = { - "os" : _os, - "os_version" : _version.split(" ")[0], - "version_age" : int(time.time() - _version_modified), + "os" : "TrueNAS{0}".format(*_truenas_version), + "os_version" : "{1}".format(*_truenas_version), + "version_age" : int(time.time() - 0), "config_age" : 0, "last_configchange" : "", "latest_version" : "", "latest_date" : "", - "hostname" : self._run_prog("hostname").strip(" \n") + "hostname" : _data.get("hostname","") } def pidof(self,prog,default=None): _allprogs = re.findall("(\w+)\s+(\d+)",self._run_prog("ps ax -c -o command,pid")) return int(dict(_allprogs).get(prog,default)) + def _midclt(self,command,cachetime=0): + if cachetime == 0: + _res = self._run_prog(f"midclt call {command}") + else: + _res = self._run_cache_prog(f"midclt call {command}",cachetime=cachetime) + try: + return json.loads(_res) + except: + return {} + + def checklocal_firmware(self): + if self._midclt("update.check_available",900).get("status","") == "UNAVAILABLE": + return ["0 Firmware available=0 Version {0} up to date".format(self._info.get("os_version"))] + else: + _pending = self._midclt("update.pending",9000).get("new",{}) + return ["1 Firmware available=1 Version {0} ({1} available)".format(self._info.get("os_version"),_pending.get("version",""))] + def check_label(self): _ret = ["<<>>"] - _dmsg = self._run_prog("dmesg",timeout=10) - if _dmsg.lower().find("hypervisor:") > -1: - _ret.append('{{"cmk/device_type":"vm"}}') + if ISLINUX: + if open("/proc/cpuinfo","rt").read().find("hypervisor") > -1: + _ret.append('{"cmk/device_type":"vm"}') + else: + _dmsg = self._run_prog("dmesg",timeout=10) + if _dmsg.lower().find("hypervisor:") > -1: + _ret.append('{"cmk/device_type":"vm"}') return _ret - def check_net(self): + def _check_bsd_net(self): _now = int(time.time()) _ret = ["<<>>"] _interface_data = [] @@ -263,7 +359,7 @@ class checkmk_checker(object): ) ) - _ifconfig_out = self._run_prog("ifconfig -m -v") + _ifconfig_out = self._run_prog("ifconfig -m -v -f inet:cidr,inet6:cidr") _ifconfig_out += "END" ## fix regex _all_interfaces = object_dict() for _interface, _data in re.findall("^(?P[\w.]+):\s(?P.*?(?=^\w))",_ifconfig_out,re.DOTALL | re.MULTILINE): @@ -288,28 +384,6 @@ class checkmk_checker(object): if _match: _interface_dict["speed"] = _match.group("speed").replace("G","000") _interface_dict["duplex"] = _match.group("duplex") - if _key == "inet": - _match = re.search("^(?P[\d.]+).*?netmask\s(?P0x[0-9a-f]{8}).*?(?:vhid\s(?P\d+)|$)",_val,re.M) - if _match: - _cidr = bin(int(_match.group("netmask"),16)).count("1") - _ipaddr = _match.group("ipaddr") - _vhid = _match.group("vhid") - ## fixme ipaddr dict / vhid dict - if _key == "inet6": - _match = re.search("^(?P[0-9a-f]+).*?prefixlen\s(?P\d+).*?(?:vhid\s(?P\d+)|$)",_val,re.M) - if _match: - _ipaddr = _match.group("ipaddr") - _prefix = _match.group("prefix") - _vhid = _match.group("vhid") - ## fixme ipaddr dict / vhid dict - if _key == "carp": - _match = re.search("(?PMASTER|BACKUP)\svhid\s(?P\d+)\sadvbase\s(?P\d+)\sadvskew\s(?P\d+)",_val,re.M) - if _match: - _carpstatus = _match.group("status") - _vhid = _match.group("vhid") - _advbase = _match.group("base") - _advskew = _match.group("skew") - ## fixme vhid dict if _key == "id": _match = re.search("priority\s(\d+)",_val) if _match: @@ -318,15 +392,7 @@ class checkmk_checker(object): _member = _interface_dict.get("member",[]) _member.append(_val.split()[0]) _interface_dict["member"] = _member - if _key == "Opened": - try: - _pid = int(_val.split(" ")[-1]) - if check_pid(_pid): - _interface_dict["up"] = "true" - except ValueError: - pass - #pprint(_interface_dict) _all_interfaces[_interface] = _interface_dict if re.search("^[*]?(pflog|pfsync|lo)\d?",_interface): continue @@ -337,13 +403,43 @@ class checkmk_checker(object): if type(_val) in (str,int,float): _ret.append(f"{_interface}.{_key} {_val}") + _ret += ["<<>>"] + _out = self._run_prog("netstat -inb") + for _line in re.finditer("^(?!Name|lo|plip)(?P\w+)\s+(?P\d+).*?Link.*?\s+.*?\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)$",_out,re.M): + _ret.append("{iface} {inbytes} {inpkts} {inerr} {indrop} 0 0 0 0 {outbytes} {outpkts} {outerr} 0 0 0 0 0".format(**_line.groupdict())) + return _ret + + def check_net(self): + if not ISLINUX: + return self._check_bsd_net() + _ret = ["<<>>"] + _ret.append("[start_iplink]") + _ifaces = self._run_prog("ip address") + _ret.append(_ifaces) + _ret.append("[end_iplink]") + _ret.append("<<>>") + for _iface in re.findall("\d+:\s(\w+)",_ifaces): + _ret.append(f"[{_iface}]") + try: + _ethtool = self._run_prog(f"ethtool {_iface}") + _ret += re.findall("(?:Speed|Duplex|Link detected|Auto-negotiation).*",_ethtool) + _mac = open(f"/sys/class/net/{_iface}/address","rt").read().strip() + except: + pass + _ret.append(f"Address: {_mac}") + + if os.path.isdir("/proc/net/bonding"): + _ret.append("<<>>") + for _bond in os.listdir("/proc/net/bonding"): + _ret.append(f"==> ./{_bond} <==") + _ret.append(open(f"/proc/net/bonding/{_bond}","rt").read()) return _ret def checklocal_samba(self): - if not os.path.exists("/usr/local/bin/smbstatus"): + if not os.path.exists("/bin/smbstatus") and not not os.path.exists("/usr/local/bin/smbstatus") : return [] try: - _json = self._run_prog("/usr/local/bin/smbstatus --json") + _json = self._run_prog("smbstatus --json") _data = json.loads(_json) _locks = _data.get("locked_files",[]) _xlocks = len(list(filter(lambda x: True in x.get("oplock",{}).values(),_locks))) @@ -353,32 +449,100 @@ class checkmk_checker(object): except: return ["2 Samba share_locks=0|exclusive_locks=0|active_sessions=0 Server Error"] + def check_ntp(self): + if not os.path.exists("/usr/bin/ntpq"): + return [] + _ret = ["<<>>"] + for _line in self._run_prog("ntpq -np",timeout=30).split("\n")[2:]: + if _line.strip(): + _ret.append("{0} {1}".format(_line[0],_line[1:])) + return _ret + def check_smartinfo(self): - if not os.path.exists("/usr/local/sbin/smartctl"): + if not os.path.exists("/sbin/smartctl") and not os.path.exists("/usr/local/sbin/smartctl"): return [] REGEX_DISCPATH = re.compile("(sd[a-z]+|da[0-9]+|nvme[0-9]+|ada[0-9]+)$") + _disk_descriptions = {} + for _disk in self._midclt("disk.query"): + _diskname = re.sub("(nvme\d+)n\d","\\1",_disk.get("name")) + _disk_descriptions[_diskname] = _disk.get("description","") _ret = ["<<>>"] for _dev in filter(lambda x: REGEX_DISCPATH.match(x),os.listdir("/dev/")): try: - _ret.append(str(smart_disc(_dev))) + _ret.append(str(smart_disc(_dev,description=_disk_descriptions.get(f"{_dev}")))) except: pass return _ret def check_ipmi(self): - if not os.path.exists("/usr/local/bin/ipmitool"): + if not os.path.exists("/bin/ipmitool") and not os.path.exists("/usr/local/bin/ipmitool"): return [] - _ret = ["<<>>"] - _out = self._run_prog("/usr/local/bin/ipmitool sensor list") - _ret += re.findall("^(?!.*\sna\s.*$).*",_out,re.M) + _out = self._run_prog("ipmitool sensor list") + _ipmisensor = re.findall("^(?!.*\sna\s.*$).*",_out,re.M) + if _ipmisensor: + return ["<<>>"] + _ipmisensor + return [] + + def check_temperature(self): + if ISLINUX: + return [] + _ret = ["<<>>"] + _out = self._run_prog("sysctl dev.cpu",timeout=10) + _cpus = dict([_v.split(": ") for _v in _out.split("\n") if len(_v.split(": ")) == 2]) + _cpu_temperatures = list(map( + lambda x: float(x[1].replace("C","")), + filter( + lambda x: x[0].endswith("temperature"), + _cpus.items() + ) + )) + if _cpu_temperatures: + _cpu_temperature = int(max(_cpu_temperatures) * 1000) + _ret.append(f"CPU|enabled|unknown|{_cpu_temperature}") + + _count = 0 + for _tempsensor in self._available_sysctl_temperature_list: + _out = self._run_prog(f"sysctl -n {_tempsensor}",timeout=10) + if _out: + try: + _zone_temp = int(float(_out.replace("C","")) * 1000) + except ValueError: + _zone_temp = None + if _zone_temp: + if _tempsensor.find(".pchtherm.") > -1: + _ret.append(f"thermal_zone{_count}|enabled|unknown|{_zone_temp}|111000|critical|108000|passive") + else: + _ret.append(f"thermal_zone{_count}|enabled|unknown|{_zone_temp}") + _count += 1 + if len(_ret) < 2: + return [] return _ret + def check_df(self): _ret = ["<<>>"] _ret += self._run_prog("df -kTP -t ufs").split("\n")[1:] return _ret - def check_kernel(self): + def check_diskstat(self): + if not ISLINUX: + return [] + _ret = ["<<>>"] + _now = int(time.time()) + _ret.append(f"{_now}") + for _disk in list(filter(lambda x: re.search("nvme[0-9]+n[0-9]+|[svh]d[a-z]*[0-9]*|zd[0-9]+",x),open("/proc/diskstats","rt").readlines())): + _ret.append(_disk.strip()) + return _ret + + def check_ssh(self): + _ret = ["<<>>"] + with open("/etc/ssh/sshd_config","r") as _f: + for _line in _f.readlines(): + if re.search("^[a-zA-Z]",_line): + _ret.append(_line.replace("\n","")) + return _ret + + def _check_bsd_kernel(self): _ret = ["<<>>"] _out = self._run_prog("sysctl vm.stats",timeout=10) _kernel = dict([_v.split(": ") for _v in _out.split("\n") if len(_v.split(": ")) == 2]) @@ -389,7 +553,17 @@ class checkmk_checker(object): _ret.append("processes {0}".format(_sum)) return _ret - def check_mem(self): + def check_kernel(self): + if not ISLINUX: + return self._check_bsd_kernel() + _ret = ["<<>>"] + _now = int(time.time()) + _ret.append(f"{_now}") + _ret.append(open("/proc/vmstat","rt").read()) + _ret.append(open("/proc/stat","rt").read()) + return _ret + + def _check_bsd_mem(self): _ret = ["<<>>"] _pagesize = int(self._run_prog("sysctl -n hw.pagesize")) _out = self._run_prog("sysctl vm.stats",timeout=10) @@ -407,34 +581,38 @@ class checkmk_checker(object): _ret.append("swap.free 0") _ret.append("swap.total 0") _ret.append("swap.used 0") - + return _ret + + def check_mem(self): + if not ISLINUX: + return self._check_bsd_mem() + _ret = ["<<>>"] + _ret.append(open("/proc/meminfo","rt").read()) return _ret def check_zpool(self): _ret = ["<<>>"] - try: - for _line in self._run_prog("zpool status -x").split("\n"): - if _line.find("errors: No known data errors") == -1: - _ret.append(_line) - except: - return [] + _ret.append(self._run_prog("zpool status -x")) + _ret.append("<<>>") + _ret.append(self._run_prog("zpool list")) return _ret def check_zfs(self): - _ret = ["<<>>"] + _ret = ["<<>>"] _ret.append(self._run_prog("zfs get -t filesystem,volume -Hp name,quota,used,avail,mountpoint,type")) _ret.append("[df]") _ret.append(self._run_prog("df -kP -t zfs")) _ret.append("<<>>") - _ret.append(self._run_prog("sysctl -q kstat.zfs.misc.arcstats").replace("kstat.zfs.misc.arcstats.","").replace(": "," = ").strip()) + if ISLINUX: + _arc = "".join(open("/proc/spl/kstat/zfs/arcstats","rt").readlines()[2:]) + for _stat in re.findall("^([\w_]+)\s*\d\s*(\d+)",_arc,re.M): + _ret.append("{0} = {1}".format(*_stat)) + else: + _ret.append(self._run_prog("sysctl -q kstat.zfs.misc.arcstats").replace("kstat.zfs.misc.arcstats.","").replace(": "," = ").strip()) + return _ret - def check_mounts(self): - _ret = ["<<>>"] - _ret.append(self._run_prog("mount -p -t ufs").strip()) - return _ret - - def check_cpu(self): + def _check_bsd_cpu(self): _ret = ["<<>>"] _loadavg = self._run_prog("sysctl -n vm.loadavg").strip("{} \n") _proc = self._run_prog("top -b -n 1").split("\n")[1].split(" ") @@ -444,21 +622,15 @@ class checkmk_checker(object): _ret.append(f"{_loadavg} {_proc} {_lastpid} {_ncpu}") return _ret - def check_netctr(self): - _ret = ["<<>>"] - _out = self._run_prog("netstat -inb") - for _line in re.finditer("^(?!Name|lo|plip)(?P\w+)\s+(?P\d+).*?Link.*?\s+.*?\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P\d+)$",_out,re.M): - _ret.append("{iface} {inbytes} {inpkts} {inerr} {indrop} 0 0 0 0 {outbytes} {outpkts} {outerr} 0 0 0 0 0".format(**_line.groupdict())) + def check_cpu(self): + if not ISLINUX: + return self._check_bsd_cpu() + _ret = ["<<>>"] + _ret += open("/proc/loadavg","rt").readlines() + if os.path.exists("/proc/sys/kernel/threads-max"): + _ret += open("/proc/sys/kernel/threads-max","rt").readlines() return _ret - def check_ntp(self): - _ret = ["<<>>"] - for _line in self._run_prog("ntpq -np",timeout=30).split("\n")[2:]: - if _line.strip(): - _ret.append("{0} {1}".format(_line[0],_line[1:])) - return _ret - - def check_tcp(self): _ret = ["<<>>"] _out = self._run_prog("netstat -na") @@ -473,16 +645,22 @@ class checkmk_checker(object): for _line in re.finditer("^(?P\w+)\s+(?P\w+)\s+(?P\d+)\s+(?P\d+)\s+(?P[\d.]+)\s+(?P.*)$",_out,re.M): _ret.append("({user},{vsz},{rss},{cpu}) {command}".format(**_line.groupdict())) return _ret - - def check_uptime(self): + def _check_bsd_uptime(self): _ret = ["<<>>"] _uptime_sec = time.time() - int(self._run_prog("sysctl -n kern.boottime").split(" ")[3].strip(" ,")) _idle_sec = re.findall("(\d+):[\d.]+\s+\[idle\]",self._run_prog("ps axw"))[0] _ret.append(f"{_uptime_sec} {_idle_sec}") return _ret - def _run_prog(self,cmdline="",*args,shell=False,timeout=60): + def check_uptime(self): + if not ISLINUX: + return self._check_bsd_uptime() + _ret = ["<<>>"] + _ret += open("/proc/uptime","rt").readlines() + return _ret + + def _run_prog(self,cmdline="",*args,shell=False,timeout=60,ignore_error=False): if type(cmdline) == str: _process = shlex.split(cmdline,posix=True) else: @@ -490,11 +668,13 @@ class checkmk_checker(object): try: return subprocess.check_output(_process,encoding="utf-8",shell=shell,stderr=subprocess.DEVNULL,timeout=timeout) except subprocess.CalledProcessError as e: + if ignore_error: + return e.stdout return "" except subprocess.TimeoutExpired: return "" - def _run_cache_prog(self,cmdline="",cachetime=10,*args,shell=False): + def _run_cache_prog(self,cmdline="",cachetime=10,*args,shell=False,ignore_error=False): if type(cmdline) == str: _process = shlex.split(cmdline,posix=True) else: @@ -502,15 +682,16 @@ class checkmk_checker(object): _process_id = "".join(_process) _runner = self._check_cache.get(_process_id) if _runner == None: - _runner = checkmk_cached_process(_process,shell=shell) + _runner = checkmk_cached_process(_process,shell=shell,ignore_error=ignore_error) self._check_cache[_process_id] = _runner return _runner.get(cachetime) class checkmk_cached_process(object): - def __init__(self,process,shell=False): + def __init__(self,process,shell=False,ignore_error=False): self._processs = process self._islocal = os.path.dirname(process[0]).startswith(LOCALDIR) self._shell = shell + self._ignore_error = ignore_error self._mutex = threading.Lock() with self._mutex: self._data = (0,"") @@ -520,7 +701,10 @@ class checkmk_cached_process(object): try: _data = subprocess.check_output(self._processs,shell=self._shell,encoding="utf-8",stderr=subprocess.DEVNULL,timeout=timeout) except subprocess.CalledProcessError as e: - _data = "" + if self._ignore_error: + _data = e.stdout + else: + _data = "" except subprocess.TimeoutExpired: _data = "" with self._mutex: @@ -553,13 +737,13 @@ class checkmk_cached_process(object): return _data class checkmk_server(TCPServer,checkmk_checker): - def __init__(self,port,pidfile,user,onlyfrom=None,encryptionkey=None,skipcheck=None,**kwargs): + def __init__(self,port,pidfile,onlyfrom=None,encrypt=None,skipcheck=None,**kwargs): self.pidfile = pidfile self.onlyfrom = onlyfrom.split(",") if onlyfrom else None self.skipcheck = skipcheck.split(",") if skipcheck else [] - self.encryptionkey = encryptionkey + self.encrypt = encrypt self._mutex = threading.Lock() - self.user = pwd.getpwnam(user) + self.user = pwd.getpwnam("root") self.allow_reuse_address = True TCPServer.__init__(self,("",port),checkmk_handler,bind_and_activate=False) @@ -571,12 +755,12 @@ class checkmk_server(TCPServer,checkmk_checker): def verify_request(self, request, client_address): if self.onlyfrom and client_address[0] not in self.onlyfrom: + log("Client {0} not allowed".format(*client_address),"warn") return False return True def server_start(self): - sys.stderr.write("starting checkmk_agent\n") - sys.stderr.flush() + log("starting checkmk_agent") signal.signal(signal.SIGTERM, self._signal_handler) signal.signal(signal.SIGINT, self._signal_handler) signal.signal(signal.SIGHUP, self._signal_handler) @@ -594,13 +778,36 @@ class checkmk_server(TCPServer,checkmk_checker): sys.stdout.write("\n") pass + def cmkclient(self,checkoutput="127.0.0.1",port=None,encrypt=None,**kwargs): + _sock = socket.socket(socket.AF_INET,socket.SOCK_STREAM) + _sock.settimeout(3) + try: + _sock.connect((checkoutput,port)) + _sock.settimeout(None) + _msg = b"" + while True: + _data = _sock.recv(2048) + if not _data: + break + _msg += _data + except TimeoutError: + sys.stderr.write("timeout\n") + sys.stderr.flush() + sys.exit(1) + + if _msg[:2] == b"03": + if encrypt: + return self.decrypt_msg(_msg,encrypt) + else: + pprint(repr(_msg[:2])) + return "missing key" + return _msg.decode("utf-8") + def _signal_handler(self,signum,*args): if signum in (signal.SIGTERM,signal.SIGINT): - sys.stderr.write("stopping checkmk_agent\n") + log("stopping checkmk_agent") threading.Thread(target=self.shutdown,name='shutdown').start() sys.exit(0) - sys.stderr.write("checkmk_agent running\n") - sys.stderr.flush() def daemonize(self): try: @@ -609,7 +816,8 @@ class checkmk_server(TCPServer,checkmk_checker): ## first parent sys.exit(0) except OSError as e: - print("Fork failed") + sys.stderr.write("Fork failed\n") + sys.stderr.flush() sys.exit(1) os.chdir("/") os.setsid() @@ -620,13 +828,14 @@ class checkmk_server(TCPServer,checkmk_checker): ## second sys.exit(0) except OSError as e: - print("Fork 2 failed") + sys.stderr.write("Fork 2 failed\n") + sys.stderr.flush() sys.exit(1) sys.stdout.flush() sys.stderr.flush() self._redirect_stream(sys.stdin,None) self._redirect_stream(sys.stdout,None) - #self._redirect_stream(sys.stderr,None) + self._redirect_stream(sys.stderr,None) with open(self.pidfile,"wt") as _pidfile: _pidfile.write(str(os.getpid())) os.chown(self.pidfile,self.user[2],self.user[3]) @@ -653,8 +862,10 @@ class checkmk_server(TCPServer,checkmk_checker): REGEX_SMART_VENDOR = re.compile(r"^\s*(?P\d+)\s(?P[-\w]+).*\s{2,}(?P[\w\/() ]+)$",re.M) REGEX_SMART_DICT = re.compile(r"^(.*?):\s*(.*?)$",re.M) class smart_disc(object): - def __init__(self,device): + def __init__(self,device,description=""): self.device = device + if description: + self.description = description MAPPING = { "Model Family" : ("model_family" ,lambda x: x), "Model Number" : ("model_family" ,lambda x: x), @@ -677,13 +888,16 @@ class smart_disc(object): "Data Units Read" : ("data_read_bytes" ,lambda x: x.split(" ")[0].replace(",","")), "Data Units Written": ("data_write_bytes" ,lambda x: x.split(" ")[0].replace(",","")), "Power On Hours" : ("poweronhours" ,lambda x: x.replace(",","")), + "number of hours powered up" : ("poweronhours" ,lambda x: x.replace(",","")), "Power Cycles" : ("powercycles" ,lambda x: x.replace(",","")), "NVMe Version" : ("transport" ,lambda x: f"NVMe {x}"), - "Raw_Read_Error_Rate" : ("error_rate" ,lambda x: x.replace(",","")), + "Raw_Read_Error_Rate" : ("error_rate" ,lambda x: x.split(" ")[-1].replace(",","")), "Reallocated_Sector_Ct" : ("reallocate" ,lambda x: x.replace(",","")), - "Seek_Error_Rate" : ("seek_error_rate",lambda x: x.replace(",","")), + "Seek_Error_Rate" : ("seek_error_rate",lambda x: x.split(" ")[-1].replace(",","")), "Power_Cycle_Count" : ("powercycles" ,lambda x: x.replace(",","")), "Temperature_Celsius" : ("temperature" ,lambda x: x.split(" ")[0]), + "Temperature_Internal" : ("temperature" ,lambda x: x.split(" ")[0]), + "Drive_Temperature" : ("temperature" ,lambda x: x.split(" ")[0]), "UDMA_CRC_Error_Count" : ("udma_error" ,lambda x: x.replace(",","")), "Offline_Uncorrectable" : ("uncorrectable" ,lambda x: x.replace(",","")), "Power_On_Hours" : ("poweronhours" ,lambda x: x.replace(",","")), @@ -695,7 +909,10 @@ class smart_disc(object): "Critical Comp. Temp. Threshold" : ("temperature_crit" ,lambda x: x.split(" ")[0]), "Media and Data Integrity Errors" : ("media_errors" ,lambda x: x), "Airflow_Temperature_Cel" : ("temperature" ,lambda x: x), - "SMART overall-health self-assessment test result" : ("smart_status" ,lambda x: int(x.lower() == "passed")), + "number of hours powered up" : ("poweronhours" ,lambda x: x.split(".")[0]), + "Accumulated start-stop cycles" : ("powercycles" ,lambda x: x), + "Available Spare" : ("wearoutspare" ,lambda x: x.replace("%","")), + "SMART overall-health self-assessment test result" : ("smart_status" ,lambda x: int(x.lower().strip() == "passed")), "SMART Health Status" : ("smart_status" ,lambda x: int(x.lower() == "ok")), } self._get_data() @@ -736,6 +953,8 @@ class smart_disc(object): def __str__(self): _ret = [] + if getattr(self,"transport","").lower() == "iscsi": ## ignore ISCSI + return "" if not getattr(self,"model_type",None): self.model_type = getattr(self,"model_family","unknown") for _k,_v in self.__dict__.items(): @@ -747,7 +966,6 @@ class smart_disc(object): if __name__ == "__main__": import argparse class SmartFormatter(argparse.HelpFormatter): - def _split_lines(self, text, width): if text.startswith('R|'): return text[2:].splitlines() @@ -755,94 +973,240 @@ if __name__ == "__main__": return argparse.HelpFormatter._split_lines(self, text, width) _checks_available = sorted(list(map(lambda x: x.split("_")[1],filter(lambda x: x.startswith("check_") or x.startswith("checklocal_"),dir(checkmk_checker))))) _ = lambda x: x - _parser = argparse.ArgumentParser(f"checkmk_agent for TrueNAS\nVersion: {__VERSION__}\n##########################################\n", formatter_class=SmartFormatter) - _parser.add_argument("--port",type=int,default=6556, - help=_("Port checkmk_agent listen")) + _parser = argparse.ArgumentParser( + add_help=False, + formatter_class=SmartFormatter + ) + _parser.add_argument("--help",action="store_true", + help=_("show help message")) _parser.add_argument("--start",action="store_true", help=_("start the daemon")) + _parser.add_argument("--restart",action="store_true", + help=_("stop and restart the daemon")) _parser.add_argument("--stop",action="store_true", help=_("stop the daemon")) + _parser.add_argument("--status",action="store_true", + help=_("show daemon status")) _parser.add_argument("--nodaemon",action="store_true", help=_("run in foreground")) - _parser.add_argument("--status",action="store_true", - help=_("show status if running")) + _parser.add_argument("--checkoutput",nargs="?",const="127.0.0.1",type=str,metavar="hostname", + help=_("connect to [hostname]port and decrypt if needed")) + _parser.add_argument("--update",nargs="?",const="main",type=str,metavar="branch/commitid", + help=_("check for update")) _parser.add_argument("--config",type=str,dest="configfile",default=CHECKMK_CONFIG, help=_("path to config file")) - _parser.add_argument("--user",type=str,default="root", - help=_("")) - _parser.add_argument("--encrypt",type=str,dest="encryptionkey", - help=_("Encryption password (do not use from cmdline)")) + _parser.add_argument("--port",type=int,default=6556, + help=_("port checkmk_agent listen")) + _parser.add_argument("--encrypt",type=str,dest="encrypt", + help=_("encryption password (do not use from cmdline)")) _parser.add_argument("--pidfile",type=str,default="/var/run/checkmk_agent.pid", - help=_("")) + help=_("path to pid file")) _parser.add_argument("--onlyfrom",type=str, help=_("comma seperated ip addresses to allow")) _parser.add_argument("--skipcheck",type=str, help=_("R|comma seperated checks that will be skipped \n{0}".format("\n".join([", ".join(_checks_available[i:i+10]) for i in range(0,len(_checks_available),10)])))) _parser.add_argument("--debug",action="store_true", help=_("debug Ausgabe")) + + def _args_error(message): + print("#"*35) + print("checkmk_agent for opnsense") + print(f"Version: {__VERSION__}") + print("#"*35) + print(message) + print("") + print("use --help or -h for help") + sys.exit(1) + _parser.error = _args_error args = _parser.parse_args() if args.configfile and os.path.exists(args.configfile): for _k,_v in re.findall(f"^(\w+):\s*(.*?)(?:\s+#|$)",open(args.configfile,"rt").read(),re.M): if _k == "port": args.port = int(_v) - if _k == "encrypt": - args.encryptionkey = _v + if _k == "encrypt" and args.encrypt == None: + args.encrypt = _v if _k == "onlyfrom": args.onlyfrom = _v if _k == "skipcheck": args.skipcheck = _v if _k.lower() == "localdir": LOCALDIR = _v + if _k.lower() == "plugindir": + PLUGINSDIR = _v if _k.lower() == "spooldir": SPOOLDIR = _v _server = checkmk_server(**args.__dict__) - _pid = None + _pid = 0 try: with open(args.pidfile,"rt") as _pidfile: _pid = int(_pidfile.read()) except (FileNotFoundError,IOError): - _out = subprocess.check_output(["sockstat", "-l", "-p", str(args.port),"-P", "tcp"],encoding=sys.stdout.encoding) + if ISLINUX: + _out = subprocess.check_output(["/bin/ss", "-l", "-p","-t","-n", f'sport = :{args.port}'],encoding=sys.stdout.encoding) + else: + _out = subprocess.check_output(["sockstat", "-l", "-p", str(args.port),"-P", "tcp"],encoding=sys.stdout.encoding) try: _pid = int(re.findall("\s(\d+)\s",_out.split("\n")[1])[0]) except (IndexError,ValueError): pass if args.start: - if _pid: + if _pid > 0: try: os.kill(_pid,0) + sys.stderr.write(f"allready running with pid {_pid}\n") + sys.stderr.flush() + sys.exit(1) except OSError: pass - else: - sys.stderr.write(f"allready running with pid {_pid}") - sys.exit(1) _server.daemonize() elif args.status: - if not _pid: - sys.stderr.write("Not running\n") + if _pid <= 0: + print("not running") else: - os.kill(int(_pid),signal.SIGHUP) - elif args.stop: - if not _pid: - sys.stderr.write("Not running\n") - sys.exit(1) - os.kill(int(_pid),signal.SIGTERM) + try: + os.kill(_pid,0) + print("running") + except OSError: + print("not running") + + elif args.stop or args.restart: + if _pid == 0: + sys.stderr.write("not running\n") + sys.stderr.flush() + if args.stop: + sys.exit(1) + try: + print("stopping") + os.kill(_pid,signal.SIGTERM) + except ProcessLookupError: + if os.path.exists(args.pidfile): + os.remove(args.pidfile) + + if args.restart: + print("starting") + time.sleep(3) + _server.daemonize() + + elif args.checkoutput: + sys.stdout.write(_server.cmkclient(**args.__dict__)) + sys.stdout.write("\n") + sys.stdout.flush() elif args.debug: sys.stdout.write(_server.do_checks(debug=True).decode(sys.stdout.encoding)) sys.stdout.flush() + elif args.nodaemon: _server.server_start() - else: -# _server.server_start() -## default start daemon - if _pid: + + elif args.update: + import hashlib + import difflib + from pkg_resources import parse_version + _github_req = requests.get(f"https://api.github.com/repos/bashclub/check-truenas/contents/truenas_checkmk_agent.py?ref={args.update}") + if _github_req.status_code != 200: + raise Exception(f"Github Error {_github_req.status_code}") + _github_version = _github_req.json() + _github_last_modified = datetime.strptime(_github_req.headers.get("last-modified"),"%a, %d %b %Y %X %Z") + _new_script = base64.b64decode(_github_version.get("content")).decode("utf-8") + _new_version = re.findall("^__VERSION__.*?\"([0-9.]*)\"",_new_script,re.M) + _new_version = _new_version[0] if _new_version else "0.0.0" + _script_location = os.path.realpath(__file__) + _current_last_modified = datetime.fromtimestamp(int(os.path.getmtime(_script_location))) + with (open(_script_location,"rb")) as _f: + _content = _f.read() + _current_sha = hashlib.sha1(f"blob {len(_content)}\0".encode("utf-8") + _content).hexdigest() + _content = _content.decode("utf-8") + if _current_sha == _github_version.get("sha"): + print(f"allready up to date {_current_sha}") + sys.exit(0) + else: + _version = parse_version(__VERSION__) + _nversion = parse_version(_new_version) + if _version == _nversion: + print("same Version but checksums mismatch") + elif _version > _nversion: + print(f"ATTENTION: Downgrade from {__VERSION__} to {_new_version}") + while True: try: - os.kill(_pid,0) - except OSError: - pass + _answer = input(f"Update {_script_location} to {_new_version} (y/n) or show difference (d)? ") + except KeyboardInterrupt: + print("") + sys.exit(0) + if _answer in ("Y","y","yes","j","J"): + with open(_script_location,"wb") as _f: + _f.write(_new_script.encode("utf-8")) + + print(f"updated to Version {_new_version}") + if _pid > 0: + try: + os.kill(_pid,0) + try: + _answer = input(f"Daemon is running (pid:{_pid}), reload and restart (Y/N)? ") + except KeyboardInterrupt: + print("") + sys.exit(0) + if _answer in ("Y","y","yes","j","J"): + print("stopping Daemon") + os.kill(_pid,signal.SIGTERM) + print("waiting") + time.sleep(5) + print("restart") + os.system(f"{_script_location} --start") + sys.exit(0) + except OSError: + pass + break + elif _answer in ("D","d"): + for _line in difflib.unified_diff(_content.split("\n"), + _new_script.split("\n"), + fromfile=f"Version: {__VERSION__}", + fromfiledate=_current_last_modified.isoformat(), + tofile=f"Version: {_new_version}", + tofiledate=_github_last_modified.isoformat(), + n=1, + lineterm=""): + print(_line) else: - sys.stderr.write(f"allready running with pid {_pid}") - sys.exit(1) - _server.daemonize() + break + + elif args.help: + _isinstalled = list(filter(lambda x: x.get("command","") == f"{SCRIPTPATH} --start",checkmk_checker()._midclt("initshutdownscript.query"))) + print("#"*35) + print("checkmk_agent for TrueNAS Scale/Core") + print(f"Version: {__VERSION__}") + print("#"*35) + print("") + print("Latest Version under https://github.com/bashclub/check-truenas") + print("Server-side implementation for") + print("-"*35) + print("\t* smartdisk - install the mkp from https://github.com/bashclub/checkmk-smart plugins os-smart") + _parser.print_help() + print("\n") + if _isinstalled: + print("INSTALLED: '{command}' is installed {when} enabled:{enabled}".format(**_isinstalled[0])) + print("\n") + else: + print("to install the script run the following command\n") + print(f'midclt call initshutdownscript.create \'{{"type": "COMMAND", "command": "{SCRIPTPATH} --start", "enabled": true, "comment": "Start CheckMK-Agent", "when": "POSTINIT"}}\'') + print("\n\n") + print(f"The CHECKMK_BASEDIR is under {BASEDIR} (local,plugin,spool).") + print(f"Default config file location is {args.configfile}, create it if it doesn't exist.") + print("Config file options port,encrypt,onlyfrom,skipcheck with a colon and the value like the commandline option\n") + print("active config:") + print("-"*35) + for _opt in ("port","encrypt","onlyfrom","skipcheck"): + _val = getattr(args,_opt,None) + if _val: + print(f"{_opt}: {_val}") + print("") + + else: + log("no arguments") + print("#"*35) + print("checkmk_agent for TrueNAS") + print(f"Version: {__VERSION__}") + print("#"*35) + print("use --help or -h for help")