Coverage for reactive/telegraf.py : 82%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright 2015-2018 Canonical Ltd.
2#
3# This file is part of the Telegraf Charm for Juju.
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License version 3, as
7# published by the Free Software Foundation.
8#
9# This program is distributed in the hope that it will be useful, but
10# WITHOUT ANY WARRANTY; without even the implied warranties of
11# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
12# PURPOSE. See the GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
17import base64
18import binascii
19import collections
20import hashlib
21import io
22import ipaddress
23import json
24import os
25import re
26import socket
27import subprocess
28import sys
29import time
30from distutils.version import LooseVersion
32from charmhelpers import context
33from charmhelpers.contrib.charmsupport import nrpe
34from charmhelpers.core import hookenv, host, unitdata
35from charmhelpers.core.host import is_container
36from charmhelpers.core.templating import render
38import charms.promreg
39from charms import apt
40from charms.layer import snap
41from charms.reactive import (
42 clear_flag,
43 endpoint_from_flag,
44 helpers,
45 hook,
46 set_flag,
47 toggle_flag,
48 when,
49 when_not,
50)
51from charms.reactive.bus import get_states
53from files.telegraf_exec_metrics import METRICS
55from jinja2 import Environment, FileSystemLoader, Template, exceptions
57import netifaces
59import yaml
62DEB_BASE_DIR = "/etc/telegraf"
63SNAP_BASE_DIR = "/var/snap/telegraf/current"
64SUDOERS_DIR = "/etc/sudoers.d"
66CONFIG_FILE = "telegraf.conf"
68CONFIG_DIR = "telegraf.d"
70GRAFANA_DASHBOARD_TELEGRAF_FILE_NAME = "Telegraf.json.j2"
72GRAFANA_DASHBOARD_NAME = "telegraf"
74SNAP_SERVICE = "snap.telegraf.telegraf"
75DEB_SERVICE = "telegraf"
77SNAP_OWNER = "snap_daemon"
78SNAP_GROUP = "snap_daemon"
79SNAP_USER = "snap_daemon"
80DEB_OWNER = "root"
81DEB_GROUP = "telegraf"
82DEB_USER = "telegraf"
84# Utilities #
class InvalidInstallMethod(Exception):
    """Raised when the ``install_method`` config value is not "deb" or "snap"."""
class InvalidPrometheusIPRange(Exception):
    """Raised when ``prometheus_ip_range`` contains an entry that is not a valid network."""
def write_telegraf_file(path, content):
    """Write text ``content`` to ``path`` for the telegraf service.

    The text is encoded as UTF-8 and written through ``host.write_file`` with
    the owner and group matching the current install method and mode 0o640.
    Returns whatever ``host.write_file`` returns.
    """
    payload = content.encode("UTF-8")
    file_owner = get_telegraf_owner()
    file_group = get_telegraf_group()
    return host.write_file(
        path, payload, owner=file_owner, group=file_group, perms=0o640
    )
def get_install_method():
    """Return the configured ``install_method`` ("deb" or "snap").

    Raises:
        InvalidInstallMethod: if the configured value is anything else; the
            offending value is logged at ERROR level first.
    """
    method = hookenv.config()["install_method"]
    if method not in ("deb", "snap"):
        hookenv.log(
            "Invalid install_method for telegraf: {}".format(method),
            level=hookenv.ERROR,
        )
        raise InvalidInstallMethod()
    return method
def render_sudoers_file(filename):
    """Render the sudoers template ``<filename>.tmpl`` into /etc/sudoers.d.

    The template is looked up in the ``sudoers`` sub-directory of the charm
    templates and receives the telegraf user name as ``telegraf_user``.
    """
    template_name = "{}.tmpl".format(filename)
    # Named to avoid shadowing the module-level ``context`` import.
    template_vars = {"telegraf_user": get_telegraf_user()}
    hookenv.log("Installing sudoers file {}".format(filename), level="DEBUG")
    sudoers_templates = os.path.join(get_templates_dir(), "sudoers")
    destination = os.path.join(SUDOERS_DIR, filename)
    render(
        source=template_name,
        templates_dir=sudoers_templates,
        target=destination,
        context=template_vars,
        perms=0o640,
    )
def remove_sudoers_file(filename):
    """Delete ``filename`` from /etc/sudoers.d; a missing file is not an error."""
    target = os.path.join(SUDOERS_DIR, filename)
    if not os.path.isfile(target):
        return
    os.unlink(target)
def get_telegraf_user():
    """Return the user name telegraf uses for the configured install method.

    ``get_install_method()`` validates the value (and raises
    InvalidInstallMethod otherwise), so anything that is not "snap" is "deb".
    """
    return SNAP_USER if get_install_method() == "snap" else DEB_USER
def get_telegraf_owner():
    """Return the file owner to use for the configured install method.

    ``get_install_method()`` validates the value (and raises
    InvalidInstallMethod otherwise), so anything that is not "snap" is "deb".
    """
    return SNAP_OWNER if get_install_method() == "snap" else DEB_OWNER
def get_telegraf_group():
    """Return the file group to use for the configured install method.

    ``get_install_method()`` validates the value (and raises
    InvalidInstallMethod otherwise), so anything that is not "snap" is "deb".
    """
    return SNAP_GROUP if get_install_method() == "snap" else DEB_GROUP
def get_base_dir():
    """Return the telegraf configuration base directory for the install method.

    Validation (including the ERROR log entry) is delegated to
    ``get_install_method()`` so that it lives in a single place.

    Raises:
        InvalidInstallMethod: if ``install_method`` is neither "deb" nor "snap".
    """
    if get_install_method() == "snap":
        return SNAP_BASE_DIR
    return DEB_BASE_DIR
def get_service():
    """Return the service name of telegraf for the configured install method.

    Validation (including the ERROR log entry) is delegated to
    ``get_install_method()`` so that it lives in a single place.

    Raises:
        InvalidInstallMethod: if ``install_method`` is neither "deb" nor "snap".
    """
    if get_install_method() == "snap":
        return SNAP_SERVICE
    return DEB_SERVICE
def get_templates_dir():
    """Return the path of the ``templates`` directory inside the charm."""
    charm_root = hookenv.charm_dir()
    return os.path.join(charm_root, "templates")
def get_main_config_path():
    """Return the full path of the main telegraf configuration file."""
    base_dir = get_base_dir()
    return os.path.join(base_dir, CONFIG_FILE)
def get_configs_dir():
    """Return the directory that holds the per-plugin configuration files."""
    base_dir = get_base_dir()
    return os.path.join(base_dir, CONFIG_DIR)
def get_files_dir():
    """Return the path of the ``files`` directory inside the charm."""
    charm_root = hookenv.charm_dir()
    return os.path.join(charm_root, "files")
def list_supported_plugins():
    """Return the charm's relation names, "requires" first and then "provides".

    The ``juju-info`` relation is left out of both groups.
    """
    plugins = []
    for section in ("requires", "provides"):
        for relation_name in hookenv.metadata()[section].keys():
            if relation_name != "juju-info":
                plugins.append(relation_name)
    return plugins
def list_config_files():
    """Return the paths of the telegraf config files that are currently in use.

    The list starts with the main config file, followed by one file per
    plugin whose ``plugins.<name>.configured`` or ``<name>.configured`` flag is
    set, then ``extra_plugins.conf`` (only when its flag is set) and finally
    ``socket_listener.conf``.
    """
    paths = [get_main_config_path()]
    active_flags = get_states()

    for plugin in list_supported_plugins():
        # The prometheus_client plugin can be configured either from a
        # relation or from the juju config, hence the two flag spellings.
        flag_names = (
            "plugins.{}.configured".format(plugin),
            "{}.configured".format(plugin),
        )
        if not any(flag in active_flags for flag in flag_names):
            continue
        # The "prometheus-client" relation sets the "prometheus_client" plugin
        conf_name = "prometheus_client" if plugin == "prometheus-client" else plugin
        paths.append("{}/{}.conf".format(get_configs_dir(), conf_name))

    if "extra_plugins.configured" in active_flags:
        paths.append("{}/extra_plugins.conf".format(get_configs_dir()))
    paths.append("{}/socket_listener.conf".format(get_configs_dir()))

    return paths
def get_hostname_label():
    """Build the hostname label from the ``hostname`` config option.

    The option is used as a ``str.format`` template with the fields ``unit``,
    ``model``, ``uuid`` and ``host``.  The deprecated literal value
    "UNIT_NAME" returns the unit name directly.
    """
    hostname_fmt = hookenv.config()["hostname"]
    # "/" is invalid in labels.
    unit_label = get_remote_unit_name().replace("/", "-")

    if hostname_fmt == "UNIT_NAME":  # Deprecated
        return unit_label

    environ = os.environ
    fields = {
        "unit": unit_label,
        "model": environ.get("JUJU_ENV_NAME") or environ.get("JUJU_MODEL_NAME", ""),
        "uuid": environ.get("JUJU_ENV_UUID") or environ.get("JUJU_MODEL_UUID", ""),
        "host": socket.gethostname(),
    }
    return hostname_fmt.format(**fields)
def get_remote_unit_name():
    """Return the name of the unit this charm is attached to, or None.

    ``hookenv.principal_unit()`` is preferred.  When it yields nothing, the
    ``private-address`` of every unit on the charm's "requires" relations is
    matched against the local IPv4/IPv6 addresses (loopback excluded) and the
    first matching unit name is returned.
    """
    unit = hookenv.principal_unit()
    if unit:
        # Note(aluria): use Juju env var available since 2017
        return unit

    # Note(aluria): lookup all available IPv4/IPv6 addresses (except lo)
    local_addresses = set()
    for iface in netifaces.interfaces():
        if iface == "lo":
            continue
        for family, entries in netifaces.ifaddresses(iface).items():
            if family in (netifaces.AF_INET, netifaces.AF_INET6):
                local_addresses.update(entry["addr"] for entry in entries)

    # Note(aluria): and try to match them against rel['private-address']
    for rel_type in hookenv.metadata()["requires"]:
        for rel in hookenv.relations_of_type(rel_type):
            # Relation data without an address is skipped instead of raising
            # KeyError.
            if rel.get("private-address") in local_addresses:
                return rel["__unit__"]

    # No principal and no matching relation: callers check for None.
    return None
def get_disabled_plugins():
    """Return consolidated list of all plugins to be disabled.

    The colon-separated ``disabled_plugins`` config value is the starting
    point.  Inside a container a fixed set of unsuitable plugins is merged in
    and the de-duplicated result is returned sorted.
    """
    raw_value = hookenv.config()["disabled_plugins"]
    configured = raw_value.split(":") if raw_value else []

    if not is_container():
        return configured

    # if LXD container, disable a list of unsuitable plugins
    container_unsuitable = {
        "cpu",
        "disk",
        "diskio",
        "mem",
        "swap",
        "system",
        "bcache",
        "bond",
        "cgroup",
        "kernel_vmstat",
        "zoneinfo",
        "buddyinfo",
        "softnet_stat",
    }
    return sorted(container_unsuitable | set(configured))
def get_base_inputs():
    """Make a structure for rendering the base_inputs template.

    Returns a dict of items for the template.
    """
    input_options = get_extra_options()["inputs"]
    has_conntrack = os.path.exists("/proc/sys/net/netfilter/nf_conntrack_max")
    config = hookenv.config()
    collect_iptables = config["collect_iptables_metrics"]
    collect_smart = config["collect_smart_metrics"]
    disabled = get_disabled_plugins()

    base_inputs = {}
    base_inputs["extra_options"] = input_options
    base_inputs["bcache"] = is_bcache()
    base_inputs["python"] = sys.executable
    base_inputs["files_dir"] = get_files_dir()
    base_inputs["disabled_plugins"] = disabled
    base_inputs["conntrack"] = has_conntrack
    base_inputs["iptables"] = collect_iptables
    base_inputs["smart"] = collect_smart
    return base_inputs
def render_base_inputs():
    """Render ``base_inputs.conf`` from the charm templates and return the text."""
    # use base inputs from charm templates
    template_path = os.path.join(get_templates_dir(), "base_inputs.conf")
    with open(template_path, "r") as template_file:
        template_text = template_file.read()
        return render_template(template_text, get_base_inputs())
def get_extra_options():
    """Parse the ``extra_options`` config value into template-ready strings.

    The YAML is expected to be a mapping ``kind -> plugin -> option -> value``.
    ``inputs`` and ``outputs`` are always present in the result.  Every option
    value is serialised with ``json.dumps`` because the telegraf config value
    syntax is similar to raw JSON; for ``tagpass``/``tagdrop`` the
    serialisation happens one level deeper, per tag.

    Empty sections (e.g. ``inputs:`` with nothing below it, which YAML loads
    as None) are treated as empty mappings.
    """
    extra_options = {"inputs": {}, "outputs": {}}
    extra_options_raw = hookenv.config()["extra_options"]
    # NOTE(review): the value is operator-supplied charm config; for plain
    # mappings yaml.safe_load looks sufficient.  Flagged rather than changed
    # so that the accepted input does not silently shrink.
    extra_opts = yaml.full_load(extra_options_raw) or {}
    extra_options.update(extra_opts)

    json_vals = {}
    # kind level
    for kind, plugins in extra_options.items():
        json_vals[kind] = {}
        # plugins level
        for plugin, values in (plugins or {}).items():
            json_vals[kind][plugin] = {}
            # inner plugin (aka key:value)
            for key, val in (values or {}).items():
                if key in ("tagpass", "tagdrop"):
                    # this is a tagpass/drop, we need to go deeper
                    json_vals[kind][plugin][key] = {
                        tag: json.dumps(tagvalue)
                        for tag, tagvalue in (val or {}).items()
                    }
                else:
                    json_vals[kind][plugin][key] = json.dumps(val)

    return json_vals
def render_extra_options(kind, name, extra_options=None):
    """Render the extra options of plugin ``name`` as config-file lines.

    ``kind`` selects the top-level section of ``extra_options`` (the visible
    callers pass "inputs").  When ``extra_options`` is None it is loaded with
    ``get_extra_options()``.  Plain options come first, followed by one
    ``[kind.name.tagpass]`` / ``[kind.name.tagdrop]`` table per tag filter.
    """
    template = """
  {% if extra_options %}
  {% for key, value in extra_options.items() %}
  {% if key != 'tagpass' and key != 'tagdrop' %}
  {{ key }} = {{ value }}
  {% endif %}
  {% endfor %}
  {% for key, value in extra_options.items() %}
  {% if key == 'tagpass' or key == 'tagdrop' %}
  [{{ kind }}.{{ name }}.{{ key }}]
  {% for tag, tagvalue in value.items() %}
    {{ tag }} = {{ tagvalue }}
  {% endfor %}
  {% endif %}
  {% endfor %}
  {% endif %}
  """
    options = get_extra_options() if extra_options is None else extra_options
    plugin_options = options[kind].get(name, {})
    return render_template(
        template,
        {"extra_options": plugin_options, "kind": kind, "name": name},
    )
def render_template(template, context):
    """Render the Jinja2 ``template`` string with the ``context`` mapping.

    The template is compiled with ``lstrip_blocks`` and ``trim_blocks``
    enabled.
    """
    compiled = Template(template, lstrip_blocks=True, trim_blocks=True)
    rendered = compiled.render(**context)
    return rendered
def check_prometheus_port(key, new_port):
    """Open ``new_port`` and register it, replacing the port stored for ``key``.

    The last port is remembered in unitdata under ``<key>.port``.  When it
    differs from ``new_port`` the new port is opened, the old one (if any) is
    closed and deregistered, and the stored value is updated.  The new port is
    registered on every call.
    """
    storage_key = "{}.port".format(key)
    store = unitdata.kv()
    previous_port = store.get(storage_key)

    port_changed = previous_port != new_port
    if port_changed:
        hookenv.open_port(new_port)
        hookenv.log("Opened port {}".format(new_port))
        # Dont try to close non existing ports
        if previous_port:
            hookenv.close_port(previous_port)
            charms.promreg.deregister(None, previous_port)
        store.set(storage_key, new_port)

    # Register the port, even if this is a re-registration
    charms.promreg.register(None, new_port)
def get_prometheus_port():
    """Return the prometheus output port, or False when none is configured.

    The special config value "default" maps to port 9103; any other value is
    converted with ``int``.
    """
    configured = hookenv.config().get("prometheus_output_port", False)
    if not configured:
        return False
    return 9103 if configured == "default" else int(configured)
def get_prometheus_ip_range():
    """Return the entries of ``prometheus_ip_range`` as a list of strings.

    The option is a comma-separated list; surrounding spaces are stripped and
    each entry must be accepted by ``ipaddress.ip_network``.  An unset, empty
    or whitespace-only option yields an empty list.

    Raises:
        InvalidPrometheusIPRange: if any entry is not a valid network; the
            whole configured value is logged at ERROR level first.
    """
    raw_range = hookenv.config().get("prometheus_ip_range")
    # Unset (None) and blank values mean "no restriction", same as "".
    if not raw_range or not raw_range.strip():
        return []

    # we should have a list of IPs, confirm that's the case
    ips = []
    for ip in raw_range.split(","):
        # strip any spaces
        ip = ip.strip()
        try:
            ipaddress.ip_network(ip)
        except ValueError:
            hookenv.log(
                "Invalid prometheus_ip_range provided: {}".format(raw_range),
                level=hookenv.ERROR,
            )
            raise InvalidPrometheusIPRange()
        ips.append(ip)
    return ips
def get_socket_listener_port():
    """Return the configured ``socket_listener_port`` as int, 8094 if unset."""
    configured = hookenv.config().get("socket_listener_port", False)
    return int(configured) if configured else 8094
def render_socket_listener_config(context):
    """Render ``socket_listener.conf`` into the telegraf configs directory.

    A leftover ``tcp_listener.conf`` from the deprecated plugin is deleted
    first.
    """
    # Remove deprecated config file
    deprecated_path = "{}/tcp_listener.conf".format(get_configs_dir())
    if os.path.exists(deprecated_path):
        os.remove(deprecated_path)

    target_path = "{}/socket_listener.conf".format(get_configs_dir())
    render(
        source="socket_listener.tmpl",
        templates_dir=get_templates_dir(),
        target=target_path,
        context=context,
    )
def get_sysstat_config_with_sadc_xall(content):
    """Return sysstat config text that has `-S XALL` in `SADC_OPTIONS`.

    `/etc/sysstat/sysstat` consists of a sequence of shell variable
    assignments used to configure sysstat logging.

    Returns the updated text when a change is needed and None when the
    existing `SADC_OPTIONS` already contains `-S XALL`.
    """
    multiline = re.M

    # Already configured as wanted: nothing to change.
    if re.search(r'^SADC_OPTIONS=".*-S\s+XALL.*"', content, flags=multiline):
        return None

    current = re.search(r'^SADC_OPTIONS="(.*)"', content, flags=multiline)
    if current is None:
        # No active assignment (a commented one such as
        # `# SADC_OPTIONS="-S ALL"` does not count): append a new one.
        return content.rstrip() + '\nSADC_OPTIONS="-S XALL"'

    if "-S" in current.group(1):
        # Swap the existing `-S` argument for XALL, keep the other options.
        pattern = r'^SADC_OPTIONS="(.*-S\s+)(\w+)(.*)"'
        replacement = r'SADC_OPTIONS="\1XALL\3"'
    else:
        # No `-S` at all: add `-S XALL` at the end of the value.
        pattern = r'^SADC_OPTIONS="(.*)"'
        replacement = r'SADC_OPTIONS="\1 -S XALL"'
    return re.sub(pattern, replacement, content, flags=multiline)
def update_sysstat_config_with_sdac_xall(path="/etc/sysstat/sysstat"):
    """Update `/etc/sysstat/sysstat` to ensure `-S XALL` in `SADC_OPTIONS`.

    The file is rewritten only when `get_sysstat_config_with_sadc_xall`
    returns new content; a missing file is logged as a warning.
    """
    if not os.path.isfile(path):
        hookenv.log(
            "sysstat config file not found: {}".format(path), level=hookenv.WARNING
        )
        return

    with open(path, mode="r", encoding="utf8") as config_file:
        updated = get_sysstat_config_with_sadc_xall(config_file.read())

    if updated:
        hookenv.log("updating {} to ensure `-S XALL` in SADC_OPTIONS".format(path))
        with open(path, mode="w", encoding="utf8") as config_file:
            config_file.write(updated)
def _decode_legacy_base64(value):
    """Return ``value`` base64-decoded when it is base64-encoded UTF-8 text.

    Anything that is not valid base64, or whose decoded bytes are not valid
    UTF-8, is returned unchanged (it is assumed to be a plain-text config).
    """
    try:
        return base64.b64decode(value.encode("utf-8"), validate=True).decode("utf-8")
    except (binascii.Error, UnicodeDecodeError):
        # not base64, probably already up to date configs
        return value


def configure_telegraf():  # noqa: C901
    """Render the telegraf configuration and set the related flags.

    Steps, in order:

    * make sure sysstat collects `-S XALL`;
    * bail out with a "blocked" status on an invalid ``install_method``;
    * bail out with a "waiting" status (removing any existing main config)
      while the remote unit cannot be determined;
    * build the template context (tags, inputs, outputs, hostname, logfile,
      extra options, socket listener port);
    * install or remove the sudoers files for OVS, iptables and smart metrics;
    * run ``telegraf_exec_metrics.py`` to render the exec metric configs;
    * render the main config file;
    * resume and reload the service for the active install method, pause the
      other one;
    * set ``telegraf.configured``, ``telegraf.needs_reload`` and the
      install-method specific ``telegraf.{apt,snap}.configured`` flag.
    """
    hookenv.log("Generating telegraf.conf", level=hookenv.DEBUG)
    update_sysstat_config_with_sdac_xall()
    config = hookenv.config()
    context = config.copy()
    try:
        config_path = get_main_config_path()
    except InvalidInstallMethod:
        hookenv.status_set(
            "blocked",
            "Wrong install_method provided: {!r}".format(config["install_method"]),
        )
        return

    # Resolved once: the lookup may walk every interface and relation.
    remote_unit = get_remote_unit_name()
    if remote_unit is None:
        hookenv.status_set("waiting", "Waiting for juju-info relation")
        # if UNIT_NAME in hostname config and relation not yet available,
        # make telegraf unable to start to not get weird metrics names
        if os.path.exists(config_path):
            os.unlink(config_path)
        return

    # just for the migration out of base64
    inputs = config.get("inputs_config", "")
    if inputs:
        inputs = _decode_legacy_base64(inputs)
    outputs = config.get("outputs_config", "")
    if outputs:
        outputs = _decode_legacy_base64(outputs)

    # Initialize juju-related tags to be exposed as labels
    # Using a dict to avoid duplicates, which telegraf will see as conflicts
    tags_dict = collections.OrderedDict()
    tags_dict["juju_application"] = remote_unit.split("/")[0]
    tags_dict["juju_unit"] = remote_unit.replace("/", "-")
    try:
        tags_dict["juju_model"] = hookenv.model_name()
    except KeyError:
        pass  # support older Juju 1.x deploys
    # Parse juju-configured tags, which can override the 3 juju_* defined above
    if config["tags"]:
        for tag in config["tags"].split(","):
            # Split on the first "=" only so values may contain "=".
            key, separator, value = tag.partition("=")
            if not separator:
                hookenv.log(
                    "Ignoring malformed tag (expected key=value): {!r}".format(tag),
                    level=hookenv.WARNING,
                )
                continue
            tags_dict[key] = value
    # Render the tags dict into an array
    context["tags"] = [
        '{} = "{}"'.format(key, value) for key, value in tags_dict.items()
    ]

    if inputs:
        context["inputs"] = inputs
    else:
        # use base inputs from charm templates
        context["inputs"] = render_base_inputs()

    if outputs:
        context["outputs"] = outputs
    else:
        context["outputs"] = ""
        hookenv.log("No output plugins in main config.")

    context["hostname"] = get_hostname_label()

    logfile_path = os.path.normpath(context["logfile"])
    if (
        context["logfile"]
        and not logfile_path.startswith("/var/log/")  # noqa W503
        and not (  # noqa W503
            config["install_method"] == "snap"
            and logfile_path.startswith("/var/snap/telegraf/common/")  # noqa W503
        )
    ):
        # only allow logging in /var/log, syslog, or /var/snap/telegraf/common
        hookenv.log(
            "logfile value reset to stderr."
            " Original value: {}".format(context["logfile"]),
            hookenv.DEBUG,
        )
        context["logfile"] = ""

    context["extra_options"] = get_extra_options()

    listener_port = get_socket_listener_port()
    if listener_port:
        context["socket_listener_port"] = listener_port

    render_socket_listener_config(context=context)

    disabled_plugins = ":".join(get_disabled_plugins())

    # handle plugin configuration for OVS
    if host.service_running("openvswitch-switch"):
        # add sudoers file for telegraf if openvswitch is running
        render_sudoers_file("telegraf_ovs")
    else:
        # disable the OVS checks if the service is not currently running
        # no need to handle duplicates here, as those are handled during
        # template rendering later
        ovs_metric_names = [
            metric_name for metric_name in METRICS if metric_name.startswith("ovs_")
        ]
        hookenv.log(
            "disabling the following metrics, since OVS is not available: {}".format(
                ovs_metric_names
            )
        )
        ovs_disabled_plugins = ":".join(ovs_metric_names)
        if disabled_plugins:
            disabled_plugins = disabled_plugins + ":" + ovs_disabled_plugins
        else:
            disabled_plugins = ovs_disabled_plugins

    # handle the sudoers for iptables and smart
    for sudoers_filename, option in (
        ("telegraf_iptables", "collect_iptables_metrics"),
        ("telegraf_smart", "collect_smart_metrics"),
    ):
        if config[option]:
            render_sudoers_file(sudoers_filename)
        else:
            remove_sudoers_file(sudoers_filename)

    telegraf_exec_metrics = os.path.join(get_files_dir(), "telegraf_exec_metrics.py")
    cmd = [
        telegraf_exec_metrics,
        "--render-config-files",
        "--disabled-metrics",
        disabled_plugins,
        "--configs-dir",
        get_configs_dir(),  # unit test can monkeypatch this path
        "--python",
        sys.executable,  # this will be venv python interpreter path
    ]
    hookenv.log("Rendering exec metrics config files: {}".format(" ".join(cmd)))
    subprocess.check_call(cmd)

    hookenv.log("Updating main config file")
    render(
        source="telegraf.conf.tmpl",
        templates_dir=get_templates_dir(),
        target=config_path,
        context=context,
    )

    # Make sure that only the right service is enabled, then defer to start_or_restart()
    active_service = get_service()
    for service in [DEB_SERVICE, SNAP_SERVICE]:
        if service == active_service:
            host.service_resume(service)
            host.service_reload(service)
        else:
            try:
                host.service_pause(service)
            except ValueError:
                # On machines not supporting snaps, it won't find the service
                # If we're using the snap, and the deb failed to install, it's fine too
                pass

    set_flag("telegraf.configured")
    set_flag("telegraf.needs_reload")

    if config["install_method"] == "deb":
        set_flag("telegraf.apt.configured")
    else:
        set_flag("telegraf.snap.configured")
731# States
@when_not("telegraf.installed")
def install_telegraf():
    """Install telegraf with the configured method and remove the other flavour.

    An invalid ``install_method`` puts the unit in "blocked" status and
    returns without setting ``telegraf.installed``.
    """
    try:
        install_method = get_install_method()
    except InvalidInstallMethod:
        hookenv.status_set(
            "blocked", "Wrong install_method provided. Expected either 'deb' or 'snap'."
        )
        return

    if install_method == "snap":
        apt.purge("telegraf")
        channel = hookenv.config().get("snap_channel")
        snap.install("telegraf", channel=channel, classic=True)
    elif install_method == "deb":
        try:
            snap.remove("telegraf")
        except Exception:
            # the snap may already be absent, or snaps may not even be supported
            # in this environment
            pass
        apt.queue_install(["telegraf"])

    if install_method:
        set_flag("telegraf.installed")
@when(
    "telegraf.installed",
    "apt.installed.telegraf",
    "plugins.prometheus-client.configured",
)
@when_not("telegraf.configured")
@when_not("telegraf.apt.configured")
def configure_telegraf_deb():
    """Configure telegraf once the deb package is installed."""
    configure_telegraf()
@when(
    "telegraf.installed",
    "snap.installed.telegraf",
    "plugins.prometheus-client.configured",
)
@when_not("telegraf.configured")
@when_not("telegraf.snap.configured")
def configure_telegraf_snap():
    """Configure telegraf once the snap is installed."""
    configure_telegraf()
@hook("upgrade-charm")
def upgrade_charm():
    """Clear every "configured" flag so all handlers run again after an upgrade."""
    plugin_flags = [
        "plugins.{}.configured".format(plugin) for plugin in list_supported_plugins()
    ]
    charm_flags = [
        "extra_plugins.configured",
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
        "grafana.configured",
    ]
    for flag in plugin_flags + charm_flags:
        clear_flag(flag)
@when("config.changed")
def handle_config_changes():
    """Clear the flags affected by a config change so handlers re-run.

    Option-specific flags are cleared only when that option changed; the
    telegraf, nagios and grafana "configured" flags are cleared on every
    config change.
    """
    config = hookenv.config()

    if config.changed("extra_options"):
        for plugin in list_supported_plugins():
            clear_flag("plugins.{}.configured".format(plugin))
        clear_flag("prometheus-client.relation.configured")

    if config.changed("extra_plugins"):
        clear_flag("extra_plugins.configured")

    toggle_flag(
        "telegraf.smart_metrics.enabled", bool(config.get("collect_smart_metrics"))
    )

    install_options = ("install_method", "snap_channel", "install_sources")
    if any(config.changed(option) for option in install_options):
        clear_flag("telegraf.installed")
        clear_flag("extra_plugins.configured")
        clear_flag("plugins.prometheus-client.configured")
        clear_flag("prometheus-client.relation.configured")

    prometheus_options = ("prometheus_output_port", "prometheus_ip_range")
    if any(config.changed(option) for option in prometheus_options):
        clear_flag("plugins.prometheus-client.configured")
        clear_flag("prometheus-client.relation.configured")

    # if something else changed, let's reconfigure telegraf itself just in case
    for flag in (
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
        "telegraf.nagios-setup.complete",
        "grafana.configured",
    ):
        clear_flag(flag)
@when("telegraf.configured")
@when_not("extra_plugins.configured")
def configure_extra_plugins():
    """Write the ``extra_plugins`` config value to ``extra_plugins.conf``.

    With a non-empty value the file is written and both
    ``extra_plugins.configured`` and ``telegraf.needs_reload`` are set.  With
    an empty value a previously written file is removed, so that telegraf
    stops loading plugins that are no longer configured; a reload is
    requested only when a file was actually removed.
    """
    plugins = hookenv.config()["extra_plugins"]
    config_path = "{}/extra_plugins.conf".format(get_configs_dir())

    if plugins:
        host.write_file(config_path, plugins.encode("utf-8"))
        set_flag("extra_plugins.configured")
        set_flag("telegraf.needs_reload")
    elif os.path.exists(config_path):
        # The option was emptied: drop the stale file left by an earlier run.
        os.unlink(config_path)
        set_flag("telegraf.needs_reload")
@when("elasticsearch.available")
@when("telegraf.installed")
def elasticsearch_input(es):
    """Write or remove the elasticsearch input config from the relation data.

    Every related unit that publishes both ``host`` and ``port`` becomes one
    ``http://host:port`` server entry.  Without any usable unit an existing
    config file is removed and the plugin flag cleared.  A reload is requested
    in every case.
    """
    template = """
[[inputs.elasticsearch]]
  servers = {{ servers }}
"""
    servers = []
    for rel in hookenv.relations_of_type("elasticsearch"):
        es_host, es_port = rel.get("host"), rel.get("port")
        if es_host and es_port:
            servers.append("http://{}:{}".format(es_host, es_port))
        else:
            hookenv.log("No host received for relation: {}.".format(rel))

    config_path = "{}/{}.conf".format(get_configs_dir(), "elasticsearch")
    if servers:
        rendered = render_template(template, {"servers": json.dumps(servers)})
        rendered = rendered + render_extra_options("inputs", "elasticsearch")
        hookenv.log("Updating {} plugin config file".format("elasticsearch"))
        host.write_file(config_path, rendered.encode("utf-8"))
        set_flag("plugins.elasticsearch.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.elasticsearch.configured")
    set_flag("telegraf.needs_reload")
@when("memcached.available")
@when("telegraf.installed")
def memcached_input(memcache):
    """Write or remove the memcached input config from the relation data.

    Every related unit that publishes both ``host`` and ``port`` becomes one
    ``host:port`` server entry.  Without any usable unit an existing config
    file is removed and the plugin flag cleared, so that the flag never
    refers to a file that is gone.  A reload is requested in every case.
    """
    template = """
[[inputs.memcached]]
  servers = {{ servers }}
"""
    required_keys = ["host", "port"]
    addresses = [
        "{}:{}".format(rel["host"], rel["port"])
        for rel in hookenv.relations_of_type("memcached")
        if all(rel.get(key) for key in required_keys)
    ]

    config_path = "{}/{}.conf".format(get_configs_dir(), "memcached")
    if addresses:
        context = {"servers": json.dumps(addresses)}
        input_config = render_template(template, context) + render_extra_options(
            "inputs", "memcached"
        )
        hookenv.log("Updating {} plugin config file".format("memcached"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.memcached.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        # Keep the flag in sync with the file, as elasticsearch_input does.
        clear_flag("plugins.memcached.configured")
    set_flag("telegraf.needs_reload")
@when("mongodb.database.available")
@when("telegraf.installed")
def mongodb_input(mongodb):
    """Write or remove the mongodb input config from the relation data.

    Every related unit contributes its ``private-address``, suffixed with
    ``:port`` when a port is published.  Without any related unit an existing
    config file is removed and the plugin flag cleared, so that the flag
    never refers to a file that is gone.  A reload is requested in every case.
    """
    template = """
[[inputs.mongodb]]
  servers = {{ servers }}
"""
    mongo_addresses = []
    for rel in hookenv.relations_of_type("mongodb"):
        addr = rel["private-address"]
        port = rel.get("port", None)
        mongo_addresses.append("{}:{}".format(addr, port) if port else addr)

    config_path = "{}/{}.conf".format(get_configs_dir(), "mongodb")
    if mongo_addresses:
        context = {"servers": json.dumps(mongo_addresses)}
        input_config = render_template(template, context) + render_extra_options(
            "inputs", "mongodb"
        )
        hookenv.log("Updating {} plugin config file".format("mongodb"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.mongodb.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        # Keep the flag in sync with the file, as elasticsearch_input does.
        clear_flag("plugins.mongodb.configured")
    set_flag("telegraf.needs_reload")
@when("mysql.available")
@when("telegraf.installed")
def mysql_input(mysql):
    """Render the mysql input config for the database on this machine.

    Only relation units whose ``private-address`` equals this unit's private
    IP are considered.  The plugin flag follows whether any context was
    collected, and a reload is requested in every case.
    """
    contexts = []
    local_ip_of = hookenv.unit_private_ip
    for relation in context.Relations()[mysql.relation_name].values():
        for reldata in relation.values():
            hookenv.log("Available relations", level="DEBUG")
            if reldata["private-address"] != local_ip_of():
                continue
            if not mysql.connection_string():
                continue
            contexts.append(
                {
                    "host": mysql.host(),
                    "port": mysql.port(),
                    "user": mysql.user(),
                    "pass": mysql.password(),
                    "is_secure": "false",  # TODO: provide config intf for this
                    "slave": reldata.get("slave", None),
                }
            )
            # NOTE(review): one context per relation is assumed to be enough;
            # confirm that the original `break` sits right after the append.
            break

    render_mysql_tmpl(contexts)
    toggle_flag("plugins.mysql.configured", bool(contexts))
    set_flag("telegraf.needs_reload")
@when("postgresql.database.connected")
def choose_postgresql_database():
    """Ask the PostgreSQL relation for the ``postgres`` database."""
    # We have no need for our own database. If we need to create
    # custom views, we can remove this handler. Or maybe it should
    # be configurable, so people can use telegraf to export custom
    # metrics about their data.
    endpoint = endpoint_from_flag("postgresql.database.connected")
    endpoint.set_database("postgres")
@when("postgresql.database.connected")
@when_not("postgresql.database.available")
def postgresql_waiting():
    """Report "waiting" while the PostgreSQL relation is connected but not available."""
    message = "Waiting for PostgreSQL relation"
    hookenv.status_set("waiting", message)
@when("postgresql.database.available", "postgresql.database.changed")
@when("telegraf.installed")
def postgresql_input():
    """Render the postgresql input config for the principal unit's database.

    Does nothing unless the principal unit is among the expected related
    units of the ``postgresql`` relation.  Without any connection string for
    the principal the unit reports "waiting" and the plugin flag is cleared.
    """
    # The subordinate may be connected to several services.
    pg_options = get_extra_options()["inputs"].get("postgresql")
    principal = hookenv.principal_unit()

    if principal not in hookenv.expected_related_units("postgresql"):
        return

    pgsql = endpoint_from_flag("postgresql.database.available")
    assert pgsql is not None

    contexts = []
    for css in pgsql:
        cs = css.get(principal)
        if not cs:
            continue
        # An unreported server version is treated as 9.3.
        ver = css.version or "9.3"
        role = "master" if cs == css.master else "hot standby"
        contexts.append(
            {
                "conn_str": str(cs),
                "server": cs.host,
                "replica": role,
                "extra_options": pg_options,
                "version": ver,
                "pg10": LooseVersion(ver + ".0") >= LooseVersion("10.0"),
            }
        )
        hookenv.status_set(
            "maintenance",
            "Monitoring PostgreSQL {} on {}".format(css.version, principal),
        )

    if not contexts:
        hookenv.status_set(
            "waiting", "Waiting for PostgreSQL database on {}".format(principal)
        )
        clear_flag("plugins.postgresql.configured")
        return

    render_postgresql_tmpl(contexts)
    set_flag("plugins.postgresql.configured")
    set_flag("telegraf.needs_reload")
def render_mysql_tmpl(contexts):
    """Write ``mysql.conf`` from ``mysql.tmpl``, one section per context.

    Each rendered section is followed by the extra options configured for the
    mysql input.  With no contexts an existing config file is removed.
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "mysql")

    if contexts:
        template_path = os.path.join(get_templates_dir(), "mysql.tmpl")
        # The context manager closes the template file deterministically.
        with open(template_path, "r") as template_file:
            template = template_file.read()
        f = io.StringIO()
        for ctx in contexts:
            f.write(render_template(template, ctx))
            f.write(render_extra_options("inputs", "mysql"))
        write_telegraf_file(config_path, f.getvalue())
    elif os.path.exists(config_path):
        os.unlink(config_path)
def render_postgresql_tmpl(contexts):
    """Write ``postgresql.conf`` from ``postgresql.tmpl``, one section per context.

    With no contexts an existing config file is removed.
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "postgresql")

    if contexts:
        template_path = os.path.join(get_templates_dir(), "postgresql.tmpl")
        # The context manager closes the template file deterministically.
        with open(template_path, "r") as template_file:
            template = template_file.read()
        f = io.StringIO()
        for ctx in contexts:
            f.write(render_template(template, ctx))
        write_telegraf_file(config_path, f.getvalue())
    elif os.path.exists(config_path):
        os.unlink(config_path)
@when("haproxy.available")
@when("telegraf.installed")
def haproxy_input(haproxy):
    """Write or remove the haproxy input config from the relation data.

    Only related units with a truthy ``enabled`` value are used.  Each one
    yields a ``http://user[:password]@address:port`` URL, with the address
    taken from ``listener-address`` and falling back to ``private-address``.
    Without any enabled unit an existing config file is removed and the
    plugin flag cleared, so that the flag never refers to a file that is
    gone.  A reload is requested in every case.
    """
    template = """
[[inputs.haproxy]]
  servers = {{ servers }}
"""
    haproxy_addresses = []
    for rel in hookenv.relations_of_type("haproxy"):
        enabled = rel.get("enabled", False)
        # Juju gives us a string instead of a boolean, fix it
        if isinstance(enabled, str):
            enabled = enabled in ["y", "yes", "true", "t", "on", "True"]
        if not enabled:
            continue
        addr = rel.get("listener-address", rel["private-address"])
        port = rel["port"]
        userpass = rel["user"]
        password = rel.get("password", None)
        if password:
            userpass += ":{}".format(password)
        haproxy_addresses.append("http://{}@{}:{}".format(userpass, addr, port))

    config_path = "{}/{}.conf".format(get_configs_dir(), "haproxy")
    if haproxy_addresses:
        input_config = render_template(
            template, {"servers": json.dumps(haproxy_addresses)}
        ) + render_extra_options("inputs", "haproxy")
        hookenv.log("Updating {} plugin config file".format("haproxy"))
        write_telegraf_file(config_path, input_config)
        set_flag("plugins.haproxy.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        # Keep the flag in sync with the file, as elasticsearch_input does.
        clear_flag("plugins.haproxy.configured")
    set_flag("telegraf.needs_reload")
@when("apache.available")
@when("telegraf.installed")
def apache_input(apache):
    """Configure the apache input plugin and publish the status vhost.

    The rendered ``apache-server-status.tmpl`` vhost is sent over every apache
    relation entry, and one ``server-status`` URL per entry is collected for
    the plugin config. Without any URL, an existing config file is removed.
    """
    template = """
[[inputs.apache]]
 urls = {{ urls }}
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "apache")
    port = "8080"
    site_config = render(
        source="apache-server-status.tmpl",
        templates_dir=get_templates_dir(),
        target=None,
        context={"port": port},
    )
    relation_info = dict(
        ports=port,
        domain="apache-status",
        enabled=True,
        site_config=site_config,
        site_modules="status",
    )

    status_urls = []
    for entry in hookenv.relations_of_type("apache"):
        hookenv.relation_set(entry["__relid__"], relation_settings=relation_info)
        status_urls.append(
            "http://{}:{}/server-status?auto".format(entry["private-address"], port)
        )

    if status_urls:
        rendered = render_template(template, {"urls": json.dumps(status_urls)})
        input_config = rendered + render_extra_options("inputs", "apache")
        hookenv.log("Updating {} plugin config file".format("apache"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.apache.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")
@when("endpoint.redis.available")
@when("telegraf.installed")
def redis_input(redis):
    """Configure the redis input plugin from the redis relation data.

    Only the first relation entry is used; a warning is logged when the
    relation does not hold exactly one entry. Without relation data, an
    existing config file is removed and the plugin flag is cleared. A telegraf
    reload is requested in every case.
    """
    template = """
[[inputs.redis]]
 servers = ["tcp://{{ host }}:{{ port }}"]
 # Until https://github.com/influxdata/telegraf/issues/5036 is fixed
 fielddrop = ["aof_last_bgrewrite_status","aof_last_write_status","maxmemory_policy","rdb_last_bgsave_status","used_memory_dataset_perc","used_memory_peak_perc"]
"""  # noqa E501 (inline template)
    config_path = "{}/{}.conf".format(get_configs_dir(), "redis")

    rels = hookenv.relations_of_type("redis")
    if rels:
        if len(rels) != 1:
            # The two literals are concatenated, so the separating space has
            # to be part of the first one.
            hookenv.log(
                "Unexpected number of units in the redis relation. "
                "Expected 1, got {}".format(len(rels)),
                "WARNING",
            )

        ctxt = {}
        # The host value may arrive wrapped in double quotes; strip them.
        ctxt["host"] = rels[0]["host"].strip('"')
        ctxt["port"] = rels[0]["port"]
        input_config = render_template(template, ctxt) + render_extra_options(
            "inputs", "redis"
        )
        hookenv.log("Updating {} plugin config file".format("redis"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.redis.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.redis.configured")

    set_flag("telegraf.needs_reload")
@when("endpoint.sentry.joined")
@when("telegraf.installed")
def sentry_input(sentry):
    """Configure a statsd input for sentry while the sentry relation has data.

    The rendered config does not depend on the relation contents, so it is
    written exactly once when the relation has at least one entry. Without
    relation data, an existing config file is removed and the plugin flag is
    cleared. A telegraf reload is requested in every case.
    """
    template = """
[[inputs.statsd]]
 protocol = "udp"
 service_address = ":8125"
 delete_gauges = false
 delete_counters = false
 delete_sets = false
 delete_timings = false
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "sentry")

    rels = hookenv.relations_of_type("sentry")
    if rels:
        # Nothing from the individual relation entries is used, so there is no
        # need to render and rewrite the same file once per entry.
        input_config = render_template(template, {}) + render_extra_options(
            "inputs", "sentry"
        )
        hookenv.log("Updating {} plugin config file".format("sentry"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.sentry.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.sentry.configured")

    set_flag("telegraf.needs_reload")
@when("exec.available")
@when("telegraf.installed")
def exec_input(exec_rel):
    """Configure the exec input plugin with the commands from the exec relation.

    Only commands whose ``run_on_this_unit`` entry is truthy are rendered (the
    entry itself is removed from each command). When none qualifies, an
    existing config file is removed. Nothing happens if the relation defines
    no commands at all.
    """
    template = """
{% for cmd in commands %}
[[inputs.exec]]
 commands = {{ cmd.commands }}
 {% for key, value in cmd|dictsort %}
 {% if key not in ["commands", "tags"] %}
 {{ key }} = "{{ value }}"
 {% endif %}
 {% endfor %}
 {% if cmd.tags %}
 [inputs.exec.tags]
 {% for tag, tag_value in cmd.tags|dictsort %}
 {{ tag }} = "{{ tag_value }}"
 {% endfor %}
 {% endif %}
{% endfor %}
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "exec")
    commands = exec_rel.commands()
    if not commands:
        hookenv.log("No Commands defined in the exec relation, doing nothing.")
        return

    local_commands = []
    for command in commands:
        # pop() also strips the marker so it is not rendered as an option
        if command.pop("run_on_this_unit"):
            local_commands.append(command)

    if local_commands:
        input_config = render_template(template, {"commands": local_commands})
        hookenv.log("Updating {} plugin config file".format("exec"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.exec.configured")
    elif os.path.exists(config_path):
        # if no commands, remove previous config
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")
@when_not("exec.available")
@when("plugins.exec.configured")
def exec_input_departed():
    """Drop the exec plugin configuration once the exec relation is gone."""
    config_path = "{}/{}.conf".format(get_configs_dir(), "exec")
    if hookenv.relations_of_type("exec"):
        # Relation data is still around, keep the current configuration
        return
    clear_flag("plugins.exec.configured")
    if os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")
@when("amqp.connected")
@when_not("amqp.available")
def rabbitmq_input_setup(rabbitmq):
    """Request admin access on the amqp relation for this unit."""
    # Requires management_plugin=true on the rabbitmq-server application.
    # vhost will not be used, but still needs to be requested.
    unit_suffix = hookenv.local_unit().replace("/", "-")
    account = "telegraf-" + unit_suffix
    rabbitmq.set_local(admin=True)
    rabbitmq.set_remote(admin=True)
    rabbitmq.request_access(username=account, vhost=account)
@when("amqp.available")
@when("telegraf.installed")
def rabbitmq_input(rabbitmq):
    """Configure the rabbitmq input plugin from the amqp relation.

    Nothing is written until the relation provides an address, a username and
    a password.
    """
    template = """
[[inputs.rabbitmq]]
 url = "http://{{ server }}:{{ port }}"
 username = "{{ username }}"
 password = "{{ password }}"
 fielddrop = ["idle_since"]
"""
    server = rabbitmq.private_address()
    port = "15672"
    username = rabbitmq.username()
    password = rabbitmq.password()

    if not server or not username or not password:
        return

    context = {
        "server": server,
        "username": username,
        "password": password,
        "port": port,
    }
    config_path = "{}/{}.conf".format(get_configs_dir(), "rabbitmq")
    input_config = render_template(template, context)

    hookenv.log("Updating {} plugin config file".format("rabbitmq"))
    write_telegraf_file(config_path, input_config)

    set_flag("plugins.rabbitmq.configured")
    set_flag("telegraf.needs_reload")
@when_not("amqp.available")
@when("plugins.rabbitmq.configured")
def rabbitmq_input_departed():
    """Drop the rabbitmq plugin configuration once amqp is unavailable."""
    clear_flag("plugins.rabbitmq.configured")
    stale_config = "{}/{}.conf".format(get_configs_dir(), "rabbitmq")
    if os.path.exists(stale_config):
        os.unlink(stale_config)
    set_flag("telegraf.needs_reload")
@when("influxdb-api.available")
@when("telegraf.installed")
def influxdb_api_output(influxdb):
    """Configure the influxdb output plugin from the influxdb-api relation.

    Every relation entry providing hostname, port, user and password adds one
    endpoint URL; the credentials of the first such entry are used. Without
    any endpoint, an existing config file is removed.
    """
    required_keys = ["hostname", "port", "user", "password"]
    endpoints = []
    user = None
    password = None
    for entry in hookenv.relations_of_type("influxdb-api"):
        if not all(entry.get(key) for key in required_keys):
            continue
        endpoints.append("http://{}:{}".format(entry["hostname"], entry["port"]))
        # Credentials come from the first complete entry only
        if user is None:
            user = entry["user"]
        if password is None:
            password = entry["password"]

    config_path = "{}/{}.conf".format(get_configs_dir(), "influxdb-api")
    if endpoints:
        hookenv.log("Updating {} plugin config file".format("influxdb-api"))
        template_context = {
            "urls": json.dumps(endpoints),
            "username": "{}".format(user),
            "password": "{}".format(password),
        }
        content = render(
            source="influxdb-api.conf.tmpl",
            target=None,
            templates_dir=get_templates_dir(),
            context=template_context,
        )
        extra_opts = render_extra_options("outputs", "influxdb")
        write_telegraf_file(config_path, content + "\n" + extra_opts)
        set_flag("plugins.influxdb-api.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")
def generate_prometheus_output_config(prometheus_output_port, prometheus_ip_range):
    """Build the template context for the prometheus_client output plugin.

    prometheus_output_port: port the plugin has to listen on. Callers pass it
        either as a number or as a numeric string.
    prometheus_ip_range: list of allowed ranges; it is appended to any
        ``ip_range`` found in the prometheus_client extra options.

    Returns a dict with the keys ``listen``, ``ip_range`` and ``extra_options``.
    A ``listen`` extra option keeps its host part, but its port is always
    overridden by ``prometheus_output_port`` (with a warning when they differ).
    """
    # If extra_options are set for prometheus_client, let's integrate them
    extra_options = get_extra_options()
    options = extra_options["outputs"].get("prometheus_client", {})
    # "listen" and "ip_range" are removed from the options before these are
    # handed to render_extra_options() below - presumably so that they are not
    # rendered a second time.
    listen = options.pop("listen", None)
    if not listen:
        listen = ":{}".format(prometheus_output_port)
    else:
        listen_parts = listen.split(":", 1)
        listen_port = int(listen_parts[1])
        # Compare normalised values: the configured port may be a string (e.g.
        # the "9103" fallback), which would never be equal to an int.
        if str(listen_port) != str(prometheus_output_port):
            hookenv.log(
                """prometheus_output_port is {}, but extra_options would set it
 to {}. Choosing {} from prometheus_output_port.""".format(
                    prometheus_output_port,
                    listen_port,
                    prometheus_output_port,
                ),
                level=hookenv.WARNING,
            )
            listen = "{}:{}".format(listen_parts[0], prometheus_output_port)

    options_ip_range = options.pop("ip_range", [])
    ip_range = options_ip_range + prometheus_ip_range

    return {
        "listen": listen,
        "ip_range": ip_range,
        "extra_options": render_extra_options(
            "outputs", "prometheus_client", extra_options=extra_options
        ),
    }
def render_prometheus_client_config(port, ip_range):
    """Render the prometheus_client output plugin config file.

    port: port the plugin listens on
    ip_range: list of ranges allowed to reach the plugin
    """
    plugin = "prometheus_client"
    config_path = "{}/{}.conf".format(get_configs_dir(), plugin)
    message = "Updating {} plugin config file. Port is {} and ip_range is {}".format(
        plugin, port, ip_range
    )
    hookenv.log(message, level=hookenv.INFO)
    render(
        source="prometheus_client.tmpl",
        templates_dir=get_templates_dir(),
        target=config_path,
        context=generate_prometheus_output_config(port, ip_range),
    )
@when("prometheus-client.available")
@when_not("prometheus-client.relation.configured")
def configure_prometheus_client_with_relation(prometheus):
    """Configure the prometheus_client output using the prometheus-client relation.

    The address to scrape is published through ``prometheus.configure()`` and
    the egress subnets of the related units are collected. They are added to
    the allowed ip_range only when that range is not empty, because an empty
    range already allows every remote IP.
    """
    hookenv.log(
        "Configuring prometheus_client output plugin, with prometheus-client relation",
        level=hookenv.DEBUG,
    )
    port = get_prometheus_port() or "9103"
    # We'll iterate through the prometheus-client relation counterparts,
    # inform them of our address so that they scrape it, and get their egress subnets
    # so that we can allow them
    remote_egress_subnets = []
    # Bound up front so that prometheus.configure() below still works when
    # there is no relation id to iterate over.
    ip_addr = None
    for relation_id in hookenv.relation_ids("prometheus-client"):
        # if juju 2.x+ then we'll attempt to get the network space address
        try:
            hookenv.log("Getting local network info", level=hookenv.DEBUG)
            network_info = hookenv.network_get(
                "prometheus-client", relation_id=relation_id
            )
            hookenv.log(network_info, level=hookenv.DEBUG)
            if "ingress-addresses" in network_info:
                ip_addr = network_info.get("ingress-addresses")[0]
            else:
                ip_addr = hookenv.network_get_primary_address("prometheus-client")
            for unit in hookenv.related_units(relation_id):
                hookenv.log(
                    "Getting remote egress subnet for relation {} - {}".format(
                        unit, relation_id
                    ),
                    level=hookenv.DEBUG,
                )
                egress_subnets = hookenv.relation_get(
                    "egress-subnets", unit, relation_id
                )
                # A unit that has not published the key yet yields an empty
                # value, which must not end up in the allowed ip_range.
                if egress_subnets:
                    remote_egress_subnets.append(egress_subnets)
        except NotImplementedError:
            # if that fails, just let prometheus.configure(...) do its default
            ip_addr = None
    prometheus.configure(port, hostname=ip_addr, private_address=ip_addr)
    check_prometheus_port("prometheus_output", port)
    # If prometheus_ip_range is empty, all remote IPs are allowed
    ip_range = get_prometheus_ip_range()
    if ip_range:
        ip_range = ip_range + remote_egress_subnets
    render_prometheus_client_config(port, ip_range)
    set_flag("plugins.prometheus-client.configured")
    set_flag("prometheus-client.relation.configured")
    set_flag("telegraf.needs_reload")
@when_not("prometheus-client.available")
@when_not("plugins.prometheus-client.configured")
def configure_prometheus_client():
    """Configure the prometheus_client output when no relation is available.

    The plugin is only rendered if a prometheus port is configured; otherwise
    the plugin is just flagged as configured and nothing is written.
    """
    hookenv.log("Configuring prometheus_client output plugin", level=hookenv.DEBUG)
    if not get_prometheus_port():
        # No relation to prometheus, no port configured: do not configure the plugin
        set_flag("plugins.prometheus-client.configured")
        return
    port = get_prometheus_port()
    check_prometheus_port("prometheus_output", port)
    render_prometheus_client_config(port, get_prometheus_ip_range())
    set_flag("plugins.prometheus-client.configured")
    set_flag("telegraf.needs_reload")
    clear_flag("prometheus-client.relation.configured")
def convert_days(time_string):
    """Convert a duration string to a string holding the number of days.

    Function to convert strings like 2w or 14d to a string containing the
    number of days. Supported suffixes are ``d`` (days), ``w`` (weeks),
    ``h`` (hours) and ``m`` (minutes).

    Not included, months and years, because the number of days in each changes.
    Also not included, seconds.

    Days are returned exactly as written and weeks as an integer string, while
    hours and minutes are returned as the string of a float (e.g. "2.0").
    Returns None when the string ends with none of the supported suffixes.
    """
    days = re.search(r"(\d+)d$", time_string)
    if days:
        return days.group(1)
    weeks = re.search(r"(\d+)w$", time_string)
    if weeks:
        return str(int(weeks.group(1)) * 7)
    hours = re.search(r"(\d+)h$", time_string)
    if hours:
        return str(int(hours.group(1)) / 24)
    mins = re.search(r"(\d+)m$", time_string)
    if mins:
        # A day has 24 * 60 minutes; the value comes from the minutes match.
        return str(int(mins.group(1)) / (24 * 60))
    return None
@when("prometheus-rules.available")
def render_prometheus_rules(prometheus_rules):
    """Send the rendered alerting rules to Prometheus.

    Each rule template is rendered with values taken from the charm config,
    the hostname and the principal unit name; the results are joined with
    newlines and handed to ``prometheus_rules.configure()``.
    """
    # Send a list of rules for alerting to Prometheus
    config = hookenv.config()
    unit_name = os.environ.get("JUJU_PRINCIPAL_UNIT")
    lead_time = config.get("lead_time")
    context = dict(
        hostname=socket.gethostname(),
        cpu_idle=config.get("cpu_idle"),
        wait_time=config.get("wait_time"),
        lead_time=lead_time,
        lead_days=convert_days(lead_time),
        prometheus_context=config.get("prometheus_context"),
        unit_name=unit_name,
        application_name=unit_name.split("/")[0],
    )
    rule_templates = (
        "rule_cpu_usage.j2",
        "rule_diskfull.j2",
        "rule_mem.j2",
        "rule_disk_ro.j2",
        "rule_packetdrops.j2",
        "rule_predict_disk_space.j2",
    )
    rendered_rules = []
    for rule_template in rule_templates:
        rule_path = os.path.join(get_templates_dir(), rule_template)
        with open(rule_path, "r") as fd:
            rendered_rules.append(render_template(fd.read(), context))
    prometheus_rules.configure("\n".join(rendered_rules))
@when_not("prometheus-client.available")
@when("prometheus-client.relation.configured")
def prometheus_client_departed():
    """Remove the prometheus_client config once the relation has no data left."""
    hookenv.log("prometheus-client relation not available")
    config_path = "{}/{}.conf".format(get_configs_dir(), "prometheus_client")
    if hookenv.relations_of_type("prometheus-client"):
        return
    if not os.path.exists(config_path):
        return
    hookenv.log("Deleting {} plugin config file".format("prometheus-client"))
    os.unlink(config_path)
    clear_flag("plugins.prometheus-client.configured")
@when(
    "plugins.prometheus-client.configured",
    "endpoint.dashboards.joined",
    "leadership.is_leader",
)
@when_not("grafana.configured")
def register_grafana_dashboard():
    """Register the telegraf dashboard on the dashboards endpoint.

    The rendered dashboard is parsed as JSON and extended with a ``digest``
    key holding the md5 hex digest of the rendered text.
    """
    grafana = endpoint_from_flag("endpoint.dashboards.joined")
    hookenv.log("Loading grafana dashboard", level=hookenv.DEBUG)
    rendered = _load_grafana_dashboard()
    dashboard_dict = json.loads(rendered)
    dashboard_dict["digest"] = hashlib.md5(rendered.encode("utf8")).hexdigest()
    hookenv.log(
        "Rendered dashboard dict:\n{}".format(dashboard_dict), level=hookenv.DEBUG
    )
    grafana.register_dashboard(name=GRAFANA_DASHBOARD_NAME, dashboard=dashboard_dict)
    hookenv.log('Grafana dashboard "{}" registered.'.format(GRAFANA_DASHBOARD_NAME))
    set_flag("grafana.configured")
def _load_grafana_dashboard():
    """Return the telegraf grafana dashboard rendered for the configured datasource."""
    datasource_name = hookenv.config().get("prometheus_datasource", "prometheus")
    # TODO: Figure out if metrics exist and then set bools accordingly.
    # For now, setting bools to true.
    dashboard_context = {
        "datasource": "{} - Juju generated source".format(datasource_name),
        "bonds_enabled": True,
        "bcache_enabled": True,
        "conntrack_enabled": True,
    }
    # The dashboard template uses << >> as variable delimiters
    return render_custom(
        source=GRAFANA_DASHBOARD_TELEGRAF_FILE_NAME,
        render_context=dashboard_context,
        variable_start_string="<<",
        variable_end_string=">>",
    )
# This isn't exposed in charmhelpers: https://github.com/juju/charm-helpers/issues/367
def render_custom(source, render_context, **parameters):
    """Render a grafana dashboard template with a customised jinja Environment.

    source: name of the template file, looked up in the charm's
        templates/dashboards/grafana folder
    render_context: template context variables
    parameters: initialization parameters for the jinja Environment

    returns the rendered template content; a missing template is logged and
    the TemplateNotFound exception is re-raised
    """
    template_folder = os.path.join(hookenv.charm_dir(), "templates/dashboards/grafana")
    loader = FileSystemLoader(template_folder)
    environment = Environment(loader=loader, **parameters)
    try:
        template = environment.get_template(source)
    except exceptions.TemplateNotFound as err:
        hookenv.log(
            "Could not load template {} from {}".format(source, template_folder)
        )
        raise err
    return template.render(render_context)
@when("endpoint.dashboards.departed", "grafana.configured")
def unregister_grafana_dashboard():
    """Forget the dashboard registration once the dashboards endpoint departed."""
    clear_flag("grafana.configured")
@when("endpoint.dashboards.failed", "leadership.is_leader")
def grafana_dashboard_import_failed():
    """Log every failed dashboard import and clear the grafana.configured flag."""
    grafana = endpoint_from_flag("endpoint.dashboards.failed")
    for failure in grafana.failed_imports:
        error_message = 'Grafana dashboard "{}" import failed with: {}'.format(
            failure.name, failure.reason
        )
        hookenv.log(message=error_message, level=hookenv.ERROR)
    clear_flag("grafana.configured")
@when("telegraf.needs_reload")
@when("telegraf.installed")
@when("telegraf.configured")
def start_or_restart():
    """Restart telegraf when needed and report the resulting unit status.

    The service is restarted if it is not running, if a config file changed or
    if the set of plugin flags changed. The service then gets up to 15 seconds
    to come up before the unit status is set.
    """
    plugin_flags = sorted(
        flag
        for flag in get_states().keys()
        if flag.startswith("plugins") or flag.startswith("extra_plugins")
    )

    service = get_service()
    config_files_changed = helpers.any_file_changed(list_config_files())
    active_plugins_changed = helpers.data_changed(
        "active_plugins", plugin_flags or ""
    )
    service_stopped = not host.service_running(service)
    if service_stopped or config_files_changed or active_plugins_changed:
        hookenv.log("Restarting telegraf")
        host.service_restart(service)
    else:
        hookenv.log(
            "Not restarting: active_plugins_changed={} | "
            "config_files_changed={}".format(
                active_plugins_changed, config_files_changed
            )
        )

    # Give telegraf time to restart.
    deadline = time.time() + 15
    while not host.service_running(service) and time.time() < deadline:
        time.sleep(0.1)

    if not host.service_running(service):
        hookenv.status_set("blocked", "Telegraf failed to start. Check config.")
        return

    revision = ""
    if os.path.exists("version"):
        with open("version") as f:
            line = f.readline().strip()
        # We only want the first 8 characters, that's enough to tell
        # which version of the charm we're using.
        if len(line) > 8:
            revision = " (source version/commit {}…)".format(line[:8])
        else:
            revision = " (source version/commit {})".format(line)
    hookenv.status_set(
        "active", "Monitoring {}{}".format(get_remote_unit_name(), revision)
    )
    clear_flag("telegraf.needs_reload")
def is_bcache():
    """Determine if bcache is usable on this machine.

    return true if /sys/fs/bcache is present, and this is not a container.
    """
    running_in_container = is_container()
    bcache_present = os.path.exists("/sys/fs/bcache")
    return bcache_present and not running_in_container
@hook("update-status")
def update_status():
    """Clear the configuration flags when the base inputs changed."""
    inputs_changed = charms.reactive.helpers.data_changed(
        "detect_changes", get_base_inputs(), hash_type="sha256"
    )
    if not inputs_changed:
        return
    for flag in (
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
    ):
        clear_flag(flag)
@when("nrpe-external-master.available")
@when("telegraf.installed")
@when("telegraf.configured")
@when_not("telegraf.nagios-setup.complete")
def configure_nagios(nagios):
    """Configure the nagios HTTP check for telegraf.

    The flag 'telegraf.nagios-setup.complete' is reset at the moment config is
    changed, so this should make sure that updates are handled.
    """
    # Use charmhelpers to handle the configuration of nrpe
    nrpe_setup = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)

    # use charmhelpers to create the check against the metrics endpoint
    check_cmd = "check_http -I 127.0.0.1 -p {} -u /metrics".format(
        get_prometheus_port()
    )
    nrpe_setup.add_check("telegraf_http", "Telegraf HTTP check", check_cmd)
    nrpe_setup.write()
    set_flag("telegraf.nagios-setup.complete")
1705@when("telegraf.smart_metrics.enabled")
1706@when_not("apt.smartmontools.installed")
1707@when_not("apt.nvme.installed")
1708def install_smart_metrics_packages():
1709 apt.queue_install(["smartmontools", "nvme-cli"])