Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# Copyright 2015-2018 Canonical Ltd. 

2# 

3# This file is part of the Telegraf Charm for Juju. 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License version 3, as 

7# published by the Free Software Foundation. 

8# 

9# This program is distributed in the hope that it will be useful, but 

10# WITHOUT ANY WARRANTY; without even the implied warranties of 

11# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR 

12# PURPOSE. See the GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <http://www.gnu.org/licenses/>. 

16 

17import base64 

18import binascii 

19import collections 

20import hashlib 

21import io 

22import ipaddress 

23import json 

24import os 

25import re 

26import socket 

27import subprocess 

28import sys 

29import time 

30from distutils.version import LooseVersion 

31 

32from charmhelpers import context 

33from charmhelpers.contrib.charmsupport import nrpe 

34from charmhelpers.core import hookenv, host, unitdata 

35from charmhelpers.core.host import is_container 

36from charmhelpers.core.templating import render 

37 

38import charms.promreg 

39from charms import apt 

40from charms.layer import snap 

41from charms.reactive import ( 

42 clear_flag, 

43 endpoint_from_flag, 

44 helpers, 

45 hook, 

46 set_flag, 

47 toggle_flag, 

48 when, 

49 when_not, 

50) 

51from charms.reactive.bus import get_states 

52 

53from files.telegraf_exec_metrics import METRICS 

54 

55from jinja2 import Environment, FileSystemLoader, Template, exceptions 

56 

57import netifaces 

58 

59import yaml 

60 

61 

# Configuration roots per install method, and the sudoers drop-in directory.
DEB_BASE_DIR = "/etc/telegraf"
SNAP_BASE_DIR = "/var/snap/telegraf/current"
SUDOERS_DIR = "/etc/sudoers.d"

# Main config file name and plugin config sub-directory; both are joined onto
# the base directory by get_main_config_path() / get_configs_dir().
CONFIG_FILE = "telegraf.conf"
CONFIG_DIR = "telegraf.d"

# Grafana dashboard template file name and dashboard name.
GRAFANA_DASHBOARD_TELEGRAF_FILE_NAME = "Telegraf.json.j2"
GRAFANA_DASHBOARD_NAME = "telegraf"

# Service name per install method.
SNAP_SERVICE = "snap.telegraf.telegraf"
DEB_SERVICE = "telegraf"

# File owner, file group and telegraf user per install method.
SNAP_OWNER = "snap_daemon"
SNAP_GROUP = "snap_daemon"
SNAP_USER = "snap_daemon"
DEB_OWNER = "root"
DEB_GROUP = "telegraf"
DEB_USER = "telegraf"

83 

84# Utilities # 

85 

86 

class InvalidInstallMethod(Exception):
    """Raised when the `install_method` config value is neither "deb" nor "snap"."""

89 

90 

class InvalidPrometheusIPRange(Exception):
    """Raised when `prometheus_ip_range` contains an entry that is not an IP network."""

93 

94 

def write_telegraf_file(path, content):
    """Write text `content` to `path` as UTF-8 with telegraf ownership, mode 0640.

    Owner and group depend on the configured install method. Returns whatever
    `host.write_file` returns.
    """
    encoded = content.encode("UTF-8")
    owner = get_telegraf_owner()
    group = get_telegraf_group()
    return host.write_file(path, encoded, owner=owner, group=group, perms=0o640)

103 

104 

def get_install_method():
    """Return the configured install method ("deb" or "snap").

    Logs an error and raises InvalidInstallMethod for any other value.
    """
    method = hookenv.config()["install_method"]

    if method not in ("deb", "snap"):
        hookenv.log(
            "Invalid install_method for telegraf: {}".format(method),
            level=hookenv.ERROR,
        )
        raise InvalidInstallMethod()

    return method

116 

117 

def render_sudoers_file(filename):
    """Render templates/sudoers/<filename>.tmpl into /etc/sudoers.d/<filename>.

    The template receives the telegraf user as `telegraf_user`; the target is
    written with mode 0640.
    """
    template_context = {"telegraf_user": get_telegraf_user()}
    hookenv.log("Installing sudoers file {}".format(filename), level="DEBUG")
    sudoers_templates = os.path.join(get_templates_dir(), "sudoers")
    destination = os.path.join(SUDOERS_DIR, filename)
    render(
        source="{}.tmpl".format(filename),
        templates_dir=sudoers_templates,
        target=destination,
        context=template_context,
        perms=0o640,
    )

130 

131 

def remove_sudoers_file(filename):
    """Delete `filename` from the sudoers directory; no-op when it is not a file."""
    target = os.path.join(SUDOERS_DIR, filename)
    if not os.path.isfile(target):
        return
    os.unlink(target)

137 

138 

def get_telegraf_user():
    """Return the telegraf user name for the configured install method."""
    # get_install_method() raises on anything other than "deb"/"snap"
    return SNAP_USER if get_install_method() == "snap" else DEB_USER

146 

147 

def get_telegraf_owner():
    """Return the owner of telegraf files for the configured install method."""
    # get_install_method() raises on anything other than "deb"/"snap"
    return SNAP_OWNER if get_install_method() == "snap" else DEB_OWNER

155 

156 

def get_telegraf_group():
    """Return the group of telegraf files for the configured install method."""
    # get_install_method() raises on anything other than "deb"/"snap"
    return SNAP_GROUP if get_install_method() == "snap" else DEB_GROUP

164 

165 

def get_base_dir():
    """Return the telegraf configuration root for the configured install method.

    Logs an error and raises InvalidInstallMethod when `install_method` is
    neither "deb" nor "snap".
    """
    method = hookenv.config()["install_method"]
    base_dirs = {"deb": DEB_BASE_DIR, "snap": SNAP_BASE_DIR}

    if method in base_dirs:
        return base_dirs[method]

    hookenv.log(
        "Invalid install_method for telegraf: {}".format(method),
        level=hookenv.ERROR,
    )
    raise InvalidInstallMethod()

179 

180 

def get_service():
    """Return the service name for the configured install method.

    Logs an error and raises InvalidInstallMethod when `install_method` is
    neither "deb" nor "snap".
    """
    method = hookenv.config()["install_method"]
    services = {"deb": DEB_SERVICE, "snap": SNAP_SERVICE}

    if method in services:
        return services[method]

    hookenv.log(
        "Invalid install_method for telegraf: {}".format(method),
        level=hookenv.ERROR,
    )
    raise InvalidInstallMethod()

194 

195 

def get_templates_dir():
    """Return the path of the charm's `templates` directory."""
    charm_root = hookenv.charm_dir()
    return os.path.join(charm_root, "templates")

198 

199 

def get_main_config_path():
    """Return the path of the main telegraf config file under the base directory."""
    base_dir = get_base_dir()
    return os.path.join(base_dir, CONFIG_FILE)

202 

203 

def get_configs_dir():
    """Return the path of the plugin config directory under the base directory."""
    base_dir = get_base_dir()
    return os.path.join(base_dir, CONFIG_DIR)

206 

207 

def get_files_dir():
    """Return the path of the charm's `files` directory."""
    charm_root = hookenv.charm_dir()
    return os.path.join(charm_root, "files")

210 

211 

def list_supported_plugins():
    """Return the charm's relation names, `requires` first and then `provides`.

    The "juju-info" relation is excluded from both sections.
    """
    plugins = []
    for section in ("requires", "provides"):
        plugins.extend(
            name for name in hookenv.metadata()[section] if name != "juju-info"
        )
    return plugins

216 

217 

def list_config_files():
    """Return the paths of the config files currently expected on disk.

    That is the main config, one file per plugin whose "configured" flag is
    set, the extra plugins file when its flag is set, and the socket listener
    file.
    """
    paths = [get_main_config_path()]
    # only include config files for configured plugins
    active_flags = get_states()

    for plugin in list_supported_plugins():
        # The prometheus_client plugin can be configured either from a relation
        # or from the juju config, hence the two possible flag names
        plugin_flags = (
            "plugins.{}.configured".format(plugin),
            "{}.configured".format(plugin),
        )
        if not any(flag in active_flags.keys() for flag in plugin_flags):
            continue
        # The "prometheus-client" relation sets the "prometheus_client" plugin
        conf_name = "prometheus_client" if plugin == "prometheus-client" else plugin
        paths.append("{}/{}.conf".format(get_configs_dir(), conf_name))

    if "extra_plugins.configured" in active_flags.keys():
        paths.append("{}/extra_plugins.conf".format(get_configs_dir()))
    paths.append("{}/socket_listener.conf".format(get_configs_dir()))

    return paths

240 

241 

def get_hostname_label():
    """Return the hostname label built from the `hostname` config option.

    The option is a format string that may use {unit}, {model}, {uuid} and
    {host}. The deprecated literal value "UNIT_NAME" yields the unit name
    alone.
    """
    fmt = hookenv.config()["hostname"]
    unit_label = get_remote_unit_name().replace("/", "-")  # / is invalid in labels.

    if fmt == "UNIT_NAME":  # Deprecated
        return unit_label

    environ = os.environ
    return fmt.format(
        unit=unit_label,
        model=environ.get("JUJU_ENV_NAME") or environ.get("JUJU_MODEL_NAME", ""),
        uuid=environ.get("JUJU_ENV_UUID") or environ.get("JUJU_MODEL_UUID", ""),
        host=socket.gethostname(),
    )

255 

256 

def get_remote_unit_name():
    """Return the name of the unit telegraf is attached to, or None.

    Uses the principal unit reported by Juju when available. Otherwise looks
    for a related unit whose "private-address" is one of this machine's own
    non-loopback IPv4/IPv6 addresses.
    """
    principal = hookenv.principal_unit()
    if principal:
        # Note(aluria): use Juju env var available since 2017
        return principal

    # Note(aluria): lookup all available IPv4/IPv6 addresses (except lo)
    inet_families = (netifaces.AF_INET, netifaces.AF_INET6)
    local_addresses = set()
    for iface in netifaces.interfaces():
        if iface == "lo":
            continue
        for family, entries in netifaces.ifaddresses(iface).items():
            if family in inet_families:
                local_addresses.update(entry["addr"] for entry in entries)

    # Note(aluria): and try to match them against rel['private-address']
    for rel_type in hookenv.metadata()["requires"].keys():
        for rel in hookenv.relations_of_type(rel_type):
            if rel["private-address"] in local_addresses:
                return rel["__unit__"]

    return None

284 

285 

def get_disabled_plugins():
    """Return consolidated list of all plugins to be disabled.

    Outside a container this is the colon-separated `disabled_plugins` option
    split as-is. Inside a container a fixed set of unsuitable plugins is
    merged in and the result is sorted and de-duplicated.
    """
    raw = hookenv.config()["disabled_plugins"]
    configured = raw.split(":") if raw else []

    if not is_container():
        return configured

    # if LXD container, disable a list of unsuitable plugins
    container_unsuitable = {
        "cpu",
        "disk",
        "diskio",
        "mem",
        "swap",
        "system",
        "bcache",
        "bond",
        "cgroup",
        "kernel_vmstat",
        "zoneinfo",
        "buddyinfo",
        "softnet_stat",
    }
    return sorted(container_unsuitable | set(configured))

311 

312 

def get_base_inputs():
    """Build the context dict used to render the base_inputs template.

    Returns a dict of items for the template.
    """
    input_options = get_extra_options()["inputs"]
    has_conntrack = os.path.exists("/proc/sys/net/netfilter/nf_conntrack_max")
    config = hookenv.config()
    collect_iptables = config["collect_iptables_metrics"]
    collect_smart = config["collect_smart_metrics"]
    disabled = get_disabled_plugins()

    template_items = {}
    template_items["extra_options"] = input_options
    template_items["bcache"] = is_bcache()
    template_items["python"] = sys.executable
    template_items["files_dir"] = get_files_dir()
    template_items["disabled_plugins"] = disabled
    template_items["conntrack"] = has_conntrack
    template_items["iptables"] = collect_iptables
    template_items["smart"] = collect_smart
    return template_items

335 

336 

def render_base_inputs():
    """Render the charm's base_inputs.conf template and return the text."""
    # use base inputs from charm templates
    template_path = os.path.join(get_templates_dir(), "base_inputs.conf")
    with open(template_path, "r") as fd:
        template_text = fd.read()
        return render_template(template_text, get_base_inputs())

341 

342 

def get_extra_options():
    """Parse the `extra_options` YAML config into JSON-encoded plugin options.

    Returns a dict shaped {kind: {plugin: {option: json_text}}} that always
    contains the "inputs" and "outputs" kinds. For the "tagpass" and "tagdrop"
    options the value is itself a dict {tag: json_text}.
    """
    merged = {"inputs": {}, "outputs": {}}
    parsed = yaml.full_load(hookenv.config()["extra_options"]) or {}
    merged.update(parsed)

    def _jsonify(option, value):
        # jsonify value, required as the telegraf config values format is
        # similar to raw json
        if option in ("tagpass", "tagdrop"):
            # this is a tagpass/drop, we need to go deeper
            return {tag: json.dumps(tag_value) for tag, tag_value in value.items()}
        return json.dumps(value)

    return {
        kind: {
            plugin: {
                option: _jsonify(option, value) for option, value in options.items()
            }
            for plugin, options in plugins.items()
        }
        for kind, plugins in merged.items()
    }

372 

373 

def render_extra_options(kind, name, extra_options=None):
    """Render the extra options of plugin `name` of `kind` as config text.

    Plain options are emitted first as `key = value` lines, then every
    tagpass/tagdrop option as its own `[kind.name.key]` table. When
    `extra_options` is None the options are taken from get_extra_options().
    """
    template = """
  {% if extra_options %}
  {% for key, value in extra_options.items() %}
  {% if key != 'tagpass' and key != 'tagdrop' %}
  {{ key }} = {{ value }}
  {% endif %}
  {% endfor %}
  {% for key, value in extra_options.items() %}
  {% if key == 'tagpass' or key == 'tagdrop' %}
  [{{ kind }}.{{ name }}.{{ key }}]
  {% for tag, tagvalue in value.items() %}
    {{ tag }} = {{ tagvalue }}
  {% endfor %}
  {% endif %}
  {% endfor %}
  {% endif %}
  """
    options_by_kind = get_extra_options() if extra_options is None else extra_options
    plugin_options = options_by_kind[kind].get(name, {})
    return render_template(
        template,
        {"extra_options": plugin_options, "kind": kind, "name": name},
    )

400 

401 

def render_template(template, context):
    """Render the Jinja2 source `template` with the `context` dict as variables.

    Block tags are left-stripped and their trailing newline is trimmed.
    """
    return Template(template, lstrip_blocks=True, trim_blocks=True).render(**context)

405 

406 

def check_prometheus_port(key, new_port):
    """Open and register `new_port`, replacing the port remembered under `key`.

    The previously stored port, if any and different, is closed and
    deregistered, and the new port is stored in unitdata.
    """
    store = unitdata.kv()
    store_key = "{}.port".format(key)
    previous_port = store.get(store_key)

    if new_port != previous_port:
        hookenv.open_port(new_port)
        hookenv.log("Opened port {}".format(new_port))
        if previous_port:  # Dont try to close non existing ports
            hookenv.close_port(previous_port)
            charms.promreg.deregister(None, previous_port)
        store.set(store_key, new_port)

    # Register the port, even if this is a re-registration
    charms.promreg.register(None, new_port)

421 

422 

def get_prometheus_port():
    """Return the prometheus output port from config.

    Returns False when the option is unset or empty, 9103 when it is
    "default", and the value converted to int otherwise.
    """
    configured = hookenv.config().get("prometheus_output_port", False)
    if not configured:
        return False
    return 9103 if configured == "default" else int(configured)

430 

431 

def get_prometheus_ip_range():
    """Return the validated entries of the `prometheus_ip_range` config option.

    The option is a comma-separated list of IP networks; surrounding spaces
    are stripped from each entry. An unset or empty option yields an empty
    list.

    Raises:
        InvalidPrometheusIPRange: if any entry is not a valid IP network
            (the error is also logged).
    """
    raw_range = hookenv.config().get("prometheus_ip_range")
    # Treat an unset (None) option like the empty string instead of crashing
    # on `.split`.
    if not raw_range:
        return []

    # we should have a list of IPs, confirm that's the case
    ips = []
    for ip in raw_range.split(","):
        # strip any spaces
        ip = ip.strip()
        try:
            ipaddress.ip_network(ip)
        except ValueError:
            hookenv.log(
                "Invalid prometheus_ip_range provided: {}".format(raw_range),
                level=hookenv.ERROR,
            )
            raise InvalidPrometheusIPRange()
        else:
            ips.append(ip)
    return ips

454 

455 

def get_socket_listener_port():
    """Return `socket_listener_port` from config as an int, or 8094 when unset/empty."""
    configured = hookenv.config().get("socket_listener_port", False)
    return int(configured) if configured else 8094

462 

463 

def render_socket_listener_config(context):
    """Render socket_listener.conf from its template using `context`.

    Any leftover tcp_listener.conf from the deprecated plugin is removed
    first.
    """
    # Remove deprecated config file
    deprecated_path = "{}/tcp_listener.conf".format(get_configs_dir())
    if os.path.exists(deprecated_path):
        os.remove(deprecated_path)

    target_path = "{}/socket_listener.conf".format(get_configs_dir())
    render(
        source="socket_listener.tmpl",
        templates_dir=get_templates_dir(),
        target=target_path,
        context=context,
    )

477 

478 

def get_sysstat_config_with_sadc_xall(content):
    """Return sysstat config text whose `SADC_OPTIONS` includes `-S XALL`.

    `content` is the text of `/etc/sysstat/sysstat`, a sequence of shell
    variable assignments used to configure sysstat logging.

    Returns None when an uncommented `SADC_OPTIONS="..."` line already
    contains `-S XALL`. Otherwise returns the updated text:

    - no uncommented `SADC_OPTIONS` line: one is appended;
    - the value has a `-S <word>` argument: the word becomes `XALL`;
    - the value has no `-S`: ` -S XALL` is appended to the value.
    """
    option_line = re.compile(r'^SADC_OPTIONS="(.*)"', flags=re.M)

    # Already configured: nothing to change.
    if re.search(r'^SADC_OPTIONS=".*-S\s+XALL.*"', content, flags=re.M):
        return None

    found = option_line.search(content)
    if found is None:
        # Commented-out options (e.g. `# SADC_OPTIONS="-S ALL"`) do not match
        # the line-anchored pattern, so they end up here as well.
        return content.rstrip() + '\nSADC_OPTIONS="-S XALL"'

    if "-S" not in found.group(1):
        # keep the existing options and add the missing one
        return option_line.sub(r'SADC_OPTIONS="\1 -S XALL"', content)

    # swap the argument of `-S` for XALL, keep other options unchanged
    return re.sub(
        r'^SADC_OPTIONS="(.*-S\s+)(\w+)(.*)"',
        r'SADC_OPTIONS="\1XALL\3"',
        content,
        flags=re.M,
    )

521 

522 

def update_sysstat_config_with_sdac_xall(path="/etc/sysstat/sysstat"):
    """Rewrite the sysstat config at `path` so `SADC_OPTIONS` has `-S XALL`.

    Logs a warning and does nothing when `path` is not a file; leaves the file
    untouched when no change is needed.
    """
    if not os.path.isfile(path):
        hookenv.log(
            "sysstat config file not found: {}".format(path), level=hookenv.WARNING
        )
        return

    with open(path, mode="r", encoding="utf8") as reader:
        updated = get_sysstat_config_with_sadc_xall(reader.read())

    if not updated:
        return

    hookenv.log("updating {} to ensure `-S XALL` in SADC_OPTIONS".format(path))
    with open(path, mode="w", encoding="utf8") as writer:
        writer.write(updated)

537 

538 

def _decode_legacy_base64(value):
    """Return `value` base64-decoded when it is valid base64 text, else unchanged."""
    if not value:
        return value
    try:
        return base64.b64decode(value.encode("utf-8"), validate=True).decode("utf-8")
    except (binascii.Error, UnicodeDecodeError):
        # not base64 (or not base64 of text), probably already up to date configs
        return value


def configure_telegraf():  # noqa: C901
    """Render the main telegraf config and its companion files.

    Steps, in order: ensure sysstat collects with `-S XALL`; build the template
    context from the charm config (inputs/outputs, tags, hostname, logfile,
    extra options); render the socket listener config; install or remove the
    sudoers files; render the exec metrics config files via the
    telegraf_exec_metrics.py script; render telegraf.conf; resume and reload
    the service of the selected install method and pause the other one; set
    the "configured" flags.

    Returns early with a "blocked" status when `install_method` is invalid,
    and with a "waiting" status (removing any existing main config) when the
    remote unit name cannot be determined yet.
    """
    hookenv.log("Generating telegraf.conf", level=hookenv.DEBUG)
    update_sysstat_config_with_sdac_xall()
    config = hookenv.config()
    context = config.copy()
    try:
        config_path = get_main_config_path()
    except InvalidInstallMethod:
        hookenv.status_set(
            "blocked",
            "Wrong install_method provided: {!r}".format(config["install_method"]),
        )
        return

    remote_unit = get_remote_unit_name()
    if remote_unit is None:
        hookenv.status_set("waiting", "Waiting for juju-info relation")
        # if UNIT_NAME in hostname config and relation not yet available,
        # make telegraf unable to start to not get weird metrics names
        if os.path.exists(config_path):
            os.unlink(config_path)
        return

    # just for the migration out of base64
    inputs = _decode_legacy_base64(config.get("inputs_config", ""))
    outputs = _decode_legacy_base64(config.get("outputs_config", ""))

    # Initialize juju-related tags to be exposed as labels
    # Using a dict to avoid duplicates, which telegraf will see as conflicts
    tags_dict = collections.OrderedDict()
    tags_dict["juju_application"] = remote_unit.split("/")[0]
    tags_dict["juju_unit"] = remote_unit.replace("/", "-")
    try:
        tags_dict["juju_model"] = hookenv.model_name()
    except KeyError:
        pass  # support older Juju 1.x deploys
    # Parse juju-configured tags, which can override the 3 juju_* defined above
    if config["tags"]:
        for tag in config["tags"].split(","):
            # split on the first "=" only, so values may contain "="
            key, value = tag.split("=", 1)
            tags_dict[key] = value
    # Render the tags dict into an array
    context["tags"] = [
        '{} = "{}"'.format(key, value) for key, value in tags_dict.items()
    ]

    if inputs:
        context["inputs"] = inputs
    else:
        # use base inputs from charm templates
        context["inputs"] = render_base_inputs()

    if outputs:
        context["outputs"] = outputs
    else:
        context["outputs"] = ""
        hookenv.log("No output plugins in main config.")

    context["hostname"] = get_hostname_label()

    logfile_path = os.path.normpath(context["logfile"])

    if (
        context["logfile"]
        and not logfile_path.startswith("/var/log/")  # noqa W503
        and not (  # noqa W503
            config["install_method"] == "snap"
            and logfile_path.startswith("/var/snap/telegraf/common/")  # noqa W503
        )
    ):
        # only allow logging in /var/log, syslog, or /var/snap/telegraf/common
        hookenv.log(
            "logfile value reset to stderr."
            " Original value: {}".format(context["logfile"]),
            hookenv.DEBUG,
        )
        context["logfile"] = ""

    context["extra_options"] = get_extra_options()

    socket_listener_port = get_socket_listener_port()
    if socket_listener_port:
        context["socket_listener_port"] = socket_listener_port

        render_socket_listener_config(context=context)

    disabled_plugins = ":".join(get_disabled_plugins())

    # handle plugin configuration for OVS
    if host.service_running("openvswitch-switch"):
        # add sudoers file for telegraf if openvswitch is running
        render_sudoers_file("telegraf_ovs")
    else:
        # disable the OVS checks if the service is not currently running
        # no need to handle duplicates here, as those are handled during
        # template rendering later
        ovs_metric_names = [
            metric_name for metric_name in METRICS if metric_name.startswith("ovs_")
        ]

        hookenv.log(
            "disabling the following metrics, since OVS is not available: {}".format(
                ovs_metric_names
            )
        )

        ovs_disabled_plugins = ":".join(ovs_metric_names)

        if disabled_plugins:
            disabled_plugins = disabled_plugins + ":" + ovs_disabled_plugins
        else:
            disabled_plugins = ovs_disabled_plugins

    # handle the sudoers for iptables
    if config["collect_iptables_metrics"]:
        render_sudoers_file("telegraf_iptables")
    else:
        remove_sudoers_file("telegraf_iptables")

    # handle the sudoers for smart
    if config["collect_smart_metrics"]:
        render_sudoers_file("telegraf_smart")
    else:
        remove_sudoers_file("telegraf_smart")

    telegraf_exec_metrics = os.path.join(get_files_dir(), "telegraf_exec_metrics.py")
    cmd = [
        telegraf_exec_metrics,
        "--render-config-files",
        "--disabled-metrics",
        disabled_plugins,
        "--configs-dir",
        get_configs_dir(),  # unit test can monkeypatch this path
        "--python",
        sys.executable,  # this will be venv python interpreter path
    ]
    hookenv.log("Rendering exec metrics config files: {}".format(" ".join(cmd)))
    subprocess.check_call(cmd)

    hookenv.log("Updating main config file")
    render(
        source="telegraf.conf.tmpl",
        templates_dir=get_templates_dir(),
        target=config_path,
        context=context,
    )

    # Make sure that only the right service is enabled, then defer to start_or_restart()
    active_service = get_service()
    for service in [DEB_SERVICE, SNAP_SERVICE]:
        if service == active_service:
            host.service_resume(service)
            host.service_reload(service)
        else:
            try:
                host.service_pause(service)
            except ValueError:
                # On machines not supporting snaps, it won't find the service
                # If we're using the snap, and the deb failed to install, it's fine too
                pass

    set_flag("telegraf.configured")
    set_flag("telegraf.needs_reload")

    if config["install_method"] == "deb":
        set_flag("telegraf.apt.configured")
    else:
        set_flag("telegraf.snap.configured")

729 

730 

731# States 

732 

733 

@when_not("telegraf.installed")
def install_telegraf():
    """Install telegraf with the configured method and drop the other flavour.

    "deb": best-effort removal of the snap, then queue the apt install.
    "snap": purge the deb, then install the snap from `snap_channel`.
    Sets a "blocked" status and returns when the install method is invalid.
    """
    try:
        method = get_install_method()
    except InvalidInstallMethod:
        hookenv.status_set(
            "blocked", "Wrong install_method provided. Expected either 'deb' or 'snap'."
        )
        return

    if method == "snap":
        apt.purge("telegraf")
        channel = hookenv.config().get("snap_channel")
        snap.install("telegraf", channel=channel, classic=True)
    elif method == "deb":
        try:
            snap.remove("telegraf")
        except Exception:
            # the snap may already be absent, or snaps may not even be supported
            # in this environment
            pass
        apt.queue_install(["telegraf"])

    if method:
        set_flag("telegraf.installed")

761 

762 

@when("telegraf.installed")
@when("apt.installed.telegraf")
@when("plugins.prometheus-client.configured")
@when_not("telegraf.configured")
@when_not("telegraf.apt.configured")
def configure_telegraf_deb():
    """Configure telegraf once the deb is installed and not yet configured."""
    configure_telegraf()

770 

771 

@when("telegraf.installed")
@when("snap.installed.telegraf")
@when("plugins.prometheus-client.configured")
@when_not("telegraf.configured")
@when_not("telegraf.snap.configured")
def configure_telegraf_snap():
    """Configure telegraf once the snap is installed and not yet configured."""
    configure_telegraf()

779 

780 

@hook("upgrade-charm")
def upgrade_charm():
    """Clear every "configured" flag so all config is rendered again after an upgrade."""
    flags = ["plugins.{}.configured".format(name) for name in list_supported_plugins()]
    flags += [
        "extra_plugins.configured",
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
        "grafana.configured",
    ]
    for flag in flags:
        clear_flag(flag)

790 

791 

@when("config.changed")
def handle_config_changes():
    """Clear the flags affected by the changed config options.

    The telegraf, nagios and grafana "configured" flags are always cleared so
    that telegraf itself is reconfigured on any config change.
    """
    config = hookenv.config()

    if config.changed("extra_options"):
        for name in list_supported_plugins():
            clear_flag("plugins.{}.configured".format(name))
        clear_flag("prometheus-client.relation.configured")

    if config.changed("extra_plugins"):
        clear_flag("extra_plugins.configured")

    toggle_flag(
        "telegraf.smart_metrics.enabled", bool(config.get("collect_smart_metrics"))
    )

    reinstall_options = ("install_method", "snap_channel", "install_sources")
    if any(config.changed(option) for option in reinstall_options):
        clear_flag("telegraf.installed")
        clear_flag("extra_plugins.configured")
        clear_flag("plugins.prometheus-client.configured")
        clear_flag("prometheus-client.relation.configured")

    prometheus_options = ("prometheus_output_port", "prometheus_ip_range")
    if any(config.changed(option) for option in prometheus_options):
        clear_flag("plugins.prometheus-client.configured")
        clear_flag("prometheus-client.relation.configured")

    # if something else changed, let's reconfigure telegraf itself just in case
    for flag in (
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
        "telegraf.nagios-setup.complete",
        "grafana.configured",
    ):
        clear_flag(flag)

830 

831 

@when("telegraf.configured")
@when_not("extra_plugins.configured")
def configure_extra_plugins():
    """Write the `extra_plugins` config value to extra_plugins.conf.

    Does nothing when the option is empty.
    """
    extra_plugins = hookenv.config()["extra_plugins"]
    if not extra_plugins:
        return

    target = "{}/extra_plugins.conf".format(get_configs_dir())
    host.write_file(target, extra_plugins.encode("utf-8"))
    set_flag("extra_plugins.configured")
    set_flag("telegraf.needs_reload")

843 

844 

@when("elasticsearch.available")
@when("telegraf.installed")
def elasticsearch_input(es):
    """Render the elasticsearch input config from the related units.

    Every relation providing both "host" and "port" becomes an http server
    URL. With no usable relation an existing config file is removed and the
    plugin flag cleared. A telegraf reload is always requested.
    """
    template = """
[[inputs.elasticsearch]]
  servers = {{ servers }}
"""
    servers = []
    for rel in hookenv.relations_of_type("elasticsearch"):
        es_host, port = rel.get("host"), rel.get("port")
        if es_host and port:
            servers.append("http://{}:{}".format(es_host, port))
        else:
            hookenv.log("No host received for relation: {}.".format(rel))

    config_path = "{}/{}.conf".format(get_configs_dir(), "elasticsearch")
    if servers:
        rendered = render_template(template, {"servers": json.dumps(servers)})
        rendered += render_extra_options("inputs", "elasticsearch")
        hookenv.log("Updating {} plugin config file".format("elasticsearch"))
        host.write_file(config_path, rendered.encode("utf-8"))
        set_flag("plugins.elasticsearch.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.elasticsearch.configured")
    set_flag("telegraf.needs_reload")

874 

875 

@when("memcached.available")
@when("telegraf.installed")
def memcached_input(memcache):
    """Render the memcached input config from the related units.

    Every relation providing both "host" and "port" becomes a "host:port"
    server entry. With no usable relation an existing config file is removed
    and the plugin flag cleared. A telegraf reload is always requested.
    """
    template = """
[[inputs.memcached]]
  servers = {{ servers }}
"""
    required_keys = ["host", "port"]
    addresses = [
        "{}:{}".format(rel["host"], rel["port"])
        for rel in hookenv.relations_of_type("memcached")
        if all(rel.get(key) for key in required_keys)
    ]
    config_path = "{}/{}.conf".format(get_configs_dir(), "memcached")
    if addresses:
        context = {"servers": json.dumps(addresses)}
        input_config = render_template(template, context) + render_extra_options(
            "inputs", "memcached"
        )
        hookenv.log("Updating {} plugin config file".format("memcached"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.memcached.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        # Keep the flag in sync with the file, as the elasticsearch handler
        # does; list_config_files() relies on it to decide which files exist.
        clear_flag("plugins.memcached.configured")
    set_flag("telegraf.needs_reload")

904 

905 

@when("mongodb.database.available")
@when("telegraf.installed")
def mongodb_input(mongodb):
    """Render the mongodb input config from the related units.

    Each relation contributes its "private-address", suffixed with ":port"
    when a port is published. With no relation an existing config file is
    removed and the plugin flag cleared. A telegraf reload is always
    requested.
    """
    template = """
[[inputs.mongodb]]
  servers = {{ servers }}
"""
    mongo_addresses = []
    for rel in hookenv.relations_of_type("mongodb"):
        addr = rel["private-address"]
        port = rel.get("port", None)
        mongo_addresses.append("{}:{}".format(addr, port) if port else addr)
    config_path = "{}/{}.conf".format(get_configs_dir(), "mongodb")
    if mongo_addresses:
        context = {"servers": json.dumps(mongo_addresses)}
        input_config = render_template(template, context) + render_extra_options(
            "inputs", "mongodb"
        )
        hookenv.log("Updating {} plugin config file".format("mongodb"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.mongodb.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        # Keep the flag in sync with the file, as the elasticsearch handler
        # does; list_config_files() relies on it to decide which files exist.
        clear_flag("plugins.mongodb.configured")
    set_flag("telegraf.needs_reload")

935 

936 

@when("mysql.available")
@when("telegraf.installed")
def mysql_input(mysql):
    """Render the mysql input config for the local mysql relation.

    A context is collected for a relation whose unit data carries this unit's
    private address, provided the interface reports a connection string. The
    plugin flag is set or cleared to match, and a telegraf reload is always
    requested.
    """
    contexts = []
    relations = context.Relations()[mysql.relation_name]
    for relation in relations.values():
        for reldata in relation.values():
            hookenv.log("Available relations", level="DEBUG")
            is_local = reldata["private-address"] == hookenv.unit_private_ip()
            if is_local and mysql.connection_string():
                contexts.append(
                    {
                        "host": mysql.host(),
                        "port": mysql.port(),
                        "user": mysql.user(),
                        "pass": mysql.password(),
                        "is_secure": "false",  # TODO: provide config intf for this
                        "slave": reldata.get("slave", None),
                    }
                )
                break

    render_mysql_tmpl(contexts)
    toggle_flag("plugins.mysql.configured", bool(contexts))
    set_flag("telegraf.needs_reload")

961 

962 

@when("postgresql.database.connected")
def choose_postgresql_database():
    """Request the stock "postgres" database on the postgresql relation."""
    # We have no need for our own database. If we need to create
    # custom views, we can remove this handler. Or maybe it should
    # be configurable, so people can use telegraf to export custom
    # metrics about their data.
    endpoint = endpoint_from_flag("postgresql.database.connected")
    endpoint.set_database("postgres")

971 

972 

@when("postgresql.database.connected")
@when_not("postgresql.database.available")
def postgresql_waiting():
    """Report a waiting status while the PostgreSQL database is not yet available."""
    hookenv.status_set("waiting", "Waiting for PostgreSQL relation")

977 

978 

@when("postgresql.database.available", "postgresql.database.changed")
@when("telegraf.installed")
def postgresql_input():
    """Render the postgresql input config for the principal unit's database.

    Does nothing unless the principal unit is an expected related unit on the
    postgresql relation. Without any connection string for the principal a
    "waiting" status is set and the plugin flag cleared.
    """
    # The subordinate may be connected to several services.
    pg_options = get_extra_options()["inputs"].get("postgresql")
    principal = hookenv.principal_unit()

    if principal not in hookenv.expected_related_units("postgresql"):
        return

    pgsql = endpoint_from_flag("postgresql.database.available")
    assert pgsql is not None

    contexts = []
    for css in pgsql:
        conn = css.get(principal)
        if not conn:
            continue
        version = css.version or "9.3"
        contexts.append(
            {
                "conn_str": str(conn),
                "server": conn.host,
                "replica": "master" if conn == css.master else "hot standby",
                "extra_options": pg_options,
                "version": version,
                "pg10": LooseVersion(version + ".0") >= LooseVersion("10.0"),
            }
        )
        hookenv.status_set(
            "maintenance",
            "Monitoring PostgreSQL {} on {}".format(css.version, principal),
        )

    if not contexts:
        hookenv.status_set(
            "waiting", "Waiting for PostgreSQL database on {}".format(principal)
        )
        clear_flag("plugins.postgresql.configured")
        return

    render_postgresql_tmpl(contexts)
    set_flag("plugins.postgresql.configured")
    set_flag("telegraf.needs_reload")

1021 

1022 

def render_mysql_tmpl(contexts):
    """Write (or remove) the telegraf ``mysql.conf`` plugin file.

    contexts: list of template contexts, one per MySQL server to monitor.

    When ``contexts`` is non-empty, ``mysql.tmpl`` is rendered once per
    context, each followed by the extra options configured for the
    ``inputs.mysql`` plugin, and the result is written to the config file.
    When it is empty, an existing config file is deleted.
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "mysql")

    if contexts:
        template_path = os.path.join(get_templates_dir(), "mysql.tmpl")
        # Use a context manager so the template file handle is always closed.
        with open(template_path, "r") as template_file:
            template = template_file.read()

        f = io.StringIO()
        for ctx in contexts:
            f.write(render_template(template, ctx))
            f.write(render_extra_options("inputs", "mysql"))
        write_telegraf_file(config_path, f.getvalue())
    elif os.path.exists(config_path):
        os.unlink(config_path)

1035 

1036 

def render_postgresql_tmpl(contexts):
    """Write (or remove) the telegraf ``postgresql.conf`` plugin file.

    contexts: list of template contexts, one per PostgreSQL connection.

    When ``contexts`` is non-empty, ``postgresql.tmpl`` is rendered once per
    context and the concatenation is written to the config file. When it is
    empty, an existing config file is deleted.
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "postgresql")

    if contexts:
        template_path = os.path.join(get_templates_dir(), "postgresql.tmpl")
        # Use a context manager so the template file handle is always closed.
        with open(template_path, "r") as template_file:
            template = template_file.read()

        f = io.StringIO()
        for ctx in contexts:
            f.write(render_template(template, ctx))
        write_telegraf_file(config_path, f.getvalue())
    elif os.path.exists(config_path):
        os.unlink(config_path)

1050 

1051 

@when("haproxy.available")
@when("telegraf.installed")
def haproxy_input(haproxy):
    """Render the haproxy input plugin config from the haproxy relation.

    Every related unit whose ``enabled`` setting is truthy contributes one
    ``http://user[:password]@address:port`` server URL. With at least one
    URL the config file is written and ``plugins.haproxy.configured`` is
    set; otherwise an existing config file is removed. A telegraf reload is
    requested in both cases.
    """
    template = """
[[inputs.haproxy]]
  servers = {{ servers }}
"""
    truthy_strings = ["y", "yes", "true", "t", "on", "True"]
    haproxy_addresses = []
    for rel in hookenv.relations_of_type("haproxy"):
        enabled = rel.get("enabled", False)
        # Juju gives us a string instead of a boolean, fix it
        if isinstance(enabled, str):
            enabled = enabled in truthy_strings
        if not enabled:
            continue

        addr = rel.get("listener-address", rel["private-address"])
        port = rel["port"]
        userpass = rel["user"]
        password = rel.get("password", None)
        if password:
            userpass += ":{}".format(password)
        haproxy_addresses.append("http://{}@{}:{}".format(userpass, addr, port))

    config_path = "{}/{}.conf".format(get_configs_dir(), "haproxy")
    if haproxy_addresses:
        rendered = render_template(
            template, {"servers": json.dumps(haproxy_addresses)}
        )
        input_config = rendered + render_extra_options("inputs", "haproxy")
        hookenv.log("Updating {} plugin config file".format("haproxy"))
        write_telegraf_file(config_path, input_config)
        set_flag("plugins.haproxy.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")

1091 

1092 

@when("apache.available")
@when("telegraf.installed")
def apache_input(apache):
    """Render the apache input plugin config from the apache relation.

    For each related unit, publishes a server-status vhost (port 8080) on
    the relation and records the unit's ``server-status?auto`` URL. With at
    least one URL the config file is written and
    ``plugins.apache.configured`` is set; otherwise an existing config file
    is removed. A telegraf reload is requested in both cases.
    """
    template = """
[[inputs.apache]]
  urls = {{ urls }}
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "apache")
    port = "8080"
    vhost = render(
        source="apache-server-status.tmpl",
        templates_dir=get_templates_dir(),
        target=None,
        context={"port": port},
    )
    # Settings sent to every related apache unit.
    relation_info = {
        "ports": port,
        "domain": "apache-status",
        "enabled": True,
        "site_config": vhost,
        "site_modules": "status",
    }

    urls = []
    for rel in hookenv.relations_of_type("apache"):
        hookenv.relation_set(rel["__relid__"], relation_settings=relation_info)
        urls.append(
            "http://{}:{}/server-status?auto".format(rel["private-address"], port)
        )

    if urls:
        template_context = {"urls": json.dumps(urls)}
        rendered = render_template(template, template_context)
        input_config = rendered + render_extra_options("inputs", "apache")
        hookenv.log("Updating {} plugin config file".format("apache"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.apache.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")

1133 

1134 

@when("endpoint.redis.available")
@when("telegraf.installed")
def redis_input(redis):
    """Render the redis input plugin config from the redis relation.

    Uses the ``host`` and ``port`` published by the first related unit; a
    warning is logged when the relation holds a number of units other than
    one. With related units the config file is written and
    ``plugins.redis.configured`` is set; without them an existing config
    file is removed and the flag cleared. A telegraf reload is always
    requested.
    """
    template = """
[[inputs.redis]]
  servers = ["tcp://{{ host }}:{{ port }}"]
  # Until https://github.com/influxdata/telegraf/issues/5036 is fixed
  fielddrop = ["aof_last_bgrewrite_status","aof_last_write_status","maxmemory_policy","rdb_last_bgsave_status","used_memory_dataset_perc","used_memory_peak_perc"]
"""  # noqa E501 (inline template)
    config_path = "{}/{}.conf".format(get_configs_dir(), "redis")

    rels = hookenv.relations_of_type("redis")
    if rels:
        if len(rels) != 1:
            # The two sentences were previously joined without a separating
            # space ("relation.Expected").
            hookenv.log(
                "Unexpected number of units in the redis relation. "
                "Expected 1, got {}".format(len(rels)),
                "WARNING",
            )

        ctxt = {
            # The host value may arrive wrapped in double quotes.
            "host": rels[0]["host"].strip('"'),
            "port": rels[0]["port"],
        }
        input_config = render_template(template, ctxt) + render_extra_options(
            "inputs", "redis"
        )
        hookenv.log("Updating {} plugin config file".format("redis"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.redis.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.redis.configured")

    set_flag("telegraf.needs_reload")

1169 

1170 

@when("endpoint.sentry.joined")
@when("telegraf.installed")
def sentry_input(sentry):
    """Render the statsd input used for sentry when the relation has units.

    The rendered config does not depend on any relation data, so it is
    produced and written exactly once when at least one unit is related
    (previously the identical content was re-rendered and rewritten once per
    related unit). Without related units an existing config file is removed
    and ``plugins.sentry.configured`` cleared. A telegraf reload is always
    requested.
    """
    template = """
[[inputs.statsd]]
  protocol = "udp"
  service_address = ":8125"
  delete_gauges = false
  delete_counters = false
  delete_sets = false
  delete_timings = false
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "sentry")

    if hookenv.relations_of_type("sentry"):
        input_config = render_template(template, {}) + render_extra_options(
            "inputs", "sentry"
        )
        hookenv.log("Updating {} plugin config file".format("sentry"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.sentry.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
        clear_flag("plugins.sentry.configured")

    set_flag("telegraf.needs_reload")

1199 

1200 

@when("exec.available")
@when("telegraf.installed")
def exec_input(exec_rel):
    """Render the exec input plugin config from the exec relation commands.

    Returns early (leaving any existing config untouched) when the relation
    defines no commands. Otherwise keeps the commands whose
    ``run_on_this_unit`` entry is truthy, renders one ``[[inputs.exec]]``
    section per kept command, and sets ``plugins.exec.configured``; when no
    command is kept an existing config file is removed. A telegraf reload is
    requested in both of those cases.
    """
    template = """
{% for cmd in commands %}
[[inputs.exec]]
  commands = {{ cmd.commands }}
  {% for key, value in cmd|dictsort %}
  {% if key not in ["commands", "tags"] %}
  {{ key }} = "{{ value }}"
  {% endif %}
  {% endfor %}
  {% if cmd.tags %}
  [inputs.exec.tags]
    {% for tag, tag_value in cmd.tags|dictsort %}
    {{ tag }} = "{{ tag_value }}"
    {% endfor %}
  {% endif %}

{% endfor %}
"""
    config_path = "{}/{}.conf".format(get_configs_dir(), "exec")
    commands = exec_rel.commands()
    if not commands:
        hookenv.log("No Commands defined in the exec relation, doing nothing.")
        return

    pre_proc_cmds = []
    for command in commands:
        # pop() also strips the key so the template does not emit it as an
        # option of the exec plugin.
        if command.pop("run_on_this_unit"):
            pre_proc_cmds.append(command)

    if pre_proc_cmds:
        input_config = render_template(template, {"commands": pre_proc_cmds})
        hookenv.log("Updating {} plugin config file".format("exec"))
        host.write_file(config_path, input_config.encode("utf-8"))
        set_flag("plugins.exec.configured")
    elif os.path.exists(config_path):
        # if no commands, remove previous config
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")

1242 

1243 

@when_not("exec.available")
@when("plugins.exec.configured")
def exec_input_departed():
    """Tear down the exec plugin config once no exec relation remains.

    When no unit is related on ``exec`` any more, clears
    ``plugins.exec.configured``, deletes the plugin config file if present
    and requests a telegraf reload.
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "exec")
    if hookenv.relations_of_type("exec"):
        # Still related to something: keep the current configuration.
        return

    clear_flag("plugins.exec.configured")
    if os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")

1254 

1255 

@when("amqp.connected")
@when_not("amqp.available")
def rabbitmq_input_setup(rabbitmq):
    """Request admin access on the amqp relation for this unit.

    The username and vhost are both ``telegraf-<unit>`` where ``<unit>`` is
    the local unit name with ``/`` replaced by ``-``.
    """
    # Requires management_plugin=true on the rabbitmq-server application.
    # vhost will not be used, but still needs to be requested.
    unit_slug = hookenv.local_unit().replace("/", "-")
    identity = "telegraf-" + unit_slug
    rabbitmq.set_local(admin=True)
    rabbitmq.set_remote(admin=True)
    rabbitmq.request_access(username=identity, vhost=identity)

1265 

1266 

@when("amqp.available")
@when("telegraf.installed")
def rabbitmq_input(rabbitmq):
    """Render the rabbitmq input plugin config from the amqp relation.

    Does nothing until the relation provides an address, a username and a
    password. Then writes the plugin config (management port 15672), sets
    ``plugins.rabbitmq.configured`` and requests a telegraf reload.
    """
    template = """
[[inputs.rabbitmq]]
  url = "http://{{ server }}:{{ port }}"
  username = "{{ username }}"
  password = "{{ password }}"
  fielddrop = ["idle_since"]
"""
    addr = rabbitmq.private_address()
    port = "15672"
    username = rabbitmq.username()
    password = rabbitmq.password()

    if not addr or not username or not password:
        return

    template_context = {
        "server": addr,
        "username": username,
        "password": password,
        "port": port,
    }
    config_path = "{}/{}.conf".format(get_configs_dir(), "rabbitmq")
    input_config = render_template(template, template_context)

    hookenv.log("Updating {} plugin config file".format("rabbitmq"))
    write_telegraf_file(config_path, input_config)

    set_flag("plugins.rabbitmq.configured")
    set_flag("telegraf.needs_reload")

1296 

1297 

@when_not("amqp.available")
@when("plugins.rabbitmq.configured")
def rabbitmq_input_departed():
    """Remove the rabbitmq plugin config once amqp is no longer available."""
    clear_flag("plugins.rabbitmq.configured")

    stale_config = "{}/{}.conf".format(get_configs_dir(), "rabbitmq")
    if os.path.exists(stale_config):
        os.unlink(stale_config)

    set_flag("telegraf.needs_reload")

1306 

1307 

@when("influxdb-api.available")
@when("telegraf.installed")
def influxdb_api_output(influxdb):
    """Render the influxdb output plugin config from the influxdb-api relation.

    Only related units publishing all of ``hostname``, ``port``, ``user`` and
    ``password`` are used. Each contributes an ``http://host:port`` endpoint;
    the credentials of the first such unit are used for all endpoints. With
    endpoints the config file is written and
    ``plugins.influxdb-api.configured`` is set; otherwise an existing config
    file is removed. A telegraf reload is requested in both cases.
    """
    required_keys = ["hostname", "port", "user", "password"]
    endpoints = []
    user = None
    password = None
    for rel in hookenv.relations_of_type("influxdb-api"):
        if not all(rel.get(key) for key in required_keys):
            continue
        endpoints.append("http://{}:{}".format(rel["hostname"], rel["port"]))
        # Keep the credentials of the first complete unit only.
        if user is None:
            user = rel["user"]
        if password is None:
            password = rel["password"]

    config_path = "{}/{}.conf".format(get_configs_dir(), "influxdb-api")
    if endpoints:
        hookenv.log("Updating {} plugin config file".format("influxdb-api"))
        template_context = {
            "urls": json.dumps(endpoints),
            "username": "{}".format(user),
            "password": "{}".format(password),
        }
        content = render(
            source="influxdb-api.conf.tmpl",
            target=None,
            templates_dir=get_templates_dir(),
            context=template_context,
        )
        extra_opts = render_extra_options("outputs", "influxdb")
        write_telegraf_file(config_path, "\n".join([content, extra_opts]))
        set_flag("plugins.influxdb-api.configured")
    elif os.path.exists(config_path):
        os.unlink(config_path)
    set_flag("telegraf.needs_reload")

1342 

1343 

def generate_prometheus_output_config(prometheus_output_port, prometheus_ip_range):
    """Build the template context for the prometheus_client output plugin.

    prometheus_output_port: port telegraf must listen on; always wins over a
        port given through the ``listen`` extra option.
    prometheus_ip_range: list of allowed ranges, appended to any ``ip_range``
        given through the extra options.

    returns a dict with ``listen``, ``ip_range`` and the rendered remaining
    ``extra_options`` of the ``outputs.prometheus_client`` section.
    """
    # If extra_options are set for prometheus_client, let's integrate them
    extra_options = get_extra_options()
    options = extra_options["outputs"].get("prometheus_client", {})

    # "listen" and "ip_range" are popped from the options and returned as
    # dedicated keys - presumably so render_extra_options() below does not
    # emit them a second time.
    listen = options.pop("listen", None)
    if not listen:
        listen = ":{}".format(prometheus_output_port)
    else:
        # Split on the LAST colon so bracketed IPv6 hosts ("[::1]:9103")
        # keep their address part intact.
        listen_host, _, listen_port = listen.rpartition(":")
        configured_port = int(listen_port)
        # Compare textually: the port may be passed as an int or as a
        # numeric string, and int != str would always report a mismatch.
        if str(configured_port) != str(prometheus_output_port):
            hookenv.log(
                "prometheus_output_port is {}, but extra_options would set it "
                "to {}. Choosing {} from prometheus_output_port.".format(
                    prometheus_output_port,
                    configured_port,
                    prometheus_output_port,
                ),
                level=hookenv.WARNING,
            )
            listen = "{}:{}".format(listen_host, prometheus_output_port)

    options_ip_range = options.pop("ip_range", [])
    ip_range = options_ip_range + prometheus_ip_range

    return {
        "listen": listen,
        "ip_range": ip_range,
        "extra_options": render_extra_options(
            "outputs", "prometheus_client", extra_options=extra_options
        ),
    }

1373 

1374 

def render_prometheus_client_config(port, ip_range):
    """Render ``prometheus_client.tmpl`` into the plugin config directory.

    port: port passed on to ``generate_prometheus_output_config``
    ip_range: ip ranges passed on to ``generate_prometheus_output_config``
    """
    config_path = "{}/{}.conf".format(get_configs_dir(), "prometheus_client")
    log_message = "Updating {} plugin config file. Port is {} and ip_range is {}".format(
        "prometheus_client", port, ip_range
    )
    hookenv.log(log_message, level=hookenv.INFO)
    template_context = generate_prometheus_output_config(port, ip_range)
    render(
        source="prometheus_client.tmpl",
        templates_dir=get_templates_dir(),
        target=config_path,
        context=template_context,
    )

1390 

1391 

@when("prometheus-client.available")
@when_not("prometheus-client.relation.configured")
def configure_prometheus_client_with_relation(prometheus):
    """Configure the prometheus_client output when the relation is available.

    Publishes this unit's port and address on every prometheus-client
    relation, collects the ``egress-subnets`` of the related units, renders
    the plugin config and sets the ``plugins.prometheus-client.configured``,
    ``prometheus-client.relation.configured`` and ``telegraf.needs_reload``
    flags.

    prometheus: relation endpoint whose ``configure`` method is called with
        the port and the address to advertise.
    """
    hookenv.log(
        "Configuring prometheus_client output plugin, with prometheus-client relation",
        level=hookenv.DEBUG,
    )
    # Default to 9103 when no port is configured.
    port = get_prometheus_port() or "9103"
    # We'll iterate through the prometheus-client relation counterparts,
    # inform them of our address so that they scrape it, and get their egress subnets
    # so that we can allow them
    remote_egress_subnets = []
    for relation_id in hookenv.relation_ids("prometheus-client"):
        # if juju 2.x+ then we'll attempt to get the network space address
        try:
            hookenv.log("Getting local network info", level=hookenv.DEBUG)
            network_info = hookenv.network_get(
                "prometheus-client", relation_id=relation_id
            )
            hookenv.log(network_info, level=hookenv.DEBUG)
            # Prefer the first ingress address; otherwise fall back to the
            # primary address of the binding.
            if "ingress-addresses" in network_info:
                ip_addr = network_info.get("ingress-addresses")[0]
            else:
                ip_addr = hookenv.network_get_primary_address("prometheus-client")
            for unit in hookenv.related_units(relation_id):
                hookenv.log(
                    "Getting remote egress subnet for relation {} - {}".format(
                        unit, relation_id
                    ),
                    level=hookenv.DEBUG,
                )
                remote_egress_subnets.append(
                    hookenv.relation_get("egress-subnets", unit, relation_id)
                )
        except NotImplementedError:
            # if that fails, just let prometheus.configure(...) do its default
            ip_addr = None
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source; this call is assumed to run once per relation - confirm.
        prometheus.configure(port, hostname=ip_addr, private_address=ip_addr)
    check_prometheus_port("prometheus_output", port)
    # If prometheus_ip_range is empty, all remote IPs are allowed
    ip_range = get_prometheus_ip_range()
    # Only extend a non-empty allow-list; an empty one must stay empty so
    # that every remote IP remains allowed.
    if ip_range != []:
        ip_range = ip_range + remote_egress_subnets
    render_prometheus_client_config(port, ip_range)
    set_flag("plugins.prometheus-client.configured")
    set_flag("prometheus-client.relation.configured")
    set_flag("telegraf.needs_reload")

1439 

1440 

@when_not("prometheus-client.available")
@when_not("plugins.prometheus-client.configured")
def configure_prometheus_client():
    """Configure the prometheus_client output without a prometheus relation.

    Without a configured port the plugin is not rendered and only
    ``plugins.prometheus-client.configured`` is set. Otherwise the config is
    rendered for the configured port and ip range, a telegraf reload is
    requested and ``prometheus-client.relation.configured`` is cleared.
    """
    hookenv.log("Configuring prometheus_client output plugin", level=hookenv.DEBUG)
    if not get_prometheus_port():
        # No relation to prometheus, no port configured: do not configure the plugin
        set_flag("plugins.prometheus-client.configured")
        return

    port = get_prometheus_port()
    check_prometheus_port("prometheus_output", port)
    render_prometheus_client_config(port, get_prometheus_ip_range())
    set_flag("plugins.prometheus-client.configured")
    set_flag("telegraf.needs_reload")
    clear_flag("prometheus-client.relation.configured")

1457 

1458 

def convert_days(time_string):
    """Convert a time description string to a number of days.

    Converts strings like ``2w``, ``14d``, ``48h`` or ``720m`` to a string
    containing the equivalent number of days. The unit is read from the end
    of the string and tried in the order days, weeks, hours, minutes.

    Not included, months and years, because the number of days in each changes.
    Also not included, seconds.

    returns the day count as a string: the digits as given for days, an
    integer for weeks, a float (e.g. ``"2.0"``) for hours and minutes; or
    None when the string ends in no supported unit.
    """
    days = re.search(r"(\d+)d$", time_string)
    if days:
        return days.group(1)
    weeks = re.search(r"(\d+)w$", time_string)
    if weeks:
        return str(int(weeks.group(1)) * 7)
    hours = re.search(r"(\d+)h$", time_string)
    if hours:
        return str(int(hours.group(1)) / 24)
    mins = re.search(r"(\d+)m$", time_string)
    if mins:
        # 24 * 60 minutes per day. This branch used to read the (unmatched)
        # hours result, which raised AttributeError for every "<n>m" input.
        return str(int(mins.group(1)) / (24 * 60))
    return None

1483 

1484 

@when("prometheus-rules.available")
def render_prometheus_rules(prometheus_rules):
    """Send the rendered alerting rule templates over the prometheus-rules relation.

    Each rule template is rendered with values from the charm config, the
    local hostname and the principal unit name (``JUJU_PRINCIPAL_UNIT``);
    the results are joined with newlines and passed to
    ``prometheus_rules.configure``.
    """
    # Send a list of rules for alerting to Prometheus
    config = hookenv.config()
    unit_name = os.environ.get("JUJU_PRINCIPAL_UNIT")
    lead_time = config.get("lead_time")
    rule_context = {
        "hostname": socket.gethostname(),
        "cpu_idle": config.get("cpu_idle"),
        "wait_time": config.get("wait_time"),
        "lead_time": lead_time,
        "lead_days": convert_days(lead_time),
        "prometheus_context": config.get("prometheus_context"),
        "unit_name": unit_name,
        "application_name": unit_name.split("/")[0],
    }
    rule_templates = (
        "rule_cpu_usage.j2",
        "rule_diskfull.j2",
        "rule_mem.j2",
        "rule_disk_ro.j2",
        "rule_packetdrops.j2",
        "rule_predict_disk_space.j2",
    )
    formatted_rules = []
    for rule_template in rule_templates:
        rule_path = os.path.join(get_templates_dir(), rule_template)
        with open(rule_path, "r") as fd:
            rule_source = fd.read()
            formatted_rules.append(render_template(rule_source, rule_context))
    prometheus_rules.configure("\n".join(formatted_rules))

1514 

1515 

@when_not("prometheus-client.available")
@when("prometheus-client.relation.configured")
def prometheus_client_departed():
    """Drop the relation-driven prometheus_client config once unrelated.

    When no prometheus-client relation remains and the plugin config file
    exists, the file is deleted and ``plugins.prometheus-client.configured``
    is cleared.
    """
    hookenv.log("prometheus-client relation not available")
    config_path = "{}/{}.conf".format(get_configs_dir(), "prometheus_client")
    still_related = hookenv.relations_of_type("prometheus-client")
    if still_related or not os.path.exists(config_path):
        return

    hookenv.log("Deleting {} plugin config file".format("prometheus-client"))
    os.unlink(config_path)
    clear_flag("plugins.prometheus-client.configured")

1526 

1527 

@when(
    "plugins.prometheus-client.configured",
    "endpoint.dashboards.joined",
    "leadership.is_leader",
)
@when_not("grafana.configured")
def register_grafana_dashboard():
    """Register the telegraf dashboard on the grafana dashboards endpoint.

    The rendered dashboard JSON is parsed into a dict, tagged with the MD5
    hex digest of its text under the ``digest`` key, registered under
    ``GRAFANA_DASHBOARD_NAME``, and ``grafana.configured`` is set.
    """
    grafana = endpoint_from_flag("endpoint.dashboards.joined")
    hookenv.log("Loading grafana dashboard", level=hookenv.DEBUG)

    dashboard_json = _load_grafana_dashboard()
    # Digest of the rendered text, computed before parsing.
    digest = hashlib.md5(dashboard_json.encode("utf8")).hexdigest()
    dashboard_dict = json.loads(dashboard_json)
    dashboard_dict["digest"] = digest

    debug_message = "Rendered dashboard dict:\n{}".format(dashboard_dict)
    hookenv.log(debug_message, level=hookenv.DEBUG)

    grafana.register_dashboard(name=GRAFANA_DASHBOARD_NAME, dashboard=dashboard_dict)
    hookenv.log('Grafana dashboard "{}" registered.'.format(GRAFANA_DASHBOARD_NAME))
    set_flag("grafana.configured")

1547 

1548 

def _load_grafana_dashboard():
    """Render the telegraf grafana dashboard template and return its text.

    The datasource name is built from the ``prometheus_datasource`` config
    option (default ``prometheus``). The template uses ``<<`` / ``>>`` as
    variable delimiters.
    """
    datasource_name = hookenv.config().get("prometheus_datasource", "prometheus")
    dashboard_context = {
        "datasource": "{} - Juju generated source".format(datasource_name),
        # TODO: Figure out if metrics exist and then set bools accordingly.
        # For now, setting bools to true.
        "bonds_enabled": True,
        "bcache_enabled": True,
        "conntrack_enabled": True,
    }
    return render_custom(
        source=GRAFANA_DASHBOARD_TELEGRAF_FILE_NAME,
        render_context=dashboard_context,
        variable_start_string="<<",
        variable_end_string=">>",
    )

1565 

1566 

# This isn't exposed in charmhelpers: https://github.com/juju/charm-helpers/issues/367
def render_custom(source, render_context, **parameters):
    """Render a grafana dashboard template with a customised jinja Environment.

    source: file name of the template, looked up in the charm's
        ``templates/dashboards/grafana`` folder
    render_context: variables made available to the template
    parameters: keyword arguments forwarded to the jinja ``Environment``

    returns the rendered template content; a missing template is logged and
    the ``TemplateNotFound`` exception is re-raised.
    """
    template_folder = os.path.join(hookenv.charm_dir(), "templates/dashboards/grafana")
    loader = FileSystemLoader(template_folder)
    environment = Environment(loader=loader, **parameters)
    try:
        template = environment.get_template(source)
    except exceptions.TemplateNotFound:
        hookenv.log(
            "Could not load template {} from {}".format(source, template_folder)
        )
        raise
    return template.render(render_context)

1587 

1588 

@when("endpoint.dashboards.departed", "grafana.configured")
def unregister_grafana_dashboard():
    """Clear ``grafana.configured`` once the dashboards endpoint has departed."""
    clear_flag("grafana.configured")

1592 

1593 

@when("endpoint.dashboards.failed", "leadership.is_leader")
def grafana_dashboard_import_failed():
    """Log every failed grafana dashboard import and clear ``grafana.configured``."""
    grafana = endpoint_from_flag("endpoint.dashboards.failed")
    for failure in grafana.failed_imports:
        error_message = 'Grafana dashboard "{}" import failed with: {}'.format(
            failure.name, failure.reason
        )
        hookenv.log(message=error_message, level=hookenv.ERROR)
    clear_flag("grafana.configured")

1605 

1606 

@when("telegraf.needs_reload")
@when("telegraf.installed")
@when("telegraf.configured")
def start_or_restart():
    """Restart telegraf when needed and report the resulting unit status.

    The service is restarted when it is not running, when any config file
    changed, or when the sorted set of ``plugins*`` / ``extra_plugins*``
    flags changed. After waiting up to 15 seconds for the service to be
    running, the unit becomes ``active`` (and ``telegraf.needs_reload`` is
    cleared) or ``blocked`` if telegraf is still not running.
    """
    plugin_prefixes = ("plugins", "extra_plugins")
    states = sorted(
        flag for flag in get_states().keys() if flag.startswith(plugin_prefixes)
    )

    service = get_service()
    config_files_changed = helpers.any_file_changed(list_config_files())
    active_plugins_changed = helpers.data_changed("active_plugins", states or "")
    restart_needed = (
        not host.service_running(service)
        or config_files_changed
        or active_plugins_changed
    )
    if restart_needed:
        hookenv.log("Restarting telegraf")
        host.service_restart(service)
    else:
        hookenv.log(
            "Not restarting: active_plugins_changed={} | "
            "config_files_changed={}".format(
                active_plugins_changed, config_files_changed
            )
        )

    # Give telegraf time to restart.
    deadline = time.time() + 15
    while not host.service_running(service) and time.time() < deadline:
        time.sleep(0.1)

    if not host.service_running(service):
        hookenv.status_set("blocked", "Telegraf failed to start. Check config.")
        return

    revision = ""
    if os.path.exists("version"):
        with open("version") as version_file:
            line = version_file.readline().strip()
        # We only want the first 8 characters, that's enough to tell
        # which version of the charm we're using.
        if len(line) > 8:
            revision = " (source version/commit {}…)".format(line[:8])
        else:
            revision = " (source version/commit {})".format(line)
    hookenv.status_set(
        "active", "Monitoring {}{}".format(get_remote_unit_name(), revision)
    )
    clear_flag("telegraf.needs_reload")

1659 

1660 

def is_bcache():
    """Tell whether bcache metrics apply to this machine.

    return true if ``/sys/fs/bcache`` exists and this is not a container.
    """
    in_container = is_container()
    has_bcache = os.path.exists("/sys/fs/bcache")
    return has_bcache and not in_container

1668 

1669 

@hook("update-status")
def update_status():
    """Force a reconfiguration when the base inputs have changed.

    Compares the current ``get_base_inputs()`` value with the one recorded
    under ``detect_changes`` (SHA-256) and, on change, clears the
    ``telegraf.configured``, ``telegraf.apt.configured`` and
    ``telegraf.snap.configured`` flags.
    """
    if not helpers.data_changed(
        "detect_changes", get_base_inputs(), hash_type="sha256"
    ):
        return

    for flag in (
        "telegraf.configured",
        "telegraf.apt.configured",
        "telegraf.snap.configured",
    ):
        clear_flag(flag)

1679 

1680 

@when("nrpe-external-master.available")
@when("telegraf.installed")
@when("telegraf.configured")
@when_not("telegraf.nagios-setup.complete")
def configure_nagios(nagios):
    """Register the telegraf HTTP check with NRPE.

    Adds a ``check_http`` probe against ``/metrics`` on 127.0.0.1 at the
    configured prometheus port, writes the NRPE configuration and sets
    ``telegraf.nagios-setup.complete``.

    The flag 'telegraf.nagios-setup.complete' is reset at the moment config is
    changed, so this should make sure that updates are handled.
    """
    # Use charmhelpers to handle the configuration of nrpe
    nrpe_setup = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)

    # use charmhelpers to create a process check
    check_command = "check_http -I 127.0.0.1 -p {} -u /metrics".format(
        get_prometheus_port()
    )
    nrpe_setup.add_check("telegraf_http", "Telegraf HTTP check", check_command)
    nrpe_setup.write()
    set_flag("telegraf.nagios-setup.complete")

1703 

1704 

1705@when("telegraf.smart_metrics.enabled") 

1706@when_not("apt.smartmontools.installed") 

1707@when_not("apt.nvme.installed") 

1708def install_smart_metrics_packages(): 

1709 apt.queue_install(["smartmontools", "nvme-cli"])