Coverage for lib/lib_openstack_service_checks.py : 31%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import collections
2import glob
3import os
4import pwd
5import re
6import subprocess
7from urllib.parse import urlparse
9import configparser
11from charmhelpers.core.templating import render
12from charmhelpers.contrib.openstack.utils import config_flags_parser
13from charmhelpers.core import hookenv, host, unitdata
14from charmhelpers.contrib.charmsupport.nrpe import NRPE
15from charms.reactive import any_file_changed
16import keystoneauth1
17from keystoneclient import session
class OSCCredentialsError(Exception):
    """Raised when the configured OpenStack credentials are missing or incomplete.

    The exception message carries the name(s) of the missing credential
    attributes.
    """
class OSCEndpointError(OSCCredentialsError):
    """Raised when the Keystone endpoints cannot be retrieved.

    Subclasses OSCCredentialsError, so handlers of the latter also catch it.
    """
class OSCHelper():
    """Helper that renders the NRPE checks for an OpenStack deployment.

    It wraps the charm configuration and knows how to create the nova,
    neutron, contrail, DNS and Keystone-endpoint NRPE checks, and how to
    install, schedule and remove the optional rally check.
    """

    # OpenStack components that the rally check is able to test.
    _RALLY_COMPONENTS = ('cinder', 'glance', 'nova', 'neutron')

    def __init__(self):
        self.charm_config = hookenv.config()
        self._keystone_client = None

    def store_keystone_credentials(self, creds):
        """Store keystone credentials in the unit key/value store.

        The ``rallyinstalled`` flag is reset at the same time, so that
        ``install_rally`` runs its initialisation again.

        :param creds: credential information to persist
        """
        kv = unitdata.kv()
        kv.set('keystonecreds', creds)
        kv.set('rallyinstalled', False)

    @property
    def novarc(self):
        """Path of the novarc file rendered for the nagios user."""
        return '/var/lib/nagios/nagios.novarc'

    @property
    def contrail_analytics_vip(self):
        """Value of the ``contrail_analytics_vip`` charm option."""
        return self.charm_config['contrail_analytics_vip']

    @property
    def plugins_dir(self):
        """Directory where the nagios plugins are installed."""
        return '/usr/local/lib/nagios/plugins/'

    @property
    def scripts_dir(self):
        """Directory where the helper scripts are installed."""
        return '/usr/local/bin/'

    @property
    def rally_cron_file(self):
        """Path of the cron file that schedules the rally run."""
        return '/etc/cron.d/osc_rally'

    @property
    def is_rally_enabled(self):
        """Value of the ``check-rally`` charm option."""
        return self.charm_config['check-rally']

    @property
    def is_neutron_agents_check_enabled(self):
        """Value of the ``check-neutron-agents`` charm option."""
        return self.charm_config['check-neutron-agents']

    @property
    def skipped_rally_checks(self):
        """List of supported components named in the ``skip-rally`` option.

        The option is a comma separated string; entries are stripped and
        lower-cased, and anything that is not a supported component is
        dropped.

        :returns: component names, in the order they were configured
        :rtype: List[str]
        """
        skipped_os_components = (self.charm_config['skip-rally'] or '').strip()
        if not skipped_os_components:
            return []

        requested = (comp.strip().lower()
                     for comp in skipped_os_components.split(','))
        return [comp for comp in requested if comp in self._RALLY_COMPONENTS]

    @property
    def rally_cron_schedule(self):
        """Cron schedule for the rally run.

        Falls back to every 15 minutes unless ``rally-cron-schedule`` holds
        exactly five whitespace separated fields.
        """
        schedule = (self.charm_config['rally-cron-schedule'] or '').strip()
        if len(schedule.split()) != 5:
            return '*/15 * * * *'
        return schedule

    def get_os_credentials(self):
        """Build the keystone credentials from the ``os-credentials`` option.

        An ``auth_url`` containing ``/v3`` selects the v3 layout (domain
        scoped, ``auth_version`` set to 3); anything else yields the v2
        layout using ``tenant_name``.

        :returns: dict of credentials
        :raises OSCCredentialsError: if required attributes are missing; the
            message lists the missing attribute names
        """
        ident_creds = config_flags_parser(self.charm_config['os-credentials'])
        auth_url = ident_creds.get('auth_url')
        if not auth_url:
            raise OSCCredentialsError('auth_url')

        is_v3 = '/v3' in auth_url
        extra_attrs = ['domain'] if is_v3 else []
        creds = {'auth_version': 3} if is_v3 else {}

        common_attrs = ['username', 'password', 'region_name', 'auth_url',
                        'credentials_project']
        all_attrs = common_attrs + extra_attrs
        missing = [k for k in all_attrs if k not in ident_creds]
        if missing:
            raise OSCCredentialsError(', '.join(missing))

        # The URL may have been quoted in the charm option
        ident_creds['auth_url'] = auth_url.strip('\"\'')
        creds.update({k: ident_creds.get(k)
                      for k in all_attrs
                      if k not in ('credentials_project', 'domain')})
        if is_v3:
            creds.update({'project_name': ident_creds['credentials_project'],
                          'user_domain_name': ident_creds['domain'],
                          'project_domain_name': ident_creds['domain'],
                          })
        else:
            creds['tenant_name'] = ident_creds['credentials_project']

        return creds

    def get_keystone_credentials(self):
        """Retrieve the keystone credentials saved in the unit key/value store.

        These are the credentials previously saved with
        ``store_keystone_credentials``.

        :return: dict of credential information for keystone, or None if
            nothing has been stored
        """
        return unitdata.kv().get('keystonecreds')

    @property
    def nova_warn(self):
        """Value of the ``nova_warn`` charm option."""
        return self.charm_config.get('nova_warn')

    @property
    def nova_crit(self):
        """Value of the ``nova_crit`` charm option."""
        return self.charm_config.get('nova_crit')

    @property
    def nova_skip_aggregates(self):
        """Sanitized, comma separated list of host aggregates to skip.

        :returns: the names found in ``skipped_host_aggregates`` joined by
            commas; an empty string if the option is unset or empty
        :rtype: str
        """
        skipped_aggregates = self.charm_config.get('skipped_host_aggregates') or ''
        # We have to make sure there are no malicious injections in the code
        # as this gets passed to a python script via bash: keep only runs of
        # word characters and dashes, everything else acts as a separator.
        return ','.join(re.findall(r'[\w-]+', skipped_aggregates))

    @property
    def skip_disabled(self):
        """``--skip-disabled`` if the ``skip-disabled`` option is set, else ''."""
        return '--skip-disabled' if self.charm_config.get('skip-disabled') else ''

    @property
    def check_dns(self):
        """Value of the ``check-dns`` charm option."""
        return self.charm_config.get('check-dns')

    def update_plugins(self):
        """Sync the plugins shipped with the charm into ``plugins_dir``."""
        charm_plugin_dir = os.path.join(hookenv.charm_dir(), 'files', 'plugins/')
        host.rsync(charm_plugin_dir, self.plugins_dir, options=['--executability'])

    def render_checks(self, creds):
        """Render the novarc file and (re)create the NRPE checks.

        Optional checks (neutron agents, contrail analytics alarms, DNS) are
        added or removed according to the charm configuration. Finally the
        per-endpoint checks are created.

        :param creds: credentials, used as the novarc template context and to
            query Keystone for its endpoints
        """
        render(source='nagios.novarc', target=self.novarc, context=creds,
               owner='nagios', group='nagios')

        nrpe = NRPE()
        if not os.path.exists(self.plugins_dir):
            os.makedirs(self.plugins_dir)

        self.update_plugins()
        nova_check_command = os.path.join(self.plugins_dir, 'check_nova_services.py')
        # NOTE(review): when no aggregates are configured, --skip-aggregates
        # is passed without a value; confirm the plugin accepts that.
        check_command = '{} --warn {} --crit {} --skip-aggregates {} {}'.format(
            nova_check_command, self.nova_warn, self.nova_crit, self.nova_skip_aggregates,
            self.skip_disabled).strip()
        nrpe.add_check(shortname='nova_services',
                       description='Check that enabled Nova services are up',
                       check_cmd=check_command,
                       )

        if self.is_neutron_agents_check_enabled:
            nrpe.add_check(shortname='neutron_agents',
                           description='Check that enabled Neutron agents are up',
                           check_cmd=os.path.join(self.plugins_dir,
                                                  'check_neutron_agents.sh'),
                           )
        else:
            nrpe.remove_check(shortname='neutron_agents')

        if self.contrail_analytics_vip:
            contrail_check_command = '{} --host {}'.format(
                os.path.join(self.plugins_dir, 'check_contrail_analytics_alarms.py'),
                self.contrail_analytics_vip)
            nrpe.add_check(shortname='contrail_analytics_alarms',
                           description='Check Contrail Analytics alarms',
                           check_cmd=contrail_check_command,
                           )
        else:
            nrpe.remove_check(shortname='contrail_analytics_alarms')

        # An unset option (None) is treated like an empty one
        dns_names = (self.check_dns or '').split()
        if dns_names:
            nrpe.add_check(shortname='dns_multi',
                           description='Check DNS names are resolvable',
                           check_cmd='{} {}'.format(
                               os.path.join(self.plugins_dir,
                                            'check_dns_multi.sh'),
                               ' '.join(dns_names)),
                           )
        else:
            nrpe.remove_check(shortname='dns_multi')
        nrpe.write()

        self.create_endpoint_checks(creds)

    def _split_url(self, netloc, scheme):
        """http(s)://host:port or http(s)://host will return a host and a port

        Even if a port is not specified, this helper will return a host and a port
        (guessing it from the protocol used, if needed). Bracketed IPv6
        literals (``[::1]`` or ``[::1]:5000``) are supported; the brackets
        are kept in the returned host.

        :param netloc: network location part as returned by urllib.urlparse
        :type netloc: str
        :param scheme: URL scheme specifier as returned by urllib.urlparse
        :type scheme: str
        :returns: host and port; the port is a str when taken from netloc and
            an int (80 for http, 443 otherwise) when guessed
        :rtype: Tuple[str, Union[str, int]]
        """
        default_port = 80 if scheme == 'http' else 443
        if ':' not in netloc or netloc.endswith(']'):
            # no port specified (a trailing ']' closes a bare IPv6 literal)
            return netloc, default_port

        # Split on the last colon only: IPv6 literals contain colons too
        host, _, port = netloc.rpartition(':')
        if not port:
            # "host:" - empty port, fall back to the scheme default
            return host, default_port
        return host, port

    def create_endpoint_checks(self, creds):
        """
        Create an NRPE check for each Keystone catalog endpoint.

        Read the Keystone catalog, and create a check for each endpoint listed.
        If there is a healthcheck endpoint for the API, use that URL, otherwise check
        the url '/'.
        If SSL, add a check for the cert.

        Endpoints whose service is disabled or unknown are skipped.

        v2 endpoint needs the 'interface' attribute:
        <Endpoint {'id': 'XXXXX', 'region': 'RegionOne', 'publicurl': 'http://10.x.x.x:9696',
        'service_id': 'YYY', 'internalurl': 'http://10.x.x.x:9696', 'enabled': True,
        'adminurl': 'http://10.x.x.x:9696'}>

        :param creds: credentials used to build the Keystone client
        """
        # provide URLs that can be used for healthcheck for some services
        # This also provides a nasty hack-ish way to add switches if we need
        # for some services.
        health_check_params = {
            'aodh': '/healthcheck',
            'barbican': '/v1 -e Unauthorized',
            'ceilometer': '/ -e Unauthorized -d x-openstack-request-id',
            'cinderv1': '/v1 -e Unauthorized -d x-openstack-request-id',
            'cinderv2': '/v2 -e Unauthorized',
            'cinderv3': '/v3 -e Unauthorized -d x-openstack-request-id',
            'designate': '/v2 -e Unauthorized',
            'glance': '/healthcheck',
            'gnocchi': '/v1 -e Unauthorized',
            'heat': '/v1 -e Unauthorized',
            'keystone': '/healthcheck',
            'nova': '/healthcheck',
            'octavia': '/v2 -e Unauthorized',
            'placement': '/healthcheck -e Unauthorized -d x-openstack-request-id',
            's3': '/healthcheck',
            'swift': self.charm_config.get('swift_check_params', '/'),
        }

        self.get_keystone_client(creds)
        endpoints = self.keystone_endpoints
        services = [svc for svc in self.keystone_services if svc.enabled]
        nrpe = NRPE()
        for endpoint in endpoints:
            endpoint.service_names = [x.name
                                      for x in services
                                      if x.id == endpoint.service_id]
            if not endpoint.service_names:
                # The endpoint belongs to a disabled (or unknown) service
                continue
            service_name = endpoint.service_names[0]
            endpoint.healthcheck_url = health_check_params.get(service_name, '/')

            # Note(aluria): glance-simplestreams-sync does not provide an API to check
            if service_name == 'image-stream':
                continue

            if not hasattr(endpoint, 'interface'):
                if service_name == 'keystone':
                    # Note(aluria): filter:healthcheck is not configured in v2
                    # https://docs.openstack.org/keystone/pike/configuration.html#health-check-middleware
                    continue
                # v2 endpoint: use the first of the admin/internal/public URLs
                for interface in ('admin', 'internal', 'public'):
                    old_interface_name = '{}url'.format(interface)
                    if hasattr(endpoint, old_interface_name):
                        endpoint.interface = interface
                        endpoint.url = getattr(endpoint, old_interface_name)
                        break
                else:
                    # No URL at all on this endpoint: nothing to check
                    continue

            check_url = urlparse(endpoint.url)
            check_name = '{}_{}'.format(service_name, endpoint.interface)
            cert_check_name = '{}_{}_cert'.format(service_name, endpoint.interface)
            if not self.charm_config.get('check_{}_urls'.format(endpoint.interface)):
                nrpe.remove_check(shortname=check_name)
                if check_url.scheme == 'https':
                    nrpe.remove_check(shortname=cert_check_name)
                continue

            cmd_params = ['/usr/lib/nagios/plugins/check_http']
            host, port = self._split_url(check_url.netloc, check_url.scheme)
            cmd_params.append('-H {} -p {}'.format(host, port))
            cmd_params.append('-u {}'.format(endpoint.healthcheck_url))

            # if this is https, we want to add a check for cert expiry
            # also need to tell check_http to use TLS
            if check_url.scheme == 'https':
                cmd_params.append('-S')
                # Add an extra check for TLS cert expiry
                cmd_params_cert = cmd_params.copy()
                cmd_params_cert.append('-C {},{}'.format(self.charm_config['tls_warn_days'] or 30,
                                                         self.charm_config['tls_crit_days'] or 14))
                nrpe.add_check(shortname=cert_check_name,
                               description='Certificate expiry check for {} {}'.format(
                                   service_name, endpoint.interface),
                               check_cmd=' '.join(cmd_params_cert))

            # Add the actual health check for the URL
            nrpe.add_check(shortname=check_name,
                           description='Endpoint url check for {} {}'.format(
                               service_name, endpoint.interface),
                           check_cmd=' '.join(cmd_params))

        nrpe.write()

    def get_keystone_client(self, creds):
        """Import the appropriate Keystone client depending on API version.

        Use credential info to determine the Keystone API version, and make a
        client session object that is to be used for authenticated
        communication with Keystone. The client is also kept on the instance
        for the ``keystone_endpoints`` and ``keystone_services`` properties.

        :param creds: credentials; ``auth_version`` >= 3 selects the v3 API
        :returns: a keystoneclient Client object
        :raises OSCEndpointError: if no client could be created
        """
        if int(creds.get('auth_version', 0)) >= 3:
            from keystoneclient.v3 import client
            from keystoneclient.auth.identity import v3 as kst_version
            auth_fields = ('username', 'password', 'auth_url', 'user_domain_name',
                           'project_domain_name', 'project_name')
        else:
            from keystoneclient.v2_0 import client
            from keystoneclient.auth.identity import v2 as kst_version
            auth_fields = ('username', 'password', 'auth_url', 'tenant_name')

        auth_creds = {key: creds.get(key) for key in auth_fields}
        auth = kst_version.Password(**auth_creds)
        sess = session.Session(auth=auth)
        self._keystone_client = client.Client(session=sess)

        # Defensive check, kept from the original implementation
        if self._keystone_client is None:
            raise OSCEndpointError('Unable to list the endpoint errors, yet: '
                                   'could not connect to the Identity Service')
        return self._keystone_client

    @property
    def keystone_endpoints(self):
        """Endpoints listed by Keystone.

        Requires ``get_keystone_client`` to have been called first.

        :raises OSCEndpointError: if Keystone answers with an internal server error
        """
        try:
            return self._keystone_client.endpoints.list()
        except keystoneauth1.exceptions.http.InternalServerError as error:
            raise OSCEndpointError(
                'Unable to list the keystone endpoints, yet: {}'.format(error)) from error

    @property
    def keystone_services(self):
        """Services listed by Keystone (``get_keystone_client`` must be called first)."""
        return self._keystone_client.services.list()

    @property
    def _load_envvars(self, novarc='/var/lib/nagios/nagios.novarc'):
        """Export the ``OS_*`` variables defined by the novarc file.

        The file is sourced in a bash subprocess and every resulting
        ``OS_*`` variable is copied into this process' environment.
        As this is a property, ``novarc`` always takes its default value.

        :returns: True if at least 3 variables were loaded, False otherwise
            (including when the novarc file does not exist)
        :rtype: bool
        """
        if not os.path.exists(novarc):
            return False

        output = subprocess.check_output(['/bin/bash', '-c', 'source {} && env'.format(novarc)])
        loaded = 0
        for line in output.decode('utf-8').splitlines():
            if not line.startswith('OS_'):
                continue
            # Split on the first '=' only: values may contain '=' themselves
            key, sep, value = line.partition('=')
            if not sep:
                continue
            os.environ[key] = value
            loaded += 1

        return loaded >= 3

    def _run_as(self, user, user_cmd):
        """Run a command as another user via sudo, preserving the environment.

        :param user: name of an existing system user
        :param user_cmd: command, as a whitespace separated string or a list
        :returns: True if the command succeeded; False if the user does not
            exist, the command has an unsupported type or it failed
        :rtype: bool
        """
        try:
            pwd.getpwnam(user)
            # preserve envvars and run as `user`
            cmd = ['sudo', '-Eu', user]

            # convert command into a list
            if isinstance(user_cmd, str):
                # split string into arguments
                cmd.extend(user_cmd.split())
            elif isinstance(user_cmd, list):
                cmd.extend(user_cmd)
            else:
                hookenv.log("_run_as - can't run as user {} the command: {}".format(user, user_cmd))
                return False

            subprocess.check_call(cmd)
            return True

        except KeyError as error:
            hookenv.log('_run_as - user does not exist => {}'.format(str(error)))
            return False
        except subprocess.CalledProcessError as error:
            hookenv.log('_run_as - cmd failed => {}'.format(str(error)))
            # check_call does not capture output, so these are normally unset
            if error.stderr:
                hookenv.log('_run_as stderr => {}'.format(error.stderr))
            if error.stdout:
                hookenv.log('_run_as stdout => {}'.format(error.stdout))
            return False

    @property
    def _rallyuser(self):
        """System user that owns and runs the rally checks."""
        return 'nagiososc'

    def install_rally(self):
        """Create the rally user and initialise rally and tempest.

        Does nothing if the ``rallyinstalled`` flag is already set.

        :returns: True if rally is (already) installed, False if the novarc
            environment could not be loaded or a tool failed to initialise
        :rtype: bool
        """
        kv = unitdata.kv()
        if kv.get('rallyinstalled', False):
            return True

        if not self._load_envvars:
            hookenv.log('install_rally - could not load nagios.novarc')
            return False

        user = self._rallyuser
        host.adduser(user)
        host.mkdir(os.path.join('/home', user), owner=user, group=user, perms=0o755, force=False)

        for tool in ['rally', 'tempest']:
            toolname = 'fcbtest.{}init'.format(tool)
            if not self._run_as(user, [toolname]):
                hookenv.log('install_rally - could not initialize {}'.format(tool))
                return False

        kv.set('rallyinstalled', True)
        return True

    def _regenerate_tempest_conf(self, tempestfile):
        """Rewrite tempest.conf with the cinder service enabled.

        ``[service_available] cinder`` is set to ``True`` when that option is
        present in the section and is not inherited from ``DEFAULT``. The
        file is always written back.

        :param tempestfile: path of the tempest.conf file to update in place
        """
        config = configparser.ConfigParser()
        config.read(tempestfile)

        # Only the one option is touched; values are not read back, so
        # options containing '%' do not trip the parser's interpolation.
        section = 'service_available'
        if config.has_section(section):
            options = config[section]
            # Enable Cinder, which is a default OpenStack service
            if 'cinder' in options and 'cinder' not in config.defaults():
                options['cinder'] = 'True'

        with open(tempestfile, 'w') as fd:
            config.write(fd)

    def reconfigure_tempest(self):
        """Expects an external network already configured, and enables cinder tests

        The verifier and deployment UUIDs are read from rally's ``globals``
        file; a UUID that is not listed there is replaced by a ``*`` wildcard
        when looking for tempest.conf.

        Sample:
        RALLY_VERIFIER=7b9d06ef-e651-4da3-a56b-ecac67c595c5
        RALLY_VERIFICATION=4a730963-083f-4e1e-8c55-f2b4b9c9c0ac
        RALLY_DEPLOYMENT=a75657c6-9eea-4f00-9117-2580fe056a80
        RALLY_ENV=a75657c6-9eea-4f00-9117-2580fe056a80

        :returns: True if tempest.conf was regenerated, False if rally's
            globals or tempest.conf are missing, or tempest.conf is unchanged
        :rtype: bool
        """
        rally_conf = os.path.join('/home', self._rallyuser, 'snap', 'fcbtest',
                                  'current', '.rally')
        rally_globalconfig = os.path.join(rally_conf, 'globals')
        if not os.path.isfile(rally_globalconfig):
            return False

        # '*' makes glob match any directory when a UUID is unknown
        uuids = {'RALLY_VERIFIER': '*', 'RALLY_DEPLOYMENT': '*'}
        with open(rally_globalconfig, 'r') as fd:
            for line in fd:
                key, sep, value = line.strip().partition('=')
                if sep and key in uuids:
                    uuids[key] = value

        tempest_path = os.path.join(rally_conf, 'verification',
                                    'verifier-{RALLY_VERIFIER}'.format(**uuids),
                                    'for-deployment-{RALLY_DEPLOYMENT}'.format(**uuids),
                                    'tempest.conf')
        tempestfile = glob.glob(tempest_path)
        if not tempestfile:
            # No tempest.conf file generated, yet
            return False

        if not any_file_changed([tempestfile[0]]):
            return False

        self._regenerate_tempest_conf(tempestfile[0])
        return True

    def _get_rally_checks_context(self):
        """Map each supported component to whether its rally check is enabled.

        :returns: dict of component name to bool (False when skipped)
        :rtype: Dict[str, bool]
        """
        os_components_skip_list = self.skipped_rally_checks
        return {comp: comp not in os_components_skip_list
                for comp in self._RALLY_COMPONENTS}

    def update_rally_checkfiles(self):
        """Install the rally runner script, its test list and its cron file.

        Does nothing when the rally check is disabled.
        """
        if not self.is_rally_enabled:
            return

        # Copy run_rally.py to /usr/local/bin
        rally_script = os.path.join(hookenv.charm_dir(), 'files', 'run_rally.py')
        host.rsync(rally_script, self.scripts_dir, options=['--executability'])

        ostestsfile = os.path.join('/home', self._rallyuser, 'ostests.txt')
        render(source='ostests.txt.j2', target=ostestsfile,
               context=self._get_rally_checks_context(),
               owner=self._rallyuser, group=self._rallyuser)

        # Proxy settings, if any, go first in the cron file as VAR=value lines
        proxy_settings = hookenv.env_proxy_settings()
        if proxy_settings:
            content = '\n'.join('{}={}'.format(proxy_var, proxy_var_val)
                                for proxy_var, proxy_var_val in proxy_settings.items())
        else:
            content = ''

        context = {
            'schedule': self.rally_cron_schedule,
            'user': self._rallyuser,
            'cmd': os.path.join(self.scripts_dir, 'run_rally.py'),
        }
        content += '\n#\n{schedule} {user} timeout -k 840s -s SIGTERM 780s {cmd}'.format(**context)
        with open(self.rally_cron_file, 'w') as fd:
            fd.write('# Juju generated - DO NOT EDIT\n{}\n\n'.format(content))

    def configure_rally_check(self):
        """Install the rally files and NRPE check, once.

        Does nothing if the ``rallyconfigured`` flag is already set.
        """
        kv = unitdata.kv()
        if kv.get('rallyconfigured', False):
            return

        self.update_rally_checkfiles()
        rally_check = os.path.join(self.plugins_dir, 'check_rally.py')
        nrpe = NRPE()
        nrpe.add_check(shortname='rally',
                       description='Check that all rally tests pass',
                       check_cmd=rally_check,
                       )
        nrpe.write()
        kv.set('rallyconfigured', True)

    def remove_rally_check(self):
        """Remove the rally cron file and the rally NRPE check, if present."""
        filename = self.rally_cron_file
        if os.path.exists(filename):
            os.unlink(filename)

        if os.path.exists('/etc/nagios/nrpe.d/check_rally.cfg'):
            nrpe = NRPE()
            nrpe.remove_check(shortname='rally')
            nrpe.write()

    def deploy_rally(self):
        """Install and configure, or remove, the rally check as configured.

        :returns: False if rally is enabled but could not be installed,
            True otherwise
        :rtype: bool
        """
        if self.is_rally_enabled:
            if not self.install_rally():
                return False
            self.configure_rally_check()
        else:
            self.remove_rally_check()
            unitdata.kv().set('rallyconfigured', False)
        return True