Added cloud_name label to all Prometheus metrics

2026-05-23 20:10:33 +00:00 · 2020-11-24 18:58:23 -05:00
parent 5669f74b43
commit 141c4abd8f
4 changed files with 42 additions and 42 deletions
--- a/lib/api_metrics.py
+++ b/lib/api_metrics.py
@@ -5,10 +5,10 @@ import datetime
 import traceback
 import prometheus_client as prom

-api_metrics = prom.Gauge('openstack_api_response_seconds', 'Time for openstack api to execute.', ['api_name'])
-api_status = prom.Gauge('openstack_api_status', 'API current status. 1 = up 0 = down.',['api_name'])
+api_metrics = prom.Gauge('openstack_api_response_seconds', 'Time for openstack api to execute.', ['api_name','cloud_name'])
+api_status = prom.Gauge('openstack_api_status', 'API current status. 1 = up 0 = down.',['api_name','cloud_name'])

-def generate_nova_metrics(connection):
+def generate_nova_metrics(connection,cloud_name):
    try:
        start_time = datetime.datetime.now()
        for server in connection.compute.servers():
@@ -17,14 +17,14 @@ def generate_nova_metrics(connection):
        time_took = end_time - start_time
        seconds_took = time_took.seconds
        print(f'Nova took {seconds_took} seconds')
-        api_metrics.labels('nova').set(seconds_took)
-        api_status.labels('nova').set(1)
+        api_metrics.labels('nova',cloud_name).set(seconds_took)
+        api_status.labels('nova',cloud_name).set(1)
    except:
        print(traceback.print_exc())
        print("Nova api is down.")
-        api_status.labels('nova').set(0)
+        api_status.labels('nova',cloud_name).set(0)

-def generate_neutron_metrics(connection):
+def generate_neutron_metrics(connection,cloud_name):
    try:
        project = connection.current_project
        start_time = datetime.datetime.now()
@@ -34,14 +34,14 @@ def generate_neutron_metrics(connection):
        time_took = end_time - start_time
        seconds_took = time_took.seconds
        print(f'Neutron took {seconds_took} seconds')
-        api_metrics.labels('neutron').set(seconds_took)
-        api_status.labels('neutron').set(1)
+        api_metrics.labels('neutron',cloud_name).set(seconds_took)
+        api_status.labels('neutron',cloud_name).set(1)
    except:
        print(traceback.print_exc())
        print("Neutron api is down.")
-        api_status.labels('neutron').set(0)
+        api_status.labels('neutron',cloud_name).set(0)

-def generate_cinder_metrics(connection):
+def generate_cinder_metrics(connection,cloud_name):
    try:
        start_time = datetime.datetime.now()
        for volume in  connection.volume.volumes():
@@ -50,8 +50,8 @@ def generate_cinder_metrics(connection):
        time_took = end_time - start_time
        seconds_took = time_took.seconds
        print(f'Cinder took {seconds_took} seconds')
-        api_metrics.labels('cinder').set(seconds_took)
-        api_status.labels('cinder').set(1)
+        api_metrics.labels('cinder',cloud_name).set(seconds_took)
+        api_status.labels('cinder',cloud_name).set(1)
    except:
        print(traceback.print_exc())
        print("Cinder api is down.")
--- a/lib/hypervisor_metrics.py
+++ b/lib/hypervisor_metrics.py
@@ -4,33 +4,33 @@ import openstack
 import datetime
 import prometheus_client as prom

-hypervisor_running_vms = prom.Gauge('openstack_hypervisor_running_vms', 'Number of VMs running on this hypervisor.',['hypervisor_hostname'])
-hypervisor_used_ram_mb = prom.Gauge('openstack_hypervisor_used_ram_mb', 'Total MB of used RAM on the hypervisor.',['hypervisor_hostname'])
-hypervisor_total_ram_mb = prom.Gauge('openstack_hypervisor_total_ram_mb', 'Total MB of RAM on the hypervisor.',['hypervisor_hostname'])
-hypervisor_used_cpus = prom.Gauge('openstack_hypervisor_used_cpus', 'Total VCPUs used on the hypervisor.',['hypervisor_hostname'])
-hypervisor_total_cpus = prom.Gauge('openstack_hypervisor_total_cpus', 'Total VCPUs on the hypervisor.',['hypervisor_hostname'])
-hypervisor_enabled = prom.Gauge('openstack_hypervisor_enabled', 'nova-compute service status on hypervisor. 1 is enabled 0 is disabled.',['hypervisor_hostname'])
-hypervisor_up = prom.Gauge('openstack_hypervisor_up', 'nova-compute service state on hypervisor. 1 is up 0 is down.',['hypervisor_hostname'])
-hypervisor_local_gb_total = prom.Gauge('openstack_hypervisor_local_gb_total', 'Total local disk in GB.',['hypervisor_hostname'])
-hypervisor_local_gb_used = prom.Gauge('openstack_hypervisor_local_gb_used', 'Used local disk in GB.',['hypervisor_hostname'])
+hypervisor_running_vms = prom.Gauge('openstack_hypervisor_running_vms', 'Number of VMs running on this hypervisor.',['hypervisor_hostname','cloud_name'])
+hypervisor_used_ram_mb = prom.Gauge('openstack_hypervisor_used_ram_mb', 'Total MB of used RAM on the hypervisor.',['hypervisor_hostname','cloud_name'])
+hypervisor_total_ram_mb = prom.Gauge('openstack_hypervisor_total_ram_mb', 'Total MB of RAM on the hypervisor.',['hypervisor_hostname','cloud_name'])
+hypervisor_used_cpus = prom.Gauge('openstack_hypervisor_used_cpus', 'Total VCPUs used on the hypervisor.',['hypervisor_hostname','cloud_name'])
+hypervisor_total_cpus = prom.Gauge('openstack_hypervisor_total_cpus', 'Total VCPUs on the hypervisor.',['hypervisor_hostname','cloud_name'])
+hypervisor_enabled = prom.Gauge('openstack_hypervisor_enabled', 'nova-compute service status on hypervisor. 1 is enabled 0 is disabled.',['hypervisor_hostname','cloud_name'])
+hypervisor_up = prom.Gauge('openstack_hypervisor_up', 'nova-compute service state on hypervisor. 1 is up 0 is down.',['hypervisor_hostname','cloud_name'])
+hypervisor_local_gb_total = prom.Gauge('openstack_hypervisor_local_gb_total', 'Total local disk in GB.',['hypervisor_hostname','cloud_name'])
+hypervisor_local_gb_used = prom.Gauge('openstack_hypervisor_local_gb_used', 'Used local disk in GB.',['hypervisor_hostname','cloud_name'])

-def generate_hypervisor_metrics(connection):
+def generate_hypervisor_metrics(connection,cloud_name):
    for hypervisor in connection.list_hypervisors():
        print(f'Getting hypervisor {hypervisor.name} metrics.')
        # See: https://opendev.org/openstack/openstacksdk/src/branch/master/openstack/compute/v2/hypervisor.py
-        hypervisor_running_vms.labels(hypervisor.name).set(hypervisor.running_vms)
-        hypervisor_used_ram_mb.labels(hypervisor.name).set(hypervisor.memory_used)
-        hypervisor_total_ram_mb.labels(hypervisor.name).set(hypervisor.memory_size)
-        hypervisor_used_cpus.labels(hypervisor.name).set(hypervisor.vcpus_used)
-        hypervisor_total_cpus.labels(hypervisor.name).set(hypervisor.vcpus)
-        hypervisor_local_gb_total.labels(hypervisor.name).set(hypervisor.local_disk_size)
-        hypervisor_local_gb_used.labels(hypervisor.name).set(hypervisor.local_disk_used)
+        hypervisor_running_vms.labels(hypervisor.name,cloud_name).set(hypervisor.running_vms)
+        hypervisor_used_ram_mb.labels(hypervisor.name,cloud_name).set(hypervisor.memory_used)
+        hypervisor_total_ram_mb.labels(hypervisor.name,cloud_name).set(hypervisor.memory_size)
+        hypervisor_used_cpus.labels(hypervisor.name,cloud_name).set(hypervisor.vcpus_used)
+        hypervisor_total_cpus.labels(hypervisor.name,cloud_name).set(hypervisor.vcpus)
+        hypervisor_local_gb_total.labels(hypervisor.name,cloud_name).set(hypervisor.local_disk_size)
+        hypervisor_local_gb_used.labels(hypervisor.name,cloud_name).set(hypervisor.local_disk_used)

        if hypervisor.status == "enabled":
-            hypervisor_enabled.labels(hypervisor.name).set(1)
+            hypervisor_enabled.labels(hypervisor.name,cloud_name).set(1)
        else:
-            hypervisor_enabled.labels(hypervisor.name).set(0)
+            hypervisor_enabled.labels(hypervisor.name,cloud_name).set(0)
        if hypervisor.state == "up":
-            hypervisor_up.labels(hypervisor.name).set(1)
+            hypervisor_up.labels(hypervisor.name,cloud_name).set(1)
        else:
-            hypervisor_up.labels(hypervisor.name).set(0)
+            hypervisor_up.labels(hypervisor.name,cloud_name).set(0)
--- a/lib/instance_deploy.py
+++ b/lib/instance_deploy.py
@@ -6,7 +6,7 @@ import datetime
 import traceback
 import prometheus_client as prom

-instance_deploy_metrics = prom.Gauge('openstack_instance_deploy_seconds_to_ping', 'Time to deploy an instance and ping it.', ['hypervisor_hostname'])
+instance_deploy_metrics = prom.Gauge('openstack_instance_deploy_seconds_to_ping', 'Time to deploy an instance and ping it.', ['hypervisor_hostname','cloud_name'])

 def run_pings(ip_address):
    try:
@@ -94,7 +94,7 @@ def create_instance(connection, flavor, image, network, hypervisor):
        print(traceback.print_exc())
        cleanup(connection, f"{instance_name}")

-def get_metrics(connection, flavor, image, network):
+def get_metrics(connection, flavor, image, network, cloud_name):
    instance_image = get_image(connection, image)
    instance_flavor = get_flavor(connection, flavor)
    instance_network = get_network(connection, network)
@@ -107,5 +107,5 @@ def get_metrics(connection, flavor, image, network):
            time_took = end_time - start_time
            seconds_took = time_took.seconds
            print(f'Instance creation on {hypervisor.name} took {seconds_took} seconds.')
-            instance_deploy_metrics.labels(f'{hypervisor.name}').set(seconds_took)
+            instance_deploy_metrics.labels(f'{hypervisor.name}',cloud_name).set(seconds_took)
        cleanup(connection, f"{hypervisor.name}-metric")