diff --git a/README.md b/README.md index f08283a..4572ae5 100644 --- a/README.md +++ b/README.md @@ -35,23 +35,23 @@ openstack_api_status{api_name="horizon",cloud_name="CLOUD_NAME"} # Information ### Standard Metrics Provided -| Metric | Description| -|--------------------------|--------------------------| -| `openstack_api_response_seconds {api_name="API_NAME",cloud_name="CLOUD_NAME"}` | Seconds for the api to respond via openstack sdk. nova, neutron, and cinder are currently recorded. | -| `openstack_api_status {api_name="API_NAME",cloud_name="CLOUD_NAME"}` | Status of the openstack api. 1 = up 0 = down. nova, neutron, and cinder are currently recorded. | -| `openstack_hypervisor_running_vms {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Number of running VMs on every hypervisor in the region. | -| `openstack_hypervisor_used_ram_mb {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Amount of RAM in MB used (as reported by nova-compute) for every hypervisor in the region. | -| `openstack_hypervisor_total_ram_mb {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Amount of RAM in MB in total (as reported by nova-compute) for every hypervisor in the region. | -| `openstack_hypervisor_used_cpus {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Number of vcpus used (as reported by nova-compute) for every hypervisor in the region. | -| `openstack_hypervisor_total_cpus {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Number of vcpus in total (as reported by nova-compute) for every hypervisor in the region. | -| `openstack_hypervisor_enabled {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | nova-compute status for every hypervisor in the region. 1 = enabled 0 = disabled| -| `openstack_hypervisor_up {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | nova-compute state for every hypervisor in the region. 
1 = up 0 = down | -| `openstack_hypervisor_local_gb_total {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}`| Total local disk in GB (as reported by nova-compute) for every hypervisor in the region. | -| `openstack_hypervisor_local_gb_used {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Total local disk used in GB (as reported by nova-compute) for every hypervisor in the region. | +| Metric | Metric Labels | Description| +|-----|-----|-----| +| `openstack_api_response_seconds` | `{api_name="API_NAME",cloud_name="CLOUD_NAME"}` | Seconds for the api to respond via openstack sdk. nova, neutron, and cinder are currently recorded. | +| `openstack_api_status` | `{api_name="API_NAME",cloud_name="CLOUD_NAME"}` | Status of the openstack api. 1 = up 0 = down. nova, neutron, and cinder are currently recorded. | +| `openstack_hypervisor_running_vms` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Number of running VMs on every hypervisor in the region. | +| `openstack_hypervisor_used_ram_mb` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Amount of RAM in MB used (as reported by nova-compute) for every hypervisor in the region. | +| `openstack_hypervisor_total_ram_mb` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Amount of RAM in MB in total (as reported by nova-compute) for every hypervisor in the region. | +| `openstack_hypervisor_used_cpus` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Number of vcpus used (as reported by nova-compute) for every hypervisor in the region. | +| `openstack_hypervisor_total_cpus` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Number of vcpus in total (as reported by nova-compute) for every hypervisor in the region. 
| +| `openstack_hypervisor_enabled` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | nova-compute status for every hypervisor in the region. 1 = enabled 0 = disabled| +| `openstack_hypervisor_up` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | nova-compute state for every hypervisor in the region. 1 = up 0 = down | +| `openstack_hypervisor_local_gb_total` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}`| Total local disk in GB (as reported by nova-compute) for every hypervisor in the region. | +| `openstack_hypervisor_local_gb_used` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME",aggregate="AGGREGATE_NAME"}` | Total local disk used in GB (as reported by nova-compute) for every hypervisor in the region. | ### Optional Metrics (use flags when running) -| Metric | Description | -|-----|-----| -|`openstack_instance_deploy_seconds_to_ping {hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Seconds from deploy command to ping when creating an instance for every hypervisor in the region. Requires --flavor, --image, --network, and --instance_deploy flags. The network used needs to have TCP port 22 (uses TCP instead of ICMP to ping) open in the default security group. | -|`openstack_horizon_response_seconds {cloud_name="CLOUD_NAME"}` | Seconds it takes for Chromium to log into Horizon. Requires --horizon_url flag. | -|`openstack_horizon_status {cloud_name="CLOUD_NAME"}` | Horizon status. 1 = up 0 = down. Requires --horizon_url flag. | \ No newline at end of file +| Metric | Metric Labels | Description | +|-----|-----|-----| +|`openstack_instance_deploy_seconds_to_ping` | `{hypervisor_hostname="HYPERVISOR_NAME",cloud_name="CLOUD_NAME"}` | Seconds from deploy command to ping when creating an instance for every hypervisor in the region. Requires --flavor, --image, --network, and --instance_deploy flags. 
The network used needs to have TCP port 22 (uses TCP instead of ICMP to ping) open in the default security group. | +|`openstack_horizon_response_seconds` | `{cloud_name="CLOUD_NAME"}` | Seconds it takes for Chromium to log into Horizon. Requires --horizon_url flag. | +|`openstack_horizon_status` | `{cloud_name="CLOUD_NAME"}` | Horizon status. 1 = up 0 = down. Requires --horizon_url flag. | \ No newline at end of file diff --git a/grafana-dashboards/Openstack Hypervisors.json b/grafana-dashboards/Openstack Hypervisors.json index d48afd8..c190b4e 100644 --- a/grafana-dashboards/Openstack Hypervisors.json +++ b/grafana-dashboards/Openstack Hypervisors.json @@ -58,9 +58,23 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1606268920451, + "iteration": 1606916848356, "links": [], "panels": [ + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "Hypervisor: $hypervisor", + "type": "row" + }, { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", @@ -68,7 +82,7 @@ "h": 5, "w": 3, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "links": [], @@ -132,7 +146,7 @@ "h": 5, "w": 3, "x": 3, - "y": 0 + "y": 1 }, "id": 3, "links": [], @@ -196,7 +210,7 @@ "h": 5, "w": 3, "x": 6, - "y": 0 + "y": 1 }, "id": 4, "links": [], @@ -259,7 +273,7 @@ "colorValue": false, "colors": [ "#C4162A", - "#C4162A", + "#73BF69", "#299c46" ], "datasource": "${DS_PROMETHEUS}", @@ -275,7 +289,7 @@ "h": 5, "w": 3, "x": 9, - "y": 0 + "y": 1 }, "id": 7, "interval": null, @@ -322,7 +336,7 @@ "refId": "A" } ], - "thresholds": "1,0", + "thresholds": "1,1", "timeFrom": null, "timeShift": null, "title": "Status", @@ -342,13 +356,266 @@ ], "valueName": "current" }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "decmbytes", + "gauge": { + 
"maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 6 + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "(openstack_hypervisor_total_ram_mb{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"} - openstack_hypervisor_used_ram_mb{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"})", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "RAM Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 6 + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, 
+ "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "openstack_hypervisor_total_cpus{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"} - openstack_hypervisor_used_cpus{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "VCPU Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "decgbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 6 + }, + "id": 19, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"openstack_hypervisor_local_gb_total{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"} - openstack_hypervisor_local_gb_used{cloud_name=\"$cloud\",hypervisor_hostname=\"$hypervisor\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Local Disk Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, { "cacheTimeout": null, "colorBackground": true, "colorValue": false, "colors": [ "#C4162A", - "#C4162A", + "#37872D", "#299c46" ], "datasource": "${DS_PROMETHEUS}", @@ -363,8 +630,8 @@ "gridPos": { "h": 5, "w": 3, - "x": 12, - "y": 0 + "x": 9, + "y": 6 }, "id": 8, "interval": null, @@ -411,7 +678,7 @@ "refId": "A" } ], - "thresholds": "1,0", + "thresholds": "1,1", "timeFrom": null, "timeShift": null, "title": "State", @@ -444,7 +711,7 @@ "h": 5, "w": 15, "x": 0, - "y": 5 + "y": 11 }, "id": 5, "legend": { @@ -519,6 +786,553 @@ "align": false, "alignLevel": null } + }, + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 12, + "panels": [], + "title": "Aggregate: $aggregate", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 17 + }, + "id": 14, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "lastNotNull" + ], + "defaults": { + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "nullValueMode": "connected", + "thresholds": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.8 + }, + { + "color": "#d44a3a", + "value": 0.9 + } + ], + "unit": "percentunit" + }, + "override": {}, + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + 
"pluginVersion": "6.4.1", + "targets": [ + { + "expr": "sum(openstack_hypervisor_used_ram_mb{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) / sum(openstack_hypervisor_total_ram_mb{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate RAM Usage", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 17 + }, + "id": 15, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "lastNotNull" + ], + "defaults": { + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "nullValueMode": "connected", + "thresholds": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.8 + }, + { + "color": "#d44a3a", + "value": 0.9 + } + ], + "unit": "percentunit" + }, + "override": {}, + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.4.1", + "targets": [ + { + "expr": "sum(openstack_hypervisor_used_cpus{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) / sum(openstack_hypervisor_total_cpus{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate VCPU Usage", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 17 + }, + "id": 16, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "lastNotNull" + ], + "defaults": { + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 1, + "min": 0, + "nullValueMode": "connected", + "thresholds": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.8 + }, + { + "color": "#d44a3a", + 
"value": 0.9 + } + ], + "unit": "percentunit" + }, + "override": {}, + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.4.1", + "targets": [ + { + "expr": "sum(openstack_hypervisor_local_gb_used{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) / sum(openstack_hypervisor_local_gb_total{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate Local Disk Usage", + "type": "gauge" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "decmbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 22 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(openstack_hypervisor_total_ram_mb{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) - sum(openstack_hypervisor_used_ram_mb{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Aggregate RAM Available", + "type": "singlestat", + "valueFontSize": 
"80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 22 + }, + "id": 21, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(openstack_hypervisor_total_cpus{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) - sum(openstack_hypervisor_used_cpus{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Aggregate VCPU Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "decgbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + 
"gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 22 + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.4.1", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(openstack_hypervisor_local_gb_total{cloud_name=\"$cloud\",aggregate=\"$aggregate\"}) - sum(openstack_hypervisor_local_gb_used{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Aggregate Local Disk Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 15, + "x": 0, + "y": 27 + }, + "id": 13, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.4.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(openstack_hypervisor_running_vms{cloud_name=\"$cloud\",aggregate=\"$aggregate\"})", + "legendFormat": "VMs", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Running VMs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "schemaVersion": 20, @@ -552,14 +1366,36 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\"}, hypervisor_hostname)", + "definition": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\"}, aggregate)", + "hide": 0, + "includeAll": false, + "label": "Aggregate", + "multi": false, + "name": "aggregate", + "options": [], + "query": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\"}, aggregate)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\", aggregate=~\"$aggregate\"}, hypervisor_hostname)", "hide": 0, "includeAll": false, "label": "Hypervisor", "multi": false, "name": "hypervisor", "options": [], - "query": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\"}, hypervisor_hostname)", + "query": "label_values(openstack_hypervisor_used_ram_mb{cloud_name=~\"$cloud\", aggregate=~\"$aggregate\"}, 
hypervisor_hostname)", "refresh": 1, "regex": "", "skipUrlSync": false, @@ -573,7 +1409,7 @@ ] }, "time": { - "from": "now-24h", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -593,5 +1429,5 @@ "timezone": "", "title": "Openstack Hypervisors", "uid": "JdR8Hk0Mk", - "version": 5 + "version": 10 } \ No newline at end of file diff --git a/lib/hypervisor_metrics.py b/lib/hypervisor_metrics.py index 89d482a..1de2006 100644 --- a/lib/hypervisor_metrics.py +++ b/lib/hypervisor_metrics.py @@ -4,33 +4,41 @@ import openstack import datetime import prometheus_client as prom -hypervisor_running_vms = prom.Gauge('openstack_hypervisor_running_vms', 'Number of VMs running on this hypervisor.',['hypervisor_hostname','cloud_name']) -hypervisor_used_ram_mb = prom.Gauge('openstack_hypervisor_used_ram_mb', 'Total MB of used RAM on the hypervisor.',['hypervisor_hostname','cloud_name']) -hypervisor_total_ram_mb = prom.Gauge('openstack_hypervisor_total_ram_mb', 'Total MB of RAM on the hypervisor.',['hypervisor_hostname','cloud_name']) -hypervisor_used_cpus = prom.Gauge('openstack_hypervisor_used_cpus', 'Total VCPUs used on the hypervisor.',['hypervisor_hostname','cloud_name']) -hypervisor_total_cpus = prom.Gauge('openstack_hypervisor_total_cpus', 'Total VCPUs on the hypervisor.',['hypervisor_hostname','cloud_name']) -hypervisor_enabled = prom.Gauge('openstack_hypervisor_enabled', 'nova-compute service status on hypervisor. 1 is enabled 0 is disabled.',['hypervisor_hostname','cloud_name']) -hypervisor_up = prom.Gauge('openstack_hypervisor_up', 'nova-compute service state on hypervisor. 
1 is up 0 is down.',['hypervisor_hostname','cloud_name']) -hypervisor_local_gb_total = prom.Gauge('openstack_hypervisor_local_gb_total', 'Total local disk in GB.',['hypervisor_hostname','cloud_name']) -hypervisor_local_gb_used = prom.Gauge('openstack_hypervisor_local_gb_used', 'Used local disk in GB.',['hypervisor_hostname','cloud_name']) +hypervisor_running_vms = prom.Gauge('openstack_hypervisor_running_vms', 'Number of VMs running on this hypervisor.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_used_ram_mb = prom.Gauge('openstack_hypervisor_used_ram_mb', 'Total MB of used RAM on the hypervisor.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_total_ram_mb = prom.Gauge('openstack_hypervisor_total_ram_mb', 'Total MB of RAM on the hypervisor.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_used_cpus = prom.Gauge('openstack_hypervisor_used_cpus', 'Total VCPUs used on the hypervisor.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_total_cpus = prom.Gauge('openstack_hypervisor_total_cpus', 'Total VCPUs on the hypervisor.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_enabled = prom.Gauge('openstack_hypervisor_enabled', 'nova-compute service status on hypervisor. 1 is enabled 0 is disabled.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_up = prom.Gauge('openstack_hypervisor_up', 'nova-compute service state on hypervisor. 
1 is up 0 is down.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_local_gb_total = prom.Gauge('openstack_hypervisor_local_gb_total', 'Total local disk in GB.',['hypervisor_hostname','cloud_name','aggregate']) +hypervisor_local_gb_used = prom.Gauge('openstack_hypervisor_local_gb_used', 'Used local disk in GB.',['hypervisor_hostname','cloud_name','aggregate']) def generate_hypervisor_metrics(connection,cloud_name): + aggregates = list(connection.list_aggregates()) for hypervisor in connection.list_hypervisors(): print(f'Getting hypervisor {hypervisor.name} metrics.') + aggregate_member = "" + for aggregate in aggregates: + if hypervisor.service_details['host'] in (aggregate.hosts or []): + aggregate_member = aggregate.name + if aggregate_member == "": + aggregate_member = "None" + print(f"Hypervisor {hypervisor.name} is a member of aggregate {aggregate_member}") # See: https://opendev.org/openstack/openstacksdk/src/branch/master/openstack/compute/v2/hypervisor.py - hypervisor_running_vms.labels(hypervisor.name,cloud_name).set(hypervisor.running_vms) - hypervisor_used_ram_mb.labels(hypervisor.name,cloud_name).set(hypervisor.memory_used) - hypervisor_total_ram_mb.labels(hypervisor.name,cloud_name).set(hypervisor.memory_size) - hypervisor_used_cpus.labels(hypervisor.name,cloud_name).set(hypervisor.vcpus_used) - hypervisor_total_cpus.labels(hypervisor.name,cloud_name).set(hypervisor.vcpus) - hypervisor_local_gb_total.labels(hypervisor.name,cloud_name).set(hypervisor.local_disk_size) - hypervisor_local_gb_used.labels(hypervisor.name,cloud_name).set(hypervisor.local_disk_used) + hypervisor_running_vms.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.running_vms) + hypervisor_used_ram_mb.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.memory_used) + hypervisor_total_ram_mb.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.memory_size) + 
hypervisor_used_cpus.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.vcpus_used) + hypervisor_total_cpus.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.vcpus) + hypervisor_local_gb_total.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.local_disk_size) + hypervisor_local_gb_used.labels(hypervisor.name,cloud_name,aggregate_member).set(hypervisor.local_disk_used) if hypervisor.status == "enabled": - hypervisor_enabled.labels(hypervisor.name,cloud_name).set(1) + hypervisor_enabled.labels(hypervisor.name,cloud_name,aggregate_member).set(1) else: - hypervisor_enabled.labels(hypervisor.name,cloud_name).set(0) + hypervisor_enabled.labels(hypervisor.name,cloud_name,aggregate_member).set(0) if hypervisor.state == "up": - hypervisor_up.labels(hypervisor.name,cloud_name).set(1) + hypervisor_up.labels(hypervisor.name,cloud_name,aggregate_member).set(1) else: - hypervisor_up.labels(hypervisor.name,cloud_name).set(0) \ No newline at end of file + hypervisor_up.labels(hypervisor.name,cloud_name,aggregate_member).set(0)