Create opnsense_server_ha_state metric

in order to replace `opnsense_main_ha_state`
and `opnsense_backup_ha_state`, which will be removed
in version 1.0.0.

Also use an Enum to store the possible OPNsense HA states to avoid typos
This commit is contained in:
Pierre Verkest 2023-09-04 00:06:17 +02:00
parent 7501a0d9b8
commit 76b1338ae6
4 changed files with 68 additions and 34 deletions

View file

@ -30,8 +30,13 @@ This exporter gives following metrics, all metrics received following labels:
### Enums ### Enums
- `opnsense_main_ha_state`: OPNSense HA state of the MAIN server - `opnsense_main_ha_state`: (deprecated) OPNSense HA state of the MAIN server
- `opnsense_backup_ha_state`: OPNSense HA state of the BACKUP server - `opnsense_backup_ha_state`: (deprecated) OPNSense HA state of the BACKUP server
- `opnsense_server_ha_state`: OPNSense HA state, one of the following values:
  - **active**: the OPNSense server is receiving traffic
  - **hot_standby**: the OPNSense server is ready to be promoted to active server
  - **maintenancemode**: the OPNSense server was put into maintenance mode
  - **unavailable**: the OPNSense server wasn't accessible or returned an unexpected value
### Gauges ### Gauges
@ -93,10 +98,8 @@ You can setup env through `.env` file or environment variables with defined as d
## Roadmap ## Roadmap
- allow to configure interfaces to get traffic rates for lan,wan and/or other names
- refactor server in a class to avoid transmitted params over methods
- allow to change the listening port (today it force using `8000`) - allow to change the listening port (today it force using `8000`)
- allow to configure timeouts using environemnt variables - allow to configure timeouts using environment variables
- improves logging to get a debug mode to understand errors based on unexpected payloads - improves logging to get a debug mode to understand errors based on unexpected payloads
## Changelog ## Changelog
@ -108,6 +111,8 @@ You can setup env through `.env` file or environment variables with defined as d
- replace `active_server_bytes_received` and - replace `active_server_bytes_received` and
`active_server_bytes_transmitted` by `active_server_bytes_transmitted` by
`opnsense_active_server_traffic_rate` `opnsense_active_server_traffic_rate`
- add `opnsense_server_ha_state` and mark `opnsense_main_ha_state`
and `opnsense_backup_ha_state` as deprecated.
### Version 0.4.0 (2023-09-02) ### Version 0.4.0 (2023-09-02)

View file

@ -7,6 +7,13 @@ from requests import RequestException
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class OPNSenseHAState(Enum):
    """High-availability states an OPNsense server can be reported in.

    Each member's value is the plain string used as the metric state, so
    code can compare against members instead of retyping string literals.
    """

    # Member order is significant: the exporter builds its list of metric
    # states by iterating over this enum.
    ACTIVE = "active"
    HOT_STANDBY = "hot_standby"
    UNAVAILABLE = "unavailable"
    MAINTENANCE_MODE = "maintenancemode"
class OPNSenseTrafficMetric(Enum): class OPNSenseTrafficMetric(Enum):
IN = "rate_bits_in" IN = "rate_bits_in"
OUT = "rate_bits_out" OUT = "rate_bits_out"
@ -77,26 +84,26 @@ class OPNSenseAPI:
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
def get_interface_vip_status(self): def get_interface_vip_status(self) -> OPNSenseHAState:
try: try:
data = self.get("/api/diagnostics/interface/get_vip_status/") data = self.get("/api/diagnostics/interface/get_vip_status/")
except RequestException as ex: except RequestException as ex:
logger.error( logger.error(
"Get VIP STATUS on %s failed with the following error %r", self.host, ex "Get VIP STATUS on %s failed with the following error %r", self.host, ex
) )
return "unavailable" return OPNSenseHAState.UNAVAILABLE
if data["carp"]["maintenancemode"]: if data["carp"]["maintenancemode"]:
return "maintenancemode" return OPNSenseHAState.MAINTENANCE_MODE
is_active = all([row["status"] == "MASTER" for row in data["rows"]]) is_active = all([row["status"] == "MASTER" for row in data["rows"]])
if is_active: if is_active:
return "active" return OPNSenseHAState.ACTIVE
is_backup = all([row["status"] == "BACKUP" for row in data["rows"]]) is_backup = all([row["status"] == "BACKUP" for row in data["rows"]])
if is_backup: if is_backup:
return "hot_standby" return OPNSenseHAState.HOT_STANDBY
logger.warning( logger.warning(
"this host %s is no active nor backup received payload %s", self.host, data "this host %s is no active nor backup received payload %s", self.host, data
) )
return "unavailable" return OPNSenseHAState.UNAVAILABLE
def get_traffic(self, interfaces): def get_traffic(self, interfaces):
try: try:

View file

@ -1,4 +1,5 @@
import argparse import argparse
import logging
import os import os
import socket import socket
import time import time
@ -6,12 +7,21 @@ import time
from dotenv import load_dotenv from dotenv import load_dotenv
from prometheus_client import Enum, Gauge, start_http_server from prometheus_client import Enum, Gauge, start_http_server
from opnsense_exporter.opnsense_api import OPNSenseAPI, OPNSenseRole from opnsense_exporter.opnsense_api import OPNSenseAPI, OPNSenseHAState, OPNSenseRole
logger = logging.getLogger(__name__)
load_dotenv() load_dotenv()
HA_STATES = ["active", "hot_standby", "unavailable", "maintenancemode"] HA_STATES = [enum.value for enum in list(OPNSenseHAState)]
main_ha_state = Enum(
class DeprecatedPromEnum(Enum):
    """Prometheus ``Enum`` metric that is scheduled for removal in v1.0.0.

    Behaves like the ``prometheus_client`` ``Enum`` it extends, and
    additionally logs a deprecation notice when its state is set.

    The class intentionally does not inherit from ``DeprecationWarning``:
    a metric object is not a warning category and must not be an exception.
    """

    # Metric names whose deprecation has already been logged. Keyed by name
    # so the notice is emitted once per metric instead of on every update.
    _warned_metrics = set()

    def state(self, *args, **kwargs):
        """Set the metric state and log the deprecation notice once.

        All arguments are forwarded unchanged to the parent ``state``.
        """
        super().state(*args, **kwargs)
        if self._name not in DeprecatedPromEnum._warned_metrics:
            DeprecatedPromEnum._warned_metrics.add(self._name)
            logger.warning("This metric %s will be removed in v1.0.0", self._name)
main_ha_state = DeprecatedPromEnum(
"opnsense_main_ha_state", "opnsense_main_ha_state",
"OPNSense HA state of the MAIN server", "OPNSense HA state of the MAIN server",
[ [
@ -21,7 +31,7 @@ main_ha_state = Enum(
], ],
states=HA_STATES, states=HA_STATES,
) )
backup_ha_state = Enum( backup_ha_state = DeprecatedPromEnum(
"opnsense_backup_ha_state", "opnsense_backup_ha_state",
"OPNSense HA state of the BACKUP server", "OPNSense HA state of the BACKUP server",
[ [
@ -31,6 +41,18 @@ backup_ha_state = Enum(
], ],
states=HA_STATES, states=HA_STATES,
) )
# HA state metric, labelled by exporter instance, OPNsense host and role.
opnsense_server_ha_state = Enum(
    "opnsense_server_ha_state",
    "OPNSense server HA state",
    ["instance", "host", "role"],
    states=HA_STATES,
)
opnsense_active_server_traffic_rate = Gauge( opnsense_active_server_traffic_rate = Gauge(
"opnsense_active_server_traffic_rate", "opnsense_active_server_traffic_rate",
"Active OPNSense server bytes in/out per interface", "Active OPNSense server bytes in/out per interface",
@ -64,15 +86,15 @@ class OPNSensePrometheusExporter:
main_state = self.main.get_interface_vip_status() main_state = self.main.get_interface_vip_status()
backup_sate = self.backup.get_interface_vip_status() backup_sate = self.backup.get_interface_vip_status()
main_ha_state.labels(instance=self.exporter_instance, **self.main.labels).state( main_ha_state.labels(instance=self.exporter_instance, **self.main.labels).state(
main_state main_state.value
) )
backup_ha_state.labels( backup_ha_state.labels(
instance=self.exporter_instance, **self.backup.labels instance=self.exporter_instance, **self.backup.labels
).state(backup_sate) ).state(backup_sate.value)
active_opnsense = None active_opnsense = None
if main_state == "active": if main_state == OPNSenseHAState.ACTIVE:
active_opnsense = self.main active_opnsense = self.main
if backup_sate == "active": if backup_sate == OPNSenseHAState.ACTIVE:
active_opnsense = self.backup active_opnsense = self.backup
if active_opnsense: if active_opnsense:
for traffic in active_opnsense.get_traffic(self.interfaces): for traffic in active_opnsense.get_traffic(self.interfaces):

View file

@ -26,9 +26,9 @@ def test_get_interface_vip_status_active():
) )
assert ( assert (
OPNSenseAPI( OPNSenseAPI(OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD)
OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD .get_interface_vip_status()
).get_interface_vip_status() .value
== "active" == "active"
) )
@ -42,9 +42,9 @@ def test_get_interface_vip_status_backup():
) )
assert ( assert (
OPNSenseAPI( OPNSenseAPI(OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD)
OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD .get_interface_vip_status()
).get_interface_vip_status() .value
== "hot_standby" == "hot_standby"
) )
@ -58,9 +58,9 @@ def test_get_interface_vip_status_mainteance_mode():
) )
assert ( assert (
OPNSenseAPI( OPNSenseAPI(OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD)
OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD .get_interface_vip_status()
).get_interface_vip_status() .value
== "maintenancemode" == "maintenancemode"
) )
@ -73,9 +73,9 @@ def test_get_interface_vip_status_unavailable_weird_case():
body=generate_get_vip_status_paylaod("MASTER", "BACKUP", False), body=generate_get_vip_status_paylaod("MASTER", "BACKUP", False),
) )
assert ( assert (
OPNSenseAPI( OPNSenseAPI(OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD)
OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD .get_interface_vip_status()
).get_interface_vip_status() .value
== "unavailable" == "unavailable"
) )
@ -89,9 +89,9 @@ def test_get_interface_vip_status_unavailable_rest_api_error():
status=404, status=404,
) )
assert ( assert (
OPNSenseAPI( OPNSenseAPI(OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD)
OPNSenseRole.MAIN, MAIN_HOST, LOGIN, PASSWORD .get_interface_vip_status()
).get_interface_vip_status() .value
== "unavailable" == "unavailable"
) )