New metric restic_locks_total. Resolves #10

This commit is contained in:
ngosang 2023-07-30 00:37:32 +02:00
parent 3e183cbf83
commit 0bc9a62563
2 changed files with 42 additions and 7 deletions

View file

@ -123,6 +123,7 @@ is `False` (only log error, such as network error with Cloud backends).
reasons. Default is `False` (perform `restic check`). reasons. Default is `False` (perform `restic check`).
- `NO_STATS`: (Optional) Do not collect per backup statistics for performance - `NO_STATS`: (Optional) Do not collect per backup statistics for performance
reasons. Default is `False` (collect per backup statistics). reasons. Default is `False` (collect per backup statistics).
- `NO_LOCKS`: (Optional) Do not collect the number of locks. Default is `False` (collect number of locks).
### Configuration for Rclone ### Configuration for Rclone
@ -154,6 +155,9 @@ services:
# HELP restic_check_success Result of restic check operation in the repository # HELP restic_check_success Result of restic check operation in the repository
# TYPE restic_check_success gauge # TYPE restic_check_success gauge
restic_check_success 1.0 restic_check_success 1.0
# HELP restic_locks_total Total number of locks in the repository
# TYPE restic_locks_total counter
restic_locks_total 1.0
# HELP restic_snapshots_total Total number of snapshots in the repository # HELP restic_snapshots_total Total number of snapshots in the repository
# TYPE restic_snapshots_total counter # TYPE restic_snapshots_total counter
restic_snapshots_total 100.0 restic_snapshots_total 100.0

View file

@ -16,13 +16,14 @@ from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGIS
class ResticCollector(object): class ResticCollector(object):
def __init__( def __init__(
self, repository, password_file, exit_on_error, disable_check, disable_stats self, repository, password_file, exit_on_error, disable_check, disable_stats, disable_locks
): ):
self.repository = repository self.repository = repository
self.password_file = password_file self.password_file = password_file
self.exit_on_error = exit_on_error self.exit_on_error = exit_on_error
self.disable_check = disable_check self.disable_check = disable_check
self.disable_stats = disable_stats self.disable_stats = disable_stats
self.disable_locks = disable_locks
# todo: the stats cache increases over time -> remove old ids # todo: the stats cache increases over time -> remove old ids
# todo: cold start -> the stats cache could be saved in a persistent volume # todo: cold start -> the stats cache could be saved in a persistent volume
# todo: cold start -> the restic cache (/root/.cache/restic) could be # todo: cold start -> the restic cache (/root/.cache/restic) could be
@ -46,37 +47,36 @@ class ResticCollector(object):
"Result of restic check operation in the repository", "Result of restic check operation in the repository",
labels=[], labels=[],
) )
locks_total = CounterMetricFamily(
"restic_locks_total",
"Total number of locks in the repository",
labels=[],
)
snapshots_total = CounterMetricFamily( snapshots_total = CounterMetricFamily(
"restic_snapshots_total", "restic_snapshots_total",
"Total number of snapshots in the repository", "Total number of snapshots in the repository",
labels=[], labels=[],
) )
backup_timestamp = GaugeMetricFamily( backup_timestamp = GaugeMetricFamily(
"restic_backup_timestamp", "restic_backup_timestamp",
"Timestamp of the last backup", "Timestamp of the last backup",
labels=common_label_names, labels=common_label_names,
) )
backup_files_total = CounterMetricFamily( backup_files_total = CounterMetricFamily(
"restic_backup_files_total", "restic_backup_files_total",
"Number of files in the backup", "Number of files in the backup",
labels=common_label_names, labels=common_label_names,
) )
backup_size_total = CounterMetricFamily( backup_size_total = CounterMetricFamily(
"restic_backup_size_total", "restic_backup_size_total",
"Total size of backup in bytes", "Total size of backup in bytes",
labels=common_label_names, labels=common_label_names,
) )
backup_snapshots_total = CounterMetricFamily( backup_snapshots_total = CounterMetricFamily(
"restic_backup_snapshots_total", "restic_backup_snapshots_total",
"Total number of snapshots", "Total number of snapshots",
labels=common_label_names, labels=common_label_names,
) )
scrape_duration_seconds = GaugeMetricFamily( scrape_duration_seconds = GaugeMetricFamily(
"restic_scrape_duration_seconds", "restic_scrape_duration_seconds",
"Ammount of time each scrape takes", "Ammount of time each scrape takes",
@ -84,6 +84,7 @@ class ResticCollector(object):
) )
check_success.add_metric([], self.metrics["check_success"]) check_success.add_metric([], self.metrics["check_success"])
locks_total.add_metric([], self.metrics["locks_total"])
snapshots_total.add_metric([], self.metrics["snapshots_total"]) snapshots_total.add_metric([], self.metrics["snapshots_total"])
for client in self.metrics["clients"]: for client in self.metrics["clients"]:
@ -104,6 +105,7 @@ class ResticCollector(object):
scrape_duration_seconds.add_metric([], self.metrics["duration"]) scrape_duration_seconds.add_metric([], self.metrics["duration"])
yield check_success yield check_success
yield locks_total
yield snapshots_total yield snapshots_total
yield backup_timestamp yield backup_timestamp
yield backup_files_total yield backup_files_total
@ -192,8 +194,15 @@ class ResticCollector(object):
else: else:
check_success = self.get_check() check_success = self.get_check()
if self.disable_locks:
# return 0 as "no-locks" value
locks_total = 0
else:
locks_total = self.get_locks()
metrics = { metrics = {
"check_success": check_success, "check_success": check_success,
"locks_total": locks_total,
"clients": clients, "clients": clients,
"snapshots_total": len(all_snapshots), "snapshots_total": len(all_snapshots),
"duration": time.time() - duration "duration": time.time() - duration
@ -283,6 +292,26 @@ class ResticCollector(object):
) )
return 0 # error return 0 # error
def get_locks(self):
    """Return the number of locks listed by restic for the repository.

    Runs ``restic list locks`` (with ``--no-lock``) against
    ``self.repository`` using ``self.password_file`` and counts the
    entries printed on stdout. The output is treated as one entry per
    line.

    Returns:
        int: the number of non-blank lines in the command output.

    Raises:
        Exception: if the restic command exits with a non-zero status.
    """
    cmd = [
        "restic",
        "-r",
        self.repository,
        "-p",
        self.password_file,
        "--no-lock",
        "list",
        "locks",
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        raise Exception(
            "Error executing restic list locks command: " + self.parse_stderr(result)
        )
    text_result = result.stdout.decode("utf-8")
    # Count non-blank lines rather than newline characters: the result is
    # then correct whether or not the output ends with a trailing newline,
    # and stray blank lines are not mistaken for locks.
    return sum(1 for line in text_result.splitlines() if line.strip())
def calc_snapshot_hash(self, snapshot: dict) -> str: def calc_snapshot_hash(self, snapshot: dict) -> str:
text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"]) text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"])
return hashlib.sha256(text.encode("utf-8")).hexdigest() return hashlib.sha256(text.encode("utf-8")).hexdigest()
@ -323,6 +352,7 @@ if __name__ == "__main__":
exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False)) exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
exporter_disable_check = bool(os.environ.get("NO_CHECK", False)) exporter_disable_check = bool(os.environ.get("NO_CHECK", False))
exporter_disable_stats = bool(os.environ.get("NO_STATS", False)) exporter_disable_stats = bool(os.environ.get("NO_STATS", False))
exporter_disable_locks = bool(os.environ.get("NO_LOCKS", False))
try: try:
collector = ResticCollector( collector = ResticCollector(
@ -331,6 +361,7 @@ if __name__ == "__main__":
exporter_exit_on_error, exporter_exit_on_error,
exporter_disable_check, exporter_disable_check,
exporter_disable_stats, exporter_disable_stats,
exporter_disable_locks,
) )
REGISTRY.register(collector) REGISTRY.register(collector)
start_http_server(exporter_port, exporter_address) start_http_server(exporter_port, exporter_address)