New metric restic_locks_total. Resolves #10
This commit is contained in:
parent
3e183cbf83
commit
0bc9a62563
2 changed files with 42 additions and 7 deletions
|
@ -123,6 +123,7 @@ is `False` (only log error, such as network error with Cloud backends).
|
||||||
reasons. Default is `False` (perform `restic check`).
|
reasons. Default is `False` (perform `restic check`).
|
||||||
- `NO_STATS`: (Optional) Do not collect per backup statistics for performance
|
- `NO_STATS`: (Optional) Do not collect per backup statistics for performance
|
||||||
reasons. Default is `False` (collect per backup statistics).
|
reasons. Default is `False` (collect per backup statistics).
|
||||||
|
- `NO_LOCKS`: (Optional) Do not collect the number of locks. Default is `False` (collect number of locks).
|
||||||
|
|
||||||
### Configuration for Rclone
|
### Configuration for Rclone
|
||||||
|
|
||||||
|
@ -154,6 +155,9 @@ services:
|
||||||
# HELP restic_check_success Result of restic check operation in the repository
|
# HELP restic_check_success Result of restic check operation in the repository
|
||||||
# TYPE restic_check_success gauge
|
# TYPE restic_check_success gauge
|
||||||
restic_check_success 1.0
|
restic_check_success 1.0
|
||||||
|
# HELP restic_locks_total Total number of locks in the repository
|
||||||
|
# TYPE restic_locks_total counter
|
||||||
|
restic_locks_total 1.0
|
||||||
# HELP restic_snapshots_total Total number of snapshots in the repository
|
# HELP restic_snapshots_total Total number of snapshots in the repository
|
||||||
# TYPE restic_snapshots_total counter
|
# TYPE restic_snapshots_total counter
|
||||||
restic_snapshots_total 100.0
|
restic_snapshots_total 100.0
|
||||||
|
|
|
@ -16,13 +16,14 @@ from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGIS
|
||||||
|
|
||||||
class ResticCollector(object):
|
class ResticCollector(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self, repository, password_file, exit_on_error, disable_check, disable_stats
|
self, repository, password_file, exit_on_error, disable_check, disable_stats, disable_locks
|
||||||
):
|
):
|
||||||
self.repository = repository
|
self.repository = repository
|
||||||
self.password_file = password_file
|
self.password_file = password_file
|
||||||
self.exit_on_error = exit_on_error
|
self.exit_on_error = exit_on_error
|
||||||
self.disable_check = disable_check
|
self.disable_check = disable_check
|
||||||
self.disable_stats = disable_stats
|
self.disable_stats = disable_stats
|
||||||
|
self.disable_locks = disable_locks
|
||||||
# todo: the stats cache increases over time -> remove old ids
|
# todo: the stats cache increases over time -> remove old ids
|
||||||
# todo: cold start -> the stats cache could be saved in a persistent volume
|
# todo: cold start -> the stats cache could be saved in a persistent volume
|
||||||
# todo: cold start -> the restic cache (/root/.cache/restic) could be
|
# todo: cold start -> the restic cache (/root/.cache/restic) could be
|
||||||
|
@ -46,37 +47,36 @@ class ResticCollector(object):
|
||||||
"Result of restic check operation in the repository",
|
"Result of restic check operation in the repository",
|
||||||
labels=[],
|
labels=[],
|
||||||
)
|
)
|
||||||
|
locks_total = CounterMetricFamily(
|
||||||
|
"restic_locks_total",
|
||||||
|
"Total number of locks in the repository",
|
||||||
|
labels=[],
|
||||||
|
)
|
||||||
snapshots_total = CounterMetricFamily(
|
snapshots_total = CounterMetricFamily(
|
||||||
"restic_snapshots_total",
|
"restic_snapshots_total",
|
||||||
"Total number of snapshots in the repository",
|
"Total number of snapshots in the repository",
|
||||||
labels=[],
|
labels=[],
|
||||||
)
|
)
|
||||||
|
|
||||||
backup_timestamp = GaugeMetricFamily(
|
backup_timestamp = GaugeMetricFamily(
|
||||||
"restic_backup_timestamp",
|
"restic_backup_timestamp",
|
||||||
"Timestamp of the last backup",
|
"Timestamp of the last backup",
|
||||||
labels=common_label_names,
|
labels=common_label_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
backup_files_total = CounterMetricFamily(
|
backup_files_total = CounterMetricFamily(
|
||||||
"restic_backup_files_total",
|
"restic_backup_files_total",
|
||||||
"Number of files in the backup",
|
"Number of files in the backup",
|
||||||
labels=common_label_names,
|
labels=common_label_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
backup_size_total = CounterMetricFamily(
|
backup_size_total = CounterMetricFamily(
|
||||||
"restic_backup_size_total",
|
"restic_backup_size_total",
|
||||||
"Total size of backup in bytes",
|
"Total size of backup in bytes",
|
||||||
labels=common_label_names,
|
labels=common_label_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
backup_snapshots_total = CounterMetricFamily(
|
backup_snapshots_total = CounterMetricFamily(
|
||||||
"restic_backup_snapshots_total",
|
"restic_backup_snapshots_total",
|
||||||
"Total number of snapshots",
|
"Total number of snapshots",
|
||||||
labels=common_label_names,
|
labels=common_label_names,
|
||||||
)
|
)
|
||||||
|
|
||||||
scrape_duration_seconds = GaugeMetricFamily(
|
scrape_duration_seconds = GaugeMetricFamily(
|
||||||
"restic_scrape_duration_seconds",
|
"restic_scrape_duration_seconds",
|
||||||
"Ammount of time each scrape takes",
|
"Ammount of time each scrape takes",
|
||||||
|
@ -84,6 +84,7 @@ class ResticCollector(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
check_success.add_metric([], self.metrics["check_success"])
|
check_success.add_metric([], self.metrics["check_success"])
|
||||||
|
locks_total.add_metric([], self.metrics["locks_total"])
|
||||||
snapshots_total.add_metric([], self.metrics["snapshots_total"])
|
snapshots_total.add_metric([], self.metrics["snapshots_total"])
|
||||||
|
|
||||||
for client in self.metrics["clients"]:
|
for client in self.metrics["clients"]:
|
||||||
|
@ -104,6 +105,7 @@ class ResticCollector(object):
|
||||||
scrape_duration_seconds.add_metric([], self.metrics["duration"])
|
scrape_duration_seconds.add_metric([], self.metrics["duration"])
|
||||||
|
|
||||||
yield check_success
|
yield check_success
|
||||||
|
yield locks_total
|
||||||
yield snapshots_total
|
yield snapshots_total
|
||||||
yield backup_timestamp
|
yield backup_timestamp
|
||||||
yield backup_files_total
|
yield backup_files_total
|
||||||
|
@ -192,8 +194,15 @@ class ResticCollector(object):
|
||||||
else:
|
else:
|
||||||
check_success = self.get_check()
|
check_success = self.get_check()
|
||||||
|
|
||||||
|
if self.disable_locks:
|
||||||
|
# return 0 as "no-locks" value
|
||||||
|
locks_total = 0
|
||||||
|
else:
|
||||||
|
locks_total = self.get_locks()
|
||||||
|
|
||||||
metrics = {
|
metrics = {
|
||||||
"check_success": check_success,
|
"check_success": check_success,
|
||||||
|
"locks_total": locks_total,
|
||||||
"clients": clients,
|
"clients": clients,
|
||||||
"snapshots_total": len(all_snapshots),
|
"snapshots_total": len(all_snapshots),
|
||||||
"duration": time.time() - duration
|
"duration": time.time() - duration
|
||||||
|
@ -283,6 +292,26 @@ class ResticCollector(object):
|
||||||
)
|
)
|
||||||
return 0 # error
|
return 0 # error
|
||||||
|
|
||||||
|
def get_locks(self):
    """Return the number of locks listed by ``restic list locks``.

    The command is run against ``self.repository`` using
    ``self.password_file`` and with ``--no-lock`` so that the query itself
    does not add a lock to the repository being inspected.

    Returns:
        int: the number of non-empty lines restic printed on stdout
        (presumably one lock id per line -- confirm against restic docs).

    Raises:
        Exception: if restic exits with a non-zero status; the message
        carries the output of ``self.parse_stderr(result)``.
    """
    cmd = [
        "restic",
        "-r",
        self.repository,
        "-p",
        self.password_file,
        "--no-lock",
        "list",
        "locks",
    ]

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        raise Exception(
            "Error executing restic list locks command: " + self.parse_stderr(result)
        )

    text_result = result.stdout.decode("utf-8")
    # Count non-empty lines instead of "number of newlines": the previous
    # `len(split("\n")) - 1` undercounted when the final line had no trailing
    # newline and overcounted stray blank lines.
    return sum(1 for line in text_result.splitlines() if line.strip())
|
||||||
|
|
||||||
def calc_snapshot_hash(self, snapshot: dict) -> str:
|
def calc_snapshot_hash(self, snapshot: dict) -> str:
|
||||||
text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"])
|
text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"])
|
||||||
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||||
|
@ -323,6 +352,7 @@ if __name__ == "__main__":
|
||||||
exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
|
exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
|
||||||
exporter_disable_check = bool(os.environ.get("NO_CHECK", False))
|
exporter_disable_check = bool(os.environ.get("NO_CHECK", False))
|
||||||
exporter_disable_stats = bool(os.environ.get("NO_STATS", False))
|
exporter_disable_stats = bool(os.environ.get("NO_STATS", False))
|
||||||
|
exporter_disable_locks = bool(os.environ.get("NO_LOCKS", False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
collector = ResticCollector(
|
collector = ResticCollector(
|
||||||
|
@ -331,6 +361,7 @@ if __name__ == "__main__":
|
||||||
exporter_exit_on_error,
|
exporter_exit_on_error,
|
||||||
exporter_disable_check,
|
exporter_disable_check,
|
||||||
exporter_disable_stats,
|
exporter_disable_stats,
|
||||||
|
exporter_disable_locks,
|
||||||
)
|
)
|
||||||
REGISTRY.register(collector)
|
REGISTRY.register(collector)
|
||||||
start_http_server(exporter_port, exporter_address)
|
start_http_server(exporter_port, exporter_address)
|
||||||
|
|
Loading…
Reference in a new issue