Compare commits

...

40 commits
1.0.0 ... main

Author SHA1 Message Date
ngosang
cb89566b67 Bump version 1.5.0 2024-01-20 16:04:50 +01:00
ngosang
4efcaba7c5 Replaced RESTIC_REPO_URL, RESTIC_REPO_PASSWORD and RESTIC_REPO_PASSWORD_FILE environment variables 2024-01-20 16:00:11 +01:00
ngosang
10dae8da3a Add new label "snapshot_tags" in the list of tags separated by comma 2024-01-20 15:28:45 +01:00
ngosang
71fb2a6695 Update Python dependencies 2024-01-20 15:17:44 +01:00
ngosang
e94816eb54 Update Restic 0.16.3 2024-01-20 15:15:42 +01:00
ngosang
c882d9a9a9 Update base Docker image to Alpine 3.19 2024-01-20 15:14:47 +01:00
ngosang
9c5e0d0071 Bump version 1.4.0 2023-10-14 23:38:09 +02:00
ngosang
9070dc9aef Update Grafana dashboard to include repository locks and client version 2023-10-14 23:36:10 +02:00
ngosang
a4243a8554 Include metric label client_version. Resolves #5 2023-10-14 23:35:18 +02:00
ngosang
d90f46a4d1 Update Python 3.12 2023-10-14 22:48:58 +02:00
ngosang
d26e181f21 Update Restic 0.16.0 2023-10-14 22:47:05 +02:00
ngosang
e1973cb48f Update changelog 2023-07-30 01:30:03 +02:00
ngosang
539582566d Bump version 1.3.0 2023-07-30 01:15:42 +02:00
ngosang
8697539748 Include backup paths in the exported metrics. Resolves #17 2023-07-30 01:02:04 +02:00
ngosang
2f4bab16ef Fix typos and static methods 2023-07-30 00:46:09 +02:00
ngosang
0bc9a62563 New metric restic_locks_total. Resolves #10 2023-07-30 00:37:32 +02:00
ngosang
3e183cbf83 Add Rclone instructions in the readme 2023-07-29 23:51:17 +02:00
ngosang
8463e245c2 Update Python dependencies 2023-07-29 23:38:15 +02:00
ngosang
d63604f1b8 Update Restic 0.15.2 2023-07-29 23:36:17 +02:00
ngosang
9621db9e79 Update base Docker image to Alpine 3.18 2023-07-29 23:35:21 +02:00
ngosang
a4e37f8770 Bump version 1.2.2 2023-03-31 15:31:51 +02:00
ngosang
c80b74c1ec Include OpenSSH in the Docker image to support SFTP protocol 2023-03-31 15:30:55 +02:00
ngosang
6ebd2b7dfc Bump version 1.2.1 2023-03-26 14:37:56 +02:00
ngosang
8a99ddae98 Improve hash calculation to avoid duplicate clients 2023-03-26 14:34:48 +02:00
ngosang
f2fe3aff54 Bump version 1.2.0 2023-03-18 16:19:12 +01:00
ngosang
135cf592d2 Update changelog 2023-03-18 16:18:36 +01:00
Diego Heras
26bab7ba5d
Use empty username when username is not present
Use empty username when username is not present in snapshot.
2023-03-18 16:04:46 +01:00
Diego Heras
2c3ef3e79c
Merge branch 'main' into skip-username-when-not-present 2023-03-18 16:04:12 +01:00
Diego Heras
31d99bf98d
Performance improvements
Performance improvements for restic-exporter
2023-03-18 16:02:30 +01:00
Konstantin Shalygin
60617651cb
Performance improvements for restic-exporter 2023-03-17 10:25:51 +07:00
Enrico204
011b658805 Use empty username when username is not present
in snapshot.

This commit fixes a crash when the snapshot has
no username field in the JSON. The missing username
is replaced with an empty string.
2023-02-06 13:52:07 +01:00
ngosang
880b47131c Bump version 1.1.0 2023-02-02 20:37:01 +01:00
ngosang
864f63600a Improve error handling and better log traces 2023-02-02 20:30:57 +01:00
ngosang
84190584b8 Fix snapshot time parsing for old versions of Restic. Resolves #1 2023-02-02 20:02:12 +01:00
ngosang
3e00452190 Rename PASSWORD_FILE env var to RESTIC_REPO_PASSWORD_FILE 2023-02-02 19:48:08 +01:00
ngosang
f04f5468d8 Add documentation for other backends 2023-02-02 19:33:43 +01:00
ngosang
2a5e4e426a Exit if the repository password is not configured 2023-02-02 19:25:34 +01:00
ngosang
982cfe90d5 Update prometheus-client 0.16.0 2023-02-02 18:48:15 +01:00
ngosang
dc2f63c6f9 Update Restic 0.15.1 2023-02-02 18:47:59 +01:00
ngosang
2c086e491b Update Grafana dashboard 2022-12-06 21:49:33 +01:00
12 changed files with 663 additions and 273 deletions


@ -6,7 +6,7 @@ on:
- "main"
jobs:
build:
tag-release:
runs-on: ubuntu-latest
steps:
-


@ -6,7 +6,7 @@ on:
- '*.*.*'
jobs:
build:
build-docker-images:
runs-on: ubuntu-latest
steps:
-


@ -1,5 +1,59 @@
# Changelog
## 1.5.0 (2024/01/20)
* Replaced RESTIC_REPO_URL, RESTIC_REPO_PASSWORD and RESTIC_REPO_PASSWORD_FILE environment variables with the Restic equivalents
* Add new label "snapshot_tags" with the list of tags separated by commas. The label "snapshot_tag" only contains the first tag
* Update Restic 0.16.3
* Update Python dependencies
* Update base Docker image to Alpine 3.19
## 1.4.0 (2023/10/14)
* Include metric label client_version. Resolves #5
* Update Grafana dashboard to include repository locks and client version
* Update Restic 0.16.0
* Update Python 3.12
## 1.3.0 (2023/07/30)
* Add new metric "restic_locks_total" with the number of repository locks
* Add new label "snapshot_paths" in the metrics with the backup paths
* Add NO_LOCKS env var to skip restic locks collection
* Add INCLUDE_PATHS env var to include the backup paths in the metrics
* Add Rclone instructions in the readme
* Update Restic 0.15.2
* Update Python dependencies
* Update base Docker image to Alpine 3.18
## 1.2.2 (2023/03/31)
* Include OpenSSH in the Docker image to support SFTP protocol
## 1.2.1 (2023/03/26)
* Improve hash calculation to avoid duplicate clients (snapshot_hash label changes)
## 1.2.0 (2023/03/18)
* Add new label "snapshot_tag" in the metrics with the backup tag (if tags is present)
* Add new metric "restic_scrape_duration_seconds"
* Add EXIT_ON_ERROR env var to control behaviour on error
* Add NO_CHECK env var to skip restic check stats
* Add NO_STATS env var to skip stats per backup
* Fix crash when backup username is empty. #7
## 1.1.0 (2023/02/02)
* Update Restic 0.15.1
* Update prometheus-client 0.16.0
* Fix snapshot time parsing for old versions of Restic. Resolves #1
* Exit if the repository password is not configured
* Improve error handling and better log traces
* Rename PASSWORD_FILE env var to RESTIC_REPO_PASSWORD_FILE
* Update Grafana dashboard
* Add documentation for other backends
## 1.0.0 (2022/12/06)
* First release
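
For clarity, the `snapshot_tag` / `snapshot_tags` labels introduced in 1.2.0 and 1.5.0 relate to a snapshot's tag list roughly as follows (illustrative sketch only; the snapshot fields follow restic's `snapshots --json` output, not the exporter's verbatim code):

```python
# Sketch: deriving both tag labels from one restic snapshot entry.
snapshot = {"hostname": "product.example.com", "tags": ["mysql", "tag2"]}

snapshot_tag = snapshot["tags"][0] if "tags" in snapshot else ""          # first tag only
snapshot_tags = ",".join(snapshot["tags"]) if "tags" in snapshot else ""  # all tags, comma separated

print(snapshot_tag)   # mysql
print(snapshot_tags)  # mysql,tag2
```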


@ -1,6 +1,6 @@
FROM golang:alpine3.17 AS builder
FROM golang:1.20-alpine3.19 AS builder
ENV RESTIC_VERSION 0.14.0
ENV RESTIC_VERSION 0.16.3
ENV CGO_ENABLED 0
RUN cd /tmp \
@ -12,9 +12,9 @@ RUN cd /tmp \
# flag -ldflags "-s -w" produces a smaller executable
&& go build -ldflags "-s -w" -v -o /tmp/restic ./cmd/restic
FROM python:3.11-alpine3.17
FROM python:3.12-alpine3.19
RUN apk add --no-cache --update tzdata
RUN apk add --no-cache --update openssh tzdata
COPY --from=builder /tmp/restic /usr/bin
COPY entrypoint.sh requirements.txt /

README.md

@ -19,8 +19,8 @@ Requirements:
```bash
pip install -r /requirements.txt
export RESTIC_REPO_URL=/data
export PASSWORD_FILE=/restic_password_file
export RESTIC_REPOSITORY=/data
export RESTIC_PASSWORD_FILE=/restic_password_file
python restic-exporter.py
```
@ -59,9 +59,9 @@ services:
container_name: restic-exporter
environment:
- TZ=Europe/Madrid
- RESTIC_REPO_URL=/data
- RESTIC_REPO_PASSWORD=<password_here>
# - RESTIC_REPO_PASSWORD_FILE=</file_with_password_here>
- RESTIC_REPOSITORY=/data
- RESTIC_PASSWORD=<password_here>
# - RESTIC_PASSWORD_FILE=</file_with_password_here>
- REFRESH_INTERVAL=1800 # 30 min
volumes:
- /host_path/restic/data:/data
@ -76,8 +76,8 @@ services:
docker run -d \
--name=restic-exporter \
-e TZ=Europe/Madrid \
-e RESTIC_REPO_URL=/data \
-e RESTIC_REPO_PASSWORD=<password_here> \
-e RESTIC_REPOSITORY=/data \
-e RESTIC_PASSWORD=<password_here> \
-e REFRESH_INTERVAL=1800 \
-p 8001:8001 \
--restart unless-stopped \
@ -86,37 +86,97 @@ docker run -d \
## Configuration
All configuration is done with environment variables.
This Prometheus exporter is compatible with all [backends supported by Restic](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html).
Some of them need additional environment variables for the secrets.
- `RESTIC_REPO_URL`: Restic repository URL. It could be a local repository (eg: `/data`) or a remote repository (eg: `rest:http://user:password@127.0.0.1:8000/`).
- `RESTIC_REPO_PASSWORD`: Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD_FILE` is not defined.
- `RESTIC_REPO_PASSWORD_FILE`: File with the Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD` is not defined. Remember to mount the Docker volume with the file.
- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds. Computing the metrics is a expensive task, keep this value as high as possible. Default 60
- `LISTEN_PORT`: (Optional) The address the exporter should listen on. The default is `8001`.
- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses.
All configuration is done with environment variables:
- `RESTIC_REPOSITORY`: Restic repository URL. All backends are supported. Examples:
* Local repository: `/data`
* REST Server: `rest:http://user:password@127.0.0.1:8000/`
* Amazon S3: `s3:s3.amazonaws.com/bucket_name`
* Backblaze B2: `b2:bucketname:path/to/repo`
* Rclone (see notes below): `rclone:gd-backup:/restic`
- `RESTIC_PASSWORD`: Restic repository password in plain text. This is only
required if `RESTIC_PASSWORD_FILE` is not defined.
- `RESTIC_PASSWORD_FILE`: File with the Restic repository password in plain
text. This is only required if `RESTIC_PASSWORD` is not defined. Remember
to mount the Docker volume with the file.
- `AWS_ACCESS_KEY_ID`: (Optional) Required for Amazon S3, Minio and Wasabi
backends.
- `AWS_SECRET_ACCESS_KEY`: (Optional) Required for Amazon S3, Minio and Wasabi
backends.
- `B2_ACCOUNT_ID`: (Optional) Required for Backblaze B2 backend.
- `B2_ACCOUNT_KEY`: (Optional) Required for Backblaze B2 backend.
- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds.
Computing the metrics is an expensive task, so keep this value as high as possible.
Default is `60` seconds.
- `LISTEN_PORT`: (Optional) The port the exporter should listen on. The
default is `8001`.
- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The
default is to listen on all addresses.
- `LOG_LEVEL`: (Optional) Log level of the traces. The default is `INFO`.
- `EXIT_ON_ERROR`: (Optional) Shut down the exporter on any `restic` error. Default
is `False` (only log the error, e.g. a network error with cloud backends).
- `NO_CHECK`: (Optional) Do not perform `restic check` operation for performance
reasons. Default is `False` (perform `restic check`).
- `NO_STATS`: (Optional) Do not collect per backup statistics for performance
reasons. Default is `False` (collect per backup statistics).
- `NO_LOCKS`: (Optional) Do not collect the number of locks. Default is `False` (collect the number of locks).
- `INCLUDE_PATHS`: (Optional) Include snapshot paths for each backup. The paths are separated by commas. Default is `False` (do not collect the paths).
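Below is a minimal sketch of how these variables could be read in Python, using the names and defaults documented above. It is not a verbatim copy of `restic-exporter.py`; the fallback value for `RESTIC_REPOSITORY` is only there to keep the snippet self-contained:
```python
import os

# Sketch: read the documented configuration from the environment.
# Names and defaults follow the list above; not the exporter's exact code.
restic_repository = os.environ.get("RESTIC_REPOSITORY", "/data")   # mandatory in the exporter
restic_password_file = os.environ.get("RESTIC_PASSWORD_FILE")      # or RESTIC_PASSWORD

refresh_interval = int(os.environ.get("REFRESH_INTERVAL", 60))     # seconds
listen_address = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")       # all addresses
listen_port = int(os.environ.get("LISTEN_PORT", 8001))
log_level = os.environ.get("LOG_LEVEL", "INFO")

# Feature switches: in this sketch any non-empty value enables the flag.
no_check = bool(os.environ.get("NO_CHECK", False))
no_stats = bool(os.environ.get("NO_STATS", False))
no_locks = bool(os.environ.get("NO_LOCKS", False))
include_paths = bool(os.environ.get("INCLUDE_PATHS", False))
exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
```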
### Configuration for Rclone
Rclone is not included in the Docker image. You have to mount the Rclone executable and the Rclone configuration from the host machine. Here is an example with docker-compose:
```yaml
version: '2.1'
services:
restic-exporter:
image: ngosang/restic-exporter
container_name: restic-exporter
environment:
- TZ=Europe/Madrid
- RESTIC_REPOSITORY=rclone:gd-backup:/restic
- RESTIC_PASSWORD=
- REFRESH_INTERVAL=1800 # 30 min
volumes:
- /host_path/restic/data:/data
- /usr/bin/rclone:/usr/bin/rclone:ro
- /host_path/restic/rclone.conf:/root/.config/rclone/rclone.conf:ro
ports:
- "8001:8001"
restart: unless-stopped
```
## Exported metrics
```shell
```bash
# HELP restic_check_success Result of restic check operation in the repository
# TYPE restic_check_success gauge
restic_check_success 1.0
# HELP restic_locks_total Total number of locks in the repository
# TYPE restic_locks_total counter
restic_locks_total 1.0
# HELP restic_snapshots_total Total number of snapshots in the repository
# TYPE restic_snapshots_total counter
restic_snapshots_total 1777.0
restic_snapshots_total 100.0
# HELP restic_backup_timestamp Timestamp of the last backup
# TYPE restic_backup_timestamp gauge
restic_backup_timestamp{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 1.669754009e+09
restic_backup_timestamp{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 1.666273638e+09
# HELP restic_backup_files_total Number of files in the backup
# TYPE restic_backup_files_total counter
restic_backup_files_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 19051.0
restic_backup_files_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 8.0
# HELP restic_backup_size_total Total size of backup in bytes
# TYPE restic_backup_size_total counter
restic_backup_size_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 4.1174838248e+010
restic_backup_size_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 4.3309562e+07
# HELP restic_backup_snapshots_total Total number of snapshots
# TYPE restic_backup_snapshots_total counter
restic_backup_snapshots_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 106.0
restic_backup_snapshots_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 1.0
# HELP restic_scrape_duration_seconds Amount of time each scrape takes
# TYPE restic_scrape_duration_seconds gauge
restic_scrape_duration_seconds 166.9411084651947
```
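These metrics can be consumed with the `prometheus_client` text parser. A short sketch, assuming the exporter is reachable on the default `localhost:8001`:
```python
# Sketch: scrape the exporter and print the age of the last backup per client.
# Assumes the exporter is reachable at http://localhost:8001/metrics (default port).
import time
import urllib.request

from prometheus_client.parser import text_string_to_metric_families

text = urllib.request.urlopen("http://localhost:8001/metrics").read().decode("utf-8")

for family in text_string_to_metric_families(text):
    if family.name != "restic_backup_timestamp":
        continue
    for sample in family.samples:
        age_hours = (time.time() - sample.value) / 3600
        print(f"{sample.labels['client_hostname']}: last backup {age_hours:.1f} h ago")
```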
## Prometheus config


@ -6,9 +6,9 @@ services:
container_name: restic-exporter
environment:
- TZ=Europe/Madrid
- RESTIC_REPO_URL=/data
- RESTIC_REPO_PASSWORD=password_here
# - RESTIC_REPO_PASSWORD_FILE=/file_with_password_here
- RESTIC_REPOSITORY=/data
- RESTIC_PASSWORD=password_here
# - RESTIC_PASSWORD_FILE=/file_with_password_here
- REFRESH_INTERVAL=1800 # 30 min
volumes:
- /host_path/restic/data:/data


@ -3,16 +3,23 @@
# Exit on error. For debug use set -x
set -e
export PASSWORD_FILE="/tmp/restic_passwd"
if [ -n "${RESTIC_REPO_PASSWORD}" ]; then
echo "The environment variable RESTIC_REPO_PASSWORD is deprecated, please use RESTIC_PASSWORD instead."
export RESTIC_PASSWORD="${RESTIC_REPO_PASSWORD}"
fi
if [ -n "${RESTIC_REPO_PASSWORD_FILE}" ]; then
echo "The environment variable RESTIC_REPO_PASSWORD_FILE is deprecated, please use RESTIC_PASSWORD_FILE instead."
export RESTIC_PASSWORD_FILE="${RESTIC_REPO_PASSWORD_FILE}"
fi
if [ -z "${RESTIC_REPO_PASSWORD}" ]; then
if [ -z "${RESTIC_REPO_PASSWORD_FILE}" ]; then
echo "You have to define one of these environment variables: RESTIC_REPO_PASSWORD or RESTIC_REPO_PASSWORD_FILE"
else
cp "${RESTIC_REPO_PASSWORD_FILE}" "${PASSWORD_FILE}"
if [ -z "${RESTIC_PASSWORD}" ]; then
if [ -z "${RESTIC_PASSWORD_FILE}" ]; then
echo "You have to define one of these environment variables: RESTIC_PASSWORD or RESTIC_PASSWORD_FILE"
exit 1
fi
else
echo "${RESTIC_REPO_PASSWORD}" > "${PASSWORD_FILE}"
export RESTIC_PASSWORD_FILE="/tmp/restic_passwd"
echo "${RESTIC_PASSWORD}" > "${RESTIC_PASSWORD_FILE}"
fi
/usr/local/bin/python -u /restic-exporter.py


@ -1,4 +1,53 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.0"
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
@ -26,14 +75,22 @@
"fiscalYearStartMonth": 0,
"gnetId": 9846,
"graphTooltip": 0,
"id": 40,
"links": [],
"id": null,
"links": [
{
"icon": "external link",
"tags": [
"system"
],
"type": "dashboards"
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
@ -75,7 +132,7 @@
},
"gridPos": {
"h": 3,
"w": 24,
"w": 18,
"x": 0,
"y": 0
},
@ -102,7 +159,11 @@
"expr": "restic_check_success",
"interval": "",
"legendFormat": "",
"refId": "A"
"refId": "A",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
}
],
"title": "Repository Check",
@ -111,7 +172,75 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 2
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 6,
"x": 18,
"y": 0
},
"id": 42,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "restic_locks_total",
"interval": "",
"legendFormat": "",
"range": true,
"refId": "A"
}
],
"title": "Repository Locks",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
@ -193,7 +322,7 @@
},
"gridPos": {
"h": 9,
"w": 15,
"w": 18,
"x": 0,
"y": 3
},
@ -228,7 +357,11 @@
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "A"
"refId": "A",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
},
{
"exemplar": true,
@ -239,7 +372,11 @@
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "B"
"refId": "B",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
}
],
"title": "Last backup",
@ -259,7 +396,7 @@
"client_id": true,
"client_os_version": true,
"client_username": true,
"client_version": true,
"client_version": false,
"instance": true,
"job": true,
"snapshot_hash": true,
@ -279,7 +416,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
@ -309,8 +446,8 @@
},
"gridPos": {
"h": 9,
"w": 9,
"x": 15,
"w": 6,
"x": 18,
"y": 3
},
"id": 32,
@ -336,6 +473,11 @@
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "restic_backup_snapshots_total",
"format": "table",
@ -378,7 +520,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
@ -421,7 +563,7 @@
},
"gridPos": {
"h": 9,
"w": 9,
"w": 18,
"x": 0,
"y": 12
},
@ -448,6 +590,10 @@
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"exemplar": true,
"expr": "restic_backup_size_total",
"format": "table",
@ -456,31 +602,70 @@
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"exemplar": true,
"expr": "restic_backup_files_total",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "B"
}
],
"title": "Total backup size",
"title": "Total backup size & files",
"transformations": [
{
"id": "joinByField",
"options": {
"byField": "client_hostname",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Time 1": true,
"Time 2": true,
"__name__": true,
"__name__ 1": true,
"backup_id": true,
"backup_type": true,
"client_hostname": false,
"client_id": true,
"client_os_version": true,
"client_username": true,
"client_username 1": true,
"client_username 2": true,
"client_version": true,
"client_version 1": true,
"client_version 2": true,
"instance": true,
"instance 1": true,
"instance 2": true,
"job": true,
"job 1": true,
"job 2": true,
"snapshot_hash": true,
"snapshot_hash 1": true,
"snapshot_hash 2": true,
"snapshot_id": true
},
"indexByName": {},
"renameByName": {
"Value": "total_backup_size"
"Time 2": "",
"Value": "total_backup_size",
"Value #A": "total_backup_size",
"Value #B": "total_backup_files"
}
}
}
@ -490,7 +675,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
@ -513,7 +698,7 @@
"gridPos": {
"h": 9,
"w": 6,
"x": 9,
"x": 18,
"y": 12
},
"id": 35,
@ -552,7 +737,11 @@
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "A"
"refId": "A",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
}
],
"title": "Total backup size",
@ -583,110 +772,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto",
"filterable": false,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 9,
"x": 15,
"y": 12
},
"id": 33,
"links": [],
"maxDataPoints": 100,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 1,
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "client_hostname"
}
]
},
"pluginVersion": "9.3.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
},
"exemplar": true,
"expr": "restic_backup_files_total",
"format": "table",
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{client_hostname}}",
"refId": "A"
}
],
"title": "Total backup files",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"__name__": true,
"backup_id": true,
"backup_type": true,
"client_hostname": false,
"client_id": true,
"client_os_version": true,
"client_username": true,
"client_version": true,
"instance": true,
"job": true,
"snapshot_hash": true,
"snapshot_id": true
},
"indexByName": {},
"renameByName": {
"Value": "total_backup_files"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
@ -772,7 +858,11 @@
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{client_hostname}}",
"refId": "A"
"refId": "A",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
}
],
"title": "Total backup size",
@ -781,7 +871,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
@ -863,7 +953,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"exemplar": true,
"expr": "sum by(client_hostname,client_username) (restic_backup_files_total)",
@ -880,7 +970,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "fmMKqssGk"
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
@ -966,7 +1056,11 @@
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{client_hostname}}",
"refId": "A"
"refId": "A",
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
}
}
],
"title": "Total snapshot count",
@ -1014,6 +1108,6 @@
"timezone": "",
"title": "Restic Exporter",
"uid": "2JzZl3B7k",
"version": 22,
"version": 25,
"weekStart": ""
}

Binary file not shown (before: 124 KiB, after: 183 KiB).


@ -1,5 +1,5 @@
{
"name": "ngosang-restic-exporter",
"version": "1.0.0",
"version": "1.5.0",
"author": "ngosang@hotmail.es"
}


@ -1 +1 @@
prometheus-client==0.15.0
prometheus-client==0.19.0


@ -8,21 +8,31 @@ import time
import re
import subprocess
import sys
import traceback
import prometheus_client
import prometheus_client.core
from prometheus_client import start_http_server
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGISTRY
class ResticCollector(object):
def __init__(self, repository, password_file_):
def __init__(
self, repository, password_file, exit_on_error, disable_check,
disable_stats, disable_locks, include_paths
):
self.repository = repository
self.password_file = password_file_
self.password_file = password_file
self.exit_on_error = exit_on_error
self.disable_check = disable_check
self.disable_stats = disable_stats
self.disable_locks = disable_locks
self.include_paths = include_paths
# todo: the stats cache increases over time -> remove old ids
# todo: cold start -> the stats cache could be saved in a persistent volume
# todo: cold start -> the restic cache (/root/.cache/restic) could be saved in a persistent volume
# todo: cold start -> the restic cache (/root/.cache/restic) could be
# saved in a persistent volume
self.stats_cache = {}
self.metrics = {}
self.refresh()
self.refresh(exit_on_error)
def collect(self):
logging.debug("Incoming request")
@ -30,120 +40,214 @@ class ResticCollector(object):
common_label_names = [
"client_hostname",
"client_username",
"snapshot_hash"
"client_version",
"snapshot_hash",
"snapshot_tag",
"snapshot_tags",
"snapshot_paths",
]
check_success = prometheus_client.core.GaugeMetricFamily(
check_success = GaugeMetricFamily(
"restic_check_success",
"Result of restic check operation in the repository",
labels=[])
snapshots_total = prometheus_client.core.CounterMetricFamily(
labels=[],
)
locks_total = CounterMetricFamily(
"restic_locks_total",
"Total number of locks in the repository",
labels=[],
)
snapshots_total = CounterMetricFamily(
"restic_snapshots_total",
"Total number of snapshots in the repository",
labels=[])
backup_timestamp = prometheus_client.core.GaugeMetricFamily(
labels=[],
)
backup_timestamp = GaugeMetricFamily(
"restic_backup_timestamp",
"Timestamp of the last backup",
labels=common_label_names)
backup_files_total = prometheus_client.core.CounterMetricFamily(
labels=common_label_names,
)
backup_files_total = CounterMetricFamily(
"restic_backup_files_total",
"Number of files in the backup",
labels=common_label_names)
backup_size_total = prometheus_client.core.CounterMetricFamily(
labels=common_label_names,
)
backup_size_total = CounterMetricFamily(
"restic_backup_size_total",
"Total size of backup in bytes",
labels=common_label_names)
backup_snapshots_total = prometheus_client.core.CounterMetricFamily(
labels=common_label_names,
)
backup_snapshots_total = CounterMetricFamily(
"restic_backup_snapshots_total",
"Total number of snapshots",
labels=common_label_names)
labels=common_label_names,
)
scrape_duration_seconds = GaugeMetricFamily(
"restic_scrape_duration_seconds",
"Amount of time each scrape takes",
labels=[],
)
check_success.add_metric([], self.metrics["check_success"])
locks_total.add_metric([], self.metrics["locks_total"])
snapshots_total.add_metric([], self.metrics["snapshots_total"])
for client in self.metrics['clients']:
for client in self.metrics["clients"]:
common_label_values = [
client["hostname"],
client["username"],
client["snapshot_hash"]
client["version"],
client["snapshot_hash"],
client["snapshot_tag"],
client["snapshot_tags"],
client["snapshot_paths"],
]
backup_timestamp.add_metric(common_label_values, client["timestamp"])
backup_files_total.add_metric(common_label_values, client["files_total"])
backup_size_total.add_metric(common_label_values, client["size_total"])
backup_snapshots_total.add_metric(common_label_values, client["snapshots_total"])
backup_snapshots_total.add_metric(
common_label_values, client["snapshots_total"]
)
scrape_duration_seconds.add_metric([], self.metrics["duration"])
yield check_success
yield locks_total
yield snapshots_total
yield backup_timestamp
yield backup_files_total
yield backup_size_total
yield backup_snapshots_total
yield scrape_duration_seconds
def refresh(self):
def refresh(self, exit_on_error=False):
try:
self.metrics = self.get_metrics()
except Exception as e:
logging.error("Unable to collect metrics from Restic. Error: %s", str(e))
except Exception:
logging.error(
"Unable to collect metrics from Restic. %s",
traceback.format_exc(0).replace("\n", " "),
)
# Shutdown exporter for any error
if exit_on_error:
sys.exit(1)
def get_metrics(self):
duration = time.time()
# calc total number of snapshots per hash
all_snapshots = self.get_snapshots()
latest_snapshots = self.get_snapshots(True)
snap_total_counter = {}
for snap in all_snapshots:
if snap["hash"] not in snap_total_counter:
snap_total_counter[snap["hash"]] = 1
else:
snap_total_counter[snap["hash"]] += 1
# get the latest snapshot per hash
latest_snapshots_dup = self.get_snapshots(True)
latest_snapshots = {}
for snap in latest_snapshots_dup:
time_parsed = re.sub(r"\.[^+-]+", "", snap["time"])
if len(time_parsed) > 19:
# restic 14: '2023-01-12T06:59:33.1576588+01:00' ->
# '2023-01-12T06:59:33+01:00'
time_format = "%Y-%m-%dT%H:%M:%S%z"
else:
# restic 12: '2023-02-01T14:14:19.30760523Z' ->
# '2023-02-01T14:14:19'
time_format = "%Y-%m-%dT%H:%M:%S"
timestamp = time.mktime(
datetime.datetime.strptime(time_parsed, time_format).timetuple()
)
snap["timestamp"] = timestamp
if snap["hash"] not in latest_snapshots or \
snap["timestamp"] > latest_snapshots[snap["hash"]]["timestamp"]:
latest_snapshots[snap["hash"]] = snap
clients = []
for snap in latest_snapshots:
stats = self.get_stats(snap['id'])
for snap in list(latest_snapshots.values()):
# collect stats for each snap only if enabled
if self.disable_stats:
# return zero as "no-stats" value
stats = {
"total_size": -1,
"total_file_count": -1,
}
else:
stats = self.get_stats(snap["id"])
time_parsed = re.sub(r'\.[^+-]+', '', snap['time'])
timestamp = time.mktime(datetime.datetime.strptime(time_parsed, "%Y-%m-%dT%H:%M:%S%z").timetuple())
clients.append(
{
"hostname": snap["hostname"],
"username": snap["username"],
"version": snap["program_version"] if "program_version" in snap else "",
"snapshot_hash": snap["hash"],
"snapshot_tag": snap["tags"][0] if "tags" in snap else "",
"snapshot_tags": ",".join(snap["tags"]) if "tags" in snap else "",
"snapshot_paths": ",".join(snap["paths"]) if self.include_paths else "",
"timestamp": snap["timestamp"],
"size_total": stats["total_size"],
"files_total": stats["total_file_count"],
"snapshots_total": snap_total_counter[snap["hash"]],
}
)
snapshots_total = 0
for snap2 in all_snapshots:
if snap2['hash'] == snap['hash']:
snapshots_total += 1
clients.append({
'snapshot_hash': snap['hash'],
'hostname': snap['hostname'],
'username': snap['username'],
'timestamp': timestamp,
'size_total': stats['total_size'],
'files_total': stats['total_file_count'],
'snapshots_total': snapshots_total
})
# todo: fix the commented code when the bug is fixed in restic
# https://github.com/restic/restic/issues/2126
# stats = self.get_stats()
check_success = self.get_check()
if self.disable_check:
# return 2 as "no-check" value
check_success = 2
else:
check_success = self.get_check()
if self.disable_locks:
# return 0 as "no-locks" value
locks_total = 0
else:
locks_total = self.get_locks()
metrics = {
'check_success': check_success,
'clients': clients,
"check_success": check_success,
"locks_total": locks_total,
"clients": clients,
"snapshots_total": len(all_snapshots),
"duration": time.time() - duration
# 'size_total': stats['total_size'],
# 'files_total': stats['total_file_count'],
'snapshots_total': len(all_snapshots)
}
return metrics
def get_snapshots(self, only_latest=False):
cmd = [
'restic',
'-r', self.repository,
'-p', self.password_file,
'--no-lock',
'snapshots', '--json'
"restic",
"-r",
self.repository,
"-p",
self.password_file,
"--no-lock",
"snapshots",
"--json",
]
if only_latest:
cmd.extend(['--latest', '1'])
result = subprocess.run(cmd, stdout=subprocess.PIPE)
if only_latest:
cmd.extend(["--latest", "1"])
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
raise Exception("Error executing restic snapshot command. Exit code: " + str(result.returncode))
snapshots = json.loads(result.stdout.decode('utf-8'))
raise Exception(
"Error executing restic snapshot command: " + self.parse_stderr(result)
)
snapshots = json.loads(result.stdout.decode("utf-8"))
for snap in snapshots:
snap['hash'] = self.calc_snapshot_hash(snap)
if "username" not in snap:
snap["username"] = ""
snap["hash"] = self.calc_snapshot_hash(snap)
return snapshots
def get_stats(self, snapshot_id=None):
@ -154,19 +258,24 @@ class ResticCollector(object):
return self.stats_cache[snapshot_id]
cmd = [
'restic',
'-r', self.repository,
'-p', self.password_file,
'--no-lock',
'stats', '--json'
"restic",
"-r",
self.repository,
"-p",
self.password_file,
"--no-lock",
"stats",
"--json",
]
if snapshot_id is not None:
cmd.extend([snapshot_id])
result = subprocess.run(cmd, stdout=subprocess.PIPE)
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
raise Exception("Error executing restic stats command. Exit code: " + str(result.returncode))
stats = json.loads(result.stdout.decode('utf-8'))
raise Exception(
"Error executing restic stats command: " + self.parse_stderr(result)
)
stats = json.loads(result.stdout.decode("utf-8"))
if snapshot_id is not None:
self.stats_cache[snapshot_id] = stats
@ -176,58 +285,124 @@ class ResticCollector(object):
def get_check(self):
# This command takes 20 seconds or more, but it's required
cmd = [
'restic',
'-r', self.repository,
'-p', self.password_file,
'--no-lock',
'check'
"restic",
"-r",
self.repository,
"-p",
self.password_file,
"--no-lock",
"check",
]
result = subprocess.run(cmd, stdout=subprocess.PIPE)
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode == 0:
return 1 # ok
return 0 # error
else:
logging.warning(
"Error checking the repository health. " + self.parse_stderr(result)
)
return 0 # error
def calc_snapshot_hash(self, snapshot: dict) -> str:
text = snapshot['hostname'] + ",".join(snapshot['paths'])
return hashlib.sha256(text.encode('utf-8')).hexdigest()
def get_locks(self):
cmd = [
"restic",
"-r",
self.repository,
"-p",
self.password_file,
"--no-lock",
"list",
"locks",
]
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
raise Exception(
"Error executing restic list locks command: " + self.parse_stderr(result)
)
text_result = result.stdout.decode("utf-8")
return len(text_result.split("\n")) - 1
@staticmethod
def calc_snapshot_hash(snapshot: dict) -> str:
text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"])
return hashlib.sha256(text.encode("utf-8")).hexdigest()
@staticmethod
def parse_stderr(result):
return (
result.stderr.decode("utf-8").replace("\n", " ")
+ " Exit code: "
+ str(result.returncode)
)
if __name__ == "__main__":
logging.basicConfig(
format='%(asctime)s %(levelname)-8s %(message)s',
format="%(asctime)s %(levelname)-8s %(message)s",
level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO")),
datefmt='%Y-%m-%d %H:%M:%S',
handlers=[
logging.StreamHandler(sys.stdout)
]
datefmt="%Y-%m-%d %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logging.info("Starting Restic Prometheus Exporter ...")
logging.info("It could take a while if the repository is remote.")
logging.info("Starting Restic Prometheus Exporter")
logging.info("It could take a while if the repository is remote")
try:
restic_repo_url = os.environ["RESTIC_REPO_URL"]
except Exception:
logging.error("Configuration error. The environment variable RESTIC_REPO_URL is mandatory")
restic_repo_url = os.environ.get("RESTIC_REPOSITORY")
if restic_repo_url is None:
restic_repo_url = os.environ.get("RESTIC_REPO_URL")
if restic_repo_url is not None:
logging.warning(
"The environment variable RESTIC_REPO_URL is deprecated, "
"please use RESTIC_REPOSITORY instead."
)
if restic_repo_url is None:
logging.error("The environment variable RESTIC_REPOSITORY is mandatory")
sys.exit(1)
try:
password_file = os.environ["PASSWORD_FILE"]
except Exception:
logging.error("Configuration error. The environment variable PASSWORD_FILE is mandatory")
restic_repo_password_file = os.environ.get("RESTIC_PASSWORD_FILE")
if restic_repo_password_file is None:
restic_repo_password_file = os.environ.get("RESTIC_REPO_PASSWORD_FILE")
if restic_repo_password_file is not None:
logging.warning(
"The environment variable RESTIC_REPO_PASSWORD_FILE is deprecated, "
"please use RESTIC_PASSWORD_FILE instead."
)
if restic_repo_password_file is None:
logging.error("The environment variable RESTIC_PASSWORD_FILE is mandatory")
sys.exit(1)
exporter_address = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
exporter_port = int(os.environ.get("LISTEN_PORT", 8001))
exporter_refresh_interval = int(os.environ.get("REFRESH_INTERVAL", 60))
exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
exporter_disable_check = bool(os.environ.get("NO_CHECK", False))
exporter_disable_stats = bool(os.environ.get("NO_STATS", False))
exporter_disable_locks = bool(os.environ.get("NO_LOCKS", False))
exporter_include_paths = bool(os.environ.get("INCLUDE_PATHS", False))
collector = ResticCollector(restic_repo_url, password_file)
try:
collector = ResticCollector(
restic_repo_url,
restic_repo_password_file,
exporter_exit_on_error,
exporter_disable_check,
exporter_disable_stats,
exporter_disable_locks,
exporter_include_paths,
)
REGISTRY.register(collector)
start_http_server(exporter_port, exporter_address)
logging.info(
"Serving at http://{0}:{1}".format(exporter_address, exporter_port)
)
prometheus_client.core.REGISTRY.register(collector)
prometheus_client.start_http_server(exporter_port, exporter_address)
while True:
logging.info(
"Refreshing stats every {0} seconds".format(exporter_refresh_interval)
)
time.sleep(exporter_refresh_interval)
collector.refresh()
logging.info("Server listening in http://%s:%d/metrics", exporter_address, exporter_port)
while True:
logging.info("Refreshing stats every %d seconds", exporter_refresh_interval)
time.sleep(exporter_refresh_interval)
collector.refresh()
except KeyboardInterrupt:
logging.info("\nInterrupted")
exit(0)