Compare commits
40 commits
Author | SHA1 | Date | |
---|---|---|---|
|
cb89566b67 | ||
|
4efcaba7c5 | ||
|
10dae8da3a | ||
|
71fb2a6695 | ||
|
e94816eb54 | ||
|
c882d9a9a9 | ||
|
9c5e0d0071 | ||
|
9070dc9aef | ||
|
a4243a8554 | ||
|
d90f46a4d1 | ||
|
d26e181f21 | ||
|
e1973cb48f | ||
|
539582566d | ||
|
8697539748 | ||
|
2f4bab16ef | ||
|
0bc9a62563 | ||
|
3e183cbf83 | ||
|
8463e245c2 | ||
|
d63604f1b8 | ||
|
9621db9e79 | ||
|
a4e37f8770 | ||
|
c80b74c1ec | ||
|
6ebd2b7dfc | ||
|
8a99ddae98 | ||
|
f2fe3aff54 | ||
|
135cf592d2 | ||
|
26bab7ba5d | ||
|
2c3ef3e79c | ||
|
31d99bf98d | ||
|
60617651cb | ||
|
011b658805 | ||
|
880b47131c | ||
|
864f63600a | ||
|
84190584b8 | ||
|
3e00452190 | ||
|
f04f5468d8 | ||
|
2a5e4e426a | ||
|
982cfe90d5 | ||
|
dc2f63c6f9 | ||
|
2c086e491b |
12 changed files with 663 additions and 273 deletions
2
.github/workflows/autotag.yml
vendored
2
.github/workflows/autotag.yml
vendored
|
@ -6,7 +6,7 @@ on:
|
|||
- "main"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
tag-release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
|
|
2
.github/workflows/release-docker.yml
vendored
2
.github/workflows/release-docker.yml
vendored
|
@ -6,7 +6,7 @@ on:
|
|||
- '*.*.*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
build-docker-images:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
|
|
54
CHANGELOG.md
54
CHANGELOG.md
|
@ -1,5 +1,59 @@
|
|||
# Changelog
|
||||
|
||||
## 1.5.0 (2024/01/20)
|
||||
|
||||
* Replaced RESTIC_REPO_URL, RESTIC_REPO_PASSWORD and RESTIC_REPO_PASSWORD_FILE environment variables with the Restic equivalents
|
||||
* Add new label "snapshot_tags" with the list of tags separated by commas. The label "snapshot_tag" only contains the first tag
|
||||
* Update Restic 0.16.3
|
||||
* Update Python dependencies
|
||||
* Update base Docker image to Alpine 3.19
|
||||
|
||||
## 1.4.0 (2023/10/14)
|
||||
|
||||
* Include metric label client_version. Resolves #5
|
||||
* Update Grafana dashboard to include repository locks and client version
|
||||
* Update Restic 0.16.0
|
||||
* Update Python 3.12
|
||||
|
||||
## 1.3.0 (2023/07/30)
|
||||
|
||||
* Add new metric "restic_locks_total" with the number of repository locks
|
||||
* Add new label "snapshot_paths" in the metrics with the backup paths
|
||||
* Add NO_LOCKS env var to skip restic locks collection
|
||||
* Add INCLUDE_PATHS env var to include the backup paths in the metrics
|
||||
* Add Rclone instructions in the readme
|
||||
* Update Restic 0.15.2
|
||||
* Update Python dependencies
|
||||
* Update base Docker image to Alpine 3.18
|
||||
|
||||
## 1.2.2 (2023/03/31)
|
||||
|
||||
* Include OpenSSH in the Docker image to support SFTP protocol
|
||||
|
||||
## 1.2.1 (2023/03/26)
|
||||
|
||||
* Improve hash calculation to avoid duplicate clients (snapshot_hash label changes)
|
||||
|
||||
## 1.2.0 (2023/03/18)
|
||||
|
||||
* Add new label "snapshot_tag" in the metrics with the backup tag (if tags is present)
|
||||
* Add new metric "restic_scrape_duration_seconds"
|
||||
* Add EXIT_ON_ERROR env var to control behaviour on error
|
||||
* Add NO_CHECK env var to skip restic check stats
|
||||
* Add NO_STATS env var to skip stats per backup
|
||||
* Fix crash when backup username is empty. #7
|
||||
|
||||
## 1.1.0 (2023/02/02)
|
||||
|
||||
* Update Restic 0.15.1
|
||||
* Update prometheus-client 0.16.0
|
||||
* Fix snapshot time parsing for old versions of Restic. Resolves #1
|
||||
* Exit if the repository password is not configured
|
||||
* Improve error handling and better log traces
|
||||
* Rename PASSWORD_FILE env var to RESTIC_REPO_PASSWORD_FILE
|
||||
* Update Grafana dashboard
|
||||
* Add documentation for other backends
|
||||
|
||||
## 1.0.0 (2022/12/06)
|
||||
|
||||
* First release
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM golang:alpine3.17 AS builder
|
||||
FROM golang:1.20-alpine3.19 AS builder
|
||||
|
||||
ENV RESTIC_VERSION 0.14.0
|
||||
ENV RESTIC_VERSION 0.16.3
|
||||
ENV CGO_ENABLED 0
|
||||
|
||||
RUN cd /tmp \
|
||||
|
@ -12,9 +12,9 @@ RUN cd /tmp \
|
|||
# flag -ldflags "-s -w" produces a smaller executable
|
||||
&& go build -ldflags "-s -w" -v -o /tmp/restic ./cmd/restic
|
||||
|
||||
FROM python:3.11-alpine3.17
|
||||
FROM python:3.12-alpine3.19
|
||||
|
||||
RUN apk add --no-cache --update tzdata
|
||||
RUN apk add --no-cache --update openssh tzdata
|
||||
|
||||
COPY --from=builder /tmp/restic /usr/bin
|
||||
COPY entrypoint.sh requirements.txt /
|
||||
|
|
100
README.md
100
README.md
|
@ -19,8 +19,8 @@ Requirements:
|
|||
```bash
|
||||
pip install -r /requirements.txt
|
||||
|
||||
export RESTIC_REPO_URL=/data
|
||||
export PASSWORD_FILE=/restic_password_file
|
||||
export RESTIC_REPOSITORY=/data
|
||||
export RESTIC_PASSWORD_FILE=/restic_password_file
|
||||
python restic-exporter.py
|
||||
```
|
||||
|
||||
|
@ -59,9 +59,9 @@ services:
|
|||
container_name: restic-exporter
|
||||
environment:
|
||||
- TZ=Europe/Madrid
|
||||
- RESTIC_REPO_URL=/data
|
||||
- RESTIC_REPO_PASSWORD=<password_here>
|
||||
# - RESTIC_REPO_PASSWORD_FILE=</file_with_password_here>
|
||||
- RESTIC_REPOSITORY=/data
|
||||
- RESTIC_PASSWORD=<password_here>
|
||||
# - RESTIC_PASSWORD_FILE=</file_with_password_here>
|
||||
- REFRESH_INTERVAL=1800 # 30 min
|
||||
volumes:
|
||||
- /host_path/restic/data:/data
|
||||
|
@ -76,8 +76,8 @@ services:
|
|||
docker run -d \
|
||||
--name=restic-exporter \
|
||||
-e TZ=Europe/Madrid \
|
||||
-e RESTIC_REPO_URL=/data \
|
||||
-e RESTIC_REPO_PASSWORD=<password_here> \
|
||||
-e RESTIC_REPOSITORY=/data \
|
||||
-e RESTIC_PASSWORD=<password_here> \
|
||||
-e REFRESH_INTERVAL=1800 \
|
||||
-p 8001:8001 \
|
||||
--restart unless-stopped \
|
||||
|
@ -86,37 +86,97 @@ docker run -d \
|
|||
|
||||
## Configuration
|
||||
|
||||
All configuration is done with environment variables.
|
||||
This Prometheus exporter is compatible with all [backends supported by Restic](https://restic.readthedocs.io/en/latest/030_preparing_a_new_repo.html).
|
||||
Some of them need additional environment variables for the secrets.
|
||||
|
||||
- `RESTIC_REPO_URL`: Restic repository URL. It could be a local repository (eg: `/data`) or a remote repository (eg: `rest:http://user:password@127.0.0.1:8000/`).
|
||||
- `RESTIC_REPO_PASSWORD`: Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD_FILE` is not defined.
|
||||
- `RESTIC_REPO_PASSWORD_FILE`: File with the Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD` is not defined. Remember to mount the Docker volume with the file.
|
||||
- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds. Computing the metrics is a expensive task, keep this value as high as possible. Default 60
|
||||
- `LISTEN_PORT`: (Optional) The address the exporter should listen on. The default is `8001`.
|
||||
- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses.
|
||||
All configuration is done with environment variables:
|
||||
|
||||
- `RESTIC_REPOSITORY`: Restic repository URL. All backends are supported. Examples:
|
||||
* Local repository: `/data`
|
||||
* REST Server: `rest:http://user:password@127.0.0.1:8000/`
|
||||
* Amazon S3: `s3:s3.amazonaws.com/bucket_name`
|
||||
* Backblaze B2: `b2:bucketname:path/to/repo`
|
||||
* Rclone (see notes below): `rclone:gd-backup:/restic`
|
||||
|
||||
- `RESTIC_PASSWORD`: Restic repository password in plain text. This is only
|
||||
required if `RESTIC_PASSWORD_FILE` is not defined.
|
||||
- `RESTIC_PASSWORD_FILE`: File with the Restic repository password in plain
|
||||
text. This is only required if `RESTIC_PASSWORD` is not defined. Remember
|
||||
to mount the Docker volume with the file.
|
||||
- `AWS_ACCESS_KEY_ID`: (Optional) Required for Amazon S3, Minio and Wasabi
|
||||
backends.
|
||||
- `AWS_SECRET_ACCESS_KEY`: (Optional) Required for Amazon S3, Minio and Wasabi
|
||||
backends.
|
||||
- `B2_ACCOUNT_ID`: (Optional) Required for Backblaze B2 backend.
|
||||
- `B2_ACCOUNT_KEY`: (Optional) Required for Backblaze B2 backend.
|
||||
- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds.
|
||||
Computing the metrics is an expensive task, keep this value as high as possible.
|
||||
Default is `60` seconds.
|
||||
- `LISTEN_PORT`: (Optional) The port the exporter should listen on. The
|
||||
default is `8001`.
|
||||
- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The
|
||||
default is to listen on all addresses.
|
||||
- `LOG_LEVEL`: (Optional) Log level of the traces. The default is `INFO`.
|
||||
- `EXIT_ON_ERROR`: (Optional) Shutdown exporter on any `restic` error. Default
|
||||
is `False` (only log error, such as network error with Cloud backends).
|
||||
- `NO_CHECK`: (Optional) Do not perform `restic check` operation for performance
|
||||
reasons. Default is `False` (perform `restic check`).
|
||||
- `NO_STATS`: (Optional) Do not collect per backup statistics for performance
|
||||
reasons. Default is `False` (collect per backup statistics).
|
||||
- `NO_LOCKS`: (Optional) Do not collect the number of locks. Default is `False` (collect the number of locks).
|
||||
- `INCLUDE_PATHS`: (Optional) Include snapshot paths for each backup. The paths are separated by commas. Default is `False` (do not collect the paths).
|
||||
|
||||
### Configuration for Rclone
|
||||
|
||||
Rclone is not included in the Docker image. You have to mount the Rclone executable and the Rclone configuration from the host machine. Here is an example with docker-compose:
|
||||
|
||||
```yaml
|
||||
version: '2.1'
|
||||
services:
|
||||
restic-exporter:
|
||||
image: ngosang/restic-exporter
|
||||
container_name: restic-exporter
|
||||
environment:
|
||||
- TZ=Europe/Madrid
|
||||
- RESTIC_REPOSITORY=rclone:gd-backup:/restic
|
||||
- RESTIC_PASSWORD=
|
||||
- REFRESH_INTERVAL=1800 # 30 min
|
||||
volumes:
|
||||
- /host_path/restic/data:/data
|
||||
- /usr/bin/rclone:/usr/bin/rclone:ro
|
||||
- /host_path/restic/rclone.conf:/root/.config/rclone/rclone.conf:ro
|
||||
ports:
|
||||
- "8001:8001"
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
## Exported metrics
|
||||
|
||||
```shell
|
||||
```bash
|
||||
# HELP restic_check_success Result of restic check operation in the repository
|
||||
# TYPE restic_check_success gauge
|
||||
restic_check_success 1.0
|
||||
# HELP restic_locks_total Total number of locks in the repository
|
||||
# TYPE restic_locks_total counter
|
||||
restic_locks_total 1.0
|
||||
# HELP restic_snapshots_total Total number of snapshots in the repository
|
||||
# TYPE restic_snapshots_total counter
|
||||
restic_snapshots_total 1777.0
|
||||
restic_snapshots_total 100.0
|
||||
# HELP restic_backup_timestamp Timestamp of the last backup
|
||||
# TYPE restic_backup_timestamp gauge
|
||||
restic_backup_timestamp{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 1.669754009e+09
|
||||
restic_backup_timestamp{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 1.666273638e+09
|
||||
# HELP restic_backup_files_total Number of files in the backup
|
||||
# TYPE restic_backup_files_total counter
|
||||
restic_backup_files_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 19051.0
|
||||
restic_backup_files_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 8.0
|
||||
# HELP restic_backup_size_total Total size of backup in bytes
|
||||
# TYPE restic_backup_size_total counter
|
||||
restic_backup_size_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 4.1174838248e+010
|
||||
restic_backup_size_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 4.3309562e+07
|
||||
# HELP restic_backup_snapshots_total Total number of snapshots
|
||||
# TYPE restic_backup_snapshots_total counter
|
||||
restic_backup_snapshots_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 106.0
|
||||
restic_backup_snapshots_total{client_hostname="product.example.com",client_username="root",client_version="restic 0.16.0",snapshot_hash="20795072cba0953bcdbe52e9cf9d75e5726042f5bbf2584bb2999372398ee835",snapshot_tag="mysql",snapshot_tags="mysql,tag2",snapshot_paths="/mysql/data,/mysql/config"} 1.0
|
||||
# HELP restic_scrape_duration_seconds Amount of time each scrape takes
|
||||
# TYPE restic_scrape_duration_seconds gauge
|
||||
restic_scrape_duration_seconds 166.9411084651947
|
||||
```
|
||||
|
||||
## Prometheus config
|
||||
|
|
|
@ -6,9 +6,9 @@ services:
|
|||
container_name: restic-exporter
|
||||
environment:
|
||||
- TZ=Europe/Madrid
|
||||
- RESTIC_REPO_URL=/data
|
||||
- RESTIC_REPO_PASSWORD=password_here
|
||||
# - RESTIC_REPO_PASSWORD_FILE=/file_with_password_here
|
||||
- RESTIC_REPOSITORY=/data
|
||||
- RESTIC_PASSWORD=password_here
|
||||
# - RESTIC_PASSWORD_FILE=/file_with_password_here
|
||||
- REFRESH_INTERVAL=1800 # 30 min
|
||||
volumes:
|
||||
- /host_path/restic/data:/data
|
||||
|
|
|
@ -3,16 +3,23 @@
|
|||
# Exit on error. For debug use set -x
|
||||
set -e
|
||||
|
||||
export PASSWORD_FILE="/tmp/restic_passwd"
|
||||
if [ -n "${RESTIC_REPO_PASSWORD}" ]; then
|
||||
echo "The environment variable RESTIC_REPO_PASSWORD is deprecated, please use RESTIC_PASSWORD instead."
|
||||
export RESTIC_PASSWORD="${RESTIC_REPO_PASSWORD}"
|
||||
fi
|
||||
if [ -n "${RESTIC_REPO_PASSWORD_FILE}" ]; then
|
||||
echo "The environment variable RESTIC_REPO_PASSWORD_FILE is deprecated, please use RESTIC_PASSWORD_FILE instead."
|
||||
export RESTIC_PASSWORD_FILE="${RESTIC_REPO_PASSWORD_FILE}"
|
||||
fi
|
||||
|
||||
if [ -z "${RESTIC_REPO_PASSWORD}" ]; then
|
||||
if [ -z "${RESTIC_REPO_PASSWORD_FILE}" ]; then
|
||||
echo "You have to define one of these environment variables: RESTIC_REPO_PASSWORD or RESTIC_REPO_PASSWORD_FILE"
|
||||
else
|
||||
cp "${RESTIC_REPO_PASSWORD_FILE}" "${PASSWORD_FILE}"
|
||||
if [ -z "${RESTIC_PASSWORD}" ]; then
|
||||
if [ -z "${RESTIC_PASSWORD_FILE}" ]; then
|
||||
echo "You have to define one of these environment variables: RESTIC_PASSWORD or RESTIC_PASSWORD_FILE"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "${RESTIC_REPO_PASSWORD}" > "${PASSWORD_FILE}"
|
||||
export RESTIC_PASSWORD_FILE="/tmp/restic_passwd"
|
||||
echo "${RESTIC_PASSWORD}" > "${RESTIC_PASSWORD_FILE}"
|
||||
fi
|
||||
|
||||
/usr/local/bin/python -u /restic-exporter.py
|
||||
|
|
|
@ -1,4 +1,53 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "9.3.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "piechart",
|
||||
"name": "Pie chart",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "stat",
|
||||
"name": "Stat",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
|
@ -26,14 +75,22 @@
|
|||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 9846,
|
||||
"graphTooltip": 0,
|
||||
"id": 40,
|
||||
"links": [],
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"icon": "external link",
|
||||
"tags": [
|
||||
"system"
|
||||
],
|
||||
"type": "dashboards"
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
@ -75,7 +132,7 @@
|
|||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 24,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
|
@ -102,7 +159,11 @@
|
|||
"expr": "restic_check_success",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Repository Check",
|
||||
|
@ -111,7 +172,75 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 42,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "restic_locks_total",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Repository Locks",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
@ -193,7 +322,7 @@
|
|||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 15,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 3
|
||||
},
|
||||
|
@ -228,7 +357,11 @@
|
|||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -239,7 +372,11 @@
|
|||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "B"
|
||||
"refId": "B",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Last backup",
|
||||
|
@ -259,7 +396,7 @@
|
|||
"client_id": true,
|
||||
"client_os_version": true,
|
||||
"client_username": true,
|
||||
"client_version": true,
|
||||
"client_version": false,
|
||||
"instance": true,
|
||||
"job": true,
|
||||
"snapshot_hash": true,
|
||||
|
@ -279,7 +416,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
|
@ -309,8 +446,8 @@
|
|||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 9,
|
||||
"x": 15,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 3
|
||||
},
|
||||
"id": 32,
|
||||
|
@ -336,6 +473,11 @@
|
|||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "restic_backup_snapshots_total",
|
||||
"format": "table",
|
||||
|
@ -378,7 +520,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
|
@ -421,7 +563,7 @@
|
|||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 9,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 12
|
||||
},
|
||||
|
@ -448,6 +590,10 @@
|
|||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"exemplar": true,
|
||||
"expr": "restic_backup_size_total",
|
||||
"format": "table",
|
||||
|
@ -456,31 +602,70 @@
|
|||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "restic_backup_files_total",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Total backup size",
|
||||
"title": "Total backup size & files",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "joinByField",
|
||||
"options": {
|
||||
"byField": "client_hostname",
|
||||
"mode": "outer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Time 1": true,
|
||||
"Time 2": true,
|
||||
"__name__": true,
|
||||
"__name__ 1": true,
|
||||
"backup_id": true,
|
||||
"backup_type": true,
|
||||
"client_hostname": false,
|
||||
"client_id": true,
|
||||
"client_os_version": true,
|
||||
"client_username": true,
|
||||
"client_username 1": true,
|
||||
"client_username 2": true,
|
||||
"client_version": true,
|
||||
"client_version 1": true,
|
||||
"client_version 2": true,
|
||||
"instance": true,
|
||||
"instance 1": true,
|
||||
"instance 2": true,
|
||||
"job": true,
|
||||
"job 1": true,
|
||||
"job 2": true,
|
||||
"snapshot_hash": true,
|
||||
"snapshot_hash 1": true,
|
||||
"snapshot_hash 2": true,
|
||||
"snapshot_id": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"Value": "total_backup_size"
|
||||
"Time 2": "",
|
||||
"Value": "total_backup_size",
|
||||
"Value #A": "total_backup_size",
|
||||
"Value #B": "total_backup_files"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -490,7 +675,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
|
@ -513,7 +698,7 @@
|
|||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 9,
|
||||
"x": 18,
|
||||
"y": 12
|
||||
},
|
||||
"id": 35,
|
||||
|
@ -552,7 +737,11 @@
|
|||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Total backup size",
|
||||
|
@ -583,110 +772,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto",
|
||||
"filterable": false,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 9,
|
||||
"x": 15,
|
||||
"y": 12
|
||||
},
|
||||
"id": 33,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 1,
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "client_hostname"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "9.3.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
},
|
||||
"exemplar": true,
|
||||
"expr": "restic_backup_files_total",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total backup files",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"__name__": true,
|
||||
"backup_id": true,
|
||||
"backup_type": true,
|
||||
"client_hostname": false,
|
||||
"client_id": true,
|
||||
"client_os_version": true,
|
||||
"client_username": true,
|
||||
"client_version": true,
|
||||
"instance": true,
|
||||
"job": true,
|
||||
"snapshot_hash": true,
|
||||
"snapshot_id": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"Value": "total_backup_files"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
@ -772,7 +858,11 @@
|
|||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Total backup size",
|
||||
|
@ -781,7 +871,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
@ -863,7 +953,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"exemplar": true,
|
||||
"expr": "sum by(client_hostname,client_username) (restic_backup_files_total)",
|
||||
|
@ -880,7 +970,7 @@
|
|||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fmMKqssGk"
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
|
@ -966,7 +1056,11 @@
|
|||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{client_hostname}}",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
}
|
||||
}
|
||||
],
|
||||
"title": "Total snapshot count",
|
||||
|
@ -1014,6 +1108,6 @@
|
|||
"timezone": "",
|
||||
"title": "Restic Exporter",
|
||||
"uid": "2JzZl3B7k",
|
||||
"version": 22,
|
||||
"version": 25,
|
||||
"weekStart": ""
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 124 KiB After Width: | Height: | Size: 183 KiB |
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"name": "ngosang-restic-exporter",
|
||||
"version": "1.0.0",
|
||||
"version": "1.5.0",
|
||||
"author": "ngosang@hotmail.es"
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
prometheus-client==0.15.0
|
||||
prometheus-client==0.19.0
|
||||
|
|
|
@ -8,21 +8,31 @@ import time
|
|||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import prometheus_client
|
||||
import prometheus_client.core
|
||||
from prometheus_client import start_http_server
|
||||
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGISTRY
|
||||
|
||||
|
||||
class ResticCollector(object):
|
||||
def __init__(self, repository, password_file_):
|
||||
def __init__(
|
||||
self, repository, password_file, exit_on_error, disable_check,
|
||||
disable_stats, disable_locks, include_paths
|
||||
):
|
||||
self.repository = repository
|
||||
self.password_file = password_file_
|
||||
self.password_file = password_file
|
||||
self.exit_on_error = exit_on_error
|
||||
self.disable_check = disable_check
|
||||
self.disable_stats = disable_stats
|
||||
self.disable_locks = disable_locks
|
||||
self.include_paths = include_paths
|
||||
# todo: the stats cache increases over time -> remove old ids
|
||||
# todo: cold start -> the stats cache could be saved in a persistent volume
|
||||
# todo: cold start -> the restic cache (/root/.cache/restic) could be saved in a persistent volume
|
||||
# todo: cold start -> the restic cache (/root/.cache/restic) could be
|
||||
# saved in a persistent volume
|
||||
self.stats_cache = {}
|
||||
self.metrics = {}
|
||||
self.refresh()
|
||||
self.refresh(exit_on_error)
|
||||
|
||||
def collect(self):
|
||||
logging.debug("Incoming request")
|
||||
|
@ -30,120 +40,214 @@ class ResticCollector(object):
|
|||
common_label_names = [
|
||||
"client_hostname",
|
||||
"client_username",
|
||||
"snapshot_hash"
|
||||
"client_version",
|
||||
"snapshot_hash",
|
||||
"snapshot_tag",
|
||||
"snapshot_tags",
|
||||
"snapshot_paths",
|
||||
]
|
||||
|
||||
check_success = prometheus_client.core.GaugeMetricFamily(
|
||||
check_success = GaugeMetricFamily(
|
||||
"restic_check_success",
|
||||
"Result of restic check operation in the repository",
|
||||
labels=[])
|
||||
|
||||
snapshots_total = prometheus_client.core.CounterMetricFamily(
|
||||
labels=[],
|
||||
)
|
||||
locks_total = CounterMetricFamily(
|
||||
"restic_locks_total",
|
||||
"Total number of locks in the repository",
|
||||
labels=[],
|
||||
)
|
||||
snapshots_total = CounterMetricFamily(
|
||||
"restic_snapshots_total",
|
||||
"Total number of snapshots in the repository",
|
||||
labels=[])
|
||||
|
||||
backup_timestamp = prometheus_client.core.GaugeMetricFamily(
|
||||
labels=[],
|
||||
)
|
||||
backup_timestamp = GaugeMetricFamily(
|
||||
"restic_backup_timestamp",
|
||||
"Timestamp of the last backup",
|
||||
labels=common_label_names)
|
||||
|
||||
backup_files_total = prometheus_client.core.CounterMetricFamily(
|
||||
labels=common_label_names,
|
||||
)
|
||||
backup_files_total = CounterMetricFamily(
|
||||
"restic_backup_files_total",
|
||||
"Number of files in the backup",
|
||||
labels=common_label_names)
|
||||
|
||||
backup_size_total = prometheus_client.core.CounterMetricFamily(
|
||||
labels=common_label_names,
|
||||
)
|
||||
backup_size_total = CounterMetricFamily(
|
||||
"restic_backup_size_total",
|
||||
"Total size of backup in bytes",
|
||||
labels=common_label_names)
|
||||
|
||||
backup_snapshots_total = prometheus_client.core.CounterMetricFamily(
|
||||
labels=common_label_names,
|
||||
)
|
||||
backup_snapshots_total = CounterMetricFamily(
|
||||
"restic_backup_snapshots_total",
|
||||
"Total number of snapshots",
|
||||
labels=common_label_names)
|
||||
labels=common_label_names,
|
||||
)
|
||||
scrape_duration_seconds = GaugeMetricFamily(
|
||||
"restic_scrape_duration_seconds",
|
||||
"Amount of time each scrape takes",
|
||||
labels=[],
|
||||
)
|
||||
|
||||
check_success.add_metric([], self.metrics["check_success"])
|
||||
locks_total.add_metric([], self.metrics["locks_total"])
|
||||
snapshots_total.add_metric([], self.metrics["snapshots_total"])
|
||||
|
||||
for client in self.metrics['clients']:
|
||||
for client in self.metrics["clients"]:
|
||||
common_label_values = [
|
||||
client["hostname"],
|
||||
client["username"],
|
||||
client["snapshot_hash"]
|
||||
client["version"],
|
||||
client["snapshot_hash"],
|
||||
client["snapshot_tag"],
|
||||
client["snapshot_tags"],
|
||||
client["snapshot_paths"],
|
||||
]
|
||||
|
||||
backup_timestamp.add_metric(common_label_values, client["timestamp"])
|
||||
backup_files_total.add_metric(common_label_values, client["files_total"])
|
||||
backup_size_total.add_metric(common_label_values, client["size_total"])
|
||||
backup_snapshots_total.add_metric(common_label_values, client["snapshots_total"])
|
||||
backup_snapshots_total.add_metric(
|
||||
common_label_values, client["snapshots_total"]
|
||||
)
|
||||
|
||||
scrape_duration_seconds.add_metric([], self.metrics["duration"])
|
||||
|
||||
yield check_success
|
||||
yield locks_total
|
||||
yield snapshots_total
|
||||
yield backup_timestamp
|
||||
yield backup_files_total
|
||||
yield backup_size_total
|
||||
yield backup_snapshots_total
|
||||
yield scrape_duration_seconds
|
||||
|
||||
def refresh(self):
|
||||
def refresh(self, exit_on_error=False):
|
||||
try:
|
||||
self.metrics = self.get_metrics()
|
||||
except Exception as e:
|
||||
logging.error("Unable to collect metrics from Restic. Error: %s", str(e))
|
||||
except Exception:
|
||||
logging.error(
|
||||
"Unable to collect metrics from Restic. %s",
|
||||
traceback.format_exc(0).replace("\n", " "),
|
||||
)
|
||||
|
||||
# Shutdown exporter for any error
|
||||
if exit_on_error:
|
||||
sys.exit(1)
|
||||
|
||||
def get_metrics(self):
|
||||
duration = time.time()
|
||||
|
||||
# calc total number of snapshots per hash
|
||||
all_snapshots = self.get_snapshots()
|
||||
latest_snapshots = self.get_snapshots(True)
|
||||
snap_total_counter = {}
|
||||
for snap in all_snapshots:
|
||||
if snap["hash"] not in snap_total_counter:
|
||||
snap_total_counter[snap["hash"]] = 1
|
||||
else:
|
||||
snap_total_counter[snap["hash"]] += 1
|
||||
|
||||
# get the latest snapshot per hash
|
||||
latest_snapshots_dup = self.get_snapshots(True)
|
||||
latest_snapshots = {}
|
||||
for snap in latest_snapshots_dup:
|
||||
time_parsed = re.sub(r"\.[^+-]+", "", snap["time"])
|
||||
if len(time_parsed) > 19:
|
||||
# restic 14: '2023-01-12T06:59:33.1576588+01:00' ->
|
||||
# '2023-01-12T06:59:33+01:00'
|
||||
time_format = "%Y-%m-%dT%H:%M:%S%z"
|
||||
else:
|
||||
# restic 12: '2023-02-01T14:14:19.30760523Z' ->
|
||||
# '2023-02-01T14:14:19'
|
||||
time_format = "%Y-%m-%dT%H:%M:%S"
|
||||
timestamp = time.mktime(
|
||||
datetime.datetime.strptime(time_parsed, time_format).timetuple()
|
||||
)
|
||||
snap["timestamp"] = timestamp
|
||||
if snap["hash"] not in latest_snapshots or \
|
||||
snap["timestamp"] > latest_snapshots[snap["hash"]]["timestamp"]:
|
||||
latest_snapshots[snap["hash"]] = snap
|
||||
|
||||
clients = []
|
||||
for snap in latest_snapshots:
|
||||
stats = self.get_stats(snap['id'])
|
||||
for snap in list(latest_snapshots.values()):
|
||||
# collect stats for each snap only if enabled
|
||||
if self.disable_stats:
|
||||
# return -1 as "no-stats" value
|
||||
stats = {
|
||||
"total_size": -1,
|
||||
"total_file_count": -1,
|
||||
}
|
||||
else:
|
||||
stats = self.get_stats(snap["id"])
|
||||
|
||||
time_parsed = re.sub(r'\.[^+-]+', '', snap['time'])
|
||||
timestamp = time.mktime(datetime.datetime.strptime(time_parsed, "%Y-%m-%dT%H:%M:%S%z").timetuple())
|
||||
clients.append(
|
||||
{
|
||||
"hostname": snap["hostname"],
|
||||
"username": snap["username"],
|
||||
"version": snap["program_version"] if "program_version" in snap else "",
|
||||
"snapshot_hash": snap["hash"],
|
||||
"snapshot_tag": snap["tags"][0] if "tags" in snap else "",
|
||||
"snapshot_tags": ",".join(snap["tags"]) if "tags" in snap else "",
|
||||
"snapshot_paths": ",".join(snap["paths"]) if self.include_paths else "",
|
||||
"timestamp": snap["timestamp"],
|
||||
"size_total": stats["total_size"],
|
||||
"files_total": stats["total_file_count"],
|
||||
"snapshots_total": snap_total_counter[snap["hash"]],
|
||||
}
|
||||
)
|
||||
|
||||
snapshots_total = 0
|
||||
for snap2 in all_snapshots:
|
||||
if snap2['hash'] == snap['hash']:
|
||||
snapshots_total += 1
|
||||
|
||||
clients.append({
|
||||
'snapshot_hash': snap['hash'],
|
||||
'hostname': snap['hostname'],
|
||||
'username': snap['username'],
|
||||
'timestamp': timestamp,
|
||||
'size_total': stats['total_size'],
|
||||
'files_total': stats['total_file_count'],
|
||||
'snapshots_total': snapshots_total
|
||||
})
|
||||
# todo: fix the commented code when the bug is fixed in restic
|
||||
# https://github.com/restic/restic/issues/2126
|
||||
# stats = self.get_stats()
|
||||
check_success = self.get_check()
|
||||
|
||||
if self.disable_check:
|
||||
# return 2 as "no-check" value
|
||||
check_success = 2
|
||||
else:
|
||||
check_success = self.get_check()
|
||||
|
||||
if self.disable_locks:
|
||||
# return 0 as "no-locks" value
|
||||
locks_total = 0
|
||||
else:
|
||||
locks_total = self.get_locks()
|
||||
|
||||
metrics = {
|
||||
'check_success': check_success,
|
||||
'clients': clients,
|
||||
"check_success": check_success,
|
||||
"locks_total": locks_total,
|
||||
"clients": clients,
|
||||
"snapshots_total": len(all_snapshots),
|
||||
"duration": time.time() - duration
|
||||
# 'size_total': stats['total_size'],
|
||||
# 'files_total': stats['total_file_count'],
|
||||
'snapshots_total': len(all_snapshots)
|
||||
}
|
||||
|
||||
return metrics
|
||||
|
||||
def get_snapshots(self, only_latest=False):
|
||||
cmd = [
|
||||
'restic',
|
||||
'-r', self.repository,
|
||||
'-p', self.password_file,
|
||||
'--no-lock',
|
||||
'snapshots', '--json'
|
||||
"restic",
|
||||
"-r",
|
||||
self.repository,
|
||||
"-p",
|
||||
self.password_file,
|
||||
"--no-lock",
|
||||
"snapshots",
|
||||
"--json",
|
||||
]
|
||||
if only_latest:
|
||||
cmd.extend(['--latest', '1'])
|
||||
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE)
|
||||
if only_latest:
|
||||
cmd.extend(["--latest", "1"])
|
||||
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if result.returncode != 0:
|
||||
raise Exception("Error executing restic snapshot command. Exit code: " + str(result.returncode))
|
||||
snapshots = json.loads(result.stdout.decode('utf-8'))
|
||||
raise Exception(
|
||||
"Error executing restic snapshot command: " + self.parse_stderr(result)
|
||||
)
|
||||
snapshots = json.loads(result.stdout.decode("utf-8"))
|
||||
for snap in snapshots:
|
||||
snap['hash'] = self.calc_snapshot_hash(snap)
|
||||
if "username" not in snap:
|
||||
snap["username"] = ""
|
||||
snap["hash"] = self.calc_snapshot_hash(snap)
|
||||
return snapshots
|
||||
|
||||
def get_stats(self, snapshot_id=None):
|
||||
|
@ -154,19 +258,24 @@ class ResticCollector(object):
|
|||
return self.stats_cache[snapshot_id]
|
||||
|
||||
cmd = [
|
||||
'restic',
|
||||
'-r', self.repository,
|
||||
'-p', self.password_file,
|
||||
'--no-lock',
|
||||
'stats', '--json'
|
||||
"restic",
|
||||
"-r",
|
||||
self.repository,
|
||||
"-p",
|
||||
self.password_file,
|
||||
"--no-lock",
|
||||
"stats",
|
||||
"--json",
|
||||
]
|
||||
if snapshot_id is not None:
|
||||
cmd.extend([snapshot_id])
|
||||
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE)
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if result.returncode != 0:
|
||||
raise Exception("Error executing restic stats command. Exit code: " + str(result.returncode))
|
||||
stats = json.loads(result.stdout.decode('utf-8'))
|
||||
raise Exception(
|
||||
"Error executing restic stats command: " + self.parse_stderr(result)
|
||||
)
|
||||
stats = json.loads(result.stdout.decode("utf-8"))
|
||||
|
||||
if snapshot_id is not None:
|
||||
self.stats_cache[snapshot_id] = stats
|
||||
|
@ -176,58 +285,124 @@ class ResticCollector(object):
|
|||
def get_check(self):
|
||||
# This command takes 20 seconds or more, but it's required
|
||||
cmd = [
|
||||
'restic',
|
||||
'-r', self.repository,
|
||||
'-p', self.password_file,
|
||||
'--no-lock',
|
||||
'check'
|
||||
"restic",
|
||||
"-r",
|
||||
self.repository,
|
||||
"-p",
|
||||
self.password_file,
|
||||
"--no-lock",
|
||||
"check",
|
||||
]
|
||||
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE)
|
||||
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if result.returncode == 0:
|
||||
return 1 # ok
|
||||
return 0 # error
|
||||
else:
|
||||
logging.warning(
|
||||
"Error checking the repository health. " + self.parse_stderr(result)
|
||||
)
|
||||
return 0 # error
|
||||
|
||||
def calc_snapshot_hash(self, snapshot: dict) -> str:
|
||||
text = snapshot['hostname'] + ",".join(snapshot['paths'])
|
||||
return hashlib.sha256(text.encode('utf-8')).hexdigest()
|
||||
def get_locks(self):
    """Return the number of locks currently listed for the repository.

    Runs ``restic -r <repository> -p <password_file> --no-lock list locks``
    and counts the non-empty lines of its standard output (restic is
    expected to print one lock ID per line -- see the restic manual).

    Returns:
        int: number of non-empty output lines; 0 when nothing is printed.

    Raises:
        Exception: if restic exits with a non-zero code. The message
            carries restic's stderr and exit code via ``parse_stderr``.
    """
    cmd = [
        "restic",
        "-r",
        self.repository,
        "-p",
        self.password_file,
        "--no-lock",
        "list",
        "locks",
    ]

    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        raise Exception(
            "Error executing restic list locks command: " + self.parse_stderr(result)
        )

    text_result = result.stdout.decode("utf-8")
    # Count real lines instead of "number of newlines": the result no longer
    # depends on whether the output ends with a trailing newline.
    return sum(1 for line in text_result.splitlines() if line.strip())
|
||||
|
||||
@staticmethod
|
||||
def calc_snapshot_hash(snapshot: dict) -> str:
|
||||
text = snapshot["hostname"] + snapshot["username"] + ",".join(snapshot["paths"])
|
||||
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||
|
||||
@staticmethod
|
||||
def parse_stderr(result):
|
||||
return (
|
||||
result.stderr.decode("utf-8").replace("\n", " ")
|
||||
+ " Exit code: "
|
||||
+ str(result.returncode)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s %(levelname)-8s %(message)s',
|
||||
format="%(asctime)s %(levelname)-8s %(message)s",
|
||||
level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO")),
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logging.info("Starting Restic Prometheus Exporter ...")
|
||||
logging.info("It could take a while if the repository is remote.")
|
||||
logging.info("Starting Restic Prometheus Exporter")
|
||||
logging.info("It could take a while if the repository is remote")
|
||||
|
||||
try:
|
||||
restic_repo_url = os.environ["RESTIC_REPO_URL"]
|
||||
except Exception:
|
||||
logging.error("Configuration error. The environment variable RESTIC_REPO_URL is mandatory")
|
||||
restic_repo_url = os.environ.get("RESTIC_REPOSITORY")
|
||||
if restic_repo_url is None:
|
||||
restic_repo_url = os.environ.get("RESTIC_REPO_URL")
|
||||
if restic_repo_url is not None:
|
||||
logging.warning(
|
||||
"The environment variable RESTIC_REPO_URL is deprecated, "
|
||||
"please use RESTIC_REPOSITORY instead."
|
||||
)
|
||||
if restic_repo_url is None:
|
||||
logging.error("The environment variable RESTIC_REPOSITORY is mandatory")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
password_file = os.environ["PASSWORD_FILE"]
|
||||
except Exception:
|
||||
logging.error("Configuration error. The environment variable PASSWORD_FILE is mandatory")
|
||||
restic_repo_password_file = os.environ.get("RESTIC_PASSWORD_FILE")
|
||||
if restic_repo_password_file is None:
|
||||
restic_repo_password_file = os.environ.get("RESTIC_REPO_PASSWORD_FILE")
|
||||
if restic_repo_password_file is not None:
|
||||
logging.warning(
|
||||
"The environment variable RESTIC_REPO_PASSWORD_FILE is deprecated, "
|
||||
"please use RESTIC_PASSWORD_FILE instead."
|
||||
)
|
||||
if restic_repo_password_file is None:
|
||||
logging.error("The environment variable RESTIC_PASSWORD_FILE is mandatory")
|
||||
sys.exit(1)
|
||||
|
||||
exporter_address = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
|
||||
exporter_port = int(os.environ.get("LISTEN_PORT", 8001))
|
||||
exporter_refresh_interval = int(os.environ.get("REFRESH_INTERVAL", 60))
|
||||
exporter_exit_on_error = bool(os.environ.get("EXIT_ON_ERROR", False))
|
||||
exporter_disable_check = bool(os.environ.get("NO_CHECK", False))
|
||||
exporter_disable_stats = bool(os.environ.get("NO_STATS", False))
|
||||
exporter_disable_locks = bool(os.environ.get("NO_LOCKS", False))
|
||||
exporter_include_paths = bool(os.environ.get("INCLUDE_PATHS", False))
|
||||
|
||||
collector = ResticCollector(restic_repo_url, password_file)
|
||||
try:
|
||||
collector = ResticCollector(
|
||||
restic_repo_url,
|
||||
restic_repo_password_file,
|
||||
exporter_exit_on_error,
|
||||
exporter_disable_check,
|
||||
exporter_disable_stats,
|
||||
exporter_disable_locks,
|
||||
exporter_include_paths,
|
||||
)
|
||||
REGISTRY.register(collector)
|
||||
start_http_server(exporter_port, exporter_address)
|
||||
logging.info(
|
||||
"Serving at http://{0}:{1}".format(exporter_address, exporter_port)
|
||||
)
|
||||
|
||||
prometheus_client.core.REGISTRY.register(collector)
|
||||
prometheus_client.start_http_server(exporter_port, exporter_address)
|
||||
while True:
|
||||
logging.info(
|
||||
"Refreshing stats every {0} seconds".format(exporter_refresh_interval)
|
||||
)
|
||||
time.sleep(exporter_refresh_interval)
|
||||
collector.refresh()
|
||||
|
||||
logging.info("Server listening in http://%s:%d/metrics", exporter_address, exporter_port)
|
||||
while True:
|
||||
logging.info("Refreshing stats every %d seconds", exporter_refresh_interval)
|
||||
time.sleep(exporter_refresh_interval)
|
||||
collector.refresh()
|
||||
except KeyboardInterrupt:
|
||||
logging.info("\nInterrupted")
|
||||
exit(0)
|
||||
|
|
Loading…
Add table
Reference in a new issue