First release: restic-exporter 1.0.0

This commit is contained in:
ngosang 2022-12-06 20:40:47 +01:00
parent 5f23dcb5f1
commit 69c7485071
13 changed files with 1581 additions and 2 deletions

20
.github/workflows/autotag.yml vendored Normal file
View file

@ -0,0 +1,20 @@
# Workflow "autotag": runs on every push to the "main" branch, checks out the
# repository and then runs the Klemensas/action-autotag action.
# NOTE(review): presumably the action creates a git tag from the "version"
# field of package.json - confirm against the action's documentation.
name: autotag
on:
push:
branches:
- "main"
jobs:
build:
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v2
-
name: Auto Tag
uses: Klemensas/action-autotag@stable
with:
# Token taken from the GH_PAT repository secret.
# NOTE(review): presumably a personal access token is used so that the new
# tag can trigger the tag-based release workflow - confirm.
GITHUB_TOKEN: "${{ secrets.GH_PAT }}"
# Empty prefix: the tag name carries no prefix in front of the version.
tag_prefix: ""

52
.github/workflows/release-docker.yml vendored Normal file
View file

@ -0,0 +1,52 @@
# Workflow "release-docker": runs when a tag matching '*.*.*' is pushed,
# builds the Docker image from ./Dockerfile for several platforms and pushes
# it to DockerHub and to the GitHub Container Registry (ghcr.io).
name: release-docker
on:
push:
tags:
- '*.*.*'
jobs:
build:
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v2
-
# Despite its name, this step does not transform anything: it writes the
# fixed value "ngosang/restic-exporter" to $GITHUB_ENV as REPOSITORY so the
# following steps can read it as env.REPOSITORY.
name: Downcase repo
run: echo REPOSITORY=ngosang/restic-exporter >> $GITHUB_ENV
-
# The "tags" and "labels" outputs of this step are consumed by the
# "Build and push" step below.
name: Docker metadata
id: docker_metadata
uses: docker/metadata-action@v3
with:
# Two image names: the bare name and the same name under ghcr.io.
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }}
-
# NOTE(review): presumably required to build the non-native platforms
# listed in "Build and push" through emulation - confirm.
name: Set up QEMU
uses: docker/setup-qemu-action@v1.0.1
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
-
name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GitHub Container Registry
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GH_PAT }}
-
name: Build and push
uses: docker/build-push-action@v2
with:
context: .
file: ./Dockerfile
# Keep this list in sync with the architectures documented in README.md
# and in the help comment at the end of the Dockerfile.
platforms: linux/386, linux/amd64, linux/arm/v6, linux/arm/v7, linux/arm64/v8, linux/ppc64le, linux/s390x
# This workflow is only triggered by tag pushes (see "on" above), so this
# expression is true for every run of the file as written.
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.docker_metadata.outputs.tags }}
labels: ${{ steps.docker_metadata.outputs.labels }}

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
*.pyc
.settings
.directory
.idea/
_trial_temp/
.vscode
.run/

6
CHANGELOG.md Normal file
View file

@ -0,0 +1,6 @@
# Changelog
## 1.0.0 (2022/12/06)
* First release
* Restic 0.14.0

41
Dockerfile Normal file
View file

@ -0,0 +1,41 @@
# Stage 1: build the restic executable from source.
FROM golang:alpine3.17 AS builder

# "ENV key=value" is the current syntax; the space-separated form is legacy.
ENV RESTIC_VERSION=0.14.0
ENV CGO_ENABLED=0

RUN cd /tmp \
    # download restic source code
    && wget https://github.com/restic/restic/archive/refs/tags/v${RESTIC_VERSION}.tar.gz -O restic.tar.gz \
    && tar xvf restic.tar.gz \
    && cd restic-* \
    # build the executable
    # flag -ldflags "-s -w" produces a smaller executable
    && go build -ldflags "-s -w" -v -o /tmp/restic ./cmd/restic

# Stage 2: runtime image with Python, the exporter and the restic executable.
FROM python:3.11-alpine3.17

# tzdata is required for the TZ environment variable to take effect
RUN apk add --no-cache --update tzdata

# Explicit destination file name, so the result does not depend on /usr/bin
# being treated as a directory.
COPY --from=builder /tmp/restic /usr/bin/restic
COPY entrypoint.sh requirements.txt /

# --no-cache-dir keeps the pip download cache out of the image layer
RUN pip install --no-cache-dir -r /requirements.txt \
    # remove temporary files
    && rm -rf /root/.cache

COPY ./restic-exporter.py /restic-exporter.py

EXPOSE 8001

CMD [ "/entrypoint.sh" ]

# Help
#
# Local build
# docker build -t restic-exporter:custom .
#
# Multi-arch build
# docker buildx create --use
# docker buildx build -t restic-exporter:custom --platform linux/386,linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64/v8,linux/ppc64le,linux/s390x .
#
# add --push to publish in DockerHub

164
README.md
View file

@ -1,2 +1,162 @@
# restic-exporter
Prometheus exporter for the Restic backup system
# ngosang/restic-exporter
[![Latest release](https://img.shields.io/github/v/release/ngosang/restic-exporter)](https://github.com/ngosang/restic-exporter/releases)
[![Docker Pulls](https://img.shields.io/docker/pulls/ngosang/restic-exporter)](https://hub.docker.com/r/ngosang/restic-exporter/)
[![Donate PayPal](https://img.shields.io/badge/Donate-PayPal-yellow.svg)](https://www.paypal.com/paypalme/diegoheras0xff)
[![Donate Bitcoin](https://img.shields.io/badge/Donate-Bitcoin-f7931a.svg)](https://www.blockchain.com/btc/address/14EcPN47rWXkmFvjfohJx2rQxxoeBRJhej)
[![Donate Ethereum](https://img.shields.io/badge/Donate-Ethereum-8c8c8c.svg)](https://www.blockchain.com/eth/address/0x0D1549BbB00926BF3D92c1A8A58695e982f1BE2E)
Prometheus exporter for the [Restic](https://github.com/restic/restic) backup system.
## Install
### From source code
Requirements:
* Python 3
* [prometheus-client](https://github.com/prometheus/client_python)
* [Restic](https://github.com/restic/restic) executable available in the `PATH`
```bash
pip install -r requirements.txt
export RESTIC_REPO_URL=/data
export PASSWORD_FILE=/restic_password_file
python restic-exporter.py
```
### Docker
Docker images are available in [GHCR](https://github.com/ngosang/restic-exporter/pkgs/container/restic-exporter) and [DockerHub](https://hub.docker.com/r/ngosang/restic-exporter).
```bash
docker pull ghcr.io/ngosang/restic-exporter
# or
docker pull ngosang/restic-exporter
```
#### Supported Architectures
The architectures supported by this image are:
* linux/386
* linux/amd64
* linux/arm/v6
* linux/arm/v7
* linux/arm64/v8
* linux/ppc64le
* linux/s390x
#### docker-compose
Compatible with docker-compose v2 schemas:
```yaml
---
version: '2.1'
services:
restic-exporter:
image: ngosang/restic-exporter
container_name: restic-exporter
environment:
- TZ=Europe/Madrid
- RESTIC_REPO_URL=/data
- RESTIC_REPO_PASSWORD=<password_here>
# - RESTIC_REPO_PASSWORD_FILE=</file_with_password_here>
- REFRESH_INTERVAL=1800 # 30 min
volumes:
- /host_path/restic/data:/data
ports:
- "8001:8001"
restart: unless-stopped
```
#### docker cli
```bash
docker run -d \
--name=restic-exporter \
-e TZ=Europe/Madrid \
-e RESTIC_REPO_URL=/data \
-e RESTIC_REPO_PASSWORD=<password_here> \
-e REFRESH_INTERVAL=1800 \
-p 8001:8001 \
--restart unless-stopped \
ngosang/restic-exporter
```
## Configuration
All configuration is done with environment variables.
- `RESTIC_REPO_URL`: Restic repository URL. It could be a local repository (eg: `/data`) or a remote repository (eg: `rest:http://user:password@127.0.0.1:8000/`).
- `RESTIC_REPO_PASSWORD`: Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD_FILE` is not defined.
- `RESTIC_REPO_PASSWORD_FILE`: File with the Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD` is not defined. Remember to mount the Docker volume with the file.
- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds. Computing the metrics is an expensive task, so keep this value as high as possible. The default is `60`.
- `LISTEN_PORT`: (Optional) The port the exporter should listen on. The default is `8001`.
- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses.
- `LOG_LEVEL`: (Optional) Log level of the traces. The default is `INFO`.
## Exported metrics
```shell
# HELP restic_check_success Result of restic check operation in the repository
# TYPE restic_check_success gauge
restic_check_success 1.0
# HELP restic_snapshots_total Total number of snapshots in the repository
# TYPE restic_snapshots_total counter
restic_snapshots_total 1777.0
# HELP restic_backup_timestamp Timestamp of the last backup
# TYPE restic_backup_timestamp gauge
restic_backup_timestamp{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 1.669754009e+09
# HELP restic_backup_files_total Number of files in the backup
# TYPE restic_backup_files_total counter
restic_backup_files_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 19051.0
# HELP restic_backup_size_total Total size of backup in bytes
# TYPE restic_backup_size_total counter
restic_backup_size_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 4.1174838248e+010
# HELP restic_backup_snapshots_total Total number of snapshots
# TYPE restic_backup_snapshots_total counter
restic_backup_snapshots_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 106.0
```
## Prometheus config
Example Prometheus configuration:
```yaml
scrape_configs:
- job_name: 'restic-exporter'
static_configs:
- targets: ['192.168.1.100:8001']
```
## Prometheus / Alertmanager rules
Example Prometheus rules for alerting:
```yaml
- alert: ResticCheckFailed
expr: restic_check_success == 0
for: 5m
labels:
severity: critical
annotations:
summary: Restic check failed (instance {{ $labels.instance }})
description: Restic check failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}
- alert: ResticOutdatedBackup
# 1209600 seconds = 14 days
expr: time() - restic_backup_timestamp > 1209600
for: 0m
labels:
severity: critical
annotations:
summary: Restic {{ $labels.client_hostname }} / {{ $labels.client_username }} backup is outdated
description: Restic backup is outdated\n VALUE = {{ $value }}\n LABELS = {{ $labels }}
```
## Grafana dashboard
There is a reference Grafana dashboard in [grafana/grafana_dashboard.json](./grafana/grafana_dashboard.json).
![](./grafana/grafana_dashboard.png)

17
docker-compose.yml Normal file
View file

@ -0,0 +1,17 @@
version: "2.1"
services:
restic-exporter:
image: ngosang/restic-exporter
container_name: restic-exporter
environment:
- TZ=Europe/Madrid
- RESTIC_REPO_URL=/data
- RESTIC_REPO_PASSWORD=password_here
# - RESTIC_REPO_PASSWORD_FILE=/file_with_password_here
- REFRESH_INTERVAL=1800 # 30 min
volumes:
- /host_path/restic/data:/data
ports:
- "8001:8001"
restart: unless-stopped

18
entrypoint.sh Executable file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env sh

# Entrypoint of the Docker image.
# Writes the Restic repository password to the file referenced by
# PASSWORD_FILE (read by restic-exporter.py) and then starts the exporter.
# The password comes from RESTIC_REPO_PASSWORD (plain text) or, when that
# variable is empty, from the file named by RESTIC_REPO_PASSWORD_FILE.

# Exit on error. For debug use set -x
set -e

export PASSWORD_FILE="/tmp/restic_passwd"

if [ -z "${RESTIC_REPO_PASSWORD}" ]; then
    if [ -z "${RESTIC_REPO_PASSWORD_FILE}" ]; then
        echo "You have to define one of these environment variables: RESTIC_REPO_PASSWORD or RESTIC_REPO_PASSWORD_FILE" >&2
        # Without a password the exporter cannot open the repository:
        # stop here instead of starting it with a missing password file.
        exit 1
    else
        cp "${RESTIC_REPO_PASSWORD_FILE}" "${PASSWORD_FILE}"
    fi
else
    # printf instead of echo: depending on the shell, echo may treat a
    # leading "-n"/"-e" as an option or interpret backslashes in the password.
    printf '%s\n' "${RESTIC_REPO_PASSWORD}" > "${PASSWORD_FILE}"
fi

/usr/local/bin/python -u /restic-exporter.py

File diff suppressed because it is too large Load diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

5
package.json Normal file
View file

@ -0,0 +1,5 @@
{
"name": "ngosang-restic-exporter",
"version": "1.0.0",
"author": "ngosang@hotmail.es"
}

1
requirements.txt Normal file
View file

@ -0,0 +1 @@
prometheus-client==0.15.0

233
restic-exporter.py Normal file
View file

@ -0,0 +1,233 @@
#!/usr/bin/env python3
import datetime
import hashlib
import json
import logging
import os
import time
import re
import subprocess
import sys
import prometheus_client
import prometheus_client.core
class ResticCollector(object):
    """Prometheus custom collector for a Restic repository.

    The metrics are computed by running the ``restic`` command line tool
    (``snapshots``, ``stats`` and ``check``) when :meth:`refresh` is called
    and are kept in ``self.metrics``. :meth:`collect` only exposes the values
    stored by the last successful refresh; it never runs ``restic`` itself.
    """

    def __init__(self, repository, password_file_):
        """Store the repository settings and compute the first metrics.

        :param repository: Restic repository, passed to ``restic -r``.
        :param password_file_: file with the repository password, passed to
            ``restic -p``.
        """
        self.repository = repository
        self.password_file = password_file_
        # todo: cold start -> the stats cache could be saved in a persistent volume
        # todo: cold start -> the restic cache (/root/.cache/restic) could be saved in a persistent volume
        # Maps snapshot id -> parsed output of "restic stats --json <id>".
        self.stats_cache = {}
        self.metrics = {}
        self.refresh()

    def collect(self):
        """Yield the metric families built from the last computed metrics.

        When no metrics are available (the first refresh failed and
        ``self.metrics`` is still empty) the families are yielded without
        samples instead of raising ``KeyError``.
        """
        logging.debug("Incoming request")

        common_label_names = [
            "client_hostname",
            "client_username",
            "snapshot_hash"
        ]

        check_success = prometheus_client.core.GaugeMetricFamily(
            "restic_check_success",
            "Result of restic check operation in the repository",
            labels=[])

        snapshots_total = prometheus_client.core.CounterMetricFamily(
            "restic_snapshots_total",
            "Total number of snapshots in the repository",
            labels=[])

        backup_timestamp = prometheus_client.core.GaugeMetricFamily(
            "restic_backup_timestamp",
            "Timestamp of the last backup",
            labels=common_label_names)

        backup_files_total = prometheus_client.core.CounterMetricFamily(
            "restic_backup_files_total",
            "Number of files in the backup",
            labels=common_label_names)

        backup_size_total = prometheus_client.core.CounterMetricFamily(
            "restic_backup_size_total",
            "Total size of backup in bytes",
            labels=common_label_names)

        backup_snapshots_total = prometheus_client.core.CounterMetricFamily(
            "restic_backup_snapshots_total",
            "Total number of snapshots",
            labels=common_label_names)

        # Read the attribute once: refresh() replaces the whole dict, so all
        # the samples of this scrape come from the same refresh.
        metrics = self.metrics

        if metrics:
            check_success.add_metric([], metrics["check_success"])
            snapshots_total.add_metric([], metrics["snapshots_total"])

            for client in metrics['clients']:
                common_label_values = [
                    client["hostname"],
                    client["username"],
                    client["snapshot_hash"]
                ]
                backup_timestamp.add_metric(common_label_values, client["timestamp"])
                backup_files_total.add_metric(common_label_values, client["files_total"])
                backup_size_total.add_metric(common_label_values, client["size_total"])
                backup_snapshots_total.add_metric(common_label_values, client["snapshots_total"])
        else:
            logging.warning("No metrics available yet, exposing empty metric families")

        yield check_success
        yield snapshots_total
        yield backup_timestamp
        yield backup_files_total
        yield backup_size_total
        yield backup_snapshots_total

    def refresh(self):
        """Recompute the metrics; on error log it and keep the previous ones."""
        try:
            self.metrics = self.get_metrics()
        except Exception as e:
            logging.error("Unable to collect metrics from Restic. Error: %s", str(e))

    def get_metrics(self):
        """Run the restic commands and return the metrics as a dict.

        :return: dict with the keys ``check_success`` (1 or 0), ``clients``
            (one dict per latest snapshot) and ``snapshots_total``.
        :raises Exception: if a ``restic snapshots`` / ``restic stats``
            command fails or a snapshot time cannot be parsed.
        """
        all_snapshots = self.get_snapshots()
        latest_snapshots = self.get_snapshots(True)

        # Number of snapshots per client hash, computed in a single pass.
        snapshots_per_hash = {}
        for snap in all_snapshots:
            snapshots_per_hash[snap['hash']] = snapshots_per_hash.get(snap['hash'], 0) + 1

        clients = []
        for snap in latest_snapshots:
            stats = self.get_stats(snap['id'])
            clients.append({
                'snapshot_hash': snap['hash'],
                'hostname': snap['hostname'],
                'username': snap['username'],
                'timestamp': self._parse_snapshot_time(snap['time']),
                'size_total': stats['total_size'],
                'files_total': stats['total_file_count'],
                'snapshots_total': snapshots_per_hash.get(snap['hash'], 0)
            })

        # Only the stats of the latest snapshots are ever requested: drop the
        # entries of older snapshots so the cache does not grow over time.
        latest_ids = {snap['id'] for snap in latest_snapshots}
        self.stats_cache = {
            snapshot_id: stats
            for snapshot_id, stats in self.stats_cache.items()
            if snapshot_id in latest_ids
        }

        # todo: fix the commented code when the bug is fixed in restic
        #  https://github.com/restic/restic/issues/2126
        # stats = self.get_stats()
        check_success = self.get_check()
        metrics = {
            'check_success': check_success,
            'clients': clients,
            # 'size_total': stats['total_size'],
            # 'files_total': stats['total_file_count'],
            'snapshots_total': len(all_snapshots)
        }
        return metrics

    @staticmethod
    def _parse_snapshot_time(snapshot_time):
        """Convert the ``time`` field of a snapshot to a Unix timestamp.

        Accepts values like ``2023-01-12T06:59:33.1576588+01:00`` and
        ``2023-02-01T14:14:19.30760523Z``. The UTC offset of the value is
        honoured, so the result does not depend on the local timezone.

        :param snapshot_time: time string as found in the restic JSON output.
        :return: seconds since the epoch as a float.
        :raises ValueError: if the value does not match the expected format.
        """
        # strptime cannot parse more than 6 fractional digits: remove only
        # the fraction and keep the timezone designator ("Z" or "+hh:mm").
        time_parsed = re.sub(r'\.\d+', '', snapshot_time)
        # %z accepts both "Z" and "+hh:mm" since Python 3.7.
        parsed = datetime.datetime.strptime(time_parsed, "%Y-%m-%dT%H:%M:%S%z")
        return parsed.timestamp()

    def _run_restic(self, *args):
        """Run ``restic`` on the repository with ``args`` and capture stdout.

        stderr is not captured, so restic error messages still reach the
        exporter's own stderr.
        """
        cmd = [
            'restic',
            '-r', self.repository,
            '-p', self.password_file,
            '--no-lock'
        ]
        cmd.extend(args)
        return subprocess.run(cmd, stdout=subprocess.PIPE)

    def get_snapshots(self, only_latest=False):
        """Return the snapshots of the repository as a list of dicts.

        Every snapshot gets an extra ``hash`` key, see
        :meth:`calc_snapshot_hash`.

        :param only_latest: when true, ``--latest 1`` is added to the command.
        :raises Exception: if the restic command exits with a non-zero code.
        """
        args = ['snapshots', '--json']
        if only_latest:
            args.extend(['--latest', '1'])

        result = self._run_restic(*args)
        if result.returncode != 0:
            raise Exception("Error executing restic snapshot command. Exit code: " + str(result.returncode))
        snapshots = json.loads(result.stdout.decode('utf-8'))
        for snap in snapshots:
            snap['hash'] = self.calc_snapshot_hash(snap)
        return snapshots

    def get_stats(self, snapshot_id=None):
        """Return the parsed output of ``restic stats --json [snapshot_id]``.

        Results for a given ``snapshot_id`` are cached in ``self.stats_cache``.

        :raises Exception: if the restic command exits with a non-zero code.
        """
        # This command is expensive in CPU/Memory (1-5 seconds),
        # and much more when snapshot_id=None (3 minutes) -> we avoid this call for now
        # https://github.com/restic/restic/issues/2126
        if snapshot_id is not None and snapshot_id in self.stats_cache:
            return self.stats_cache[snapshot_id]

        args = ['stats', '--json']
        if snapshot_id is not None:
            args.append(snapshot_id)

        result = self._run_restic(*args)
        if result.returncode != 0:
            raise Exception("Error executing restic stats command. Exit code: " + str(result.returncode))
        stats = json.loads(result.stdout.decode('utf-8'))

        if snapshot_id is not None:
            self.stats_cache[snapshot_id] = stats

        return stats

    def get_check(self):
        """Run ``restic check`` and return 1 if it exits with code 0, else 0."""
        # This command takes 20 seconds or more, but it's required
        result = self._run_restic('check')
        if result.returncode == 0:
            return 1  # ok
        return 0  # error

    def calc_snapshot_hash(self, snapshot: dict) -> str:
        """Return the SHA-256 hex digest of the hostname plus the joined paths.

        Snapshots with the same hostname and the same paths get the same hash.
        """
        text = snapshot['hostname'] + ",".join(snapshot['paths'])
        return hashlib.sha256(text.encode('utf-8')).hexdigest()
if __name__ == "__main__":
    # Script entry point: read the configuration from the environment,
    # compute the first metrics, start the HTTP server and then refresh the
    # metrics forever every REFRESH_INTERVAL seconds.
    logging.basicConfig(
        format='%(asctime)s %(levelname)-8s %(message)s',
        # The logging module only knows upper-case level names: normalize the
        # value so that e.g. LOG_LEVEL=debug is accepted too.
        level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO").upper()),
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=[
            logging.StreamHandler(sys.stdout)
        ]
    )
    logging.info("Starting Restic Prometheus Exporter ...")
    logging.info("It could take a while if the repository is remote.")

    # A missing mandatory variable is reported as KeyError by os.environ.
    try:
        restic_repo_url = os.environ["RESTIC_REPO_URL"]
    except KeyError:
        logging.error("Configuration error. The environment variable RESTIC_REPO_URL is mandatory")
        sys.exit(1)

    try:
        password_file = os.environ["PASSWORD_FILE"]
    except KeyError:
        logging.error("Configuration error. The environment variable PASSWORD_FILE is mandatory")
        sys.exit(1)

    exporter_address = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
    exporter_port = int(os.environ.get("LISTEN_PORT", 8001))
    exporter_refresh_interval = int(os.environ.get("REFRESH_INTERVAL", 60))

    # The constructor already computes the metrics once, so the server has
    # data to expose as soon as it starts listening.
    collector = ResticCollector(restic_repo_url, password_file)

    prometheus_client.core.REGISTRY.register(collector)
    prometheus_client.start_http_server(exporter_port, exporter_address)

    logging.info("Server listening in http://%s:%d/metrics", exporter_address, exporter_port)
    while True:
        logging.info("Refreshing stats every %d seconds", exporter_refresh_interval)
        time.sleep(exporter_refresh_interval)
        collector.refresh()