diff --git a/.github/workflows/autotag.yml b/.github/workflows/autotag.yml new file mode 100644 index 0000000..d2a076a --- /dev/null +++ b/.github/workflows/autotag.yml @@ -0,0 +1,20 @@ +name: autotag + +on: + push: + branches: + - "main" + +jobs: + build: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v2 + - + name: Auto Tag + uses: Klemensas/action-autotag@stable + with: + GITHUB_TOKEN: "${{ secrets.GH_PAT }}" + tag_prefix: "" diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml new file mode 100644 index 0000000..e104798 --- /dev/null +++ b/.github/workflows/release-docker.yml @@ -0,0 +1,52 @@ +name: release-docker + +on: + push: + tags: + - '*.*.*' + +jobs: + build: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v2 + - + name: Downcase repo + run: echo REPOSITORY=ngosang/restic-exporter >> $GITHUB_ENV + - + name: Docker metadata + id: docker_metadata + uses: docker/metadata-action@v3 + with: + images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }} + - + name: Set up QEMU + uses: docker/setup-qemu-action@v1.0.1 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - + name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - + name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GH_PAT }} + - + name: Build and push + uses: docker/build-push-action@v2 + with: + context: . 
+ file: ./Dockerfile + platforms: linux/386, linux/amd64, linux/arm/v6, linux/arm/v7, linux/arm64/v8, linux/ppc64le, linux/s390x + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.docker_metadata.outputs.tags }} + labels: ${{ steps.docker_metadata.outputs.labels }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..501c338 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*.pyc +.settings +.directory +.idea/ +_trial_temp/ +.vscode +.run/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a8387b3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,6 @@ +# Changelog + +## 1.0.0 (2022/12/06) + +* First release +* Restic 0.14.0 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f65c18d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,41 @@ +FROM golang:alpine3.17 AS builder + +ENV RESTIC_VERSION 0.14.0 +ENV CGO_ENABLED 0 + +RUN cd /tmp \ + # download restic source code + && wget https://github.com/restic/restic/archive/refs/tags/v${RESTIC_VERSION}.tar.gz -O restic.tar.gz \ + && tar xvf restic.tar.gz \ + && cd restic-* \ + # build the executable + # flag -ldflags "-s -w" produces a smaller executable + && go build -ldflags "-s -w" -v -o /tmp/restic ./cmd/restic + +FROM python:3.11-alpine3.17 + +RUN apk add --no-cache --update tzdata + +COPY --from=builder /tmp/restic /usr/bin +COPY entrypoint.sh requirements.txt / + +RUN pip install -r /requirements.txt \ + # remove temporary files + && rm -rf /root/.cache + +COPY ./restic-exporter.py /restic-exporter.py + +EXPOSE 8001 + +CMD [ "/entrypoint.sh" ] + +# Help +# +# Local build +# docker build -t restic-exporter:custom . +# +# Multi-arch build +# docker buildx create --use +# docker buildx build -t restic-exporter:custom --platform linux/386,linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64/v8,linux/ppc64le,linux/s390x . 
+# +# add --push to publish in DockerHub diff --git a/README.md b/README.md index 19de000..9510381 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,162 @@ -# restic-exporter -Prometheus exporter for the Restic backup system +# ngosang/restic-exporter + +[![Latest release](https://img.shields.io/github/v/release/ngosang/restic-exporter)](https://github.com/ngosang/restic-exporter/releases) +[![Docker Pulls](https://img.shields.io/docker/pulls/ngosang/restic-exporter)](https://hub.docker.com/r/ngosang/restic-exporter/) +[![Donate PayPal](https://img.shields.io/badge/Donate-PayPal-yellow.svg)](https://www.paypal.com/paypalme/diegoheras0xff) +[![Donate Bitcoin](https://img.shields.io/badge/Donate-Bitcoin-f7931a.svg)](https://www.blockchain.com/btc/address/14EcPN47rWXkmFvjfohJx2rQxxoeBRJhej) +[![Donate Ethereum](https://img.shields.io/badge/Donate-Ethereum-8c8c8c.svg)](https://www.blockchain.com/eth/address/0x0D1549BbB00926BF3D92c1A8A58695e982f1BE2E) + +Prometheus exporter for the [Restic](https://github.com/restic/restic) backup system. + +## Install + +### From source code + +Requirements: + * Python 3 + * [prometheus-client](https://github.com/prometheus/client_python) + +```bash +pip install -r requirements.txt + +export RESTIC_REPO_URL=/data +export PASSWORD_FILE=/restic_password_file +python restic-exporter.py +``` + +### Docker + +Docker images are available in [GHCR](https://github.com/ngosang/restic-exporter/pkgs/container/restic-exporter) and [DockerHub](https://hub.docker.com/r/ngosang/restic-exporter). 
+ +```bash +docker pull ghcr.io/ngosang/restic-exporter +# or +docker pull ngosang/restic-exporter +``` + +#### Supported Architectures + +The architectures supported by this image are: + +* linux/386 +* linux/amd64 +* linux/arm/v6 +* linux/arm/v7 +* linux/arm64/v8 +* linux/ppc64le +* linux/s390x + +#### docker-compose + +Compatible with docker-compose v2 schemas: + +```yaml +--- +version: '2.1' +services: + restic-exporter: + image: ngosang/restic-exporter + container_name: restic-exporter + environment: + - TZ=Europe/Madrid + - RESTIC_REPO_URL=/data + - RESTIC_REPO_PASSWORD= + # - RESTIC_REPO_PASSWORD_FILE= + - REFRESH_INTERVAL=1800 # 30 min + volumes: + - /host_path/restic/data:/data + ports: + - "8001:8001" + restart: unless-stopped +``` + +#### docker cli + +```bash +docker run -d \ + --name=restic-exporter \ + -e TZ=Europe/Madrid \ + -e RESTIC_REPO_URL=/data \ + -e RESTIC_REPO_PASSWORD= \ + -e REFRESH_INTERVAL=1800 \ + -p 8001:8001 \ + --restart unless-stopped \ + ngosang/restic-exporter +``` + +## Configuration + +All configuration is done with environment variables. + +- `RESTIC_REPO_URL`: Restic repository URL. It could be a local repository (eg: `/data`) or a remote repository (eg: `rest:http://user:password@127.0.0.1:8000/`). +- `RESTIC_REPO_PASSWORD`: Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD_FILE` is not defined. +- `RESTIC_REPO_PASSWORD_FILE`: File with the Restic repository password in plain text. This is only required if `RESTIC_REPO_PASSWORD` is not defined. Remember to mount the Docker volume with the file. +- `REFRESH_INTERVAL`: (Optional) Refresh interval for the metrics in seconds. Computing the metrics is an expensive task, keep this value as high as possible. The default is `60`. +- `LISTEN_PORT`: (Optional) The port the exporter should listen on. The default is `8001`. +- `LISTEN_ADDRESS`: (Optional) The address the exporter should listen on. The default is to listen on all addresses. 
+- `LOG_LEVEL`: (Optional) Log level of the traces. The default is `INFO`. + +## Exported metrics + +```shell +# HELP restic_check_success Result of restic check operation in the repository +# TYPE restic_check_success gauge +restic_check_success 1.0 +# HELP restic_snapshots_total Total number of snapshots in the repository +# TYPE restic_snapshots_total counter +restic_snapshots_total 1777.0 +# HELP restic_backup_timestamp Timestamp of the last backup +# TYPE restic_backup_timestamp gauge +restic_backup_timestamp{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 1.669754009e+09 +# HELP restic_backup_files_total Number of files in the backup +# TYPE restic_backup_files_total counter +restic_backup_files_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 19051.0 +# HELP restic_backup_size_total Total size of backup in bytes +# TYPE restic_backup_size_total counter +restic_backup_size_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 4.1174838248e+010 +# HELP restic_backup_snapshots_total Total number of snapshots +# TYPE restic_backup_snapshots_total counter +restic_backup_snapshots_total{client_hostname="PC-HOME-1",client_username="PC-HOME-1\\User-1",snapshot_hash="1911eb846f1642c327936915f1fad4e16190d0ab6b68e045294f5f0280a00ebe"} 106.0 +``` + +## Prometheus config + +Example Prometheus configuration: + +```yaml +scrape_configs: + - job_name: 'restic-exporter' + static_configs: + - targets: ['192.168.1.100:8001'] +``` + +## Prometheus / Alertmanager rules + +Example Prometheus rules for alerting: + +```yaml + - alert: ResticCheckFailed + expr: restic_check_success == 0 + for: 5m + labels: + severity: critical + annotations: + summary: Restic check failed (instance {{ 
$labels.instance }}) + description: Restic check failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }} + + - alert: ResticOutdatedBackup + # 1209600 = 14 days + expr: time() - restic_backup_timestamp > 1209600 + for: 0m + labels: + severity: critical + annotations: + summary: Restic {{ $labels.client_hostname }} / {{ $labels.client_username }} backup is outdated + description: Restic backup is outdated\n VALUE = {{ $value }}\n LABELS = {{ $labels }} +``` + +## Grafana dashboard + +There is a reference Grafana dashboard in [grafana/grafana_dashboard.json](./grafana/grafana_dashboard.json). + +![](./grafana/grafana_dashboard.png) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..82a8d44 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,17 @@ +version: "2.1" + +services: + restic-exporter: + image: ngosang/restic-exporter + container_name: restic-exporter + environment: + - TZ=Europe/Madrid + - RESTIC_REPO_URL=/data + - RESTIC_REPO_PASSWORD=password_here + # - RESTIC_REPO_PASSWORD_FILE=/file_with_password_here + - REFRESH_INTERVAL=1800 # 30 min + volumes: + - /host_path/restic/data:/data + ports: + - "8001:8001" + restart: unless-stopped diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100755 index 0000000..ed36911 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env sh + +# Exit on error. 
For debug use set -x +set -e + +export PASSWORD_FILE="/tmp/restic_passwd" + +if [ -z "${RESTIC_REPO_PASSWORD}" ]; then + if [ -z "${RESTIC_REPO_PASSWORD_FILE}" ]; then + echo "You have to define one of these environment variables: RESTIC_REPO_PASSWORD or RESTIC_REPO_PASSWORD_FILE" >&2; exit 1 + else + cp "${RESTIC_REPO_PASSWORD_FILE}" "${PASSWORD_FILE}" + fi +else + echo "${RESTIC_REPO_PASSWORD}" > "${PASSWORD_FILE}" +fi + +/usr/local/bin/python -u /restic-exporter.py diff --git a/grafana/grafana_dashboard.json b/grafana/grafana_dashboard.json new file mode 100644 index 0000000..9a24b6b --- /dev/null +++ b/grafana/grafana_dashboard.json @@ -0,0 +1,1019 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Restic statistics dashboard", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 9846, + "graphTooltip": 0, + "id": 40, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "ERROR" + }, + "1": { + "color": "green", + "index": 0, + "text": "OK" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 40, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + 
}, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "9.3.0", + "targets": [ + { + "exemplar": true, + "expr": "restic_check_success", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Repository Check", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "last_backup" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "auto" + }, + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "last_backup_since" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "unit", + "value": "s" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 604800 + }, + { + "color": "red", + "value": 1209600 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 15, + "x": 0, + "y": 3 + }, + "id": 27, + "links": [], + "maxDataPoints": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "client_hostname" + } + ] + }, + "pluginVersion": "9.3.0", + "targets": [ + { + "exemplar": true, + "expr": "restic_backup_timestamp * 1000", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "time() - restic_backup_timestamp", + 
"format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "B" + } + ], + "title": "Last backup", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "backup_id": true, + "backup_type": true, + "client_hostname": false, + "client_id": true, + "client_os_version": true, + "client_username": true, + "client_version": true, + "instance": true, + "job": true, + "snapshot_hash": true, + "snapshot_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "", + "Value #A": "last_backup", + "Value #B": "last_backup_since" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 3 + }, + "id": 32, + "links": [], + "maxDataPoints": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "client_hostname" + } + ] + }, + "pluginVersion": "9.3.0", + "targets": [ + { + "exemplar": true, + "expr": "restic_backup_snapshots_total", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total snapshot count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "backup_id": true, + "backup_type": true, + "client_hostname": 
false, + "client_id": true, + "client_os_version": true, + "client_username": true, + "client_version": true, + "instance": true, + "job": true, + "snapshot_hash": true, + "snapshot_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "total_snapshot_count" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "total_backup_size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 0, + "y": 12 + }, + "id": 34, + "links": [], + "maxDataPoints": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "client_hostname" + } + ] + }, + "pluginVersion": "9.3.0", + "targets": [ + { + "exemplar": true, + "expr": "restic_backup_size_total", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total backup size", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "backup_id": true, + "backup_type": true, + "client_hostname": false, + "client_id": true, + "client_os_version": true, + "client_username": true, + "client_version": true, + "instance": true, + "job": true, + "snapshot_hash": true, + "snapshot_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "total_backup_size" + } + } + } + ], + "type": 
"table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 9, + "y": 12 + }, + "id": 35, + "links": [], + "maxDataPoints": 100, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false, + "values": [] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "exemplar": true, + "expr": "restic_backup_size_total", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total backup size", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false, + "__name__": true, + "backup_id": true, + "client_hostname": false, + "client_username": true, + "instance": true, + "job": true, + "snapshot_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "", + "snapshot_id": "" + } + } + } + ], + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 12 + }, + "id": 33, + "links": [], + 
"maxDataPoints": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 1, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "client_hostname" + } + ] + }, + "pluginVersion": "9.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "exemplar": true, + "expr": "restic_backup_files_total", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total backup files", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "backup_id": true, + "backup_type": true, + "client_hostname": false, + "client_id": true, + "client_os_version": true, + "client_username": true, + "client_version": true, + "instance": true, + "job": true, + "snapshot_hash": true, + "snapshot_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "total_backup_files" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": 
"decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 8, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "exemplar": true, + "expr": "sum by(client_hostname,client_username) (restic_backup_size_total)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total backup size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 15, + "x": 0, + "y": 30 + }, + "id": 41, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "exemplar": true, + "expr": 
"sum by(client_hostname,client_username) (restic_backup_files_total)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total backup files", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fmMKqssGk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 30 + }, + "id": 30, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "exemplar": true, + "expr": "sum by(client_hostname,client_username) (restic_backup_snapshots_total)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_hostname}}", + "refId": "A" + } + ], + "title": "Total snapshot count", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 37, + "style": "dark", + "tags": [ + "system" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-7d", + "to": "now" + }, 
+ "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Restic Exporter", + "uid": "2JzZl3B7k", + "version": 22, + "weekStart": "" +} \ No newline at end of file diff --git a/grafana/grafana_dashboard.png b/grafana/grafana_dashboard.png new file mode 100644 index 0000000..b5edc66 Binary files /dev/null and b/grafana/grafana_dashboard.png differ diff --git a/package.json b/package.json new file mode 100644 index 0000000..a7e3bfe --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "name": "ngosang-restic-exporter", + "version": "1.0.0", + "author": "ngosang@hotmail.es" +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2623560 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +prometheus-client==0.15.0 diff --git a/restic-exporter.py b/restic-exporter.py new file mode 100644 index 0000000..46ee0e4 --- /dev/null +++ b/restic-exporter.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +import datetime +import hashlib +import json +import logging +import os +import time +import re +import subprocess +import sys + +import prometheus_client +import prometheus_client.core + + +class ResticCollector(object): + def __init__(self, repository, password_file_): + self.repository = repository + self.password_file = password_file_ + # todo: the stats cache increases over time -> remove old ids + # todo: cold start -> the stats cache could be saved in a persistent volume + # todo: cold start -> the restic cache (/root/.cache/restic) could be saved in a persistent volume + self.stats_cache = {} + self.metrics = {} + self.refresh() + + def collect(self): + logging.debug("Incoming request") + + common_label_names = [ + "client_hostname", + "client_username", + "snapshot_hash" + ] + + check_success = prometheus_client.core.GaugeMetricFamily( + 
"restic_check_success", + "Result of restic check operation in the repository", + labels=[]) + + snapshots_total = prometheus_client.core.CounterMetricFamily( + "restic_snapshots_total", + "Total number of snapshots in the repository", + labels=[]) + + backup_timestamp = prometheus_client.core.GaugeMetricFamily( + "restic_backup_timestamp", + "Timestamp of the last backup", + labels=common_label_names) + + backup_files_total = prometheus_client.core.CounterMetricFamily( + "restic_backup_files_total", + "Number of files in the backup", + labels=common_label_names) + + backup_size_total = prometheus_client.core.CounterMetricFamily( + "restic_backup_size_total", + "Total size of backup in bytes", + labels=common_label_names) + + backup_snapshots_total = prometheus_client.core.CounterMetricFamily( + "restic_backup_snapshots_total", + "Total number of snapshots", + labels=common_label_names) + + check_success.add_metric([], self.metrics["check_success"]) + snapshots_total.add_metric([], self.metrics["snapshots_total"]) + + for client in self.metrics['clients']: + common_label_values = [ + client["hostname"], + client["username"], + client["snapshot_hash"] + ] + backup_timestamp.add_metric(common_label_values, client["timestamp"]) + backup_files_total.add_metric(common_label_values, client["files_total"]) + backup_size_total.add_metric(common_label_values, client["size_total"]) + backup_snapshots_total.add_metric(common_label_values, client["snapshots_total"]) + + yield check_success + yield snapshots_total + yield backup_timestamp + yield backup_files_total + yield backup_size_total + yield backup_snapshots_total + + def refresh(self): + try: + self.metrics = self.get_metrics() + except Exception as e: + logging.error("Unable to collect metrics from Restic. 
Error: %s", str(e)) + + def get_metrics(self): + all_snapshots = self.get_snapshots() + latest_snapshots = self.get_snapshots(True) + clients = [] + for snap in latest_snapshots: + stats = self.get_stats(snap['id']) + # strip only the fractional seconds so the UTC offset (or "Z") survives; timestamp() then honours it instead of assuming local time + time_parsed = re.sub(r'\.\d+', '', snap['time']) + timestamp = datetime.datetime.strptime(time_parsed, "%Y-%m-%dT%H:%M:%S%z").timestamp() + + snapshots_total = 0 + for snap2 in all_snapshots: + if snap2['hash'] == snap['hash']: + snapshots_total += 1 + + clients.append({ + 'snapshot_hash': snap['hash'], + 'hostname': snap['hostname'], + 'username': snap['username'], + 'timestamp': timestamp, + 'size_total': stats['total_size'], + 'files_total': stats['total_file_count'], + 'snapshots_total': snapshots_total + }) + # todo: fix the commented code when the bug is fixed in restic + # https://github.com/restic/restic/issues/2126 + # stats = self.get_stats() + check_success = self.get_check() + metrics = { + 'check_success': check_success, + 'clients': clients, + # 'size_total': stats['total_size'], + # 'files_total': stats['total_file_count'], + 'snapshots_total': len(all_snapshots) + } + return metrics + + def get_snapshots(self, only_latest=False): + cmd = [ + 'restic', + '-r', self.repository, + '-p', self.password_file, + '--no-lock', + 'snapshots', '--json' + ] + if only_latest: + cmd.extend(['--latest', '1']) + + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode != 0: + raise Exception("Error executing restic snapshot command. 
Exit code: " + str(result.returncode)) + snapshots = json.loads(result.stdout.decode('utf-8')) + for snap in snapshots: + snap['hash'] = self.calc_snapshot_hash(snap) + return snapshots + + def get_stats(self, snapshot_id=None): + # This command is expensive in CPU/Memory (1-5 seconds), + # and much more when snapshot_id=None (3 minutes) -> we avoid this call for now + # https://github.com/restic/restic/issues/2126 + if snapshot_id is not None and snapshot_id in self.stats_cache: + return self.stats_cache[snapshot_id] + + cmd = [ + 'restic', + '-r', self.repository, + '-p', self.password_file, + '--no-lock', + 'stats', '--json' + ] + if snapshot_id is not None: + cmd.extend([snapshot_id]) + + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode != 0: + raise Exception("Error executing restic stats command. Exit code: " + str(result.returncode)) + stats = json.loads(result.stdout.decode('utf-8')) + + if snapshot_id is not None: + self.stats_cache[snapshot_id] = stats + + return stats + + def get_check(self): + # This command takes 20 seconds or more, but it's required + cmd = [ + 'restic', + '-r', self.repository, + '-p', self.password_file, + '--no-lock', + 'check' + ] + + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode == 0: + return 1 # ok + return 0 # error + + def calc_snapshot_hash(self, snapshot: dict) -> str: + text = snapshot['hostname'] + ",".join(snapshot['paths']) + return hashlib.sha256(text.encode('utf-8')).hexdigest() + + +if __name__ == "__main__": + logging.basicConfig( + format='%(asctime)s %(levelname)-8s %(message)s', + level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO")), + datefmt='%Y-%m-%d %H:%M:%S', + handlers=[ + logging.StreamHandler(sys.stdout) + ] + ) + logging.info("Starting Restic Prometheus Exporter ...") + logging.info("It could take a while if the repository is remote.") + + try: + restic_repo_url = os.environ["RESTIC_REPO_URL"] + except Exception: + 
logging.error("Configuration error. The environment variable RESTIC_REPO_URL is mandatory") + sys.exit(1) + + try: + password_file = os.environ["PASSWORD_FILE"] + except Exception: + logging.error("Configuration error. The environment variable PASSWORD_FILE is mandatory") + sys.exit(1) + + exporter_address = os.environ.get("LISTEN_ADDRESS", "0.0.0.0") + exporter_port = int(os.environ.get("LISTEN_PORT", 8001)) + exporter_refresh_interval = int(os.environ.get("REFRESH_INTERVAL", 60)) + + collector = ResticCollector(restic_repo_url, password_file) + + prometheus_client.core.REGISTRY.register(collector) + prometheus_client.start_http_server(exporter_port, exporter_address) + + logging.info("Server listening in http://%s:%d/metrics", exporter_address, exporter_port) + while True: + logging.info("Refreshing stats every %d seconds", exporter_refresh_interval) + time.sleep(exporter_refresh_interval) + collector.refresh()