ntfy-alertmanager/main.go

570 lines
14 KiB
Go
Raw Normal View History

2022-10-13 13:14:56 +02:00
// A bridge between ntfy and Alertmanager
2022-10-09 14:19:48 +02:00
package main
import (
2023-07-17 16:39:16 +02:00
"context"
"crypto/sha512"
"crypto/subtle"
"crypto/tls"
"crypto/x509"
_ "embed"
2022-10-10 02:42:13 +02:00
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"flag"
"fmt"
2023-08-13 14:48:27 +02:00
"log/slog"
2022-10-09 14:19:48 +02:00
"net/http"
"os"
2023-07-17 16:39:16 +02:00
"os/signal"
"slices"
2022-10-09 14:19:48 +02:00
"strings"
2023-07-17 16:39:16 +02:00
"syscall"
"time"
2022-10-09 14:19:48 +02:00
2023-08-13 14:48:27 +02:00
"git.xenrox.net/~xenrox/go-utils/logging"
"git.xenrox.net/~xenrox/ntfy-alertmanager/cache"
2023-07-12 14:56:48 +02:00
"git.xenrox.net/~xenrox/ntfy-alertmanager/config"
"golang.org/x/text/cases"
"golang.org/x/text/language"
2022-10-09 14:19:48 +02:00
)
var version = "dev"
2023-02-21 13:57:21 +01:00
type bridge struct {
2023-07-12 14:56:48 +02:00
cfg *config.Config
2023-08-13 14:48:27 +02:00
logger *slog.Logger
cache cache.Cache
2023-02-20 13:27:41 +01:00
client *httpClient
}
// payload is the JSON document decoded from the body of an incoming webhook
// request.
type payload struct {
	// Status is compared against "firing" and "resolved".
	Status string `json:"status"`
	// Alerts lists the individual alerts contained in the request.
	Alerts []alert `json:"alerts"`
	// GroupLabels values are appended to the notification title.
	GroupLabels map[string]string `json:"groupLabels"`
	// CommonLabels drive the label overrides and the silence matcher when
	// all alerts are merged into one notification.
	CommonLabels      map[string]string `json:"commonLabels"`
	CommonAnnotations map[string]string `json:"commonAnnotations"`
	// ExternalURL is forwarded as the Alertmanager URL of a silence request.
	ExternalURL string `json:"externalURL"`
}
// alert is a single entry of the "alerts" array of a webhook payload.
type alert struct {
	// Status is compared against "firing" and "resolved".
	Status string `json:"status"`
	// Labels are printed in the notification body and matched against the
	// configured label overrides.
	Labels map[string]string `json:"labels"`
	// Annotations are printed in the notification body.
	Annotations map[string]string `json:"annotations"`
	// Fingerprint, together with Status, is the cache key of the alert.
	Fingerprint string `json:"fingerprint"`
}
// notification is one message that is about to be published to ntfy.
type notification struct {
	// title is sent as the X-Title header, body as the request body.
	title string
	body  string

	// Optional ntfy headers; each one is only sent when it is non-empty.
	priority     string // X-Priority
	tags         string // X-Tags, comma separated
	icon         string // X-Icon
	emailAddress string // X-Email
	call         string // X-Call

	// silenceBody is the base64 encoded JSON body of the "Silence" action.
	// An empty value means that no action is attached.
	silenceBody string

	// fingerprint and status identify the alert in the cache once the
	// notification has been published.
	fingerprint string
	status      string
}
// ntfyError is the JSON document decoded from a non-200 response of ntfy.
type ntfyError struct {
	// Error is the message that is included in the error returned to the
	// caller of publish.
	Error string `json:"error"`
}
2023-02-21 13:57:21 +01:00
func (br *bridge) singleAlertNotifications(p *payload) []*notification {
var notifications []*notification
for _, alert := range p.Alerts {
contains, err := br.cache.Contains(alert.Fingerprint, alert.Status)
if err != nil {
2023-08-13 14:48:27 +02:00
br.logger.Error("Failed to lookup alert in cache",
slog.String("fingerprint", alert.Fingerprint),
slog.String("error", err.Error()))
}
if contains {
2023-08-13 14:48:27 +02:00
br.logger.Debug("Alert skipped: Still in cache",
slog.String("fingerprint", alert.Fingerprint))
continue
}
n := new(notification)
n.fingerprint = alert.Fingerprint
n.status = alert.Status
// create title
n.title = fmt.Sprintf("[%s]", strings.ToUpper(alert.Status))
if name, ok := alert.Labels["alertname"]; ok {
n.title = fmt.Sprintf("%s %s", n.title, name)
}
for _, value := range p.GroupLabels {
n.title = fmt.Sprintf("%s %s", n.title, value)
}
// create body
n.body = "Labels:\n"
sortedLabelKeys := sortKeys(alert.Labels)
for _, key := range sortedLabelKeys {
n.body = fmt.Sprintf("%s%s = %s\n", n.body, key, alert.Labels[key])
}
n.body += "\nAnnotations:\n"
for key, value := range alert.Annotations {
n.body = fmt.Sprintf("%s%s = %s\n", n.body, key, value)
}
var tags []string
if alert.Status == "resolved" {
2023-07-12 14:56:48 +02:00
tags = append(tags, br.cfg.Resolved.Tags...)
n.icon = br.cfg.Resolved.Icon
2023-09-27 15:15:32 +02:00
n.priority = br.cfg.Resolved.Priority
}
2023-07-12 14:56:48 +02:00
n.emailAddress = br.cfg.Ntfy.EmailAddress
n.call = br.cfg.Ntfy.Call
2023-07-12 14:56:48 +02:00
for _, labelName := range br.cfg.Labels.Order {
val, ok := alert.Labels[labelName]
if !ok {
continue
}
2023-07-12 14:56:48 +02:00
labelConfig, ok := br.cfg.Labels.Label[fmt.Sprintf("%s:%s", labelName, val)]
if !ok {
continue
}
if n.priority == "" {
n.priority = labelConfig.Priority
}
if n.icon == "" {
n.icon = labelConfig.Icon
}
if n.emailAddress == "" {
2023-07-12 14:56:48 +02:00
n.emailAddress = labelConfig.EmailAddress
}
if n.call == "" {
2023-07-12 14:56:48 +02:00
n.call = labelConfig.Call
}
for _, val := range labelConfig.Tags {
if !slices.Contains(tags, val) {
tags = append(tags, val)
}
}
}
n.tags = strings.Join(tags, ",")
2023-07-12 14:56:48 +02:00
if br.cfg.Am.SilenceDuration != 0 && alert.Status == "firing" {
2023-02-21 13:57:21 +01:00
if br.cfg.BaseURL == "" {
br.logger.Error("Failed to create silence action: No base-url set")
} else {
// I could not convince ntfy to accept an Action with a body which contains
// a json with more than one key. Instead the json will be base64 encoded
// and sent to the ntfy-alertmanager silences endpoint, that operates as
// a proxy and will do the Alertmanager API request.
s := &silenceBody{AlertManagerURL: p.ExternalURL, Labels: alert.Labels}
b, err := json.Marshal(s)
if err != nil {
2023-08-13 14:48:27 +02:00
br.logger.Error("Failed to create silence action",
slog.String("error", err.Error()))
}
n.silenceBody = base64.StdEncoding.EncodeToString(b)
}
}
notifications = append(notifications, n)
}
return notifications
}
2023-02-21 13:57:21 +01:00
func (br *bridge) multiAlertNotification(p *payload) *notification {
n := new(notification)
// create title
count := len(p.Alerts)
title := fmt.Sprintf("[%s", strings.ToUpper(p.Status))
if p.Status == "firing" {
title = fmt.Sprintf("%s:%d", title, count)
}
title += "]"
for _, value := range p.GroupLabels {
title = fmt.Sprintf("%s %s", title, value)
}
n.title = title
// create body
var body string
c := cases.Title(language.English)
for _, alert := range p.Alerts {
alertBody := fmt.Sprintf("%s\nLabels:\n", c.String(alert.Status))
sortedLabelKeys := sortKeys(alert.Labels)
for _, key := range sortedLabelKeys {
alertBody = fmt.Sprintf("%s%s = %s\n", alertBody, key, alert.Labels[key])
}
alertBody += "Annotations:\n"
for key, value := range alert.Annotations {
alertBody = fmt.Sprintf("%s%s = %s\n", alertBody, key, value)
}
alertBody += "\n"
body += alertBody
}
n.body = body
var tags []string
if p.Status == "resolved" {
2023-07-12 14:56:48 +02:00
tags = append(tags, br.cfg.Resolved.Tags...)
n.icon = br.cfg.Resolved.Icon
2023-09-27 15:15:32 +02:00
n.priority = br.cfg.Resolved.Priority
}
2023-07-12 14:56:48 +02:00
n.emailAddress = br.cfg.Ntfy.EmailAddress
n.call = br.cfg.Ntfy.Call
2023-07-12 14:56:48 +02:00
for _, labelName := range br.cfg.Labels.Order {
val, ok := p.CommonLabels[labelName]
if !ok {
continue
}
2023-07-12 14:56:48 +02:00
labelConfig, ok := br.cfg.Labels.Label[fmt.Sprintf("%s:%s", labelName, val)]
if !ok {
continue
}
if n.priority == "" {
n.priority = labelConfig.Priority
}
if n.icon == "" {
n.icon = labelConfig.Icon
}
if n.emailAddress == "" {
2023-07-12 14:56:48 +02:00
n.emailAddress = labelConfig.EmailAddress
}
if n.call == "" {
2023-07-12 14:56:48 +02:00
n.call = labelConfig.Call
}
for _, val := range labelConfig.Tags {
if !slices.Contains(tags, val) {
tags = append(tags, val)
}
}
}
n.tags = strings.Join(tags, ",")
2023-07-12 14:56:48 +02:00
if br.cfg.Am.SilenceDuration != 0 && p.Status == "firing" {
2023-02-21 13:57:21 +01:00
if br.cfg.BaseURL == "" {
br.logger.Error("Failed to create silence action: No base-url set")
} else {
s := &silenceBody{AlertManagerURL: p.ExternalURL, Labels: p.CommonLabels}
b, err := json.Marshal(s)
if err != nil {
2023-08-13 14:48:27 +02:00
br.logger.Error("Failed to create silence action",
slog.String("error", err.Error()))
}
n.silenceBody = base64.StdEncoding.EncodeToString(b)
}
}
return n
}
2023-02-21 13:57:21 +01:00
func (br *bridge) publish(n *notification) error {
2023-07-12 14:56:48 +02:00
req, err := http.NewRequest(http.MethodPost, br.cfg.Ntfy.Topic, strings.NewReader(n.body))
if err != nil {
return err
}
// ntfy authentication
2023-07-12 14:56:48 +02:00
if br.cfg.Ntfy.Password != "" && br.cfg.Ntfy.User != "" {
req.SetBasicAuth(br.cfg.Ntfy.User, br.cfg.Ntfy.Password)
} else if br.cfg.Ntfy.AccessToken != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", br.cfg.Ntfy.AccessToken))
}
req.Header.Set("X-Title", n.title)
if n.priority != "" {
req.Header.Set("X-Priority", n.priority)
}
if n.icon != "" {
req.Header.Set("X-Icon", n.icon)
}
if n.tags != "" {
req.Header.Set("X-Tags", n.tags)
}
if n.emailAddress != "" {
req.Header.Set("X-Email", n.emailAddress)
}
if n.call != "" {
req.Header.Set("X-Call", n.call)
}
if n.silenceBody != "" {
2023-02-21 13:57:21 +01:00
url := br.cfg.BaseURL + "/silences"
var authString string
2023-02-21 13:57:21 +01:00
if br.cfg.User != "" && br.cfg.Password != "" {
auth := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s:%s", br.cfg.User, br.cfg.Password)))
authString = fmt.Sprintf(", headers.Authorization=Basic %s", auth)
}
req.Header.Set("Actions", fmt.Sprintf("http, Silence, %s, method=POST, body=%s%s", url, n.silenceBody, authString))
}
configFingerprint := br.cfg.Ntfy.CertFingerprint
if configFingerprint != "" {
tlsCfg := &tls.Config{}
tlsCfg.VerifyPeerCertificate = func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
for _, rawCert := range rawCerts {
hash := sha512.Sum512(rawCert)
if hex.EncodeToString(hash[:]) == configFingerprint {
return nil
}
}
if len(rawCerts) == 0 {
return errors.New("the ntfy server does not offer a certificate")
}
hash := sha512.Sum512(rawCerts[0])
var expectedFingerprint string
for i, b := range hash {
if i != 0 {
expectedFingerprint += ":"
}
expectedFingerprint += fmt.Sprintf("%02X", b)
}
return fmt.Errorf("the ntfy certificate fingerprint (%s) is not set in the config", expectedFingerprint)
}
tlsCfg.InsecureSkipVerify = true
br.client.Transport = &http.Transport{TLSClientConfig: tlsCfg}
}
2023-02-21 13:57:21 +01:00
resp, err := br.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
var ntfyError ntfyError
if err := json.NewDecoder(resp.Body).Decode(&ntfyError); err != nil {
2023-08-13 14:48:27 +02:00
br.logger.Debug("Publish: Failed to decode error",
slog.String("error", err.Error()))
return fmt.Errorf("ntfy: received status code %d", resp.StatusCode)
}
return fmt.Errorf("ntfy: %s (status code %d)", ntfyError.Error, resp.StatusCode)
}
return nil
}
2023-02-21 13:57:21 +01:00
func (br *bridge) handleWebhooks(w http.ResponseWriter, r *http.Request) {
logger := br.logger.With(slog.String("handler", "/"))
2022-10-09 20:03:55 +02:00
if r.Method != http.MethodPost {
http.Error(w, "Only POST allowed", http.StatusMethodNotAllowed)
logger.Debug(fmt.Sprintf("Illegal HTTP method: expected %q, got %q", "POST", r.Method))
2022-10-09 20:03:55 +02:00
return
}
contentType := r.Header.Get("Content-Type")
if contentType != "application/json" {
http.Error(w, "Only application/json allowed", http.StatusUnsupportedMediaType)
logger.Debug(fmt.Sprintf("Illegal content type: %s", contentType))
2022-10-09 20:03:55 +02:00
return
}
var event payload
if err := json.NewDecoder(r.Body).Decode(&event); err != nil {
http.Error(w, "Failed to parse payload", http.StatusInternalServerError)
logger.Debug("Failed to decode payload",
2023-08-13 14:48:27 +02:00
slog.String("error", err.Error()))
return
}
logger.Debug("Received alert",
2023-08-13 14:48:27 +02:00
slog.Any("payload", event))
2023-02-08 15:31:06 +01:00
2023-07-12 14:56:48 +02:00
if br.cfg.AlertMode == config.Single {
2023-02-21 13:57:21 +01:00
notifications := br.singleAlertNotifications(&event)
for _, n := range notifications {
2023-02-21 13:57:21 +01:00
err := br.publish(n)
if err != nil {
logger.Error("Failed to publish notification",
2023-08-13 14:48:27 +02:00
slog.String("error", err.Error()))
} else {
if err := br.cache.Set(n.fingerprint, n.status); err != nil {
logger.Error("Failed to cache alert",
2023-08-13 14:48:27 +02:00
slog.String("fingerprint", n.fingerprint),
slog.String("error", err.Error()))
}
}
}
} else {
2023-02-21 13:57:21 +01:00
notification := br.multiAlertNotification(&event)
err := br.publish(notification)
if err != nil {
logger.Error("Failed to publish notification",
2023-08-13 14:48:27 +02:00
slog.String("error", err.Error()))
}
}
2022-10-09 14:19:48 +02:00
}
// corsMiddleware adds the CORS response headers to every response and
// answers preflight requests itself.
//
// A preflight is an OPTIONS request without credentials and without a body.
// Forwarding it to the wrapped handler would subject it to authentication
// and to the POST-only method checks, so it is completed here with
// 204 No Content.
func (br *bridge) corsMiddleware(handler http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		header := w.Header()
		header.Set("Access-Control-Allow-Origin", "*")
		header.Set("Access-Control-Allow-Methods", "POST, OPTIONS")
		header.Set("Access-Control-Allow-Headers", "Content-Type, Authorization")

		if r.Method == http.MethodOptions {
			w.WriteHeader(http.StatusNoContent)
			return
		}

		handler.ServeHTTP(w, r)
	})
}
func (br *bridge) authMiddleware(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
logger := br.logger.With(slog.String("url", r.URL.String()))
2022-10-10 19:55:33 +02:00
user, pass, ok := r.BasicAuth()
if !ok {
logger.Debug("basic auth failure")
2022-10-10 19:55:33 +02:00
return
}
inputUserHash := sha512.Sum512([]byte(user))
inputPassHash := sha512.Sum512([]byte(pass))
2023-02-21 13:57:21 +01:00
configUserHash := sha512.Sum512([]byte(br.cfg.User))
configPassHash := sha512.Sum512([]byte(br.cfg.Password))
validUser := subtle.ConstantTimeCompare(inputUserHash[:], configUserHash[:])
validPass := subtle.ConstantTimeCompare(inputPassHash[:], configPassHash[:])
if validUser != 1 || validPass != 1 {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
logger.Debug("basic auth: wrong user or password")
2022-10-10 19:55:33 +02:00
return
}
handler.ServeHTTP(w, r)
})
2022-10-10 19:55:33 +02:00
}
2023-07-17 16:39:16 +02:00
func (br *bridge) runCleanup(ctx context.Context) {
for {
2023-07-17 16:39:16 +02:00
select {
case <-time.After(br.cfg.Cache.CleanupInterval):
br.logger.Info("Pruning cache")
br.cache.Cleanup()
case <-ctx.Done():
return
}
}
}
2022-10-09 14:19:48 +02:00
func main() {
var configPath string
flag.StringVar(&configPath, "config", "/etc/ntfy-alertmanager/config", "config file path")
var showVersion bool
flag.BoolVar(&showVersion, "version", false, "Show version and exit")
flag.Parse()
if showVersion {
fmt.Println(version)
os.Exit(0)
}
2023-07-12 14:56:48 +02:00
cfg, err := config.ReadConfig(configPath)
if err != nil {
slog.Error("Failed to read config",
2023-08-13 14:48:27 +02:00
slog.String("error", err.Error()))
os.Exit(1)
}
logger, err := logging.New(cfg.LogLevel, cfg.LogFormat, os.Stderr)
2023-08-13 14:48:27 +02:00
if err != nil {
slog.Error("Failed to create logger",
2023-08-13 14:48:27 +02:00
slog.String("error", err.Error()))
os.Exit(1)
}
2023-07-17 16:39:16 +02:00
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()
2023-02-20 13:27:41 +01:00
client := &httpClient{&http.Client{Timeout: time.Second * 3}}
2023-02-17 01:45:14 +01:00
c, err := cache.NewCache(cfg.Cache)
if err != nil {
2023-08-13 14:48:27 +02:00
logger.Error("Failed to create cache",
slog.String("error", err.Error()))
os.Exit(1)
}
bridge := &bridge{cfg: cfg, logger: logger, cache: c, client: client}
2023-08-13 14:48:27 +02:00
logger.Info(fmt.Sprintf("Listening on %s, ntfy-alertmanager %s", cfg.HTTPAddress, version))
2022-10-10 19:55:33 +02:00
2023-07-14 12:38:43 +02:00
mux := http.NewServeMux()
mux.HandleFunc("/", bridge.handleWebhooks)
mux.HandleFunc("/silences", bridge.handleSilences)
2023-07-14 12:47:22 +02:00
httpServer := &http.Server{
Addr: cfg.HTTPAddress,
Handler: mux,
}
2022-10-10 19:55:33 +02:00
if cfg.User != "" && cfg.Password != "" {
logger.Info("Enabling HTTP Basic Authentication")
httpServer.Handler = bridge.authMiddleware(mux)
2022-10-10 19:55:33 +02:00
}
httpServer.Handler = bridge.corsMiddleware(httpServer.Handler)
if _, ok := c.(*cache.MemoryCache); ok {
2023-07-17 16:39:16 +02:00
go bridge.runCleanup(ctx)
}
go func() {
err = httpServer.ListenAndServe()
if err != nil && err != http.ErrServerClosed {
2023-08-13 14:48:27 +02:00
logger.Error("Failed to start HTTP server",
slog.String("error", err.Error()))
os.Exit(1)
2023-07-17 16:39:16 +02:00
}
}()
<-ctx.Done()
stop()
httpShutdownContext, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err = httpServer.Shutdown(httpShutdownContext)
if err != nil {
2023-08-13 14:48:27 +02:00
logger.Error("Failed to shutdown HTTP server",
slog.String("error", err.Error()))
}
2022-10-09 14:19:48 +02:00
}