feat: export metrics with socket errors
Add new metric to collect the number of errors found when connecting to the fail2ban server socket. Errors are split into two categories: connection errors (e.g. socket file not found), and request errors (e.g. invalid response received from server). Update the `up` metric to return `0` if the socket connection fails. Improve error logging.
This commit is contained in:
parent
828b67cdd9
commit
4da46f3c4a
2 changed files with 57 additions and 21 deletions
|
@ -50,7 +50,12 @@ var (
|
|||
[]string{"type"}, nil,
|
||||
)
|
||||
|
||||
metricServerPing = prometheus.NewDesc(
|
||||
metricErrorCountNew = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(sockNamespace, "", "errors"),
|
||||
"Number of errors found since startup",
|
||||
[]string{"type"}, nil,
|
||||
)
|
||||
metricServerUp = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(sockNamespace, "", "up"),
|
||||
"Check if the fail2ban server is up",
|
||||
nil, nil,
|
||||
|
@ -83,10 +88,12 @@ var (
|
|||
)
|
||||
|
||||
type Exporter struct {
|
||||
db *fail2banDb.Fail2BanDB
|
||||
socketPath string
|
||||
lastError error
|
||||
dbErrorCount int
|
||||
db *fail2banDb.Fail2BanDB
|
||||
socketPath string
|
||||
lastError error
|
||||
dbErrorCount int
|
||||
socketConnectionErrorCount int
|
||||
socketRequestErrorCount int
|
||||
}
|
||||
|
||||
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
||||
|
@ -98,13 +105,14 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
|||
ch <- metricErrorCount
|
||||
}
|
||||
if e.socketPath != "" {
|
||||
ch <- metricServerPing
|
||||
ch <- metricServerUp
|
||||
ch <- metricJailCount
|
||||
ch <- metricJailFailedCurrent
|
||||
ch <- metricJailFailedTotal
|
||||
ch <- metricJailBannedCurrent
|
||||
ch <- metricJailBannedTotal
|
||||
}
|
||||
ch <- metricErrorCountNew
|
||||
}
|
||||
|
||||
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
|
@ -119,12 +127,16 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
|||
s, err := socket.ConnectToSocket(e.socketPath)
|
||||
if err != nil {
|
||||
log.Printf("error opening socket: %v", err)
|
||||
e.socketConnectionErrorCount++
|
||||
} else {
|
||||
defer s.Close()
|
||||
e.collectServerPingMetric(ch, s)
|
||||
}
|
||||
e.collectServerUpMetric(ch, s)
|
||||
if err == nil && s != nil {
|
||||
e.collectJailMetrics(ch, s)
|
||||
}
|
||||
}
|
||||
e.collectErrorCountMetricNew(ch)
|
||||
}
|
||||
|
||||
func (e *Exporter) collectUpMetric(ch chan<- prometheus.Metric) {
|
||||
|
@ -191,20 +203,42 @@ func (e *Exporter) collectEnabledJailMetrics(ch chan<- prometheus.Metric) {
|
|||
}
|
||||
}
|
||||
|
||||
func (e *Exporter) collectServerPingMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) {
|
||||
pingSuccess := s.Ping()
|
||||
var pingSuccessInt float64 = 1
|
||||
if !pingSuccess {
|
||||
pingSuccessInt = 0
|
||||
func (e *Exporter) collectErrorCountMetricNew(ch chan<- prometheus.Metric) {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metricErrorCountNew, prometheus.CounterValue, float64(e.dbErrorCount), "db",
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metricErrorCountNew, prometheus.CounterValue, float64(e.socketConnectionErrorCount), "socket_conn",
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metricErrorCountNew, prometheus.CounterValue, float64(e.socketRequestErrorCount), "socket_req",
|
||||
)
|
||||
}
|
||||
|
||||
func (e *Exporter) collectServerUpMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) {
|
||||
var serverUp float64 = 0
|
||||
if s != nil {
|
||||
pingSuccess, err := s.Ping()
|
||||
if err != nil {
|
||||
e.socketRequestErrorCount++
|
||||
log.Print(err)
|
||||
}
|
||||
if err == nil && pingSuccess {
|
||||
serverUp = 1
|
||||
}
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metricServerPing, prometheus.GaugeValue, pingSuccessInt,
|
||||
metricServerUp, prometheus.GaugeValue, serverUp,
|
||||
)
|
||||
}
|
||||
|
||||
func (e *Exporter) collectJailMetrics(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket) {
|
||||
jails, err := s.GetJails()
|
||||
var count float64 = 0
|
||||
if err != nil {
|
||||
e.socketRequestErrorCount++
|
||||
log.Print(err)
|
||||
}
|
||||
if err == nil {
|
||||
count = float64(len(jails))
|
||||
}
|
||||
|
@ -220,6 +254,7 @@ func (e *Exporter) collectJailMetrics(ch chan<- prometheus.Metric, s *socket.Fai
|
|||
func (e *Exporter) collectJailStatsMetric(ch chan<- prometheus.Metric, s *socket.Fail2BanSocket, jail string) {
|
||||
stats, err := s.GetJailStats(jail)
|
||||
if err != nil {
|
||||
e.socketRequestErrorCount++
|
||||
log.Printf("failed to get stats for jail %s: %v", jail, err)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@ import (
|
|||
"fmt"
|
||||
"github.com/kisielk/og-rek"
|
||||
"github.com/nlpodyssey/gopickle/types"
|
||||
"log"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
@ -36,21 +35,19 @@ func (s *Fail2BanSocket) Close() error {
|
|||
return s.socket.Close()
|
||||
}
|
||||
|
||||
func (s *Fail2BanSocket) Ping() bool {
|
||||
func (s *Fail2BanSocket) Ping() (bool, error) {
|
||||
response, err := s.sendCommand([]string{pingCommand, "100"})
|
||||
if err != nil {
|
||||
log.Printf("server ping failed: %v", err)
|
||||
return false
|
||||
return false, newConnectionError(pingCommand, err)
|
||||
}
|
||||
|
||||
if t, ok := response.(*types.Tuple); ok {
|
||||
if (*t)[1] == "pong" {
|
||||
return true
|
||||
return true, nil
|
||||
}
|
||||
log.Printf("unexpected response data: %s", t)
|
||||
return false, fmt.Errorf("unexpected response data (expecting 'pong'): %s", (*t)[1])
|
||||
}
|
||||
log.Printf("(%s) unexpected response format - cannot parse: %v", pingCommand, response)
|
||||
return false
|
||||
return false, newBadFormatError(pingCommand, response)
|
||||
}
|
||||
|
||||
func (s *Fail2BanSocket) GetJails() ([]string, error) {
|
||||
|
@ -125,6 +122,10 @@ func newBadFormatError(command string, data interface{}) error {
|
|||
return fmt.Errorf("(%s) unexpected response format - cannot parse: %v", command, data)
|
||||
}
|
||||
|
||||
// newConnectionError builds the error returned when command could not be sent
// through the socket. The underlying err is wrapped with %w so callers can
// still inspect it with errors.Is / errors.As; the rendered message is
// unchanged.
func newConnectionError(command string, err error) error {
	return fmt.Errorf("(%s) failed to send command through socket: %w", command, err)
}
|
||||
|
||||
func trimSpaceForAll(slice []string) []string {
|
||||
for i := range slice {
|
||||
slice[i] = strings.TrimSpace(slice[i])
|
||||
|
|
Loading…
Reference in a new issue