initial commit

This commit is contained in:
gilbn 2020-06-25 22:43:33 +02:00
parent 9adadc5316
commit 43ba40d1f0
8 changed files with 678 additions and 2 deletions

133
.dockerignore Normal file
View file

@ -0,0 +1,133 @@
.gitignore
LICENSE
README.md
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

14
Dockerfile Normal file
View file

@ -0,0 +1,14 @@
FROM lsiobase/alpine:3.12

LABEL maintainer="GilbN"

WORKDIR /geoip2influx

# Copy only the app script and its dependency manifest; runtime s6 scripts
# are copied last so dependency layers stay cached when they change.
COPY requirements.txt geoip2influx.py /geoip2influx/

# python3/py3-pip/libmaxminddb are RUNTIME dependencies of the parser, so
# they must stay in the image. The original `--virtual=build-dependencies`
# group was misleading: nothing ever ran `apk del build-dependencies`, and
# deleting it would have removed the interpreter the app needs.
RUN \
    echo " ## Installing packages ## " && \
    apk add --no-cache \
        libmaxminddb \
        py3-pip \
        python3 && \
    echo " ## Installing python modules ## " && \
    pip3 install --no-cache-dir -r requirements.txt

# s6-overlay service definitions, cont-init scripts and crontab.
COPY root/ /

164
README.md
View file

@ -1,2 +1,162 @@
# geoip2influx
A python script that will parse the nginx access.log and send geolocation metrics and log metrics to InfluxDB
# Geoip2Influx
<p align="center"></a>
<a href="https://discord.gg/HSPa4cz" rel="noopener"><img class="alignnone" title="Geoip2Influx!" src="https://img.shields.io/badge/chat-Discord-blue.svg?style=for-the-badge&logo=discord" alt="" height="37" />
</a>
<a href="https://technicalramblings.com/" rel="noopener"><img class="alignnone" title="technicalramblings!" src="https://img.shields.io/badge/blog-technicalramblings.com-informational.svg?style=for-the-badge" alt="" height="37" />
</a>
<a href="https://hub.docker.com/r/gilbn/lsio-docker-mods" rel="noopener"><img alt="Docker Cloud Build Status" src="https://img.shields.io/docker/cloud/build/gilbn/lsio-docker-mods?style=for-the-badge&logo=docker" height="37">
</a>
<br />
<br />
***
Adapted source: https://github.com/ratibor78/geostat
![](https://i.imgur.com/mh0IhYA.jpg)
The script will parse the access log for IPs and convert them into geo metrics for InfluxDB. It will also send log metrics if enabled.
***
## Usage
### Environment variables:
These are the **default** values for all envs.
Add the ones that differ on your system.
| Environment Variable | Example Value | Description |
| -------------------- | ------------- | ----------- |
| NGINX_LOG_PATH | /config/log/nginx/access.log | Container path for Nginx logfile , defaults to the example. |
| INFLUX_HOST | localhost | Host running InfluxDB. |
| INFLUX_HOST_PORT | 8086 | Optional, defaults to 8086. |
| INFLUX_DATABASE | geoip2influx | Optional, defaults to geoip2influx. |
| INFLUX_USER | root | Optional, defaults to root. |
| INFLUX_PASS | root | Optional, defaults to root. |
| GEO_MEASUREMENT | geoip2influx | InfluxDB measurement name for geohashes. Optional, defaults to the example. |
| LOG_MEASUREMENT | nginx_access_logs | InfluxDB measurement name for nginx logs. Optional, defaults to the example. |
| SEND_NGINX_LOGS | true | Set to `false` to disable nginx logs. Optional, defaults to `true`. |
| GEOIP2INFLUX_LOG_LEVEL | info | Sets the log level in geoip2influx.log. Use `debug` for verbose logging. Optional, defaults to info. |
| INFLUX_RETENTION | 30d | Sets the retention for the database. Optional, defaults to example.|
| INFLUX_SHARD | 2d | Set the shard for the database. Optional, defaults to example. |
| MAXMINDDB_LICENSE_KEY | xxxxxxx | Add your Maxmind licence key |
### MaxMind Geolite2
Default download location is `/config/geoip2db/GeoLite2-City.mmdb`
Get your licence key here: https://www.maxmind.com/en/geolite2/signup
### InfluxDB
The InfluxDB database will be created automatically with the name you choose.
```
-e INFLUX_DATABASE=geoip2influx
```
### Docker
```bash
docker create \
--name=geoip2influx \
-e PUID=1000 \
-e PGID=1000 \
-e TZ=Europe/Oslo \
-e INFLUX_HOST=<influxdb host> \
-e INFLUX_HOST_PORT=<influxdb port> \
-e MAXMINDDB_LICENSE_KEY=<license key>\
-v /path/to/appdata/geoip2influx:/config \
-v /path/to/nginx/accesslog/:/config/log/nginx/ \
--restart unless-stopped \
gilbn/geoip2influx
```
### Docker compose
```yaml
version: "2.1"
services:
geoip2influx:
image: gilbn/geoip2influx
container_name: geoip2influx
environment:
- PUID=1000
- PGID=1000
- TZ=Europe/Oslo
- INFLUX_HOST=<influxdb host>
- INFLUX_HOST_PORT=<influxdb port>
- MAXMINDDB_LICENSE_KEY=<license key>
volumes:
- /path/to/appdata/geoip2influx:/config
- /path/to/nginx/accesslog/:/config/log/nginx/
restart: unless-stopped
```
***
## Grafana dashboard:
### [Grafana Dashboard Link](https://grafana.com/grafana/dashboards/12268/)
***
## Sending Nginx log metrics
1. Add the following to the http block in your `nginx.conf` file:
```nginx
geoip2 /config/geoip2db/GeoLite2-City.mmdb {
auto_reload 5m;
$geoip2_data_country_code country iso_code;
$geoip2_data_city_name city names en;
}
log_format custom '$remote_addr - $remote_user [$time_local]'
'"$request" $status $body_bytes_sent'
'"$http_referer" $host "$http_user_agent"'
'"$request_time" "$upstream_connect_time"'
'"$geoip2_data_city_name" "$geoip2_data_country_code"';
```
2. Set the access log to use the `custom` log format.
```nginx
access_log /config/log/nginx/access.log custom;
```
### Multiple log files
If you separate your nginx log files but want this script to parse all of them you can do the following:
As nginx can have multiple `access log` directives in a block, just add another one in the server block.
**Example**
```nginx
access_log /config/log/nginx/technicalramblings/access.log custom;
access_log /config/log/nginx/access.log custom;
```
This will log the same lines to both files.
Then use the `/config/log/nginx/access.log` file in the `NGINX_LOG_PATH` variable.
***
## Updates
**21.06.20** - Added $host(domain) to the nginx log metrics. This will break your nginx logs parsing, as you need to update the custom log format.
**06.06.20** - Added influx retention policy to try and mitigate max-values-per-tag limit exceeded errors.
* `-e INFLUX_RETENTION` Default 30d
* `-e INFLUX_SHARD` Default 2d
* It will only add the retention policy if the database doesn't exist.
**30.05.20** - Added logging. Use `-e GEOIP2INFLUX_LOG_LEVEL` to set the log level.
**15.05.20** - Removed `GEOIP2_KEY` and `GEOIP_DB_PATH`variables. With commit https://github.com/linuxserver/docker-letsencrypt/commit/75b9685fdb3ec6edda590300f289b0e75dd9efd0 the letsencrypt container now natively supports downloading and updating(weekly) the GeoLite2-City database!

314
geoip2influx.py Normal file
View file

@ -0,0 +1,314 @@
#! /usr/bin/env python3
# Getting GEO information from Nginx access.log by IP's.
# Alexey Nizhegolenko 2018
# Parts added by Remko Lodder, 2019.
# Added: IPv6 matching, make query based on geoip2 instead of
# geoip, which is going away r.s.n.
# GilbN 2020:
# Adapted to Python 3.
# Added environment variables for Docker.
# Added log metrics
# Added regex tester
# Added file path check
# Added logging
# Switched to pep8 style variables etc.
# Adapted to geoip2.
from os.path import exists, isfile
from os import environ as env, stat
from platform import uname
from re import compile, match, search, IGNORECASE
from sys import path, exit
from time import sleep, time
from datetime import datetime
import logging
from geoip2.database import Reader
from geohash2 import encode
from influxdb import InfluxDBClient
from requests.exceptions import ConnectionError
from influxdb.exceptions import InfluxDBServerError, InfluxDBClientError
from IPy import IP as ipadd
# Getting params from envs
# All tunables come from the container environment with defaults matching the
# README; only the GeoLite2 database path is fixed (kept current by the
# weekly libmaxminddb cron job inside the container).
geoip_db_path = '/config/geoip2db/GeoLite2-City.mmdb'
log_path = env.get('NGINX_LOG_PATH', '/config/log/nginx/access.log')  # nginx access log to tail
influxdb_host = env.get('INFLUX_HOST', 'localhost')
influxdb_port = env.get('INFLUX_HOST_PORT', '8086')
influxdb_database = env.get('INFLUX_DATABASE', 'geoip2influx')
influxdb_user = env.get('INFLUX_USER', 'root')
influxdb_user_pass = env.get('INFLUX_PASS', 'root')
influxdb_retention = env.get('INFLUX_RETENTION','30d')  # retention policy duration
influxdb_shard = env.get('INFLUX_SHARD', '2d')  # retention policy shard group duration
geo_measurement = env.get('GEO_MEASUREMENT', 'geoip2influx')  # measurement for geohash points
log_measurement = env.get('LOG_MEASUREMENT', 'nginx_access_logs')  # measurement for parsed log lines
send_nginx_logs = env.get('SEND_NGINX_LOGS','true')  # only 'true'/'True' enables log metrics (see logparse)
log_level = env.get('GEOIP2INFLUX_LOG_LEVEL', 'info').upper()  # upper-cased to match logging level names
# Logging
# The log file is written next to the script itself (sys.path[0]).
logging.basicConfig(level=log_level,format='%(asctime)s :: %(levelname)s :: %(message)s',datefmt='%d/%b/%Y %H:%M:%S',filename=path[0] + '/geoip2influx.log')
def regex_tester(log_path, N):
    """Verify the nginx custom log format by matching the last N log lines.

    Re-reads the tail of ``log_path`` every 2 seconds for up to 60 seconds.
    Returns True as soon as one tailed line matches the full IPv4 or IPv6
    log-format regex; falls out of the loop (returning None) on timeout.
    """
    time_out = time() + 60
    # Cheap pre-filters: does the line even start with an IPv4/IPv6 address?
    re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
    while True:
        assert N >= 0
        pos = N + 1
        lines = []
        # Tail-read: seek backwards from EOF in doubling steps until at least
        # N lines are buffered; on a short file fall back to reading from the
        # start. NOTE(review): `lines = list(f)` in the finally block re-reads
        # the remainder of the file on every iteration — works, but is O(file)
        # per attempt.
        with open(log_path) as f:
            while len(lines) <= N:
                try:
                    f.seek(-pos, 2)
                except IOError:
                    f.seek(0)
                    break
                finally:
                    lines = list(f)
                pos *= 2
        log_lines = lines[-N:]
        for line in log_lines:
            if re_ipv4.match(line):
                # Full custom-log-format regex for IPv4 lines.
                regex = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
                if regex.match(line):
                    logging.debug('Regex is matching %s continuing...' % log_path)
                    return True
            if re_ipv6.match(line):
                # Full custom-log-format regex for IPv6 lines.
                regex = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
                if regex.match(line):
                    logging.debug('Regex is matching %s continuing...' % log_path)
                    return True
            else:
                # NOTE(review): this `else` pairs with the `if re_ipv6` above,
                # so it fires for every non-IPv6-prefixed line — presumably
                # intended as "still testing" progress logging.
                logging.debug('Testing regex on: %s ' % log_path)
        sleep(2)
        if time() > time_out:
            logging.warning('Failed to match regex on: %s ' % log_path)
            break
def file_exists(log_path, geoip_db_path):
    """Wait (up to ~30 seconds) for the access log and GeoIP database.

    Returns True as soon as both paths are regular files. Once the deadline
    passes with either file still missing, logs a critical message naming
    whatever is absent and falls through (implicitly returning None).
    """
    deadline = time() + 30
    while True:
        required = [log_path, geoip_db_path]
        # Warn about each missing file, pausing one second per miss so the
        # container log is not flooded while waiting.
        if not exists(log_path):
            logging.warning(('File: %s not found...' % log_path))
            sleep(1)
        if not exists(geoip_db_path):
            logging.warning(('File: %s not found...' % geoip_db_path))
            sleep(1)
        found = [isfile(candidate) for candidate in required]
        if all(found):
            for candidate in required:
                logging.debug('Found: %s' % candidate)
            return True
        if time() > deadline:
            log_missing = not exists(log_path)
            db_missing = not exists(geoip_db_path)
            if db_missing and log_missing:
                logging.critical("Can't find: " + geoip_db_path + ' or ' + log_path + ', exiting!')
                break
            elif db_missing:
                logging.critical("Can't find: %s , exiting!" % geoip_db_path)
                break
            elif log_missing:
                logging.critical("Can't find: %s , exiting!" % log_path)
                break
def logparse(
        log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass, influxdb_retention,
        influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode):
    """Tail the nginx access log forever, shipping metrics to InfluxDB.

    Emits one geohash point per public-IP line (measurement
    ``geo_measurement``) and, when ``send_nginx_logs`` is 'true'/'True', one
    parsed-log point (measurement ``log_measurement``). Returns when the log
    file's inode changes (rotation) so the caller can reopen it; exits the
    process if InfluxDB is unreachable or misconfigured.
    """
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]  # nodename, used as the 'host' tag on geo points
    client = InfluxDBClient(
        host=influxdb_host, port=influxdb_port, username=influxdb_user, password=influxdb_user_pass, database=influxdb_database)
    # Fail fast if InfluxDB cannot be reached at all.
    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug('Influxdb version: %s' % version)
    except ConnectionError as e:
        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
                         'Error: %s' % e
                         )
        exit(1)
    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug('Found database: %s' % influxdb_database)
    except InfluxDBClientError as e:
        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
                         'Error: %s' % e
                         )
        exit(1)
    # Create the database (and its retention policy) only on first run; an
    # existing database is left untouched (see README "Updates" 06.06.20).
    if influxdb_database not in databases:
        logging.info('Creating database: %s' % influxdb_database)
        client.create_database(influxdb_database)
        retention_policies = [policy['name'] for policy in client.get_list_retention_policies(database=influxdb_database)]
        if '%s %s-%s' % (influxdb_database, influxdb_retention, influxdb_shard) not in retention_policies:
            logging.info('Creating %s retention policy (%s-%s)' % (influxdb_database, influxdb_retention, influxdb_shard))
            client.create_retention_policy(name='%s %s-%s' % (influxdb_database, influxdb_retention, influxdb_shard), duration=influxdb_retention, replication='1',
                                           database=influxdb_database, default=True, shard_duration=influxdb_shard)
    # Full custom-log-format regexes (capture groups feed the log metrics).
    re_ipv4 = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
    re_ipv6 = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
    gi = Reader(geoip_db_path)
    # When log metrics are disabled the full regexes are replaced with the
    # cheap address-only ones: geo points still need the leading IP.
    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        send_logs = False
        re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
        re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
        logging.info('SEND_NGINX_LOGS set to false')
        pass
    # If the last 3 log lines don't match the custom format, fall back to
    # geo-only parsing rather than crashing on every line.
    if not regex_tester(log_path,3):
        if send_logs:
            re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
            re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
            send_logs = False
            logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')
    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]  # st_size: start tailing at current EOF
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            # Log rotation: inode changed, return so main() reopens the file.
            if inode != inodenew:
                break
            if not line:
                # No new data yet; wait and rewind to the same offset.
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning('Failed to match regex that previously matched!? Skipping this line!\n'
                                    'Please share the log line below on Discord or Github!\n'
                                    'Line: %s' % line
                                    )
                    continue
                # Geo point: only for public addresses (private/LAN skipped).
                if ipadd(ip).iptype() == 'PUBLIC' and ip:
                    info = gi.city(ip)
                    if info is not None:
                        geohash = encode(info.location.latitude, info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code
                        geohash_tags['city'] = info.city.name
                        geohash_tags['postal_code'] = info.postal.code
                        geohash_tags['latitude'] = info.location.latitude
                        geohash_tags['longitude'] = info.location.longitude
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug('Geo metrics: %s' % geo_metrics)
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error('Error writing data to InfluxDB! Check your database!\n'
                                          'Error: %s' % e
                                          )
                # Log point: re-parse the line with the full regex so the
                # named capture groups are available.
                if send_logs:
                    data = search(log, line)
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        info = gi.city(ip)
                        if info is not None:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(datadict['request_time'])
                            # '-' means nginx had no upstream connect time.
                            if datadict['connect_time'] == '-':
                                log_data_fields['connect_time'] = 0.0
                            else:
                                log_data_fields['connect_time'] = float(datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug('NGINX log metrics: %s' % log_metrics)
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
                                logging.error('Error writing data to InfluxDB! Check your database!\n'
                                              'Error: %s' % e
                                              )
def main():
    """Log the effective configuration, then run the parse loop forever.

    logparse() returns whenever the log file's inode changes (rotation), so
    the while loop reopens the rotated file with a fresh inode.
    NOTE(review): this debug line prints INFLUX_PASS in clear text to the
    log file — consider masking it.
    """
    logging.info('Starting geoip2influx..')
    logging.debug('Variables set:' +
                  '\n geoip_db_path :: %s' % geoip_db_path +
                  '\n -e LOG_PATH :: %s' % log_path +
                  '\n -e INFLUX_HOST :: %s' % influxdb_host +
                  '\n -e INFLUX_HOST_PORT :: %s' % influxdb_port +
                  '\n -e INFLUX_DATABASE :: %s' % influxdb_database +
                  '\n -e INFLUX_RETENTION :: %s' % influxdb_retention +
                  '\n -e INFLUX_SHARD :: %s' % influxdb_shard +
                  '\n -e INFLUX_USER :: %s' % influxdb_user +
                  '\n -e INFLUX_PASS :: %s' % influxdb_user_pass +
                  '\n -e GEO_MEASUREMENT :: %s' % geo_measurement +
                  '\n -e LOG_MEASUREMENT :: %s' % log_measurement +
                  '\n -e SEND_NGINX_LOGS :: %s' % send_nginx_logs +
                  '\n -e GEOIP2INFLUX_LOG_LEVEL :: %s' % log_level
                  )
    # Parsing log file and sending metrics to Influxdb
    while file_exists(log_path,geoip_db_path):
        # Get inode from log file
        inode = stat(log_path).st_ino
        # Run main loop and grep a log file
        logparse(
            log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass,
            influxdb_retention, influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode) # NOQA
# Script entry point: exit cleanly on Ctrl-C instead of a traceback.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        exit(0)

4
requirements.txt Normal file
View file

@ -0,0 +1,4 @@
geoip2
geohash2
influxdb
IPy

View file

@ -0,0 +1,40 @@
#!/usr/bin/with-contenv bash
# cont-init script: runs once at container start (before the s6 service)
# to stage geoip2influx.py in persistent storage and wire up the MaxMind
# GeoLite2 database download.
echo '------------------------------------------------------------------------'
echo '| Made by GilbN'
echo '| Running installation of required modules for geoip2influx'
echo '------------------------------------------------------------------------'
# Print the effective configuration. NOTE(review): INFLUX_PASS and
# MAXMINDDB_LICENSE_KEY are echoed in clear text to the container log.
echo -e "Variables set:\\n\
NGINX_LOG_PATH=${NGINX_LOG_PATH}\\n\
INFLUX_HOST=${INFLUX_HOST}\\n\
INFLUX_HOST_PORT=${INFLUX_HOST_PORT}\\n\
INFLUX_DATABASE=${INFLUX_DATABASE}\\n\
INFLUX_USER=${INFLUX_USER}\\n\
INFLUX_PASS=${INFLUX_PASS}\\n\
INFLUX_RETENTION=${INFLUX_RETENTION}\\n\
INFLUX_SHARD=${INFLUX_SHARD}\\n\
GEO_MEASUREMENT=${GEO_MEASUREMENT}\\n\
LOG_MEASUREMENT=${LOG_MEASUREMENT}\\n\
SEND_NGINX_LOGS=${SEND_NGINX_LOGS}\\n\
GEOIP2INFLUX_LOG_LEVEL=${GEOIP2INFLUX_LOG_LEVEL}\\n\
MAXMINDDB_LICENSE_KEY=${MAXMINDDB_LICENSE_KEY}\\n"
# Copy the script into /config so the s6 run script executes it from the
# persistent volume; abc:abc is the linuxserver.io container user.
mkdir -p /config/geoip2db
cp -f /geoip2influx/geoip2influx.py /config/geoip2db
chown -R abc:abc /config/geoip2db
chmod +x /config/geoip2db/geoip2influx.py
# create GeoIP2 folder symlink
# Replace a pre-existing real directory with a symlink into /config so the
# weekly libmaxminddb cron job updates the persistent database copy.
[[ -d /var/lib/libmaxminddb ]] && [[ ! -L /var/lib/libmaxminddb ]] && \
    rm -rf /var/lib/libmaxminddb
[[ ! -d /var/lib/libmaxminddb ]] && \
    ln -s /config/geoip2db /var/lib/libmaxminddb
# check GeoIP2 database
if [ -n "$MAXMINDDB_LICENSE_KEY" ]; then
    # Inject the license key into the libmaxminddb config, then trigger an
    # immediate download if no database exists yet (the weekly cron script
    # handles subsequent refreshes).
    sed -i "s|.*MAXMINDDB_LICENSE_KEY.*|MAXMINDDB_LICENSE_KEY=\"${MAXMINDDB_LICENSE_KEY}\"|g" /etc/conf.d/libmaxminddb
    if [ ! -f /var/lib/libmaxminddb/GeoLite2-City.mmdb ]; then
        echo "Downloading GeoIP2 City database."
        /etc/periodic/weekly/libmaxminddb
    fi
fi

7
root/etc/crontabs/root Normal file
View file

@ -0,0 +1,7 @@
# do daily/weekly/monthly maintenance
# Default Alpine root crontab: run-parts executes every script in the
# matching /etc/periodic/* directory. The weekly run is what triggers the
# libmaxminddb GeoLite2-City update script referenced by the cont-init step.
# min hour day month weekday command
*/15 * * * * run-parts /etc/periodic/15min
0 * * * * run-parts /etc/periodic/hourly
0 2 * * * run-parts /etc/periodic/daily
0 3 * * 6 run-parts /etc/periodic/weekly
0 5 1 * * run-parts /etc/periodic/monthly

View file

@ -0,0 +1,4 @@
#!/usr/bin/with-contenv bash
# s6 service run script: start the parser as the supervised long-running
# process. `exec` replaces the shell so s6 signals reach python3 directly.
# NOTE(review): this runs as root even though cont-init chowns
# /config/geoip2db to abc:abc — consider `s6-setuidgid abc`, after
# verifying the nginx log files are readable by that user.
exec \
python3 /config/geoip2db/geoip2influx.py