From 43ba40d1f0b854892eef7e51871d48a67e759bcb Mon Sep 17 00:00:00 2001 From: gilbn Date: Thu, 25 Jun 2020 22:43:33 +0200 Subject: [PATCH] initial commit --- .dockerignore | 133 ++++++++++++ Dockerfile | 14 ++ README.md | 164 +++++++++++++- geoip2influx.py | 314 +++++++++++++++++++++++++++ requirements.txt | 4 + root/etc/cont-init.d/50-config | 40 ++++ root/etc/crontabs/root | 7 + root/etc/services.d/geoip2influx/run | 4 + 8 files changed, 678 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 geoip2influx.py create mode 100644 requirements.txt create mode 100644 root/etc/cont-init.d/50-config create mode 100644 root/etc/crontabs/root create mode 100644 root/etc/services.d/geoip2influx/run diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b88691a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,133 @@ +.gitignore +LICENSE +README.md + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..95c9bc0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM lsiobase/alpine:3.12 +LABEL maintainer="GilbN" + +WORKDIR /geoip2influx +COPY requirements.txt geoip2influx.py /geoip2influx/ +RUN \ +echo " ## Installing packages ## " && \ +apk add --no-cache --virtual=build-dependencies \ + python3 \ + py3-pip \ + libmaxminddb && \ +echo " ## Installing python modules ## " && \ +pip3 install --no-cache-dir -r requirements.txt +COPY root/ / \ No newline at end of file diff --git a/README.md b/README.md 
index 39c4af2..c7e1fb5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,162 @@ -# geoip2influx -A python script that will parse the nginx access.log and send geolocation metrics and log metrics to InfluxDB +# Geoip2Influx + +

+ + + + + +Docker Cloud Build Status + +
+
+ +*** + +Adapted source: https://github.com/ratibor78/geostat + +![](https://i.imgur.com/mh0IhYA.jpg) + + + +The script will parse the access log for IPs and convert them into geo metrics for InfluxDB. It will also send log metrics if enabled. + +*** + +## Usage + +### Environment variables: + +These are the **default** values for all envs. +Add the ones that differ on your system. + +| Environment Variable | Example Value | Description | +| -------------------- | ------------- | ----------- | +| NGINX_LOG_PATH | /config/log/nginx/access.log | Container path for Nginx logfile, defaults to the example. | +| INFLUX_HOST | localhost | Host running InfluxDB. | +| INFLUX_HOST_PORT | 8086 | Optional, defaults to 8086. | +| INFLUX_DATABASE | geoip2influx | Optional, defaults to geoip2influx. | +| INFLUX_USER | root | Optional, defaults to root. | +| INFLUX_PASS | root | Optional, defaults to root. | +| GEO_MEASUREMENT | geoip2influx | InfluxDB measurement name for geohashes. Optional, defaults to the example. | +| LOG_MEASUREMENT | nginx_access_logs | InfluxDB measurement name for nginx logs. Optional, defaults to the example. | +| SEND_NGINX_LOGS | true | Set to `false` to disable nginx logs. Optional, defaults to `true`. | +| GEOIP2INFLUX_LOG_LEVEL | info | Sets the log level in geoip2influx.log. Use `debug` for verbose logging. Optional, defaults to info. | +| INFLUX_RETENTION | 30d | Sets the retention for the database. Optional, defaults to example. | +| INFLUX_SHARD | 2d | Set the shard for the database. Optional, defaults to example. | +| MAXMINDDB_LICENSE_KEY | xxxxxxx | Add your MaxMind licence key. | + + +### MaxMind Geolite2 + +Default download location is `/config/geoip2db/GeoLite2-City.mmdb` + +Get your licence key here: https://www.maxmind.com/en/geolite2/signup + +### InfluxDB + +The InfluxDB database will be created automatically with the name you choose. 
+ +``` +-e INFLUX_DATABASE=geoip2influx +``` + +### Docker + +```bash +docker create \ + --name=geoip2influx \ + -e PUID=1000 \ + -e PGID=1000 \ + -e TZ=Europe/Oslo \ + -e INFLUX_HOST= \ + -e INFLUX_HOST_PORT= \ + -e MAXMINDDB_LICENSE_KEY=\ + -v /path/to/appdata/geoip2influx:/config \ + -v /path/to/nginx/accesslog/:/config/log/nginx/ \ + --restart unless-stopped \ + gilbn/geoip2influx +``` + +### Docker compose + +```yaml +version: "2.1" +services: + geoip2influx: + image: gilbn/geoip2influx + container_name: geoip2influx + environment: + - PUID=1000 + - PGID=1000 + - TZ=Europe/Oslo + - INFLUX_HOST= + - INFLUX_HOST_PORT= + - MAXMINDDB_LICENSE_KEY= + volumes: + - /path/to/appdata/geoip2influx:/config + - /path/to/nginx/accesslog/:/config/log/nginx/ + restart: unless-stopped +``` + +*** + +## Grafana dashboard: +### [Grafana Dashboard Link](https://grafana.com/grafana/dashboards/12268/) + +*** + +## Sending Nginx log metrics + +1. Add the following to the http block in your `nginx.conf` file: + +```nginx +geoip2 /config/geoip2db/GeoLite2-City.mmdb { +auto_reload 5m; +$geoip2_data_country_code country iso_code; +$geoip2_data_city_name city names en; +} + +log_format custom '$remote_addr - $remote_user [$time_local]' + '"$request" $status $body_bytes_sent' + '"$http_referer" $host "$http_user_agent"' + '"$request_time" "$upstream_connect_time"' + '"$geoip2_data_city_name" "$geoip2_data_country_code"'; + ``` + + 2. Set the access log to use the `custom` log format. + ```nginx + access_log /config/log/nginx/access.log custom; + ``` + +### Multiple log files + +If you separate your nginx log files but want this script to parse all of them, you can do the following: + +As nginx can have multiple `access_log` directives in a block, just add another one in the server block. + +**Example** + +```nginx + access_log /config/log/nginx/technicalramblings/access.log custom; + access_log /config/log/nginx/access.log custom; +``` +This will log the same lines to both files. 
+ +Then use the `/config/log/nginx/access.log` file in the `NGINX_LOG_PATH` variable. + +*** + +## Updates + +**21.06.20** - Added $host (domain) to the nginx log metrics. This will break your nginx logs parsing, as you need to update the custom log format. + +**06.06.20** - Added influx retention policy to try and mitigate max-values-per-tag limit exceeded errors. + + * `-e INFLUX_RETENTION` Default 30d + * `-e INFLUX_SHARD` Default 2d + * It will only add the retention policy if the database doesn't exist. + +**30.05.20** - Added logging. Use `-e GEOIP2INFLUX_LOG_LEVEL` to set the log level. + +**15.05.20** - Removed `GEOIP2_KEY` and `GEOIP_DB_PATH` variables. With commit https://github.com/linuxserver/docker-letsencrypt/commit/75b9685fdb3ec6edda590300f289b0e75dd9efd0 the letsencrypt container now natively supports downloading and updating (weekly) the GeoLite2-City database! diff --git a/geoip2influx.py b/geoip2influx.py new file mode 100644 index 0000000..2b178a0 --- /dev/null +++ b/geoip2influx.py @@ -0,0 +1,314 @@ +#! /usr/bin/env python3 + +# Getting GEO information from Nginx access.log by IPs. +# Alexey Nizhegolenko 2018 +# Parts added by Remko Lodder, 2019. +# Added: IPv6 matching, make query based on geoip2 instead of +# geoip, which is going away r.s.n. +# GilbN 2020: + # Adapted to Python 3. + # Added environment variables for Docker. + # Added log metrics + # Added regex tester + # Added file path check + # Added logging + # Switched to pep8 style variables etc. + # Adapted to geoip2. 
+
+from collections import deque
+from os.path import exists, isfile
+from os import environ as env, stat
+from platform import uname
+from re import compile, match, search, IGNORECASE
+from sys import path, exit
+from time import sleep, time
+from datetime import datetime
+import logging
+
+from geoip2.database import Reader
+from geoip2.errors import AddressNotFoundError
+from geohash2 import encode
+from influxdb import InfluxDBClient
+from requests.exceptions import ConnectionError
+from influxdb.exceptions import InfluxDBServerError, InfluxDBClientError
+from IPy import IP as ipadd
+
+
+# Getting params from envs
+geoip_db_path = '/config/geoip2db/GeoLite2-City.mmdb'
+log_path = env.get('NGINX_LOG_PATH', '/config/log/nginx/access.log')
+influxdb_host = env.get('INFLUX_HOST', 'localhost')
+influxdb_port = env.get('INFLUX_HOST_PORT', '8086')
+influxdb_database = env.get('INFLUX_DATABASE', 'geoip2influx')
+influxdb_user = env.get('INFLUX_USER', 'root')
+influxdb_user_pass = env.get('INFLUX_PASS', 'root')
+influxdb_retention = env.get('INFLUX_RETENTION', '30d')
+influxdb_shard = env.get('INFLUX_SHARD', '2d')
+geo_measurement = env.get('GEO_MEASUREMENT', 'geoip2influx')
+log_measurement = env.get('LOG_MEASUREMENT', 'nginx_access_logs')
+send_nginx_logs = env.get('SEND_NGINX_LOGS', 'true')
+log_level = env.get('GEOIP2INFLUX_LOG_LEVEL', 'info').upper()
+
+# Logging: everything goes to geoip2influx.log next to the script.
+logging.basicConfig(
+    level=log_level,
+    format='%(asctime)s :: %(levelname)s :: %(message)s',
+    datefmt='%d/%b/%Y %H:%M:%S',
+    filename=path[0] + '/geoip2influx.log')
+
+# Regex building blocks, compiled once at import time instead of being
+# repeated (and recompiled inside loops) in every function.
+IPV4_ADDRESS = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
+IPV6_ADDRESS = r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'  # NOQA
+
+# Everything after the client address in the custom NGINX log format.
+# NOTE(review): the group names were missing from this copy of the patch and
+# are restored from the keys read out of groupdict() in _log_point(). Which of
+# the two unquoted groups is 'referrer' and which is 'url' cannot be confirmed
+# from here - verify against the Grafana dashboard queries before merging.
+LOG_FIELDS = (
+    r' - (?P<remote_user>.+)'
+    r' \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\]'
+    r'(["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["])'
+    r' (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})'
+    r'(["](?P<url>(\-)|(.+))["])'
+    r' (?P<host>.+)'
+    r' (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["])'
+    r' (["](?P<connect_time>.+)["])(["](?P<city>.+)["])'
+    r' (["](?P<country_code>.+)["])'
+)
+
+# Address-only patterns (used when NGINX log metrics are disabled) and full
+# log-line patterns. In all four, group 1 is the client address.
+RE_IPV4_ADDRESS = compile('(' + IPV4_ADDRESS + ')')
+RE_IPV6_ADDRESS = compile(IPV6_ADDRESS)
+RE_IPV4_LOG = compile('(?P<ipaddress>' + IPV4_ADDRESS + ')' + LOG_FIELDS, IGNORECASE)
+RE_IPV6_LOG = compile('(?P<ipaddress>' + IPV6_ADDRESS + ')' + LOG_FIELDS, IGNORECASE)
+
+# Errors that must not kill the tail loop when a write to InfluxDB fails.
+INFLUX_WRITE_ERRORS = (InfluxDBServerError, InfluxDBClientError, ConnectionError)
+
+
+def regex_tester(log_path, N):
+    """Check whether one of the last N log lines uses the custom log format.
+
+    The file is re-read every 2 seconds for up to 60 seconds.
+
+    Returns True as soon as one of the last N lines matches the full IPv4 or
+    IPv6 log pattern, and False when the timeout expires first.
+    """
+    assert N >= 0
+    time_out = time() + 60
+    while True:
+        with open(log_path) as f:
+            # A bounded deque keeps only the last N lines (none when N == 0).
+            log_lines = deque(f, maxlen=N)
+        for line in log_lines:
+            if RE_IPV4_LOG.match(line) or RE_IPV6_LOG.match(line):
+                logging.debug('Regex is matching %s continuing...' % log_path)
+                return True
+        logging.debug('Testing regex on: %s ' % log_path)
+        if time() > time_out:
+            logging.warning('Failed to match regex on: %s ' % log_path)
+            return False
+        sleep(2)
+
+
+def file_exists(log_path, geoip_db_path):
+    """Wait up to 30 seconds for the log file and the GeoIP database.
+
+    Returns True once both paths are regular files, False on timeout.
+    """
+    time_out = time() + 30
+    file_list = [log_path, geoip_db_path]
+    while True:
+        # A path that exists but is not a regular file counts as missing, so
+        # the loop always sleeps and always honours the timeout.
+        missing = [f for f in file_list if not isfile(f)]
+        if not missing:
+            for f in file_list:
+                logging.debug('Found: %s' % f)
+            return True
+        for f in missing:
+            logging.warning('File: %s not found...' % f)
+        if time() > time_out:
+            logging.critical("Can't find: %s , exiting!" % ' or '.join(missing))
+            return False
+        sleep(1)
+
+
+def _write_points(client, points):
+    """Send points to InfluxDB; write failures are logged, not raised."""
+    try:
+        client.write_points(points)
+    except INFLUX_WRITE_ERRORS as e:
+        logging.error('Error writing data to InfluxDB! Check your database!\n'
+                      'Error: %s' % e
+                      )
+
+
+def _log_rotated(log_path, inode):
+    """Return True when log_path is gone or no longer has the given inode."""
+    try:
+        return stat(log_path).st_ino != inode
+    except FileNotFoundError:
+        return True
+
+
+def _geo_point(info, ip, hostname, geo_measurement):
+    """Build the geohash point for a GeoIP city record, or None without coordinates."""
+    latitude = info.location.latitude
+    longitude = info.location.longitude
+    if latitude is None or longitude is None:
+        return None
+    tags = {
+        'geohash': encode(latitude, longitude),
+        'ip': ip,
+        'host': hostname,
+        'country_code': info.country.iso_code,
+        'country_name': info.country.name,
+        'state': info.subdivisions.most_specific.name,
+        'state_code': info.subdivisions.most_specific.iso_code,
+        'city': info.city.name,
+        'postal_code': info.postal.code,
+        'latitude': latitude,
+        'longitude': longitude,
+    }
+    return {'tags': tags, 'fields': {'count': 1}, 'measurement': geo_measurement}
+
+
+def _log_point(datadict, info, log_measurement):
+    """Build the NGINX access log point from the named regex groups.
+
+    Raises ValueError when a numeric field or the timestamp cannot be parsed.
+    """
+    connect_time = datadict['connect_time']
+    fields = {
+        'count': 1,
+        'bytes_sent': int(datadict['bytes_sent']),
+        'request_time': float(datadict['request_time']),
+        # A connect time logged as '-' is stored as 0.0.
+        'connect_time': 0.0 if connect_time == '-' else float(connect_time),
+    }
+    tags = {
+        'ip': datadict['ipaddress'],
+        'datetime': datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z'),
+        'remote_user': datadict['remote_user'],
+        'method': datadict['method'],
+        'referrer': datadict['referrer'],
+        'host': datadict['host'],
+        'http_version': datadict['http_version'],
+        'status_code': datadict['status_code'],
+        'bytes_sent': datadict['bytes_sent'],
+        'url': datadict['url'],
+        'user_agent': datadict['user_agent'],
+        'request_time': datadict['request_time'],
+        'connect_time': datadict['connect_time'],
+        'city': datadict['city'],
+        'country_code': datadict['country_code'],
+        'country_name': info.country.name,
+    }
+    return {'tags': tags, 'fields': fields, 'measurement': log_measurement}
+
+
+def logparse(
+        log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass, influxdb_retention,
+        influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode):
+    """Tail the NGINX access log and send geo (and optionally log) metrics to InfluxDB.
+
+    Creates the database and its retention policy when the database does not
+    exist yet, then follows log_path from its current end. Returns when the
+    file at log_path is removed or its inode no longer equals ``inode`` (log
+    rotation), so the caller can reopen it. Exits the process with status 1
+    when InfluxDB cannot be reached or queried.
+    """
+    hostname = uname()[1]
+    client = InfluxDBClient(
+        host=influxdb_host, port=influxdb_port, username=influxdb_user, password=influxdb_user_pass, database=influxdb_database)
+
+    try:
+        logging.debug('Testing InfluxDB connection')
+        version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
+        logging.debug('Influxdb version: %s' % version)
+    except ConnectionError as e:
+        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
+                         'Error: %s' % e
+                         )
+        exit(1)
+
+    try:
+        databases = [db['name'] for db in client.get_list_database()]
+        if influxdb_database in databases:
+            logging.debug('Found database: %s' % influxdb_database)
+    except InfluxDBClientError as e:
+        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
+                         'Error: %s' % e
+                         )
+        exit(1)
+
+    if influxdb_database not in databases:
+        logging.info('Creating database: %s' % influxdb_database)
+        client.create_database(influxdb_database)
+
+        # The retention policy is only added to a database created here.
+        policy_name = '%s %s-%s' % (influxdb_database, influxdb_retention, influxdb_shard)
+        retention_policies = [policy['name'] for policy in client.get_list_retention_policies(database=influxdb_database)]
+        if policy_name not in retention_policies:
+            logging.info('Creating %s retention policy (%s-%s)' % (influxdb_database, influxdb_retention, influxdb_shard))
+            client.create_retention_policy(name=policy_name, duration=influxdb_retention, replication='1',
+                                           database=influxdb_database, default=True, shard_duration=influxdb_shard)
+
+    send_logs = str(send_nginx_logs).lower() == 'true'
+    if not send_logs:
+        logging.info('SEND_NGINX_LOGS set to false')
+    elif not regex_tester(log_path, 3):
+        # Only probe the log format when log metrics were requested.
+        send_logs = False
+        logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')
+
+    if send_logs:
+        re_ipv4, re_ipv6 = RE_IPV4_LOG, RE_IPV6_LOG
+    else:
+        re_ipv4, re_ipv6 = RE_IPV4_ADDRESS, RE_IPV6_ADDRESS
+
+    gi = Reader(geoip_db_path)
+    try:
+        # Main loop to parse access.log file in tailf style with sending metrics.
+        with open(log_path, 'r') as log_file:
+            logging.info('Starting log parsing')
+            log_file.seek(0, 2)  # start at the current end of the file
+            while True:
+                where = log_file.tell()
+                line = log_file.readline()
+                if not line:
+                    # Check for rotation only once the current file is
+                    # drained, so a line that was already read is never lost.
+                    if _log_rotated(log_path, inode):
+                        break
+                    sleep(1)
+                    log_file.seek(where)
+                    continue
+
+                m = re_ipv4.match(line) or re_ipv6.match(line)
+                if m is None:
+                    logging.warning('Failed to match regex that previously matched!? Skipping this line!\n'
+                                    'Please share the log line below on Discord or Github!\n'
+                                    'Line: %s' % line
+                                    )
+                    continue
+                ip = m.group(1)
+
+                # NOTE(review): only addresses IPy classifies as 'PUBLIC' are
+                # processed, as before. Confirm which type string IPy reports
+                # for global IPv6 addresses; if it is not 'PUBLIC' they are
+                # silently skipped here.
+                try:
+                    is_public = ipadd(ip).iptype() == 'PUBLIC'
+                except ValueError:
+                    # The IPv4 pattern also accepts impossible octets such as 999.
+                    logging.debug('Skipping invalid IP address: %s' % ip)
+                    continue
+                if not is_public:
+                    continue
+
+                try:
+                    info = gi.city(ip)
+                except AddressNotFoundError:
+                    logging.debug('IP %s not found in the GeoIP database, skipping' % ip)
+                    continue
+
+                geo_point = _geo_point(info, ip, hostname, geo_measurement)
+                if geo_point is not None:
+                    geo_metrics = [geo_point]
+                    logging.debug('Geo metrics: %s' % geo_metrics)
+                    _write_points(client, geo_metrics)
+
+                if send_logs:
+                    try:
+                        log_metrics = [_log_point(m.groupdict(), info, log_measurement)]
+                    except ValueError as e:
+                        logging.warning('Skipping NGINX log metrics for line: %s\n'
+                                        'Error: %s' % (line, e))
+                        continue
+                    logging.debug('NGINX log metrics: %s' % log_metrics)
+                    _write_points(client, log_metrics)
+    finally:
+        # logparse() runs again after every log rotation; release the GeoIP
+        # database and the HTTP session instead of leaking one per call.
+        gi.close()
+        client.close()
+
+
+def main():
+    """Log the effective configuration, then parse the log until a required file disappears."""
+    logging.info('Starting geoip2influx..')
+
+    logging.debug('Variables set:' +
+                  '\n geoip_db_path :: %s' % geoip_db_path +
+                  '\n -e NGINX_LOG_PATH :: %s' % log_path +
+                  '\n -e INFLUX_HOST :: %s' % influxdb_host +
+                  '\n -e INFLUX_HOST_PORT :: %s' % influxdb_port +
+                  '\n -e INFLUX_DATABASE :: %s' % influxdb_database +
+                  '\n -e INFLUX_RETENTION :: %s' % influxdb_retention +
+                  '\n -e INFLUX_SHARD :: %s' % influxdb_shard +
+                  '\n -e INFLUX_USER :: %s' % influxdb_user +
+                  # Never write the password itself to the log file.
+                  '\n -e INFLUX_PASS :: %s' % ('********' if influxdb_user_pass else '') +
+                  '\n -e GEO_MEASUREMENT :: %s' % geo_measurement +
+                  '\n -e LOG_MEASUREMENT :: %s' % log_measurement +
+                  '\n -e SEND_NGINX_LOGS :: %s' % send_nginx_logs +
+                  '\n -e GEOIP2INFLUX_LOG_LEVEL :: %s' % log_level
+                  )
+    # Parsing log file and sending metrics to Influxdb
+    while file_exists(log_path, geoip_db_path):
+        # Get inode from log file
+        inode = stat(log_path).st_ino
+        # Run main loop and grep a log file
+        logparse(
+            log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass,
+            influxdb_retention, influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode)  # NOQA
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        exit(0)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5abbd2e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+geoip2
+geohash2
+influxdb
+IPy
\ No newline at end of file
diff --git a/root/etc/cont-init.d/50-config b/root/etc/cont-init.d/50-config
new file mode 100644
index 0000000..ace9952
--- /dev/null
+++ b/root/etc/cont-init.d/50-config
@@ -0,0 +1,40 @@
+#!/usr/bin/with-contenv bash
+
+echo '------------------------------------------------------------------------'
+echo '| Made by GilbN'
+echo '| Running installation of required modules for geoip2influx'
+echo '------------------------------------------------------------------------'
+
+echo -e "Variables set:\\n\
+NGINX_LOG_PATH=${NGINX_LOG_PATH}\\n\
+INFLUX_HOST=${INFLUX_HOST}\\n\
+INFLUX_HOST_PORT=${INFLUX_HOST_PORT}\\n\
+INFLUX_DATABASE=${INFLUX_DATABASE}\\n\
+INFLUX_USER=${INFLUX_USER}\\n\
+INFLUX_PASS=${INFLUX_PASS:+********}\\n\
+INFLUX_RETENTION=${INFLUX_RETENTION}\\n\
+INFLUX_SHARD=${INFLUX_SHARD}\\n\
+GEO_MEASUREMENT=${GEO_MEASUREMENT}\\n\
+LOG_MEASUREMENT=${LOG_MEASUREMENT}\\n\
+SEND_NGINX_LOGS=${SEND_NGINX_LOGS}\\n\
+GEOIP2INFLUX_LOG_LEVEL=${GEOIP2INFLUX_LOG_LEVEL}\\n\
+MAXMINDDB_LICENSE_KEY=${MAXMINDDB_LICENSE_KEY:+********}\\n"
+
+mkdir -p /config/geoip2db
+cp -f /geoip2influx/geoip2influx.py /config/geoip2db
+chown -R abc:abc /config/geoip2db
+chmod +x /config/geoip2db/geoip2influx.py
+
+# create GeoIP2 folder symlink
+[[ -d /var/lib/libmaxminddb ]] && [[ ! -L /var/lib/libmaxminddb ]] && \
+    rm -rf /var/lib/libmaxminddb
+[[ ! -d /var/lib/libmaxminddb ]] && \
+    ln -s /config/geoip2db /var/lib/libmaxminddb
+# check GeoIP2 database
+if [ -n "$MAXMINDDB_LICENSE_KEY" ]; then
+    sed -i "s|.*MAXMINDDB_LICENSE_KEY.*|MAXMINDDB_LICENSE_KEY=\"${MAXMINDDB_LICENSE_KEY}\"|g" /etc/conf.d/libmaxminddb
+    if [ ! -f /var/lib/libmaxminddb/GeoLite2-City.mmdb ]; then
+        echo "Downloading GeoIP2 City database."
+        /etc/periodic/weekly/libmaxminddb
+    fi
+fi
\ No newline at end of file
diff --git a/root/etc/crontabs/root b/root/etc/crontabs/root
new file mode 100644
index 0000000..78eea72
--- /dev/null
+++ b/root/etc/crontabs/root
@@ -0,0 +1,7 @@
+# do daily/weekly/monthly maintenance
+# min hour day month weekday command
+*/15 * * * * run-parts /etc/periodic/15min
+0 * * * * run-parts /etc/periodic/hourly
+0 2 * * * run-parts /etc/periodic/daily
+0 3 * * 6 run-parts /etc/periodic/weekly
+0 5 1 * * run-parts /etc/periodic/monthly
\ No newline at end of file
diff --git a/root/etc/services.d/geoip2influx/run b/root/etc/services.d/geoip2influx/run
new file mode 100644
index 0000000..9d829a9
--- /dev/null
+++ b/root/etc/services.d/geoip2influx/run
@@ -0,0 +1,4 @@
+#!/usr/bin/with-contenv bash
+
+    exec \
+        python3 /config/geoip2db/geoip2influx.py
\ No newline at end of file