diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b88691a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,133 @@
+.gitignore
+LICENSE
+README.md
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..95c9bc0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM lsiobase/alpine:3.12
+LABEL maintainer="GilbN"
+
+WORKDIR /geoip2influx
+COPY requirements.txt geoip2influx.py /geoip2influx/
+RUN \
+echo " ## Installing packages ## " && \
+apk add --no-cache --virtual=build-dependencies \
+ python3 \
+ py3-pip \
+ libmaxminddb && \
+echo " ## Installing python modules ## " && \
+pip3 install --no-cache-dir -r requirements.txt
+COPY root/ /
\ No newline at end of file
diff --git a/README.md b/README.md
index 39c4af2..c7e1fb5 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,162 @@
-# geoip2influx
-A python script that will parse the nginx access.log and send geolocation metrics and log metrics to InfluxDB
+# Geoip2Influx
+
+***
+
+Adapted source: https://github.com/ratibor78/geostat
+
+![](https://i.imgur.com/mh0IhYA.jpg)
+
+
+
+The script will parse the access log for IPs and convert them into geo metrics for InfluxDB. It will also send log metrics if enabled.
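+
+For reference, each parsed request ends up as an InfluxDB point shaped roughly like the sketch below (the tag and field names are the ones the script writes; the concrete values are made-up examples):
+
+```python
+# Illustrative only: the shape of a geo metric point written to the GEO_MEASUREMENT measurement.
+{
+    "measurement": "geoip2influx",
+    "tags": {
+        "geohash": "u4pruydqqvj",
+        "ip": "203.0.113.7",
+        "host": "docker-host",
+        "country_code": "NO",
+        "country_name": "Norway",
+        "state": "Oslo",
+        "state_code": "03",
+        "city": "Oslo",
+        "postal_code": "0150",
+        "latitude": 59.91,
+        "longitude": 10.75,
+    },
+    "fields": {"count": 1},
+}
+```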
+
+***
+
+## Usage
+
+### Environment variables:
+
+These are the **default** values for all envs.
+Add the ones that differ on your system.
+
+| Environment Variable | Example Value | Description |
+| -------------------- | ------------- | ----------- |
+| NGINX_LOG_PATH | /config/log/nginx/access.log | Container path to the Nginx log file. Optional, defaults to the example. |
+| INFLUX_HOST | localhost | Host running InfluxDB. |
+| INFLUX_HOST_PORT | 8086 | Optional, defaults to 8086. |
+| INFLUX_DATABASE | geoip2influx | Optional, defaults to geoip2influx. |
+| INFLUX_USER | root | Optional, defaults to root. |
+| INFLUX_PASS | root | Optional, defaults to root. |
+| GEO_MEASUREMENT | geoip2influx | InfluxDB measurement name for geohashes. Optional, defaults to the example. |
+| LOG_MEASUREMENT | nginx_access_logs | InfluxDB measurement name for nginx logs. Optional, defaults to the example. |
+| SEND_NGINX_LOGS | true | Set to `false` to disable nginx logs. Optional, defaults to `true`. |
+| GEOIP2INFLUX_LOG_LEVEL | info | Sets the log level in geoip2influx.log. Use `debug` for verbose logging. Optional, defaults to `info`. |
+| INFLUX_RETENTION | 30d | Sets the retention policy duration for the database. Optional, defaults to the example. |
+| INFLUX_SHARD | 2d | Sets the shard group duration for the database. Optional, defaults to the example. |
+| MAXMINDDB_LICENSE_KEY | xxxxxxx | Your MaxMind license key, used to download the GeoLite2 database. |
+
+
+### MaxMind Geolite2
+
+The default download location is `/config/geoip2db/GeoLite2-City.mmdb`.
+
+Get your license key here: https://www.maxmind.com/en/geolite2/signup
+
+### InfluxDB
+
+The InfluxDB database will be created automatically with the name you choose.
+
+```
+-e INFLUX_DATABASE=geoip2influx
+```
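+
+If you want to verify that the database was created, one quick check from the host is to list databases with the same `influxdb` Python client the script uses (the hostname, port, and credentials below are assumptions, adjust them to your setup):
+
+```python
+from influxdb import InfluxDBClient
+
+# Lists all databases on the InfluxDB instance; the chosen database name should appear after the first run.
+client = InfluxDBClient(host='localhost', port=8086, username='root', password='root')
+print(client.get_list_database())
+```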
+
+### Docker
+
+```bash
+docker create \
+ --name=geoip2influx \
+ -e PUID=1000 \
+ -e PGID=1000 \
+ -e TZ=Europe/Oslo \
+ -e INFLUX_HOST= \
+ -e INFLUX_HOST_PORT= \
+  -e MAXMINDDB_LICENSE_KEY= \
+ -v /path/to/appdata/geoip2influx:/config \
+ -v /path/to/nginx/accesslog/:/config/log/nginx/ \
+ --restart unless-stopped \
+ gilbn/geoip2influx
+```
+
+### Docker compose
+
+```yaml
+version: "2.1"
+services:
+ geoip2influx:
+ image: gilbn/geoip2influx
+ container_name: geoip2influx
+ environment:
+ - PUID=1000
+ - PGID=1000
+ - TZ=Europe/Oslo
+ - INFLUX_HOST=
+ - INFLUX_HOST_PORT=
+ - MAXMINDDB_LICENSE_KEY=
+ volumes:
+ - /path/to/appdata/geoip2influx:/config
+ - /path/to/nginx/accesslog/:/config/log/nginx/
+ restart: unless-stopped
+```
+
+***
+
+## Grafana dashboard:
+### [Grafana Dashboard Link](https://grafana.com/grafana/dashboards/12268/)
+
+***
+
+## Sending Nginx log metrics
+
+1. Add the following to the http block in your `nginx.conf` file:
+
+```nginx
+geoip2 /config/geoip2db/GeoLite2-City.mmdb {
+    auto_reload 5m;
+    $geoip2_data_country_code country iso_code;
+    $geoip2_data_city_name city names en;
+}
+
+log_format custom '$remote_addr - $remote_user [$time_local]'
+ '"$request" $status $body_bytes_sent'
+ '"$http_referer" $host "$http_user_agent"'
+ '"$request_time" "$upstream_connect_time"'
+ '"$geoip2_data_city_name" "$geoip2_data_country_code"';
+```
+
+2. Set the access log to use the `custom` log format.
+```nginx
+access_log /config/log/nginx/access.log custom;
+```
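+
+With the `custom` format above, a log line should look roughly like this (note that some quoted fields are concatenated without spaces, which is exactly what the script's regex expects; the values are made-up):
+
+```
+192.0.2.10 - - [21/Jun/2020:14:07:32 +0200]"GET / HTTP/1.1" 200 612"-" example.com "Mozilla/5.0""0.002" "0.001""Oslo" "NO"
+```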
+
+### Multiple log files
+
+If you separate your nginx log files but want this script to parse all of them, you can do the following:
+
+As nginx supports multiple `access_log` directives in a block, just add another one in the server block.
+
+**Example**
+
+```nginx
+ access_log /config/log/nginx/technicalramblings/access.log custom;
+ access_log /config/log/nginx/access.log custom;
+```
+This will log the same lines to both files.
+
+Then use the `/config/log/nginx/access.log` file in the `NGINX_LOG_PATH` variable.
+
+***
+
+## Updates
+
+**21.06.20** - Added `$host` (domain) to the nginx log metrics. This will break your nginx log parsing until you update the custom log format.
+
+**06.06.20** - Added an InfluxDB retention policy to try and mitigate `max-values-per-tag limit exceeded` errors.
+
+ * `-e INFLUX_RETENTION` Default 30d
+ * `-e INFLUX_SHARD` Default 2d
+ * It will only add the retention policy if the database doesn't exist.
+
+**30.05.20** - Added logging. Use `-e GEOIP2INFLUX_LOG_LEVEL` to set the log level.
+
+**15.05.20** - Removed the `GEOIP2_KEY` and `GEOIP_DB_PATH` variables. With commit https://github.com/linuxserver/docker-letsencrypt/commit/75b9685fdb3ec6edda590300f289b0e75dd9efd0 the letsencrypt container now natively supports downloading and updating (weekly) the GeoLite2-City database!
diff --git a/geoip2influx.py b/geoip2influx.py
new file mode 100644
index 0000000..2b178a0
--- /dev/null
+++ b/geoip2influx.py
@@ -0,0 +1,314 @@
+#! /usr/bin/env python3
+
+# Getting GEO information from Nginx access.log by IP's.
+# Alexey Nizhegolenko 2018
+# Parts added by Remko Lodder, 2019.
+# Added: IPv6 matching, make query based on geoip2 instead of
+# geoip, which is going away r.s.n.
+# GilbN 2020:
+ # Adapted to Python 3.
+ # Added environment variables for Docker.
+ # Added log metrics
+ # Added regex tester
+ # Added file path check
+ # Added logging
+ # Switched to pep8 style variables etc.
+ # Adapted to geoip2.
+
+from os.path import exists, isfile
+from os import environ as env, stat
+from platform import uname
+from re import compile, match, search, IGNORECASE
+from sys import path, exit
+from time import sleep, time
+from datetime import datetime
+import logging
+
+from geoip2.database import Reader
+from geohash2 import encode
+from influxdb import InfluxDBClient
+from requests.exceptions import ConnectionError
+from influxdb.exceptions import InfluxDBServerError, InfluxDBClientError
+from IPy import IP as ipadd
+
+
+# Getting params from envs
+geoip_db_path = '/config/geoip2db/GeoLite2-City.mmdb'
+log_path = env.get('NGINX_LOG_PATH', '/config/log/nginx/access.log')
+influxdb_host = env.get('INFLUX_HOST', 'localhost')
+influxdb_port = env.get('INFLUX_HOST_PORT', '8086')
+influxdb_database = env.get('INFLUX_DATABASE', 'geoip2influx')
+influxdb_user = env.get('INFLUX_USER', 'root')
+influxdb_user_pass = env.get('INFLUX_PASS', 'root')
+influxdb_retention = env.get('INFLUX_RETENTION','30d')
+influxdb_shard = env.get('INFLUX_SHARD', '2d')
+geo_measurement = env.get('GEO_MEASUREMENT', 'geoip2influx')
+log_measurement = env.get('LOG_MEASUREMENT', 'nginx_access_logs')
+send_nginx_logs = env.get('SEND_NGINX_LOGS','true')
+log_level = env.get('GEOIP2INFLUX_LOG_LEVEL', 'info').upper()
+
+# Logging
+logging.basicConfig(level=log_level,format='%(asctime)s :: %(levelname)s :: %(message)s',datefmt='%d/%b/%Y %H:%M:%S',filename=path[0] + '/geoip2influx.log')
+
+def regex_tester(log_path, N):
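+    # Tail roughly the last N lines of the log and check whether any of them match the
+    # expected custom log format; keeps retrying for up to 60 seconds before giving up.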
+ time_out = time() + 60
+ re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
+ re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
+ while True:
+ assert N >= 0
+ pos = N + 1
+ lines = []
+ with open(log_path) as f:
+ while len(lines) <= N:
+ try:
+ f.seek(-pos, 2)
+ except IOError:
+ f.seek(0)
+ break
+ finally:
+ lines = list(f)
+ pos *= 2
+ log_lines = lines[-N:]
+ for line in log_lines:
+ if re_ipv4.match(line):
+                regex = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
+ if regex.match(line):
+ logging.debug('Regex is matching %s continuing...' % log_path)
+ return True
+ if re_ipv6.match(line):
+                regex = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
+ if regex.match(line):
+ logging.debug('Regex is matching %s continuing...' % log_path)
+ return True
+ else:
+ logging.debug('Testing regex on: %s ' % log_path)
+ sleep(2)
+ if time() > time_out:
+ logging.warning('Failed to match regex on: %s ' % log_path)
+ break
+
+
+def file_exists(log_path,geoip_db_path):
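+    # Wait for both the access log and the GeoIP2 database to appear on disk;
+    # logs a critical error and gives up after 30 seconds.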
+ time_out = time() + 30
+ while True:
+ file_list = [log_path, geoip_db_path]
+ if not exists(log_path):
+ logging.warning(('File: %s not found...' % log_path))
+ sleep(1)
+ if not exists(geoip_db_path):
+ logging.warning(('File: %s not found...' % geoip_db_path))
+ sleep(1)
+ if all([isfile(f) for f in file_list]):
+ for f in file_list:
+ logging.debug('Found: %s' % f)
+ return True
+ if time() > time_out:
+ if not exists(geoip_db_path) and not exists(log_path):
+ logging.critical("Can't find: " + geoip_db_path + ' or ' + log_path + ', exiting!')
+ break
+ elif not exists(geoip_db_path):
+ logging.critical("Can't find: %s , exiting!" % geoip_db_path)
+ break
+ elif not exists(log_path):
+ logging.critical("Can't find: %s , exiting!" % log_path)
+ break
+
+
+def logparse(
+ log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass, influxdb_retention,
+ influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode):
+ # Preparing variables and params
+ ips = {}
+ geohash_fields = {}
+ geohash_tags = {}
+ log_data_fields = {}
+ log_data_tags = {}
+ nginx_log = {}
+ hostname = uname()[1]
+ client = InfluxDBClient(
+ host=influxdb_host, port=influxdb_port, username=influxdb_user, password=influxdb_user_pass, database=influxdb_database)
+
+ try:
+ logging.debug('Testing InfluxDB connection')
+ version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
+ logging.debug('Influxdb version: %s' % version)
+ except ConnectionError as e:
+ logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
+ 'Error: %s' % e
+ )
+ exit(1)
+
+ try:
+ databases = [db['name'] for db in client.get_list_database()]
+ if influxdb_database in databases:
+ logging.debug('Found database: %s' % influxdb_database)
+ except InfluxDBClientError as e:
+ logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
+ 'Error: %s' % e
+ )
+ exit(1)
+
+ if influxdb_database not in databases:
+ logging.info('Creating database: %s' % influxdb_database)
+ client.create_database(influxdb_database)
+
+ retention_policies = [policy['name'] for policy in client.get_list_retention_policies(database=influxdb_database)]
+ if '%s %s-%s' % (influxdb_database, influxdb_retention, influxdb_shard) not in retention_policies:
+ logging.info('Creating %s retention policy (%s-%s)' % (influxdb_database, influxdb_retention, influxdb_shard))
+ client.create_retention_policy(name='%s %s-%s' % (influxdb_database, influxdb_retention, influxdb_shard), duration=influxdb_retention, replication='1',
+ database=influxdb_database, default=True, shard_duration=influxdb_shard)
+
+    re_ipv4 = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
+    re_ipv6 = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE) # NOQA
+
+ gi = Reader(geoip_db_path)
+
+ if send_nginx_logs in ('true', 'True'):
+ send_logs = True
+ else:
+ send_logs = False
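+        # Log metrics disabled: fall back to bare IP-only patterns so only geo metrics are sent.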
+ re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
+ re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
+ logging.info('SEND_NGINX_LOGS set to false')
+ pass
+ if not regex_tester(log_path,3):
+ if send_logs:
+ re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
+ re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') # NOQA
+ send_logs = False
+ logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')
+
+ # Main loop to parse access.log file in tailf style with sending metrics.
+ with open(log_path, 'r') as log_file:
+ logging.info('Starting log parsing')
+ str_results = stat(log_path)
+ st_size = str_results[6]
+ log_file.seek(st_size)
+ while True:
+ geo_metrics = []
+ log_metrics = []
+ where = log_file.tell()
+ line = log_file.readline()
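+            # Detect log rotation: if the inode changed, break out so main() reopens the new file.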
+ inodenew = stat(log_path).st_ino
+ if inode != inodenew:
+ break
+ if not line:
+ sleep(1)
+ log_file.seek(where)
+ else:
+ if re_ipv4.match(line):
+ m = re_ipv4.match(line)
+ ip = m.group(1)
+ log = re_ipv4
+ elif re_ipv6.match(line):
+ m = re_ipv6.match(line)
+ ip = m.group(1)
+ log = re_ipv6
+ else:
+ logging.warning('Failed to match regex that previously matched!? Skipping this line!\n'
+ 'Please share the log line below on Discord or Github!\n'
+ 'Line: %s' % line
+ )
+ continue
+ if ipadd(ip).iptype() == 'PUBLIC' and ip:
+ info = gi.city(ip)
+ if info is not None:
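+                        # Encode latitude/longitude into a geohash string, written as a tag on the geo measurement.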
+ geohash = encode(info.location.latitude, info.location.longitude)
+ geohash_fields['count'] = 1
+ geohash_tags['geohash'] = geohash
+ geohash_tags['ip'] = ip
+ geohash_tags['host'] = hostname
+ geohash_tags['country_code'] = info.country.iso_code
+ geohash_tags['country_name'] = info.country.name
+ geohash_tags['state'] = info.subdivisions.most_specific.name
+ geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code
+ geohash_tags['city'] = info.city.name
+ geohash_tags['postal_code'] = info.postal.code
+ geohash_tags['latitude'] = info.location.latitude
+ geohash_tags['longitude'] = info.location.longitude
+ ips['tags'] = geohash_tags
+ ips['fields'] = geohash_fields
+ ips['measurement'] = geo_measurement
+ geo_metrics.append(ips)
+ logging.debug('Geo metrics: %s' % geo_metrics)
+ try:
+ client.write_points(geo_metrics)
+ except (InfluxDBServerError, ConnectionError) as e:
+ logging.error('Error writing data to InfluxDB! Check your database!\n'
+ 'Error: %s' % e
+ )
+
+ if send_logs:
+ data = search(log, line)
+ if ipadd(ip).iptype() == 'PUBLIC' and ip:
+ info = gi.city(ip)
+ if info is not None:
+ datadict = data.groupdict()
+ log_data_fields['count'] = 1
+ log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
+ log_data_fields['request_time'] = float(datadict['request_time'])
+ if datadict['connect_time'] == '-':
+ log_data_fields['connect_time'] = 0.0
+ else:
+ log_data_fields['connect_time'] = float(datadict['connect_time'])
+ log_data_tags['ip'] = datadict['ipaddress']
+ log_data_tags['datetime'] = datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
+ log_data_tags['remote_user'] = datadict['remote_user']
+ log_data_tags['method'] = datadict['method']
+ log_data_tags['referrer'] = datadict['referrer']
+ log_data_tags['host'] = datadict['host']
+ log_data_tags['http_version'] = datadict['http_version']
+ log_data_tags['status_code'] = datadict['status_code']
+ log_data_tags['bytes_sent'] = datadict['bytes_sent']
+ log_data_tags['url'] = datadict['url']
+ log_data_tags['user_agent'] = datadict['user_agent']
+ log_data_tags['request_time'] = datadict['request_time']
+ log_data_tags['connect_time'] = datadict['connect_time']
+ log_data_tags['city'] = datadict['city']
+ log_data_tags['country_code'] = datadict['country_code']
+ log_data_tags['country_name'] = info.country.name
+ nginx_log['tags'] = log_data_tags
+ nginx_log['fields'] = log_data_fields
+ nginx_log['measurement'] = log_measurement
+ log_metrics.append(nginx_log)
+ logging.debug('NGINX log metrics: %s' % log_metrics)
+ try:
+ client.write_points(log_metrics)
+ except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
+ logging.error('Error writing data to InfluxDB! Check your database!\n'
+ 'Error: %s' % e
+ )
+
+
+def main():
+ logging.info('Starting geoip2influx..')
+
+ logging.debug('Variables set:' +
+ '\n geoip_db_path :: %s' % geoip_db_path +
+ '\n -e LOG_PATH :: %s' % log_path +
+ '\n -e INFLUX_HOST :: %s' % influxdb_host +
+ '\n -e INFLUX_HOST_PORT :: %s' % influxdb_port +
+ '\n -e INFLUX_DATABASE :: %s' % influxdb_database +
+ '\n -e INFLUX_RETENTION :: %s' % influxdb_retention +
+ '\n -e INFLUX_SHARD :: %s' % influxdb_shard +
+ '\n -e INFLUX_USER :: %s' % influxdb_user +
+ '\n -e INFLUX_PASS :: %s' % influxdb_user_pass +
+ '\n -e GEO_MEASUREMENT :: %s' % geo_measurement +
+ '\n -e LOG_MEASUREMENT :: %s' % log_measurement +
+ '\n -e SEND_NGINX_LOGS :: %s' % send_nginx_logs +
+ '\n -e GEOIP2INFLUX_LOG_LEVEL :: %s' % log_level
+ )
+ # Parsing log file and sending metrics to Influxdb
+ while file_exists(log_path,geoip_db_path):
+ # Get inode from log file
+ inode = stat(log_path).st_ino
+ # Run main loop and grep a log file
+ logparse(
+ log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass,
+ influxdb_retention, influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode) # NOQA
+
+if __name__ == '__main__':
+ try:
+ main()
+ except KeyboardInterrupt:
+ exit(0)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5abbd2e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+geoip2
+geohash2
+influxdb
+IPy
\ No newline at end of file
diff --git a/root/etc/cont-init.d/50-config b/root/etc/cont-init.d/50-config
new file mode 100644
index 0000000..ace9952
--- /dev/null
+++ b/root/etc/cont-init.d/50-config
@@ -0,0 +1,40 @@
+#!/usr/bin/with-contenv bash
+
+echo '------------------------------------------------------------------------'
+echo '| Made by GilbN'
+echo '| Running installation of required modules for geoip2influx'
+echo '------------------------------------------------------------------------'
+
+echo -e "Variables set:\\n\
+NGINX_LOG_PATH=${NGINX_LOG_PATH}\\n\
+INFLUX_HOST=${INFLUX_HOST}\\n\
+INFLUX_HOST_PORT=${INFLUX_HOST_PORT}\\n\
+INFLUX_DATABASE=${INFLUX_DATABASE}\\n\
+INFLUX_USER=${INFLUX_USER}\\n\
+INFLUX_PASS=${INFLUX_PASS}\\n\
+INFLUX_RETENTION=${INFLUX_RETENTION}\\n\
+INFLUX_SHARD=${INFLUX_SHARD}\\n\
+GEO_MEASUREMENT=${GEO_MEASUREMENT}\\n\
+LOG_MEASUREMENT=${LOG_MEASUREMENT}\\n\
+SEND_NGINX_LOGS=${SEND_NGINX_LOGS}\\n\
+GEOIP2INFLUX_LOG_LEVEL=${GEOIP2INFLUX_LOG_LEVEL}\\n\
+MAXMINDDB_LICENSE_KEY=${MAXMINDDB_LICENSE_KEY}\\n"
+
+mkdir -p /config/geoip2db
+cp -f /geoip2influx/geoip2influx.py /config/geoip2db
+chown -R abc:abc /config/geoip2db
+chmod +x /config/geoip2db/geoip2influx.py
+
+# create GeoIP2 folder symlink
+[[ -d /var/lib/libmaxminddb ]] && [[ ! -L /var/lib/libmaxminddb ]] && \
+ rm -rf /var/lib/libmaxminddb
+[[ ! -d /var/lib/libmaxminddb ]] && \
+ ln -s /config/geoip2db /var/lib/libmaxminddb
+# check GeoIP2 database
+if [ -n "$MAXMINDDB_LICENSE_KEY" ]; then
+ sed -i "s|.*MAXMINDDB_LICENSE_KEY.*|MAXMINDDB_LICENSE_KEY=\"${MAXMINDDB_LICENSE_KEY}\"|g" /etc/conf.d/libmaxminddb
+ if [ ! -f /var/lib/libmaxminddb/GeoLite2-City.mmdb ]; then
+ echo "Downloading GeoIP2 City database."
+ /etc/periodic/weekly/libmaxminddb
+ fi
+fi
\ No newline at end of file
diff --git a/root/etc/crontabs/root b/root/etc/crontabs/root
new file mode 100644
index 0000000..78eea72
--- /dev/null
+++ b/root/etc/crontabs/root
@@ -0,0 +1,7 @@
+# do daily/weekly/monthly maintenance
+# min hour day month weekday command
+*/15 * * * * run-parts /etc/periodic/15min
+0 * * * * run-parts /etc/periodic/hourly
+0 2 * * * run-parts /etc/periodic/daily
+0 3 * * 6 run-parts /etc/periodic/weekly
+0 5 1 * * run-parts /etc/periodic/monthly
\ No newline at end of file
diff --git a/root/etc/services.d/geoip2influx/run b/root/etc/services.d/geoip2influx/run
new file mode 100644
index 0000000..9d829a9
--- /dev/null
+++ b/root/etc/services.d/geoip2influx/run
@@ -0,0 +1,4 @@
+#!/usr/bin/with-contenv bash
+
+ exec \
+ python3 /config/geoip2db/geoip2influx.py
\ No newline at end of file