commit d75bd2bd88a89bdae040c431d840335a522db5c7
Author: Alexey
Date:   Mon Oct 8 18:13:00 2018 +0300

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..56973aa
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+settings.ini
+venv/
diff --git a/GeoLiteCity.dat b/GeoLiteCity.dat
new file mode 100644
index 0000000..1adb8c3
Binary files /dev/null and b/GeoLiteCity.dat differ
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..482ae38
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# geostat
diff --git a/geoparser.py b/geoparser.py
new file mode 100755
index 0000000..5102985
--- /dev/null
+++ b/geoparser.py
@@ -0,0 +1,68 @@
+#! /usr/bin/env python
+
+# Getting GEO information for Nginx access.log IP's.
+# Alexey Nizhegolenko 2018
+
+import os
+import re
+# import sys
+import time
+# import json
+import pygeoip
+# import subprocess
+import Geohash
+import configparser
+# from collections import Counter
+# from datetime import datetime
+
+
+def logparse(logpath):
+    """Follow the log at *logpath* and geo-locate each new client IP.
+
+    Starts at the current end of the file and polls once per second for
+    appended lines; the loop has no exit, so it runs until interrupted.
+    """
+    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
+    # Leading IPv4 address of a log line; compiled once, outside the loop.
+    GETIP = re.compile(
+        r"^(?P<ip>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")
+    # NOTE: overwritten for every located IP and not sent anywhere yet.
+    IPS = {}
+    with open(logpath, "r") as FILE:
+        # Skip what is already in the file; only new lines are processed.
+        FILE.seek(0, os.SEEK_END)
+        while 1:
+            WHERE = FILE.tell()
+            LINE = FILE.readline()
+            if not LINE:
+                # Nothing new yet: wait, then retry from the same offset.
+                time.sleep(1)
+                FILE.seek(WHERE)
+                continue
+            MATCH = GETIP.search(LINE)
+            if MATCH is None:
+                # No leading IPv4 address (e.g. IPv6 client): skip the line.
+                continue
+            INFO = GI.record_by_addr(MATCH.group(1))
+            if INFO is not None:
+                HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
+                IPS['count'] = 1
+                IPS['geohash'] = HASH
+
+
+def main():
+    """Load the log path from settings.ini and follow that log."""
+    # Getting params from config
+    pwd = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+    config = configparser.ConfigParser()
+    config.read('%s/settings.ini' % pwd)
+    logpath = config.get('NGINX_LOG', 'logpath')
+    # Parse given log file and send metrics
+    logparse(logpath)
+    # Send result to file
+    # with open('/tmp/metrics.json', 'w') as outputfile:
+    #     json.dump(IPS_IN, outputfile, indent=4, sort_keys=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/parser.py b/parser.py
new file mode 100755
index 0000000..f9dbac3
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,90 @@
+#! /usr/bin/env python
+
+
+# Getting GEO information for Nginx access.log IP's.
+# Alexey Nizhegolenko 2018
+
+
+import os
+import re
+# import sys
+import json
+import pygeoip
+# import subprocess
+import Geohash
+import configparser
+from collections import Counter
+# from datetime import datetime
+
+
+def logparse(logpath):
+    """Count requests per client IP in *logpath* and geo-locate each IP.
+
+    Returns a list of dicts with the keys 'ip', 'geohash' and 'count';
+    IPs that have no GeoIP record are left out.
+    """
+    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
+    # Leading IPv4 address of a log line; compiled once, outside the loop.
+    GETIP = re.compile(
+        r"^(?P<ip>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")
+    IPS = Counter()
+    with open(logpath, "r") as FILE:
+        for line in FILE:
+            MATCH = GETIP.search(line)
+            # Lines without a leading IPv4 address (IPv6, blank) are skipped.
+            if MATCH:
+                IPS[MATCH.group(1)] += 1
+    OUTPUT = []
+    for KEYIP, VALUE in IPS.items():
+        # One GeoIP lookup per distinct IP rather than per log line.
+        INFO = GI.record_by_addr(KEYIP)
+        if INFO is not None:
+            HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
+            OUTPUT.append({'ip': KEYIP, 'geohash': HASH, 'count': VALUE})
+
+    return OUTPUT
+
+
+'''
+def logparse(logpath):
+    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
+    GETIP = r"^(?P<ip>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
+    OUTPUT = []
+    IPS = {}
+    with open(logpath, "r") as file:
+        for line in file:
+            IP = re.search(GETIP, line)
+            if IP:
+                INFO = GI.record_by_addr(IP.group(1))
+                if INFO is not None:
+                    HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
+                    # IPS['key'] = INFO['country_code']
+                    # IPS['name'] = INFO['country_name']
+                    IPS['ip'] = IP.group(1)
+                    IPS['geohash'] = HASH
+                    # IPS['data'] = {'latitude': INFO['latitude'], 'longitude': INFO['longitude']}  # NOQA
+                    # IPS['latitude'] = INFO['latitude']
+                    # IPS['longitude'] = INFO['longitude']
+                    OUTPUT.append(IPS)
+    return OUTPUT
+'''
+
+
+def main():
+    """Parse the log named in settings.ini and dump the stats as JSON."""
+    # Set LANG ENV
+    os.environ["LC_ALL"] = "C"
+    # Getting params from config
+    pwd = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+    config = configparser.ConfigParser()
+    config.read('%s/settings.ini' % pwd)
+    logpath = config.get('NGINX_LOG', 'logpath')
+    # Parse given log file
+    IPS_IN = logparse(logpath)
+    # Send result to file
+    with open('/tmp/metrics.json', 'w') as outputfile:
+        json.dump(IPS_IN, outputfile, indent=4, sort_keys=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c96e2f6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+configparser==3.5.0
+parsedatetime==2.4
+pygeoip==0.3.2
+Geohash==1.0
diff --git a/settings.ini.back b/settings.ini.back
new file mode 100644
index 0000000..62021c3
--- /dev/null
+++ b/settings.ini.back
@@ -0,0 +1,2 @@
+[NGINX_LOG]
+logpath = /var/log/nginx/access.log