first commit

This commit is contained in:
Alexey 2018-10-08 18:13:00 +03:00
commit d75bd2bd88
7 changed files with 151 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
*.pyc
settings.ini
venv/

BIN
GeoLiteCity.dat Normal file

Binary file not shown.

1
README.md Normal file
View file

@ -0,0 +1 @@
# geostat

57
geoparser.py Executable file
View file

@ -0,0 +1,57 @@
#! /usr/bin/env python
# Getting GEO information for Nginx access.log IP's.
# Alexey Nizhegolenko 2018
import os
import re
# import sys
import time
# import json
import pygeoip
# import subprocess
import Geohash
import configparser
# from collections import Counter
# from datetime import datetime
def logparse(logpath):
    """Follow an Nginx access log and geo-resolve the client IP of new lines.

    The file at ``logpath`` is opened, positioned at its current end, and
    then polled once per second for appended lines. For every new line that
    starts with an IPv4-looking address, the address is looked up in the
    ``GeoLiteCity.dat`` database and the resulting latitude/longitude pair
    is encoded as a geohash.

    Lines that do not start with an IPv4 address (blank lines, IPv6 clients,
    garbage) are skipped instead of crashing the follower.

    This function loops forever and therefore never returns.

    NOTE: the per-line result is only written into a local dict that is
    overwritten on every hit; nothing is returned or sent anywhere yet.

    :param logpath: path of the access log to follow.
    """
    # The database path is relative, so it is presumably resolved against
    # the process working directory -- TODO confirm against pygeoip.
    geo_db = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
    # Compiled once instead of on every line. Each octet needs at least one
    # digit ({1,3}); the former {,3} also accepted empty octets like "...".
    ip_at_start = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})"
    )
    latest = {}
    with open(logpath, "r") as log:
        # Skip everything already in the file; only new lines are handled.
        log.seek(0, os.SEEK_END)
        while True:
            position = log.tell()
            line = log.readline()
            if not line:
                # Nothing new yet: wait, then retry from the same offset.
                time.sleep(1)
                log.seek(position)
                continue
            match = ip_at_start.search(line)
            if match is None:
                # re.search returns None when the line has no leading IP;
                # calling .group() on it would raise AttributeError.
                continue
            info = geo_db.record_by_addr(match.group('remote_host'))
            if info is None:
                # Address is not present in the GeoIP database.
                continue
            latest['count'] = 1
            latest['geohash'] = Geohash.encode(
                info['latitude'], info['longitude']
            )
def main():
    """Read the log path from settings.ini next to this script and parse it.

    The ``logpath`` option of the ``NGINX_LOG`` section is handed to
    ``logparse``.
    """
    # settings.ini is looked up in the directory that holds this script
    # (symlinks resolved), not in the current working directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    script_dir = os.path.abspath(script_dir)

    settings = configparser.ConfigParser()
    settings.read('%s/settings.ini' % script_dir)

    # Parse the configured log file.
    logparse(settings.get('NGINX_LOG', 'logpath'))

    # Sending the result to a file is not enabled yet:
    # with open('/tmp/metrics.json', 'w') as outputfile:
    #     json.dump(IPS_IN, outputfile, indent=4, sort_keys=True)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

84
parser.py Executable file
View file

@ -0,0 +1,84 @@
#! /usr/bin/env python
# Getting GEO information for Nginx access.log IP's.
# Alexey Nizhegolenko 2018
import os
import re
# import sys
import json
import pygeoip
# import subprocess
import Geohash
import configparser
from collections import Counter
# from datetime import datetime
def logparse(logpath):
    """Count requests per client IP in an Nginx access log and geohash them.

    Every line of ``logpath`` that starts with an IPv4-looking address
    contributes one hit for that address. Lines without a leading IPv4
    address (blank lines, IPv6 clients, garbage) are skipped instead of
    raising. Each distinct address is then looked up in ``GeoLiteCity.dat``;
    addresses the database does not know are left out of the result.

    :param logpath: path of the access log to read.
    :return: list of dicts with the keys ``'ip'``, ``'geohash'`` and
        ``'count'``, one per distinct address that was resolved.
    """
    geo_db = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
    # Compiled once instead of on every line. Each octet needs at least one
    # digit ({1,3}); the former {,3} also accepted empty octets like "...".
    ip_at_start = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})"
    )

    hits = Counter()
    with open(logpath, "r") as log:
        for line in log:
            match = ip_at_start.search(line)
            # re.search returns None when the line has no leading IP;
            # calling .group() on it would raise AttributeError.
            if match is not None:
                hits[match.group('remote_host')] += 1

    output = []
    for address, count in hits.items():
        info = geo_db.record_by_addr(address)
        if info is None:
            # Address is not present in the GeoIP database.
            continue
        output.append({
            'ip': address,
            'geohash': Geohash.encode(info['latitude'], info['longitude']),
            'count': count,
        })
    return output
'''
def logparse(logpath):
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
OUTPUT = []
IPS = {}
with open(logpath, "r") as file:
for line in file:
IP = re.search(GETIP, line)
if IP:
INFO = GI.record_by_addr(IP.group(1))
if INFO is not None:
HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
# IPS['key'] = INFO['country_code']
# IPS['name'] = INFO['country_name']
IPS['ip'] = IP.group(1)
IPS['geohash'] = HASH
# IPS['data'] = {'latitude': INFO['latitude'], 'longitude': INFO['longitude']} # NOQA
# IPS['latitude'] = INFO['latitude']
# IPS['longitude'] = INFO['longitude']
OUTPUT.append(IPS)
return OUTPUT
'''
def main():
    """Parse the configured Nginx log and dump the result as JSON.

    The log path comes from the ``logpath`` option of the ``NGINX_LOG``
    section in settings.ini next to this script. The list returned by
    ``logparse`` is written to ``/tmp/metrics.json``.
    """
    # Force the "C" locale for this process.
    os.environ["LC_ALL"] = "C"

    # settings.ini is looked up in the directory that holds this script
    # (symlinks resolved), not in the current working directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    script_dir = os.path.abspath(script_dir)
    settings = configparser.ConfigParser()
    settings.read('%s/settings.ini' % script_dir)

    # Parse the given log file.
    records = logparse(settings.get('NGINX_LOG', 'logpath'))

    # Send the result to a file; the file is opened only after parsing
    # has succeeded.
    serialized = json.dumps(records, indent=4, sort_keys=True)
    with open('/tmp/metrics.json', 'w') as sink:
        sink.write(serialized)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

4
requirements.txt Normal file
View file

@ -0,0 +1,4 @@
configparser==3.5.0
parsedatetime==2.4
pygeoip==0.3.2
Geohash==1.0

2
settings.ini.back Normal file
View file

@ -0,0 +1,2 @@
[NGINX_LOG]
logpath = /var/log/nginx/access.log