first commit
This commit is contained in:
commit
d75bd2bd88
7 changed files with 151 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
*.pyc
|
||||||
|
settings.ini
|
||||||
|
venv/
|
BIN
GeoLiteCity.dat
Normal file
BIN
GeoLiteCity.dat
Normal file
Binary file not shown.
1
README.md
Normal file
1
README.md
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# geostat
|
57
geoparser.py
Executable file
57
geoparser.py
Executable file
|
@ -0,0 +1,57 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
# Getting GEO information for Nginx access.log IPs.
|
||||||
|
# Alexey Nizhegolenko 2018
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
# import sys
|
||||||
|
import time
|
||||||
|
# import json
|
||||||
|
import pygeoip
|
||||||
|
# import subprocess
|
||||||
|
import Geohash
|
||||||
|
import configparser
|
||||||
|
# from collections import Counter
|
||||||
|
# from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def logparse(logpath):
    """Follow the log file at *logpath* and geo-locate each new client IP.

    The file is opened, positioned at its current end, and then polled
    once per second for appended lines, so this function does not return
    on its own.  For every new line that starts with an IPv4 address the
    address is looked up in ``GeoLiteCity.dat`` and, when a record exists,
    its latitude/longitude are encoded as a geohash.

    Lines that do not start with an IPv4 address are skipped instead of
    raising ``AttributeError`` on the missing regex match.

    :param logpath: path of the access log to follow.
    """
    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
    # Compiled once because it is applied to every line.  Each octet needs
    # at least one digit; "{,3}" would also accept empty octets ("...").
    GETIP = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})"
    )
    # Holds the most recent result only; nothing in this function returns
    # or sends it yet.
    IPS = {}
    with open(logpath, "r") as FILE:
        # Start at the end of the file so only newly appended lines are
        # processed.  A zero offset from the end is valid for text files,
        # unlike seeking to a raw byte size.
        FILE.seek(0, os.SEEK_END)
        while 1:
            WHERE = FILE.tell()
            LINE = FILE.readline()
            if not LINE:
                # Nothing new yet: wait, then retry from the same position.
                time.sleep(1)
                FILE.seek(WHERE)
                continue
            MATCH = GETIP.search(LINE)
            if MATCH is None:
                # Not an IPv4-prefixed line (e.g. IPv6 client, blank line).
                continue
            INFO = GI.record_by_addr(MATCH.group(1))
            if INFO is not None:
                HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
                IPS['count'] = 1
                IPS['geohash'] = HASH
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Read the log path from ``settings.ini`` and hand it to ``logparse``.

    ``settings.ini`` is looked up in the directory that contains this
    script; the path is taken from option ``logpath`` of section
    ``NGINX_LOG``.
    """
    # Resolve the directory this script lives in (symlinks followed).
    script_file = os.path.realpath(__file__)
    script_dir = os.path.abspath(os.path.dirname(script_file))
    settings_file = '%s/settings.ini' % script_dir

    settings = configparser.ConfigParser()
    settings.read(settings_file)
    access_log = settings.get('NGINX_LOG', 'logpath')

    # Parse the configured log file.
    logparse(access_log)

    # Sending the result to a file is currently disabled:
    # with open('/tmp/metrics.json', 'w') as outputfile:
    #    json.dump(IPS_IN, outputfile, indent=4, sort_keys=True)


# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
84
parser.py
Executable file
84
parser.py
Executable file
|
@ -0,0 +1,84 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
|
||||||
|
# Getting GEO information for Nginx access.log IPs.
|
||||||
|
# Alexey Nizhegolenko 2018
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
# import sys
|
||||||
|
import json
|
||||||
|
import pygeoip
|
||||||
|
# import subprocess
|
||||||
|
import Geohash
|
||||||
|
import configparser
|
||||||
|
from collections import Counter
|
||||||
|
# from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def logparse(logpath):
    """Count requests per client IP in a log file and geohash each IP.

    Every line of *logpath* that starts with an IPv4 address contributes
    one hit to that address.  Each distinct address is then looked up in
    ``GeoLiteCity.dat``; addresses without a geo record are left out of
    the result.

    Lines that do not start with an IPv4 address are skipped instead of
    raising ``AttributeError`` on the missing regex match.

    :param logpath: path of the access log to read.
    :return: list of dicts with the keys ``'ip'``, ``'geohash'`` and
        ``'count'``, one per address that has a geo record.
    """
    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
    # Compiled once because it is applied to every line.  Each octet needs
    # at least one digit; "{,3}" would also accept empty octets ("...").
    GETIP = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})"
    )
    IPS = Counter()
    with open(logpath, "r") as file:
        for line in file:
            MATCH = GETIP.search(line)
            if MATCH is not None:
                IPS[MATCH.group(1)] += 1

    OUTPUT = []
    for KEYIP, VALUE in IPS.items():
        INFO = GI.record_by_addr(KEYIP)
        if INFO is None:
            # No geo record for this address: nothing to report.
            continue
        OUTPUT.append({
            'ip': KEYIP,
            'geohash': Geohash.encode(INFO['latitude'], INFO['longitude']),
            'count': VALUE,
        })

    return OUTPUT
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
def logparse(logpath):
|
||||||
|
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
|
||||||
|
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
|
||||||
|
OUTPUT = []
|
||||||
|
IPS = {}
|
||||||
|
with open(logpath, "r") as file:
|
||||||
|
for line in file:
|
||||||
|
IP = re.search(GETIP, line)
|
||||||
|
if IP:
|
||||||
|
INFO = GI.record_by_addr(IP.group(1))
|
||||||
|
if INFO is not None:
|
||||||
|
HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
|
||||||
|
# IPS['key'] = INFO['country_code']
|
||||||
|
# IPS['name'] = INFO['country_name']
|
||||||
|
IPS['ip'] = IP.group(1)
|
||||||
|
IPS['geohash'] = HASH
|
||||||
|
# IPS['data'] = {'latitude': INFO['latitude'], 'longitude': INFO['longitude']} # NOQA
|
||||||
|
# IPS['latitude'] = INFO['latitude']
|
||||||
|
# IPS['longitude'] = INFO['longitude']
|
||||||
|
OUTPUT.append(IPS)
|
||||||
|
return OUTPUT
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the configured Nginx log and dump the result as JSON.

    Sets ``LC_ALL`` to ``C``, reads option ``logpath`` of section
    ``NGINX_LOG`` from the ``settings.ini`` located next to this script,
    passes it to ``logparse`` and writes the returned value to
    ``/tmp/metrics.json``.
    """
    # Set the LANG environment.
    os.environ["LC_ALL"] = "C"

    # Resolve the directory this script lives in (symlinks followed).
    script_file = os.path.realpath(__file__)
    script_dir = os.path.abspath(os.path.dirname(script_file))
    settings_file = '%s/settings.ini' % script_dir

    settings = configparser.ConfigParser()
    settings.read(settings_file)
    access_log = settings.get('NGINX_LOG', 'logpath')

    # Parse the configured log file.
    metrics = logparse(access_log)

    # Write the result to a file.
    with open('/tmp/metrics.json', 'w') as metrics_file:
        json.dump(metrics, metrics_file, indent=4, sort_keys=True)


# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
configparser==3.5.0
|
||||||
|
parsedatetime==2.4
|
||||||
|
pygeoip==0.3.2
|
||||||
|
Geohash==1.0
|
2
settings.ini.back
Normal file
2
settings.ini.back
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[NGINX_LOG]
|
||||||
|
logpath = /var/log/nginx/access.log
|
Loading…
Reference in a new issue