2018-10-08 17:13:00 +02:00
|
|
|
#! /usr/bin/env python
|
|
|
|
|
|
|
|
# Getting GEO information for Nginx access.log IP's.
|
|
|
|
# Alexey Nizhegolenko 2018
|
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
# import sys
|
|
|
|
import time
|
|
|
|
# import json
|
|
|
|
import pygeoip
|
|
|
|
# import subprocess
|
|
|
|
import Geohash
|
|
|
|
import configparser
|
|
|
|
# from collections import Counter
|
|
|
|
# from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
def logparse(logpath):
|
|
|
|
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
|
|
|
|
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
|
|
|
|
IPS = {}
|
|
|
|
with open(logpath, "r") as FILE:
|
|
|
|
STR_RESULTS = os.stat(logpath)
|
|
|
|
ST_SIZE = STR_RESULTS[6]
|
|
|
|
FILE.seek(ST_SIZE)
|
|
|
|
while 1:
|
|
|
|
WHERE = FILE.tell()
|
|
|
|
LINE = FILE.readline()
|
|
|
|
if not LINE:
|
|
|
|
time.sleep(1)
|
|
|
|
FILE.seek(WHERE)
|
|
|
|
else:
|
|
|
|
IP = re.search(GETIP, LINE).group(1)
|
|
|
|
if IP:
|
|
|
|
INFO = GI.record_by_addr(IP)
|
|
|
|
if INFO is not None:
|
|
|
|
HASH = Geohash.encode(INFO['latitude'], INFO['longitude']) # NOQA
|
|
|
|
IPS['count'] = 1
|
|
|
|
IPS['geohash'] = HASH
|
2018-10-08 17:14:52 +02:00
|
|
|
print(IPS)
|
2018-10-08 17:13:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
# Getting params from config
|
|
|
|
pwd = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
|
|
|
|
config = configparser.ConfigParser()
|
|
|
|
config.read('%s/settings.ini' % pwd)
|
|
|
|
logpath = config.get('NGINX_LOG', 'logpath')
|
|
|
|
# Parse given log file and send metrics
|
|
|
|
logparse(logpath)
|
|
|
|
# Send result to fle
|
|
|
|
# with open('/tmp/metrics.json', 'w') as outputfile:
|
|
|
|
# json.dump(IPS_IN, outputfile, indent=4, sort_keys=True)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
main()
|