geostat/geoparser.py

75 lines
2.4 KiB
Python
Raw Normal View History

2018-10-08 17:13:00 +02:00
#! /usr/bin/env python
# Get GeoIP information for the client IP addresses in an Nginx access.log.
# Alexey Nizhegolenko 2018
import os
import re
# import sys
import time
2018-10-08 18:00:33 +02:00
import json
2018-10-08 17:13:00 +02:00
import pygeoip
# import subprocess
import Geohash
import configparser
2018-10-08 18:00:33 +02:00
from influxdb import InfluxDBClient
2018-10-08 17:13:00 +02:00
# from collections import Counter
# from datetime import datetime
2018-10-08 18:00:33 +02:00
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT): # NOQA
    """Follow the log file at LOGPATH and print one geohash metric per new line.

    The file is opened, positioned at its current end, and then polled
    forever: each newly appended line that starts with an IPv4 address is
    looked up in the 'GeoLiteCity.dat' database, the resulting
    latitude/longitude is encoded as a geohash, and a one-element list of
    the form ``[{"measurement": ..., "tags": {"geohash": ...},
    "fields": {"count": 1}}]`` is printed as JSON.

    Parameters:
        LOGPATH: path of the log file to follow.
        MEASUREMENT: value stored under the 'measurement' key of each point.
        INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS:
            accepted for the caller's convenience; not used by this function.

    This function never returns normally; it loops until interrupted.
    """
    GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
    # Compiled once outside the loop. Each octet needs at least one digit
    # ({1,3}); the previous {,3} also accepted empty octets such as "...".
    GETIP = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})"
    )
    with open(LOGPATH, "r") as FILE:
        # Start at the end of the file so only lines appended from now on
        # are processed.
        FILE.seek(0, os.SEEK_END)
        while True:
            WHERE = FILE.tell()
            LINE = FILE.readline()
            if not LINE:
                # Nothing new yet: wait, then retry from the same position.
                time.sleep(1)
                FILE.seek(WHERE)
                continue

            MATCH = GETIP.search(LINE)
            if MATCH is None:
                # Line does not start with an IPv4 address; skip it instead
                # of crashing on `None.group(...)`.
                continue

            INFO = GI.record_by_addr(MATCH.group('remote_host'))
            if INFO is None:
                # No GeoIP record for this address.
                continue

            HASH = Geohash.encode(INFO['latitude'], INFO['longitude'])
            # Build fresh objects for every line. Reusing one shared dict and
            # appending it to an ever-growing list made every entry alias the
            # latest geohash and leaked memory.
            METRICS = [{
                'measurement': MEASUREMENT,
                'tags': {'geohash': HASH},
                'fields': {'count': 1},
            }]
            print(json.dumps(METRICS))
2018-10-08 17:13:00 +02:00
def main():
    """Read settings.ini located next to this script and start log parsing.

    The log path comes from the [NGINX_LOG] section and the InfluxDB
    parameters from the [INFLUXDB] section; all of them are handed to
    logparse().
    """
    # Locate the config file relative to the real location of this script.
    here = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    parser = configparser.ConfigParser()
    parser.read('%s/settings.ini' % here)

    # Fetch the options in a fixed order so a missing option fails the same
    # way regardless of how the values are used afterwards.
    log_path = parser.get('NGINX_LOG', 'logpath')
    influx = {}
    for option in ('host', 'port', 'database',
                   'username', 'measurement', 'password'):
        influx[option] = parser.get('INFLUXDB', option)

    # Parse the log file and emit the metrics.
    logparse(
        log_path,
        influx['host'],
        influx['port'],
        influx['database'],
        influx['username'],
        influx['password'],
        influx['measurement'],
    )
2018-10-08 17:13:00 +02:00
# Run the parser only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()