geostat/geoparser.py

82 lines
2.8 KiB
Python
Raw Normal View History

2018-10-08 17:13:00 +02:00
#! /usr/bin/env python
2018-10-08 22:25:40 +02:00
# Get GEO information for the client IP addresses found in the Nginx access.log.
2018-10-08 17:13:00 +02:00
# Alexey Nizhegolenko 2018
import os
import re
2018-10-08 22:25:40 +02:00
import sys
2018-10-08 17:13:00 +02:00
import time
import pygeoip
import Geohash
import configparser
2018-10-08 18:00:33 +02:00
from influxdb import InfluxDBClient
2018-10-08 17:13:00 +02:00
2018-10-08 18:00:33 +02:00
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT): # NOQA
    """Tail an access log and write one geo-tagged point per request to InfluxDB.

    The file at LOGPATH is opened once, positioned at its current end, and
    then polled forever (``tail -f`` style). For every new line that starts
    with an IPv4 address, the address is looked up in ``GeoLiteCity.dat`` and
    a point is written with tags ``geohash``, ``host`` and ``country_code``
    and a single field ``count`` = 1.

    Lines that do not start with an IPv4 address (for example IPv6 clients)
    and addresses the GeoIP database has no record for are skipped.

    Args:
        LOGPATH: Path of the access log to follow.
        INFLUXHOST: InfluxDB host, passed to ``InfluxDBClient``.
        INFLUXPORT: InfluxDB port, passed to ``InfluxDBClient``.
        INFLUXDBDB: InfluxDB database name.
        INFLUXUSER: InfluxDB user name.
        INFLUXUSERPASS: InfluxDB password.
        MEASUREMENT: Measurement name used for every point.

    This function never returns normally; it runs until an exception
    (for example ``KeyboardInterrupt``) propagates out of the loop.
    """
    hostname = os.uname()[1]
    client = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT,
                            username=INFLUXUSER, password=INFLUXUSERPASS,
                            database=INFLUXDBDB)

    # Compiled once, outside the loop. Each octet needs at least one digit:
    # the former "{,3}" quantifier also accepted empty octets such as "...".
    ip_pattern = re.compile(
        r"^(?P<remote_host>[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})")

    # NOTE: the database path is relative, so it is resolved against the
    # current working directory of the process.
    geoip = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)

    # Main loop: follow the log file and send a metric for each new request.
    # NOTE: the file is opened once and never reopened, so a rotated or
    # replaced log file is not picked up.
    with open(LOGPATH, "r") as logfile:
        # Start at the current end of file; only new requests are reported.
        logfile.seek(0, os.SEEK_END)
        while True:
            position = logfile.tell()
            line = logfile.readline()
            if not line:
                # Nothing new yet: wait, then retry from the same offset.
                time.sleep(1)
                logfile.seek(position)
                continue

            match = ip_pattern.search(line)
            if match is None:
                # Not an IPv4-prefixed line; previously this crashed with
                # AttributeError on ``None.group``.
                continue

            info = geoip.record_by_addr(match.group('remote_host'))
            if info is None:
                continue

            # A fresh dict per point avoids sharing mutable state between
            # iterations.
            point = {
                'measurement': MEASUREMENT,
                'tags': {
                    'geohash': Geohash.encode(info['latitude'],
                                              info['longitude']),
                    'host': hostname,
                    'country_code': info['country_code'],
                },
                'fields': {'count': 1},
            }

            # Sending json data to InfluxDB
            client.write_points([point])
2018-10-08 17:13:00 +02:00
2018-10-08 18:00:33 +02:00
2018-10-08 22:25:40 +02:00
def main():
    """Read settings.ini from this script's directory and start the log parser.

    The log path is taken from the ``[NGINX_LOG]`` section and the InfluxDB
    connection parameters and measurement name from the ``[INFLUXDB]``
    section; all of them are handed to ``logparse``.
    """
    # The config file is looked up beside the (symlink-resolved) script,
    # not in the current working directory.
    script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    config = configparser.ConfigParser()
    config.read('%s/settings.ini' % script_dir)

    # Getting params from config
    logpath = config.get('NGINX_LOG', 'logpath')
    host, port, database, username, measurement, password = [
        config.get('INFLUXDB', option)
        for option in ('host', 'port', 'database',
                       'username', 'measurement', 'password')
    ]

    # Parsing log file and sending metrics to Influxdb
    logparse(logpath, host, port, database, username, password, measurement)
2018-10-08 17:13:00 +02:00
if __name__ == '__main__':
    # Ctrl-C is treated as a normal stop: exit with status 0 instead of
    # printing a KeyboardInterrupt traceback.
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(0)