Update to add ipv6 based lines

Use geoip2
update hashes and stuff for geoip2
This commit is contained in:
Remko Lodder 2019-02-04 13:59:41 +01:00
parent 2365ba8e25
commit f384a78f45

View file

@ -2,18 +2,20 @@
# Getting GEO information from Nginx access.log by IP's. # Getting GEO information from Nginx access.log by IP's.
# Alexey Nizhegolenko 2018 # Alexey Nizhegolenko 2018
# Parts added by Remko Lodder, 2019.
# Added: IPv6 matching, make query based on geoip2 instead of
# geoip, which is going away r.s.n.
import os import os
import re import re
import sys import sys
import time import time
import pygeoip import geoip2.database
import Geohash import Geohash
import configparser import configparser
from influxdb import InfluxDBClient from influxdb import InfluxDBClient
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE): # NOQA
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE): # NOQA
# Preparing variables and params # Preparing variables and params
IPS = {} IPS = {}
COUNT = {} COUNT = {}
@ -21,8 +23,11 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
HOSTNAME = os.uname()[1] HOSTNAME = os.uname()[1]
CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT, CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT,
username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB) # NOQA username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB) # NOQA
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE) re_IPV4 = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
re_IPV6 = re.compile('(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')
GI = geoip2.database.Reader(GEOIPDB)
# Main loop to parse access.log file in tailf style with sending metrcs # Main loop to parse access.log file in tailf style with sending metrcs
with open(LOGPATH, "r") as FILE: with open(LOGPATH, "r") as FILE:
@ -40,15 +45,21 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
time.sleep(1) time.sleep(1)
FILE.seek(WHERE) FILE.seek(WHERE)
else: else:
IP = re.search(GETIP, LINE).group(1) if re_IPV4.match(LINE):
m = re_IPV4.match(LINE)
IP = m.group(1)
elif re_IPV6.match(LINE):
m = re_IPV6.match(LINE)
IP = m.group(1)
if IP: if IP:
INFO = GI.record_by_addr(IP) INFO = GI.city(IP)
if INFO is not None: if INFO is not None:
HASH = Geohash.encode(INFO['latitude'], INFO['longitude']) # NOQA HASH = Geohash.encode(INFO.location.latitude, INFO.location.longitude) # NOQA
COUNT['count'] = 1 COUNT['count'] = 1
GEOHASH['geohash'] = HASH GEOHASH['geohash'] = HASH
GEOHASH['host'] = HOSTNAME GEOHASH['host'] = HOSTNAME
GEOHASH['country_code'] = INFO['country_code'] GEOHASH['country_code'] = INFO.country.iso_code
IPS['tags'] = GEOHASH IPS['tags'] = GEOHASH
IPS['fields'] = COUNT IPS['fields'] = COUNT
IPS['measurement'] = MEASUREMENT IPS['measurement'] = MEASUREMENT
@ -65,6 +76,7 @@ def main():
CONFIG.read('%s/settings.ini' % PWD) CONFIG.read('%s/settings.ini' % PWD)
# Getting params from config # Getting params from config
GEOIPDB = CONFIG.get('GEOIP', 'geoipdb')
LOGPATH = CONFIG.get('NGINX_LOG', 'logpath') LOGPATH = CONFIG.get('NGINX_LOG', 'logpath')
INFLUXHOST = CONFIG.get('INFLUXDB', 'host') INFLUXHOST = CONFIG.get('INFLUXDB', 'host')
INFLUXPORT = CONFIG.get('INFLUXDB', 'port') INFLUXPORT = CONFIG.get('INFLUXDB', 'port')
@ -79,7 +91,7 @@ def main():
INODE = os.stat(LOGPATH).st_ino INODE = os.stat(LOGPATH).st_ino
# Run main loop and grep a log file # Run main loop and grep a log file
if os.path.exists(LOGPATH): if os.path.exists(LOGPATH):
logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE) # NOQA logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE) # NOQA
else: else:
print('File %s not found' % LOGPATH) print('File %s not found' % LOGPATH)