Merge pull request #1 from remkolodder/master
update to use geoip2 + ipv6 matching
This commit is contained in:
commit
04f43c4bc6
2 changed files with 26 additions and 10 deletions
32
geoparser.py
32
geoparser.py
|
@ -2,18 +2,20 @@
|
||||||
|
|
||||||
# Getting GEO information from the Nginx access.log by IP address.
|
# Getting GEO information from the Nginx access.log by IP address.
|
||||||
# Alexey Nizhegolenko 2018
|
# Alexey Nizhegolenko 2018
|
||||||
|
# Parts added by Remko Lodder, 2019.
|
||||||
|
# Added: IPv6 matching, make query based on geoip2 instead of
|
||||||
|
# geoip, which is going away soon.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import pygeoip
|
import geoip2.database
|
||||||
import Geohash
|
import Geohash
|
||||||
import configparser
|
import configparser
|
||||||
from influxdb import InfluxDBClient
|
from influxdb import InfluxDBClient
|
||||||
|
|
||||||
|
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE): # NOQA
|
||||||
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE): # NOQA
|
|
||||||
# Preparing variables and params
|
# Preparing variables and params
|
||||||
IPS = {}
|
IPS = {}
|
||||||
COUNT = {}
|
COUNT = {}
|
||||||
|
@ -21,8 +23,11 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
|
||||||
HOSTNAME = os.uname()[1]
|
HOSTNAME = os.uname()[1]
|
||||||
CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT,
|
CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT,
|
||||||
username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB) # NOQA
|
username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB) # NOQA
|
||||||
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
|
|
||||||
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
|
re_IPV4 = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
|
||||||
|
re_IPV6 = re.compile('(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')
|
||||||
|
|
||||||
|
GI = geoip2.database.Reader(GEOIPDB)
|
||||||
|
|
||||||
# Main loop to parse the access.log file in `tail -f` style and send metrics
|
# Main loop to parse the access.log file in `tail -f` style and send metrics
|
||||||
with open(LOGPATH, "r") as FILE:
|
with open(LOGPATH, "r") as FILE:
|
||||||
|
@ -40,15 +45,21 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
FILE.seek(WHERE)
|
FILE.seek(WHERE)
|
||||||
else:
|
else:
|
||||||
IP = re.search(GETIP, LINE).group(1)
|
if re_IPV4.match(LINE):
|
||||||
|
m = re_IPV4.match(LINE)
|
||||||
|
IP = m.group(1)
|
||||||
|
elif re_IPV6.match(LINE):
|
||||||
|
m = re_IPV6.match(LINE)
|
||||||
|
IP = m.group(1)
|
||||||
|
|
||||||
if IP:
|
if IP:
|
||||||
INFO = GI.record_by_addr(IP)
|
INFO = GI.city(IP)
|
||||||
if INFO is not None:
|
if INFO is not None:
|
||||||
HASH = Geohash.encode(INFO['latitude'], INFO['longitude']) # NOQA
|
HASH = Geohash.encode(INFO.location.latitude, INFO.location.longitude) # NOQA
|
||||||
COUNT['count'] = 1
|
COUNT['count'] = 1
|
||||||
GEOHASH['geohash'] = HASH
|
GEOHASH['geohash'] = HASH
|
||||||
GEOHASH['host'] = HOSTNAME
|
GEOHASH['host'] = HOSTNAME
|
||||||
GEOHASH['country_code'] = INFO['country_code']
|
GEOHASH['country_code'] = INFO.country.iso_code
|
||||||
IPS['tags'] = GEOHASH
|
IPS['tags'] = GEOHASH
|
||||||
IPS['fields'] = COUNT
|
IPS['fields'] = COUNT
|
||||||
IPS['measurement'] = MEASUREMENT
|
IPS['measurement'] = MEASUREMENT
|
||||||
|
@ -65,6 +76,7 @@ def main():
|
||||||
CONFIG.read('%s/settings.ini' % PWD)
|
CONFIG.read('%s/settings.ini' % PWD)
|
||||||
|
|
||||||
# Getting params from config
|
# Getting params from config
|
||||||
|
GEOIPDB = CONFIG.get('GEOIP', 'geoipdb')
|
||||||
LOGPATH = CONFIG.get('NGINX_LOG', 'logpath')
|
LOGPATH = CONFIG.get('NGINX_LOG', 'logpath')
|
||||||
INFLUXHOST = CONFIG.get('INFLUXDB', 'host')
|
INFLUXHOST = CONFIG.get('INFLUXDB', 'host')
|
||||||
INFLUXPORT = CONFIG.get('INFLUXDB', 'port')
|
INFLUXPORT = CONFIG.get('INFLUXDB', 'port')
|
||||||
|
@ -79,7 +91,7 @@ def main():
|
||||||
INODE = os.stat(LOGPATH).st_ino
|
INODE = os.stat(LOGPATH).st_ino
|
||||||
# Run main loop and grep a log file
|
# Run main loop and grep a log file
|
||||||
if os.path.exists(LOGPATH):
|
if os.path.exists(LOGPATH):
|
||||||
logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE) # NOQA
|
logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE) # NOQA
|
||||||
else:
|
else:
|
||||||
print('File %s not found' % LOGPATH)
|
print('File %s not found' % LOGPATH)
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,10 @@
|
||||||
#Path for the log file (Nginx)
|
#Path for the log file (Nginx)
|
||||||
logpath = /var/log/nginx/access.log
|
logpath = /var/log/nginx/access.log
|
||||||
|
|
||||||
|
[GEOIP]
|
||||||
|
geoipdb = /usr/local/share/GeoIP/GeoLite2-City.mmdb
|
||||||
|
# For country use /usr/local/share/GeoIP/GeoLite2-Country.mmdb
|
||||||
|
|
||||||
[INFLUXDB]
|
[INFLUXDB]
|
||||||
# Database URL
|
# Database URL
|
||||||
host = ip_address
|
host = ip_address
|
||||||
|
|
Loading…
Reference in a new issue