Merge pull request #1 from remkolodder/master

update to use geoip2 + ipv6 matching
This commit is contained in:
Alexey Nizhegolenko 2019-02-05 12:22:45 +02:00 committed by GitHub
commit 04f43c4bc6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 10 deletions

View file

@ -2,18 +2,20 @@
# Getting GEO information from Nginx access.log by IP's.
# Alexey Nizhegolenko 2018
# Parts added by Remko Lodder, 2019.
# Added: IPv6 matching, make query based on geoip2 instead of
# geoip, which is going away r.s.n.
import os
import re
import sys
import time
import pygeoip
import geoip2.database
import Geohash
import configparser
from influxdb import InfluxDBClient
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE): # NOQA
def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE): # NOQA
# Preparing variables and params
IPS = {}
COUNT = {}
@ -21,8 +23,11 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
HOSTNAME = os.uname()[1]
CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT,
username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB) # NOQA
GETIP = r"^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})"
GI = pygeoip.GeoIP('GeoLiteCity.dat', pygeoip.const.MEMORY_CACHE)
re_IPV4 = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
re_IPV6 = re.compile('(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')
GI = geoip2.database.Reader(GEOIPDB)
# Main loop to parse access.log file in tailf style with sending metrcs
with open(LOGPATH, "r") as FILE:
@ -40,15 +45,21 @@ def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSER
time.sleep(1)
FILE.seek(WHERE)
else:
IP = re.search(GETIP, LINE).group(1)
if re_IPV4.match(LINE):
m = re_IPV4.match(LINE)
IP = m.group(1)
elif re_IPV6.match(LINE):
m = re_IPV6.match(LINE)
IP = m.group(1)
if IP:
INFO = GI.record_by_addr(IP)
INFO = GI.city(IP)
if INFO is not None:
HASH = Geohash.encode(INFO['latitude'], INFO['longitude']) # NOQA
HASH = Geohash.encode(INFO.location.latitude, INFO.location.longitude) # NOQA
COUNT['count'] = 1
GEOHASH['geohash'] = HASH
GEOHASH['host'] = HOSTNAME
GEOHASH['country_code'] = INFO['country_code']
GEOHASH['country_code'] = INFO.country.iso_code
IPS['tags'] = GEOHASH
IPS['fields'] = COUNT
IPS['measurement'] = MEASUREMENT
@ -65,6 +76,7 @@ def main():
CONFIG.read('%s/settings.ini' % PWD)
# Getting params from config
GEOIPDB = CONFIG.get('GEOIP', 'geoipdb')
LOGPATH = CONFIG.get('NGINX_LOG', 'logpath')
INFLUXHOST = CONFIG.get('INFLUXDB', 'host')
INFLUXPORT = CONFIG.get('INFLUXDB', 'port')
@ -79,7 +91,7 @@ def main():
INODE = os.stat(LOGPATH).st_ino
# Run main loop and grep a log file
if os.path.exists(LOGPATH):
logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, INODE) # NOQA
logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE) # NOQA
else:
print('File %s not found' % LOGPATH)

View file

@ -2,6 +2,10 @@
#Path for the log file (Nginx)
logpath = /var/log/nginx/access.log
[GEOIP]
geoipdb = /usr/local/share/GeoIP/GeoLite2-City.mmdb
# For country use /usr/local/share/GeoIP/GeoLite2-Country.mmdb
[INFLUXDB]
# Database URL
host = ip_address