Python code for querying the Chunzhen ("pure") IP database
- 2020-04-02 09:24:51
- OfStack
Core code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bisect import bisect
# Lookup tables filled in by _init():
#   _LIST1 holds the integer start address of every range (searched with bisect),
#   _LIST2 holds the matching (start, end, address text) tuples.
_LIST1 = []
_LIST2 = []
# Becomes True once the data file has been loaded.
_INIT = False
def ip2int(ip_str):
    """Convert a dotted-decimal address string to its integer value.

    Each dot-separated field is folded in as one more byte, most
    significant first, so ``'1.2.3.4'`` becomes ``0x01020304``.

    Raises ValueError if a field is not a decimal integer.
    """
    # A plain loop instead of reduce(): reduce is a builtin only on
    # Python 2, so this form needs no import on either version.
    value = 0
    for field in ip_str.split('.'):
        value = (value << 8) + int(field)
    return value
def _init():
    """Load ``ipdata.txt`` into the module-level lookup tables, once.

    Each usable line has the form ``<start ip> <end ip> [address text...]``.
    The start addresses go into ``_LIST1`` and the full
    ``(start, end, address)`` tuples into ``_LIST2``.  Later calls are
    no-ops once ``_INIT`` has been set.

    NOTE(review): ip_from() bisects ``_LIST1``, so the file is presumably
    sorted by start address -- nothing here sorts or checks that.
    """
    global _INIT
    if _INIT:
        return
    # Collect into locals first so that a failure half-way through the file
    # cannot leave the shared tables partially filled (and then filled a
    # second time on the next attempt).
    starts, ranges = [], []
    with open('ipdata.txt', 'rb') as data_file:
        for line in data_file:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to index.
                continue
            # ip2int splits on a text '.', so hand it text, not raw bytes.
            first = ip2int(fields[0].decode('ascii'))
            last = ip2int(fields[1].decode('ascii'))
            starts.append(first)
            ranges.append((first, last, b' '.join(fields[2:])))
    _LIST1.extend(starts)
    _LIST2.extend(ranges)
    _INIT = True
def ip_from(ip):
    """Return the address text recorded for ``ip``.

    ``ip`` is a dotted-decimal string.  When no loaded range contains it,
    the string ``u'unknown ip address <ip>'`` is returned instead.
    """
    _init()
    i = ip2int(ip)
    # bisect() gives the number of range starts that are <= i, so the only
    # candidate range is the one just before that position.
    idx = bisect(_LIST1, i)
    # idx == 0 means the address lies below every range; that is an
    # ordinary "not found", not a programming error.  idx == len(_LIST1)
    # is valid too: it selects the last range.
    if idx > 0:
        frm, to, addr = _LIST2[idx - 1]
        if frm <= i <= to:
            return addr
    return u'unknown ip address %s' % ip
if __name__ == '__main__':
    # Small demo: look up a handful of sample addresses, one result per line.
    for sample in ('115.238.54.106',
                   '220.181.29.160',
                   '115.238.54.107',
                   '8.8.8.8'):
        print(ip_from(sample))
Download code package (link: http://xiazai.jb51.net/201105/yuanma/ipaddress.7z)
Here is a more complete implementation:
#!/usr/bin/env python
# coding: utf-8
'''Query the Chunzhen ("pure") IP database with a Python script.

The QQWry.Dat file is laid out as follows:

    +----------------+
    |  File header   |  (8 bytes)
    +----------------+
    |  Record area   |  (variable length)
    +----------------+
    |  Index area    |  (size determined by the file header)
    +----------------+

File header: a 4-byte offset of the first index entry followed by a 4-byte
offset of the last index entry.

Record area: each IP record has the form
    IP address [country information][area information]
The country information has three possible representations:
  * String form (the 5th byte of the IP record is neither 0x01 nor 0x02).
  * Redirect mode 1 (the 5th byte is 0x01): the following 3 bytes are the
    offset at which the country information is stored.
  * Redirect mode 2 (the 5th byte is 0x02): the following 3 bytes are the
    offset of the country information, and the area information follows them.
The area information has two possible representations: string form and
redirection.
One last rule: a redirect mode 1 country record cannot be followed by an
area record.

Index area: each index entry has the form
    4-byte start IP address + 3-byte offset of the IP record
The IP in an index entry and the IP in the record it points to together
form an IP range; the information found applies to every IP in that range.
'''
import sys
import socket
from struct import pack, unpack
class IPInfo(object):
    '''Collection of query functions for a QQWry.Dat database.

    The whole file is kept in memory as a byte string (``self.img``).
    Country/area strings are returned as UTF-8 encoded byte strings.
    '''

    # Every string in the file ends with a NUL byte.  It must be spelled as
    # an escape: a bare '0' would search for the ASCII digit zero instead.
    _NUL = b'\x00'

    def __init__(self, dbname):
        '''Read the database file and work out where its index lives.

        dbname -- path of the QQWry.Dat file to load.
        '''
        self.dbname = dbname
        # Binary mode matters: text mode on Windows rewrites line endings
        # and would corrupt every offset in the file.
        with open(dbname, 'rb') as f:
            self.img = f.read()
        # The first 8 bytes are the offsets of the first and last index
        # entries.  '<' forces little-endian, which is what the file uses,
        # instead of whatever byte order the host happens to have.
        (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])
        # Each index entry is 7 bytes long; floor division keeps the count
        # an integer.
        self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

    def _byteAt(self, offset):
        '''Return the byte stored at ``offset`` as an integer.'''
        value = self.img[offset]
        # Indexing a byte string yields an int on Python 3 and a
        # one-character str on Python 2.
        return value if isinstance(value, int) else ord(value)

    def getString(self, offset = 0):
        '''Return the NUL-terminated string that starts at ``offset``.

        The stored bytes are decoded as GB2312 and returned re-encoded as
        UTF-8.  If they cannot be decoded, a fixed placeholder string is
        returned instead.
        '''
        end = self.img.find(self._NUL, offset)
        if end < 0:
            # No terminator before the end of the data: take what is left
            # rather than letting -1 silently chop off the final byte.
            end = len(self.img)
        gb2312_str = self.img[offset:end]
        try:
            return gb2312_str.decode('gb2312').encode('utf-8')
        except UnicodeDecodeError:
            return b' The unknown '

    def getLong3(self, offset = 0):
        '''Return the 3-byte little-endian integer stored at ``offset``.

        All offsets inside QQWry.Dat are stored in 3 bytes.
        '''
        # struct has no 3-byte format, so pad with a zero high byte and
        # read the result as a 4-byte little-endian unsigned int.
        raw = self.img[offset:offset + 3] + self._NUL
        return unpack('<I', raw)[0]

    def getAreaAddr(self, offset = 0):
        '''Return the string at ``offset``, following redirects.

        If the byte at ``offset`` is 1 or 2, the next 3 bytes are the
        offset of the real data and the lookup continues there; otherwise
        the string is read in place.
        '''
        byte = self._byteAt(offset)
        if byte == 1 or byte == 2:
            return self.getAreaAddr(self.getLong3(offset + 1))
        return self.getString(offset)

    def getAddr(self, offset, ip = 0):
        '''Return ``(country, area)`` for the record data at ``offset``.

        ``offset`` points just past the 4-byte IP that starts a record.
        ``ip`` is not used; it is kept so existing callers keep working.
        '''
        o = offset
        byte = self._byteAt(o)
        if byte == 1:
            # Redirect mode 1: [IP][0x01][offset of country + area data].
            return self.getAddr(self.getLong3(o + 1))
        if byte == 2:
            # Redirect mode 2: [IP][0x02][offset of country][area data].
            cArea = self.getAreaAddr(self.getLong3(o + 1))
            # The area data starts after the mode byte and 3-byte offset.
            aArea = self.getAreaAddr(o + 4)
            return (cArea, aArea)
        # Plain form: [IP][country string][area data].  This is also where
        # a mode 1 redirect ends up when its target is two strings.
        cArea = self.getString(o)
        # Step over the country bytes as stored in the file; the length of
        # the re-encoded UTF-8 result would give the wrong distance.
        end = self.img.find(self._NUL, o)
        if end < 0:
            # The country string ran to the end of the data: no area.
            return (cArea, b'')
        # The area may itself be a redirect, so resolve it the same way the
        # mode 2 branch does instead of decoding the redirect bytes as text.
        aArea = self.getAreaAddr(end + 1)
        # NOTE(review): these literal replacements look like clean-ups for
        # garbled area strings in one particular database build -- confirm
        # they are still wanted.
        if aArea == b"?":
            aArea = b" telecom "
        if aArea == b" The letter ":
            aArea = b""
        if aArea == b"[":
            aArea = b" unicom "
        return (cArea, aArea)

    def find(self, ip, l, r):
        '''Binary-search index entries ``l`` .. ``r - 1`` for ``ip``.

        ``ip`` is the address as an integer.  Returns the largest index in
        that window whose start address is <= ``ip`` (or ``l`` when even
        entry ``l`` starts above it).
        '''
        while r - l > 1:
            m = (l + r) // 2
            o = self.firstIndex + m * 7
            new_ip = unpack('<I', self.img[o:o + 4])[0]
            # Strictly less: an address equal to a range's start belongs to
            # that range, not to the one before it.
            if ip < new_ip:
                r = m
            else:
                l = m
        return l

    def getIPAddr(self, ip):
        '''Return ``(country, area)`` for the dotted-decimal address ``ip``.'''
        # Turn the text address into an integer (network byte order).
        ip = unpack('!I', socket.inet_aton(ip))[0]
        # The upper bound is exclusive, so pass the full count to keep the
        # last index entry reachable.
        i = self.find(ip, 0, self.indexCount)
        o = self.firstIndex + i * 7
        # An index entry is a 4-byte start IP followed by the 3-byte offset
        # of the record.
        o2 = self.getLong3(o + 4)
        # Skip the 4-byte IP at the start of the record.
        return self.getAddr(o2 + 4)

    def output(self, first, last):
        '''Print index entries ``first`` .. ``last - 1``, one per line.

        Each line shows the start IP, the record offset and the
        country/area strings.
        '''
        for i in range(first, last):
            o = self.firstIndex + i * 7
            # The file is little-endian regardless of the host byte order.
            start_ip = unpack('<I', self.img[o:o + 4])[0]
            ip = socket.inet_ntoa(pack('!I', start_ip))
            offset = self.getLong3(o + 4)
            (c, a) = self.getAddr(offset + 4)
            print("%s %d %s/%s" % (ip, offset, c, a))
def getIP(ip):
    '''Return the country and area strings for ``ip``, concatenated.

    The database is read from ``qqwry.dat`` in the directory of this
    script (resolved against the current working directory).
    '''
    import os
    script_dir = os.path.dirname(__file__)
    base_dir = os.path.normpath(os.path.join(os.getcwd(), script_dir))
    database = IPInfo(base_dir + '/qqwry.dat')
    country, area = database.getIPAddr(ip)
    return country + area
def main():
    '''Look up the address(es) named on the command line and print them.

    ``sys.argv[1]`` is either the path of an existing text file holding one
    IP address per line, or a single IP address.  The database is read from
    ``qqwry.dat`` in the directory of this script.
    '''
    import os
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <ip-address | file-of-ip-addresses>\n'
                         % sys.argv[0])
        sys.exit(1)
    target = sys.argv[1]

    _localDir = os.path.dirname(__file__)
    curpath = os.path.normpath(os.path.join(os.getcwd(), _localDir))
    i = IPInfo(curpath + '/qqwry.dat')

    if os.path.exists(target):
        with open(target, "r") as ip_file:
            # strip() removes the line ending (and stray blanks) so that it
            # reaches neither the lookup nor the printed output.
            queries = [line.strip() for line in ip_file]
    else:
        queries = [target]

    for query in queries:
        if not query:
            # Blank line in the input file.
            continue
        (c, a) = i.getIPAddr(query)
        # On a Windows console, convert back to gb2312 to avoid garbled
        # output.
        if sys.platform == 'win32':
            c = c.decode('utf-8').encode('gb2312')
            a = a.decode('utf-8').encode('gb2312')
        print('%s %s/%s' % (query, c, a))


if __name__ == '__main__':
    main()
Querying the pure IP database QQWry.Dat with a Python script (modified by Demon)
I wanted to use Python to read an IPv6 database whose format is similar to that of the pure IP database QQWry.Dat, so I searched the Internet and found a Python script on LinuxTOY. It had a few minor problems, so I modified it.
#!/usr/bin/env python
# coding: utf-8
# from: http://linuxtoy.org/files/pyip.py
# Blog: http://linuxtoy.org/archives/python-ip.html
# Modified by Demon
# Blog: http://demon.tw/programming/python-qqwry-dat.html
'''Query the Chunzhen ("pure") IP database with a Python script.

The QQWry.Dat file is laid out as follows:

    +----------------+
    |  File header   |  (8 bytes)
    +----------------+
    |  Record area   |  (variable length)
    +----------------+
    |  Index area    |  (size determined by the file header)
    +----------------+

File header: a 4-byte offset of the first index entry followed by a 4-byte
offset of the last index entry.

Record area: each IP record has the form
    IP address [country information][area information]
The country information has three possible representations:
  * String form (the 5th byte of the IP record is neither 0x01 nor 0x02).
  * Redirect mode 1 (the 5th byte is 0x01): the following 3 bytes are the
    offset at which the country information is stored.
  * Redirect mode 2 (the 5th byte is 0x02): the following 3 bytes are the
    offset of the country information, and the area information follows them.
The area information has two possible representations: string form and
redirection.
One last rule: a redirect mode 1 country record cannot be followed by an
area record.

Index area: each index entry has the form
    4-byte start IP address + 3-byte offset of the IP record
The IP in an index entry and the IP in the record it points to together
form an IP range; the information found applies to every IP in that range.
'''
import sys
import socket
from struct import pack, unpack
class IPInfo(object):
    '''Collection of query functions for a QQWry.Dat database.

    The whole file is kept in memory as a byte string (``self.img``).
    Country/area strings are returned as UTF-8 encoded byte strings.
    '''

    # Every string in the file ends with a NUL byte.  It must be spelled as
    # an escape: a bare '0' would search for the ASCII digit zero instead.
    _NUL = b'\x00'

    def __init__(self, dbname):
        '''Read the database file and work out where its index lives.

        dbname -- path of the QQWry.Dat file to load.
        '''
        self.dbname = dbname
        # Binary mode matters: text mode on Windows rewrites line endings
        # and would corrupt every offset in the file.
        with open(dbname, 'rb') as f:
            self.img = f.read()
        # The first 8 bytes are the offsets of the first and last index
        # entries.  '<' forces little-endian, which is what the file uses,
        # instead of whatever byte order the host happens to have.
        (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])
        # Each index entry is 7 bytes long; floor division keeps the count
        # an integer.
        self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

    def _byteAt(self, offset):
        '''Return the byte stored at ``offset`` as an integer.'''
        value = self.img[offset]
        # Indexing a byte string yields an int on Python 3 and a
        # one-character str on Python 2.
        return value if isinstance(value, int) else ord(value)

    def getString(self, offset = 0):
        '''Return the NUL-terminated string that starts at ``offset``.

        The stored bytes are decoded as GB2312 and returned re-encoded as
        UTF-8.  If they cannot be decoded, a fixed placeholder string is
        returned instead.
        '''
        end = self.img.find(self._NUL, offset)
        if end < 0:
            # No terminator before the end of the data: take what is left
            # rather than letting -1 silently chop off the final byte.
            end = len(self.img)
        gb2312_str = self.img[offset:end]
        try:
            return gb2312_str.decode('gb2312').encode('utf-8')
        except UnicodeDecodeError:
            return b' The unknown '

    def getLong3(self, offset = 0):
        '''Return the 3-byte little-endian integer stored at ``offset``.

        All offsets inside QQWry.Dat are stored in 3 bytes.
        '''
        # struct has no 3-byte format, so pad with a zero high byte and
        # read the result as a 4-byte little-endian unsigned int.
        raw = self.img[offset:offset + 3] + self._NUL
        return unpack('<I', raw)[0]

    def getAreaAddr(self, offset = 0):
        '''Return the string at ``offset``, following redirects.

        If the byte at ``offset`` is 1 or 2, the next 3 bytes are the
        offset of the real data and the lookup continues there; otherwise
        the string is read in place.
        '''
        byte = self._byteAt(offset)
        if byte == 1 or byte == 2:
            return self.getAreaAddr(self.getLong3(offset + 1))
        return self.getString(offset)

    def getAddr(self, offset, ip = 0):
        '''Return ``(country, area)`` for the record data at ``offset``.

        ``offset`` points just past the 4-byte IP that starts a record.
        ``ip`` is not used; it is kept so existing callers keep working.
        '''
        o = offset
        byte = self._byteAt(o)
        if byte == 1:
            # Redirect mode 1: [IP][0x01][offset of country + area data].
            return self.getAddr(self.getLong3(o + 1))
        if byte == 2:
            # Redirect mode 2: [IP][0x02][offset of country][area data].
            cArea = self.getAreaAddr(self.getLong3(o + 1))
            # The area data starts after the mode byte and 3-byte offset.
            aArea = self.getAreaAddr(o + 4)
            return (cArea, aArea)
        # Plain form: [IP][country string][area data].  This is also where
        # a mode 1 redirect ends up when its target is two strings.
        cArea = self.getString(o)
        # Step over the country bytes as stored in the file; the length of
        # the re-encoded UTF-8 result would give the wrong distance.
        end = self.img.find(self._NUL, o)
        if end < 0:
            # The country string ran to the end of the data: no area.
            return (cArea, b'')
        # The area may itself be a redirect, so resolve it the same way the
        # mode 2 branch does instead of decoding the redirect bytes as text.
        aArea = self.getAreaAddr(end + 1)
        return (cArea, aArea)

    def find(self, ip, l, r):
        '''Binary-search index entries ``l`` .. ``r - 1`` for ``ip``.

        ``ip`` is the address as an integer.  Returns the largest index in
        that window whose start address is <= ``ip`` (or ``l`` when even
        entry ``l`` starts above it).
        '''
        while r - l > 1:
            m = (l + r) // 2
            o = self.firstIndex + m * 7
            new_ip = unpack('<I', self.img[o:o + 4])[0]
            # Strictly less: an address equal to a range's start belongs to
            # that range, not to the one before it.
            if ip < new_ip:
                r = m
            else:
                l = m
        return l

    def getIPAddr(self, ip):
        '''Return ``(country, area)`` for the dotted-decimal address ``ip``.'''
        # Turn the text address into an integer (network byte order).
        ip = unpack('!I', socket.inet_aton(ip))[0]
        # The upper bound is exclusive, so pass the full count to keep the
        # last index entry reachable.
        i = self.find(ip, 0, self.indexCount)
        o = self.firstIndex + i * 7
        # An index entry is a 4-byte start IP followed by the 3-byte offset
        # of the record.
        o2 = self.getLong3(o + 4)
        # Skip the 4-byte IP at the start of the record.
        return self.getAddr(o2 + 4)

    def output(self, first, last):
        '''Print index entries ``first`` .. ``last - 1``, one per line.

        Each line shows the start IP, the record offset and the
        country/area strings.
        '''
        for i in range(first, last):
            o = self.firstIndex + i * 7
            # The file is little-endian regardless of the host byte order.
            start_ip = unpack('<I', self.img[o:o + 4])[0]
            ip = socket.inet_ntoa(pack('!I', start_ip))
            offset = self.getLong3(o + 4)
            (c, a) = self.getAddr(offset + 4)
            print("%s %d %s/%s" % (ip, offset, c, a))
def main():
    '''Look up the address given as the first command-line argument.

    The database is read from ``QQWry.Dat`` in the current directory and
    the result is printed as ``<ip> <country>/<area>``.
    '''
    database = IPInfo('QQWry.Dat')
    query = sys.argv[1]
    country, area = database.getIPAddr(query)
    # On a Windows console, convert back to gb2312 to avoid garbled output.
    if sys.platform == 'win32':
        country, area = [unicode(text, 'utf-8').encode('gb2312')
                         for text in (country, area)]
    print('%s %s/%s' % (query, country, area))


if __name__ == '__main__':
    main()
# changelog
# Date: 29 May 2009
# 1. Following a reader's suggestion (linked below), revised "o += len(cArea) + 1"
# http://linuxtoy.org/archives/python-ip.html#comment-113960
# By that point the resulting string has already been re-encoded as UTF-8, so its length no longer matches the stored data.