Python code for querying the Chunzhen ("pure") IP database

2020-04-02 09:24:51
OfStack

Core code:


#!/usr/bin/env python 
# -*- coding: utf-8 -*- 

from bisect import bisect 

# Parallel lookup tables filled by _init(): _LIST1 holds the integer start
# address of every range (it is the list handed to bisect), and _LIST2 holds
# the matching (start, end, address) tuples at the same positions.
_LIST1, _LIST2 = [], [] 
# Becomes True once ipdata.txt has been loaded, so the file is read only once.
_INIT = False 

def ip2int(ip_str):
    """Convert a dotted-decimal address string to its integer value.

    Each dot-separated field is folded in as one more base-256 digit, so
    ``'1.2.3.4'`` becomes ``0x01020304``.  The number of fields and their
    range are not checked.

    Raises ValueError if a field is not a decimal integer.
    """
    # An explicit loop avoids the ``reduce`` builtin, which only exists
    # without an import on Python 2.
    value = 0
    for part in ip_str.split('.'):
        value = (value << 8) + int(part)
    return value

def _init():
    """Load ``ipdata.txt`` into the module-level lookup tables, once.

    Every line is expected to hold ``<start-ip> <end-ip> <address ...>``
    separated by whitespace.  The start addresses are appended to
    ``_LIST1`` and the ``(start, end, address)`` tuples to ``_LIST2`` in
    file order; ``ip_from`` bisects ``_LIST1``, so the file is assumed to be
    sorted by start address (TODO confirm against the data file).

    The address text is kept as the raw bytes read from the file.

    Raises IOError/OSError if the file cannot be opened and ValueError if an
    address field is not a dotted-decimal number.
    """
    global _INIT
    if _INIT:
        return

    starts, ranges = [], []
    # ``with`` closes the file even when a malformed line raises.
    with open('ipdata.txt', 'rb') as data:
        for line in data:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to index.
                continue
            # The file is read as bytes; decode only the two address fields
            # so that ip2int receives text on Python 2 and Python 3 alike.
            ip1 = ip2int(fields[0].decode('ascii'))
            ip2 = ip2int(fields[1].decode('ascii'))
            starts.append(ip1)
            ranges.append((ip1, ip2, b' '.join(fields[2:])))

    # Publish only after the whole file parsed, so a failed load never
    # leaves half-filled tables behind for the next call to append to.
    _LIST1.extend(starts)
    _LIST2.extend(ranges)
    _INIT = True

def ip_from(ip):
    """Return the address text recorded for the range that contains *ip*.

    ip -- dotted-decimal IPv4 address string.

    Returns the address stored for the matching range, or the message
    ``u'unknown ip address <ip>'`` when no loaded range contains the address.
    """
    _init()
    i = ip2int(ip)
    # bisect returns the insertion point to the right of every start <= i,
    # so the only candidate range is the one just before it.  That includes
    # idx == len(_LIST1), which is the last range in the table.
    idx = bisect(_LIST1, i)
    if idx > 0:
        frm, to, addr = _LIST2[idx - 1]
        if frm <= i <= to:
            return addr
    # Either the address is below the first range (idx == 0) or it falls in
    # a gap between two ranges.
    return u'unknown ip address %s' % ip

if __name__ == '__main__':
    # Demo: look up a few sample addresses and print what was found.
    for sample in ('115.238.54.106', '220.181.29.160',
                   '115.238.54.107', '8.8.8.8'):
        print(ip_from(sample))

Download code package (link: http://xiazai.jb51.net/201105/yuanma/ipaddress.7z)

Here is a more complete version of the code:


#!/usr/bin/env python
# coding: utf-8
 
'''Query the Chunzhen ("pure") IP database with a Python script.

The QQWry.Dat file is laid out as follows:

+--------------+
| File header  |  (8 bytes)
+--------------+
| Record area  |  (variable length)
+--------------+
| Index area   |  (size determined by the file header)
+--------------+

File header: the 4-byte offset of the first index entry followed by the
  4-byte offset of the last index entry.

Record area: each IP record has the form
  ==> IP address [country information][area information]

  The country information can take one of three forms:

    Plain string (the 5th byte of the IP record is neither 0x01 nor 0x02).
    Redirect mode 1 (the 5th byte is 0x01): the following 3 bytes are the
      offset at which the country and area information is stored.
    Redirect mode 2 (the 5th byte is 0x02): the following 3 bytes are the
      offset of the country information; the area information comes right
      after them.

  The area information can take one of two forms: plain string or redirect.

  One final rule: a country record in redirect mode 1 cannot be followed by
  an area record.

Index area: each index entry has the form
  ==> 4-byte starting IP address + 3-byte offset of the IP record

  The IP address in the index entry and the IP address stored in the record
  it points to together form an IP range.  The information that is looked up
  applies to every IP address within that range.

'''
 
import sys
import socket
from struct import pack, unpack
 
class IPInfo(object):
  '''Collection of query functions for a QQWry.Dat database.

  The whole file is read into memory as a byte string (self.img) and every
  lookup works by slicing that string.  All integers stored in the file are
  little-endian.

  Country and area names are returned as UTF-8 encoded byte strings (a plain
  str on Python 2).
  '''

  # Replacements applied to the area text of plain-string records.
  # NOTE(review): presumably these repair area strings that were damaged in
  # the data file -- confirm against the database before changing them.
  _AREA_FIXUPS = {
    b"?": b" telecom ",
    b" The letter ": b"",
    b"[": b" unicom ",
  }

  def __init__(self, dbname):
    '''Read the database into memory and locate its index area.

    dbname -- path of the QQWry.Dat file.

    Raises IOError/OSError if the file cannot be read and struct.error if it
    is shorter than the 8-byte header.
    '''
    self.dbname = dbname

    # Binary mode is required: text mode on Windows translates line endings
    # and would corrupt the data.  "with" closes the file even if read fails.
    with open(dbname, 'rb') as f:
      self.img = f.read()

    # The first 8 bytes hold the offsets of the first and the last index
    # entry.  '<' pins little-endian so the result does not depend on the
    # byte order of the machine running the script.
    (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])

    # Every index entry is 7 bytes long and lastIndex is the offset of the
    # last entry itself, hence the "+ 1".  Floor division keeps the count an
    # integer on Python 3 as well.
    self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

  def _byteAt(self, offset):
    '''Return the byte stored at offset as an integer.

    Slicing (instead of indexing) gives a length-1 byte string on both
    Python 2 and Python 3, which ord() accepts on either.

    Raises IndexError if offset lies outside the image.
    '''
    b = self.img[offset:offset + 1]
    if not b:
      raise IndexError('offset %d is outside the database image' % offset)
    return ord(b)

  @staticmethod
  def _printable(value):
    '''Return value unchanged if it is a str, else decode it from UTF-8.'''
    if isinstance(value, str):
      return value
    return value.decode('utf-8')

  def getString(self, offset = 0):
    '''Read the NUL-terminated string that starts at offset.

    Used for both country and area information.  The stored bytes are
    decoded as gb2312 and returned re-encoded as UTF-8.  If they cannot be
    decoded the placeholder ' The unknown ' is returned instead.
    '''
    # Strings end with a NUL byte (not the ASCII digit "0").
    o2 = self.img.find(b'\0', offset)
    if o2 < 0:
      # No terminator: take everything up to the end of the image rather
      # than letting the -1 silently drop the last byte.
      o2 = len(self.img)
    gb2312_str = self.img[offset:o2]
    try:
      utf8_str = gb2312_str.decode('gb2312').encode('utf-8')
    except UnicodeError:
      return b' The unknown '
    return utf8_str

  def getLong3(self, offset = 0):
    '''Return the 3-byte little-endian integer stored at offset.

    All record offsets in QQWry.Dat are 3 bytes long.

    Raises struct.error if fewer than 3 bytes are available at offset.
    '''
    # unpack('<I') needs exactly 4 bytes, so pad with a zero high byte.
    s = self.img[offset: offset + 3] + b'\0'
    return unpack('<I', s)[0]

  def getAreaAddr(self, offset = 0):
    '''Return the string found at offset, following redirects.

    If the byte at offset is 0x01 or 0x02 the next 3 bytes are the offset of
    the real data and the lookup continues there; otherwise the string
    starts at offset itself.
    '''
    byte = self._byteAt(offset)
    if byte == 1 or byte == 2:
      p = self.getLong3(offset + 1)
      if p == 0:
        # Offset 0 lies inside the 8-byte file header, so it cannot point
        # at a string; treat it as "no information".
        return b''
      return self.getAreaAddr(p)
    return self.getString(offset)

  def getAddr(self, offset, ip = 0):
    '''Return (country, area) for the address data that starts at offset.

    offset -- position just after the 4-byte IP address of a record.
    ip     -- unused; kept so existing callers keep working.
    '''
    o = offset
    byte = self._byteAt(o)

    if byte == 1:
      # Redirect mode 1: [IP][0x01][absolute offset of country and area]
      # Everything lives at the target offset, so start over from there.
      return self.getAddr(self.getLong3(o + 1))

    if byte == 2:
      # Redirect mode 2: [IP][0x02][absolute offset of country][area]
      cArea = self.getAreaAddr(self.getLong3(o + 1))
      # The area information follows the flag byte and the 3-byte offset.
      aArea = self.getAreaAddr(o + 4)
      return (cArea, aArea)

    # Plain form: [IP][country string][area information].  This is also
    # where a mode 1 redirect ends up when its target is a plain string.
    cArea = self.getString(o)
    # cArea was re-encoded as UTF-8, so its length no longer matches the
    # stored bytes; locate the terminator in the image instead.
    end = self.img.find(b'\0', o)
    if end < 0 or end + 1 >= len(self.img):
      aArea = b''
    else:
      # The area information may itself be a redirect, so it has to go
      # through getAreaAddr rather than being read as a plain string.
      aArea = self.getAreaAddr(end + 1)
    aArea = self._AREA_FIXUPS.get(aArea, aArea)
    return (cArea, aArea)

  def find(self, ip, l, r):
    '''Binary-search the index for the entry whose range contains ip.

    ip -- address as an integer.
    l  -- lowest candidate index; its start address must be <= ip.
    r  -- exclusive upper bound of the candidate indexes.

    Returns the largest index in [l, r) whose start address is <= ip.
    '''
    while r - l > 1:
      m = (l + r) // 2
      o = self.firstIndex + m * 7
      new_ip = unpack('<I', self.img[o: o+4])[0]
      # Strict comparison: an address equal to a range start belongs to
      # that range, not to the one before it.
      if ip < new_ip:
        r = m
      else:
        l = m
    return l

  def getIPAddr(self, ip):
    '''Return (country, area) for the dotted-decimal IPv4 address ip.

    Raises socket.error if ip is not a valid IPv4 address string.
    '''
    # Convert the address to an integer (network byte order on the wire).
    ip = unpack('!I', socket.inet_aton(ip))[0]
    # The upper bound is exclusive, so pass indexCount to keep the last
    # index entry reachable.
    i = self.find(ip, 0, self.indexCount)
    o = self.firstIndex + i * 7
    # An index entry is a 4-byte start address followed by the 3-byte
    # offset of the record.
    o2 = self.getLong3(o + 4)
    # The record begins with a 4-byte IP address; skip it.
    (c, a) = self.getAddr(o2 + 4)
    return (c, a)

  def output(self, first, last):
    '''Print index entries first .. last-1, one per line, in the form
    "start-ip record-offset country/area".'''
    for i in range(first, last):
      o = self.firstIndex + i * 7
      # Index addresses are little-endian like every other field; inet_ntoa
      # wants them in network (big-endian) order.
      ip = socket.inet_ntoa(pack('!I', unpack('<I', self.img[o:o+4])[0]))
      offset = self.getLong3(o + 4)
      (c, a) = self.getAddr(offset + 4)
      print("%s %d %s/%s" % (ip, offset,
                             self._printable(c), self._printable(a)))
def getIP(ip):
  '''Look up ip in the qqwry.dat file that sits next to this script and
  return the country and area text joined into one string.'''
  import os
  # Directory of this script, resolved against the current working directory.
  here = os.path.dirname(__file__)
  base = os.path.normpath(os.path.join(os.getcwd(), here))
  (country, area) = IPInfo(base + '/qqwry.dat').getIPAddr(ip)
  return country + area
def _toConsole(text):
  '''Return text in the form print should receive on this platform.'''
  if not isinstance(text, str):
    # Python 3: the lookup produced bytes.  Hand print real text and let it
    # deal with the console encoding.
    return text.decode('utf-8')
  if sys.platform == 'win32' and str is bytes:
    # Python 2 on a Windows console: convert the UTF-8 result back to gb2312
    # so that it is not displayed as garbage.
    return text.decode('utf-8').encode('gb2312')
  return text

def main():
  '''Command-line entry point.

  The first argument is either an IP address or the path of an existing file
  that holds one IP address per line.  For every address one line of the
  form "ip country/area" is printed, using the qqwry.dat file that sits next
  to this script.
  '''
  import os
  if len(sys.argv) < 2:
    sys.stderr.write('usage: %s <ip-address | file-with-one-ip-per-line>\n'
                     % sys.argv[0])
    sys.exit(2)
  target = sys.argv[1]

  _localDir=os.path.dirname(__file__)
  curpath=os.path.normpath(os.path.join(os.getcwd(),_localDir))
  i = IPInfo(curpath+'/qqwry.dat')

  if os.path.exists(target):
    # "with" closes the list file even if a lookup raises.
    with open(target,"r") as ips:
      for line in ips:
        # Drop the line ending (and stray blanks) before the lookup.
        line = line.strip()
        if not line:
          continue
        (c, a) = i.getIPAddr(line)
        print('%s %s/%s' % (line, _toConsole(c), _toConsole(a)))
  else:
    (c, a) = i.getIPAddr(target)
    print('%s %s/%s' % (target, _toConsole(c), _toConsole(a)))

if __name__ == '__main__':
  main()

Querying the "pure" IP database QQWry.Dat with a Python script (Demon's modified version)

I wanted to use Python to read an IPv6 database whose format is similar to that of the "pure" IP database QQWry.Dat, so I searched the Internet and found a Python script on LinuxTOY. It had a few minor problems, so I modified it.


#!/usr/bin/env python
# coding: utf-8

# from: http://linuxtoy.org/files/pyip.py
# Blog: http://linuxtoy.org/archives/python-ip.html
# Modified by Demon
# Blog: http://demon.tw/programming/python-qqwry-dat.html

'''Query the Chunzhen ("pure") IP database with a Python script.

The QQWry.Dat file is laid out as follows:

+--------------+
| File header  |  (8 bytes)
+--------------+
| Record area  |  (variable length)
+--------------+
| Index area   |  (size determined by the file header)
+--------------+

File header: the 4-byte offset of the first index entry followed by the
  4-byte offset of the last index entry.

Record area: each IP record has the form
  ==> IP address [country information][area information]

  The country information can take one of three forms:

    Plain string (the 5th byte of the IP record is neither 0x01 nor 0x02).
    Redirect mode 1 (the 5th byte is 0x01): the following 3 bytes are the
      offset at which the country and area information is stored.
    Redirect mode 2 (the 5th byte is 0x02): the following 3 bytes are the
      offset of the country information; the area information comes right
      after them.

  The area information can take one of two forms: plain string or redirect.

  One final rule: a country record in redirect mode 1 cannot be followed by
  an area record.

Index area: each index entry has the form
  ==> 4-byte starting IP address + 3-byte offset of the IP record

  The IP address in the index entry and the IP address stored in the record
  it points to together form an IP range.  The information that is looked up
  applies to every IP address within that range.

'''

import sys
import socket
from struct import pack, unpack

class IPInfo(object):
  '''Collection of query functions for a QQWry.Dat database.

  The whole file is read into memory as a byte string (self.img) and every
  lookup works by slicing that string.  All integers stored in the file are
  little-endian.

  Country and area names are returned as UTF-8 encoded byte strings (a plain
  str on Python 2).
  '''

  def __init__(self, dbname):
    '''Read the database into memory and locate its index area.

    dbname -- path of the QQWry.Dat file.

    Raises IOError/OSError if the file cannot be read and struct.error if it
    is shorter than the 8-byte header.
    '''
    self.dbname = dbname

    # Binary mode is required: text mode on Windows translates line endings
    # and would corrupt the data.  "with" closes the file even if read fails.
    with open(dbname, 'rb') as f:
      self.img = f.read()

    # The first 8 bytes hold the offsets of the first and the last index
    # entry.  '<' pins little-endian so the result does not depend on the
    # byte order of the machine running the script.
    (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])

    # Every index entry is 7 bytes long and lastIndex is the offset of the
    # last entry itself, hence the "+ 1".  Floor division keeps the count an
    # integer on Python 3 as well.
    self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

  def _byteAt(self, offset):
    '''Return the byte stored at offset as an integer.

    Slicing (instead of indexing) gives a length-1 byte string on both
    Python 2 and Python 3, which ord() accepts on either.

    Raises IndexError if offset lies outside the image.
    '''
    b = self.img[offset:offset + 1]
    if not b:
      raise IndexError('offset %d is outside the database image' % offset)
    return ord(b)

  @staticmethod
  def _printable(value):
    '''Return value unchanged if it is a str, else decode it from UTF-8.'''
    if isinstance(value, str):
      return value
    return value.decode('utf-8')

  def getString(self, offset = 0):
    '''Read the NUL-terminated string that starts at offset.

    Used for both country and area information.  The stored bytes are
    decoded as gb2312 and returned re-encoded as UTF-8.  If they cannot be
    decoded the placeholder ' The unknown ' is returned instead.
    '''
    # Strings end with a NUL byte (not the ASCII digit "0").
    o2 = self.img.find(b'\0', offset)
    if o2 < 0:
      # No terminator: take everything up to the end of the image rather
      # than letting the -1 silently drop the last byte.
      o2 = len(self.img)
    gb2312_str = self.img[offset:o2]
    try:
      utf8_str = gb2312_str.decode('gb2312').encode('utf-8')
    except UnicodeError:
      return b' The unknown '
    return utf8_str

  def getLong3(self, offset = 0):
    '''Return the 3-byte little-endian integer stored at offset.

    All record offsets in QQWry.Dat are 3 bytes long.

    Raises struct.error if fewer than 3 bytes are available at offset.
    '''
    # unpack('<I') needs exactly 4 bytes, so pad with a zero high byte.
    s = self.img[offset: offset + 3] + b'\0'
    return unpack('<I', s)[0]

  def getAreaAddr(self, offset = 0):
    '''Return the string found at offset, following redirects.

    If the byte at offset is 0x01 or 0x02 the next 3 bytes are the offset of
    the real data and the lookup continues there; otherwise the string
    starts at offset itself.
    '''
    byte = self._byteAt(offset)
    if byte == 1 or byte == 2:
      p = self.getLong3(offset + 1)
      if p == 0:
        # Offset 0 lies inside the 8-byte file header, so it cannot point
        # at a string; treat it as "no information".
        return b''
      return self.getAreaAddr(p)
    return self.getString(offset)

  def getAddr(self, offset, ip = 0):
    '''Return (country, area) for the address data that starts at offset.

    offset -- position just after the 4-byte IP address of a record.
    ip     -- unused; kept so existing callers keep working.
    '''
    o = offset
    byte = self._byteAt(o)

    if byte == 1:
      # Redirect mode 1: [IP][0x01][absolute offset of country and area]
      # Everything lives at the target offset, so start over from there.
      return self.getAddr(self.getLong3(o + 1))

    if byte == 2:
      # Redirect mode 2: [IP][0x02][absolute offset of country][area]
      cArea = self.getAreaAddr(self.getLong3(o + 1))
      # The area information follows the flag byte and the 3-byte offset.
      aArea = self.getAreaAddr(o + 4)
      return (cArea, aArea)

    # Plain form: [IP][country string][area information].  This is also
    # where a mode 1 redirect ends up when its target is a plain string.
    cArea = self.getString(o)
    # cArea was re-encoded as UTF-8, so its length no longer matches the
    # stored bytes; locate the terminator in the image instead.
    end = self.img.find(b'\0', o)
    if end < 0 or end + 1 >= len(self.img):
      aArea = b''
    else:
      # The area information may itself be a redirect, so it has to go
      # through getAreaAddr rather than being read as a plain string.
      aArea = self.getAreaAddr(end + 1)
    return (cArea, aArea)

  def find(self, ip, l, r):
    '''Binary-search the index for the entry whose range contains ip.

    ip -- address as an integer.
    l  -- lowest candidate index; its start address must be <= ip.
    r  -- exclusive upper bound of the candidate indexes.

    Returns the largest index in [l, r) whose start address is <= ip.
    '''
    while r - l > 1:
      m = (l + r) // 2
      o = self.firstIndex + m * 7
      new_ip = unpack('<I', self.img[o: o+4])[0]
      # Strict comparison: an address equal to a range start belongs to
      # that range, not to the one before it.
      if ip < new_ip:
        r = m
      else:
        l = m
    return l

  def getIPAddr(self, ip):
    '''Return (country, area) for the dotted-decimal IPv4 address ip.

    Raises socket.error if ip is not a valid IPv4 address string.
    '''
    # Convert the address to an integer (network byte order on the wire).
    ip = unpack('!I', socket.inet_aton(ip))[0]
    # The upper bound is exclusive, so pass indexCount to keep the last
    # index entry reachable.
    i = self.find(ip, 0, self.indexCount)
    o = self.firstIndex + i * 7
    # An index entry is a 4-byte start address followed by the 3-byte
    # offset of the record.
    o2 = self.getLong3(o + 4)
    # The record begins with a 4-byte IP address; skip it.
    (c, a) = self.getAddr(o2 + 4)
    return (c, a)

  def output(self, first, last):
    '''Print index entries first .. last-1, one per line, in the form
    "start-ip record-offset country/area".'''
    for i in range(first, last):
      o = self.firstIndex + i * 7
      # Index addresses are little-endian like every other field; inet_ntoa
      # wants them in network (big-endian) order.
      ip = socket.inet_ntoa(pack('!I', unpack('<I', self.img[o:o+4])[0]))
      offset = self.getLong3(o + 4)
      (c, a) = self.getAddr(offset + 4)
      print("%s %d %s/%s" % (ip, offset,
                             self._printable(c), self._printable(a)))


def _toConsole(text):
  '''Return text in the form print should receive on this platform.'''
  if not isinstance(text, str):
    # Python 3: the lookup produced bytes.  Hand print real text and let it
    # deal with the console encoding.
    return text.decode('utf-8')
  if sys.platform == 'win32' and str is bytes:
    # Python 2 on a Windows console: convert the UTF-8 result back to gb2312
    # so that it is not displayed as garbage.
    return text.decode('utf-8').encode('gb2312')
  return text

def main():
  '''Command-line entry point.

  Looks up the IP address given as the first argument in the QQWry.Dat file
  of the current directory and prints one line "ip country/area".
  '''
  if len(sys.argv) < 2:
    sys.stderr.write('usage: %s <ip-address>\n' % sys.argv[0])
    sys.exit(2)
  ip = sys.argv[1]

  i = IPInfo('QQWry.Dat')
  (c, a) = i.getIPAddr(ip)
  print('%s %s/%s' % (ip, _toConsole(c), _toConsole(a)))

if __name__ == '__main__':
  main()

# changelog
# Date: 2009-05-29
# 1. Following the suggestion of a commenter below, revised "o += len(cArea) + 1"
#    http://linuxtoy.org/archives/python-ip.html#comment-113960
#    because by that point the resulting string has been re-encoded as UTF-8, so its length changes!