mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
1.增加bench_test
2.增加search_test 3.修改ip2Region.py及类名,改为xdbSearcher Signed-off-by: 厉害的花花 <117415792@qq.com>
This commit is contained in:
parent
b9ed424125
commit
b8b3cde805
@ -5,12 +5,12 @@
|
||||
### 完全基于文件的查询
|
||||
|
||||
```python
|
||||
import ip2Region
|
||||
from xdbSearcher import XdbSearcher
|
||||
|
||||
if __name__ == '__main__':
|
||||
def searchWithFile():
|
||||
# 1. 创建查询对象
|
||||
dbPath = "./data/ip2region.xdb";
|
||||
searcher = ip2Region.Ip2Region(dbfile=dbPath)
|
||||
dbPath = "../../data/ip2region.xdb"
|
||||
searcher = XdbSearcher(dbfile=dbPath)
|
||||
|
||||
# 2. 执行查询
|
||||
ip = "1.2.3.4"
|
||||
@ -26,21 +26,21 @@ if __name__ == '__main__':
|
||||
我们可以提前从 `xdb` 文件中加载出来 `VectorIndex` 数据,然后全局缓存,每次创建 Searcher 对象的时候使用全局的 VectorIndex 缓存可以减少一次固定的 IO 操作,从而加速查询,减少 IO 压力。
|
||||
|
||||
```python
|
||||
import ip2Region
|
||||
from xdbSearcher import XdbSearcher
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 1. 预先加载 VectorIndex 缓存
|
||||
dbPath = "./data/ip2region.xdb";
|
||||
vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile=dbPath)
|
||||
def searchWithVectorIndex():
|
||||
# 1. 预先加载 VectorIndex 缓存
|
||||
dbPath = "../../data/ip2region.xdb"
|
||||
vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)
|
||||
|
||||
# 2. 使用上面的缓存创建查询对象, 同时也要加载 xdb 文件
|
||||
searcher = ip2Region.Ip2Region(dbfile=dbPath, vectorIndex=vi)
|
||||
searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
|
||||
|
||||
# 3. 执行查询
|
||||
ip = "1.2.3.4"
|
||||
region_str = searcher.searchByIPStr(ip)
|
||||
region_str = searcher.search(ip)
|
||||
print(region_str)
|
||||
|
||||
|
||||
# 4. 关闭searcher
|
||||
searcher.close()
|
||||
```
|
||||
@ -50,25 +50,65 @@ if __name__ == '__main__':
|
||||
我们也可以预先加载整个 ip2region.xdb 的数据到内存,然后基于这个数据创建查询对象来实现完全基于内存的查询,类似之前的 memory search。
|
||||
|
||||
```python
|
||||
import ip2Region
|
||||
from xdbSearcher import XdbSearcher
|
||||
|
||||
if __name__ == '__main__':
|
||||
def searchWithContent():
|
||||
# 1. 预先加载整个 xdb
|
||||
dbPath = "./data/ip2region.xdb";
|
||||
cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
|
||||
dbPath = "../../data/ip2region.xdb";
|
||||
cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
|
||||
|
||||
# 2. 仅需要使用上面的全文件缓存创建查询对象, 不需要传源 xdb 文件
|
||||
searcher = ip2Region.Ip2Region(contentBuff=cb)
|
||||
searcher = XdbSearcher(contentBuff=cb)
|
||||
|
||||
# 3. 执行查询
|
||||
ip = "1.2.3.4"
|
||||
region_str = searcher.searchByIPStr(ip)
|
||||
region_str = searcher.search(ip)
|
||||
print(region_str)
|
||||
|
||||
|
||||
# 4. 关闭searcher
|
||||
searcher.close()
|
||||
|
||||
```
|
||||
# 查询测试
|
||||
|
||||
通过 `search_test.py` 脚本来进行查询测试:
|
||||
```bash
|
||||
➜ python git:(python_dev) ✗ python3 ./search_test.py
|
||||
python3 search_test.py [command options]
|
||||
options:
|
||||
--db string ip2region binary xdb file path
|
||||
--cache-policy string cache policy: file/vectorIndex/content
|
||||
```
|
||||
|
||||
例如:使用默认的 data/ip2region.xdb 进行查询测试:
|
||||
```bash
|
||||
➜ python git:(python_dev) ✗ python3 ./search_test.py --db=../../data/ip2region.xdb --cache-policy=content
|
||||
ip2region xdb searcher test program, cachePolicy: content
|
||||
type 'quit' to exit
|
||||
ip2region>> 1.2.3.4
|
||||
region :美国|0|华盛顿|0|谷歌 , took 0.0689 ms
|
||||
ip2region>> quit
|
||||
searcher test program exited, thanks for trying
|
||||
```
|
||||
|
||||
输入 ip 即可进行查询测试。也可以分别设置 `cache-policy` 为 file/vectorIndex/content 来测试三种不同缓存实现的效率。
|
||||
|
||||
# bench 测试
|
||||
|
||||
通过 `bench_test.py` 脚本来进行自动 bench 测试,一方面确保 `xdb` 文件没有错误,另一方面通过大量的查询测试平均查询性能:
|
||||
```bash
|
||||
➜ python git:(python_dev) ✗ python3 ./bench_test.py
|
||||
python bench_test.py [command options]
|
||||
options:
|
||||
--db string ip2region binary xdb file path
|
||||
--src string source ip text file path
|
||||
--cache-policy string cache policy: file/vectorIndex/content
|
||||
```
|
||||
|
||||
例如:通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 来进行 bench 测试:
|
||||
```bash
|
||||
➜ python git:(python_dev) ✗ python3 ./bench_test.py --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt --cache-policy=content
|
||||
Bench finished, [cachePolicy: content, total: 3417955, took: 34.93 s, cost: 0.0094 ms/op]
|
||||
```
|
||||
|
||||
可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同的缓存实现的性能。
|
||||
@Note:请注意 bench 使用的 src 文件需要是生成对应的 xdb 文件的相同的源文件。
|
||||
120
binding/python/bench_test.py
Normal file
120
binding/python/bench_test.py
Normal file
@ -0,0 +1,120 @@
|
||||
#
|
||||
# bench_test.py
|
||||
# bench_test
|
||||
#
|
||||
# Created by luckydog on 2022/7/1.
|
||||
# Copyright © 2022年 luckydog. All rights reserved.
|
||||
#
|
||||
from ast import main
|
||||
import io
|
||||
|
||||
from xdbSearcher import XdbSearcher
|
||||
import argparse
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
def printHelp():
    """Print the command-line usage of the bench program to stdout."""
    usage_lines = (
        "python bench_test.py [command options]",
        "options: ",
        " --db string ip2region binary xdb file path",
        " --src string source ip text file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    )
    # A single print of the joined lines emits exactly the same text,
    # including the one trailing newline after the last line.
    print("\n".join(usage_lines))
|
||||
|
||||
def trim(string):
    """Return ``string`` without leading and trailing space characters.

    Only the plain space (0x20) is removed; tabs, newlines and other
    whitespace are kept.

    Both ``str`` and bytes-like values are accepted. The bench reads its
    source file in binary mode, and comparing a bytes slice with the str
    ``' '`` never matches, so bytes input used to come back untouched.

    The work is delegated to ``strip`` instead of recursing once per removed
    character, so arbitrarily long runs of spaces cannot hit the interpreter
    recursion limit.

    :param string: the ``str`` or ``bytes``/``bytearray`` value to trim
    :return: the trimmed value, of the same type as the input
    """
    if isinstance(string, (bytes, bytearray)):
        return string.strip(b" ")
    return string.strip(" ")
|
||||
|
||||
def start_bench(dbFile="", srcFile="", cachePolicy="vectorIndex"):
    """Verify an xdb file against its source text file and report timings.

    Every non-empty line of ``srcFile`` must look like
    ``start_ip|end_ip|region``. For each segment five addresses are searched
    (start, end and three points in between) and each result must equal the
    region text of the line. The first problem is printed and ends the run;
    this function reports through stdout and does not raise.

    :param dbFile: path of the ip2region binary xdb file
    :param srcFile: path of the UTF-8 source ip text file
    :param cachePolicy: ``"file"``, ``"vectorIndex"``; any other value
        selects the full-content cache
    :return: None
    """
    # Create the searcher for the requested cache policy. A loader that
    # returns None is reported and aborts the run instead of being handed
    # to the searcher constructor.
    try:
        if cachePolicy == "file":
            searcher = XdbSearcher(dbfile=dbFile)
        elif cachePolicy == "vectorIndex":
            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
            if vi is None:
                print(f"failed to load vector index from {dbFile}\n")
                return
            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
        else:
            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
            if cb is None:
                print(f"failed to load xdb content from {dbFile}\n")
                return
            searcher = XdbSearcher(contentBuff=cb)
    except Exception as err:
        print(err)
        return

    # do the bench test
    count = 0
    costs = 0.0
    sTime = time.perf_counter()
    try:
        # Opening is guarded on its own so that only real open failures are
        # reported as such.
        try:
            f = io.open(srcFile, "r", encoding="utf-8")
        except (OSError, TypeError, ValueError) as err:
            print(f"failed to open source text file :{err}")
            return

        with f:
            # Text-mode iteration has no per-line length cap and normalizes
            # "\r\n", so a stray "\r" cannot end up in the region text.
            for raw in f:
                line = trim(raw.rstrip("\n"))
                if len(line) < 1:
                    # skip blank lines instead of silently ending the bench
                    continue

                ps = line.split("|", 2)
                if len(ps) != 3:
                    print(f"invalid ip segment line :{line}")
                    return

                # ip2long is invoked through the class with a placeholder
                # for self, exactly as the original script did.
                sip = XdbSearcher.ip2long(None, ps[0])
                eip = XdbSearcher.ip2long(None, ps[1])
                if sip > eip:
                    print(f"start ip({ps[0]}) should not be greater than end ip({ps[1]})")
                    return

                mip = (sip + eip) >> 1
                for ip in [sip, (sip + mip) >> 1, mip, (mip + eip) >> 1, eip]:
                    try:
                        cTime = time.perf_counter()
                        region = searcher.search(ip)
                        costs = costs + (time.perf_counter() - cTime)
                    except Exception as error:
                        print(f"failed to search ip :{ip}, err: {error}")
                        return

                    if region is None:
                        print(f"failed to search ip :{ip}")
                        return
                    if region != ps[2]:
                        print(f"failed search({ip}) with ({region} != {ps[2]})")
                        return
                    count = count + 1
    except Exception as err:
        # e.g. an unparsable ip in the source file or a decoding error
        print(f"bench aborted :{err}")
        return
    finally:
        # close the searcher on every exit path, including early returns
        searcher.close()

    took = round(time.perf_counter() - sTime, 2)
    # an empty source file performs no search; avoid dividing by zero
    avg = round(costs / count * 1000, 4) if count else 0.0
    print(f"Bench finished, [cachePolicy: {cachePolicy}, total: {count}, took: {took} s, cost: {avg} ms/op]")
|
||||
|
||||
|
||||
# Script entry point: parse the command line and run the bench.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        printHelp()
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(0)

    parse = argparse.ArgumentParser()
    parse.add_argument("--db", help="ip2region binary xdb file path")
    parse.add_argument("--src", help="source ip text file path")
    # Without an explicit default argparse yields None, which start_bench
    # treats as the content policy rather than its own "vectorIndex" default.
    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
                       default="vectorIndex",
                       help="cache policy: file/vectorIndex/content")
    args = parse.parse_args()

    # Both paths are needed for a bench run; show the usage instead of
    # failing later with an obscure error on a None path.
    if not args.db or not args.src:
        printHelp()
        sys.exit(1)

    start_bench(dbFile=args.db, srcFile=args.src, cachePolicy=args.cache_policy)
|
||||
@ -1,18 +1,52 @@
|
||||
import ip2Region
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 1. 预先加载整个 xdb
|
||||
dbPath = "./data/ip2region.xdb";
|
||||
# vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile="./data/ip2region.xdb")
|
||||
cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
|
||||
from xdbSearcher import XdbSearcher
|
||||
|
||||
def searchWithFile():
|
||||
# 1. 创建查询对象
|
||||
dbPath = "../../data/ip2region.xdb"
|
||||
searcher = XdbSearcher(dbfile=dbPath)
|
||||
|
||||
# 2. 仅需要使用上面的全文件缓存创建查询对象, 不需要传源 xdb 文件
|
||||
searcher = ip2Region.Ip2Region(contentBuff=cb)
|
||||
|
||||
# 3. 执行查询
|
||||
# 2. 执行查询
|
||||
ip = "1.2.3.4"
|
||||
region_str = searcher.searchByIPStr(ip)
|
||||
print(region_str)
|
||||
|
||||
# 3. 关闭searcher
|
||||
searcher.close()
|
||||
|
||||
def searchWithVectorIndex():
    """Example: search one ip using a preloaded VectorIndex cache.

    The vector index is read from the xdb file once and handed to the
    searcher together with the xdb path. The region found for "1.2.3.4" is
    printed to stdout.
    """
    # 1. Preload the VectorIndex cache from the xdb file
    dbPath = "../../data/ip2region.xdb"
    vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)

    # 2. Create the searcher with the cache above; the xdb file path is
    #    passed as well
    searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)

    try:
        # 3. Run the search
        ip = "1.2.3.4"
        region_str = searcher.search(ip)
        print(region_str)
    finally:
        # 4. Close the searcher, also when the search raised
        searcher.close()
|
||||
searcher.close()
|
||||
|
||||
def searchWithContent():
    """Example: search one ip using the whole xdb content cached in memory.

    The complete xdb file is loaded once and the searcher is created from
    that buffer only. The region found for "1.2.3.4" is printed to stdout.
    """
    # 1. Preload the whole xdb file
    dbPath = "../../data/ip2region.xdb"
    cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)

    # 2. Create the searcher from the content buffer only; the source xdb
    #    file path is not passed
    searcher = XdbSearcher(contentBuff=cb)

    try:
        # 3. Run the search
        ip = "1.2.3.4"
        region_str = searcher.search(ip)
        print(region_str)
    finally:
        # 4. Close the searcher, also when the search raised
        searcher.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
searchWithContent()
|
||||
|
||||
95
binding/python/search_test.py
Normal file
95
binding/python/search_test.py
Normal file
@ -0,0 +1,95 @@
|
||||
#
|
||||
# search_test.py
|
||||
# search_test
|
||||
#
|
||||
# Created by luckydog on 2022/7/1.
|
||||
# Copyright © 2022年 luckydog. All rights reserved.
|
||||
#
|
||||
|
||||
from xdbSearcher import XdbSearcher
|
||||
import argparse
|
||||
import time
|
||||
import sys
|
||||
|
||||
def printHelp():
    """Print the command-line usage of the search test program to stdout."""
    usage_lines = (
        "python3 search_test.py [command options]",
        "options: ",
        " --db string ip2region binary xdb file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    )
    # A single print of the joined lines emits exactly the same text,
    # including the one trailing newline after the last line.
    print("\n".join(usage_lines))
|
||||
|
||||
|
||||
def trim(string):
    """Return ``string`` without leading and trailing space characters.

    Only the plain space (0x20) is removed; tabs, newlines and other
    whitespace are kept. Both ``str`` and bytes-like values are accepted.

    The work is delegated to ``strip`` instead of recursing once per removed
    character, so a prompt line padded with a very long run of spaces cannot
    hit the interpreter recursion limit.

    :param string: the ``str`` or ``bytes``/``bytearray`` value to trim
    :return: the trimmed value, of the same type as the input
    """
    if isinstance(string, (bytes, bytearray)):
        return string.strip(b" ")
    return string.strip(" ")
|
||||
|
||||
|
||||
def start_search(dbFile="", cachePolicy="vectorIndex"):
    """Run the interactive search prompt against an xdb file.

    Lines are read from stdin until ``quit`` is entered or the input ends.
    Each valid ip is searched and printed with the time the search took.
    Errors are printed; this function does not raise.

    :param dbFile: path of the ip2region binary xdb file
    :param cachePolicy: ``"file"``, ``"vectorIndex"``; any other value
        selects the full-content cache
    :return: None
    """
    # Create the searcher for the requested cache policy. A loader that
    # returns None is reported and aborts instead of being handed to the
    # searcher constructor.
    try:
        if cachePolicy == "file":
            searcher = XdbSearcher(dbfile=dbFile)
        elif cachePolicy == "vectorIndex":
            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
            if vi is None:
                print(f"failed to load vector index from {dbFile}\n")
                return
            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
        else:
            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
            if cb is None:
                print(f"failed to load xdb content from {dbFile}\n")
                return
            searcher = XdbSearcher(contentBuff=cb)
    except Exception as err:
        print(err)
        return

    # opening banner
    print(f"ip2region xdb searcher test program, cachePolicy: {cachePolicy}\ntype 'quit' to exit")
    try:
        while True:
            try:
                line = trim(input("ip2region>> "))
            except (EOFError, KeyboardInterrupt):
                # end of input or Ctrl-C: leave the prompt like "quit" does
                print()
                break

            if len(line) < 2:
                continue
            if line == "quit":
                break

            # isip is invoked through the class with a placeholder for self,
            # exactly as the original script did.
            if not XdbSearcher.isip(None, ip=line):
                print("Error: invalid ip address")
                continue

            start = time.time()
            try:
                region_str = searcher.searchByIPStr(line)
            except Exception as error:
                print(error)
                return

            print(f"region :{region_str} , took {round((time.time()-start)*1000.00, 4)} ms")
    finally:
        # close the searcher on every exit path, including the error return
        searcher.close()

    # quit
    print("searcher test program exited, thanks for trying")
|
||||
|
||||
|
||||
# Script entry point: parse the command line and start the search prompt.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        printHelp()
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(0)

    parse = argparse.ArgumentParser()
    parse.add_argument("--db", help="ip2region binary xdb file path")
    # Without an explicit default argparse yields None, which start_search
    # treats as the content policy rather than its own "vectorIndex" default.
    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
                       default="vectorIndex",
                       help="cache policy: file/vectorIndex/content")
    args = parse.parse_args()

    # The xdb path is needed; show the usage instead of failing later with
    # an obscure error on a None path.
    if not args.db:
        printHelp()
        sys.exit(1)

    start_search(dbFile=args.db, cachePolicy=args.cache_policy)
|
||||
@ -20,7 +20,7 @@ VectorIndexSize = 8
|
||||
SegmentIndexSize = 14
|
||||
|
||||
|
||||
class Ip2Region(object):
|
||||
class XdbSearcher(object):
|
||||
__f = None
|
||||
|
||||
# the minimal memory allocation.
|
||||
@ -53,9 +53,18 @@ class Ip2Region(object):
|
||||
def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
|
||||
self.initDatabase(dbfile, vectorIndex, contentBuff)
|
||||
|
||||
def search(self, ip):
    """Search the region for ``ip``.

    :param ip: a dotted ip string, an all-digit string holding the integer
        form of the address, or the integer itself
    :return: whatever ``searchByIPLong`` returns for the address
    """
    if isinstance(ip, str):
        # searchByIPLong bit-shifts its argument, so an all-digit string has
        # to become an int first; any other string goes through ip2long.
        ip = int(ip) if ip.isdigit() else self.ip2long(ip)
    return self.searchByIPLong(ip)
|
||||
|
||||
def searchByIPStr(self, ip):
    """Search the region for an ip given as a string.

    :param ip: a dotted ip string, or an all-digit string holding the
        integer form of the address
    :return: whatever ``searchByIPLong`` returns for the address
    """
    # searchByIPLong bit-shifts its argument, so an all-digit string has to
    # become an int first; any other string goes through ip2long.
    ip = int(ip) if ip.isdigit() else self.ip2long(ip)
    return self.searchByIPLong(ip)
|
||||
|
||||
def searchByIPLong(self, ip):
|
||||
# locate the segment index block based on the vector index
|
||||
sPtr = ePtr = 0
|
||||
il0 = (int)((ip >> 24) & 0xFF)
|
||||
@ -169,13 +178,13 @@ if __name__ == '__main__':
|
||||
]
|
||||
# 1. 缓存
|
||||
dbPath = "./data/ip2region.xdb";
|
||||
cb = Ip2Region.loadContentFromFile(dbfile=dbPath)
|
||||
cb = Searcher.loadContentFromFile(dbfile=dbPath)
|
||||
|
||||
# 2. 创建查询对象
|
||||
searcher = Ip2Region(contentBuff=cb)
|
||||
searcher = Searcher(contentBuff=cb)
|
||||
|
||||
# 3. 执行查询
|
||||
ip = "1.2.3.4"
|
||||
# ip = "1.2.3.4"
|
||||
for ip in ip_array:
|
||||
region_str = searcher.searchByIPStr(ip)
|
||||
print(region_str)
|
||||
Loading…
x
Reference in New Issue
Block a user