1.增加bench_test

2.增加search_test
3.修改ip2Region.py及类名,改为xdbSearcher

Signed-off-by: 厉害的花花 <117415792@qq.com>
This commit is contained in:
厉害的花花 2022-07-12 09:46:44 +08:00
parent b9ed424125
commit b8b3cde805
5 changed files with 334 additions and 36 deletions

View File

@ -5,12 +5,12 @@
### 完全基于文件的查询
```python
import ip2Region
from xdbSearcher import XdbSearcher
if __name__ == '__main__':
def searchWithFile():
# 1. 创建查询对象
dbPath = "./data/ip2region.xdb";
searcher = ip2Region.Ip2Region(dbfile=dbPath)
dbPath = "../../data/ip2region.xdb"
searcher = XdbSearcher(dbfile=dbPath)
# 2. 执行查询
ip = "1.2.3.4"
@ -26,21 +26,21 @@ if __name__ == '__main__':
我们可以提前从 `xdb` 文件中加载出来 `VectorIndex` 数据,然后全局缓存,每次创建 Searcher 对象的时候使用全局的 VectorIndex 缓存可以减少一次固定的 IO 操作,从而加速查询,减少 IO 压力。
```python
import ip2Region
from xdbSearcher import XdbSearcher
if __name__ == '__main__':
# 1. 预先加载 VectorIndex 缓存
dbPath = "./data/ip2region.xdb";
vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile=dbPath)
def searchWithVectorIndex():
# 1. 预先加载 VectorIndex 缓存
dbPath = "../../data/ip2region.xdb"
vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)
# 2. 使用上面的缓存创建查询对象, 同时也要加载 xdb 文件
searcher = ip2Region.Ip2Region(dbfile=dbPath, vectorIndex=vi)
searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
# 3. 执行查询
ip = "1.2.3.4"
region_str = searcher.searchByIPStr(ip)
region_str = searcher.search(ip)
print(region_str)
# 4. 关闭searcher
searcher.close()
```
@ -50,25 +50,65 @@ if __name__ == '__main__':
我们也可以预先加载整个 ip2region.xdb 的数据到内存,然后基于这个数据创建查询对象来实现完全基于内存的查询,类似之前的 memory search。
```python
import ip2Region
from xdbSearcher import XdbSearcher
if __name__ == '__main__':
def searchWithContent():
# 1. 预先加载整个 xdb
dbPath = "./data/ip2region.xdb";
cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
dbPath = "../../data/ip2region.xdb";
cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
# 2. 仅需要使用上面的全文件缓存创建查询对象, 不需要传源 xdb 文件
searcher = ip2Region.Ip2Region(contentBuff=cb)
searcher = XdbSearcher(contentBuff=cb)
# 3. 执行查询
ip = "1.2.3.4"
region_str = searcher.searchByIPStr(ip)
region_str = searcher.search(ip)
print(region_str)
# 4. 关闭searcher
searcher.close()
```
# 查询测试
通过 `search_test.py` 脚本来进行查询测试:
```bash
➜ python git:(python_dev) ✗ python3 ./search_test.py
python3 search_test.py [command options]
options:
--db string ip2region binary xdb file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:使用默认的 data/ip2region.xdb 进行查询测试:
```bash
➜ python git:(python_dev) ✗ python3 ./search_test.py --db=../../data/ip2region.xdb --cache-policy=content
ip2region xdb searcher test program, cachePolicy: content
type 'quit' to exit
ip2region>> 1.2.3.4
region :美国|0|华盛顿|0|谷歌 , took 0.0689 ms
ip2region>> quit
searcher test program exited, thanks for trying
```
输入 ip 即可进行查询测试。也可以分别设置 `cache-policy` 为 file/vectorIndex/content 来测试三种不同缓存实现的效率。
# bench 测试
通过 `bench_test.py` 脚本来进行自动 bench 测试,一方面确保 `xdb` 文件没有错误,另一方面通过大量的查询测试平均查询性能:
```bash
➜ python git:(python_dev) ✗ python3 ./bench_test.py
python bench_test.py [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--cache-policy string cache policy: file/vectorIndex/content
```
例如:通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 来进行 bench 测试:
```bash
➜ python git:(python_dev) ✗ python3 ./bench_test.py --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt --cache-policy=content
Bench finished, [cachePolicy: content, total: 3417955, took: 34.93 s, cost: 0.0094 ms/op]
```
可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同的缓存实现的性能。
@Note:请注意 bench 使用的 src 文件需要是生成对应的 xdb 文件的相同的源文件。

View File

@ -0,0 +1,120 @@
#
# bench_test.py
# bench_test
#
# Created by luckydog on 2022/7/1.
# Copyright © 2022年 luckydog. All rights reserved.
#
from ast import main
import io
from xdbSearcher import XdbSearcher
import argparse
import time
import sys
def printHelp():
    """Print the command-line usage of the bench script to stdout."""
    usage_lines = (
        "python bench_test.py [command options]",
        "options: ",
        " --db string ip2region binary xdb file path",
        " --src string source ip text file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    )
    for text in usage_lines:
        print(text)
def trim(string):
    """Return ``string`` without its leading and trailing space characters.

    Only the space character is removed; tabs and line breaks are kept.
    Both ``str`` and ``bytes`` values are accepted, because the bench loop
    hands over the raw bytes it read from the source file.

    Args:
        string: the text (``str``) or raw line (``bytes``) to trim.

    Returns:
        A value of the same type as the input with the surrounding spaces
        removed.
    """
    # A single strip() call replaces the one-character-per-call recursion,
    # which hit the recursion limit on long runs of spaces.
    if isinstance(string, (bytes, bytearray)):
        return string.strip(b" ")
    return string.strip(" ")
def start_bench(dbFile="", srcFile="", cachePolicy="vectorIndex"):
    """Check an xdb file against its source text file and report the timing.

    Every non-empty line of ``srcFile`` must have the form
    ``start_ip|end_ip|region``. Five addresses spread over each segment are
    looked up and each result is compared with the region of that line. The
    run stops at the first malformed line, failed lookup or mismatch.

    Args:
        dbFile: path of the ip2region xdb file.
        srcFile: path of the source ip text file.
        cachePolicy: ``"file"`` or ``"vectorIndex"``; any other value loads
            the whole xdb content into memory.

    Returns:
        None. Progress, errors and the final summary are printed to stdout.
    """
    # build the searcher that matches the requested cache policy
    try:
        if cachePolicy == "file":
            searcher = XdbSearcher(dbfile=dbFile)
        elif cachePolicy == "vectorIndex":
            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
            if vi is None:
                print(f"failed to load vector index from {dbFile}\n")
                return
            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
        else:
            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
            if cb is None:
                print(f"failed to load xdb content from {dbFile}\n")
                return
            searcher = XdbSearcher(contentBuff=cb)
    except Exception as err:
        print(err)
        return

    # do the bench test
    count = 0
    costs = 0
    sTime = time.time()
    try:
        try:
            f = io.open(srcFile, "rb")
        except (OSError, TypeError) as err:
            print(f"failed to open source text file :{err}")
            return
        # the with block closes the source file on every exit path
        with f:
            # iterating the file yields whole lines whatever their length
            for raw in f:
                # strip() also drops "\r\n", so CRLF files compare correctly
                line = raw.decode("utf-8").strip()
                if not line:
                    continue
                ps = line.split("|", 2)
                if len(ps) != 3:
                    print(f"invalid ip segment line :{line}")
                    return
                sip = XdbSearcher.ip2long(None, ps[0])
                eip = XdbSearcher.ip2long(None, ps[1])
                if sip > eip:
                    print(f"start ip({ps[0]}) should not be greater than end ip({ps[1]})")
                    return
                mip = (sip + eip) >> 1
                # both ends, the middle and the two quarter points of the segment
                for ip in (sip, (sip + mip) >> 1, mip, (mip + eip) >> 1, eip):
                    try:
                        cTime = time.time()
                        region = searcher.search(ip)
                        costs += time.time() - cTime
                    except Exception:
                        print(f"failed to search ip :{ip}")
                        return
                    if region is None:
                        print(f"failed to search ip :{ip}")
                        return
                    if region != ps[2]:
                        print(f"failed search({ip}) with ({region} != {ps[2]})")
                        return
                    count += 1
        # an empty source file performs no lookup; avoid dividing by zero
        avg_ms = costs / count * 1000 if count else 0
        print(f"Bench finished, [cachePolicy: {cachePolicy}, total: {count}, took: {round(time.time() - sTime, 2)} s, cost: {round(avg_ms, 4)} ms/op]")
    except Exception as err:
        # anything that is not an open failure (bad encoding, bad ip, ...)
        print(f"bench test aborted :{err}")
    finally:
        # close the searcher at last, on success and on every early return
        searcher.close()
if __name__ == '__main__':
    # without any argument there is nothing to run: show the usage
    if len(sys.argv) < 2:
        printHelp()
        sys.exit(0)
    parse = argparse.ArgumentParser()
    parse.add_argument("--db", help="ip2region binary xdb file path")
    parse.add_argument("--src", help="source ip text file path")
    # default matches start_bench(); without it an omitted option arrives as
    # None and silently selects the content policy
    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
                       default="vectorIndex",
                       help="cache policy: file/vectorIndex/content")
    args = parse.parse_args()
    # both paths are mandatory for a bench run
    if not args.db or not args.src:
        printHelp()
        sys.exit(1)
    start_bench(dbFile=args.db, srcFile=args.src, cachePolicy=args.cache_policy)

View File

@ -1,18 +1,52 @@
import ip2Region
if __name__ == '__main__':
# 1. 预先加载整个 xdb
dbPath = "./data/ip2region.xdb";
# vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile="./data/ip2region.xdb")
cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
from xdbSearcher import XdbSearcher
def searchWithFile():
    """Look up one sample IP with a searcher created from the xdb file path."""
    # 1. create the searcher directly from the xdb file
    dbPath = "../../data/ip2region.xdb"
    searcher = XdbSearcher(dbfile=dbPath)
    try:
        # 2. run the query
        ip = "1.2.3.4"
        region_str = searcher.searchByIPStr(ip)
        print(region_str)
    finally:
        # 3. close the searcher, also when the query raises
        searcher.close()
def searchWithVectorIndex():
    """Look up one sample IP with a searcher that reuses a preloaded VectorIndex."""
    # 1. preload the VectorIndex from the xdb file
    dbPath = "../../data/ip2region.xdb"
    vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)
    # 2. create the searcher with the cached index; the xdb file is still needed
    searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
    try:
        # 3. run the query
        ip = "1.2.3.4"
        region_str = searcher.search(ip)
        print(region_str)
    finally:
        # 4. close the searcher exactly once, also when the query raises
        searcher.close()
def searchWithContent():
    """Look up one sample IP with a searcher built from the preloaded xdb content."""
    # 1. preload the whole xdb file
    dbPath = "../../data/ip2region.xdb"
    cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
    # 2. create the searcher from the content buffer only; no xdb path is passed
    searcher = XdbSearcher(contentBuff=cb)
    try:
        # 3. run the query
        ip = "1.2.3.4"
        region_str = searcher.search(ip)
        print(region_str)
    finally:
        # 4. close the searcher, also when the query raises
        searcher.close()
# script entry point: run the full-content example
if __name__ == "__main__":
    searchWithContent()

View File

@ -0,0 +1,95 @@
#
# search_test.py
# search_test
#
# Created by luckydog on 2022/7/1.
# Copyright © 2022年 luckydog. All rights reserved.
#
from xdbSearcher import XdbSearcher
import argparse
import time
import sys
def printHelp():
    """Print the command-line usage of the search test script to stdout."""
    usage_lines = (
        "python3 search_test.py [command options]",
        "options: ",
        " --db string ip2region binary xdb file path",
        " --cache-policy string cache policy: file/vectorIndex/content",
    )
    for text in usage_lines:
        print(text)
def trim(string):
    """Return ``string`` without its leading and trailing space characters.

    Only the space character is removed; tabs and line breaks are kept.
    ``bytes`` input is accepted as well as ``str``.

    Args:
        string: the text to trim.

    Returns:
        A value of the same type as the input with the surrounding spaces
        removed.
    """
    # A single strip() call replaces the one-character-per-call recursion,
    # which hit the recursion limit on long runs of spaces.
    if isinstance(string, (bytes, bytearray)):
        return string.strip(b" ")
    return string.strip(" ")
def start_search(dbFile="", cachePolicy="vectorIndex"):
    """Run an interactive prompt that looks up the IPs typed by the user.

    The loop reads one line at a time, ignores input shorter than two
    characters, rejects text that is not an IP address and prints the region
    with the time the lookup took. It ends on ``quit``, on end of input, or
    when a lookup raises.

    Args:
        dbFile: path of the ip2region xdb file.
        cachePolicy: ``"file"`` or ``"vectorIndex"``; any other value loads
            the whole xdb content into memory.

    Returns:
        None. All output goes to stdout.
    """
    # build the searcher that matches the requested cache policy
    try:
        if cachePolicy == "file":
            searcher = XdbSearcher(dbfile=dbFile)
        elif cachePolicy == "vectorIndex":
            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
            if vi is None:
                print(f"failed to load vector index from {dbFile}\n")
                return
            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
        else:
            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
            if cb is None:
                print(f"failed to load xdb content from {dbFile}\n")
                return
            searcher = XdbSearcher(contentBuff=cb)
    except Exception as err:
        print(err)
        return

    # opening banner
    print(f"ip2region xdb searcher test program, cachePolicy: {cachePolicy}\ntype 'quit' to exit")
    try:
        while True:
            try:
                line = trim(input("ip2region>> "))
            except EOFError:
                # stdin was closed (Ctrl-D or exhausted pipe): treat it as 'quit'
                print()
                break
            if len(line) < 2:
                continue
            if line == "quit":
                break
            if not XdbSearcher.isip(None, ip=line):
                print("Error: invalid ip address")
                continue
            start = time.time()
            try:
                region_str = searcher.searchByIPStr(line)
            except Exception as error:
                print(error)
                return
            print(f"region :{region_str} , took {round((time.time()-start)*1000.00, 4)} ms")
    finally:
        # release the searcher on quit as well as on a failed lookup
        searcher.close()
    print("searcher test program exited, thanks for trying")
if __name__ == '__main__':
    # without any argument there is nothing to run: show the usage
    if len(sys.argv) < 2:
        printHelp()
        sys.exit(0)
    parse = argparse.ArgumentParser()
    parse.add_argument("--db", help="ip2region binary xdb file path")
    # default matches start_search(); without it an omitted option arrives as
    # None and silently selects the content policy
    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
                       default="vectorIndex",
                       help="cache policy: file/vectorIndex/content")
    args = parse.parse_args()
    # the xdb path is mandatory
    if not args.db:
        printHelp()
        sys.exit(1)
    start_search(dbFile=args.db, cachePolicy=args.cache_policy)

View File

@ -20,7 +20,7 @@ VectorIndexSize = 8
SegmentIndexSize = 14
class Ip2Region(object):
class XdbSearcher(object):
__f = None
# the minimal memory allocation.
@ -53,9 +53,18 @@ class Ip2Region(object):
def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
self.initDatabase(dbfile, vectorIndex, contentBuff)
def search(self, ip):
    """Look up the region of an IP given as an integer or as a string.

    Args:
        ip: an integer address, a string made only of digits holding that
            integer, or any other string, which is converted with
            ``ip2long``.

    Returns:
        The result of ``searchByIPLong`` for the resulting integer.
    """
    if isinstance(ip, str):
        # A digit-only string must become an int here: searchByIPLong
        # applies bit shifts to its argument, which fails on a str.
        ip = int(ip) if ip.isdigit() else self.ip2long(ip)
    return self.searchByIPLong(ip)
def searchByIPStr(self, ip):
    """Look up the region of an IP given as a string.

    Args:
        ip: a string made only of digits holding the integer address, or
            any other string, which is converted with ``ip2long``.

    Returns:
        The result of ``searchByIPLong`` for the resulting integer.
    """
    # A digit-only string must become an int here: searchByIPLong applies
    # bit shifts to its argument, which fails on a str.
    ip = int(ip) if ip.isdigit() else self.ip2long(ip)
    return self.searchByIPLong(ip)
def searchByIPLong(self, ip):
# locate the segment index block based on the vector index
sPtr = ePtr = 0
il0 = (int)((ip >> 24) & 0xFF)
@ -169,13 +178,13 @@ if __name__ == '__main__':
]
# 1. 缓存
dbPath = "./data/ip2region.xdb";
cb = Ip2Region.loadContentFromFile(dbfile=dbPath)
cb = Searcher.loadContentFromFile(dbfile=dbPath)
# 2. 创建查询对象
searcher = Ip2Region(contentBuff=cb)
searcher = Searcher(contentBuff=cb)
# 3. 执行查询
ip = "1.2.3.4"
# ip = "1.2.3.4"
for ip in ip_array:
region_str = searcher.searchByIPStr(ip)
print(region_str)