1.增加bench_test

2.增加search_test 3.修改ip2Region.py及类名，改为xdbSearcher Signed-off-by: 厉害的花花 <117415792@qq.com>
2025-12-08 19:25:22 +00:00 · 2022-07-12 09:46:44 +08:00 · 2022-07-12 09:46:44 +08:00 · b8b3cde805
commit b8b3cde805
parent b9ed424125
5 changed files with 334 additions and 36 deletions
--- a/binding/python/ReadMe.md
+++ b/binding/python/ReadMe.md
@ -5,12 +5,12 @@
 ### 完全基于文件的查询

 ```python
-import ip2Region
+from xdbSearcher import XdbSearcher

-if __name__ == '__main__':
+def searchWithFile():
    # 1. 创建查询对象
-    dbPath = "./data/ip2region.xdb";
-    searcher = ip2Region.Ip2Region(dbfile=dbPath)
+    dbPath = "../../data/ip2region.xdb"
+    searcher = XdbSearcher(dbfile=dbPath)
    
    # 2. 执行查询
    ip = "1.2.3.4"
@ -26,21 +26,21 @@ if __name__ == '__main__':
 我们可以提前从 `xdb` 文件中加载出来 `VectorIndex` 数据，然后全局缓存，每次创建 Searcher 对象的时候使用全局的 VectorIndex 缓存可以减少一次固定的 IO 操作，从而加速查询，减少 IO 压力。

 ```python
-import ip2Region
+from xdbSearcher import XdbSearcher

-if __name__ == '__main__':
-    # 1. 预先加载 VectorIndex 缓存
-    dbPath = "./data/ip2region.xdb";
-    vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile=dbPath)
+def searchWithVectorIndex():
+    # 1. 预先加载 VectorIndex 缓存
+    dbPath = "../../data/ip2region.xdb"
+    vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)

    # 2. 使用上面的缓存创建查询对象, 同时也要加载 xdb 文件
-    searcher = ip2Region.Ip2Region(dbfile=dbPath, vectorIndex=vi)
+    searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
    
    # 3. 执行查询
    ip = "1.2.3.4"
-    region_str = searcher.searchByIPStr(ip)
+    region_str = searcher.search(ip)
    print(region_str)
-    
+
    # 4. 关闭searcher
    searcher.close()
 ```
@ -50,25 +50,65 @@ if __name__ == '__main__':
 我们也可以预先加载整个 ip2region.xdb 的数据到内存，然后基于这个数据创建查询对象来实现完全基于文件的查询，类似之前的 memory search。

 ```python
-import ip2Region
+from xdbSearcher import XdbSearcher

-if __name__ == '__main__':
+def searchWithContent():
    # 1. 预先加载整个 xdb
-    dbPath = "./data/ip2region.xdb";
-    cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
+    dbPath = "../../data/ip2region.xdb";
+    cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
    
    # 2. 仅需要使用上面的全文件缓存创建查询对象, 不需要传源 xdb 文件
-    searcher = ip2Region.Ip2Region(contentBuff=cb)
+    searcher = XdbSearcher(contentBuff=cb)
    
    # 3. 执行查询
    ip = "1.2.3.4"
-    region_str = searcher.searchByIPStr(ip)
+    region_str = searcher.search(ip)
    print(region_str)
-    
+
    # 4. 关闭searcher
    searcher.close()
-
 ```
 # 查询测试

+通过 `search_test.py` 脚本来进行查询测试：
+```bash
+➜  python git:(python_dev) ✗ python3 ./search_test.py
+python3 search_test.py [command options]
+options:
+ --db string             ip2region binary xdb file path
+ --cache-policy string   cache policy: file/vectorIndex/content
+```
+
+例如：使用默认的 data/ip2region.xdb 进行查询测试：
+```bash
+➜  python git:(python_dev) ✗ python3 ./search_test.py --db=../../data/ip2region.xdb --cache-policy=content
+ip2region xdb searcher test program, cachePolicy: content
+type 'quit' to exit
+ip2region>> 1.2.3.4
+region :美国|0|华盛顿|0|谷歌 , took 0.0689 ms
+ip2region>> quit
+searcher test program exited, thanks for trying
+```
+
+输入 ip 即可进行查询测试。也可以分别设置 `cache-policy` 为 file/vectorIndex/content 来测试三种不同缓存实现的效率。
+
 # bench 测试
+
+通过 `bench_test.py` 脚本来进行自动 bench 测试，一方面确保 `xdb` 文件没有错误，另一方面通过大量的查询测试平均查询性能：
+```bash
+➜  python git:(python_dev) ✗ python3 ./bench_test.py
+python bench_test.py [command options]
+options:
+ --db string             ip2region binary xdb file path
+ --src string            source ip text file path
+ --cache-policy string   cache policy: file/vectorIndex/content
+```
+
+例如：通过默认的 data/ip2region.xdb 和 data/ip.merge.txt 来进行 bench 测试：
+```bash
+➜  python git:(python_dev) ✗ python3 ./bench_test.py --db=../../data/ip2region.xdb --src=../../data/ip.merge.txt --cache-policy=content
+Bench finished, [cachePolicy: content, total: 3417955, took: 34.93 s, cost: 0.0094 ms/op]
+```
+
+可以通过设置 `cache-policy` 参数来分别测试 file/vectorIndex/content 三种不同缓存实现的性能。
+
+@Note：请注意 bench 使用的 src 文件必须是生成对应 xdb 文件时所用的同一份源文件。
--- a/binding/python/bench_test.py
+++ b/binding/python/bench_test.py
@ -0,0 +1,120 @@
+#
+#  bench_test.py
+#  bench_test
+#
+#  Created by luckydog on 2022/7/1.
+#  Copyright © 2022年 luckydog. All rights reserved.
+#
+from ast import main
+import io
+
+from xdbSearcher import XdbSearcher
+import argparse
+import time
+import sys
+
+
+def printHelp():
+    """Print the command line usage of the bench test program."""
+    usage = (
+        "python bench_test.py [command options]",
+        "options: ",
+        " --db string             ip2region binary xdb file path",
+        " --src string            source ip text file path",
+        " --cache-policy string   cache policy: file/vectorIndex/content",
+    )
+    # one print per row keeps the output identical line for line
+    for row in usage:
+        print(row)
+
+def trim(string):
+    """Strip leading and trailing space characters from a str or bytes value.
+
+    Only the plain space character is removed; tabs and line breaks are kept.
+    Callers may pass ``bytes`` (for example a line read from a file opened in
+    binary mode), so both text types are supported.
+
+    :param string: the ``str`` or ``bytes`` value to trim.
+    :return: the trimmed value, of the same type as the input.
+    """
+    # A bytes value never compares equal to the str ' ', so pick a blank of
+    # the matching type; strip() also avoids one recursive call per space,
+    # which would hit the recursion limit on long runs of spaces.
+    blank = b" " if isinstance(string, (bytes, bytearray)) else " "
+    return string.strip(blank)
+
+def start_bench(dbFile="", srcFile="", cachePolicy="vectorIndex"):
+    """Verify an xdb file against its source text file and report timing.
+
+    Every non-empty line of ``srcFile`` must look like
+    ``startIp|endIp|region``. Five addresses spread over each segment are
+    searched and every result has to equal the region on that line; the
+    first mismatch or failure is printed and ends the run. On success a
+    summary with the total count and the average cost per search is printed.
+
+    :param dbFile: path of the xdb file to search.
+    :param srcFile: path of the source ip text file.
+    :param cachePolicy: ``"file"`` or ``"vectorIndex"``; any other value
+        loads the whole xdb content into memory.
+    :return: None; all results are reported on stdout.
+    """
+    # create the searcher for the requested cache policy
+    try:
+        if cachePolicy == "file":
+            searcher = XdbSearcher(dbfile=dbFile)
+        elif cachePolicy == "vectorIndex":
+            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
+            if vi is None:
+                print(f"failed to load vector index from {dbFile}\n")
+                return
+            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
+        else:
+            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
+            if cb is None:
+                print(f"failed to load xdb content from {dbFile}\n")
+                return
+            searcher = XdbSearcher(contentBuff=cb)
+    except Exception as err:
+        print(err)
+        return
+
+    # do the bench test
+    try:
+        # report open failures separately so other errors are not mislabelled
+        try:
+            src = io.open(srcFile, "rb")
+        except (OSError, TypeError, ValueError) as err:
+            print(f"failed to open source text file :{err}")
+            return
+
+        count = 0
+        costs = 0
+        sTime = time.perf_counter()
+        with src:
+            for raw in src:
+                # decode first: trimming only works reliably on text, and the
+                # line break (LF or CRLF) must not end up in the region field
+                line = trim(raw.decode("utf-8").rstrip("\r\n"))
+                if not line:
+                    # skip blank lines instead of silently ending the run
+                    continue
+
+                ps = line.split("|", 2)
+                if len(ps) != 3:
+                    print(f"invalid ip segment line :{line}")
+                    return
+                sip = searcher.ip2long(ps[0])
+                eip = searcher.ip2long(ps[1])
+
+                if sip > eip:
+                    print(f"start ip({ps[0]}) should not be greater than end ip({ps[1]})")
+                    return
+
+                mip = (sip + eip) >> 1
+
+                # probe both ends, the middle and the two quarter points
+                for ip in (sip, (sip + mip) >> 1, mip, (mip + eip) >> 1, eip):
+                    try:
+                        cTime = time.perf_counter()
+                        region = searcher.search(ip)
+                        costs += time.perf_counter() - cTime
+                    except Exception as error:
+                        print(f"failed to search ip :{ip}, err: {error}")
+                        return
+
+                    if region is None:
+                        print(f"failed to search ip :{ip}")
+                        return
+                    if region != ps[2]:
+                        print(f"failed search({ip}) with ({region} != {ps[2]})")
+                        return
+                    count += 1
+
+        took = time.perf_counter() - sTime
+        if count == 0:
+            # nothing was searched, so there is no average cost to report
+            print(f"no ip segment found in source text file :{srcFile}")
+            return
+        print(f"Bench finished, [cachePolicy: {cachePolicy}, total: {count}, took: {round(took, 2)} s, cost: {round(costs/count*1000, 4)} ms/op]")
+    except Exception as err:
+        print(f"bench test failed :{err}")
+    finally:
+        # close the searcher at last, on every exit path
+        searcher.close()
+
+
+if __name__ == '__main__':
+    # show the usage when started without any option
+    if len(sys.argv) < 2:
+        printHelp()
+        sys.exit(0)
+    parse = argparse.ArgumentParser()
+    parse.add_argument("--db", help="ip2region binary xdb file path")
+    parse.add_argument("--src", help="source ip text file path")
+    # the default mirrors start_bench(); without it an omitted flag yields
+    # None, which start_bench() treats as the "content" policy
+    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
+                       default="vectorIndex",
+                       help="cache policy: file/vectorIndex/content")
+    args = parse.parse_args()
+    # both paths are needed; show the usage instead of passing None on
+    if not args.db or not args.src:
+        printHelp()
+        sys.exit(0)
+    start_bench(dbFile=args.db, srcFile=args.src, cachePolicy=args.cache_policy)
--- a/binding/python/iptest.py
+++ b/binding/python/iptest.py
@ -1,18 +1,52 @@
-import ip2Region

-if __name__ == '__main__':
-    # 1. 预先加载整个 xdb
-    dbPath = "./data/ip2region.xdb";
-    # vi = ip2Region.Ip2Region.loadVectorIndexFromFile(dbfile="./data/ip2region.xdb")
-    cb = ip2Region.Ip2Region.loadContentFromFile(dbfile=dbPath)
+from xdbSearcher import XdbSearcher
+
+def searchWithFile():
+    """Search one ip with a searcher created from the xdb file path only."""
+    # 1. Create the searcher object
+    dbPath = "../../data/ip2region.xdb"
+    searcher = XdbSearcher(dbfile=dbPath)
     
-    # 2. 仅需要使用上面的全文件缓存创建查询对象, 不需要传源 xdb 文件
-    searcher = ip2Region.Ip2Region(contentBuff=cb)
-    
-    # 3. 执行查询
+    # 2. Run the search
     ip = "1.2.3.4"
     region_str = searcher.searchByIPStr(ip)
     print(region_str)
     
+    # 3. Close the searcher
+    searcher.close()
+
+def searchWithVectorIndex():
+    """Search one ip with a searcher that is given a preloaded VectorIndex."""
+    # 1. Preload the VectorIndex from the xdb file
+    dbPath = "../../data/ip2region.xdb"
+    vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbPath)
+
+    # 2. Create the searcher with the VectorIndex loaded above; the xdb file path is passed as well
+    searcher = XdbSearcher(dbfile=dbPath, vectorIndex=vi)
+    
+    # 3. Run the search
+    ip = "1.2.3.4"
+    region_str = searcher.search(ip)
+    print(region_str)
+
     # 4. 关闭searcher
-    searcher.close()
+    searcher.close()
+    
+def searchWithContent():
+    """Search one ip with a searcher created from the preloaded xdb content."""
+    # 1. Preload the whole xdb file
+    dbPath = "../../data/ip2region.xdb"
+    cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
+
+    # 2. Create the searcher from the content buffer only; no xdb path is passed
+    searcher = XdbSearcher(contentBuff=cb)
+    try:
+        # 3. Run the search
+        ip = "1.2.3.4"
+        region_str = searcher.search(ip)
+        print(region_str)
+    finally:
+        # 4. Close the searcher, also when the search raises
+        searcher.close()
+    
+    
+if __name__ == '__main__':
+    # run the content-buffer example; the other two examples are not invoked here
+    searchWithContent()
+    
--- a/binding/python/search_test.py
+++ b/binding/python/search_test.py
@ -0,0 +1,95 @@
+#
+#  search_test.py
+#  search_test
+#
+#  Created by luckydog on 2022/7/1.
+#  Copyright © 2022年 luckydog. All rights reserved.
+#
+
+from xdbSearcher import XdbSearcher
+import argparse
+import time
+import sys
+
+def printHelp():
+    """Print the command line usage of the search test program."""
+    usage = (
+        "python3 search_test.py [command options]",
+        "options: ",
+        " --db string             ip2region binary xdb file path",
+        " --cache-policy string   cache policy: file/vectorIndex/content",
+    )
+    # one print per row keeps the output identical line for line
+    for row in usage:
+        print(row)
+
+
+def trim(string):
+    """Strip leading and trailing space characters from a str or bytes value.
+
+    Only the plain space character is removed; tabs and line breaks are kept.
+
+    :param string: the ``str`` or ``bytes`` value to trim.
+    :return: the trimmed value, of the same type as the input.
+    """
+    # strip() replaces one recursive call per space, which would raise
+    # RecursionError on a long run of spaces; the blank is chosen by type
+    # because a bytes value never compares equal to the str ' '.
+    blank = b" " if isinstance(string, (bytes, bytearray)) else " "
+    return string.strip(blank)
+
+
+def start_search(dbFile="", cachePolicy="vectorIndex"):
+    """Run an interactive prompt that searches every ip the user types.
+
+    Each input line is trimmed; lines shorter than two characters are
+    ignored, ``quit`` ends the session, and anything that is not a valid ip
+    is rejected with a message. For a valid ip the region and the time the
+    search took are printed. A failing search prints the error and ends the
+    session.
+
+    :param dbFile: path of the xdb file to search.
+    :param cachePolicy: ``"file"`` or ``"vectorIndex"``; any other value
+        loads the whole xdb content into memory.
+    :return: None; all results are reported on stdout.
+    """
+    # create the searcher for the requested cache policy
+    try:
+        if cachePolicy == "file":
+            searcher = XdbSearcher(dbfile=dbFile)
+        elif cachePolicy == "vectorIndex":
+            vi = XdbSearcher.loadVectorIndexFromFile(dbfile=dbFile)
+            if vi is None:
+                print(f"failed to load vector index from {dbFile}\n")
+                return
+            searcher = XdbSearcher(dbfile=dbFile, vectorIndex=vi)
+        else:
+            cb = XdbSearcher.loadContentFromFile(dbfile=dbFile)
+            if cb is None:
+                print(f"failed to load xdb content from {dbFile}\n")
+                return
+            searcher = XdbSearcher(contentBuff=cb)
+    except Exception as err:
+        print(err)
+        return
+
+    # the opening prompt
+    print(f"ip2region xdb searcher test program, cachePolicy: {cachePolicy}\ntype 'quit' to exit")
+    try:
+        while True:
+            try:
+                line = trim(input("ip2region>> "))
+            except EOFError:
+                # stdin was closed (Ctrl-D or exhausted pipe): treat as quit
+                break
+
+            if len(line) < 2:
+                continue
+            if line == "quit":
+                break
+
+            if not XdbSearcher.isip(None, ip=line):
+                print("Error: invalid ip address")
+                continue
+            start = time.perf_counter()
+
+            try:
+                region_str = searcher.searchByIPStr(line)
+            except Exception as error:
+                print(error)
+                return
+
+            print(f"region :{region_str} , took {round((time.perf_counter()-start)*1000.00, 4)} ms")
+    finally:
+        # quit: release the searcher on every exit path
+        searcher.close()
+    print("searcher test program exited, thanks for trying")
+
+
+if __name__ == '__main__':
+    # show the usage when started without any option
+    if len(sys.argv) < 2:
+        printHelp()
+        sys.exit(0)
+    parse = argparse.ArgumentParser()
+    parse.add_argument("--db", help="ip2region binary xdb file path")
+    # the default mirrors start_search(); without it an omitted flag yields
+    # None, which start_search() treats as the "content" policy
+    parse.add_argument("--cache-policy", choices=["file", "vectorIndex", "content"],
+                       default="vectorIndex",
+                       help="cache policy: file/vectorIndex/content")
+    args = parse.parse_args()
+    # the xdb path is needed; show the usage instead of passing None on
+    if not args.db:
+        printHelp()
+        sys.exit(0)
+    start_search(dbFile=args.db, cachePolicy=args.cache_policy)
--- a/binding/python/xdbSearcher.py
+++ b/binding/python/xdbSearcher.py
@ -20,7 +20,7 @@ VectorIndexSize = 8
 SegmentIndexSize = 14


-class Ip2Region(object):
+class XdbSearcher(object):
    __f = None

    # the minimal memory allocation.
@ -53,9 +53,18 @@ class Ip2Region(object):
    def __init__(self, dbfile=None, vectorIndex=None, contentBuff=None):
        self.initDatabase(dbfile, vectorIndex, contentBuff)

+    def search(self, ip):
+        """Search the region of an ip given as a string or as an integer.
+
+        :param ip: a dotted ip string, a string of decimal digits, or an int.
+        :return: whatever ``searchByIPLong`` returns for the numeric ip.
+        """
+        if isinstance(ip, str):
+            # a digit-only string is already the numeric form: convert it to
+            # int, because searchByIPLong applies integer shifts to its argument
+            ip = int(ip) if ip.isdigit() else self.ip2long(ip)
+        return self.searchByIPLong(ip)
+       
    def searchByIPStr(self, ip):
+        # Convert a non-digit ip string with ip2long, then delegate to searchByIPLong.
+        # NOTE(review): a digit-only string is passed on unchanged, still as a str —
+        # confirm that searchByIPLong is never reached through that path.
        if not ip.isdigit(): ip = self.ip2long(ip)
-
+        return self.searchByIPLong(ip)
+         
+    def searchByIPLong(self, ip):
        # locate the segment index block based on the vector index
        sPtr = ePtr = 0
        il0 = (int)((ip >> 24) & 0xFF)
@ -169,13 +178,13 @@ if __name__ == '__main__':
    ]
    # 1. 缓存
    dbPath = "./data/ip2region.xdb";
-    cb = Ip2Region.loadContentFromFile(dbfile=dbPath)
+    cb = XdbSearcher.loadContentFromFile(dbfile=dbPath)
    
    # 2. 创建查询对象
-    searcher = Ip2Region(contentBuff=cb)
+    searcher = XdbSearcher(contentBuff=cb)
    
    # 3. 执行查询
-    ip = "1.2.3.4"
+    # ip = "1.2.3.4"
    for ip in ip_array:
        region_str = searcher.searchByIPStr(ip)
        print(region_str)