diff --git a/ReadMe.md b/ReadMe.md index 2542f41..f8f390f 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -46,7 +46,7 @@ API 介绍,使用文档和测试程序请参考对应 `searcher` 查询客户 | [Csharp](binding/csharp) | csharp xdb 查询客户端 | :white_check_mark: | :white_check_mark: | [Alen Lee](https://github.com/malus2077) & [ArgoZhang](https://github.com/ArgoZhang) | | [Erlang](binding/erlang) | erlang xdb 查询客户端 | :white_check_mark: | :x: | [malou](https://github.com/malou996) | | [Nginx](binding/nginx) | nginx 扩展 xdb 查询客户端 | :white_check_mark: | :x: | [Wu Jian Ping](https://github.com/wujjpp) | -| [C++](binding/cpp) | C++ xdb 查询客户端 | :white_check_mark: | :x: | [Yunbin Liu](https://github.com/liuyunbin) | +| [C++](binding/cpp) | C++ xdb 查询客户端 | :white_check_mark: | :white_check_mark: | [Yunbin Liu](https://github.com/liuyunbin) | 以下工具链实现由社区开发者通过第三方仓库贡献: @@ -69,13 +69,13 @@ API 介绍,使用文档和测试程序请参考如下 `maker` 生成程序下 | [Python](maker/python) | python xdb 生成程序 | :white_check_mark: | :x: | [leolin49](https://github.com/leolin49) | | [Csharp](maker/csharp) | csharp xdb 生成程序 | :white_check_mark: | :x: | [Alan Lee](https://github.com/malus2077) | | [Rust](maker/rust) | rust xdb 生成程序 | :white_check_mark: | :white_check_mark: | [KevinWang](https://github.com/KevinWL) & [gongzhengyang](https://github.com/gongzhengyang) | -| [C++](maker/cpp) | C++ xdb 生成程序 | :white_check_mark: | :x: | [Yunbin Liu](https://github.com/liuyunbin) | - - +| [C++](maker/cpp) | C++ xdb 生成程序 | :white_check_mark: | :white_check_mark: | [Yunbin Liu](https://github.com/liuyunbin) | + + # `xdb` 数据更新 - + ip2region 项目的核心在于 研究 IP 数据的存储和快速查询的设计和实现, 项目自带的 `./data/ipv4_source.txt` 和 `./data/ipv6_source.txt` 原始数据不会再提供更新,对于数据精度和更新频率要求很高的使用场景建议到 [Ip2Region社区](https://ip2region.net/products/offline) 或者第三方购买商用离线数据,你可以使用如下几种方式来尝试自己更新数据: - + ### 手动编辑更新 你可以基于 ip2region 自带的 `./data/ipv4_source.txt` 和 `./data/ipv6_source.txt` 原始 IP 数据用 ip2region 提供的编辑工具来自己修改,目前数据源有如下几种方式: 1. 
ip2region 社区提供的数据(请参考地底部的公众号关注社区通知) @@ -87,7 +87,7 @@ ip2region 项目的核心在于 研究 IP 数据的存储和快速查询的 |:------------------------------------|:-------------------------|:-------------------|:-------------------|:-------------------------------------------| | [Golang](maker/golang#xdb-数据编辑) | golang IP 原始数据编辑器 | :white_check_mark: | :white_check_mark: | [Lion](https://github.com/lionsoul2014) | | [Java](maker/java#xdb-数据编辑) | java IP 原始数据编辑器 | :white_check_mark: | :soon: | [Lion](https://github.com/lionsoul2014) | -| [C++](maker/cpp#xdb-数据编辑) | C++ IP 原始数据编辑器 | :white_check_mark: | :x: | [Yunbin Liu](https://github.com/liuyunbin) | +| [C++](maker/cpp) | C++ IP 原始数据编辑器 | :white_check_mark: | :white_check_mark: | [Yunbin Liu](https://github.com/liuyunbin) | ### 检测自动更新 diff --git a/binding/cpp/Makefile b/binding/cpp/Makefile index 2f810d6..5f7599e 100644 --- a/binding/cpp/Makefile +++ b/binding/cpp/Makefile @@ -1,11 +1,27 @@ -all: xdb_search xdb_bench +all: bin header search bench make edit -xdb_search: xdb_search.cc xdb_search_test.cc - g++ -std=c++11 -O2 $^ -o $@ +FILES=$(wildcard src/*.cc) -xdb_bench: xdb_search.cc xdb_bench.cc xdb_bench_test.cc - g++ -std=c++11 -O2 $^ -o $@ +bin: + mkdir -p bin + +header: $(FILES) test/header.cc + g++ -std=c++11 -O2 $^ -o bin/$@ + +search: $(FILES) test/search.cc + g++ -std=c++11 -O2 $^ -o bin/$@ + +bench: $(FILES) test/bench.cc + g++ -std=c++11 -O2 $^ -o bin/$@ + +make: $(FILES) test/make.cc + g++ -std=c++11 -O2 $^ -o bin/$@ + +edit: $(FILES) + g++ -std=c++11 -O2 $^ test/edit_v4.cc -o bin/edit_v4 + g++ -std=c++11 -O2 $^ test/edit_v6.cc -o bin/edit_v6 clean: - rm -f xdb_search xdb_bench + rm -rf bin + diff --git a/binding/cpp/readme.md b/binding/cpp/readme.md index bac5bda..e2baf9f 100644 --- a/binding/cpp/readme.md +++ b/binding/cpp/readme.md @@ -1,108 +1,563 @@ -# ip2region xdb C++ 查询客户端实现 +# ip2region xdb C++ 实现 -## 使用方式 -### 完全基于文件的查询 +## 0. 
文件说明 ``` -#include +Makefile --------- 构建 -#include "xdb_search.h" +src ------------------ 源文件目录 +src/base.* ----------- 常量及工具函数 +src/ip.* ------------- 实现 IP 处理 +src/header.* --------- 实现 xdb 头部解析 +src/search.* --------- 实现 xdb 查找 +src/bench.* ---------- 实现 查找 测速 +src/make.* ----------- 实现 生成 xdb 文件 +src/edit.* ----------- 实现 原始数据编辑 -int main(int argc, char* argv[]) { - char file_name[] = "../../data/ip2region.xdb"; - char ip[] = "1.2.3.4"; +test ---------------- 测试目录 +test/header.cc ------ 测试 头部 +test/search.cc ------ 测试 查找 +test/bench.cc ------- 测速 +test/make.cc -------- 生成 xdb 文件 +test/edit_v4.cc ----- 测试 原始数据编辑(ipv4) +test/edit_v6.cc ----- 测试 原始数据编辑(ipv6) - xdb_search_t xdb(file_name); - xdb.init_file(); - std::cout << xdb.search(ip) << std::endl; - return 0; -} +bin --------------- 可执行文件目录(通过 make 生成) +bin/header -------- 测试 头部 +bin/search -------- 测试 查找 +bin/bench --------- 测速 +bin/make ---------- 生成 xdb 文件 +bin/edit_v4 ------- 测试 原始数据编辑(ipv4) +bin/edit_v6 ------- 测试 原始数据编辑(ipv6) + +readme.md --------- readme ``` -### 缓存 `vector_index` 索引 -``` -#include - -#include "xdb_search.h" - -int main(int argc, char* argv[]) { - char file_name[] = "../../data/ip2region.xdb"; - char ip[] = "1.2.3.4"; - - xdb_search_t xdb(file_name); - xdb.init_vector_index(); - - std::cout << xdb.search(ip) << std::endl; - return 0; -} -``` - -### 缓存整个 `xdb` 数据 -``` -#include - -#include "xdb_search.h" - -int main(int argc, char* argv[]) { - char file_name[] = "../../data/ip2region.xdb"; - char ip[] = "1.2.3.4"; - - xdb_search_t xdb(file_name); - xdb.init_content(); - - std::cout << xdb.search(ip) << std::endl; - return 0; -} -``` - -## 测试程序编译 -1. 切换到当前目录 -2. 编译 - +## 1. 编译 ``` $ make -g++ -std=c++11 -O2 xdb_search.cc xdb_search_test.cc -o xdb_search -g++ -std=c++11 -O2 xdb_search.cc xdb_bench.cc xdb_bench_test.cc -o xdb_bench ``` -## 测试查询 -### 说明 +## 2. 
查找 +### 2.1 示例 ``` -$ ./xdb_search --help -./xdb_search [command options] -options: - --db string ip2region binary xdb file path - --cache-policy string cache policy: file/vector_index/content - --help print help +#include "src/search.h" + +// IP 版本: xdb::ipv4 xdb::ipv6 +// 策略: xdb::policy_file xdb::policy_vector xdb::policy_content +// 不缓存 部分缓存 全部缓存 +int main() { + std::string xdb_name = "../../data/ip2region_v6.xdb"; + int version = xdb::ipv6; + int policy = xdb::policy_content; + std::string ip = "2001:200:124::"; + + xdb::search_t s(xdb_name, version, policy); + std::cout << s.search(ip) << std::endl; + return 0; +} + +// $ g++ src/*.cc 1.cc --- 编译 +// $ ./a.out ------------- 测试 +// 日本|东京都|千代田区|专线用户 ``` -### 测试 +### 2.2 测试 xdb 头部 ``` -$ ./xdb_search --db ../../data/ip2region.xdb --cache-policy vector_index -cache policy : vector_index -ip2region>> 1.2.3.4 -美国|0|华盛顿|0|谷歌 +$ ./bin/header +测试 IPv4 +版本号: 3 +缓存策略: 1 +文件生成时间: 2025-09-06 02:24:16 +索引起始地址: 955933 +索引结束地址: 11042415 +IP版本: 4 +指针字节数: 4 + +测试 IPv6 +版本号: 3 +缓存策略: 1 +文件生成时间: 2025-10-17 04:41:04 +索引起始地址: 3094259 +索引结束地址: 36258303 +IP版本: 6 +指针字节数: 4 ``` -## bench 测试 -### 说明 +### 2.3 测试查找 ``` -$ ./xdb_bench --help -./xdb_bench [command options] -options: - --db string ip2region binary xdb file path - --src string source ip text file path - --cache-policy string cache policy: file/vector_index/content - --help print help +$ ./bin/search +测试 IPv4 不缓存: 成功 +测试 IPv4 部分缓存: 成功 +测试 IPv4 全部缓存: 成功 +测试 IPv6 不缓存: 成功 +测试 IPv6 部分缓存: 成功 +测试 IPv6 全部缓存: 成功 ``` -### 测试 +## 3. 
测速以及检验正确性 ``` -$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy content -total: 3419220, took: 3.44 s, cost: 0.27 μs/op, io count: 0 -$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy vector_index -total: 3419220, took: 45.99 s, cost: 12.24 μs/op, io count: 21739300 -$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy file -total: 3419220, took: 60.39 s, cost: 16.32 μs/op, io count: 25158520 +$ ./bin/bench +测试 IPv4, 不缓存, total: 3910284, took: 27.60s, cost: 6.59μs/op, io count: 28227147 +测试 IPv4, 部分缓存, total: 3910284, took: 21.85s, cost: 5.15μs/op, io count: 24316863 +测试 IPv4, 全部缓存, total: 3910284, took: 2.26s, cost: 0.25μs/op, io count: 0 +测试 IPv6, 不缓存, total: 4792520, took: 100.40s, cost: 20.22μs/op, io count: 80758866 +测试 IPv6, 部分缓存, total: 4792520, took: 93.06s, cost: 18.71μs/op, io count: 75966346 +测试 IPv6, 全部缓存, total: 4792520, took: 6.24s, cost: 0.81μs/op, io count: 0 +``` + +## 4. 生成 xdb 文件 +### 4.1 生成 xdb 文件 +``` +$ ./bin/make +生成 ipv4 的 xdb 文件, took: 0.57s +生成 ipv6 的 xdb 文件, took: 1.24s +``` + +### 4.2 测试正确性 +``` +# ipv4 --- 只有时间不同 +$ diff <(xxd ./ip2region_v4.xdb) <(xxd ../../data/ip2region_v4.xdb) +1c1 +< 00000000: 0300 0100 9f2f 2969 1d96 0e00 6f7e a800 ...../)i....o~.. +--- +> 00000000: 0300 0100 509b bb68 1d96 0e00 6f7e a800 ....P..h....o~.. + +# ipv6 --- 只有时间不同 +$ diff <(xxd ./ip2region_v6.xdb) <(xxd ../../data/ip2region_v6.xdb) +1c1 +< 00000000: 0300 0100 a02f 2969 f336 2f00 ff41 2902 ...../)i.6/..A). +--- +> 00000000: 0300 0100 e0c8 f168 f336 2f00 ff41 2902 .......h.6/..A). +``` + +## 5. 原始数据编辑 +### 5.1. 使用说明 +* 新的IP归属地文件可以包含空行 +* 新的IP归属地文件中的记录可以乱序, 程序会自动排序 +* 新的IP归属地文件中的IP段可以重叠, 只要无二义性, 程序会自动合并 +* 最终的结果会将相邻的且归属地相同的行自动合并 +* 以下测试, 原文件使用仓库自带的数据文件, 新文件使用当前目录下的 1.txt + +### 5.2. 
数据正确性测试 -- ipv4 +#### 测试一: 测试数据文件包含空行以及重复的情况 +``` +$ cat -n 1.txt + 1 + 2 1.0.128.0|1.0.128.255|测试归属地 + 3 + 4 1.0.128.0|1.0.128.255|测试归属地 + 5 +$ ./bin/edit_v4 +took: 0.80s +$ git diff ../../data/ +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..5d1fdfa 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试二: 测试数据文件乱序以及数据有交叉, 归属地相同的情况 +``` +$ cat 1.txt +1.0.128.5|1.0.128.255|测试归属地 +1.0.128.0|1.0.128.9|测试归属地 +$ ./bin/edit_v4 +took: 0.88s +$ git diff ../../data/ +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..5d1fdfa 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试三: 测试数据文件重叠, 归属地相同的情况 +``` +$ cat 1.txt +1.0.128.0|1.0.128.8|测试归属地 +1.0.128.7|1.0.128.255|测试归属地 +$ ./bin/edit_v4 +took: 0.91s +$ git diff ../../data/ +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..5d1fdfa 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试四: 测试数据文件重叠, 归属地相同的情况 +``` +$ cat 1.txt +1.0.128.0|1.0.128.8|测试归属地 
+1.0.128.8|1.0.128.255|测试归属地 +$ ./bin/edit_v4 +took: 0.81s +git diff ../../data +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..5d1fdfa 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试五: 测试数据文件连接, 归属地相同的情况 +``` +$ cat 1.txt +1.0.128.0|1.0.128.8|测试归属地 +1.0.128.9|1.0.128.255|测试归属地 +$ ./bin/edit_v4 +took: 0.71s +git diff ../../data +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..5d1fdfa 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,7 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试六: 测试数据文件重叠, 归属地不同情况 +``` +$ cat 1.txt +1.0.128.0|1.0.128.8|测试归属地123 +1.0.128.8|1.0.128.255|测试归属地 +$ ./bin/edit_v4 +数据有二义性: 1.0.128.0|1.0.128.8|测试归属地123, 1.0.128.8|1.0.128.255|测试归属地 +``` + +#### 测试七: 测试数据文件连接, 归属地不同情况 +``` +$ cat 1.txt +1.0.128.0|1.0.128.8|测试归属地123 +1.0.128.9|1.0.128.255|测试归属地 +$ ./bin/edit_v4 +took: 0.75s +git diff ../../data +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..976e6bf 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -7,7 +7,8 @@ + 1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 + 1.0.64.0|1.0.79.255|日本|广岛县|0|0 + 1.0.80.0|1.0.127.255|日本|冈山县|0|0 +-1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT ++1.0.128.0|1.0.128.8|测试归属地123 ++1.0.128.9|1.0.128.255|测试归属地 + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT + 
1.0.133.0|1.0.133.255|泰国|素攀武里府|0|TOT +``` + +#### 测试八: 测试将一个IP数据拆成多个IP +``` +$ cat 1.txt +36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 +36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 +36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 +$ ./bin/edit_v4 +took: 0.80s +git diff ../../data +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..f895c2f 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -54778,7 +54778,11 @@ + 36.134.84.0|36.134.85.255|中国|安徽省|合肥市|移动 + 36.134.86.0|36.134.87.255|中国|广西|南宁市|移动 + 36.134.88.0|36.134.89.255|中国|内蒙古|呼和浩特市|移动 +-36.134.90.0|36.141.255.255|中国|0|0|移动 ++36.134.90.0|36.136.0.255|中国|0|0|移动 ++36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 ++36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 ++36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 ++36.136.24.0|36.141.255.255|中国|0|0|移动 + 36.142.0.0|36.142.1.255|中国|四川省|成都市|移动 + 36.142.2.0|36.142.31.255|中国|甘肃省|兰州市|移动 + 36.142.32.0|36.142.127.255|中国|甘肃省|0|移动 +``` + +#### 测试九: 测试将多个IP数据并成一个IP数据 +``` +$ cat 1.txt +1.0.16.0|1.0.127.255|测试归属地 +$ ./bin/edit_v4 +took: 0.76s +git diff ../../data +diff --git a/data/ipv4_source.txt b/data/ipv4_source.txt +index 00dacc3..756354c 100644 +--- a/data/ipv4_source.txt ++++ b/data/ipv4_source.txt +@@ -3,10 +3,7 @@ + 1.0.1.0|1.0.3.255|中国|福建省|福州市|电信 + 1.0.4.0|1.0.7.255|澳大利亚|维多利亚|墨尔本|0 + 1.0.8.0|1.0.15.255|中国|广东省|广州市|电信 +-1.0.16.0|1.0.31.255|日本|0|0|0 +-1.0.32.0|1.0.63.255|中国|广东省|广州市|电信 +-1.0.64.0|1.0.79.255|日本|广岛县|0|0 +-1.0.80.0|1.0.127.255|日本|冈山县|0|0 ++1.0.16.0|1.0.127.255|测试归属地 + 1.0.128.0|1.0.128.255|泰国|清莱府|0|TOT + 1.0.129.0|1.0.132.191|泰国|曼谷|曼谷|TOT + 1.0.132.192|1.0.132.255|泰国|Nakhon-Ratchasima|0|TOT +``` + +### 5.3 数据正确性测试 -- ipv6 +#### 测试一: 测试数据文件包含空行以及重复的情况 +``` +$ cat -n 1.txt + 1 + 2 2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|测试归属地 + 3 + 4 2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|测试归属地 + 5 +$ ./bin/edit_v6 +took: 1.74s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..29617c4 100644 +--- 
a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,7 +2,7 @@ + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|测试归属地 + 2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试二: 测试数据文件乱序以及数据有交叉, 归属地相同的情况 +``` +$ cat 1.txt +2001:200:121::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地 +2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 1.68s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..9e83b03 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,9 +2,8 @@ + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地 ++2001:200:126::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试三: 测试数据文件重叠, 归属地相同的情况 +``` +$ cat 1.txt +2001:200:120::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地 +2001:200:125::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 
1.75s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..7a23ba2 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,9 +2,8 @@ + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 ++2001:200:127::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试四: 测试数据文件重叠, 归属地相同的情况 +``` +$ cat 1.txt +2001:200:120::|2001:200:125::|测试归属地 +2001:200:125::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 1.46s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..7a23ba2 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,9 +2,8 @@ + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 ++2001:200:127::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 
2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试五: 测试数据文件连接, 归属地相同的情况 +``` +$ cat 1.txt +2001:200:120::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地 +2001:200:126::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 1.79s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..7a23ba2 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,9 +2,8 @@ + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 ++2001:200:127::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试六: 测试数据文件重叠, 归属地不同情况 +``` +$ cat 1.txt +2001:200:120::|2001:200:126::|测试归属地123 +2001:200:126::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +数据有二义性: 2001:200:120::|2001:200:126::|测试归属地123, 2001:200:126::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +``` + +#### 测试七: 测试数据文件连接, 归属地不同情况 +``` +$ cat 1.txt +2001:200:120::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地123 +2001:200:126::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 1.78s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..142f7cc 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -2,9 +2,9 @@ + 
2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ++2001:200:120::|2001:200:125:ffff:ffff:ffff:ffff:ffff|测试归属地123 ++2001:200:126::|2001:200:126:ffff:ffff:ffff:ffff:ffff|测试归属地 ++2001:200:127::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +``` + +#### 测试八: 测试将一个IP数据拆成多个IP +``` +$ cat 1.txt +2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ff11|测试归属地1 +2001:200:11f:ffff:ffff:ffff:ffff:ff12|2001:200:11f:ffff:ffff:ffff:ffff:ff33|测试归属地2 +2001:200:11f:ffff:ffff:ffff:ffff:ff34|2001:200:11f:ffff:ffff:ffff:ffff:ffff|测试归属地3 +$ ./bin/edit_v6 +took: 1.52s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..e450e27 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -1,7 +1,9 @@ + 1:1::|2001:1ff:ffff:ffff:ffff:ffff:ffff:ffff|0|0|内网IP|内网IP + 2001:200::|2001:200:101:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 ++2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ff11|测试归属地1 ++2001:200:11f:ffff:ffff:ffff:ffff:ff12|2001:200:11f:ffff:ffff:ffff:ffff:ff33|测试归属地2 ++2001:200:11f:ffff:ffff:ffff:ffff:ff34|2001:200:11f:ffff:ffff:ffff:ffff:ffff|测试归属地3 + 2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 
2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +``` + +#### 测试九: 测试将多个IP数据并成一个IP数据 +``` +$ cat 1.txt +2001:200:123::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|测试归属地 +$ ./bin/edit_v6 +took: 1.99s +git diff ../../data +diff --git a/data/ipv6_source.txt b/data/ipv6_source.txt +index 4dee31b..ecd29c3 100644 +--- a/data/ipv6_source.txt ++++ b/data/ipv6_source.txt +@@ -3,9 +3,7 @@ + 2001:200:102::|2001:200:104:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:105::|2001:200:11f:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:120::|2001:200:122:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:123::|2001:200:123:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 +-2001:200:124::|2001:200:129:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 +-2001:200:12a::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 ++2001:200:123::|2001:200:12a:ffff:ffff:ffff:ffff:ffff|测试归属地 + 2001:200:12b::|2001:200:130:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 + 2001:200:131::|2001:200:132:ffff:ffff:ffff:ffff:ffff|日本|神奈川县|藤泽市|专线用户 + 2001:200:133::|2001:200:135:ffff:ffff:ffff:ffff:ffff|日本|东京都|千代田区|专线用户 ``` diff --git a/binding/cpp/src/base.cc b/binding/cpp/src/base.cc new file mode 100644 index 0000000..7493483 --- /dev/null +++ b/binding/cpp/src/base.cc @@ -0,0 +1,74 @@ + +#include "base.h" + +namespace xdb { + +int ip_version; // ip 版本 +int ip_size; // ip 占的字节数 +int content_size; + +void init_xdb(int version) { + ip_version = version; + ip_size = version == ipv4 ? 
4 : 16;
+  content_size = ip_size * 2 + 2 + 4;
+}
+// Prints `msg` to stdout and terminates the process with exit(-1).
+void log_exit(const string &msg) {
+  std::cout << msg << std::endl;
+  exit(-1);
+}
+// Reads exactly `len` bytes at byte offset `index` of `db` into `buf`; exits on failure.
+void read_bin(int index, char *buf, size_t len, FILE *db) {
+  if (fseek(db, index, SEEK_SET) != 0 ||  // a failed seek used to go unnoticed
+      fread(buf, 1, len, db) != len)
+    log_exit(__func__);
+}
+// Decodes 4 little-endian bytes; going through unsigned char avoids shifting negative chars.
+unsigned to_uint(const char *buf) {
+  const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
+  return p[0] | (p[1] << 8) | (p[2] << 16) | (static_cast<unsigned>(p[3]) << 24);
+}
+// Decodes 2 little-endian bytes (same unsigned-char treatment as to_uint).
+unsigned to_ushort(const char *buf) {
+  return static_cast<unsigned char>(buf[0]) | (static_cast<unsigned char>(buf[1]) << 8);
+}
+// Decodes an integer of `n` bytes: 2 selects to_ushort, anything else to_uint.
+unsigned to_int(const char *buf, int n) {
+  return n == 2 ? to_ushort(buf) : to_uint(buf);
+}
+// Stores the low 32 bits of `data` into buf[0..3], least significant byte first.
+void write_uint(unsigned data, char buf[]) {
+  buf[0] = (data >> 0) & 0xFF;
+  buf[1] = (data >> 8) & 0xFF;
+  buf[2] = (data >> 16) & 0xFF;
+  buf[3] = (data >> 24) & 0xFF;
+}
+// Writes `data` to `dst` as 4 bytes, least significant byte first.
+void write_uint(unsigned data, FILE *dst) {
+  char buf[4];
+  write_uint(data, buf);
+  fwrite(buf, 1, sizeof(buf), dst);
+}
+// Stores the low 16 bits of `data` into buf[0..1], least significant byte first.
+void write_ushort(unsigned data, char buf[]) {
+  buf[0] = (data >> 0) & 0xFF;
+  buf[1] = (data >> 8) & 0xFF;
+}
+// Writes `data` to `dst` as 2 bytes, least significant byte first.
+void write_ushort(unsigned data, FILE *dst) {
+  char buf[2];
+  write_ushort(data, buf);
+  fwrite(buf, 1, sizeof(buf), dst);
+}
+// Writes `len` raw bytes from `buf` to `dst`.
+void write_string(const char *buf, unsigned len, FILE *dst) {
+  fwrite(buf, 1, len, dst);
+}
+// Returns the gettimeofday() wall-clock time in microseconds.
+unsigned long long get_time() {
+  struct timeval tv1;
+  gettimeofday(&tv1, NULL);
+  return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec;
+}
+
+}  // namespace xdb
diff --git a/binding/cpp/src/base.h b/binding/cpp/src/base.h
new file mode 100644
index 0000000..287b038
--- /dev/null
+++ b/binding/cpp/src/base.h
@@ -0,0 +1,58 @@
+#ifndef BASE_H
+#define BASE_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace xdb {
+
+using std::string;
+
+constexpr int ipv4 = 4;
+constexpr int ipv6 = 6;
+
+constexpr int policy_file = 0;
+constexpr int policy_vector = 1;
+constexpr int 
policy_content = 2;
+
+constexpr int length_header = 256;
+constexpr int length_vector = 256 * 256 * 8;
+
+extern int ip_version;  // IP version
+extern int ip_size;     // size of one IP address in bytes
+extern int content_size;
+
+void init_xdb(int version);
+
+void log_exit(const string &msg);
+
+void read_bin(int index, char *buf, size_t len, FILE *db);
+
+unsigned to_uint(const char *buf);
+unsigned to_ushort(const char *buf);
+unsigned to_int(const char *buf, int n);
+
+void write_uint(unsigned data, char buf[]);
+void write_uint(unsigned data, FILE *dst);
+
+void write_ushort(unsigned data, char buf[]);
+void write_ushort(unsigned data, FILE *dst);
+
+void write_string(const char *buf, unsigned len, FILE *dst);
+
+unsigned long long get_time();
+
+}  // namespace xdb
+
+#endif
diff --git a/binding/cpp/src/bench.cc b/binding/cpp/src/bench.cc
new file mode 100644
index 0000000..f1c46e2
--- /dev/null
+++ b/binding/cpp/src/bench.cc
@@ -0,0 +1,77 @@
+
+#include "bench.h"
+
+namespace xdb {
+
+// Creates a benchmark whose searcher is built from `file_name`, `version`
+// and `policy`.
+bench_t::bench_t(const std::string &file_name, int version, int policy)
+    : search(file_name, version, policy) {
+}
+
+// Looks up `ip` and exits via log_exit() unless the answer equals `region`;
+// otherwise adds the searcher's io count and cost time to the running totals.
+void bench_t::test_one(const ip_t &ip, const string region) {
+  if (search.search(ip.to_string()) != region)
+    xdb::log_exit("failed: " + ip.to_string() + " " + region);
+  sum_io_count += search.get_io_count();
+  sum_cost_time += search.get_cost_time();
+  sum_count++;
+}
+
+// Checks one line of the source file. Lines holding only a line terminator are skipped.
+void bench_t::test_line(char *buf) {
+  size_t buf_len = strlen(buf);
+  // Strip only a real line terminator: the last line of a file may lack one,
+  // and unconditionally dropping the final byte would cut into its data.
+  while (buf_len > 0 && (buf[buf_len - 1] == '\n' || buf[buf_len - 1] == '\r'))
+    --buf_len;
+  if (buf_len == 0)
+    return;
+  buf[buf_len] = '\0';
+
+  node_t node(buf);
+
+  // Check at most five addresses counted from the start of the range ...
+  for (int i = 0; i < 5 && node.ip1 < node.ip2; ++i) {
+    test_one(node.ip1, node.region);
+    node.ip1 = node.ip1 + 1;
+  }
+  // ... and always the last address of the range.
+  test_one(node.ip2, node.region);
+}
+
+// Runs test_line() on every line of `file_name`; exits if the file cannot be opened.
+void bench_t::test_file(const std::string &file_name) {
+  FILE *f = fopen(file_name.data(), "r");
+  if (f == NULL)
+    xdb::log_exit("can't open " + file_name);
+  char buf[1024];
+  while (fgets(buf, sizeof(buf), f) != NULL)
+    test_line(buf);
+  fclose(f);  // this handle was previously leaked on every call
+}
+
+void bench_t::test(const string &file_name) {
+  
sum_io_count = 0;
+  sum_cost_time = 0;
+  sum_count = 0;
+
+  unsigned long long tv1 = xdb::get_time();
+  test_file(file_name);
+  unsigned long long tv2 = xdb::get_time();
+
+  double took = (tv2 - tv1) * 1.0 / 1000 / 1000;
+  double cost = sum_cost_time * 1.0 / sum_count;
+
+  printf(
+      "total: %llu, took: %8.2fs, cost: %6.2fμs/op, io "
+      "count: "
+      "%llu\n",
+      sum_count,
+      took,
+      cost,
+      sum_io_count);
+}
+
+}  // namespace xdb
diff --git a/binding/cpp/src/bench.h b/binding/cpp/src/bench.h
new file mode 100644
index 0000000..0bc7f76
--- /dev/null
+++ b/binding/cpp/src/bench.h
@@ -0,0 +1,29 @@
+#ifndef BENCH_H
+#define BENCH_H
+
+#include "search.h"
+
+namespace xdb {
+
+// Checks a search_t against the lines of a source text file and prints totals.
+class bench_t {
+public:
+  bench_t(const string &file_name, int version, int policy);
+
+  // Resets the counters, checks every line of `file_name`, then prints a summary.
+  void test(const string &file_name);
+
+private:
+  void test_one(const ip_t &ip, const string region);
+  void test_line(char *buf);
+  void test_file(const std::string &file_name);
+
+  search_t search;
+
+  unsigned long long sum_io_count;
+  unsigned long long sum_cost_time;
+  unsigned long long sum_count;
+};
+
+}  // namespace xdb
+#endif
diff --git a/binding/cpp/src/edit.cc b/binding/cpp/src/edit.cc
new file mode 100644
index 0000000..ad84ecc
--- /dev/null
+++ b/binding/cpp/src/edit.cc
@@ -0,0 +1,185 @@
+
+#include "edit.h"
+
+namespace xdb {
+
+// Appends one node_t per non-blank line of `name` to `regions`; exits if the
+// file cannot be opened.
+void handle_ip_txt(const string& name, std::list<node_t>& regions) {
+  FILE* f = fopen(name.data(), "r");
+  if (f == NULL)
+    log_exit("can't open " + name);
+
+  char buf[1024];
+  while (fgets(buf, sizeof(buf), f) != NULL) {
+    unsigned int buf_len = strlen(buf);
+    // Trim trailing whitespace. The cast matters: lines may end in non-ASCII
+    // bytes, and passing a negative char to isspace() is undefined behaviour.
+    while (buf_len > 0 && isspace(static_cast<unsigned char>(buf[buf_len - 1])))
+      --buf_len;
+    if (buf_len == 0)
+      continue;
+    buf[buf_len] = '\0';
+    regions.push_back(node_t(buf));
+  }
+
+  fclose(f);
+}
+
+// Loads the new data, sorts it, and collapses overlapping or touching ranges.
+// Overlapping ranges with different regions are ambiguous and abort the run.
+void edit_t::handle_new_file(const std::string& file_name) {
+  handle_ip_txt(file_name, new_regions);  // load
+  new_regions.sort();                     // sort
+
+  // Validate and de-duplicate, comparing each range with its successor.
+  auto it = new_regions.begin();
+
+  for (;;) {
+    if (it == new_regions.end())
+      break;
+    auto next = it;
+    ++next;
+    if (next == new_regions.end())
+      break;
+    if (it->ip1 > it->ip2)
+      it = new_regions.erase(it);  // invalid range: drop it
+    else if (it->ip1 == next->ip1 || next->ip1 <= it->ip2) {
+      // Overlapping ranges
+      if (it->region != next->region)
+        log_exit("数据有二义性: " + it->to_string() + ", " +
+                 next->to_string());
+      it->ip2 = std::max(it->ip2, next->ip2);
+      new_regions.erase(next);
+    } else if (it->ip2 + 1 == next->ip1 && it->region == next->region) {
+      // Touching ranges with the same region
+      it->ip2 = next->ip2;
+      new_regions.erase(next);
+    } else {
+      ++it;
+    }
+  }
+}
+
+// Loads the existing data file into old_regions (no sorting or checks here).
+void edit_t::handle_old_file(const std::string& file_name) {
+  handle_ip_txt(file_name, old_regions);
+}
+
+// Overlays new_regions onto old_regions, splitting or shrinking old ranges.
+// An old range shrunk to nothing ends up with ip1 > ip2 and is dropped on write.
+void edit_t::merge() {
+  auto it1 = old_regions.begin();
+  auto it2 = new_regions.begin();
+
+  for (;;) {
+    if (it2 == new_regions.end())
+      break;
+    if (it2->ip1 > it2->ip2) {
+      ++it2;
+      continue;
+    }
+    // Skip old ranges that end before it2 starts.
+    while (it1 != old_regions.end() && it1->ip2 < it2->ip1)
+      ++it1;
+    if (it1 == old_regions.end()) {
+      // No old range reaches it2 (empty old file, or new data past its end).
+      // Dereferencing end() is undefined, so append the remaining new ranges.
+      old_regions.insert(it1, it2, new_regions.end());
+      break;
+    }
+    if (it1->ip2 <= it2->ip2) {
+      // it1->ip1 it2->ip1 it1->ip2 it2->ip2
+      node_t node;
+      node.ip1 = it2->ip1;
+      node.ip2 = it1->ip2;
+      node.region = it2->region;
+
+      it1->ip2 = node.ip1 - 1;
+      it2->ip1 = node.ip2 + 1;
+
+      ++it1;
+      it1 = old_regions.insert(it1, node);
+      ++it1;
+    } else {
+      // it1->ip1 it2->ip1 it2->ip2 it1->ip2
+      node_t node;
+      node.ip1 = it2->ip2 + 1;
+      node.ip2 = it1->ip2;
+      node.region = it1->region;
+
+      it1->ip2 = it2->ip1 - 1;
+
+      ++it1;
+      it1 = old_regions.insert(it1, *it2);
+
+      ++it1;
+      it1 = old_regions.insert(it1, node);
+
+      ++it2;
+    }
+  }
+}
+
+// Rewrites `file_name` from old_regions after dropping invalid ranges and merging equal-region neighbours.
+void edit_t::write_old_file(const std::string& file_name) {
+  FILE* f = fopen(file_name.data(), "w");
+  if (f == NULL)
+    log_exit("can't open " + file_name);
+
+  auto it = old_regions.begin();
+
+  // Remove invalid ranges (ip1 > ip2)
+  for (;;) {
+    if (it == old_regions.end())
+      break;
+    if (it->ip1 > it->ip2)
+      it = old_regions.erase(it);
+    else
+      ++it;
+  }
+
+  // Merge neighbouring entries whose region is identical
+  it = old_regions.begin();
+  for (;;) {
+    if (it == old_regions.end())
+      break;
+    auto next = it;
+    ++next;
+    if (next == 
old_regions.end()) + break; + if (it->region == next->region) { + it->ip2 = next->ip2; + old_regions.erase(next); + } else { + ++it; + } + } + + for (auto& d : old_regions) { + string res = + d.ip1.to_string() + "|" + d.ip2.to_string() + "|" + d.region + "\n"; + fputs(res.data(), f); + } + + fclose(f); +} + +edit_t::edit_t(const string& name_old, const string& name_new, int version) { + unsigned long long tv1 = get_time(); + + init_xdb(version); + + handle_new_file(name_new); + handle_old_file(name_old); + merge(); + write_old_file(name_old); + + unsigned long long tv2 = get_time(); + + double took = (tv2 - tv1) * 1.0 / 1000 / 1000; + + printf("took: %.2fs\n", took); +} + +} // namespace xdb diff --git a/binding/cpp/src/edit.h b/binding/cpp/src/edit.h new file mode 100644 index 0000000..360b741 --- /dev/null +++ b/binding/cpp/src/edit.h @@ -0,0 +1,24 @@ +#ifndef EDIT_H +#define EDIT_H + +#include "ip.h" + +namespace xdb { + +class edit_t { +public: + edit_t(const string& old_name, const string& new_name, int version); + +private: + void handle_new_file(const string& file_name); + void handle_old_file(const string& file_name); + void merge(); + void write_old_file(const string& file_name); + + std::list old_regions; + std::list new_regions; +}; + +} // namespace xdb + +#endif diff --git a/binding/cpp/src/header.cc b/binding/cpp/src/header.cc new file mode 100644 index 0000000..6abcde1 --- /dev/null +++ b/binding/cpp/src/header.cc @@ -0,0 +1,41 @@ + +#include "header.h" + +namespace xdb { + +header_t::header_t(FILE* db) { + read_bin(0, header, sizeof(header), db); +} + +header_t::~header_t() { +} + +int header_t::version() { + return to_int(header, 2); // 版本号(2) +} + +int header_t::index_policy() { + return to_int(header + 2, 2); // 缓存策略(2) +} + +int header_t::create_at() { + return to_int(header + 4, 4); // 文件生成时间(4) +} + +int header_t::index_start() { + return to_int(header + 8, 4); // 索引起始地址(4) +} + +int header_t::index_end() { + return to_int(header + 12, 4); // 
索引结束地址(4) +} + +int header_t::ip_version() { + return to_int(header + 16, 2); // IP 版本(2) +} + +int header_t::ptr() { + return to_int(header + 18, 2); // 指针字节数(2) +} + +} // namespace xdb diff --git a/binding/cpp/src/header.h b/binding/cpp/src/header.h new file mode 100644 index 0000000..f0f4ace --- /dev/null +++ b/binding/cpp/src/header.h @@ -0,0 +1,27 @@ +#ifndef HEADER_H +#define HEADER_H + +#include "base.h" + +namespace xdb { + +class header_t { +public: + header_t(FILE* db); + virtual ~header_t(); + + int version(); // 版本号 + int index_policy(); // 缓存策略 + int create_at(); // 文件生成时间 + int index_start(); // 索引起始地址 + int index_end(); // 索引结束地址 + int ip_version(); // IP 版本 + int ptr(); // 指针字节数 + +protected: + char header[length_header]; +}; + +} // namespace xdb + +#endif diff --git a/binding/cpp/src/ip.cc b/binding/cpp/src/ip.cc new file mode 100644 index 0000000..429f4f1 --- /dev/null +++ b/binding/cpp/src/ip.cc @@ -0,0 +1,158 @@ + +#include "ip.h" + +namespace xdb { + +ip_t::ip_t() { + memset(p, '\0', sizeof(p)); +} + +ip_t::ip_t(const ip_t& rhs, int val) { + memcpy(p, rhs.p, ip_size); + + if (val == 0 || val == 255) + for (int i = 2; i < ip_size; ++i) + p[i] = val; +} + +ip_t::ip_t(const char* p) { + from_xdb(p); +} + +bool ip_t::from_str(const string& str) { + int af_inet = ip_version == ipv4 ? 
AF_INET : AF_INET6; + return inet_pton(af_inet, str.data(), p) == 1; +} + +void ip_t::from_xdb(const char str[16]) { + for (int i = 0; i < ip_size; ++i) + if (ip_version == ipv6) + p[i] = str[i]; + else + p[i] = str[ip_size - 1 - i]; +} + +ip_t& ip_t::operator=(const ip_t& rhs) { + memcpy(p, rhs.p, ip_size); + return *this; +} + +int ip_t::compare(const ip_t& rhs) const { + for (int i = 0; i < ip_size; ++i) { + if ((unsigned char)p[i] > (unsigned char)rhs.p[i]) + return 1; + if ((unsigned char)p[i] < (unsigned char)rhs.p[i]) + return -1; + } + return 0; +} + +bool ip_t::operator<(const ip_t& rhs) const { + return compare(rhs) < 0; +} + +bool ip_t::operator<=(const ip_t& rhs) const { + return compare(rhs) <= 0; +} + +bool ip_t::operator>(const ip_t& rhs) const { + return compare(rhs) > 0; +} + +bool ip_t::operator>=(const ip_t& rhs) const { + return compare(rhs) >= 0; +} + +bool ip_t::operator==(const ip_t& rhs) const { + return compare(rhs) == 0; +} + +bool ip_t::operator!=(const ip_t& rhs) const { + return compare(rhs) != 0; +} + +string ip_t::to_string() const { + char buf[INET6_ADDRSTRLEN + 1]; + int af_inet = ip_version == ipv4 ? 
AF_INET : AF_INET6; + inet_ntop(af_inet, p, buf, sizeof(buf)); + return string(buf); +} + +string ip_t::to_bit() const { + string str; + for (int i = 0; i < ip_size; ++i) + if (ip_version == ipv6) + str.push_back(p[i]); + else + str.push_back(p[ip_size - 1 - i]); + return str; +} + +ip_t operator+(const ip_t& lhs, int v) { + ip_t ip; + + int i = ip_size; + while (--i >= 0) { + v += lhs.p[i]; + ip.p[i] = v % 256; + v /= 256; + } + return ip; +} + +ip_t operator-(const ip_t& lhs, int v) { + ip_t ip; + + int i = ip_size; + v = -v; + while (--i >= 0) { + v += lhs.p[i]; + if (v == -1) + ip.p[i] = 255; + else { + ip.p[i] = v; + v = 0; + } + } + return ip; +} + +// node_t +node_t::node_t() { +} + +node_t::node_t(char* buf) { + char* pos1 = strchr(buf, '|'); + + if (pos1 == NULL) + log_exit("invalid data: " + std::string(buf)); + char* pos2 = strchr(pos1 + 1, '|'); + if (pos2 == NULL) + log_exit("invalid data: " + std::string(buf)); + *pos1 = '\0'; + *pos2 = '\0'; + + region = pos2 + 1; + + if (!ip1.from_str(buf) || !ip2.from_str(pos1 + 1) || ip2 < ip1 || + region.empty()) { + *pos1 = *pos2 = '|'; + log_exit(string("invalid data: ") + buf); + } +} + +bool node_t::operator<(const node_t& rhs) const { + if (ip1 < rhs.ip1) + return true; + return ip2 < rhs.ip2; +} + +string node_t::to_string() const { + return ip1.to_string() + "|" + ip2.to_string() + "|" + region; +} + +string node_t::to_bit() const { + return ip1.to_bit() + ip2.to_bit(); +} + +} // namespace xdb diff --git a/binding/cpp/src/ip.h b/binding/cpp/src/ip.h new file mode 100644 index 0000000..5383766 --- /dev/null +++ b/binding/cpp/src/ip.h @@ -0,0 +1,52 @@ +#ifndef IP_H +#define IP_H + +#include "base.h" + +namespace xdb { + +struct ip_t { + unsigned char p[16]; + + ip_t(); + ip_t(const char* p); + // val 为 0 或 255 时, 将 ip 的后几位置为 val + ip_t(const ip_t& rhs, int val = -1); + + bool from_str(const string& str); + void from_xdb(const char str[16]); + + ip_t& operator=(const ip_t& rhs); + + int compare(const ip_t& 
rhs) const; + bool operator<(const ip_t& rhs) const; + bool operator<=(const ip_t& rhs) const; + bool operator>(const ip_t& rhs) const; + bool operator>=(const ip_t& rhs) const; + bool operator==(const ip_t& rhs) const; + bool operator!=(const ip_t& rhs) const; + + string to_string() const; + string to_bit() const; +}; + +ip_t operator+(const ip_t& lhs, int v); +ip_t operator-(const ip_t& lhs, int v); + +struct node_t { + ip_t ip1; + ip_t ip2; + string region; + + node_t(); + node_t(char* buf); + + bool operator<(const node_t& rhs) const; + + string to_string() const; + string to_bit() const; +}; + +} // namespace xdb + +#endif diff --git a/binding/cpp/src/make.cc b/binding/cpp/src/make.cc new file mode 100644 index 0000000..fb57d24 --- /dev/null +++ b/binding/cpp/src/make.cc @@ -0,0 +1,167 @@ + +#include "make.h" + +namespace xdb { + +void make_t::vector_index_push_back(int row, int col, const node_t &node) { + vector_index[row][col].push_back( + std::make_pair(node.to_bit(), string(node.region))); +} + +void make_t::vector_index_push_back(node_t &node) { + ip_t ip1 = node.ip1; + ip_t ip2 = node.ip2; + + unsigned ip1_1 = ip1.p[0]; + unsigned ip1_2 = ip1.p[1]; + unsigned ip2_1 = ip2.p[0]; + unsigned ip2_2 = ip2.p[1]; + + if (ip1_1 == ip2_1 && ip1_2 == ip2_2) { + vector_index_push_back(ip1_1, ip1_2, node); + return; + } + + node.ip1 = ip1; + node.ip2 = ip_t(ip1, 255); + vector_index_push_back(ip1_1, ip1_2, node); + + node.ip1 = ip_t(ip2, 0); + node.ip2 = ip2; + vector_index_push_back(ip2_1, ip2_2, node); + + for (;;) { + ++ip1_2; + if (ip1_2 == 256) { + ++ip1_1; + ip1_2 = 0; + } + if (ip1_1 == ip2_1 && ip1_2 == ip2_2) + break; + ip1.p[0] = ip1_1; + ip1.p[1] = ip1_2; + node.ip1 = ip_t(ip1, 0); + node.ip2 = ip_t(ip1, 255); + vector_index_push_back(ip1_1, ip1_2, node); + } +} + +void make_t::handle_input_help(char *buf) { + // 去掉多余的空 + unsigned int buf_len = strlen(buf); + while (buf_len > 0 && isspace(buf[buf_len - 1])) + --buf_len; + if (buf_len == 0) + return; + 
buf[buf_len] = '\0'; + + node_t node(buf); + + if (node.ip1 < next_ip) { + log_exit("ip 未排序: " + node.ip1.to_string() + ", " + + next_ip.to_string()); + } + + next_ip = node.ip2 + 1; + + if (region.find(node.region) == region.end()) { + region[node.region] = region_index; + region_index += node.region.size(); + } + + vector_index_push_back(node); +} + +void make_t::handle_input(const std::string &file_name) { + FILE *src = fopen(file_name.data(), "r"); + if (src == NULL) + log_exit("can't open " + file_name); + + char buf[1024]; + while (fgets(buf, sizeof(buf), src) != NULL) + handle_input_help(buf); + fclose(src); +} + +void make_t::handle_header() { + char buf[length_header]; + memset(buf, 0, length_header); + write_ushort(3, buf); // 版本号 + write_ushort(1, buf + 2); // 缓存策略 + write_uint(time(NULL), buf + 4); // 时间 + // 索引 + unsigned int content_left = length_header + length_vector; + for (auto &d : region) + content_left += d.first.size(); + unsigned int content_right = content_left; + + for (int i = 0; i < 256; ++i) + for (int j = 0; j < 256; ++j) + content_right += vector_index[i][j].size() * content_size; + content_right -= content_size; + write_uint(content_left, buf + 8); + write_uint(content_right, buf + 12); + write_ushort(ip_version, buf + 16); // IP + write_ushort(4, buf + 18); // 指针数 + + write_string(buf, length_header, db); +} + +void make_t::handle_vector_index() { + unsigned index = length_header + length_vector; + for (auto &d : region) + index += d.first.size(); + for (unsigned i = 0; i < 256; ++i) + for (unsigned j = 0; j < 256; ++j) + if (vector_index[i][j].size() == 0) { + write_uint(0, db); + write_uint(0, db); + } else { + write_uint(index, db); + index += content_size * vector_index[i][j].size(); + write_uint(index, db); + } +} + +void make_t::handle_region() { + for (auto &d : region) { + fseek(db, d.second, SEEK_SET); + write_string(d.first.data(), d.first.size(), db); + } +} + +void make_t::handle_content() { + fseek(db, 0, SEEK_END); + 
for (unsigned i = 0; i < 256; ++i) + for (unsigned j = 0; j < 256; ++j) + for (auto d : vector_index[i][j]) { + write_string(d.first.data(), d.first.size(), db); + write_ushort(d.second.size(), db); + write_uint(region[d.second], db); + } +} + +make_t::make_t(const string &src, const string &dst, int version) + : region_index(length_vector + length_header) { + unsigned long long tv1 = get_time(); + + init_xdb(version); + + handle_input(src); + + db = fopen(dst.data(), "w"); + if (db == NULL) + log_exit("can't open " + dst); + + handle_header(); + handle_vector_index(); + handle_region(); + handle_content(); + + fclose(db); + + unsigned long long tv2 = get_time(); + printf("took: %.2fs\n", (tv2 - tv1) * 1.0 / 1000 / 1000); +} + +} // namespace xdb diff --git a/binding/cpp/src/make.h b/binding/cpp/src/make.h new file mode 100644 index 0000000..1656d1a --- /dev/null +++ b/binding/cpp/src/make.h @@ -0,0 +1,35 @@ +#ifndef MAKE_H +#define MAKE_H + +#include "ip.h" + +namespace xdb { + +class make_t { +public: + make_t(const string &src, const string &dst, int version); + +private: + void vector_index_push_back(int row, int col, const node_t &node); + void vector_index_push_back(node_t &node); + void handle_input_help(char buf[]); + void handle_input(const std::string &file_name); + + void handle_header(); + void handle_vector_index(); + void handle_region(); + void handle_content(); + + FILE *db = NULL; + + std::vector> vector_index[256][256]; + + std::unordered_map region; + + unsigned region_index; + ip_t next_ip; +}; + +} // namespace xdb + +#endif diff --git a/binding/cpp/src/search.cc b/binding/cpp/src/search.cc new file mode 100644 index 0000000..1e89aee --- /dev/null +++ b/binding/cpp/src/search.cc @@ -0,0 +1,137 @@ + +#include "search.h" + +namespace xdb { + +search_t::search_t(const string &file, int version, int p) + : db(fopen(file.data(), "r")), header(db), policy(p) { + init_xdb(version); + + if (db == NULL) + log_exit("can't open " + file); + if 
(header.ip_version() != version) + log_exit("ip 版本不匹配"); + + if (policy != policy_file) { + read_bin(length_header, vector, length_vector, db); + if (policy == policy_content) { + fseek(db, 0, SEEK_END); + int size = ftell(db) - length_vector - length_header; + content = (char *)malloc(size); + read_bin(length_vector + length_header, content, size, db); + } + } +} + +search_t::~search_t() { + fclose(db); + if (policy == policy_content) + free(content); +} + +int search_t::get_io_count() { + return io_count; +} + +int search_t::get_cost_time() { + return cost_time; +} + +char const *search_t::get_content_index_help(int index) { + if (policy != policy_file) + return vector + index; + + ++io_count; + static char v[8]; + read_bin(length_header + index, v, sizeof(v), db); + return v; +} + +void search_t::get_content_index(const ip_t &ip, int &left, int &right) { + int index = ((unsigned char)ip.p[0] * 256 + (unsigned char)ip.p[1]) * 8; + + const char *p = get_content_index_help(index); + left = to_uint(p); + right = to_uint(p + 4); +} + +char const *search_t::get_content_help(int index) { + if (policy == policy_content) + return content + index - length_header - length_vector; + ++io_count; + static char v[16 + 16 + 2 + 4]; + read_bin(index, v, content_size, db); + return v; +} + +string search_t::get_region(int index, int len) { + if (policy == policy_content) + return string(content + index - length_header - length_vector, len); + ++io_count; + char *p = (char *)malloc(sizeof(char) * len); + read_bin(index, p, len, db); + string res(p, len); + free(p); + return res; +} + +void search_t::get_content(int index, + ip_t &ip_left, + ip_t &ip_right, + int ®ion_len, + int ®ion_index) { + const char *p = get_content_help(index); + + ip_left.from_xdb(p); + ip_right.from_xdb(p + ip_size); + + region_len = to_ushort(p + ip_size * 2); + region_index = to_uint(p + ip_size * 2 + 2); +} + +string search_t::search(const ip_t &ip) { + io_count = 0; + + int content_left, content_right; 
+ get_content_index(ip, content_left, content_right); + + if (content_left == 0 || content_right == 0) + return ""; + + ip_t ip_left, ip_right; + int region_len; + int region_index; + + int left = 0; + int right = (content_right - content_left) / content_size; + + for (;;) { + int mid = left + (right - left) / 2; + int mid_index = content_left + mid * content_size; + get_content(mid_index, ip_left, ip_right, region_len, region_index); + + // ip ip_left ip_right + if (ip < ip_left) + right = mid - 1; + // ip_left ip_right ip + else if (ip_right < ip) + left = mid + 1; + else + return get_region(region_index, region_len); + } +} + +string search_t::search(const string &str) { + unsigned long long t1 = get_time(); + + ip_t ip; + if (ip.from_str(str) == false) + return "invalid ipv" + std::to_string(ip_version) + ": " + str; + string region = search(ip); + + unsigned long long t2 = get_time(); + cost_time = t2 - t1; + return region; +} + +} // namespace xdb diff --git a/binding/cpp/src/search.h b/binding/cpp/src/search.h new file mode 100644 index 0000000..7dce35a --- /dev/null +++ b/binding/cpp/src/search.h @@ -0,0 +1,49 @@ +#ifndef SEARCH_H +#define SEARCH_H + +#include "header.h" +#include "ip.h" + +namespace xdb { + +class search_t { +protected: + FILE *db; + + header_t header; + + int policy; + + int io_count; + int cost_time; + + char vector[length_vector]; + char *content; + +public: + search_t(const string &file_name, int version, int policy); + virtual ~search_t(); + + int get_io_count(); + int get_cost_time(); + + string search(const string &str); + +protected: + string search(const ip_t &ip); + + void get_content_index(const ip_t &ip1, int &left, int &right); + void get_content(int index, + ip_t &left, + ip_t &right, + int ®ion_len, + int ®ion_index); + + char const *get_content_index_help(int index); + char const *get_content_help(int index); + string get_region(int index, int len); +}; + +} // namespace xdb + +#endif diff --git a/binding/cpp/test/bench.cc 
b/binding/cpp/test/bench.cc new file mode 100644 index 0000000..3458d58 --- /dev/null +++ b/binding/cpp/test/bench.cc @@ -0,0 +1,31 @@ + +#include "../src/bench.h" + +std::map prompt; + +void test_ipv4(int policy) { + std::cout << "测试 IPv4, " << prompt[policy]; + xdb::bench_t("../../data/ip2region_v4.xdb", xdb::ipv4, policy) + .test("../../data/ipv4_source.txt"); +} + +void test_ipv6(int policy) { + std::cout << "测试 IPv6, " << prompt[policy]; + xdb::bench_t("../../data/ip2region_v6.xdb", xdb::ipv6, policy) + .test("../../data/ipv6_source.txt"); +} + +int main() { + prompt[xdb::policy_file] = " 不缓存, "; + prompt[xdb::policy_vector] = "部分缓存, "; + prompt[xdb::policy_content] = "全部缓存, "; + + test_ipv4(xdb::policy_file); + test_ipv4(xdb::policy_vector); + test_ipv4(xdb::policy_content); + + test_ipv6(xdb::policy_file); + test_ipv6(xdb::policy_vector); + test_ipv6(xdb::policy_content); + return 0; +} diff --git a/binding/cpp/test/edit_v4.cc b/binding/cpp/test/edit_v4.cc new file mode 100644 index 0000000..692c27c --- /dev/null +++ b/binding/cpp/test/edit_v4.cc @@ -0,0 +1,9 @@ + +#include "../src/edit.h" + +int main() { + std::string file_name_old = "../../data/ipv4_source.txt"; + std::string file_name_new = "./1.txt"; + xdb::edit_t xdb(file_name_old, file_name_new, xdb::ipv4); + return 0; +} diff --git a/binding/cpp/test/edit_v6.cc b/binding/cpp/test/edit_v6.cc new file mode 100644 index 0000000..eb25c2e --- /dev/null +++ b/binding/cpp/test/edit_v6.cc @@ -0,0 +1,9 @@ + +#include "../src/edit.h" + +int main() { + std::string file_name_old = "../../data/ipv6_source.txt"; + std::string file_name_new = "./1.txt"; + xdb::edit_t xdb(file_name_old, file_name_new, xdb::ipv6); + return 0; +} diff --git a/binding/cpp/test/header.cc b/binding/cpp/test/header.cc new file mode 100644 index 0000000..e5ce60c --- /dev/null +++ b/binding/cpp/test/header.cc @@ -0,0 +1,30 @@ + +#include "../src/header.h" + +void test(const std::string& prompt, const std::string& file_name) { + std::cout << 
prompt << std::endl; + + xdb::header_t head(fopen(file_name.data(), "r")); + + std::cout << "版本号: " << head.version() << std::endl; + std::cout << "缓存策略: " << head.index_policy() << std::endl; + + time_t rawtime = head.create_at(); + struct tm* info = localtime(&rawtime); + char buf[80]; + strftime(buf, 80, "%Y-%m-%d %H:%M:%S", info); + + std::cout << "文件生成时间: " << buf << std::endl; + std::cout << "索引起始地址: " << head.index_start() << std::endl; + std::cout << "索引结束地址: " << head.index_end() << std::endl; + std::cout << "IP版本: " << head.ip_version() << std::endl; + std::cout << "指针字节数: " << head.ptr() << std::endl; + + std::cout << std::endl; +} + +int main() { + test("测试 IPv4", "../../data/ip2region_v4.xdb"); + test("测试 IPv6", "../../data/ip2region_v6.xdb"); + return 0; +} diff --git a/binding/cpp/test/make.cc b/binding/cpp/test/make.cc new file mode 100644 index 0000000..657a94a --- /dev/null +++ b/binding/cpp/test/make.cc @@ -0,0 +1,26 @@ + +#include "../src/make.h" + +void test(const std::string& prompt, + const std::string& filename_xdb, + const std::string& filename_src, + int version + +) { + std::cout << prompt; + xdb::make_t(filename_xdb, filename_src, version); +} + +int main() { + test("生成 ipv4 的 xdb 文件, ", + "../../data/ipv4_source.txt", + "./ip2region_v4.xdb", + xdb::ipv4); + + test("生成 ipv6 的 xdb 文件, ", + "../../data/ipv6_source.txt", + "./ip2region_v6.xdb", + xdb::ipv6); + + return 0; +} diff --git a/binding/cpp/test/search.cc b/binding/cpp/test/search.cc new file mode 100644 index 0000000..670a224 --- /dev/null +++ b/binding/cpp/test/search.cc @@ -0,0 +1,45 @@ + +#include "../src/search.h" + +std::map prompt; + +void test(xdb::search_t& s, const std::string& ip, const std::string& region) { + if (s.search(ip) != region) + xdb::log_exit("测试失败, ip " + ip + ", region " + region); +} + +void test_ipv4(int policy) { + std::cout << "测试 IPv4 " << prompt[policy]; + + xdb::search_t s("../../data/ip2region_v4.xdb", xdb::ipv4, policy); + test(s, "0.0.0.0", 
"0|0|内网IP|内网IP"); + test(s, "1.2.3.4", "美国|华盛顿|0|谷歌"); + test(s, "255.255.255.255", "0|0|内网IP|内网IP"); + + std::cout << " 成功" << std::endl; +} + +void test_ipv6(int policy) { + std::cout << "测试 IPv6 " << prompt[policy]; + + xdb::search_t s("../../data/ip2region_v6.xdb", xdb::ipv6, policy); + test(s, "::1", ""); + test(s, "2001:200:124::", "日本|东京都|千代田区|专线用户"); + + std::cout << " 成功" << std::endl; +} + +int main() { + prompt[xdb::policy_file] = " 不缓存:"; + prompt[xdb::policy_vector] = "部分缓存:"; + prompt[xdb::policy_content] = "全部缓存:"; + + test_ipv4(xdb::policy_file); + test_ipv4(xdb::policy_vector); + test_ipv4(xdb::policy_content); + + test_ipv6(xdb::policy_file); + test_ipv6(xdb::policy_vector); + test_ipv6(xdb::policy_content); + return 0; +} diff --git a/binding/cpp/xdb_bench.cc b/binding/cpp/xdb_bench.cc deleted file mode 100644 index a4e04cb..0000000 --- a/binding/cpp/xdb_bench.cc +++ /dev/null @@ -1,135 +0,0 @@ - -#include "xdb_bench.h" - -#include -#include -#include -#include - -#include -#include - -static void log_exit(const std::string &msg) { - std::cout << msg << std::endl; - exit(-1); -} - -static unsigned long long get_time() { - struct timeval tv1; - gettimeofday(&tv1, NULL); - return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec; -} - -static bool ip2uint(const char *buf, unsigned int &ip) { - struct in_addr addr; - if (inet_pton(AF_INET, buf, &addr) == 0) - return false; - // 网络字节序为大端存储, 在此转换为小端存储 - ip = (((addr.s_addr >> 0) & 0xFF) << 24) | - (((addr.s_addr >> 8) & 0xFF) << 16) | - (((addr.s_addr >> 16) & 0xFF) << 8) | - (((addr.s_addr >> 24) & 0xFF) << 0); - return true; -} - -static std::string uint2ip(unsigned int ip) { - char buf[16]; - snprintf(buf, - sizeof(buf), - "%d.%d.%d.%d", - (ip >> 24) & 0xFF, - (ip >> 16) & 0xFF, - (ip >> 8) & 0xFF, - ip & 0xFF); - return std::string(buf); -} - -xdb_bench_t::xdb_bench_t(const std::string &file_name) : xdb_search(file_name) { -} - -void xdb_bench_t::init_file() { - xdb_search.init_file(); -} 
- -void xdb_bench_t::init_vector_index() { - xdb_search.init_vector_index(); -} - -void xdb_bench_t::init_content() { - xdb_search.init_content(); -} - -void xdb_bench_t::bench_test_one(unsigned int ip_uint, const char *region) { - if (xdb_search.search(uint2ip(ip_uint)) != region) - log_exit("failed: " + uint2ip(ip_uint)); - sum_io_count += xdb_search.get_io_count(); - sum_cost_time += xdb_search.get_cost_time(); - sum_count++; -} - -void xdb_bench_t::bench_test_line(char *buf) { - size_t buf_len = strlen(buf); - if (buf_len == 0) - return; - buf[buf_len - 1] = '\0'; // 去掉换行符 - - char *pos1 = strchr(buf, '|'); - - if (pos1 == NULL) - log_exit("invalid data: " + std::string(buf)); - char *pos2 = strchr(pos1 + 1, '|'); - if (pos2 == NULL) - log_exit("invalid data: " + std::string(buf)); - *pos1 = '\0'; - *pos2 = '\0'; - - unsigned int ip1, ip2; - if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2) { - *pos1 = *pos2 = '|'; - log_exit(std::string("invalid data: ") + buf); - } - - const char *region = pos2 + 1; - - unsigned int ip_mid = ip1 + (ip2 - ip1) / 2; - std::vector ip_vec; - ip_vec.push_back(ip1); - ip_vec.push_back(ip1 + (ip_mid - ip1) / 2); - ip_vec.push_back(ip_mid); - ip_vec.push_back(ip_mid + (ip2 - ip_mid) / 2); - ip_vec.push_back(ip2); - - for (auto &d : ip_vec) - bench_test_one(d, region); -} - -void xdb_bench_t::bench_test_file(const std::string &file_name) { - FILE *f = fopen(file_name.data(), "r"); - if (f == NULL) - log_exit("can't open " + file_name); - char buf[1024]; - while (fgets(buf, sizeof(buf), f) != NULL) - bench_test_line(buf); -} - -void xdb_bench_t::bench(const std::string &file_name) { - sum_io_count = 0; - sum_cost_time = 0; - sum_count = 0; - - unsigned long long tv1 = get_time(); - bench_test_file(file_name); - unsigned long long tv2 = get_time(); - - double took = (tv2 - tv1) * 1.0 / 1000 / 1000; - double cost = sum_cost_time * 1.0 / sum_count; - - printf( - "total: %llu, took: %.2f s, cost: %.2f μs/op, io " - "count: " 
- "%llu\n", - sum_count, - took, - cost, - sum_io_count); -} diff --git a/binding/cpp/xdb_bench.h b/binding/cpp/xdb_bench.h deleted file mode 100644 index 1ec84c9..0000000 --- a/binding/cpp/xdb_bench.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef XDB_BENCH_H -#define XDB_BENCH_H - -#include "xdb_search.h" - -class xdb_bench_t { - public: - xdb_bench_t(const std::string &file_name); - - void init_file(); - void init_vector_index(); - void init_content(); - - void bench(const std::string &file_name); - - private: - void bench_test_one(unsigned int ip_uint, const char *region); - void bench_test_line(char *buf); - void bench_test_file(const std::string &file_name); - - xdb_search_t xdb_search; - - unsigned long long sum_io_count; - unsigned long long sum_cost_time; - unsigned long long sum_count; -}; - -#endif diff --git a/binding/cpp/xdb_bench_test.cc b/binding/cpp/xdb_bench_test.cc deleted file mode 100644 index c282fd7..0000000 --- a/binding/cpp/xdb_bench_test.cc +++ /dev/null @@ -1,70 +0,0 @@ - -#include "xdb_bench.h" - -#include - -#include - -void print_help(int argc, char* argv[]) { - printf("./xdb_bench [command options]\n"); - printf("options:\n"); - printf(" --db string ip2region binary xdb file path\n"); - printf(" --src string source ip text file path\n"); - printf( - " --cache-policy string cache policy: " - "file/vector_index/content\n"); - printf(" --help print help\n"); - exit(-1); -} - -int main(int argc, char* argv[]) { - struct option long_options[] = { - {"db", required_argument, 0, 'd'}, - {"cache-policy", required_argument, 0, 't'}, - {"src", required_argument, 0, 's'}, - {"help", no_argument, 0, 'h'}, - {0, 0, 0, 0 } - }; - - std::string db_file_name = "../../data/ip2region.xdb"; - std::string src_file_name = "../../data/ip.merge.txt"; - std::string cache_policy = "vector_index"; - - while (1) { - int c = getopt_long(argc, argv, "", long_options, NULL); - if (c == -1) - break; - switch (c) { - case 'd': - db_file_name = optarg; - break; - case 'h': - 
print_help(argc, argv); - break; - case 't': - cache_policy = optarg; - break; - case 's': - src_file_name = optarg; - break; - case '?': - exit(-1); - } - } - - xdb_bench_t xdb(db_file_name); - - if (cache_policy == "content") - xdb.init_content(); - else if (cache_policy == "vector_index") - xdb.init_vector_index(); - else if (cache_policy == "file") - xdb.init_file(); - else { - std::cout << "invalid cache policy: " << cache_policy << std::endl; - exit(-1); - } - - xdb.bench(src_file_name); - return 0; -} diff --git a/binding/cpp/xdb_search.cc b/binding/cpp/xdb_search.cc deleted file mode 100644 index e5937bf..0000000 --- a/binding/cpp/xdb_search.cc +++ /dev/null @@ -1,190 +0,0 @@ - -#include "xdb_search.h" - -#include -#include - -#include - -static void log_exit(const std::string &msg) { - std::cout << msg << std::endl; - exit(-1); -} - -static unsigned long long get_time() { - struct timeval tv1; - gettimeofday(&tv1, NULL); - return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec; -} - -static void read_bin(int index, char *buf, size_t len, FILE *db) { - fseek(db, index, SEEK_SET); - if (fread(buf, 1, len, db) != len) - log_exit(__func__); -} - -static unsigned int read_uint(const char *buf) { - return ((buf[0]) & 0x000000FF) | ((buf[1] << 8) & 0x0000FF00) | - ((buf[2] << 16) & 0x00FF0000) | ((buf[3] << 24) & 0xFF000000); -} - -static unsigned short read_ushort(const char *buf) { - return ((buf[0]) & 0x000000FF) | ((buf[1] << 8) & 0x0000FF00); -} - -static bool ip2uint(const char *buf, unsigned int &ip) { - struct in_addr addr; - if (inet_pton(AF_INET, buf, &addr) == 0) - return false; - // 网络字节序为大端存储, 在此转换为小端存储 - ip = (((addr.s_addr >> 0) & 0xFF) << 24) | - (((addr.s_addr >> 8) & 0xFF) << 16) | - (((addr.s_addr >> 16) & 0xFF) << 8) | - (((addr.s_addr >> 24) & 0xFF) << 0); - return true; -} - -void xdb_search_t::get_content_index(unsigned int ip, - unsigned int &left, - unsigned int &right) { - unsigned int ip_1 = (ip >> 24) & 0xFF; - unsigned int 
ip_2 = (ip >> 16) & 0xFF; - unsigned int index = (ip_1 * vector_index_cols + ip_2) * vector_index_size; - - if (content != NULL) { - left = read_uint(content + index + header_length); - right = read_uint(content + index + header_length + 4); - } else if (vector_index != NULL) { - left = read_uint(vector_index + index); - right = read_uint(vector_index + index + 4); - } else { - ++io_count; - char buf[8]; - read_bin(header_length + index, buf, sizeof(buf), db); - left = read_uint(buf); - right = read_uint(buf + 4); - } -} - -void xdb_search_t::get_content(unsigned int index, - unsigned int &ip_left, - unsigned int &ip_right, - unsigned short ®ion_len, - unsigned int ®ion_index) { - char buf[segment_index_size]; // 4 + 4 + 2 + 4 - const char *p; - - if (content != NULL) { - p = content + index; - } else { - ++io_count; - read_bin(index, buf, sizeof(buf), db); - p = buf; - } - ip_left = read_uint(p); - ip_right = read_uint(p + 4); - region_len = read_ushort(p + 8); - region_index = read_uint(p + 10); -} - -std::string xdb_search_t::get_region(unsigned int index, unsigned short len) { - if (content != NULL) { - return std::string(content + index, len); - } else { - ++io_count; - char *buf = (char *)malloc(sizeof(char) * len); - read_bin(index, buf, len, db); - std::string res(buf, len); - free(buf); - return res; - } -} - -xdb_search_t::xdb_search_t(const std::string &file_name) { - db = fopen(file_name.data(), "r"); - vector_index = NULL; - content = NULL; - - if (db == NULL) - log_exit("can't open " + file_name); -} - -void xdb_search_t::init_file() { -} - -void xdb_search_t::init_vector_index() { - vector_index = (char *)malloc(vector_index_length); - read_bin(header_length, vector_index, vector_index_length, db); -} - -void xdb_search_t::init_content() { - fseek(db, 0, SEEK_END); - unsigned int size = ftell(db); - content = (char *)malloc(size); - read_bin(0, content, size, db); -} - -xdb_search_t::~xdb_search_t() { - if (db != NULL) { - fclose(db); - db = NULL; - 
} - if (vector_index != NULL) { - free(vector_index); - vector_index = NULL; - } - if (content != NULL) { - free(content); - content = NULL; - } -} - -unsigned long long xdb_search_t::get_io_count() { - return io_count; -} - -unsigned long long xdb_search_t::get_cost_time() { - return cost_time; -} - -std::string xdb_search_t::search(const std::string &ip_str) { - unsigned long long t1 = get_time(); - - unsigned int ip_uint; - if (!ip2uint(ip_str.data(), ip_uint)) - return "invalid ip: " + ip_str; - std::string region = search(ip_uint); - - unsigned long long t2 = get_time(); - cost_time = t2 - t1; - return region; -} - -std::string xdb_search_t::search(unsigned int ip_uint) { - io_count = 0; - - unsigned int content_index_left, content_index_right; - get_content_index(ip_uint, content_index_left, content_index_right); - - unsigned int left, right, mid; - unsigned int ip_left, ip_right; - unsigned short region_len; - unsigned int region_index; - unsigned int mid_index; - - left = 0; - right = (content_index_right - content_index_left) / segment_index_size; - - for (;;) { - mid = left + (right - left) / 2; - mid_index = content_index_left + mid * segment_index_size; - get_content(mid_index, ip_left, ip_right, region_len, region_index); - - if (ip_left > ip_uint) - right = mid - 1; - else if (ip_right < ip_uint) - left = mid + 1; - else - return get_region(region_index, region_len); - } -} diff --git a/binding/cpp/xdb_search.h b/binding/cpp/xdb_search.h deleted file mode 100644 index 44286ef..0000000 --- a/binding/cpp/xdb_search.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef XDB_SEARCH_H -#define XDB_SEARCH_H - -#include - -class xdb_search_t { - public: - xdb_search_t(const std::string &file_name); - ~xdb_search_t(); - - void init_file(); - void init_vector_index(); - void init_content(); - - unsigned long long get_io_count(); - unsigned long long get_cost_time(); - - std::string search(const std::string &ip); - - private: - void get_content_index(unsigned int ip, - 
unsigned int &left, - unsigned int &right); - - void get_content(unsigned int index, - unsigned int &ip_left, - unsigned int &ip_right, - unsigned short ®ion_len, - unsigned int ®ion_index); - - std::string get_region(unsigned int index, unsigned short len); - - std::string search(unsigned int ip_uint); - - FILE *db; - char *vector_index; - char *content; - unsigned long long io_count; - unsigned long long cost_time; - - static constexpr int header_length = 256; - static constexpr int vector_index_rows = 256; - static constexpr int vector_index_cols = 256; - static constexpr int vector_index_size = 8; - static constexpr int vector_index_length = - vector_index_rows * vector_index_cols * vector_index_size; - static constexpr int segment_index_size = 14; -}; - -#endif diff --git a/binding/cpp/xdb_search_test.cc b/binding/cpp/xdb_search_test.cc deleted file mode 100644 index a2deea2..0000000 --- a/binding/cpp/xdb_search_test.cc +++ /dev/null @@ -1,73 +0,0 @@ - -#include "xdb_search.h" - -#include - -#include - -void print_help(int argc, char* argv[]) { - printf("./xdb_search [command options]\n"); - printf("options:\n"); - printf(" --db string ip2region binary xdb file path\n"); - printf( - " --cache-policy string cache policy: " - "file/vector_index/content\n"); - printf(" --help print help\n"); - exit(-1); -} - -int main(int argc, char* argv[]) { - struct option long_options[] = { - {"db", required_argument, 0, 'd'}, - {"cache-policy", required_argument, 0, 't'}, - {"help", no_argument, 0, 'h'}, - {0, 0, 0, 0 } - }; - - std::string db_file_name = "../../data/ip2region.xdb"; - std::string cache_policy = "vector_index"; - - while (1) { - int c = getopt_long(argc, argv, "", long_options, NULL); - if (c == -1) - break; - switch (c) { - case 'd': - db_file_name = optarg; - break; - case 'h': - print_help(argc, argv); - break; - case 't': - cache_policy = optarg; - break; - case '?': - exit(-1); - } - } - - xdb_search_t xdb(db_file_name); - - if (cache_policy == 
"content") - xdb.init_content(); - else if (cache_policy == "vector_index") - xdb.init_vector_index(); - else if (cache_policy == "file") - xdb.init_file(); - else { - std::cout << "invalid cache policy: " << cache_policy << std::endl; - exit(-1); - } - - std::string ip; - for (;;) { - std::cout << "ip2region>> "; - std::getline(std::cin, ip); - if (ip.empty()) - continue; - if (ip == "exit" || ip == "quit") - break; - std::cout << xdb.search(ip) << std::endl; - } - return 0; -} diff --git a/maker/cpp/Makefile b/maker/cpp/Makefile deleted file mode 100644 index 0664139..0000000 --- a/maker/cpp/Makefile +++ /dev/null @@ -1,11 +0,0 @@ - -all: xdb_make xdb_edit - -xdb_make: xdb_make.cc xdb_make_test.cc - g++ -std=c++11 -O2 $^ -o $@ - -xdb_edit: xdb_edit.cc xdb_edit_test.cc - g++ -std=c++11 -O2 $^ -o $@ - -clean: - rm -f xdb_make xdb_edit diff --git a/maker/cpp/readme.md b/maker/cpp/readme.md index c09dde0..0ca862c 100644 --- a/maker/cpp/readme.md +++ b/maker/cpp/readme.md @@ -1,217 +1,5 @@ -# ip2region xdb C++ 生成实现 -# 编译 -1. 切换到当前目录 -2. 编译 +[生成 xdb 文件](../../binding/cpp#4-生成-xdb-文件) -``` -$ make -g++ -std=c++11 -O2 xdb_make.cc xdb_make_test.cc -o xdb_make -``` - -# `xdb` 数据生成 -## 使用说明 -``` -$ ./xdb_make --help -./xdb_make [command options] -options: - --db string ip2region binary xdb file path - --src string source ip text file path -``` - -## 数据生成 -``` -$ ./xdb_make --db ip2region.xdb --src ../../data/ip.merge.txt -took: 1.46s -``` - -## 数据正确性测试 -``` -$ make # 1. 编译 -$ ./xdb_maker # 2. 本目录生成 xdb 文件 -$ diff <(xxd ./ip2region.xdb) <(xxd ../../data/ip2region.xdb) # 3. 比较本目录和仓库中的 xdb 文件 - # 只有生成的时间不同 -1c1 -< 00000000: 0200 0100 3c6a f965 2302 0f00 75ea a800 .... 00000000: 0200 0100 469b de65 2302 0f00 75ea a800 ....F..e#...u... 
-``` - -# `xdb` 数据编辑 -## 使用说明 -* 新的IP归属地文件可以包含空行 -* 新的IP归属地文件顺序可以乱序, 程序会自动排序 -* 新的IP归属地文件顺序可以重叠, 只要无二义性, 程序会自动合并 -* 最终的结果会将相邻的且归属地相同的行自动合并 - -``` -$ ./xdb_edit --help -./xdb_edit [command options] -options: - --old filename old source ip text file path - --new filename new source ip text file path -``` - -## 数据更新 -``` -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -took: 1.46s -``` - -## 数据正确性测试 -### 测试一: 测试数据文件包含空行以及重复的情况 -``` -$ cat -n 1.txt - 1 - 2 1.0.128.0|1.0.128.255|测试归属地 - 3 1.0.128.0|1.0.128.255|测试归属地 - 4 -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -took: 1.83s -$ git diff ../../data/ -diff --git a/data/ip.merge.txt b/data/ip.merge.txt -index 8976bd3..6da5e18 100644 ---- a/data/ip.merge.txt -+++ b/data/ip.merge.txt -@@ -7,7 +7,7 @@ - 1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 - 1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 - 1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 --1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT -+1.0.128.0|1.0.128.255|测试归属地 - 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT - 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT - 1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT -@@ -320906,8 +320906,7 @@ - 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 - 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 - 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint --100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP --100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP -+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP - 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile - 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 - 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 -``` - -### 测试二: 测试数据文件乱序以及数据有交叉, 归属地相同的情况 -``` -$ cat -n 1.txt - 1 - 2 1.0.128.5|1.0.128.255|测试归属地 - 3 1.0.128.0|1.0.128.9|测试归属地 - 4 -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -took: 1.83s -$ git diff ../../data/ -diff --git a/data/ip.merge.txt b/data/ip.merge.txt -index 8976bd3..6da5e18 100644 ---- a/data/ip.merge.txt -+++ b/data/ip.merge.txt -@@ -7,7 +7,7 @@ - 1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 - 1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 - 
1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 --1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT -+1.0.128.0|1.0.128.255|测试归属地 - 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT - 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT - 1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT -@@ -320906,8 +320906,7 @@ - 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 - 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 - 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint --100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP --100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP -+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP - 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile - 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 - 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 -``` - -### 测试三: 测试数据文件乱序以及数据有交叉的, 归属地不同情况 -``` -$ cat -n 1.txt - 1 - 2 1.0.128.5|1.0.128.255|测试归属地 - 3 1.0.128.0|1.0.128.9|测试归属地123 - 4 -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -数据有二义性: 1.0.128.0|1.0.128.9|测试归属地123, 1.0.128.5|1.0.128.255|测试归属地 -``` - -### 测试四: 测试将一个IP数据拆成多个IP -``` -$ cat -n 1.txt - 1 36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 - 2 36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 - 3 36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -took: 1.83s -$ git diff ../../data/ -diff --git a/data/ip.merge.txt b/data/ip.merge.txt -index 8976bd3..7be0227 100644 ---- a/data/ip.merge.txt -+++ b/data/ip.merge.txt -@@ -54778,7 +54778,11 @@ - 36.134.84.0|36.134.85.255|中国|0|安徽省|合肥市|移动 - 36.134.86.0|36.134.87.255|中国|0|广西|南宁市|移动 - 36.134.88.0|36.134.89.255|中国|0|内蒙古|呼和浩特市|移动 --36.134.90.0|36.141.255.255|中国|0|0|0|移动 -+36.134.90.0|36.136.0.255|中国|0|0|0|移动 -+36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动 -+36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动 -+36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动 -+36.136.24.0|36.141.255.255|中国|0|0|0|移动 - 36.142.0.0|36.142.1.255|中国|0|四川省|成都市|移动 - 36.142.2.0|36.142.31.255|中国|0|甘肃省|兰州市|移动 - 36.142.32.0|36.142.127.255|中国|0|甘肃省|0|移动 -@@ -320906,8 +320910,7 @@ - 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 - 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 - 
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint --100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP --100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP -+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP - 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile - 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 - 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 - ``` - -### 测试五: 测试将多个IP数据并成一个IP数据 -``` -$ cat -n 1.txt - 1 - 2 1.0.16.0|1.0.127.255|测试归属地 - 3 -$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt -took: 1.83s -$ git diff ../../data/ -diff --git a/data/ip.merge.txt b/data/ip.merge.txt -index 8976bd3..acc27a5 100644 ---- a/data/ip.merge.txt -+++ b/data/ip.merge.txt -@@ -3,10 +3,7 @@ - 1.0.1.0|1.0.3.255|中国|0|福建省|福州市|电信 - 1.0.4.0|1.0.7.255|澳大利亚|0|维多利亚|墨尔本|0 - 1.0.8.0|1.0.15.255|中国|0|广东省|广州市|电信 --1.0.16.0|1.0.31.255|日本|0|0|0|0 --1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信 --1.0.64.0|1.0.79.255|日本|0|广岛县|0|0 --1.0.80.0|1.0.127.255|日本|0|冈山县|0|0 -+1.0.16.0|1.0.127.255|测试归属地 - 1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT - 1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT - 1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT -@@ -320906,8 +320903,7 @@ - 100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报 - 100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报 - 100.48.0.0|100.63.255.255|美国|0|0|0|Sprint --100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP --100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP -+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP - 100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile - 101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信 - 101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0 - ``` +[原始数据编辑](../../binding/cpp#5-原始数据编辑) diff --git a/maker/cpp/xdb_edit.cc b/maker/cpp/xdb_edit.cc deleted file mode 100644 index 2279aa2..0000000 --- a/maker/cpp/xdb_edit.cc +++ /dev/null @@ -1,247 +0,0 @@ - -#include "xdb_edit.h" - -#include -#include -#include -#include -#include - -#include -#include - -static void log_exit(const std::string& msg) { - std::cout << msg << std::endl; - exit(-1); -} - -static unsigned long long get_time() { - struct timeval tv1; - gettimeofday(&tv1, NULL); - 
return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec; -} - -static bool ip2uint(const char* buf, unsigned int& ip) { - struct in_addr addr; - if (inet_pton(AF_INET, buf, &addr) == 0) - return false; - // 网络字节序为大端存储, 在此转换为小端存储 - ip = (((addr.s_addr >> 0) & 0xFF) << 24) | - (((addr.s_addr >> 8) & 0xFF) << 16) | - (((addr.s_addr >> 16) & 0xFF) << 8) | - (((addr.s_addr >> 24) & 0xFF) << 0); - return true; -} - -static std::string uint2ip(unsigned int ip) { - char buf[16]; - snprintf(buf, - sizeof(buf), - "%d.%d.%d.%d", - (ip >> 24) & 0xFF, - (ip >> 16) & 0xFF, - (ip >> 8) & 0xFF, - ip & 0xFF); - return std::string(buf); -} - -static void handle_ip_txt(const std::string& file_name, - std::list& regions) { - FILE* f = fopen(file_name.data(), "r"); - if (f == NULL) - log_exit("can't open " + file_name); - - char buf[1024]; - while (fgets(buf, sizeof(buf), f) != NULL) { - unsigned int buf_len = strlen(buf); - // 去掉多余的空 - while (buf_len > 0 && isspace(buf[buf_len - 1])) - --buf_len; - if (buf_len == 0) - continue; - buf[buf_len] = '\0'; - regions.push_back(xdb_node_t(buf)); - } - - fclose(f); -} - -// xdb_node_t -xdb_node_t::xdb_node_t() { -} - -xdb_node_t::xdb_node_t(char* buf) { - char* pos1 = strchr(buf, '|'); - - if (pos1 == NULL) - log_exit("invalid data: " + std::string(buf)); - char* pos2 = strchr(pos1 + 1, '|'); - if (pos2 == NULL) - log_exit("invalid data: " + std::string(buf)); - *pos1 = '\0'; - *pos2 = '\0'; - - region = pos2 + 1; - if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2 || - region.empty()) { - *pos1 = *pos2 = '|'; - log_exit(std::string("invalid data: ") + buf); - } -} - -bool xdb_node_t::operator<(const xdb_node_t& rhs) const { - if (ip1 < rhs.ip1) - return true; - if (ip1 > rhs.ip1) - return false; - return ip2 < rhs.ip2; -} - -std::string xdb_node_t::to_string() const { - return uint2ip(ip1) + "|" + uint2ip(ip2) + "|" + region; -} - -void xdb_edit_t::handle_new_file(const std::string& file_name) { - // 输入 - 
handle_ip_txt(file_name, new_regions); - - // 排序 - new_regions.sort(); - // 检验及其去重 - auto it = new_regions.begin(); - - for (;;) { - if (it == new_regions.end()) - break; - auto next = it; - ++next; - if (next == new_regions.end()) - break; - if (it->ip1 == next->ip1 || it->ip2 >= next->ip1) { - // 数据重叠 - if (it->region != next->region) - log_exit("数据有二义性: " + it->to_string() + ", " + - next->to_string()); - it->ip2 = std::max(it->ip2, next->ip2); - new_regions.erase(next); - } else if (it->ip2 + 1 == next->ip1 && it->region == next->region) { - // 数据连接 - it->ip2 = next->ip2; - new_regions.erase(next); - } else { - ++it; - } - } -} - -void xdb_edit_t::handle_old_file(const std::string& file_name) { - handle_ip_txt(file_name, old_regions); -} - -void xdb_edit_t::merge() { - auto it1 = old_regions.begin(); - auto it2 = new_regions.begin(); - - for (;;) { - if (it2 == new_regions.end()) - break; - if (it2->ip1 > it2->ip2) { - // 失效数据 - ++it2; - continue; - } - while (it1->ip2 < it2->ip1) - ++it1; - if (it1->ip2 <= it2->ip2) { - xdb_node_t node; - node.ip1 = it2->ip1; - node.ip2 = it1->ip2; - node.region = it2->region; - - it1->ip2 = node.ip1 - 1; - it2->ip1 = node.ip2 + 1; - - // std::cout << "insert: " << node.to_string() << - // std::endl; - ++it1; - it1 = old_regions.insert(it1, node); - ++it1; - } else { - xdb_node_t node; - node.ip1 = it2->ip2 + 1; - node.ip2 = it1->ip2; - node.region = it1->region; - - it1->ip2 = it2->ip1 - 1; - - // std::cout << "insert: " << it2->to_string() << - // std::endl; - ++it1; - it1 = old_regions.insert(it1, *it2); - - ++it1; - it1 = old_regions.insert(it1, node); - - ++it2; - } - } -} - -void xdb_edit_t::write_old_file(const std::string& file_name) { - FILE* f = fopen(file_name.data(), "w"); - if (f == NULL) - log_exit("can't open " + file_name); - - auto it = old_regions.begin(); - - // 删除非法的数据 - for (;;) { - if (it == old_regions.end()) - break; - if (it->ip1 > it->ip2) - it = old_regions.erase(it); - else - ++it; - } - - // 
合并数据域相同的相邻数据 - it = old_regions.begin(); - for (;;) { - if (it == old_regions.end()) - break; - auto next = it; - ++next; - if (next == old_regions.end()) - break; - if (it->region == next->region) { - it->ip2 = next->ip2; - old_regions.erase(next); - } else { - ++it; - } - } - - for (auto& d : old_regions) { - std::string res = - uint2ip(d.ip1) + "|" + uint2ip(d.ip2) + "|" + d.region + "\n"; - fputs(res.data(), f); - } - - fclose(f); -} - -xdb_edit_t::xdb_edit_t(const std::string& file_name_old, - const std::string& file_name_new) { - unsigned long long tv1 = get_time(); - - handle_new_file(file_name_new); - handle_old_file(file_name_old); - merge(); - write_old_file(file_name_old); - - unsigned long long tv2 = get_time(); - - double took = (tv2 - tv1) * 1.0 / 1000 / 1000; - - printf("took: %.2fs\n", took); -} diff --git a/maker/cpp/xdb_edit.h b/maker/cpp/xdb_edit.h deleted file mode 100644 index 7fb8640..0000000 --- a/maker/cpp/xdb_edit.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef XDB_EDIT_H -#define XDB_EDIT_H - -#include -#include - -struct xdb_node_t { - unsigned int ip1; - unsigned int ip2; - std::string region; - - xdb_node_t(); - xdb_node_t(char* buf); - - bool operator<(const xdb_node_t& rhs) const; - - std::string to_string() const; -}; - -class xdb_edit_t { - public: - xdb_edit_t(const std::string& file_name_old, - const std::string& file_name_new); - - private: - void handle_new_file(const std::string& file_name); - void handle_old_file(const std::string& file_name); - void merge(); - void write_old_file(const std::string& file_name); - - std::list old_regions; - std::list new_regions; -}; - -#endif diff --git a/maker/cpp/xdb_edit_test.cc b/maker/cpp/xdb_edit_test.cc deleted file mode 100644 index c1d3216..0000000 --- a/maker/cpp/xdb_edit_test.cc +++ /dev/null @@ -1,50 +0,0 @@ - -#include "xdb_edit.h" - -#include -#include - -#include - -void print_help() { - printf("./xdb_edit [command options]\n"); - printf("options:\n"); - printf(" --old filename old 
source ip text file path\n"); - printf(" --new filename new source ip text file path\n"); - exit(-1); -} - -int main(int argc, char* argv[]) { - struct option long_options[] = { - {"new", required_argument, 0, 'n'}, - {"old", required_argument, 0, 'o'}, - {"help", no_argument, 0, 'h'}, - {0, 0, 0, 0 } - }; - - std::string file_name_old = "../../data/ip.merge.txt"; - std::string file_name_new = "./1.txt"; - - while (1) { - int c = getopt_long(argc, argv, "", long_options, NULL); - if (c == -1) - break; - switch (c) { - case 'n': - file_name_new = optarg; - break; - case 'h': - print_help(); - break; - case 'o': - file_name_old = optarg; - break; - case '?': - exit(-1); - } - } - - xdb_edit_t xdb(file_name_old, file_name_new); - - return 0; -} diff --git a/maker/cpp/xdb_make.cc b/maker/cpp/xdb_make.cc deleted file mode 100644 index dea887f..0000000 --- a/maker/cpp/xdb_make.cc +++ /dev/null @@ -1,240 +0,0 @@ - -#include "xdb_make.h" - -#include -#include -#include -#include - -#include - -static void log_exit(const std::string &msg) { - std::cout << msg << std::endl; - exit(-1); -} - -static unsigned long long get_time() { - struct timeval tv1; - gettimeofday(&tv1, NULL); - return (unsigned long long)tv1.tv_sec * 1000 * 1000 + tv1.tv_usec; -} - -static void write_uint(unsigned int data, char buf[]) { - buf[0] = (data >> 0) & 0xFF; - buf[1] = (data >> 8) & 0xFF; - buf[2] = (data >> 16) & 0xFF; - buf[3] = (data >> 24) & 0xFF; -} - -static void write_uint(unsigned int data, FILE *dst) { - char buf[4]; - write_uint(data, buf); - fwrite(buf, 1, sizeof(buf), dst); -} - -static void write_ushort(unsigned short data, char buf[]) { - buf[0] = (data >> 0) & 0xFF; - buf[1] = (data >> 8) & 0xFF; -} - -static void write_ushort(unsigned short data, FILE *dst) { - char buf[2]; - write_ushort(data, buf); - fwrite(buf, 1, sizeof(buf), dst); -} - -static void write_string(const char *buf, unsigned int len, FILE *dst) { - fwrite(buf, 1, len, dst); -} - -static bool ip2uint(const char 
*buf, unsigned int &ip) { - struct in_addr addr; - if (inet_pton(AF_INET, buf, &addr) == 0) - return false; - // 网络字节序为大端存储, 在此转换为小端存储 - ip = (((addr.s_addr >> 0) & 0xFF) << 24) | - (((addr.s_addr >> 8) & 0xFF) << 16) | - (((addr.s_addr >> 16) & 0xFF) << 8) | - (((addr.s_addr >> 24) & 0xFF) << 0); - return true; -} - -static std::string uint2ip(unsigned int ip) { - char buf[16]; - snprintf(buf, - sizeof(buf), - "%d.%d.%d.%d", - (ip >> 24) & 0xFF, - (ip >> 16) & 0xFF, - (ip >> 8) & 0xFF, - ip & 0xFF); - return std::string(buf); -} - -void xdb_make_t::vector_index_push_back(unsigned int row, - unsigned int col, - unsigned int ip1, - unsigned int ip2, - const char *region_str) { - char buf[8]; - write_uint(ip1, buf); - write_uint(ip2, buf + 4); - - vector_index[row][col].push_back(std::make_pair( - std::string(buf, sizeof(buf)), region_str)); -} - -void xdb_make_t::vector_index_push_back(unsigned int ip1, - unsigned int ip2, - const char *region_str) { - unsigned int ip1_1 = (ip1 >> 24) & 0xFF; - unsigned int ip1_2 = (ip1 >> 16) & 0xFF; - unsigned int ip2_1 = (ip2 >> 24) & 0xFF; - unsigned int ip2_2 = (ip2 >> 16) & 0xFF; - - if (ip1_1 == ip2_1 && ip1_2 == ip2_2) { - vector_index_push_back(ip1_1, ip1_2, ip1, ip2, region_str); - return; - } - - vector_index_push_back(ip1_1, ip1_2, ip1, ip1 | 0x0000FFFF, region_str); - vector_index_push_back(ip2_1, ip2_2, ip2 & 0xFFFF0000, ip2, region_str); - - for (;;) { - ++ip1_2; - if (ip1_2 == 256) { - ++ip1_1; - ip1_2 = 0; - } - if (ip1_1 == ip2_1 && ip1_2 == ip2_2) - break; - ip1 = (ip1_1 << 24) | (ip1_2 << 16); - vector_index_push_back(ip1_1, ip1_2, ip1, ip1 | 0x0000FFFF, region_str); - } -} - -void xdb_make_t::handle_input_help(char *buf) { - static unsigned int region_index = vector_index_length + header_length; - static unsigned int next_ip = 0; - - // 去掉多余的空 - unsigned int buf_len = strlen(buf); - while (buf_len > 0 && isspace(buf[buf_len - 1])) - --buf_len; - if (buf_len == 0) - return; - buf[buf_len] = '\0'; - - char *pos1 = 
strchr(buf, '|'); - - if (pos1 == NULL) - log_exit("invalid data: " + std::string(buf)); - char *pos2 = strchr(pos1 + 1, '|'); - if (pos2 == NULL) - log_exit("invalid data: " + std::string(buf)); - *pos1 = '\0'; - *pos2 = '\0'; - - const char *region_str = pos2 + 1; - - unsigned int ip1, ip2; - if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2 || - *region_str == '\0') { - *pos1 = *pos2 = '|'; - log_exit(std::string("invalid data: ") + buf); - } - - if (next_ip != ip1) - log_exit("ip 不连续: " + uint2ip(ip1)); - next_ip = ip2 + 1; - - if (region.find(region_str) == region.end()) { - region[region_str] = region_index; - region_index += strlen(region_str); - } - - vector_index_push_back(ip1, ip2, region_str); -} - -void xdb_make_t::handle_input(const std::string &file_name) { - FILE *src = fopen(file_name.data(), "r"); - if (src == NULL) - log_exit("can't open " + file_name); - - char buf[1024]; - while (fgets(buf, sizeof(buf), src) != NULL) - handle_input_help(buf); - fclose(src); -} - -void xdb_make_t::handle_header() { - char buf[header_length]; - memset(buf, 0, header_length); - write_ushort(2, buf); // 版本号 - write_ushort(1, buf + 2); // 缓存策略 - write_uint(time(NULL), buf + 4); // 时间 - // 索引 - unsigned int content_left = header_length + vector_index_length; - for (auto &d : region) - content_left += d.first.size(); - unsigned int content_right = content_left; - - for (int i = 0; i < vector_index_rows; ++i) - for (int j = 0; j < vector_index_cols; ++j) - content_right += vector_index[i][j].size() * segment_index_size; - content_right -= segment_index_size; - write_uint(content_left, buf + 8); - write_uint(content_right, buf + 12); - write_string(buf, header_length, dst); -} - -void xdb_make_t::handle_vector_index() { - unsigned int index = header_length + vector_index_length; - for (auto &d : region) - index += d.first.size(); - for (unsigned i = 0; i < vector_index_rows; ++i) - for (unsigned j = 0; j < vector_index_cols; ++j) { - write_uint(index, dst); 
- index += segment_index_size * vector_index[i][j].size(); - write_uint(index, dst); - } -} - -void xdb_make_t::handle_region() { - for (auto &d : region) { - fseek(dst, d.second, SEEK_SET); - write_string(d.first.data(), d.first.size(), dst); - } -} - -void xdb_make_t::handle_content() { - fseek(dst, 0, SEEK_END); - for (unsigned i = 0; i < vector_index_rows; ++i) - for (unsigned j = 0; j < vector_index_cols; ++j) - for (auto d : vector_index[i][j]) { - write_string(d.first.data(), d.first.size(), dst); - write_ushort(d.second.size(), dst); - write_uint(region[d.second], dst); - } -} - -xdb_make_t::xdb_make_t(const std::string &file_name_src, - const std::string &file_name_dst) { - unsigned long long tv1 = get_time(); - - handle_input(file_name_src); - - dst = fopen(file_name_dst.data(), "w"); - if (dst == NULL) - log_exit("can't open " + std::string(file_name_dst)); - - handle_header(); - handle_vector_index(); - handle_region(); - handle_content(); - - fclose(dst); - - unsigned long long tv2 = get_time(); - printf("took: %.2fs\n", (tv2 - tv1) * 1.0 / 1000 / 1000); -} diff --git a/maker/cpp/xdb_make.h b/maker/cpp/xdb_make.h deleted file mode 100644 index 4aea7fc..0000000 --- a/maker/cpp/xdb_make.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef XDB_MAKE_H -#define XDB_MAKE_H - -#include - -#include -#include -#include - -class xdb_make_t { - public: - xdb_make_t(const std::string &file_name_src, - const std::string &file_name_dst); - - private: - void vector_index_push_back(unsigned int row, - unsigned int col, - unsigned int ip1, - unsigned int ip2, - const char *region); - void vector_index_push_back(unsigned int ip1, - unsigned int ip2, - const char *region); - void handle_input_help(char buf[]); - void handle_input(const std::string &file_name); - - void handle_header(); - void handle_vector_index(); - void handle_region(); - void handle_content(); - - static constexpr int header_length = 256; - static constexpr int vector_index_rows = 256; - static constexpr int 
vector_index_cols = 256; - static constexpr int vector_index_size = 8; - static constexpr int vector_index_length = - vector_index_rows * vector_index_cols * vector_index_size; - static constexpr int segment_index_size = 14; - - FILE *dst = NULL; - - std::vector> - vector_index[vector_index_rows][vector_index_cols]; - - std::unordered_map region; -}; - -#endif diff --git a/maker/cpp/xdb_make_test.cc b/maker/cpp/xdb_make_test.cc deleted file mode 100644 index b763d04..0000000 --- a/maker/cpp/xdb_make_test.cc +++ /dev/null @@ -1,50 +0,0 @@ - -#include "xdb_make.h" - -#include -#include - -#include - -void print_help() { - printf("./xdb_make [command options]\n"); - printf("options:\n"); - printf(" --db string ip2region binary xdb file path\n"); - printf(" --src string source ip text file path\n"); - exit(-1); -} - -int main(int argc, char* argv[]) { - struct option long_options[] = { - {"db", required_argument, 0, 'd'}, - {"src", required_argument, 0, 's'}, - {"help", no_argument, 0, 'h'}, - {0, 0, 0, 0 } - }; - - std::string file_name_dst = "./ip2region.xdb"; - std::string file_name_src = "../../data/ip.merge.txt"; - - while (1) { - int c = getopt_long(argc, argv, "", long_options, NULL); - if (c == -1) - break; - switch (c) { - case 'd': - file_name_dst = optarg; - break; - case 'h': - print_help(); - break; - case 's': - file_name_src = optarg; - break; - case '?': - exit(-1); - } - } - - xdb_make_t xdb(file_name_src, file_name_dst); - - return 0; -}