add binding maker and editor for C++

This commit is contained in:
Yunbin Liu 2024-03-06 03:18:41 +00:00
parent c82399f3b9
commit 19c2aa8199
17 changed files with 1566 additions and 0 deletions

View File

@ -42,6 +42,7 @@ API 介绍,使用文档和测试程序请参考对应 `searcher` 查询客户
| :white_check_mark: | 已完成 | [erlang](binding/erlang) | erlang xdb 查询客户端实现 | [leihua996](https://github.com/leihua996) |
|     | 待开始 | [php_ext](binding/php7_ext) | php c 扩展 xdb 查询客户端实现 | 待确定 |
| :white_check_mark: | 已完成 | [nginx](binding/nginx) | nginx 扩展 xdb 查询客户端实现 | [Wu Jian Ping](https://github.com/wujjpp) |
| :white_check_mark: | 已完成 | [C++](binding/cpp) | C++ xdb 查询客户端实现 | [Yunbin Liu](https://github.com/liuyunbin) |
以下工具链实现由社区开发者通过第三方仓库贡献:
@ -63,6 +64,7 @@ API 介绍,使用文档和测试程序请参考如下 `maker` 生成程序下
| :white_check_mark: | 已完成 | [python](maker/python) | python xdb 生成程序实现 | [leolin49](https://github.com/leolin49) |
| :white_check_mark: | 已完成 | [csharp](maker/csharp) | csharp xdb 生成程序实现 | [Alan Lee](https://github.com/malus2077) |
| :white_check_mark: | 已完成 | [rust](maker/rust) | rust xdb 生成程序实现 | [KevinWang](https://github.com/KevinWL) |
| :white_check_mark: | 已完成 | [C++](maker/cpp) | C++ xdb 生成程序实现 | [Yunbin Liu](https://github.com/liuyunbin) |
# `xdb` 数据更新
@ -82,6 +84,7 @@ ip2region 旨在于 <b>研究 IP 数据的存储和快速查询的设计和实
|:-------------------|:----| :--- |:-------------------| :--- |
| :white_check_mark: | 已完成 | [golang](maker/golang#xdb-数据编辑) | golang 原始 IP 数据编辑器 | [Lion](https://github.com/lionsoul2014) |
| &nbsp;&nbsp;&nbsp; | 待开始 | [java](maker/java#xdb-数据编辑) | java 原始 IP 数据编辑器 | [Lion](https://github.com/lionsoul2014) |
| :white_check_mark: | 已完成 | [C++](maker/cpp#xdb-数据编辑) | C++ 原始 IP 数据编辑器 | [Yunbin Liu](https://github.com/liuyunbin) |
### 检测自动更新

11
binding/cpp/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Builds the interactive query tool (xdb_search) and the benchmark tool
# (xdb_bench) of the C++ xdb binding.

# `all` and `clean` name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: xdb_search xdb_bench

xdb_search: xdb_search.cc xdb_search_test.cc
	g++ -std=c++11 -O2 $^ -o $@

xdb_bench: xdb_search.cc xdb_bench.cc xdb_bench_test.cc
	g++ -std=c++11 -O2 $^ -o $@

clean:
	rm -f xdb_search xdb_bench

108
binding/cpp/readme.md Normal file
View File

@ -0,0 +1,108 @@
# ip2region xdb C++ 查询客户端实现
## 使用方式
### 完全基于文件的查询
```
#include <iostream>
#include "xdb_search.h"
int main(int argc, char* argv[]) {
char file_name[] = "../../data/ip2region.xdb";
char ip[] = "1.2.3.4";
xdb_search_t xdb(file_name);
xdb.init_file();
std::cout << xdb.search(ip) << std::endl;
return 0;
}
```
### 缓存 `vector_index` 索引
```
#include <iostream>
#include "xdb_search.h"
int main(int argc, char* argv[]) {
char file_name[] = "../../data/ip2region.xdb";
char ip[] = "1.2.3.4";
xdb_search_t xdb(file_name);
xdb.init_vector_index();
std::cout << xdb.search(ip) << std::endl;
return 0;
}
```
### 缓存整个 `xdb` 数据
```
#include <iostream>
#include "xdb_search.h"
int main(int argc, char* argv[]) {
char file_name[] = "../../data/ip2region.xdb";
char ip[] = "1.2.3.4";
xdb_search_t xdb(file_name);
xdb.init_content();
std::cout << xdb.search(ip) << std::endl;
return 0;
}
```
## 测试程序编译
1. 切换到当前目录
2. 编译
```
$ make
g++ -std=c++11 -O2 xdb_search.cc xdb_search_test.cc -o xdb_search
g++ -std=c++11 -O2 xdb_search.cc xdb_bench.cc xdb_bench_test.cc -o xdb_bench
```
## 测试查询
### 说明
```
$ ./xdb_search --help
./xdb_search [command options]
options:
--db string ip2region binary xdb file path
--cache-policy string cache policy: file/vector_index/content
--help print help
```
### 测试
```
$ ./xdb_search --db ../../data/ip2region.xdb --cache-policy vector_index
cache policy : vector_index
ip2region>> 1.2.3.4
美国|0|华盛顿|0|谷歌
```
## bench 测试
### 说明
```
$ ./xdb_bench --help
./xdb_bench [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
--cache-policy string cache policy: file/vector_index/content
--help print help
```
### 测试
```
$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy content
total: 3419220, took: 3.44 s, cost: 0.27 μs/op, io count: 0
$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy vector_index
total: 3419220, took: 45.99 s, cost: 12.24 μs/op, io count: 21739300
$ ./xdb_bench --db ../../data/ip2region.xdb --src ../../data/ip.merge.txt --cache-policy file
total: 3419220, took: 60.39 s, cost: 16.32 μs/op, io count: 25158520
```

135
binding/cpp/xdb_bench.cc Normal file
View File

@ -0,0 +1,135 @@
#include "xdb_bench.h"
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <iostream>
#include <vector>
// Prints `msg` followed by a newline on standard output, flushes the stream
// and terminates the process with exit status -1. Never returns.
static void log_exit(const std::string &msg) {
    std::cout << msg << '\n' << std::flush;
    exit(-1);
}
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in microseconds.
static unsigned long long get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const unsigned long long seconds = (unsigned long long)now.tv_sec;
    return seconds * 1000 * 1000 + now.tv_usec;
}
// Parses the dotted-decimal IPv4 text `buf` and stores its numeric value in
// `ip` in host byte order (e.g. "1.2.3.4" -> 0x01020304).
// Returns false, leaving `ip` untouched, when `buf` is not a valid address.
static bool ip2uint(const char *buf, unsigned int &ip) {
    struct in_addr addr;
    // inet_pton() returns 1 only on success (0: malformed text, -1: error).
    if (inet_pton(AF_INET, buf, &addr) != 1)
        return false;
    // s_addr is in network byte order. ntohl() gives the host-order value on
    // little- and big-endian machines alike, whereas a hand-written byte swap
    // is only correct on little-endian hosts.
    ip = ntohl(addr.s_addr);
    return true;
}
// Formats the host-order IPv4 value `ip` as dotted-decimal text
// (e.g. 0x01020304 -> "1.2.3.4").
static std::string uint2ip(unsigned int ip) {
    char buf[16];  // "255.255.255.255" plus the terminating NUL
    // %u matches the unsigned int arguments; every octet is in 0..255.
    snprintf(buf,
             sizeof(buf),
             "%u.%u.%u.%u",
             (ip >> 24) & 0xFF,
             (ip >> 16) & 0xFF,
             (ip >> 8) & 0xFF,
             ip & 0xFF);
    return std::string(buf);
}
// Creates a benchmark driver whose searcher is constructed from `file_name`.
// The statistics counters start at zero so they never hold indeterminate
// values, even before bench() resets them.
xdb_bench_t::xdb_bench_t(const std::string &file_name)
    : xdb_search(file_name), sum_io_count(0), sum_cost_time(0), sum_count(0) {
}
// Selects the "file" cache policy by forwarding to the wrapped searcher.
void xdb_bench_t::init_file() {
xdb_search.init_file();
}
// Selects the "vector_index" cache policy by forwarding to the wrapped
// searcher.
void xdb_bench_t::init_vector_index() {
xdb_search.init_vector_index();
}
// Selects the "content" cache policy by forwarding to the wrapped searcher.
void xdb_bench_t::init_content() {
xdb_search.init_content();
}
// Looks up one address and checks that the searcher returns `region`.
// A mismatch aborts the benchmark through log_exit(); otherwise the I/O count
// and elapsed time of the lookup are added to the running totals.
void xdb_bench_t::bench_test_one(unsigned int ip_uint, const char *region) {
    const std::string ip_text = uint2ip(ip_uint);
    const std::string found = xdb_search.search(ip_text);
    if (!(found == region))
        log_exit("failed: " + ip_text);

    sum_io_count += xdb_search.get_io_count();
    sum_cost_time += xdb_search.get_cost_time();
    ++sum_count;
}
// Benchmarks one source line of the form "start_ip|end_ip|region".
// `buf` is modified in place (the separators are overwritten with NULs).
// Five addresses spread over [start_ip, end_ip] are looked up and each result
// must equal `region`. Blank lines are skipped; malformed lines abort through
// log_exit().
void xdb_bench_t::bench_test_line(char *buf) {
    // Remove the line terminator only when one is present: the last line of a
    // file may lack '\n', and blindly dropping the final character would then
    // corrupt the region text. "\r\n" endings are handled as well.
    size_t buf_len = strlen(buf);
    while (buf_len > 0 &&
           (buf[buf_len - 1] == '\n' || buf[buf_len - 1] == '\r'))
        --buf_len;
    if (buf_len == 0)
        return;  // blank line
    buf[buf_len] = '\0';

    char *pos1 = strchr(buf, '|');
    if (pos1 == NULL)
        log_exit("invalid data: " + std::string(buf));
    char *pos2 = strchr(pos1 + 1, '|');
    if (pos2 == NULL)
        log_exit("invalid data: " + std::string(buf));
    *pos1 = '\0';
    *pos2 = '\0';

    unsigned int ip1, ip2;
    if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2) {
        // Restore the separators so the whole line appears in the message.
        *pos1 = *pos2 = '|';
        log_exit(std::string("invalid data: ") + buf);
    }
    const char *region = pos2 + 1;

    // Sample both ends, the midpoint and the two quarter points.
    unsigned int ip_mid = ip1 + (ip2 - ip1) / 2;
    std::vector<unsigned int> ip_vec;
    ip_vec.push_back(ip1);
    ip_vec.push_back(ip1 + (ip_mid - ip1) / 2);
    ip_vec.push_back(ip_mid);
    ip_vec.push_back(ip_mid + (ip2 - ip_mid) / 2);
    ip_vec.push_back(ip2);
    for (auto &d : ip_vec)
        bench_test_one(d, region);
}
// Runs bench_test_line() on every line of the text file `file_name`.
// Aborts through log_exit() when the file cannot be opened.
void xdb_bench_t::bench_test_file(const std::string &file_name) {
    FILE *f = fopen(file_name.data(), "r");
    if (f == NULL)
        log_exit("can't open " + file_name);

    char buf[1024];
    while (fgets(buf, sizeof(buf), f) != NULL)
        bench_test_line(buf);

    // Release the handle; it was previously leaked on every call.
    fclose(f);
}
// Runs the whole benchmark over the source file `file_name` and prints a
// one-line summary: number of lookups, total wall time in seconds, average
// measured lookup time in microseconds, and total I/O count.
void xdb_bench_t::bench(const std::string &file_name) {
    sum_io_count = 0;
    sum_cost_time = 0;
    sum_count = 0;

    unsigned long long tv1 = get_time();
    bench_test_file(file_name);
    unsigned long long tv2 = get_time();

    double took = (tv2 - tv1) * 1.0 / 1000 / 1000;
    // An empty source file performs no lookup; avoid the 0/0 division that
    // would print "nan" as the average cost.
    double cost = sum_count == 0 ? 0.0 : sum_cost_time * 1.0 / sum_count;
    printf("total: %llu, took: %.2f s, cost: %.2f μs/op, io count: %llu\n",
           sum_count,
           took,
           cost,
           sum_io_count);
}

28
binding/cpp/xdb_bench.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef XDB_BENCH_H
#define XDB_BENCH_H
#include "xdb_search.h"
// Benchmark driver around xdb_search_t: replays a source IP text file,
// verifies every lookup result and accumulates timing / I/O statistics.
class xdb_bench_t {
public:
// `file_name` is passed to the wrapped xdb_search_t.
xdb_bench_t(const std::string &file_name);
// Cache policy selection; each call forwards to the wrapped searcher.
void init_file();
void init_vector_index();
void init_content();
// Runs the benchmark over the source text file `file_name` and prints a
// summary line.
void bench(const std::string &file_name);
private:
// Looks up one address and checks the result against `region`.
void bench_test_one(unsigned int ip_uint, const char *region);
// Handles one "start_ip|end_ip|region" line; `buf` is modified in place.
void bench_test_line(char *buf);
// Feeds every line of `file_name` to bench_test_line().
void bench_test_file(const std::string &file_name);
xdb_search_t xdb_search;
// Running totals over all lookups of the current bench() call.
unsigned long long sum_io_count;
unsigned long long sum_cost_time;
unsigned long long sum_count;
};
#endif

View File

@ -0,0 +1,70 @@
#include "xdb_bench.h"
#include <getopt.h>
#include <iostream>
// Prints the xdb_bench usage text on standard output and terminates the
// process with exit status -1. The parameters are unused.
void print_help(int argc, char* argv[]) {
    static const char usage[] =
        "./xdb_bench [command options]\n"
        "options:\n"
        " --db string ip2region binary xdb file path\n"
        " --src string source ip text file path\n"
        " --cache-policy string cache policy: file/vector_index/content\n"
        " --help print help\n";
    printf("%s", usage);
    exit(-1);
}
// Entry point of xdb_bench: parses --db / --src / --cache-policy / --help,
// configures the requested cache policy and runs the benchmark.
int main(int argc, char* argv[]) {
    struct option long_options[] = {
        {"db",           required_argument, 0, 'd'},
        {"cache-policy", required_argument, 0, 't'},
        {"src",          required_argument, 0, 's'},
        {"help",         no_argument,       0, 'h'},
        {0,              0,                 0, 0  }
    };

    std::string db_file_name = "../../data/ip2region.xdb";
    std::string src_file_name = "../../data/ip.merge.txt";
    std::string cache_policy = "vector_index";

    int opt;
    while ((opt = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
        if (opt == 'd')
            db_file_name = optarg;
        else if (opt == 'h')
            print_help(argc, argv);
        else if (opt == 't')
            cache_policy = optarg;
        else if (opt == 's')
            src_file_name = optarg;
        else if (opt == '?')
            exit(-1);  // getopt_long reported an unknown or incomplete option
    }

    xdb_bench_t xdb(db_file_name);

    if (cache_policy == "content") {
        xdb.init_content();
    } else if (cache_policy == "vector_index") {
        xdb.init_vector_index();
    } else if (cache_policy == "file") {
        xdb.init_file();
    } else {
        std::cout << "invalid cache policy: " << cache_policy << std::endl;
        exit(-1);
    }

    xdb.bench(src_file_name);
    return 0;
}

190
binding/cpp/xdb_search.cc Normal file
View File

@ -0,0 +1,190 @@
#include "xdb_search.h"
#include <arpa/inet.h>
#include <sys/time.h>
#include <iostream>
// Prints `msg` followed by a newline on standard output, flushes the stream
// and terminates the process with exit status -1. Never returns.
static void log_exit(const std::string &msg) {
    std::cout << msg << '\n' << std::flush;
    exit(-1);
}
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in microseconds.
static unsigned long long get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const unsigned long long seconds = (unsigned long long)now.tv_sec;
    return seconds * 1000 * 1000 + now.tv_usec;
}
// Reads exactly `len` bytes starting at byte offset `index` of `db` into
// `buf`. Terminates the process through log_exit() when the seek fails or
// fewer than `len` bytes can be read.
static void read_bin(int index, char *buf, size_t len, FILE *db) {
    // Without this check a failed seek would let fread() return bytes from
    // whatever position the stream happened to be at.
    if (fseek(db, index, SEEK_SET) != 0)
        log_exit(__func__);
    if (fread(buf, 1, len, db) != len)
        log_exit(__func__);
}
// Decodes the 4 bytes at `buf` as a little-endian unsigned 32-bit integer.
static unsigned int read_uint(const char *buf) {
    // Work on unsigned bytes: plain `char` may be signed, and left-shifting a
    // negative promoted value is not well defined before C++20.
    const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8) |
           (static_cast<unsigned int>(p[2]) << 16) |
           (static_cast<unsigned int>(p[3]) << 24);
}
// Decodes the 2 bytes at `buf` as a little-endian unsigned 16-bit integer.
static unsigned short read_ushort(const char *buf) {
    // Unsigned bytes avoid shifting a negative value when `char` is signed.
    const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
    return static_cast<unsigned short>(p[0] | (p[1] << 8));
}
// Parses the dotted-decimal IPv4 text `buf` and stores its numeric value in
// `ip` in host byte order (e.g. "1.2.3.4" -> 0x01020304).
// Returns false, leaving `ip` untouched, when `buf` is not a valid address.
static bool ip2uint(const char *buf, unsigned int &ip) {
    struct in_addr addr;
    // inet_pton() returns 1 only on success (0: malformed text, -1: error).
    if (inet_pton(AF_INET, buf, &addr) != 1)
        return false;
    // s_addr is in network byte order. ntohl() gives the host-order value on
    // little- and big-endian machines alike, whereas a hand-written byte swap
    // is only correct on little-endian hosts.
    ip = ntohl(addr.s_addr);
    return true;
}
// Fetches the vector-index entry selected by the two most significant bytes
// of `ip` and returns its two 32-bit fields in `left` and `right`.
// The entry is taken from the cached file content, from the cached vector
// index, or read from the file (counted as one I/O), in that order of
// preference.
void xdb_search_t::get_content_index(unsigned int ip,
                                     unsigned int &left,
                                     unsigned int &right) {
    const unsigned int row = (ip >> 24) & 0xFF;
    const unsigned int col = (ip >> 16) & 0xFF;
    const unsigned int offset =
        (row * vector_index_cols + col) * vector_index_size;

    if (content != NULL) {
        // The whole file is cached: the vector index follows the header.
        const char *entry = content + offset + header_length;
        left = read_uint(entry);
        right = read_uint(entry + 4);
        return;
    }

    if (vector_index != NULL) {
        const char *entry = vector_index + offset;
        left = read_uint(entry);
        right = read_uint(entry + 4);
        return;
    }

    ++io_count;
    char entry[8];
    read_bin(header_length + offset, entry, sizeof(entry), db);
    left = read_uint(entry);
    right = read_uint(entry + 4);
}
void xdb_search_t::get_content(unsigned int index,
unsigned int &ip_left,
unsigned int &ip_right,
unsigned short &region_len,
unsigned int &region_index) {
char buf[segment_index_size]; // 4 + 4 + 2 + 4
const char *p;
if (content != NULL) {
p = content + index;
} else {
++io_count;
read_bin(index, buf, sizeof(buf), db);
p = buf;
}
ip_left = read_uint(p);
ip_right = read_uint(p + 4);
region_len = read_ushort(p + 8);
region_index = read_uint(p + 10);
}
// Returns the `len` bytes of region text stored at byte offset `index`.
// Uses the cached content when available; otherwise reads from the file and
// counts one I/O.
std::string xdb_search_t::get_region(unsigned int index, unsigned short len) {
    if (content != NULL)
        return std::string(content + index, len);

    ++io_count;
    // Read straight into the result string: no manual malloc/free and no
    // unchecked allocation result.
    std::string res(len, '\0');
    if (len > 0)
        read_bin(index, &res[0], len, db);
    return res;
}
// Opens the xdb file `file_name` for reading; no cache is active until one
// of the init_*() functions is called. Terminates the process through
// log_exit() when the file cannot be opened.
xdb_search_t::xdb_search_t(const std::string &file_name) {
    // xdb is a binary format, so open it in binary mode.
    db = fopen(file_name.c_str(), "rb");
    vector_index = NULL;
    content = NULL;
    // Give the statistics defined values so the getters are safe to call
    // before the first search.
    io_count = 0;
    cost_time = 0;
    if (db == NULL)
        log_exit("can't open " + file_name);
}
// "file" cache policy: intentionally does nothing. With neither `content`
// nor `vector_index` set, lookups read everything from `db`.
void xdb_search_t::init_file() {
}
void xdb_search_t::init_vector_index() {
vector_index = (char *)malloc(vector_index_length);
read_bin(header_length, vector_index, vector_index_length, db);
}
void xdb_search_t::init_content() {
fseek(db, 0, SEEK_END);
unsigned int size = ftell(db);
content = (char *)malloc(size);
read_bin(0, content, size, db);
}
// Closes the xdb file and releases both cache buffers.
xdb_search_t::~xdb_search_t() {
    if (db != NULL)
        fclose(db);
    db = NULL;

    // free(NULL) is a no-op, so the buffers need no guard.
    free(vector_index);
    vector_index = NULL;

    free(content);
    content = NULL;
}
// Returns `io_count`, which the integer search overload resets to 0 and the
// file-reading paths increment.
unsigned long long xdb_search_t::get_io_count() {
return io_count;
}
// Returns `cost_time`, the get_time() difference stored by the string
// search overload.
unsigned long long xdb_search_t::get_cost_time() {
return cost_time;
}
std::string xdb_search_t::search(const std::string &ip_str) {
unsigned long long t1 = get_time();
unsigned int ip_uint;
if (!ip2uint(ip_str.data(), ip_uint))
return "invalid ip: " + ip_str;
std::string region = search(ip_uint);
unsigned long long t2 = get_time();
cost_time = t2 - t1;
return region;
}
// Looks up the region of the host-order IPv4 value `ip_uint`.
// The vector index yields the byte range of the segment records that may
// contain the address; that range is binary-searched for the record whose
// [start, end] interval contains `ip_uint`.
// Returns the region text, or an empty string when no record matches.
// Resets `io_count` before searching.
std::string xdb_search_t::search(unsigned int ip_uint) {
    io_count = 0;

    unsigned int first_ptr, end_ptr;
    get_content_index(ip_uint, first_ptr, end_ptr);
    if (end_ptr < first_ptr)
        return std::string();  // corrupt index entry

    // Half-open search over record numbers [lo, hi). This never computes
    // `mid - 1` on an unsigned 0, never probes past the last record of the
    // range, and terminates when the address is not covered.
    // NOTE(review): assumes the second index field points one record past the
    // last one of the range — confirm against the xdb format.
    unsigned int lo = 0;
    unsigned int hi = (end_ptr - first_ptr) / segment_index_size;
    while (lo < hi) {
        const unsigned int mid = lo + (hi - lo) / 2;

        unsigned int ip_left, ip_right;
        unsigned short region_len;
        unsigned int region_index;
        get_content(first_ptr + mid * segment_index_size,
                    ip_left,
                    ip_right,
                    region_len,
                    region_index);

        if (ip_left > ip_uint)
            hi = mid;
        else if (ip_right < ip_uint)
            lo = mid + 1;
        else
            return get_region(region_index, region_len);
    }
    return std::string();
}

50
binding/cpp/xdb_search.h Normal file
View File

@ -0,0 +1,50 @@
#ifndef XDB_SEARCH_H
#define XDB_SEARCH_H
#include <string>
// Query client for an ip2region xdb file.
// Usage: construct with the xdb path, select a cache policy with exactly one
// of the init_*() functions, then call search().
class xdb_search_t {
  public:
    // Opens `file_name`.
    xdb_search_t(const std::string &file_name);
    // Closes the file and releases the cache buffers.
    ~xdb_search_t();

    // The object owns a FILE* and malloc'ed buffers that the destructor
    // releases; an implicit copy would release them twice, so copying is
    // disabled.
    xdb_search_t(const xdb_search_t &) = delete;
    xdb_search_t &operator=(const xdb_search_t &) = delete;

    // Cache policies.
    void init_file();          // no cache: every lookup reads the file
    void init_vector_index();  // cache the vector index only
    void init_content();       // cache the whole file

    // Statistics kept by search(): I/O count and elapsed time as measured
    // with get_time().
    unsigned long long get_io_count();
    unsigned long long get_cost_time();

    // Returns the region text for the dotted-decimal IPv4 address `ip`.
    std::string search(const std::string &ip);

  private:
    // Reads the vector-index entry selected by the top two bytes of `ip`.
    void get_content_index(unsigned int ip,
                           unsigned int &left,
                           unsigned int &right);
    // Decodes the segment record at byte offset `index`.
    void get_content(unsigned int index,
                     unsigned int &ip_left,
                     unsigned int &ip_right,
                     unsigned short &region_len,
                     unsigned int &region_index);
    // Returns `len` bytes of region text at byte offset `index`.
    std::string get_region(unsigned int index, unsigned short len);
    // Lookup by host-order numeric address.
    std::string search(unsigned int ip_uint);

    FILE *db;            // open xdb file
    char *vector_index;  // cached vector index, or NULL
    char *content;       // cached whole file, or NULL

    unsigned long long io_count;
    unsigned long long cost_time;

    // Layout constants used by the lookup code (sizes in bytes).
    static constexpr int header_length = 256;
    static constexpr int vector_index_rows = 256;
    static constexpr int vector_index_cols = 256;
    static constexpr int vector_index_size = 8;
    static constexpr int vector_index_length =
        vector_index_rows * vector_index_cols * vector_index_size;
    static constexpr int segment_index_size = 14;
};
#endif

View File

@ -0,0 +1,73 @@
#include "xdb_search.h"
#include <getopt.h>
#include <iostream>
// Prints the xdb_search usage text on standard output and terminates the
// process with exit status -1. The parameters are unused.
void print_help(int argc, char* argv[]) {
    static const char usage[] =
        "./xdb_search [command options]\n"
        "options:\n"
        " --db string ip2region binary xdb file path\n"
        " --cache-policy string cache policy: file/vector_index/content\n"
        " --help print help\n";
    printf("%s", usage);
    exit(-1);
}
// Entry point of xdb_search: parses --db / --cache-policy / --help,
// configures the requested cache policy and then answers addresses typed on
// standard input until "exit", "quit" or end of input.
int main(int argc, char* argv[]) {
    struct option long_options[] = {
        {"db",           required_argument, 0, 'd'},
        {"cache-policy", required_argument, 0, 't'},
        {"help",         no_argument,       0, 'h'},
        {0,              0,                 0, 0  }
    };

    std::string db_file_name = "../../data/ip2region.xdb";
    std::string cache_policy = "vector_index";

    while (1) {
        int c = getopt_long(argc, argv, "", long_options, NULL);
        if (c == -1)
            break;
        switch (c) {
        case 'd':
            db_file_name = optarg;
            break;
        case 'h':
            print_help(argc, argv);
            break;
        case 't':
            cache_policy = optarg;
            break;
        case '?':
            exit(-1);
        }
    }

    xdb_search_t xdb(db_file_name);

    if (cache_policy == "content")
        xdb.init_content();
    else if (cache_policy == "vector_index")
        xdb.init_vector_index();
    else if (cache_policy == "file")
        xdb.init_file();
    else {
        std::cout << "invalid cache policy: " << cache_policy << std::endl;
        exit(-1);
    }

    std::string ip;
    for (;;) {
        std::cout << "ip2region>> ";
        // Stop at end of input (Ctrl-D, closed pipe) or on a read error.
        // A failed getline leaves `ip` empty, which previously re-entered the
        // loop forever through the empty-line `continue` below.
        if (!std::getline(std::cin, ip))
            break;
        if (ip.empty())
            continue;
        if (ip == "exit" || ip == "quit")
            break;
        std::cout << xdb.search(ip) << std::endl;
    }
    return 0;
}

11
maker/cpp/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Builds the xdb generator (xdb_make) and the source-data editor (xdb_edit).

# `all` and `clean` name actions, not files; declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

all: xdb_make xdb_edit

xdb_make: xdb_make.cc xdb_make_test.cc
	g++ -std=c++11 -O2 $^ -o $@

xdb_edit: xdb_edit.cc xdb_edit_test.cc
	g++ -std=c++11 -O2 $^ -o $@

clean:
	rm -f xdb_make xdb_edit

217
maker/cpp/readme.md Normal file
View File

@ -0,0 +1,217 @@
# ip2region xdb C++ 生成实现
# 编译
1. 切换到当前目录
2. 编译
```
$ make
g++ -std=c++11 -O2 xdb_make.cc xdb_make_test.cc -o xdb_make
g++ -std=c++11 -O2 xdb_edit.cc xdb_edit_test.cc -o xdb_edit
```
# `xdb` 数据生成
## 使用说明
```
$ ./xdb_make --help
./xdb_make [command options]
options:
--db string ip2region binary xdb file path
--src string source ip text file path
```
## 数据生成
```
$ ./xdb_make --db ip2region.xdb --src ../../data/ip.merge.txt
took: 1.46s
```
## 数据正确性测试
```
$ make # 1. 编译
$ ./xdb_make # 2. 本目录生成 xdb 文件
$ diff <(xxd ./ip2region.xdb) <(xxd ../../data/ip2region.xdb) # 3. 比较本目录和仓库中的 xdb 文件
# 只有生成的时间不同
1c1
< 00000000: 0200 0100 3c6a f965 2302 0f00 75ea a800 ....<j.e#...u...
---
> 00000000: 0200 0100 469b de65 2302 0f00 75ea a800 ....F..e#...u...
```
# `xdb` 数据编辑
## 使用说明
* 新的IP归属地文件可以包含空行
* 新的IP归属地文件顺序可以乱序, 程序会自动排序
* 新的IP归属地文件中的IP段可以重叠, 只要无二义性, 程序会自动合并
* 最终的结果会将相邻的且归属地相同的行自动合并
```
$ ./xdb_edit --help
./xdb_edit [command options]
options:
--old filename old source ip text file path
--new filename new source ip text file path
```
## 数据更新
```
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
took: 1.46s
```
## 数据正确性测试
### 测试一: 测试数据文件包含空行以及重复的情况
```
$ cat -n 1.txt
1
2 1.0.128.0|1.0.128.255|测试归属地
3 1.0.128.0|1.0.128.255|测试归属地
4
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
took: 1.83s
$ git diff ../../data/
diff --git a/data/ip.merge.txt b/data/ip.merge.txt
index 8976bd3..6da5e18 100644
--- a/data/ip.merge.txt
+++ b/data/ip.merge.txt
@@ -7,7 +7,7 @@
1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信
1.0.64.0|1.0.79.255|日本|0|广岛县|0|0
1.0.80.0|1.0.127.255|日本|0|冈山县|0|0
-1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT
+1.0.128.0|1.0.128.255|测试归属地
1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT
1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT
1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT
@@ -320906,8 +320906,7 @@
100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报
100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint
-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP
-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP
+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP
100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile
101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信
101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0
```
### 测试二: 测试数据文件乱序以及数据有交叉, 归属地相同的情况
```
$ cat -n 1.txt
1
2 1.0.128.5|1.0.128.255|测试归属地
3 1.0.128.0|1.0.128.9|测试归属地
4
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
took: 1.83s
$ git diff ../../data/
diff --git a/data/ip.merge.txt b/data/ip.merge.txt
index 8976bd3..6da5e18 100644
--- a/data/ip.merge.txt
+++ b/data/ip.merge.txt
@@ -7,7 +7,7 @@
1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信
1.0.64.0|1.0.79.255|日本|0|广岛县|0|0
1.0.80.0|1.0.127.255|日本|0|冈山县|0|0
-1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT
+1.0.128.0|1.0.128.255|测试归属地
1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT
1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT
1.0.133.0|1.0.133.255|泰国|0|素攀武里府|0|TOT
@@ -320906,8 +320906,7 @@
100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报
100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint
-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP
-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP
+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP
100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile
101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信
101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0
```
### 测试三: 测试数据文件乱序以及数据有交叉的, 归属地不同情况
```
$ cat -n 1.txt
1
2 1.0.128.5|1.0.128.255|测试归属地
3 1.0.128.0|1.0.128.9|测试归属地123
4
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
数据有二义性: 1.0.128.0|1.0.128.9|测试归属地123, 1.0.128.5|1.0.128.255|测试归属地
```
### 测试四: 测试将一个IP数据拆成多个IP
```
$ cat -n 1.txt
1 36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动
2 36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动
3 36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
took: 1.83s
$ git diff ../../data/
diff --git a/data/ip.merge.txt b/data/ip.merge.txt
index 8976bd3..7be0227 100644
--- a/data/ip.merge.txt
+++ b/data/ip.merge.txt
@@ -54778,7 +54778,11 @@
36.134.84.0|36.134.85.255|中国|0|安徽省|合肥市|移动
36.134.86.0|36.134.87.255|中国|0|广西|南宁市|移动
36.134.88.0|36.134.89.255|中国|0|内蒙古|呼和浩特市|移动
-36.134.90.0|36.141.255.255|中国|0|0|0|移动
+36.134.90.0|36.136.0.255|中国|0|0|0|移动
+36.136.1.0|36.136.7.255|中国|0|广西|来宾市|移动
+36.136.8.0|36.136.15.255|中国|0|广西|玉林市|移动
+36.136.16.0|36.136.23.255|中国|0|广西|河池市|移动
+36.136.24.0|36.141.255.255|中国|0|0|0|移动
36.142.0.0|36.142.1.255|中国|0|四川省|成都市|移动
36.142.2.0|36.142.31.255|中国|0|甘肃省|兰州市|移动
36.142.32.0|36.142.127.255|中国|0|甘肃省|0|移动
@@ -320906,8 +320910,7 @@
100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报
100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint
-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP
-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP
+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP
100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile
101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信
101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0
```
### 测试五: 测试将多个IP数据并成一个IP数据
```
$ cat -n 1.txt
1
2 1.0.16.0|1.0.127.255|测试归属地
3
$ ./xdb_edit --old ../../data/ip.merge.txt --new 1.txt
took: 1.83s
$ git diff ../../data/
diff --git a/data/ip.merge.txt b/data/ip.merge.txt
index 8976bd3..acc27a5 100644
--- a/data/ip.merge.txt
+++ b/data/ip.merge.txt
@@ -3,10 +3,7 @@
1.0.1.0|1.0.3.255|中国|0|福建省|福州市|电信
1.0.4.0|1.0.7.255|澳大利亚|0|维多利亚|墨尔本|0
1.0.8.0|1.0.15.255|中国|0|广东省|广州市|电信
-1.0.16.0|1.0.31.255|日本|0|0|0|0
-1.0.32.0|1.0.63.255|中国|0|广东省|广州市|电信
-1.0.64.0|1.0.79.255|日本|0|广岛县|0|0
-1.0.80.0|1.0.127.255|日本|0|冈山县|0|0
+1.0.16.0|1.0.127.255|测试归属地
1.0.128.0|1.0.128.255|泰国|0|清莱府|0|TOT
1.0.129.0|1.0.132.191|泰国|0|曼谷|曼谷|TOT
1.0.132.192|1.0.132.255|泰国|0|Nakhon-Ratchasima|0|TOT
@@ -320906,8 +320903,7 @@
100.47.160.0|100.47.191.255|美国|0|密歇根|0|美国电话电报
100.47.192.0|100.47.255.255|美国|0|0|0|美国电话电报
100.48.0.0|100.63.255.255|美国|0|0|0|Sprint
-100.64.0.0|100.122.255.255|0|0|0|内网IP|内网IP
-100.123.0.0|100.127.255.255|0|0|0|内网IP|内网IP
+100.64.0.0|100.127.255.255|0|0|0|内网IP|内网IP
100.128.0.0|100.255.255.255|美国|0|0|0|T-Mobile
101.0.0.0|101.0.3.255|中国|0|福建省|福州市|电信
101.0.4.0|101.0.7.255|印度尼西亚|0|东爪哇|泗水|0
```

247
maker/cpp/xdb_edit.cc Normal file
View File

@ -0,0 +1,247 @@
#include "xdb_edit.h"
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <algorithm>
#include <iostream>
// Prints `msg` followed by a newline on standard output, flushes the stream
// and terminates the process with exit status -1. Never returns.
static void log_exit(const std::string& msg) {
    std::cout << msg << '\n' << std::flush;
    exit(-1);
}
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in microseconds.
static unsigned long long get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const unsigned long long seconds = (unsigned long long)now.tv_sec;
    return seconds * 1000 * 1000 + now.tv_usec;
}
// Parses the dotted-decimal IPv4 text `buf` and stores its numeric value in
// `ip` in host byte order (e.g. "1.2.3.4" -> 0x01020304).
// Returns false, leaving `ip` untouched, when `buf` is not a valid address.
static bool ip2uint(const char* buf, unsigned int& ip) {
    struct in_addr addr;
    // inet_pton() returns 1 only on success (0: malformed text, -1: error).
    if (inet_pton(AF_INET, buf, &addr) != 1)
        return false;
    // s_addr is in network byte order. ntohl() gives the host-order value on
    // little- and big-endian machines alike, whereas a hand-written byte swap
    // is only correct on little-endian hosts.
    ip = ntohl(addr.s_addr);
    return true;
}
// Formats the host-order IPv4 value `ip` as dotted-decimal text
// (e.g. 0x01020304 -> "1.2.3.4").
static std::string uint2ip(unsigned int ip) {
    char buf[16];  // "255.255.255.255" plus the terminating NUL
    // %u matches the unsigned int arguments; every octet is in 0..255.
    snprintf(buf,
             sizeof(buf),
             "%u.%u.%u.%u",
             (ip >> 24) & 0xFF,
             (ip >> 16) & 0xFF,
             (ip >> 8) & 0xFF,
             ip & 0xFF);
    return std::string(buf);
}
// Reads the IP text file `file_name` ("start_ip|end_ip|region" per line) and
// appends one xdb_node_t per non-blank line to `regions`.
// Trailing whitespace is stripped and blank lines are skipped.
// Aborts through log_exit() when the file cannot be opened.
static void handle_ip_txt(const std::string& file_name,
                          std::list<xdb_node_t>& regions) {
    FILE* f = fopen(file_name.data(), "r");
    if (f == NULL)
        log_exit("can't open " + file_name);

    char buf[1024];
    while (fgets(buf, sizeof(buf), f) != NULL) {
        unsigned int buf_len = strlen(buf);
        // Strip trailing whitespace. The cast is required: region text is
        // multi-byte, so a plain (possibly signed) char may be negative, and
        // passing a negative value to isspace() is undefined behaviour.
        while (buf_len > 0 &&
               isspace(static_cast<unsigned char>(buf[buf_len - 1])))
            --buf_len;
        if (buf_len == 0)
            continue;
        buf[buf_len] = '\0';
        regions.push_back(xdb_node_t(buf));
    }
    fclose(f);
}
// xdb_node_t
// Creates an empty node. Both addresses are zero-initialised so a
// default-constructed node never carries indeterminate values.
xdb_node_t::xdb_node_t() : ip1(0), ip2(0) {
}
// Builds a node from one "start_ip|end_ip|region" line.
// `buf` is modified in place: the first two '|' are overwritten with NULs.
// Aborts through log_exit() when a separator is missing, an address is
// invalid, start > end, or the region is empty.
xdb_node_t::xdb_node_t(char* buf) {
    char* const first_bar = strchr(buf, '|');
    if (first_bar == NULL)
        log_exit("invalid data: " + std::string(buf));

    char* const second_bar = strchr(first_bar + 1, '|');
    if (second_bar == NULL)
        log_exit("invalid data: " + std::string(buf));

    *first_bar = '\0';
    *second_bar = '\0';
    region.assign(second_bar + 1);

    const bool well_formed = ip2uint(buf, ip1) &&
                             ip2uint(first_bar + 1, ip2) && ip1 <= ip2 &&
                             !region.empty();
    if (!well_formed) {
        // Put the separators back so the message shows the whole line.
        *first_bar = '|';
        *second_bar = '|';
        log_exit(std::string("invalid data: ") + buf);
    }
}
// Orders nodes by start address, then by end address.
bool xdb_node_t::operator<(const xdb_node_t& rhs) const {
    if (ip1 != rhs.ip1)
        return ip1 < rhs.ip1;
    return ip2 < rhs.ip2;
}
// Renders the node in the source-file form "start_ip|end_ip|region".
std::string xdb_node_t::to_string() const {
    std::string text = uint2ip(ip1);
    text += "|";
    text += uint2ip(ip2);
    text += "|";
    text += region;
    return text;
}
// Loads the new-data file into `new_regions`, sorts it and normalises it:
// overlapping entries with the same region are merged, overlapping entries
// with different regions abort the program as ambiguous, and touching entries
// with the same region are joined.
void xdb_edit_t::handle_new_file(const std::string& file_name) {
    handle_ip_txt(file_name, new_regions);
    new_regions.sort();

    auto cur = new_regions.begin();
    while (cur != new_regions.end()) {
        auto nxt = cur;
        ++nxt;
        if (nxt == new_regions.end())
            break;

        const bool overlapping = cur->ip1 == nxt->ip1 || cur->ip2 >= nxt->ip1;
        if (overlapping) {
            if (cur->region != nxt->region)
                log_exit("数据有二义性: " + cur->to_string() + ", " +
                         nxt->to_string());
            cur->ip2 = std::max(cur->ip2, nxt->ip2);
            new_regions.erase(nxt);
            continue;  // compare the widened entry with its new successor
        }

        if (cur->ip2 + 1 == nxt->ip1 && cur->region == nxt->region) {
            // Adjacent ranges with the same region: join them.
            cur->ip2 = nxt->ip2;
            new_regions.erase(nxt);
            continue;
        }

        ++cur;
    }
}
// Loads the existing source file into `old_regions` in file order; no
// sorting or merging is applied here.
void xdb_edit_t::handle_old_file(const std::string& file_name) {
handle_ip_txt(file_name, old_regions);
}
void xdb_edit_t::merge() {
auto it1 = old_regions.begin();
auto it2 = new_regions.begin();
for (;;) {
if (it2 == new_regions.end())
break;
if (it2->ip1 > it2->ip2) {
// 失效数据
++it2;
continue;
}
while (it1->ip2 < it2->ip1)
++it1;
if (it1->ip2 <= it2->ip2) {
xdb_node_t node;
node.ip1 = it2->ip1;
node.ip2 = it1->ip2;
node.region = it2->region;
it1->ip2 = node.ip1 - 1;
it2->ip1 = node.ip2 + 1;
// std::cout << "insert: " << node.to_string() <<
// std::endl;
++it1;
it1 = old_regions.insert(it1, node);
++it1;
} else {
xdb_node_t node;
node.ip1 = it2->ip2 + 1;
node.ip2 = it1->ip2;
node.region = it1->region;
it1->ip2 = it2->ip1 - 1;
// std::cout << "insert: " << it2->to_string() <<
// std::endl;
++it1;
it1 = old_regions.insert(it1, *it2);
++it1;
it1 = old_regions.insert(it1, node);
++it2;
}
}
}
void xdb_edit_t::write_old_file(const std::string& file_name) {
FILE* f = fopen(file_name.data(), "w");
if (f == NULL)
log_exit("can't open " + file_name);
auto it = old_regions.begin();
// 删除非法的数据
for (;;) {
if (it == old_regions.end())
break;
if (it->ip1 > it->ip2)
it = old_regions.erase(it);
else
++it;
}
// 合并数据域相同的相邻数据
it = old_regions.begin();
for (;;) {
if (it == old_regions.end())
break;
auto next = it;
++next;
if (next == old_regions.end())
break;
if (it->region == next->region) {
it->ip2 = next->ip2;
old_regions.erase(next);
} else {
++it;
}
}
for (auto& d : old_regions) {
std::string res =
uint2ip(d.ip1) + "|" + uint2ip(d.ip2) + "|" + d.region + "\n";
fputs(res.data(), f);
}
fclose(f);
}
// Runs the whole edit: loads the new and old data, merges them, rewrites the
// old file in place and prints the elapsed time in seconds.
xdb_edit_t::xdb_edit_t(const std::string& file_name_old,
                       const std::string& file_name_new) {
    const unsigned long long started = get_time();

    handle_new_file(file_name_new);
    handle_old_file(file_name_old);
    merge();
    write_old_file(file_name_old);

    const unsigned long long elapsed_us = get_time() - started;
    printf("took: %.2fs\n", elapsed_us * 1.0 / 1000 / 1000);
}

35
maker/cpp/xdb_edit.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef XDB_EDIT_H
#define XDB_EDIT_H
#include <list>
#include <string>
// One record of the IP source text: an inclusive address range and its
// region text.
struct xdb_node_t {
// First and last address of the range.
unsigned int ip1;
unsigned int ip2;
std::string region;
xdb_node_t();
// Parses one "start_ip|end_ip|region" line; `buf` is modified in place.
xdb_node_t(char* buf);
// Orders by ip1, then ip2.
bool operator<(const xdb_node_t& rhs) const;
// Renders the node back to "start_ip|end_ip|region".
std::string to_string() const;
};
// Editor for the IP source text file: constructing an instance applies the
// ranges of `file_name_new` to `file_name_old` and rewrites the latter.
class xdb_edit_t {
public:
xdb_edit_t(const std::string& file_name_old,
const std::string& file_name_new);
private:
// Loads, sorts and normalises the new data.
void handle_new_file(const std::string& file_name);
// Loads the existing data.
void handle_old_file(const std::string& file_name);
// Overlays new_regions onto old_regions.
void merge();
// Cleans up old_regions and writes it to `file_name`.
void write_old_file(const std::string& file_name);
std::list<xdb_node_t> old_regions;
std::list<xdb_node_t> new_regions;
};
#endif

View File

@ -0,0 +1,50 @@
#include "xdb_edit.h"
#include <getopt.h>
#include <stdio.h>
#include <iostream>
// Prints the xdb_edit usage text on standard output and terminates the
// process with exit status -1.
void print_help() {
    static const char usage[] =
        "./xdb_edit [command options]\n"
        "options:\n"
        " --old filename old source ip text file path\n"
        " --new filename new source ip text file path\n";
    printf("%s", usage);
    exit(-1);
}
// Entry point of xdb_edit: parses --old / --new / --help and runs the edit
// by constructing an xdb_edit_t.
int main(int argc, char* argv[]) {
    struct option long_options[] = {
        {"new",  required_argument, 0, 'n'},
        {"old",  required_argument, 0, 'o'},
        {"help", no_argument,       0, 'h'},
        {0,      0,                 0, 0  }
    };

    std::string file_name_old = "../../data/ip.merge.txt";
    std::string file_name_new = "./1.txt";

    int opt;
    while ((opt = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
        if (opt == 'n')
            file_name_new = optarg;
        else if (opt == 'h')
            print_help();
        else if (opt == 'o')
            file_name_old = optarg;
        else if (opt == '?')
            exit(-1);  // getopt_long reported an unknown or incomplete option
    }

    xdb_edit_t xdb(file_name_old, file_name_new);
    return 0;
}

240
maker/cpp/xdb_make.cc Normal file
View File

@ -0,0 +1,240 @@
#include "xdb_make.h"
#include <arpa/inet.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <iostream>
// Prints `msg` followed by a newline on standard output, flushes the stream
// and terminates the process with exit status -1. Never returns.
static void log_exit(const std::string &msg) {
    std::cout << msg << '\n' << std::flush;
    exit(-1);
}
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in microseconds.
static unsigned long long get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const unsigned long long seconds = (unsigned long long)now.tv_sec;
    return seconds * 1000 * 1000 + now.tv_usec;
}
// Stores `data` into buf[0..3] as a little-endian 32-bit value.
static void write_uint(unsigned int data, char buf[]) {
    for (int byte = 0; byte < 4; ++byte)
        buf[byte] = (data >> (8 * byte)) & 0xFF;
}
// Writes `data` to `dst` as 4 little-endian bytes.
static void write_uint(unsigned int data, FILE *dst) {
    char encoded[4];
    write_uint(data, encoded);
    fwrite(encoded, 1, sizeof encoded, dst);
}
// Stores `data` into buf[0..1] as a little-endian 16-bit value.
static void write_ushort(unsigned short data, char buf[]) {
    const unsigned int low = data & 0xFF;
    const unsigned int high = (data >> 8) & 0xFF;
    buf[0] = low;
    buf[1] = high;
}
// Writes `data` to `dst` as 2 little-endian bytes.
static void write_ushort(unsigned short data, FILE *dst) {
    char encoded[2];
    write_ushort(data, encoded);
    fwrite(encoded, 1, sizeof encoded, dst);
}
// Writes the `len` bytes starting at `buf` to `dst`. The fwrite() result is
// not checked.
static void write_string(const char *buf, unsigned int len, FILE *dst) {
fwrite(buf, 1, len, dst);
}
// Parses the dotted-decimal IPv4 text `buf` and stores its numeric value in
// `ip` in host byte order (e.g. "1.2.3.4" -> 0x01020304).
// Returns false, leaving `ip` untouched, when `buf` is not a valid address.
static bool ip2uint(const char *buf, unsigned int &ip) {
    struct in_addr addr;
    // inet_pton() returns 1 only on success (0: malformed text, -1: error).
    if (inet_pton(AF_INET, buf, &addr) != 1)
        return false;
    // s_addr is in network byte order. ntohl() gives the host-order value on
    // little- and big-endian machines alike, whereas a hand-written byte swap
    // is only correct on little-endian hosts.
    ip = ntohl(addr.s_addr);
    return true;
}
// Formats the host-order IPv4 value `ip` as dotted-decimal text
// (e.g. 0x01020304 -> "1.2.3.4").
static std::string uint2ip(unsigned int ip) {
    char buf[16];  // "255.255.255.255" plus the terminating NUL
    // %u matches the unsigned int arguments; every octet is in 0..255.
    snprintf(buf,
             sizeof(buf),
             "%u.%u.%u.%u",
             (ip >> 24) & 0xFF,
             (ip >> 16) & 0xFF,
             (ip >> 8) & 0xFF,
             ip & 0xFF);
    return std::string(buf);
}
// Appends one segment to the vector-index bucket [row][col]. The segment is
// stored as a pair: the 8-byte little-endian encoding of (ip1, ip2) and the
// region text.
void xdb_make_t::vector_index_push_back(unsigned int row,
                                        unsigned int col,
                                        unsigned int ip1,
                                        unsigned int ip2,
                                        const char *region_str) {
    char range[8];
    write_uint(ip1, range);
    write_uint(ip2, range + 4);

    const std::string packed(range, sizeof(range));
    vector_index[row][col].push_back(
        std::make_pair(packed, std::string(region_str)));
}
// Registers the range [ip1, ip2] in every vector-index bucket it touches.
// A bucket is selected by the two most significant bytes of an address, so a
// range spanning several buckets is cut at bucket boundaries: the first
// bucket gets [ip1, end of bucket], the last one [start of bucket, ip2], and
// every bucket in between its full range. Expects ip1 <= ip2.
void xdb_make_t::vector_index_push_back(unsigned int ip1,
                                        unsigned int ip2,
                                        const char *region_str) {
    // Bucket number = row * 256 + col.
    const unsigned int first_bucket = ip1 >> 16;
    const unsigned int last_bucket = ip2 >> 16;

    if (first_bucket == last_bucket) {
        vector_index_push_back(
            first_bucket >> 8, first_bucket & 0xFF, ip1, ip2, region_str);
        return;
    }

    // The two partial buckets are pushed first, then the full ones in
    // ascending order.
    vector_index_push_back(first_bucket >> 8,
                           first_bucket & 0xFF,
                           ip1,
                           ip1 | 0x0000FFFF,
                           region_str);
    vector_index_push_back(last_bucket >> 8,
                           last_bucket & 0xFF,
                           ip2 & 0xFFFF0000,
                           ip2,
                           region_str);

    for (unsigned int bucket = first_bucket + 1; bucket < last_bucket;
         ++bucket) {
        const unsigned int base = bucket << 16;
        vector_index_push_back(
            bucket >> 8, bucket & 0xFF, base, base | 0x0000FFFF, region_str);
    }
}
// Processes one source line of the form "start_ip|end_ip|region".
// `buf` is modified in place. Blank lines are ignored. The line must be well
// formed and must start exactly where the previous line ended (the ranges
// have to be contiguous, beginning at 0.0.0.0); otherwise the program aborts
// through log_exit(). A region text seen for the first time is assigned the
// next free offset in the region area, and the range is added to the vector
// index.
void xdb_make_t::handle_input_help(char *buf) {
    // State carried across calls: where the next new region text will be
    // placed, and the start address the next line must have.
    static unsigned int region_index = vector_index_length + header_length;
    static unsigned int next_ip = 0;

    // Strip trailing whitespace. The cast is required: region text is
    // multi-byte, so a plain (possibly signed) char may be negative, and
    // passing a negative value to isspace() is undefined behaviour.
    unsigned int buf_len = strlen(buf);
    while (buf_len > 0 &&
           isspace(static_cast<unsigned char>(buf[buf_len - 1])))
        --buf_len;
    if (buf_len == 0)
        return;
    buf[buf_len] = '\0';

    char *pos1 = strchr(buf, '|');
    if (pos1 == NULL)
        log_exit("invalid data: " + std::string(buf));
    char *pos2 = strchr(pos1 + 1, '|');
    if (pos2 == NULL)
        log_exit("invalid data: " + std::string(buf));
    *pos1 = '\0';
    *pos2 = '\0';

    const char *region_str = pos2 + 1;
    unsigned int ip1, ip2;
    if (!ip2uint(buf, ip1) || !ip2uint(pos1 + 1, ip2) || ip1 > ip2 ||
        *region_str == '\0') {
        // Restore the separators so the whole line appears in the message.
        *pos1 = *pos2 = '|';
        log_exit(std::string("invalid data: ") + buf);
    }

    if (next_ip != ip1)
        log_exit("ip 不连续: " + uint2ip(ip1));
    next_ip = ip2 + 1;

    if (region.find(region_str) == region.end()) {
        region[region_str] = region_index;
        region_index += strlen(region_str);
    }
    vector_index_push_back(ip1, ip2, region_str);
}
// Reads the source text file and feeds it to handle_input_help one chunk at
// a time.
//
// file_name: path of the source file; log_exit is called when it cannot be
//            opened.
void xdb_make_t::handle_input(const std::string &file_name) {
    FILE *input = fopen(file_name.data(), "r");
    if (input == NULL)
        log_exit("can't open " + file_name);

    // fgets returns at most sizeof(line) - 1 characters per call, so a longer
    // physical line reaches handle_input_help in several pieces.
    char line[1024];
    for (;;) {
        if (fgets(line, sizeof(line), input) == NULL)
            break;
        handle_input_help(line);
    }
    fclose(input);
}
void xdb_make_t::handle_header() {
char buf[header_length];
memset(buf, 0, header_length);
write_ushort(2, buf); // 版本号
write_ushort(1, buf + 2); // 缓存策略
write_uint(time(NULL), buf + 4); // 时间
// 索引
unsigned int content_left = header_length + vector_index_length;
for (auto &d : region)
content_left += d.first.size();
unsigned int content_right = content_left;
for (int i = 0; i < vector_index_rows; ++i)
for (int j = 0; j < vector_index_cols; ++j)
content_right += vector_index[i][j].size() * segment_index_size;
content_right -= segment_index_size;
write_uint(content_left, buf + 8);
write_uint(content_right, buf + 12);
write_string(buf, header_length, dst);
}
void xdb_make_t::handle_vector_index() {
unsigned int index = header_length + vector_index_length;
for (auto &d : region)
index += d.first.size();
for (unsigned i = 0; i < vector_index_rows; ++i)
for (unsigned j = 0; j < vector_index_cols; ++j) {
write_uint(index, dst);
index += segment_index_size * vector_index[i][j].size();
write_uint(index, dst);
}
}
// Writes every region string to dst at the file offset recorded for it in
// `region` (key: region text, value: offset).
void xdb_make_t::handle_region() {
    for (auto it = region.begin(); it != region.end(); ++it) {
        const std::string &text = it->first;
        // Each string has its own offset, so the map's iteration order does
        // not affect the output.
        fseek(dst, it->second, SEEK_SET);
        write_string(text.data(), text.size(), dst);
    }
}
// Appends the segment index to the end of dst. Cells are visited in
// row-major order and each segment is written as:
//   - the 8-byte packed start/end address key,
//   - the length of its region string (via write_ushort),
//   - the file offset of its region string (via write_uint).
void xdb_make_t::handle_content() {
    fseek(dst, 0, SEEK_END);
    for (unsigned i = 0; i < vector_index_rows; ++i)
        for (unsigned j = 0; j < vector_index_cols; ++j)
            // Bind by const reference: iterating by value would copy two
            // std::string objects for every segment.
            for (const auto &d : vector_index[i][j]) {
                write_string(d.first.data(), d.first.size(), dst);
                write_ushort(d.second.size(), dst);
                write_uint(region[d.second], dst);
            }
}
// Builds the xdb file: reads file_name_src, then writes the header, vector
// index, region strings and segment index to file_name_dst, and finally
// prints the elapsed time.
//
// file_name_src: path of the source text file.
// file_name_dst: path of the xdb file to create (truncated if it exists).
xdb_make_t::xdb_make_t(const std::string &file_name_src,
                       const std::string &file_name_dst) {
    unsigned long long tv1 = get_time();
    handle_input(file_name_src);
    // The output is binary data, so open it in binary mode; text mode may
    // translate newline bytes on some platforms and corrupt the file.
    dst = fopen(file_name_dst.data(), "wb");
    if (dst == NULL)
        log_exit("can't open " + file_name_dst);
    handle_header();
    handle_vector_index();
    handle_region();
    handle_content();
    fclose(dst);
    unsigned long long tv2 = get_time();
    // The difference is divided by 1e6 and printed as seconds, so get_time
    // presumably returns microseconds - TODO confirm.
    printf("took: %.2fs\n", (tv2 - tv1) * 1.0 / 1000 / 1000);
}

48
maker/cpp/xdb_make.h Normal file
View File

@ -0,0 +1,48 @@
#ifndef XDB_MAKE_H
#define XDB_MAKE_H
#include <stdio.h>
#include <string>
#include <unordered_map>
#include <vector>
// Generates a binary xdb file from a source text file. All of the work is
// done by the constructor; the private members are its processing steps and
// scratch state.
class xdb_make_t {
  public:
    // file_name_src: path of the source text file to read.
    // file_name_dst: path of the xdb file to write.
    xdb_make_t(const std::string &file_name_src,
               const std::string &file_name_dst);

  private:
    // ---- file layout constants ----
    static constexpr int header_length = 256;      // header size in bytes
    static constexpr int vector_index_rows = 256;  // cells per first octet
    static constexpr int vector_index_cols = 256;  // cells per second octet
    static constexpr int vector_index_size = 8;    // bytes per cell on disk
    static constexpr int vector_index_length =
        vector_index_rows * vector_index_cols * vector_index_size;
    static constexpr int segment_index_size = 14;  // bytes per segment entry

    // One cell's segments: (packed start/end address key, region text).
    typedef std::vector<std::pair<std::string, std::string>> segment_list_t;

    // ---- input stage ----
    // Stores one segment in cell [row][col].
    void vector_index_push_back(unsigned int row,
                                unsigned int col,
                                unsigned int ip1,
                                unsigned int ip2,
                                const char *region_str);
    // Splits [ip1, ip2] across cells and stores each piece.
    void vector_index_push_back(unsigned int ip1,
                                unsigned int ip2,
                                const char *region_str);
    // Parses a single line of the source file.
    void handle_input_help(char *buf);
    // Reads the whole source file.
    void handle_input(const std::string &file_name);

    // ---- output stage ----
    void handle_header();
    void handle_vector_index();
    void handle_region();
    void handle_content();

    // ---- state ----
    FILE *dst = nullptr;  // output stream
    segment_list_t vector_index[vector_index_rows][vector_index_cols];
    // Region text -> file offset at which that text is stored.
    std::unordered_map<std::string, unsigned int> region;
};
#endif

View File

@ -0,0 +1,50 @@
#include "xdb_make.h"
#include <getopt.h>
#include <stdio.h>
#include <iostream>
// Prints the command-line usage text to stdout and terminates the process
// with exit status -1.
void print_help() {
    static const char *const usage_lines[] = {
        "./xdb_make [command options]\n",
        "options:\n",
        " --db string ip2region binary xdb file path\n",
        " --src string source ip text file path\n",
    };
    for (const char *line : usage_lines)
        fputs(line, stdout);
    exit(-1);
}
// Entry point: parses --db / --src / --help and runs the xdb generator.
// Paths that are not given on the command line keep the defaults below.
int main(int argc, char* argv[]) {
    std::string file_name_dst = "./ip2region.xdb";
    std::string file_name_src = "../../data/ip.merge.txt";

    // Long options only; each maps to the character returned by getopt_long.
    struct option long_options[] = {
        {"db",   required_argument, 0, 'd'},
        {"src",  required_argument, 0, 's'},
        {"help", no_argument,       0, 'h'},
        {0,      0,                 0, 0  }
    };

    for (;;) {
        const int opt = getopt_long(argc, argv, "", long_options, NULL);
        if (opt == -1)
            break;  // no more options
        if (opt == 'd') {
            file_name_dst = optarg;
        } else if (opt == 's') {
            file_name_src = optarg;
        } else if (opt == 'h') {
            print_help();
        } else if (opt == '?') {
            // Unknown option or missing argument.
            exit(-1);
        }
    }

    // The constructor performs the whole conversion.
    xdb_make_t xdb(file_name_src, file_name_dst);
    return 0;
}