Feat: add rust maker for ipv4 and ipv6

This commit is contained in:
gongzhengyang 2025-09-28 15:20:37 +08:00
parent 05c7c17af0
commit df6004b42e
17 changed files with 639 additions and 398 deletions

View File

@ -123,6 +123,7 @@ $ cargo build -r
#### 测试 IPv6
```bash
$ cd binding/rust
$ cargo build -r
$ ./target/release/searcher --xdb='../../data/ip2region_v6.xdb' query
ip2region xdb searcher test program, type `quit` or `Ctrl + c` to exit
@ -140,6 +141,7 @@ ip2region>>
#### 测试 IPv4
```bash
$ cd binding/rust
$ cargo build -r
$ ./target/release/searcher --xdb='../../data/ip2region_v4.xdb' query
ip2region xdb searcher test program, type `quit` or `Ctrl + c` to exit
ip2region>> 1.1.2.1

View File

@ -11,8 +11,7 @@ license = "Apache-2.0"
[dependencies]
tracing = "0.1"
thiserror = "2"
num-traits = "0.2"
num-derive = "0.4"
maker = { path = "../../../maker/rust/maker"}
[dev-dependencies]
criterion = "0.7"

View File

@ -12,9 +12,6 @@ pub enum Ip2RegionError {
#[error("No matched Ipaddress")]
NoMatchedIP,
#[error("Header parse error: {0}")]
HeaderParsed(String),
#[error("Searcher load IPv4 data, couldn't search IPv6 data")]
OnlyIPv4Version,
@ -23,6 +20,9 @@ pub enum Ip2RegionError {
#[error("Try from slice failed")]
TryFromSliceFailed(#[from] std::array::TryFromSliceError),
#[error("Maker crate error: {0}")]
MakerError(#[from] maker::MakerError),
}
pub type Result<T> = std::result::Result<T, Ip2RegionError>;

View File

@ -1,86 +0,0 @@
use num_derive::FromPrimitive;
use num_traits::FromPrimitive;
use crate::error::Ip2RegionError;
pub const HEADER_INFO_LENGTH: usize = 256;
#[allow(dead_code)]
#[derive(Debug)]
pub struct Header {
version: u16,
index_policy: IndexPolicy,
create_time: u32,
start_index_ptr: u32,
end_index_ptr: u32,
ip_version: IpVersion,
runtime_ptr_bytes: u16,
}
impl TryFrom<&[u8; 256]> for Header {
type Error = Ip2RegionError;
fn try_from(value: &[u8; 256]) -> Result<Self, Self::Error> {
if value.len() < 20 {
return Err(Ip2RegionError::HeaderParsed(
"Header bytes too short".into(),
));
}
let index_policy_value = u16::from_le_bytes([value[2], value[3]]);
let ip_version_value = u16::from_le_bytes([value[16], value[17]]);
Ok(Header {
version: u16::from_le_bytes([value[0], value[1]]),
index_policy: IndexPolicy::from_u16(index_policy_value).ok_or_else(|| {
Ip2RegionError::HeaderParsed(format!(
"Header index policy invalid: {index_policy_value}"
))
})?,
create_time: u32::from_le_bytes([value[4], value[5], value[6], value[7]]),
start_index_ptr: u32::from_le_bytes([value[8], value[9], value[10], value[11]]),
end_index_ptr: u32::from_le_bytes([value[12], value[13], value[14], value[15]]),
ip_version: IpVersion::from_u16(ip_version_value).ok_or_else(|| {
Ip2RegionError::HeaderParsed(format!(
"Header ip version invalid: {ip_version_value}"
))
})?,
runtime_ptr_bytes: u16::from_le_bytes([value[18], value[19]]),
})
}
}
#[derive(FromPrimitive, Debug)]
#[repr(u16)]
pub enum IndexPolicy {
VectorIndex = 1,
BTreeIndex = 2,
}
#[derive(FromPrimitive, Debug)]
#[repr(u16)]
pub enum IpVersion {
V4 = 4,
V6 = 6,
}
impl Header {
pub fn ip_bytes_len(&self) -> usize {
match &self.ip_version {
IpVersion::V4 => 4,
IpVersion::V6 => 16,
}
}
pub fn segment_index_size(&self) -> usize {
match &self.ip_version {
IpVersion::V4 => 14,
IpVersion::V6 => 38,
}
}
pub fn ip_version(&self) -> &IpVersion {
&self.ip_version
}
}

View File

@ -1,7 +1,6 @@
mod error;
mod header;
mod ip_value;
mod searcher;
pub use searcher::{CachePolicy, Searcher};
pub use ip_value::IpValueExt;
pub use ip_value::IpValueExt;

View File

@ -9,13 +9,12 @@ use std::sync::OnceLock;
use tracing::{debug, trace, warn};
use crate::error::{Ip2RegionError, Result};
use crate::header::{HEADER_INFO_LENGTH, Header, IpVersion};
use maker::{
HEADER_INFO_LENGTH, Header, IpVersion, VECTOR_INDEX_LENGTH, VECTOR_INDEX_COLS,
VECTOR_INDEX_SIZE
};
use crate::ip_value::{CompareExt, IpValueExt};
const VECTOR_INDEX_LENGTH: usize = 256 * 256 * 8;
const VECTOR_INDEX_COLS: usize = 256;
const VECTOR_INDEX_SIZE: usize = 8;
pub struct Searcher {
pub filepath: String,
pub cache_policy: CachePolicy,

View File

@ -1,32 +1,71 @@
# ip2region xdb rust 生成实现
## 使用方法
* 当前目录下maker子目录下执行 `cargo build --release` 编译生成工具
* 目标生成在targe/release 目录下 maker
* 使用方法:
## 程序编译
```bash
$ cd maker/rust/maker
$ cargo build -r
```
Usage: maker --in-file <IN_FILE> --out-file <OUT_FILE>
Options:
-i, --in-file <IN_FILE>
-o, --out-file <OUT_FILE>
-h, --help Print help
-V, --version Print version
编译成功以后,执行文件位置 `./target/release/maker`
## `xdb` 数据生成
```bash
# CWD ip2region/maker/rust/maker
$ ./target/release/maker --help
Usage: maker [OPTIONS] --src <SRC> --dst <DST> --ip-version <IP_VERSION>
Options:
--src <SRC>
ip source region txt filepath
--dst <DST>
generated xdb filepath
--ip-version <IP_VERSION>
Possible values:
- v4: IPv4
- v6: Ipv6
--index-policy <INDEX_POLICY>
index cache policy
[default: vector-index]
[possible values: vector-index, b-tree-index]
--filter-fields <FILTER_FIELDS>
region filter fields, the index of the fields, e.g. `1,2,3,5`
-h, --help
Print help (see a summary with '-h')
```
例如,通过默认的 data/ip.merge.txt 原数据在target目录下生成一个 ip2region.xdb 二进制文件:
```
kevin@ubuntu ~/i/m/r/m/t/release (master)> ./maker -i ../../../../../data/ip.merge.txt -o ip2region.xdb
load 683844 lines
try to write the segment index ptr ...
write done, dataBlocks: 13827, IndexBlock: 683844, indexPtr: (983587, 11070069)
Done, elpsed: 0m7s
例如,使用默认的仓库 data/ 下默认的原始数据生成生成 xdb 文件到当前目录(`ip2region/maker/rust/maker`)
```bash
# ipv6
./target/release/maker --src=../../../data/ipv6_source.txt --dst=./target/ipv6.xdb --ip-version v6
# ipv4
./target/release/maker --src=../../../data/ipv4_source.txt --dst=./target/ipv4.xdb --ip-version v4
```
## 数据查询/bench 测试
* 通过将以上步骤生成的二进制文件和python 生成工具生成的二进制文件进行二进制比对,除时间戳位置不同,其它均相同。
## `xdb` 数据查询 和 bench 测试
![](./vimdiff.png)
* 已经完成开发的 [binding](../../binding/) 都有查询和 bench 测试程序以及使用文档,你可以使用你熟悉的语言的 searcher 进行查询测试或者bench测试来确认数据的正确性和完整性。
基于xdb 格式的查询功能和测试见 [ip2region binding](../../binding)
## 对比其他 maker 生成的 xdb 文件
推荐使用 `vbindiff`, 与其他文件的差异只有 create time 信息上有差异,其他数据都需要是一样的
golang 版本 maker 构建 xdb
```bash
$ cd maker golang
$ make
$ ./xdb_maker gen --src=../../data/ipv4_source.txt --dst=./ip2region_v4.xdb --version=ipv4
$ ./xdb_maker gen --src=../../data/ipv6_source.txt --dst=./ip2region_v6.xdb --version=ipv6
```
对比 xdb 差异
```bash
$ cd maker/rust/maker
$ ./target/release/maker --src=../../../data/ipv4_source.txt --dst=./target/ipv4.xdb --ip-version v4
$ ./target/release/maker --src=../../../data/ipv6_source.txt --dst=./target/ipv6.xdb --ip-version v6
$ vbindiff ./ipv4.xdb ../../golang/ip2region_v4.xdb
$ vbindiff ./ipv6.xdb ../../golang/ip2region_v6.xdb
```

View File

@ -1,12 +1,18 @@
[package]
name = "maker"
version = "0.1.0"
edition = "2021"
authors = ["Kevin Wang <wanglong.kevin@gmail.com>"]
version = "0.2.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.4.18", features = ["derive"] }
lazy_static = "1.4.0"
once_cell = "1.19.0"
clap = { version = "4.5", features = ["derive"] }
thiserror = "2"
bytes = "1"
num-derive = "0.4.2"
num-traits = "0.2.19"
chrono = "0.4"
itertools = "0.14"
ipnetwork = "0.21"
tracing = "0.1"
tracing-subscriber = "0.3"

View File

@ -0,0 +1,22 @@
use clap::Parser;
use crate::IpVersion;
use crate::header::IndexPolicy;
#[derive(Parser, Debug)]
pub struct Command {
/// ip source region txt filepath
#[arg(long)]
pub src: String,
/// generated xdb filepath
#[clap(long)]
pub dst: String,
#[clap(long, value_enum)]
pub ip_version: IpVersion,
/// index cache policy
#[clap(long, value_enum, default_value_t = IndexPolicy::VectorIndex)]
pub index_policy: IndexPolicy,
/// region filter fields, the index of the fields, e.g. `1,2,3,5`
#[clap(long, value_delimiter = ',')]
pub filter_fields: Vec<usize>,
}

View File

@ -0,0 +1,37 @@
#[derive(Debug, thiserror::Error)]
pub enum MakerError {
#[error("Io error: {0}")]
IoError(#[from] std::io::Error),
#[error("Header parse error: {0}")]
HeaderParsed(String),
#[error("Parse line src ip, dst ip, region failed for line: {0}")]
ParseIPRegion(String),
#[error("Invalid sip/eip version")]
InvalidIPVersion,
#[error("Ipaddr parse error: {0}")]
IpaddrParseError(#[from] std::net::AddrParseError),
#[error("Region filter fields value too big, limit: {limit}, actual: {actual}")]
RegionFilterFieldsTooBig { limit: usize, actual: usize },
#[error("Empty segments")]
EmptySegments,
#[error("Try from int failed")]
TryFromIntError(#[from] std::num::TryFromIntError),
#[error("Invalid IP Network")]
InvalidIPNetwork(#[from] ipnetwork::IpNetworkError),
#[error("Try from slice failed")]
TryFromSliceFailed(#[from] std::array::TryFromSliceError),
#[error("Region could not found")]
RegionNotFound,
}
pub type Result<T> = std::result::Result<T, MakerError>;

View File

@ -0,0 +1,165 @@
use std::fmt::Display;
use std::net::IpAddr;
use bytes::{BufMut, Bytes, BytesMut};
use clap::ValueEnum;
use itertools::Itertools;
use num_derive::FromPrimitive;
use num_traits::FromPrimitive;
use crate::error::{MakerError, Result};
pub const VERSION_NO: u16 = 3; // since 2025/09/01 (IPv6 supporting)
pub const HEADER_INFO_LENGTH: usize = 256;
pub const VECTOR_INDEX_COLS: usize = 256;
pub const VECTOR_INDEX_ROWS: usize = 256;
pub const VECTOR_INDEX_SIZE: usize = 8;
pub const VECTOR_INDEX_LENGTH: usize = VECTOR_INDEX_COLS * VECTOR_INDEX_ROWS * VECTOR_INDEX_SIZE;
pub const RUNTIME_PTR_SIZE: u16 = 4;
pub const REGION_START: u64 = (HEADER_INFO_LENGTH + VECTOR_INDEX_LENGTH) as u64;
#[allow(dead_code)]
#[derive(Debug)]
pub struct Header {
version: u16,
index_policy: IndexPolicy,
create_time: u32,
start_index_ptr: u32,
end_index_ptr: u32,
ip_version: IpVersion,
runtime_ptr_bytes: u16,
}
impl TryFrom<&[u8; 256]> for Header {
type Error = MakerError;
fn try_from(value: &[u8; 256]) -> Result<Self> {
if value.len() < 20 {
return Err(MakerError::HeaderParsed("Header bytes too short".into()));
}
let index_policy_value = u16::from_le_bytes([value[2], value[3]]);
let ip_version_value = u16::from_le_bytes([value[16], value[17]]);
Ok(Header {
version: u16::from_le_bytes([value[0], value[1]]),
index_policy: IndexPolicy::from_u16(index_policy_value).ok_or_else(|| {
MakerError::HeaderParsed(format!(
"Header index policy invalid: {index_policy_value}"
))
})?,
create_time: u32::from_le_bytes([value[4], value[5], value[6], value[7]]),
start_index_ptr: u32::from_le_bytes([value[8], value[9], value[10], value[11]]),
end_index_ptr: u32::from_le_bytes([value[12], value[13], value[14], value[15]]),
ip_version: IpVersion::from_u16(ip_version_value).ok_or_else(|| {
MakerError::HeaderParsed(format!("Header ip version invalid: {ip_version_value}"))
})?,
runtime_ptr_bytes: u16::from_le_bytes([value[18], value[19]]),
})
}
}
impl Header {
pub fn new(index_policy: IndexPolicy, ip_version: IpVersion) -> Header {
Header {
version: VERSION_NO,
index_policy,
create_time: chrono::Utc::now().timestamp() as u32,
start_index_ptr: 0,
end_index_ptr: 0,
ip_version,
runtime_ptr_bytes: RUNTIME_PTR_SIZE,
}
}
pub fn encode_bytes(&self, start_index_ptr: u32, end_index_ptr: u32) -> Bytes {
let mut buf = BytesMut::with_capacity(HEADER_INFO_LENGTH);
buf.put_u16_le(VERSION_NO);
buf.put_u16_le(self.index_policy as u16);
buf.put_u32_le(self.create_time);
buf.put_u32_le(start_index_ptr);
// index block end ptr
buf.put_u32_le(end_index_ptr);
buf.put_u16_le(self.ip_version as u16);
buf.put_u16_le(self.runtime_ptr_bytes);
buf.freeze()
}
}
#[derive(FromPrimitive, Debug, Copy, Clone, ValueEnum)]
#[repr(u16)]
pub enum IndexPolicy {
VectorIndex = 1,
BTreeIndex = 2,
}
impl Display for IndexPolicy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
IndexPolicy::VectorIndex => write!(f, "VectorIndex"),
IndexPolicy::BTreeIndex => write!(f, "BTreeIndex"),
}
}
}
#[derive(FromPrimitive, Debug, Copy, Clone, ValueEnum, PartialEq)]
#[repr(u16)]
pub enum IpVersion {
/// IPv4
V4 = 4,
/// Ipv6
V6 = 6,
}
impl IpVersion {
pub fn ip_bytes_len(&self) -> usize {
match &self {
IpVersion::V4 => 4,
IpVersion::V6 => 16,
}
}
pub fn segment_index_size(&self) -> usize {
match &self {
IpVersion::V4 => 14,
IpVersion::V6 => 38,
}
}
}
impl Header {
pub fn ip_bytes_len(&self) -> usize {
self.ip_version.ip_bytes_len()
}
pub fn segment_index_size(&self) -> usize {
self.ip_version.segment_index_size()
}
pub fn ip_version(&self) -> &IpVersion {
&self.ip_version
}
}
pub trait IPAddrExt {
fn ipaddr_bytes(&self) -> Vec<u8>;
fn encode_ipaddr_bytes(&self) -> Vec<u8>;
}
impl IPAddrExt for IpAddr {
fn ipaddr_bytes(&self) -> Vec<u8> {
match self {
IpAddr::V4(addr) => addr.octets().to_vec(),
IpAddr::V6(addr) => addr.octets().to_vec(),
}
}
fn encode_ipaddr_bytes(&self) -> Vec<u8> {
match self {
IpAddr::V4(addr) => addr.octets().into_iter().rev().collect_vec(),
IpAddr::V6(addr) => addr.octets().to_vec(),
}
}
}

View File

@ -1,61 +0,0 @@
use std::error::Error;
use std::net::Ipv4Addr;
use std::str::FromStr;
pub trait ToUIntIP {
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>>;
}
impl ToUIntIP for u32 {
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
Ok(self.to_owned())
}
}
impl ToUIntIP for &str {
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
if let Ok(ip_addr) = Ipv4Addr::from_str(self) {
return Ok(u32::from(ip_addr));
}
Ok(self.parse::<u32>()?)
}
}
impl ToUIntIP for Ipv4Addr {
fn to_u32_ip(&self) -> Result<u32, Box<dyn Error>> {
Ok(u32::from(*self))
}
}
#[cfg(test)]
mod test_ip {
use super::*;
#[test]
fn test_ip_str_2_u32() {
let ip_str = "1.1.1.1";
let result = ip_str.to_u32_ip().unwrap();
assert_eq!(result, 1 << 24 | 1 << 16 | 1 << 8 | 1);
}
#[test]
fn test_ip_u32_str() {
let ip = "12";
let result = ip.to_u32_ip().unwrap();
assert_eq!(result, 12);
}
#[test]
fn test_ip_u32() {
let ip: u32 = 33;
let result = ip.to_u32_ip().unwrap();
assert_eq!(result, 33);
}
#[test]
fn test_ip_addr() {
let ip = Ipv4Addr::from_str("0.0.3.12").unwrap();
let result = ip.to_u32_ip().unwrap();
assert_eq!(result, 3 << 8 | 12)
}
}

View File

@ -0,0 +1,13 @@
mod command;
mod error;
mod header;
mod maker;
mod segment;
pub use command::Command;
pub use error::{MakerError, Result};
pub use header::{
HEADER_INFO_LENGTH, Header, IpVersion, REGION_START, VECTOR_INDEX_COLS, VECTOR_INDEX_LENGTH,
VECTOR_INDEX_SIZE,
};
pub use maker::Maker;

View File

@ -1,215 +1,25 @@
use std::time::Instant;
use clap::Parser;
use std::path::PathBuf;
use std::fs::File;
use std::io::{BufReader, BufRead, Error, Write, Seek, SeekFrom};
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::sync::Mutex;
mod ip_value;
pub use self::ip_value::ToUIntIP;
use std::time::{SystemTime, UNIX_EPOCH, Instant};
use maker::{Command, Maker, Result};
use tracing::info;
const HEADER_LEN:u32 = 256;
const VECTOR_INDEX_LEN:u32 = 256*256*8;
const SEGMENT_INDEX_BLOCK_SIZE:u32 = 14;
static mut START_INDEX_POS:u32 = 0;
static mut END_INDEX_POS:u32 = 0;
const PROTOCAL:u16 = 2;
const INDEX_POLICY:u16 = 1;
lazy_static! {
static ref REG_MAP: Mutex<HashMap<String, u32>> = Mutex::new(HashMap::new());
}
lazy_static! {
static ref V_SEG: Mutex<Vec<Segment>> = Mutex::new(Vec::new());
}
#[derive(Parser)]
#[command(author="Kevin Wang <wanglong.kevin@gmail.com>", version="2.0")]
#[command(help_template = " Author: {author-with-newline} {about-section}Version: {version} \n {usage-heading} {usage} \n {all-args} {tab}")]
struct Args {
#[arg(short, long)]
in_file: PathBuf,
#[arg(short, long)]
out_file: PathBuf,
}
struct Segment {
sip: u32,
eip: u32,
reg: String,
}
#[derive(Debug, Clone, Copy)]
struct IndexBlock {
first_pos: u32,
last_pos: u32,
}
impl Segment {
fn new(sip: u32, eip: u32, reg: String) -> Segment {
Segment {sip, eip, reg}
}
}
fn load_segments(in_file: PathBuf) -> std::io::Result<String> {
let in_f = File::open(in_file)?;
let reader = BufReader::new(in_f);
let mut count = 0;
let last_eip = 0;
for line in reader.lines() {
let line = line?;
let v: Vec<&str> = line.splitn(3, '|').collect();
if v.len() != 3 {
panic!("invalid ip segment line '{}'", line)
}
let sip = v[0].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line));
let eip = v[1].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line));
if sip > eip {
panic!("start ip({}) should not be greater than end ip({})", sip, eip);
}
if v[2].len() < 1 {
panic!("empty region info in segment line `{}`", line);
}
// Check the continuity of data segment
if last_eip != 0 && last_eip + 1 != sip {
panic!("discontinuous data segment: last.eip+1!=seg.sip in line {}", line);
}
let segment = Segment::new(sip, eip, v[2].to_string());
V_SEG.lock().unwrap().push(segment);
count += 1;
}
return Ok(count.to_string());
}
fn write_region(out_fd: &mut File) -> std::io::Result<()> {
out_fd.seek(SeekFrom::Start((HEADER_LEN + VECTOR_INDEX_LEN).into()))?;
let v_seg = V_SEG.lock().unwrap();
for seg in v_seg.iter() {
if REG_MAP.lock().unwrap().get(&seg.reg) == None {
let pos = out_fd.stream_position()?;
REG_MAP.lock().unwrap().insert(seg.reg.to_string(), pos as u32);
out_fd.write(seg.reg.as_bytes())?;
}
}
return Ok(());
}
fn split_ip(sip: u32, eip: u32, reg: String) -> Vec<Segment> {
let s1 = sip >> 24 & 0xFF;
let s2 = sip >> 16 & 0xFF;
//let s3 = sip >> 8 & 0xFF;
//let s4 = sip & 0xFF;
let e1 = eip >> 24 & 0xFF;
let e2 = eip >> 16 & 0xFF;
//let e3 = eip >> 8 & 0xFF;
//let e4 = eip & 0xFF;
let mut node_list: Vec<Segment> = Vec::new();
// println!("split:{}.{}.{}.{}~{}.{}.{}.{}", s1,s2,s3,s4,e1,e2,e3,e4);
for i in s1..e1+1 {
for j in (if i == s1 {s2} else {0})..(if i == e1 {e2+1} else {256}){
let s_ip = if i == s1 && j == s2 {
sip
}else {
((i << 24) & 0xff000000) | (j << 16 & 0xff0000)
};
let e_ip = if i == e1 && j == e2 {
eip
}else {
((i << 24) & 0xff000000) | ((j << 16) &0xff0000) | 0xffff
};
node_list.push(Segment{sip:s_ip, eip:e_ip, reg:reg.to_string()});
}
}
return node_list;
}
fn set_vector_index(arr: &mut [[IndexBlock; 256]; 256], ip:u32, block_pos:u32) {
let row: usize = (ip >> 24 & 0xff) as usize;
let col: usize = (ip >> 16 & 0xff ) as usize;
let vi_block = &mut arr[row][col];
if vi_block.first_pos == 0 {
vi_block.first_pos = block_pos;
}
vi_block.last_pos = block_pos + SEGMENT_INDEX_BLOCK_SIZE;
}
fn write_index_block(out_fd: &mut File) -> std::io::Result<()> {
let v_seg = V_SEG.lock().unwrap();
let mut index_arr :[[IndexBlock; 256]; 256] = [[IndexBlock{first_pos: 0, last_pos: 0}; 256]; 256];
for seg in v_seg.iter() {
let pos = REG_MAP.lock().unwrap().get(&seg.reg).copied().unwrap();
let node_list = split_ip(seg.sip, seg.eip, seg.reg.to_string());
for node in node_list {
let block_pos = out_fd.stream_position()?;
out_fd.write(&node.sip.to_le_bytes())?;
out_fd.write(&node.eip.to_le_bytes())?;
out_fd.write(&(node.reg.len() as u16).to_le_bytes())?;
out_fd.write(&pos.to_le_bytes())?;
set_vector_index(&mut index_arr, node.sip, block_pos as u32);
unsafe {
if START_INDEX_POS == 0 {
START_INDEX_POS = block_pos as u32;
}
END_INDEX_POS = block_pos as u32;
}
}
}
println!("try to write the segment index ptr ...");
out_fd.seek(SeekFrom::Start(HEADER_LEN.into()))?;
for i in 0..256 {
for j in 0..256 {
let index = index_arr[i][j];
out_fd.write(&index.first_pos.to_le_bytes())?;
out_fd.write(&index.last_pos.to_le_bytes())?;
}
}
return Ok(());
}
fn write_header(out_fd: &mut File) -> std::io::Result<()> {
out_fd.seek(SeekFrom::Start(0))?;
out_fd.write(&PROTOCAL.to_le_bytes())?;
out_fd.write(&INDEX_POLICY.to_le_bytes())?;
let now = SystemTime::now();
let timestamp = now.duration_since(UNIX_EPOCH).expect("Time went backwards").as_secs() as u32;
out_fd.write(&timestamp.to_le_bytes())?;
unsafe {
out_fd.write(&START_INDEX_POS.to_le_bytes())?;
out_fd.write(&END_INDEX_POS.to_le_bytes())?;
}
return Ok(())
}
fn main() -> Result<(), Error> {
let args = Args::parse();
/// Ip2Region database structure
/// See https://github.com/lionsoul2014/ip2region/blob/master/maker/golang/xdb/maker.go
fn main() -> Result<()> {
tracing_subscriber::fmt::init();
let now = Instant::now();
match load_segments(args.in_file) {
Ok(result) => println!("load {} lines", result),
Err(err) => println!("{}", err),
}
let mut out_fd = File::create(args.out_file).unwrap();
write_region(&mut out_fd)?;
write_index_block(&mut out_fd)?;
write_header(&mut out_fd)?;
unsafe {
println!("write done, dataBlocks: {}, IndexBlock: {}, indexPtr: ({}, {})",
REG_MAP.lock().unwrap().len(),
V_SEG.lock().unwrap().len(),
START_INDEX_POS, END_INDEX_POS
);
}
let sec = now.elapsed().as_secs();
let cmd = Command::parse();
info!(?cmd, "Generate xdb");
let mut maker = Maker::new(
cmd.ip_version,
cmd.index_policy,
&cmd.src,
&cmd.dst,
cmd.filter_fields,
)?;
maker.start()?;
println!("Done, elpsed: {}m{}s", sec/60, sec%60);
return Ok(());
info!(cost_time=?now.elapsed(), "Make completed");
Ok(())
}

View File

@ -0,0 +1,131 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{Seek, SeekFrom, Write};
use std::sync::Arc;
use bytes::{BufMut, BytesMut};
use itertools::Itertools;
use tracing::{info, trace};
use crate::error::{MakerError, Result};
use crate::header::{IPAddrExt, IndexPolicy, IpVersion, VECTOR_INDEX_ROWS};
use crate::segment::Segment;
use crate::{HEADER_INFO_LENGTH, Header, REGION_START, VECTOR_INDEX_COLS, VECTOR_INDEX_SIZE};
pub struct Maker {
ip_version: IpVersion,
dst_file: File,
region_pool: HashMap<Arc<String>, u32>,
vector_index: [[[u8; VECTOR_INDEX_SIZE]; VECTOR_INDEX_ROWS]; VECTOR_INDEX_COLS],
segments: Vec<Segment>,
header: Header,
}
impl Maker {
pub fn new(
ip_version: IpVersion,
index_policy: IndexPolicy,
src_filepath: &str,
end_filepath: &str,
filter_fields: Vec<usize>,
) -> Result<Self> {
let header = Header::new(index_policy, ip_version);
let segments = Segment::from_file(src_filepath, ip_version, &filter_fields)?;
if segments.is_empty() {
return Err(MakerError::EmptySegments);
}
let mut region_pool = HashMap::with_capacity(segments.len());
let mut dst_file = File::create(end_filepath)?;
let mut region_buf = BytesMut::new();
let mut current = u32::try_from(REGION_START)?;
for region in segments.iter().map(|s| s.region.clone()).unique() {
region_buf.extend_from_slice(region.as_bytes());
let region_len = region.len() as u32;
region_pool.insert(region, current);
current += region_len;
}
dst_file.seek(SeekFrom::Start(REGION_START))?;
dst_file.write_all(region_buf.as_ref())?;
info!("Load region pool successfully");
Ok(Self {
ip_version,
dst_file,
region_pool,
vector_index: [[[0; VECTOR_INDEX_SIZE]; VECTOR_INDEX_ROWS]; VECTOR_INDEX_COLS],
segments,
header,
})
}
fn set_vector_index(&mut self, ip: &[u8], ptr: u32) -> Result<()> {
let (l0, l1) = (ip[0] as usize, ip[1] as usize);
let block = &mut self.vector_index[l0][l1];
if block[0..4].eq(&[0; 4]) {
block[0..4].copy_from_slice(&ptr.to_le_bytes());
}
let end_value = ptr + self.ip_version.segment_index_size() as u32;
block[4..].copy_from_slice(&end_value.to_le_bytes());
Ok(())
}
pub fn start(&mut self) -> Result<()> {
let start_index_ptr = u32::try_from(self.dst_file.stream_position()?)?;
let mut count = 0;
let mut buf =
BytesMut::with_capacity(self.ip_version.segment_index_size() * self.segments.len());
for segment in std::mem::take(&mut self.segments) {
let region_ptr = *self
.region_pool
.get(&segment.region)
.ok_or(MakerError::RegionNotFound)?;
let region_len = u16::try_from(segment.region.len())?;
trace!(?segment, "before segment split");
for seg in segment.split()? {
self.set_vector_index(
&seg.start_ip.ipaddr_bytes(),
start_index_ptr + buf.len() as u32,
)?;
buf.put_slice(&seg.start_ip.encode_ipaddr_bytes());
buf.put_slice(&seg.end_ip.encode_ipaddr_bytes());
buf.put_u16_le(region_len);
buf.put_u32_le(region_ptr);
count += 1;
}
}
info!("Write segment index buffer");
self.dst_file
.seek(SeekFrom::Start(start_index_ptr as u64))?;
self.dst_file.write_all(buf.as_ref())?;
info!("Write header buffer");
let header_buf = self.header.encode_bytes(
start_index_ptr,
start_index_ptr + (buf.len() as u32) - (self.ip_version.segment_index_size() as u32),
);
self.dst_file.seek(SeekFrom::Start(0))?;
self.dst_file.write_all(header_buf.as_ref())?;
self.dst_file
.seek(SeekFrom::Start(HEADER_INFO_LENGTH as u64))?;
self.dst_file
.write_all(self.vector_index.as_flattened().as_flattened())?;
info!(
start_index_ptr,
current_index_ptr = start_index_ptr + buf.len() as u32,
region_pool_len = self.region_pool.len(),
count,
"Write done"
);
Ok(())
}
}

View File

@ -0,0 +1,166 @@
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::str::FromStr;
use std::sync::Arc;
use itertools::Itertools;
use tracing::{debug, info, trace};
use crate::IpVersion;
use crate::error::{MakerError, Result};
use crate::header::IPAddrExt;
pub trait IpPlusEq {
fn ip_plus_eq(&self, other: &Self) -> bool;
}
impl IpPlusEq for IpAddr {
fn ip_plus_eq(&self, other: &Self) -> bool {
match (self, other) {
(IpAddr::V4(start), IpAddr::V4(end)) => Ipv4Addr::from(u32::from(*start) + 1).eq(end),
(IpAddr::V6(start), IpAddr::V6(end)) => Ipv6Addr::from(u128::from(*start) + 1).eq(end),
_ => false,
}
}
}
#[derive(Debug)]
pub struct Segment {
pub start_ip: IpAddr,
pub end_ip: IpAddr,
pub region: Arc<String>,
}
fn region_filter(region: &str, filter_fields: &[usize]) -> Result<String> {
if filter_fields.is_empty() {
return Ok(region.to_owned());
}
let fields = region.split('|').collect::<Vec<_>>();
let filtered = filter_fields
.iter()
.map(|idx| {
fields
.get(*idx)
.ok_or(MakerError::RegionFilterFieldsTooBig {
limit: fields.len(),
actual: *idx,
})
})
.collect::<Result<Vec<_>>>()?;
Ok(filtered.into_iter().join("|"))
}
impl Segment {
pub fn from_file(
src_filepath: &str,
ip_version: IpVersion,
filter_fields: &[usize],
) -> Result<Vec<Segment>> {
info!("Read src file");
let mut last = None;
let mut segments = vec![];
let reader = BufReader::new(File::open(src_filepath)?);
for original_line in reader.lines() {
let original_line = original_line?;
let line = original_line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
trace!(?line, "Processing line");
let v = line.splitn(3, '|').collect::<Vec<_>>();
if v.len() != 3 {
return Err(MakerError::ParseIPRegion(line.to_owned()));
}
let (start_ip, end_ip, region) = (v[0], v[1], v[2]);
let (start_ip, end_ip) = if ip_version.eq(&IpVersion::V4) {
(
IpAddr::V4(Ipv4Addr::from_str(start_ip)?),
IpAddr::V4(Ipv4Addr::from_str(end_ip)?),
)
} else {
(
IpAddr::V6(Ipv6Addr::from_str(start_ip)?),
IpAddr::V6(Ipv6Addr::from_str(end_ip)?),
)
};
if start_ip.gt(&end_ip) {
return Err(MakerError::ParseIPRegion(line.to_owned()));
}
let segment = Segment {
start_ip,
end_ip,
region: Arc::new(region_filter(region, filter_fields)?),
};
match last.take() {
None => {
last = Some(segment);
}
Some(mut l)
if segment.region.eq(&l.region) && l.end_ip.ip_plus_eq(&segment.start_ip) =>
{
l.end_ip = segment.end_ip;
last = Some(l);
}
Some(seg) => {
segments.push(seg);
last = Some(segment);
}
}
}
if let Some(last) = last {
segments.push(last);
}
info!(length = segments.len(), "load segments");
Ok(segments)
}
pub fn split(self) -> Result<Vec<Segment>> {
let start_bytes = self.start_ip.ipaddr_bytes();
let end_bytes = self.end_ip.ipaddr_bytes();
let start_byte = u16::from_be_bytes([start_bytes[0], start_bytes[1]]);
let end_byte = u16::from_be_bytes([end_bytes[0], end_bytes[1]]);
let segments = (start_byte..=end_byte)
.map(|index| {
let sip = if index == start_byte {
self.start_ip
} else if self.start_ip.is_ipv4() {
IpAddr::from(Ipv4Addr::from((index as u32) << 16))
} else {
IpAddr::from(Ipv6Addr::from((index as u128) << 112))
};
let eip = if index == end_byte {
self.end_ip
} else if self.start_ip.is_ipv4() {
let mask = (1 << 16) - 1;
let v = (index as u32) << 16;
IpAddr::from(Ipv4Addr::from(v | mask))
} else {
let mask = (1 << 112) - 1;
let v = (index as u128) << 112;
IpAddr::from(Ipv6Addr::from(v | mask))
};
trace!(?index, ?sip, ?eip, ?self.region, "in split segment");
Segment {
start_ip: sip,
end_ip: eip,
region: self.region.clone(),
}
})
.collect_vec();
debug!(?self, length = segments.len(), "Try to index segment");
Ok(segments)
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB