diff --git a/binding/rust/ReadMe.md b/binding/rust/ReadMe.md index 430c61c..9832582 100644 --- a/binding/rust/ReadMe.md +++ b/binding/rust/ReadMe.md @@ -123,6 +123,7 @@ $ cargo build -r #### 测试 IPv6 ```bash $ cd binding/rust +$ cargo build -r $ ./target/release/searcher --xdb='../../data/ip2region_v6.xdb' query ip2region xdb searcher test program, type `quit` or `Ctrl + c` to exit @@ -140,6 +141,7 @@ ip2region>> #### 测试 IPv4 ```bash $ cd binding/rust +$ cargo build -r $ ./target/release/searcher --xdb='../../data/ip2region_v4.xdb' query ip2region xdb searcher test program, type `quit` or `Ctrl + c` to exit ip2region>> 1.1.2.1 diff --git a/binding/rust/ip2region/Cargo.toml b/binding/rust/ip2region/Cargo.toml index a97a888..cc60b34 100644 --- a/binding/rust/ip2region/Cargo.toml +++ b/binding/rust/ip2region/Cargo.toml @@ -11,8 +11,7 @@ license = "Apache-2.0" [dependencies] tracing = "0.1" thiserror = "2" -num-traits = "0.2" -num-derive = "0.4" +maker = { path = "../../../maker/rust/maker"} [dev-dependencies] criterion = "0.7" diff --git a/binding/rust/ip2region/src/error.rs b/binding/rust/ip2region/src/error.rs index abc3c61..be4ae85 100644 --- a/binding/rust/ip2region/src/error.rs +++ b/binding/rust/ip2region/src/error.rs @@ -12,9 +12,6 @@ pub enum Ip2RegionError { #[error("No matched Ipaddress")] NoMatchedIP, - #[error("Header parse error: {0}")] - HeaderParsed(String), - #[error("Searcher load IPv4 data, couldn't search IPv6 data")] OnlyIPv4Version, @@ -23,6 +20,9 @@ pub enum Ip2RegionError { #[error("Try from slice failed")] TryFromSliceFailed(#[from] std::array::TryFromSliceError), + + #[error("Maker crate error: {0}")] + MakerError(#[from] maker::MakerError), } pub type Result = std::result::Result; diff --git a/binding/rust/ip2region/src/header.rs b/binding/rust/ip2region/src/header.rs deleted file mode 100644 index d9c0dc8..0000000 --- a/binding/rust/ip2region/src/header.rs +++ /dev/null @@ -1,86 +0,0 @@ -use num_derive::FromPrimitive; -use num_traits::FromPrimitive; - -use crate::error::Ip2RegionError; - -pub const HEADER_INFO_LENGTH: usize = 256; - -#[allow(dead_code)] -#[derive(Debug)] -pub struct Header { - version: u16, - index_policy: IndexPolicy, - create_time: u32, - start_index_ptr: u32, - end_index_ptr: u32, - ip_version: IpVersion, - runtime_ptr_bytes: u16, -} - -impl TryFrom<&[u8; 256]> for Header { - type Error = Ip2RegionError; - - fn try_from(value: &[u8; 256]) -> Result { - if value.len() < 20 { - return Err(Ip2RegionError::HeaderParsed( - "Header bytes too short".into(), - )); - } - - let index_policy_value = u16::from_le_bytes([value[2], value[3]]); - let ip_version_value = u16::from_le_bytes([value[16], value[17]]); - - Ok(Header { - version: u16::from_le_bytes([value[0], value[1]]), - index_policy: IndexPolicy::from_u16(index_policy_value).ok_or_else(|| { - Ip2RegionError::HeaderParsed(format!( - "Header index policy invalid: {index_policy_value}" - )) - })?, - create_time: u32::from_le_bytes([value[4], value[5], value[6], value[7]]), - start_index_ptr: u32::from_le_bytes([value[8], value[9], value[10], value[11]]), - end_index_ptr: u32::from_le_bytes([value[12], value[13], value[14], value[15]]), - - ip_version: IpVersion::from_u16(ip_version_value).ok_or_else(|| { - Ip2RegionError::HeaderParsed(format!( - "Header ip version invalid: {ip_version_value}" - )) - })?, - runtime_ptr_bytes: u16::from_le_bytes([value[18], value[19]]), - }) - } -} - -#[derive(FromPrimitive, Debug)] -#[repr(u16)] -pub enum IndexPolicy { - VectorIndex = 1, - BTreeIndex = 2, -} - -#[derive(FromPrimitive, Debug)] -#[repr(u16)] -pub enum IpVersion { - V4 = 4, - V6 = 6, -} - -impl Header { - pub fn ip_bytes_len(&self) -> usize { - match &self.ip_version { - IpVersion::V4 => 4, - IpVersion::V6 => 16, - } - } - - pub fn segment_index_size(&self) -> usize { - match &self.ip_version { - IpVersion::V4 => 14, - IpVersion::V6 => 38, - } - } - - pub fn ip_version(&self) -> &IpVersion { - &self.ip_version - } -} diff --git a/binding/rust/ip2region/src/lib.rs b/binding/rust/ip2region/src/lib.rs index f892edc..e51adb7 100644 --- a/binding/rust/ip2region/src/lib.rs +++ b/binding/rust/ip2region/src/lib.rs @@ -1,7 +1,6 @@ mod error; -mod header; mod ip_value; mod searcher; pub use searcher::{CachePolicy, Searcher}; -pub use ip_value::IpValueExt; +pub use ip_value::IpValueExt; \ No newline at end of file diff --git a/binding/rust/ip2region/src/searcher.rs b/binding/rust/ip2region/src/searcher.rs index 4694917..91917ee 100644 --- a/binding/rust/ip2region/src/searcher.rs +++ b/binding/rust/ip2region/src/searcher.rs @@ -9,13 +9,12 @@ use std::sync::OnceLock; use tracing::{debug, trace, warn}; use crate::error::{Ip2RegionError, Result}; -use crate::header::{HEADER_INFO_LENGTH, Header, IpVersion}; +use maker::{ + HEADER_INFO_LENGTH, Header, IpVersion, VECTOR_INDEX_LENGTH, VECTOR_INDEX_COLS, + VECTOR_INDEX_SIZE +}; use crate::ip_value::{CompareExt, IpValueExt}; -const VECTOR_INDEX_LENGTH: usize = 256 * 256 * 8; -const VECTOR_INDEX_COLS: usize = 256; -const VECTOR_INDEX_SIZE: usize = 8; - pub struct Searcher { pub filepath: String, pub cache_policy: CachePolicy, diff --git a/maker/rust/README.md b/maker/rust/README.md index 43da538..9c33d8c 100644 --- a/maker/rust/README.md +++ b/maker/rust/README.md @@ -1,32 +1,71 @@ # ip2region xdb rust 生成实现 -## 使用方法 -* 当前目录下maker子目录下执行 `cargo build --release` 编译生成工具 -* 目标生成在targe/release 目录下 maker -* 使用方法: +## 程序编译 +```bash +$ cd maker/rust/maker +$ cargo build -r ``` - Usage: maker --in-file --out-file - Options: - -i, --in-file - -o, --out-file - -h, --help Print help - -V, --version Print version +编译成功以后,执行文件位置 `./target/release/maker` + +## `xdb` 数据生成 +```bash +# CWD ip2region/maker/rust/maker +$ ./target/release/maker --help +Usage: maker [OPTIONS] --src --dst --ip-version + +Options: + --src + ip source region txt filepath + + --dst + generated xdb filepath + + --ip-version + Possible values: + - v4: IPv4 + - v6: Ipv6 + + --index-policy + index cache policy + + [default: vector-index] + [possible values: vector-index, b-tree-index] + + --filter-fields + region filter fields, the index of the fields, e.g. `1,2,3,5` + + -h, --help + Print help (see a summary with '-h') ``` -例如,通过默认的 data/ip.merge.txt 原数据,在target目录下生成一个 ip2region.xdb 二进制文件: - -``` -kevin@ubuntu ~/i/m/r/m/t/release (master)> ./maker -i ../../../../../data/ip.merge.txt -o ip2region.xdb -load 683844 lines -try to write the segment index ptr ... -write done, dataBlocks: 13827, IndexBlock: 683844, indexPtr: (983587, 11070069) -Done, elpsed: 0m7s - +例如,使用默认的仓库 data/ 下默认的原始数据生成生成 xdb 文件到当前目录(`ip2region/maker/rust/maker`): +```bash +# ipv6 +./target/release/maker --src=../../../data/ipv6_source.txt --dst=./target/ipv6.xdb --ip-version v6 +# ipv4 +./target/release/maker --src=../../../data/ipv4_source.txt --dst=./target/ipv4.xdb --ip-version v4 ``` -## 数据查询/bench 测试 -* 通过将以上步骤生成的二进制文件和python 生成工具生成的二进制文件进行二进制比对,除时间戳位置不同,其它均相同。 +## `xdb` 数据查询 和 bench 测试 -![](./vimdiff.png) -* 已经完成开发的 [binding](../../binding/) 都有查询和 bench 测试程序以及使用文档,你可以使用你熟悉的语言的 searcher 进行查询测试或者bench测试,来确认数据的正确性和完整性。 +基于xdb 格式的查询功能和测试见 [ip2region binding](../../binding) +## 对比其他 maker 生成的 xdb 文件 +推荐使用 `vbindiff`, 与其他文件的差异只有 create time 信息上有差异,其他数据都需要是一样的 + +golang 版本 maker 构建 xdb +```bash +$ cd maker golang +$ make +$ ./xdb_maker gen --src=../../data/ipv4_source.txt --dst=./ip2region_v4.xdb --version=ipv4 +$ ./xdb_maker gen --src=../../data/ipv6_source.txt --dst=./ip2region_v6.xdb --version=ipv6 +``` + +对比 xdb 差异 +```bash +$ cd maker/rust/maker +$ ./target/release/maker --src=../../../data/ipv4_source.txt --dst=./target/ipv4.xdb --ip-version v4 +$ ./target/release/maker --src=../../../data/ipv6_source.txt --dst=./target/ipv6.xdb --ip-version v6 +$ vbindiff ./ipv4.xdb ../../golang/ip2region_v4.xdb +$ vbindiff ./ipv6.xdb ../../golang/ip2region_v6.xdb +``` diff --git a/maker/rust/maker/Cargo.toml b/maker/rust/maker/Cargo.toml index 19d6000..34d0634 100644 --- a/maker/rust/maker/Cargo.toml +++ b/maker/rust/maker/Cargo.toml @@ -1,12 +1,18 @@ [package] name = "maker" -version = "0.1.0" -edition = "2021" -authors = ["Kevin Wang "] +version = "0.2.0" +edition = "2024" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.4.18", features = ["derive"] } -lazy_static = "1.4.0" -once_cell = "1.19.0" +clap = { version = "4.5", features = ["derive"] } +thiserror = "2" +bytes = "1" +num-derive = "0.4.2" +num-traits = "0.2.19" +chrono = "0.4" +itertools = "0.14" +ipnetwork = "0.21" +tracing = "0.1" +tracing-subscriber = "0.3" diff --git a/maker/rust/maker/src/command.rs b/maker/rust/maker/src/command.rs new file mode 100644 index 0000000..a5bf47a --- /dev/null +++ b/maker/rust/maker/src/command.rs @@ -0,0 +1,22 @@ +use clap::Parser; + +use crate::IpVersion; +use crate::header::IndexPolicy; + +#[derive(Parser, Debug)] +pub struct Command { + /// ip source region txt filepath + #[arg(long)] + pub src: String, + /// generated xdb filepath + #[clap(long)] + pub dst: String, + #[clap(long, value_enum)] + pub ip_version: IpVersion, + /// index cache policy + #[clap(long, value_enum, default_value_t = IndexPolicy::VectorIndex)] + pub index_policy: IndexPolicy, + /// region filter fields, the index of the fields, e.g. `1,2,3,5` + #[clap(long, value_delimiter = ',')] + pub filter_fields: Vec, +} diff --git a/maker/rust/maker/src/error.rs b/maker/rust/maker/src/error.rs new file mode 100644 index 0000000..3d05c6b --- /dev/null +++ b/maker/rust/maker/src/error.rs @@ -0,0 +1,37 @@ +#[derive(Debug, thiserror::Error)] +pub enum MakerError { + #[error("Io error: {0}")] + IoError(#[from] std::io::Error), + + #[error("Header parse error: {0}")] + HeaderParsed(String), + + #[error("Parse line src ip, dst ip, region failed for line: {0}")] + ParseIPRegion(String), + + #[error("Invalid sip/eip version")] + InvalidIPVersion, + + #[error("Ipaddr parse error: {0}")] + IpaddrParseError(#[from] std::net::AddrParseError), + + #[error("Region filter fields value too big, limit: {limit}, actual: {actual}")] + RegionFilterFieldsTooBig { limit: usize, actual: usize }, + + #[error("Empty segments")] + EmptySegments, + + #[error("Try from int failed")] + TryFromIntError(#[from] std::num::TryFromIntError), + + #[error("Invalid IP Network")] + InvalidIPNetwork(#[from] ipnetwork::IpNetworkError), + + #[error("Try from slice failed")] + TryFromSliceFailed(#[from] std::array::TryFromSliceError), + + #[error("Region could not found")] + RegionNotFound, +} + +pub type Result = std::result::Result; diff --git a/maker/rust/maker/src/header.rs b/maker/rust/maker/src/header.rs new file mode 100644 index 0000000..7b8b1d0 --- /dev/null +++ b/maker/rust/maker/src/header.rs @@ -0,0 +1,165 @@ +use std::fmt::Display; +use std::net::IpAddr; + +use bytes::{BufMut, Bytes, BytesMut}; +use clap::ValueEnum; +use itertools::Itertools; +use num_derive::FromPrimitive; +use num_traits::FromPrimitive; + +use crate::error::{MakerError, Result}; + +pub const VERSION_NO: u16 = 3; // since 2025/09/01 (IPv6 supporting) +pub const HEADER_INFO_LENGTH: usize = 256; +pub const VECTOR_INDEX_COLS: usize = 256; +pub const VECTOR_INDEX_ROWS: usize = 256; +pub const VECTOR_INDEX_SIZE: usize = 8; +pub const VECTOR_INDEX_LENGTH: usize = VECTOR_INDEX_COLS * VECTOR_INDEX_ROWS * VECTOR_INDEX_SIZE; +pub const RUNTIME_PTR_SIZE: u16 = 4; +pub const REGION_START: u64 = (HEADER_INFO_LENGTH + VECTOR_INDEX_LENGTH) as u64; + +#[allow(dead_code)] +#[derive(Debug)] +pub struct Header { + version: u16, + index_policy: IndexPolicy, + create_time: u32, + start_index_ptr: u32, + end_index_ptr: u32, + ip_version: IpVersion, + runtime_ptr_bytes: u16, +} + +impl TryFrom<&[u8; 256]> for Header { + type Error = MakerError; + + fn try_from(value: &[u8; 256]) -> Result { + if value.len() < 20 { + return Err(MakerError::HeaderParsed("Header bytes too short".into())); + } + + let index_policy_value = u16::from_le_bytes([value[2], value[3]]); + let ip_version_value = u16::from_le_bytes([value[16], value[17]]); + + Ok(Header { + version: u16::from_le_bytes([value[0], value[1]]), + index_policy: IndexPolicy::from_u16(index_policy_value).ok_or_else(|| { + MakerError::HeaderParsed(format!( + "Header index policy invalid: {index_policy_value}" + )) + })?, + create_time: u32::from_le_bytes([value[4], value[5], value[6], value[7]]), + start_index_ptr: u32::from_le_bytes([value[8], value[9], value[10], value[11]]), + end_index_ptr: u32::from_le_bytes([value[12], value[13], value[14], value[15]]), + + ip_version: IpVersion::from_u16(ip_version_value).ok_or_else(|| { + MakerError::HeaderParsed(format!("Header ip version invalid: {ip_version_value}")) + })?, + runtime_ptr_bytes: u16::from_le_bytes([value[18], value[19]]), + }) + } +} + +impl Header { + pub fn new(index_policy: IndexPolicy, ip_version: IpVersion) -> Header { + Header { + version: VERSION_NO, + index_policy, + create_time: chrono::Utc::now().timestamp() as u32, + start_index_ptr: 0, + end_index_ptr: 0, + ip_version, + runtime_ptr_bytes: RUNTIME_PTR_SIZE, + } + } + + pub fn encode_bytes(&self, start_index_ptr: u32, end_index_ptr: u32) -> Bytes { + let mut buf = BytesMut::with_capacity(HEADER_INFO_LENGTH); + buf.put_u16_le(VERSION_NO); + buf.put_u16_le(self.index_policy as u16); + buf.put_u32_le(self.create_time); + buf.put_u32_le(start_index_ptr); + // index block end ptr + buf.put_u32_le(end_index_ptr); + buf.put_u16_le(self.ip_version as u16); + buf.put_u16_le(self.runtime_ptr_bytes); + buf.freeze() + } +} + +#[derive(FromPrimitive, Debug, Copy, Clone, ValueEnum)] +#[repr(u16)] +pub enum IndexPolicy { + VectorIndex = 1, + BTreeIndex = 2, +} + +impl Display for IndexPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + IndexPolicy::VectorIndex => write!(f, "VectorIndex"), + IndexPolicy::BTreeIndex => write!(f, "BTreeIndex"), + } + } +} + +#[derive(FromPrimitive, Debug, Copy, Clone, ValueEnum, PartialEq)] +#[repr(u16)] +pub enum IpVersion { + /// IPv4 + V4 = 4, + /// Ipv6 + V6 = 6, +} + +impl IpVersion { + pub fn ip_bytes_len(&self) -> usize { + match &self { + IpVersion::V4 => 4, + IpVersion::V6 => 16, + } + } + + pub fn segment_index_size(&self) -> usize { + match &self { + IpVersion::V4 => 14, + IpVersion::V6 => 38, + } + } +} + +impl Header { + pub fn ip_bytes_len(&self) -> usize { + self.ip_version.ip_bytes_len() + } + + pub fn segment_index_size(&self) -> usize { + self.ip_version.segment_index_size() + } + + pub fn ip_version(&self) -> &IpVersion { + &self.ip_version + } +} + +pub trait IPAddrExt { + fn ipaddr_bytes(&self) -> Vec; + + fn encode_ipaddr_bytes(&self) -> Vec; +} + +impl IPAddrExt for IpAddr { + fn ipaddr_bytes(&self) -> Vec { + match self { + IpAddr::V4(addr) => addr.octets().to_vec(), + IpAddr::V6(addr) => addr.octets().to_vec(), + } + } + + fn encode_ipaddr_bytes(&self) -> Vec { + match self { + IpAddr::V4(addr) => addr.octets().into_iter().rev().collect_vec(), + IpAddr::V6(addr) => addr.octets().to_vec(), + } + } +} diff --git a/maker/rust/maker/src/ip_value.rs b/maker/rust/maker/src/ip_value.rs deleted file mode 100644 index 0a11f92..0000000 --- a/maker/rust/maker/src/ip_value.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::error::Error; -use std::net::Ipv4Addr; -use std::str::FromStr; - -pub trait ToUIntIP { - fn to_u32_ip(&self) -> Result>; -} - -impl ToUIntIP for u32 { - fn to_u32_ip(&self) -> Result> { - Ok(self.to_owned()) - } -} - -impl ToUIntIP for &str { - fn to_u32_ip(&self) -> Result> { - if let Ok(ip_addr) = Ipv4Addr::from_str(self) { - return Ok(u32::from(ip_addr)); - } - Ok(self.parse::()?) - } -} - -impl ToUIntIP for Ipv4Addr { - fn to_u32_ip(&self) -> Result> { - Ok(u32::from(*self)) - } -} - -#[cfg(test)] -mod test_ip { - use super::*; - - #[test] - fn test_ip_str_2_u32() { - let ip_str = "1.1.1.1"; - let result = ip_str.to_u32_ip().unwrap(); - assert_eq!(result, 1 << 24 | 1 << 16 | 1 << 8 | 1); - } - - #[test] - fn test_ip_u32_str() { - let ip = "12"; - let result = ip.to_u32_ip().unwrap(); - assert_eq!(result, 12); - } - - #[test] - fn test_ip_u32() { - let ip: u32 = 33; - let result = ip.to_u32_ip().unwrap(); - assert_eq!(result, 33); - } - - #[test] - fn test_ip_addr() { - let ip = Ipv4Addr::from_str("0.0.3.12").unwrap(); - let result = ip.to_u32_ip().unwrap(); - assert_eq!(result, 3 << 8 | 12) - } -} diff --git a/maker/rust/maker/src/lib.rs b/maker/rust/maker/src/lib.rs new file mode 100644 index 0000000..1866d55 --- /dev/null +++ b/maker/rust/maker/src/lib.rs @@ -0,0 +1,13 @@ +mod command; +mod error; +mod header; +mod maker; +mod segment; + +pub use command::Command; +pub use error::{MakerError, Result}; +pub use header::{ + HEADER_INFO_LENGTH, Header, IpVersion, REGION_START, VECTOR_INDEX_COLS, VECTOR_INDEX_LENGTH, + VECTOR_INDEX_SIZE, +}; +pub use maker::Maker; diff --git a/maker/rust/maker/src/main.rs b/maker/rust/maker/src/main.rs index 3318503..f8fb642 100644 --- a/maker/rust/maker/src/main.rs +++ b/maker/rust/maker/src/main.rs @@ -1,215 +1,25 @@ +use std::time::Instant; + use clap::Parser; -use std::path::PathBuf; -use std::fs::File; -use std::io::{BufReader, BufRead, Error, Write, Seek, SeekFrom}; -use lazy_static::lazy_static; -use std::collections::HashMap; -use std::sync::Mutex; -mod ip_value; -pub use self::ip_value::ToUIntIP; -use std::time::{SystemTime, UNIX_EPOCH, Instant}; +use maker::{Command, Maker, Result}; +use tracing::info; -const HEADER_LEN:u32 = 256; -const VECTOR_INDEX_LEN:u32 = 256*256*8; -const SEGMENT_INDEX_BLOCK_SIZE:u32 = 14; -static mut START_INDEX_POS:u32 = 0; -static mut END_INDEX_POS:u32 = 0; -const PROTOCAL:u16 = 2; -const INDEX_POLICY:u16 = 1; - -lazy_static! { - static ref REG_MAP: Mutex> = Mutex::new(HashMap::new()); -} - -lazy_static! { - static ref V_SEG: Mutex> = Mutex::new(Vec::new()); -} - -#[derive(Parser)] -#[command(author="Kevin Wang ", version="2.0")] -#[command(help_template = " Author: {author-with-newline} {about-section}Version: {version} \n {usage-heading} {usage} \n {all-args} {tab}")] -struct Args { - #[arg(short, long)] - in_file: PathBuf, - #[arg(short, long)] - out_file: PathBuf, -} - -struct Segment { - sip: u32, - eip: u32, - reg: String, -} - -#[derive(Debug, Clone, Copy)] -struct IndexBlock { - first_pos: u32, - last_pos: u32, -} - -impl Segment { - fn new(sip: u32, eip: u32, reg: String) -> Segment { - Segment {sip, eip, reg} - } -} - -fn load_segments(in_file: PathBuf) -> std::io::Result { - let in_f = File::open(in_file)?; - let reader = BufReader::new(in_f); - let mut count = 0; - let last_eip = 0; - for line in reader.lines() { - let line = line?; - let v: Vec<&str> = line.splitn(3, '|').collect(); - if v.len() != 3 { - panic!("invalid ip segment line '{}'", line) - } - let sip = v[0].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line)); - let eip = v[1].to_u32_ip().expect(&format!("invalid ip address '{}' in line {}", v[0], line)); - if sip > eip { - panic!("start ip({}) should not be greater than end ip({})", sip, eip); - } - if v[2].len() < 1 { - panic!("empty region info in segment line `{}`", line); - } - // Check the continuity of data segment - if last_eip != 0 && last_eip + 1 != sip { - panic!("discontinuous data segment: last.eip+1!=seg.sip in line {}", line); - } - let segment = Segment::new(sip, eip, v[2].to_string()); - V_SEG.lock().unwrap().push(segment); - - count += 1; - } - return Ok(count.to_string()); -} - -fn write_region(out_fd: &mut File) -> std::io::Result<()> { - out_fd.seek(SeekFrom::Start((HEADER_LEN + VECTOR_INDEX_LEN).into()))?; - let v_seg = V_SEG.lock().unwrap(); - for seg in v_seg.iter() { - if REG_MAP.lock().unwrap().get(&seg.reg) == None { - let pos = out_fd.stream_position()?; - REG_MAP.lock().unwrap().insert(seg.reg.to_string(), pos as u32); - out_fd.write(seg.reg.as_bytes())?; - } - } - return Ok(()); -} - -fn split_ip(sip: u32, eip: u32, reg: String) -> Vec { - let s1 = sip >> 24 & 0xFF; - let s2 = sip >> 16 & 0xFF; - //let s3 = sip >> 8 & 0xFF; - //let s4 = sip & 0xFF; - - let e1 = eip >> 24 & 0xFF; - let e2 = eip >> 16 & 0xFF; - //let e3 = eip >> 8 & 0xFF; - //let e4 = eip & 0xFF; - - let mut node_list: Vec = Vec::new(); - - // println!("split:{}.{}.{}.{}~{}.{}.{}.{}", s1,s2,s3,s4,e1,e2,e3,e4); - for i in s1..e1+1 { - for j in (if i == s1 {s2} else {0})..(if i == e1 {e2+1} else {256}){ - let s_ip = if i == s1 && j == s2 { - sip - }else { - ((i << 24) & 0xff000000) | (j << 16 & 0xff0000) - }; - let e_ip = if i == e1 && j == e2 { - eip - }else { - ((i << 24) & 0xff000000) | ((j << 16) &0xff0000) | 0xffff - }; - node_list.push(Segment{sip:s_ip, eip:e_ip, reg:reg.to_string()}); - } - } - - return node_list; -} - -fn set_vector_index(arr: &mut [[IndexBlock; 256]; 256], ip:u32, block_pos:u32) { - let row: usize = (ip >> 24 & 0xff) as usize; - let col: usize = (ip >> 16 & 0xff ) as usize; - let vi_block = &mut arr[row][col]; - if vi_block.first_pos == 0 { - vi_block.first_pos = block_pos; - } - vi_block.last_pos = block_pos + SEGMENT_INDEX_BLOCK_SIZE; -} - -fn write_index_block(out_fd: &mut File) -> std::io::Result<()> { - let v_seg = V_SEG.lock().unwrap(); - let mut index_arr :[[IndexBlock; 256]; 256] = [[IndexBlock{first_pos: 0, last_pos: 0}; 256]; 256]; - for seg in v_seg.iter() { - let pos = REG_MAP.lock().unwrap().get(&seg.reg).copied().unwrap(); - let node_list = split_ip(seg.sip, seg.eip, seg.reg.to_string()); - for node in node_list { - let block_pos = out_fd.stream_position()?; - out_fd.write(&node.sip.to_le_bytes())?; - out_fd.write(&node.eip.to_le_bytes())?; - out_fd.write(&(node.reg.len() as u16).to_le_bytes())?; - out_fd.write(&pos.to_le_bytes())?; - set_vector_index(&mut index_arr, node.sip, block_pos as u32); - unsafe { - if START_INDEX_POS == 0 { - START_INDEX_POS = block_pos as u32; - } - END_INDEX_POS = block_pos as u32; - } - } - } - println!("try to write the segment index ptr ..."); - - out_fd.seek(SeekFrom::Start(HEADER_LEN.into()))?; - for i in 0..256 { - for j in 0..256 { - let index = index_arr[i][j]; - out_fd.write(&index.first_pos.to_le_bytes())?; - out_fd.write(&index.last_pos.to_le_bytes())?; - } - } - return Ok(()); -} - -fn write_header(out_fd: &mut File) -> std::io::Result<()> { - out_fd.seek(SeekFrom::Start(0))?; - out_fd.write(&PROTOCAL.to_le_bytes())?; - out_fd.write(&INDEX_POLICY.to_le_bytes())?; - let now = SystemTime::now(); - let timestamp = now.duration_since(UNIX_EPOCH).expect("Time went backwards").as_secs() as u32; - out_fd.write(×tamp.to_le_bytes())?; - unsafe { - out_fd.write(&START_INDEX_POS.to_le_bytes())?; - out_fd.write(&END_INDEX_POS.to_le_bytes())?; - } - - return Ok(()) - -} - -fn main() -> Result<(), Error> { - let args = Args::parse(); +/// Ip2Region database structure +/// See https://github.com/lionsoul2014/ip2region/blob/master/maker/golang/xdb/maker.go +fn main() -> Result<()> { + tracing_subscriber::fmt::init(); let now = Instant::now(); - match load_segments(args.in_file) { - Ok(result) => println!("load {} lines", result), - Err(err) => println!("{}", err), - } - let mut out_fd = File::create(args.out_file).unwrap(); - write_region(&mut out_fd)?; - write_index_block(&mut out_fd)?; - write_header(&mut out_fd)?; - unsafe { - println!("write done, dataBlocks: {}, IndexBlock: {}, indexPtr: ({}, {})", - REG_MAP.lock().unwrap().len(), - V_SEG.lock().unwrap().len(), - START_INDEX_POS, END_INDEX_POS - ); - } - let sec = now.elapsed().as_secs(); + let cmd = Command::parse(); + info!(?cmd, "Generate xdb"); + let mut maker = Maker::new( + cmd.ip_version, + cmd.index_policy, + &cmd.src, + &cmd.dst, + cmd.filter_fields, + )?; + maker.start()?; - println!("Done, elpsed: {}m{}s", sec/60, sec%60); - return Ok(()); + info!(cost_time=?now.elapsed(), "Make completed"); + Ok(()) } diff --git a/maker/rust/maker/src/maker.rs b/maker/rust/maker/src/maker.rs new file mode 100644 index 0000000..541d21a --- /dev/null +++ b/maker/rust/maker/src/maker.rs @@ -0,0 +1,131 @@ +use std::collections::HashMap; +use std::fs::File; +use std::io::{Seek, SeekFrom, Write}; +use std::sync::Arc; + +use bytes::{BufMut, BytesMut}; +use itertools::Itertools; +use tracing::{info, trace}; + +use crate::error::{MakerError, Result}; +use crate::header::{IPAddrExt, IndexPolicy, IpVersion, VECTOR_INDEX_ROWS}; +use crate::segment::Segment; +use crate::{HEADER_INFO_LENGTH, Header, REGION_START, VECTOR_INDEX_COLS, VECTOR_INDEX_SIZE}; + +pub struct Maker { + ip_version: IpVersion, + dst_file: File, + region_pool: HashMap, u32>, + vector_index: [[[u8; VECTOR_INDEX_SIZE]; VECTOR_INDEX_ROWS]; VECTOR_INDEX_COLS], + segments: Vec, + header: Header, +} + +impl Maker { + pub fn new( + ip_version: IpVersion, + index_policy: IndexPolicy, + src_filepath: &str, + end_filepath: &str, + filter_fields: Vec, + ) -> Result { + let header = Header::new(index_policy, ip_version); + + let segments = Segment::from_file(src_filepath, ip_version, &filter_fields)?; + if segments.is_empty() { + return Err(MakerError::EmptySegments); + } + + let mut region_pool = HashMap::with_capacity(segments.len()); + let mut dst_file = File::create(end_filepath)?; + let mut region_buf = BytesMut::new(); + let mut current = u32::try_from(REGION_START)?; + for region in segments.iter().map(|s| s.region.clone()).unique() { + region_buf.extend_from_slice(region.as_bytes()); + let region_len = region.len() as u32; + region_pool.insert(region, current); + current += region_len; + } + dst_file.seek(SeekFrom::Start(REGION_START))?; + dst_file.write_all(region_buf.as_ref())?; + info!("Load region pool successfully"); + + Ok(Self { + ip_version, + dst_file, + region_pool, + vector_index: [[[0; VECTOR_INDEX_SIZE]; VECTOR_INDEX_ROWS]; VECTOR_INDEX_COLS], + segments, + header, + }) + } + + fn set_vector_index(&mut self, ip: &[u8], ptr: u32) -> Result<()> { + let (l0, l1) = (ip[0] as usize, ip[1] as usize); + let block = &mut self.vector_index[l0][l1]; + + if block[0..4].eq(&[0; 4]) { + block[0..4].copy_from_slice(&ptr.to_le_bytes()); + } + let end_value = ptr + self.ip_version.segment_index_size() as u32; + block[4..].copy_from_slice(&end_value.to_le_bytes()); + Ok(()) + } + + pub fn start(&mut self) -> Result<()> { + let start_index_ptr = u32::try_from(self.dst_file.stream_position()?)?; + let mut count = 0; + let mut buf = + BytesMut::with_capacity(self.ip_version.segment_index_size() * self.segments.len()); + + for segment in std::mem::take(&mut self.segments) { + let region_ptr = *self + .region_pool + .get(&segment.region) + .ok_or(MakerError::RegionNotFound)?; + let region_len = u16::try_from(segment.region.len())?; + + trace!(?segment, "before segment split"); + for seg in segment.split()? { + self.set_vector_index( + &seg.start_ip.ipaddr_bytes(), + start_index_ptr + buf.len() as u32, + )?; + + buf.put_slice(&seg.start_ip.encode_ipaddr_bytes()); + buf.put_slice(&seg.end_ip.encode_ipaddr_bytes()); + buf.put_u16_le(region_len); + buf.put_u32_le(region_ptr); + count += 1; + } + } + + info!("Write segment index buffer"); + self.dst_file + .seek(SeekFrom::Start(start_index_ptr as u64))?; + self.dst_file.write_all(buf.as_ref())?; + + info!("Write header buffer"); + let header_buf = self.header.encode_bytes( + start_index_ptr, + start_index_ptr + (buf.len() as u32) - (self.ip_version.segment_index_size() as u32), + ); + self.dst_file.seek(SeekFrom::Start(0))?; + self.dst_file.write_all(header_buf.as_ref())?; + + self.dst_file + .seek(SeekFrom::Start(HEADER_INFO_LENGTH as u64))?; + self.dst_file + .write_all(self.vector_index.as_flattened().as_flattened())?; + + info!( + start_index_ptr, + current_index_ptr = start_index_ptr + buf.len() as u32, + region_pool_len = self.region_pool.len(), + count, + "Write done" + ); + + Ok(()) + } +} diff --git a/maker/rust/maker/src/segment.rs b/maker/rust/maker/src/segment.rs new file mode 100644 index 0000000..9425eb9 --- /dev/null +++ b/maker/rust/maker/src/segment.rs @@ -0,0 +1,166 @@ +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::str::FromStr; +use std::sync::Arc; + +use itertools::Itertools; +use tracing::{debug, info, trace}; + +use crate::IpVersion; +use crate::error::{MakerError, Result}; +use crate::header::IPAddrExt; + +pub trait IpPlusEq { + fn ip_plus_eq(&self, other: &Self) -> bool; +} + +impl IpPlusEq for IpAddr { + fn ip_plus_eq(&self, other: &Self) -> bool { + match (self, other) { + (IpAddr::V4(start), IpAddr::V4(end)) => Ipv4Addr::from(u32::from(*start) + 1).eq(end), + (IpAddr::V6(start), IpAddr::V6(end)) => Ipv6Addr::from(u128::from(*start) + 1).eq(end), + _ => false, + } + } +} + +#[derive(Debug)] +pub struct Segment { + pub start_ip: IpAddr, + pub end_ip: IpAddr, + pub region: Arc, +} + +fn region_filter(region: &str, filter_fields: &[usize]) -> Result { + if filter_fields.is_empty() { + return Ok(region.to_owned()); + } + let fields = region.split('|').collect::>(); + let filtered = filter_fields + .iter() + .map(|idx| { + fields + .get(*idx) + .ok_or(MakerError::RegionFilterFieldsTooBig { + limit: fields.len(), + actual: *idx, + }) + }) + .collect::>>()?; + Ok(filtered.into_iter().join("|")) +} + +impl Segment { + pub fn from_file( + src_filepath: &str, + ip_version: IpVersion, + filter_fields: &[usize], + ) -> Result> { + info!("Read src file"); + + let mut last = None; + let mut segments = vec![]; + + let reader = BufReader::new(File::open(src_filepath)?); + for original_line in reader.lines() { + let original_line = original_line?; + let line = original_line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + trace!(?line, "Processing line"); + let v = line.splitn(3, '|').collect::>(); + if v.len() != 3 { + return Err(MakerError::ParseIPRegion(line.to_owned())); + } + let (start_ip, end_ip, region) = (v[0], v[1], v[2]); + + let (start_ip, end_ip) = if ip_version.eq(&IpVersion::V4) { + ( + IpAddr::V4(Ipv4Addr::from_str(start_ip)?), + IpAddr::V4(Ipv4Addr::from_str(end_ip)?), + ) + } else { + ( + IpAddr::V6(Ipv6Addr::from_str(start_ip)?), + IpAddr::V6(Ipv6Addr::from_str(end_ip)?), + ) + }; + if start_ip.gt(&end_ip) { + return Err(MakerError::ParseIPRegion(line.to_owned())); + } + + let segment = Segment { + start_ip, + end_ip, + region: Arc::new(region_filter(region, filter_fields)?), + }; + match last.take() { + None => { + last = Some(segment); + } + Some(mut l) + if segment.region.eq(&l.region) && l.end_ip.ip_plus_eq(&segment.start_ip) => + { + l.end_ip = segment.end_ip; + last = Some(l); + } + Some(seg) => { + segments.push(seg); + last = Some(segment); + } + } + } + + if let Some(last) = last { + segments.push(last); + } + + info!(length = segments.len(), "load segments"); + Ok(segments) + } + + pub fn split(self) -> Result> { + let start_bytes = self.start_ip.ipaddr_bytes(); + let end_bytes = self.end_ip.ipaddr_bytes(); + + let start_byte = u16::from_be_bytes([start_bytes[0], start_bytes[1]]); + let end_byte = u16::from_be_bytes([end_bytes[0], end_bytes[1]]); + + let segments = (start_byte..=end_byte) + .map(|index| { + let sip = if index == start_byte { + self.start_ip + } else if self.start_ip.is_ipv4() { + IpAddr::from(Ipv4Addr::from((index as u32) << 16)) + } else { + IpAddr::from(Ipv6Addr::from((index as u128) << 112)) + }; + + let eip = if index == end_byte { + self.end_ip + } else if self.start_ip.is_ipv4() { + let mask = (1 << 16) - 1; + let v = (index as u32) << 16; + IpAddr::from(Ipv4Addr::from(v | mask)) + } else { + let mask = (1 << 112) - 1; + let v = (index as u128) << 112; + IpAddr::from(Ipv6Addr::from(v | mask)) + }; + + trace!(?index, ?sip, ?eip, ?self.region, "in split segment"); + Segment { + start_ip: sip, + end_ip: eip, + region: self.region.clone(), + } + }) + .collect_vec(); + debug!(?self, length = segments.len(), "Try to index segment"); + + Ok(segments) + } +} diff --git a/maker/rust/vimdiff.png b/maker/rust/vimdiff.png deleted file mode 100644 index 39c220e..0000000 Binary files a/maker/rust/vimdiff.png and /dev/null differ