mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
Feat: rust binding support IPv6 search
This commit is contained in:
parent
b5b00aeb87
commit
32e005e1e6
@ -10,14 +10,14 @@ use clap::{Parser, Subcommand, ValueEnum};
|
||||
///
|
||||
/// export CHECK='../../../data/ipv4_source.txt'
|
||||
///
|
||||
/// cargo run -- --xdb=$XDB bench $CHECK
|
||||
/// cargo run -r -- --xdb=$XDB bench $CHECK
|
||||
///
|
||||
/// cargo run -- --xdb=$XDB query
|
||||
/// cargo run -r -- --xdb=$XDB query
|
||||
///
|
||||
/// ```
|
||||
#[derive(Parser)]
|
||||
pub struct Command {
|
||||
/// xdb filepath, e.g. `../../../data/ip2region_v4.xdb`
|
||||
/// xdb filepath, e.g. `../../../data/ip2region_v4.xdb` or `../../../data/ip2region_v6.xdb`
|
||||
#[arg(long, env = "XDB")]
|
||||
pub xdb: String,
|
||||
#[arg(long, value_enum, default_value_t = CmdCachePolicy::FullMemory)]
|
||||
@ -29,7 +29,7 @@ pub struct Command {
|
||||
#[derive(Subcommand)]
|
||||
pub enum Action {
|
||||
/// Bench the ip search and output performance info
|
||||
Bench { check_file: String},
|
||||
Bench { check_file: String },
|
||||
/// Interactive input and output, querying one IP and get result at a time
|
||||
Query,
|
||||
}
|
||||
|
||||
@ -1,18 +1,60 @@
|
||||
extern crate core;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::io::Write;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
use ip2region::{Searcher, CachePolicy};
|
||||
use crate::cmd::{Action, CmdCachePolicy, Command};
|
||||
use clap::Parser;
|
||||
use ip2region::{CachePolicy, Searcher};
|
||||
|
||||
mod cmd;
|
||||
|
||||
fn check(searcher: &Searcher, start_ip: IpAddr, end_ip: IpAddr, check: &str) -> usize {
|
||||
match (start_ip, end_ip) {
|
||||
(IpAddr::V4(start_ip), IpAddr::V4(end_ip)) => {
|
||||
let start_ip = u32::from(start_ip);
|
||||
let end_ip = u32::from(end_ip);
|
||||
let mid_ip = (start_ip >> 1) + (end_ip >> 1);
|
||||
|
||||
let checks = [
|
||||
start_ip,
|
||||
(start_ip >> 1) + (mid_ip >> 1),
|
||||
mid_ip,
|
||||
(mid_ip >> 1) + (end_ip >> 1),
|
||||
end_ip,
|
||||
];
|
||||
for ip in checks.iter() {
|
||||
let result = searcher.search(*ip).unwrap();
|
||||
assert_eq!(result.as_str(), check);
|
||||
}
|
||||
checks.len()
|
||||
}
|
||||
(IpAddr::V6(start_ip), IpAddr::V6(end_ip)) => {
|
||||
let start_ip = u128::from(start_ip);
|
||||
let end_ip = u128::from(end_ip);
|
||||
let mid_ip = (start_ip >> 1) + (end_ip >> 1);
|
||||
|
||||
let checks = [
|
||||
start_ip,
|
||||
(start_ip >> 1) + (mid_ip >> 1),
|
||||
mid_ip,
|
||||
(mid_ip >> 1) + (end_ip >> 1),
|
||||
end_ip,
|
||||
];
|
||||
for ip in checks.iter() {
|
||||
let result = searcher.search(*ip).unwrap();
|
||||
assert_eq!(result.as_str(), check);
|
||||
}
|
||||
checks.len()
|
||||
}
|
||||
_ => panic!("invalid start ip and end ip"),
|
||||
}
|
||||
}
|
||||
|
||||
fn bench(searcher: &Searcher, check_filepath: &str) {
|
||||
let file = File::open(check_filepath).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
@ -30,24 +72,13 @@ fn bench(searcher: &Searcher, check_filepath: &str) {
|
||||
if ip_test_line.len() != 3 {
|
||||
panic!("this line {line} don`t have enough `|` for spilt");
|
||||
}
|
||||
let start_ip = Ipv4Addr::from_str(ip_test_line[0]).unwrap();
|
||||
let end_ip = Ipv4Addr::from_str(ip_test_line[1]).unwrap();
|
||||
let start_ip = IpAddr::from_str(ip_test_line[0]).unwrap();
|
||||
let end_ip = IpAddr::from_str(ip_test_line[1]).unwrap();
|
||||
if end_ip < start_ip {
|
||||
panic!("start ip({start_ip}) should not be greater than end ip({end_ip})")
|
||||
}
|
||||
let start_ip = u32::from(start_ip);
|
||||
let end_ip = u32::from(end_ip);
|
||||
let mid_ip = (((start_ip as u64) + (end_ip as u64)) >> 1) as u32;
|
||||
for ip in [
|
||||
start_ip,
|
||||
((start_ip as u64 + mid_ip as u64) >> 1) as u32,
|
||||
mid_ip,
|
||||
((mid_ip as u64 + end_ip as u64) >> 1) as u32,
|
||||
end_ip,
|
||||
] {
|
||||
let result = searcher.search(ip).unwrap();
|
||||
assert_eq!(result.as_str(), ip_test_line[2]);
|
||||
count += 1;
|
||||
{
|
||||
count += check(searcher, start_ip, end_ip, ip_test_line[2]);
|
||||
}
|
||||
}
|
||||
println!(
|
||||
@ -55,7 +86,7 @@ fn bench(searcher: &Searcher, check_filepath: &str) {
|
||||
took: {:?} ,\
|
||||
cost: {:?}/op",
|
||||
now.elapsed(),
|
||||
now.elapsed() / count
|
||||
now.elapsed() / count as u32
|
||||
)
|
||||
}
|
||||
|
||||
@ -84,12 +115,12 @@ fn main() {
|
||||
let cache_policy = match cmd.cache_policy {
|
||||
CmdCachePolicy::FullMemory => CachePolicy::FullMemory,
|
||||
CmdCachePolicy::VectorIndex => CachePolicy::VectorIndex,
|
||||
CmdCachePolicy::NoCache => CachePolicy::NoCache
|
||||
CmdCachePolicy::NoCache => CachePolicy::NoCache,
|
||||
};
|
||||
|
||||
let searcher = Searcher::new(cmd.xdb, cache_policy);
|
||||
|
||||
let searcher = Searcher::new(cmd.xdb, cache_policy).unwrap();
|
||||
match cmd.action {
|
||||
Action::Bench{ check_file} => bench(&searcher, &check_file),
|
||||
Action::Query => query(&searcher)
|
||||
Action::Bench { check_file } => bench(&searcher, &check_file),
|
||||
Action::Query => query(&searcher),
|
||||
}
|
||||
}
|
||||
|
||||
@ -11,6 +11,8 @@ license = "Apache-2.0"
|
||||
[dependencies]
|
||||
tracing = "0.1"
|
||||
thiserror = "2"
|
||||
num-traits = "0.2"
|
||||
num-derive = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.7"
|
||||
|
||||
@ -3,29 +3,56 @@ use rand;
|
||||
|
||||
use ip2region::{CachePolicy, Searcher};
|
||||
|
||||
const XDB_FILEPATH: &'static str = "../../../data/ip2region_v4.xdb";
|
||||
|
||||
macro_rules! bench_search {
|
||||
($name:ident, $cache_policy:expr) => {
|
||||
($name:ident, $xdb:expr, $cache_policy:expr, $ty:ty) => {
|
||||
fn $name(c: &mut Criterion) {
|
||||
c.bench_function(stringify!($name), |b| {
|
||||
let searcher = Searcher::new(XDB_FILEPATH.to_owned(), $cache_policy);
|
||||
let searcher = Searcher::new($xdb.to_owned(), $cache_policy).unwrap();
|
||||
b.iter(|| {
|
||||
searcher.search(rand::random::<u32>()).unwrap();
|
||||
searcher.search(rand::random::<$ty>()).unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
bench_search!(no_memory_bench, CachePolicy::NoCache);
|
||||
bench_search!(vector_index_cache_bench, CachePolicy::VectorIndex);
|
||||
bench_search!(full_memory_cache_bench, CachePolicy::FullMemory);
|
||||
const IPV4_XDB: &'static str = "../../../data/ip2region_v4.xdb";
|
||||
const IPV6_XDB: &'static str = "../../../data/ip2region_v6.xdb";
|
||||
|
||||
bench_search!(ipv4_no_memory_bench, IPV4_XDB, CachePolicy::NoCache, u32);
|
||||
bench_search!(
|
||||
ipv4_vector_index_cache_bench,
|
||||
IPV4_XDB,
|
||||
CachePolicy::VectorIndex,
|
||||
u32
|
||||
);
|
||||
bench_search!(
|
||||
ipv4_full_memory_cache_bench,
|
||||
IPV4_XDB,
|
||||
CachePolicy::FullMemory,
|
||||
u32
|
||||
);
|
||||
bench_search!(ipv6_no_memory_bench, IPV6_XDB, CachePolicy::NoCache, u128);
|
||||
bench_search!(
|
||||
ipv6_vector_index_cache_bench,
|
||||
IPV6_XDB,
|
||||
CachePolicy::VectorIndex,
|
||||
u128
|
||||
);
|
||||
bench_search!(
|
||||
ipv6_full_memory_cache_bench,
|
||||
IPV6_XDB,
|
||||
CachePolicy::FullMemory,
|
||||
u128
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
no_memory_bench,
|
||||
vector_index_cache_bench,
|
||||
full_memory_cache_bench,
|
||||
ipv4_no_memory_bench,
|
||||
ipv4_vector_index_cache_bench,
|
||||
ipv4_full_memory_cache_bench,
|
||||
ipv6_no_memory_bench,
|
||||
ipv6_vector_index_cache_bench,
|
||||
ipv6_full_memory_cache_bench
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
||||
@ -7,10 +7,22 @@ pub enum Ip2RegionError {
|
||||
Utf8Error(#[from] std::string::FromUtf8Error),
|
||||
|
||||
#[error("Parse invalid IP address")]
|
||||
ParseIpaddress(#[from] std::num::ParseIntError),
|
||||
ParseIpaddressFailed,
|
||||
|
||||
#[error("No matched Ipaddress")]
|
||||
NoMatchedIP,
|
||||
|
||||
#[error("Header parse error: {0}")]
|
||||
HeaderParsed(String),
|
||||
|
||||
#[error("Searcher load IPv4 data, couldn't search IPv6 data")]
|
||||
OnlyIPv4Version,
|
||||
|
||||
#[error("Searcher load IPv6 data, couldn't search IPv4 data")]
|
||||
OnlyIPv6Version,
|
||||
|
||||
#[error("Try from slice failed")]
|
||||
TryFromSliceFailed(#[from] std::array::TryFromSliceError),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Ip2RegionError>;
|
||||
|
||||
86
binding/rust/ip2region/src/header.rs
Normal file
86
binding/rust/ip2region/src/header.rs
Normal file
@ -0,0 +1,86 @@
|
||||
use num_derive::FromPrimitive;
|
||||
use num_traits::FromPrimitive;
|
||||
|
||||
use crate::error::Ip2RegionError;
|
||||
|
||||
pub const HEADER_INFO_LENGTH: usize = 256;
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug)]
|
||||
pub struct Header {
|
||||
version: u16,
|
||||
index_policy: IndexPolicy,
|
||||
create_time: u32,
|
||||
start_index_ptr: u32,
|
||||
end_index_ptr: u32,
|
||||
ip_version: IpVersion,
|
||||
runtime_ptr_bytes: u16,
|
||||
}
|
||||
|
||||
impl TryFrom<&[u8; 256]> for Header {
|
||||
type Error = Ip2RegionError;
|
||||
|
||||
fn try_from(value: &[u8; 256]) -> Result<Self, Self::Error> {
|
||||
if value.len() < 20 {
|
||||
return Err(Ip2RegionError::HeaderParsed(
|
||||
"Header bytes too short".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let index_policy_value = u16::from_le_bytes([value[2], value[3]]);
|
||||
let ip_version_value = u16::from_le_bytes([value[16], value[17]]);
|
||||
|
||||
Ok(Header {
|
||||
version: u16::from_le_bytes([value[0], value[1]]),
|
||||
index_policy: IndexPolicy::from_u16(index_policy_value).ok_or_else(|| {
|
||||
Ip2RegionError::HeaderParsed(format!(
|
||||
"Header index policy invalid: {index_policy_value}"
|
||||
))
|
||||
})?,
|
||||
create_time: u32::from_le_bytes([value[4], value[5], value[6], value[7]]),
|
||||
start_index_ptr: u32::from_le_bytes([value[8], value[9], value[10], value[11]]),
|
||||
end_index_ptr: u32::from_le_bytes([value[12], value[13], value[14], value[15]]),
|
||||
|
||||
ip_version: IpVersion::from_u16(ip_version_value).ok_or_else(|| {
|
||||
Ip2RegionError::HeaderParsed(format!(
|
||||
"Header ip version invalid: {ip_version_value}"
|
||||
))
|
||||
})?,
|
||||
runtime_ptr_bytes: u16::from_le_bytes([value[18], value[19]]),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(FromPrimitive, Debug)]
|
||||
#[repr(u16)]
|
||||
pub enum IndexPolicy {
|
||||
VectorIndex = 1,
|
||||
BTreeIndex = 2,
|
||||
}
|
||||
|
||||
#[derive(FromPrimitive, Debug)]
|
||||
#[repr(u16)]
|
||||
pub enum IpVersion {
|
||||
V4 = 4,
|
||||
V6 = 6,
|
||||
}
|
||||
|
||||
impl Header {
|
||||
pub fn bytes_len(&self) -> usize {
|
||||
match &self.ip_version {
|
||||
IpVersion::V4 => 4,
|
||||
IpVersion::V6 => 16,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn segment_index_size(&self) -> usize {
|
||||
match &self.ip_version {
|
||||
IpVersion::V4 => 14,
|
||||
IpVersion::V6 => 38,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ip_version(&self) -> &IpVersion {
|
||||
&self.ip_version
|
||||
}
|
||||
}
|
||||
@ -1,62 +1,60 @@
|
||||
use std::net::Ipv4Addr;
|
||||
use std::borrow::Cow;
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::error::{Ip2RegionError, Result};
|
||||
|
||||
pub trait ToUIntIP {
|
||||
fn to_u32_ip(&self) -> Result<u32>;
|
||||
pub trait IpValueExt {
|
||||
fn to_ipaddr(self) -> Result<IpAddr>;
|
||||
}
|
||||
|
||||
impl ToUIntIP for u32 {
|
||||
fn to_u32_ip(&self) -> Result<u32> {
|
||||
Ok(self.to_owned())
|
||||
impl IpValueExt for &str {
|
||||
fn to_ipaddr(self) -> Result<IpAddr> {
|
||||
IpAddr::from_str(self).map_err(|_| Ip2RegionError::ParseIpaddressFailed)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToUIntIP for &str {
|
||||
fn to_u32_ip(&self) -> Result<u32> {
|
||||
if let Ok(ip_addr) = Ipv4Addr::from_str(self) {
|
||||
return Ok(u32::from(ip_addr));
|
||||
impl IpValueExt for u32 {
|
||||
fn to_ipaddr(self) -> Result<IpAddr> {
|
||||
Ok(IpAddr::V4(Ipv4Addr::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl IpValueExt for Ipv4Addr {
|
||||
fn to_ipaddr(self) -> Result<IpAddr> {
|
||||
Ok(IpAddr::V4(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl IpValueExt for Ipv6Addr {
|
||||
fn to_ipaddr(self) -> Result<IpAddr> {
|
||||
Ok(IpAddr::V6(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl IpValueExt for u128 {
|
||||
fn to_ipaddr(self) -> Result<IpAddr> {
|
||||
Ok(IpAddr::V6(Ipv6Addr::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Ordering comparisons between an IP value and an address given as raw bytes.
///
/// In the `IpAddr` implementation in this file, an IPv4 `other` is read from its
/// first four bytes in reversed order, and an IPv6 `other` from its first
/// sixteen bytes in the order given.
pub trait CompareExt {
|
||||
/// Returns `true` when `self` orders strictly before the address encoded in `other`.
fn ip_lt(&self, other: Cow<'_, [u8]>) -> bool;
|
||||
/// Returns `true` when `self` orders strictly after the address encoded in `other`.
fn ip_gt(&self, other: Cow<'_, [u8]>) -> bool;
|
||||
}
|
||||
|
||||
impl CompareExt for IpAddr {
|
||||
fn ip_lt(&self, other: Cow<'_, [u8]>) -> bool {
|
||||
match self {
|
||||
IpAddr::V4(ip) => ip.octets() < [other[3], other[2], other[1], other[0]],
|
||||
IpAddr::V6(ip) => ip.octets() < other[0..16].try_into().unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
fn ip_gt(&self, other: Cow<'_, [u8]>) -> bool {
|
||||
match self {
|
||||
IpAddr::V4(ip) => ip.octets() > [other[3], other[2], other[1], other[0]],
|
||||
IpAddr::V6(ip) => ip.octets() > other[0..16].try_into().unwrap(),
|
||||
}
|
||||
Ok(self.parse::<u32>()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToUIntIP for Ipv4Addr {
|
||||
fn to_u32_ip(&self) -> Result<u32> {
|
||||
Ok(u32::from(*self))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_ip {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_ip_str_2_u32() {
|
||||
let ip_str = "1.1.1.1";
|
||||
let result = ip_str.to_u32_ip().unwrap();
|
||||
assert_eq!(result, 1 << 24 | 1 << 16 | 1 << 8 | 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ip_u32_str() {
|
||||
let ip = "12";
|
||||
let result = ip.to_u32_ip().unwrap();
|
||||
assert_eq!(result, 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ip_u32() {
|
||||
let ip: u32 = 33;
|
||||
let result = ip.to_u32_ip().unwrap();
|
||||
assert_eq!(result, 33);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ip_addr() {
|
||||
let ip = Ipv4Addr::from_str("0.0.3.12").unwrap();
|
||||
let result = ip.to_u32_ip().unwrap();
|
||||
assert_eq!(result, 3 << 8 | 12)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
mod error;
|
||||
mod header;
|
||||
mod ip_value;
|
||||
mod searcher;
|
||||
|
||||
pub use self::ip_value::ToUIntIP;
|
||||
pub use self::searcher::{CachePolicy, Searcher};
|
||||
|
||||
@ -2,28 +2,29 @@ use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::net::IpAddr;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
use crate::ToUIntIP;
|
||||
use crate::error::{Ip2RegionError, Result};
|
||||
use crate::header::{HEADER_INFO_LENGTH, Header, IpVersion};
|
||||
use crate::ip_value::{CompareExt, IpValueExt};
|
||||
|
||||
const HEADER_INFO_LENGTH: usize = 256;
|
||||
const VECTOR_INDEX_LENGTH: usize = 256 * 256 * 8;
|
||||
const VECTOR_INDEX_COLS: usize = 256;
|
||||
const VECTOR_INDEX_SIZE: usize = 8;
|
||||
const SEGMENT_INDEX_SIZE: usize = 14;
|
||||
|
||||
static VECTOR_INDEX_CACHE: OnceLock<Vec<u8>> = OnceLock::new();
|
||||
static FULL_CACHE: OnceLock<Vec<u8>> = OnceLock::new();
|
||||
|
||||
pub struct Searcher {
|
||||
pub filepath: String,
|
||||
pub cache_policy: CachePolicy,
|
||||
pub header: Header,
|
||||
vector_cache: OnceLock<Vec<u8>>,
|
||||
full_cache: OnceLock<Vec<u8>>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Debug)]
|
||||
#[derive(PartialEq, Debug, Copy, Clone)]
|
||||
pub enum CachePolicy {
|
||||
NoCache,
|
||||
VectorIndex,
|
||||
@ -31,43 +32,68 @@ pub enum CachePolicy {
|
||||
}
|
||||
|
||||
impl Searcher {
|
||||
pub fn new(filepath: String, cache_policy: CachePolicy) -> Self {
|
||||
Self {
|
||||
pub fn new(filepath: String, cache_policy: CachePolicy) -> Result<Self> {
|
||||
let mut file = File::open(Path::new(&filepath))?;
|
||||
let mut buf = [0; HEADER_INFO_LENGTH];
|
||||
file.read_exact(&mut buf)?;
|
||||
|
||||
let header = Header::try_from(&buf)?;
|
||||
debug!(?header, "Load xdb file with header");
|
||||
|
||||
Ok(Self {
|
||||
filepath,
|
||||
cache_policy,
|
||||
}
|
||||
header,
|
||||
vector_cache: OnceLock::new(),
|
||||
full_cache: OnceLock::new(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn search<T>(&self, ip: T) -> Result<String>
|
||||
where
|
||||
T: ToUIntIP + Display,
|
||||
T: IpValueExt + Display,
|
||||
{
|
||||
let ip = ip.to_u32_ip()?;
|
||||
let il0 = ((ip >> 24) & 0xFF) as usize;
|
||||
let il1 = ((ip >> 16) & 0xFF) as usize;
|
||||
let start_point = VECTOR_INDEX_SIZE * (il0 * VECTOR_INDEX_COLS + il1);
|
||||
let ip = ip.to_ipaddr()?;
|
||||
|
||||
let (il0, il1) = match (ip, self.header.ip_version()) {
|
||||
(IpAddr::V6(ip), IpVersion::V6) => (ip.octets()[0], ip.octets()[1]),
|
||||
(IpAddr::V4(ip), IpVersion::V4) => (ip.octets()[0], ip.octets()[1]),
|
||||
(_, IpVersion::V4) => return Err(Ip2RegionError::OnlyIPv4Version),
|
||||
(_, IpVersion::V6) => return Err(Ip2RegionError::OnlyIPv6Version),
|
||||
};
|
||||
|
||||
let start_point = VECTOR_INDEX_SIZE * ((il0 as usize) * VECTOR_INDEX_COLS + (il1 as usize));
|
||||
let vector_index = self.vector_index()?;
|
||||
let start_ptr = get_block_by_size(&vector_index, start_point, 4);
|
||||
let end_ptr = get_block_by_size(&vector_index, start_point + 4, 4);
|
||||
let start_ptr =
|
||||
u32::from_le_bytes(vector_index[start_point..start_point + 4].try_into()?) as usize;
|
||||
let end_ptr =
|
||||
u32::from_le_bytes(vector_index[start_point + 4..start_point + 8].try_into()?) as usize;
|
||||
|
||||
// Binary search the segment index to get the region
|
||||
let segment_index_size = self.header.segment_index_size();
|
||||
let bytes_len = self.header.bytes_len();
|
||||
|
||||
let mut left: usize = 0;
|
||||
let mut right: usize = (end_ptr - start_ptr) / SEGMENT_INDEX_SIZE;
|
||||
let mut right: usize = (end_ptr - start_ptr) / segment_index_size;
|
||||
|
||||
while left <= right {
|
||||
let mid = (left + right) >> 1;
|
||||
let offset = start_ptr + mid * SEGMENT_INDEX_SIZE;
|
||||
let buffer_ip_value = self.read_buf(offset, SEGMENT_INDEX_SIZE)?;
|
||||
|
||||
let start_ip = get_block_by_size(&buffer_ip_value, 0, 4);
|
||||
if ip < (start_ip as u32) {
|
||||
let offset = start_ptr + mid * segment_index_size;
|
||||
let buffer_ip_value = self.read_buf(offset, segment_index_size)?;
|
||||
if ip.ip_lt(Cow::Borrowed(&buffer_ip_value[0..bytes_len])) {
|
||||
right = mid - 1;
|
||||
} else if ip > (get_block_by_size(&buffer_ip_value, 4, 4) as u32) {
|
||||
} else if ip.ip_gt(Cow::Borrowed(&buffer_ip_value[bytes_len..bytes_len * 2])) {
|
||||
left = mid + 1;
|
||||
} else {
|
||||
let data_length = get_block_by_size(&buffer_ip_value, 8, 2);
|
||||
let data_offset = get_block_by_size(&buffer_ip_value, 10, 4);
|
||||
let result = String::from_utf8(self.read_buf(data_offset, data_length)?.to_vec())?;
|
||||
let start_id = bytes_len * 2;
|
||||
let data_length =
|
||||
u16::from_le_bytes([buffer_ip_value[start_id], buffer_ip_value[start_id + 1]]);
|
||||
let data_offset =
|
||||
u32::from_le_bytes(buffer_ip_value[start_id + 2..start_id + 6].try_into()?);
|
||||
let result = String::from_utf8(
|
||||
self.read_buf(data_offset as usize, data_length as usize)?
|
||||
.to_vec(),
|
||||
)?;
|
||||
return Ok(result);
|
||||
}
|
||||
}
|
||||
@ -79,18 +105,19 @@ impl Searcher {
|
||||
return self.read_buf(HEADER_INFO_LENGTH, VECTOR_INDEX_LENGTH);
|
||||
}
|
||||
|
||||
match VECTOR_INDEX_CACHE.get() {
|
||||
match self.vector_cache.get() {
|
||||
None => {
|
||||
debug!("Load vector index cache");
|
||||
let data = self
|
||||
.read_buf(HEADER_INFO_LENGTH, VECTOR_INDEX_LENGTH)?
|
||||
.to_vec();
|
||||
let _ = VECTOR_INDEX_CACHE
|
||||
let _ = self
|
||||
.vector_cache
|
||||
.set(data)
|
||||
.inspect_err(|_| warn!("Vector index cache already initialized"));
|
||||
|
||||
// Safety: VECTOR_INDEX_CACHE checked and set for empty before
|
||||
let cache = VECTOR_INDEX_CACHE.get().unwrap();
|
||||
// Safety: vector cache checked and set for empty before
|
||||
let cache = self.vector_cache.get().unwrap();
|
||||
Ok(Cow::Borrowed(cache))
|
||||
}
|
||||
Some(cache) => Ok(Cow::Borrowed(cache)),
|
||||
@ -109,18 +136,19 @@ impl Searcher {
|
||||
return Ok(Cow::from(buf));
|
||||
}
|
||||
|
||||
match FULL_CACHE.get() {
|
||||
match self.full_cache.get() {
|
||||
None => {
|
||||
debug!(filepath=?self.filepath, "Load full cache");
|
||||
let mut file = File::open(&self.filepath)?;
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf)?;
|
||||
let _ = FULL_CACHE
|
||||
let _ = self
|
||||
.full_cache
|
||||
.set(buf)
|
||||
.inspect_err(|_| warn!("Full cache already initialized"));
|
||||
|
||||
// Safety: FULL_CACHE checked and set for empty before
|
||||
let cache = FULL_CACHE.get().unwrap();
|
||||
let cache = self.full_cache.get().unwrap();
|
||||
Ok(Cow::from(&cache[offset..offset + size]))
|
||||
}
|
||||
Some(cache) => {
|
||||
@ -131,35 +159,19 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_block_by_size(bytes: &[u8], offset: usize, length: usize) -> usize {
|
||||
let mut result: usize = 0;
|
||||
for (index, value) in bytes[offset..offset + length].iter().enumerate() {
|
||||
result += usize::from(*value) << (index << 3);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::net::Ipv4Addr;
|
||||
use std::str::FromStr;
|
||||
|
||||
use super::*;
|
||||
|
||||
const XDB_PATH: &str = "../../../data/ip2region_v4.xdb";
|
||||
const CHECK_PATH: &str = "../../../data/ipv4_source.txt";
|
||||
|
||||
fn multi_type_ip(searcher: &Searcher) {
|
||||
searcher.search("2.0.0.0").unwrap();
|
||||
searcher.search("32").unwrap();
|
||||
searcher.search(4294408949).unwrap();
|
||||
searcher
|
||||
.search(Ipv4Addr::from_str("1.1.1.1").unwrap())
|
||||
.unwrap();
|
||||
}
|
||||
// The IPv6 tests only work after running `git lfs pull`.
|
||||
const IPV4_XDB_PATH: &str = "../../../data/ip2region_v4.xdb";
|
||||
const IPV4_CHECK_PATH: &str = "../../../data/ipv4_source.txt";
|
||||
const IPV6_XDB_PATH: &str = "../../../data/ip2region_v6.xdb";
|
||||
const IPV6_CHECK_PATH: &str = "../../../data/ipv6_source.txt";
|
||||
|
||||
/// Test that every supported input type can be searched successfully.
|
||||
#[test]
|
||||
@ -169,15 +181,25 @@ mod tests {
|
||||
CachePolicy::FullMemory,
|
||||
CachePolicy::VectorIndex,
|
||||
] {
|
||||
multi_type_ip(&Searcher::new(XDB_PATH.to_owned(), cache_policy));
|
||||
let searcher = Searcher::new(IPV4_XDB_PATH.to_owned(), cache_policy).unwrap();
|
||||
searcher.search("1.0.1.0").unwrap();
|
||||
searcher.search("1.0.1.2").unwrap();
|
||||
searcher.search(0u32).unwrap();
|
||||
|
||||
let searcher = Searcher::new(IPV6_XDB_PATH.to_owned(), cache_policy).unwrap();
|
||||
searcher.search("2c0f:fff1::").unwrap();
|
||||
searcher.search("2c0f:fff1::1").unwrap();
|
||||
searcher.search(111u128).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn match_ip_correct(searcher: &Searcher) {
|
||||
let file = File::open(CHECK_PATH).unwrap();
|
||||
fn match_ip_correct(xdb_filepath: &str, check_path: &str, cache_policy: CachePolicy) {
|
||||
let searcher = Searcher::new(xdb_filepath.to_owned(), cache_policy).unwrap();
|
||||
|
||||
let file = File::open(check_path).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
|
||||
for line in reader.lines().take(100) {
|
||||
for line in reader.lines().take(10_000) {
|
||||
let line = line.unwrap();
|
||||
|
||||
if !line.contains("|") {
|
||||
@ -185,11 +207,20 @@ mod tests {
|
||||
}
|
||||
|
||||
let ip_test_line = line.splitn(3, "|").collect::<Vec<&str>>();
|
||||
let start_ip = Ipv4Addr::from_str(ip_test_line[0]).unwrap();
|
||||
let end_ip = Ipv4Addr::from_str(ip_test_line[1]).unwrap();
|
||||
for _ in 0..10 {
|
||||
let value = rand::random_range(u32::from(start_ip)..u32::from(end_ip) + 1);
|
||||
let result = searcher.search(value).unwrap();
|
||||
let start_ip = IpAddr::from_str(ip_test_line[0]).unwrap();
|
||||
let end_ip = IpAddr::from_str(ip_test_line[1]).unwrap();
|
||||
for _ in 0..3 {
|
||||
let result = match (start_ip, end_ip) {
|
||||
(IpAddr::V4(start), IpAddr::V4(end)) => {
|
||||
let value = rand::random_range(u32::from(start)..u32::from(end) + 1);
|
||||
searcher.search(value).unwrap()
|
||||
}
|
||||
(IpAddr::V6(start), IpAddr::V6(end)) => {
|
||||
let value = rand::random_range(u128::from(start)..u128::from(end) + 1);
|
||||
searcher.search(value).unwrap()
|
||||
}
|
||||
_ => panic!("invalid ip address"),
|
||||
};
|
||||
assert_eq!(result.as_str(), ip_test_line[2])
|
||||
}
|
||||
}
|
||||
@ -202,7 +233,8 @@ mod tests {
|
||||
CachePolicy::FullMemory,
|
||||
CachePolicy::VectorIndex,
|
||||
] {
|
||||
match_ip_correct(&Searcher::new(XDB_PATH.to_owned(), cache_policy));
|
||||
match_ip_correct(IPV4_XDB_PATH, IPV4_CHECK_PATH, cache_policy);
|
||||
match_ip_correct(IPV6_XDB_PATH, IPV6_CHECK_PATH, cache_policy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user