feat: change package name

This commit is contained in:
gongzhengyang 2022-12-22 18:27:01 +08:00
parent 76345e67b0
commit 5702ca9259
10 changed files with 125 additions and 94 deletions

View File

@ -1,2 +1,2 @@
[workspace]
members = ["bin", "search"]
members = ["example", "ip2region2"]

View File

@ -2,10 +2,6 @@
# 使用方式
### 缓存整个 `xdb` 数据
预先加载整个 `ip2region.xdb` 到内存,完全基于内存查询。该方式采用 `once_cell::sync::OnceCell`,只会加载一次数据,并且线程安全,可以自由使用 `tokio` 异步运行时或者标准库的多线程 `std::thread`
配置`Cargo.toml`如下
```toml
@ -15,6 +11,14 @@ search = { git = "https://github.com/lionsoul2014/ip2region.git", branch = "mast
tokio = { version = "1", features = ["full"]}
```
程序启动时不会加载文件,此时程序占用内存约 `1M`
一旦开始执行查询,`ip2region.xdb` 文件会被直接加载到内存,此后程序占用内存约 `12M`
预先加载整个 `ip2region.xdb` 到内存,完全基于内存查询。该方式采用 `once_cell::sync::OnceCell`,只会加载一次数据,并且线程安全,可以自由使用 `tokio` 异步运行时或者标准库的多线程 `std::thread`
### 缓存整个 `xdb` 数据
编写`main.rs`
**需要使用`XDB_FILEPATH`指定`ip2region.xdb`文件的路径**,该参数可以使用相对路径或者绝对路径,如果使用相对路径报错,请修改为绝对路径
@ -28,6 +32,10 @@ async fn main() {
"XDB_FILEPATH",
"../data/ip2region.xdb",
);
//可以调用如下直接加载文件
// search::global_searcher();
// search_by_ip的参数可以是u32类型、字符串IP类型或字符串数字类型
for i in 1..5 {
tokio::spawn(async move {

View File

@ -1,6 +1,6 @@
[package]
name = "ip2region"
default-run = "ip2region"
name = "example"
default-run = "example"
version = "0.1.0"
edition = "2021"
rust-version = "1.66.0"
@ -10,5 +10,5 @@ license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
search = { path = "../search" }
ip2region2 = { path = "../ip2region2" }
clap = { version = "4.0" }

View File

@ -7,15 +7,13 @@ mod cmd;
fn main() {
env::var("XDB_FILEPATH").unwrap_or_else(|_| {
let matches = cmd::get_matches();
let xdb_filepath = matches
.get_one::<String>("xdb");
if xdb_filepath.is_some() {
env::set_var("XDB_FILEPATH", xdb_filepath.unwrap());
if let Some(xdb_filepath) = matches.get_one::<String>("xdb") {
env::set_var("XDB_FILEPATH", xdb_filepath);
}
"".to_owned()
});
search::global_searcher();
ip2region2::global_searcher();
println!("ip2region xdb searcher test program, type `quit` or `Ctrl + c` to exit");
loop {
print!("ip2region>> ");
@ -26,7 +24,7 @@ fn main() {
break;
}
let now = Instant::now();
let result = search::search_by_ip(line.trim());
let result = ip2region2::search_by_ip(line.trim());
println!("region: {:?}, took: {:?}", result, now.elapsed());
}
}

View File

@ -1,5 +1,5 @@
[package]
name = "search"
name = "ip2region2"
version = "0.1.0"
edition = "2021"
rust-version = "1.66.0"

View File

@ -1,7 +1,7 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use criterion::{criterion_group, criterion_main, Criterion};
use rand;
use search::{buffer_value, get_block_by_size, get_start_end_ptr, global_searcher, search_by_ip};
use ip2region2::{buffer_value, get_block_by_size, get_start_end_ptr, global_searcher, search_by_ip};
fn ip_search_bench(c: &mut Criterion) {
c.bench_function("ip_search_bench", |b| {
@ -23,19 +23,29 @@ fn buffer_value_bench(c: &mut Criterion) {
/// Criterion benchmark registered under the name "get_block_by_size".
///
/// Each iteration calls `get_block_by_size` on the global searcher's buffer with a
/// random `u16` (widened to `usize`) as the offset and `4` as the size.
fn get_block_by_size_bench(c: &mut Criterion) {
c.bench_function("get_block_by_size", |b| {
b.iter(||{
get_block_by_size(&global_searcher().buffer, rand::random::<u16>() as usize, 4);
b.iter(|| {
get_block_by_size(
&global_searcher().buffer(),
rand::random::<u16>() as usize,
4,
);
})
});
}
/// Criterion benchmark registered under the name "get_start_end_ptr".
///
/// Each iteration calls `get_start_end_ptr` with a freshly generated random `u32`.
fn get_start_end_ptr_bench(c: &mut Criterion) {
c.bench_function("get_start_end_ptr", |b| {
b.iter(|| {
get_start_end_ptr(rand::random::<u32>());
})
b.iter(|| {
get_start_end_ptr(rand::random::<u32>());
})
});
}
criterion_group!(benches, ip_search_bench, buffer_value_bench, get_block_by_size_bench, get_start_end_ptr_bench);
criterion_group!(
benches,
ip_search_bench,
buffer_value_bench,
get_block_by_size_bench,
get_start_end_ptr_bench
);
criterion_main!(benches);

View File

@ -1,80 +1,33 @@
use std::env;
use std::error::Error;
use std::fmt;
use std::fmt::Formatter;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use once_cell::sync::OnceCell;
use std::fmt::Display;
use ip_value::ToUIntIP;
mod ip_value;
mod searcher;
pub use searcher::global_searcher;
const HEADER_INFO_LENGTH: usize = 256;
const VECTOR_INDEX_COLS: usize = 256;
const VECTOR_INDEX_SIZE: usize = 8;
const SEGMENT_INDEX_SIZE: usize = 14;
/// Holds the complete contents of an xdb file in memory.
pub struct Searcher {
    /// Every byte read from the xdb file.
    pub buffer: Vec<u8>,
}

impl Searcher {
    /// Reads the xdb file into memory.
    ///
    /// The path comes from the `XDB_FILEPATH` environment variable. When that
    /// variable cannot be read, `../data/ip2region.xdb`, `../../data/ip2region.xdb`
    /// and `../../../data/ip2region.xdb` are probed in that order and the first
    /// existing one is used.
    ///
    /// # Errors
    ///
    /// Returns an error when `XDB_FILEPATH` is unavailable and none of the default
    /// locations exists (reported as an error rather than a panic, since this
    /// function already returns `Result`), or when the file cannot be opened or read.
    pub fn new() -> Result<Self, Box<dyn Error>> {
        let xdb_filepath = match env::var("XDB_FILEPATH") {
            Ok(path) => path,
            Err(_) => (1..4)
                .map(|depth| "../".repeat(depth) + "data/ip2region.xdb")
                .find(|candidate| Path::new(candidate).exists())
                .ok_or("you must set XDB_FILEPATH or put file in ../data/ip2region.xdb")?,
        };
        println!("load xdb searcher file at {xdb_filepath}");
        let mut f = File::open(xdb_filepath)?;
        let mut buffer = Vec::new();
        f.read_to_end(&mut buffer)?;
        Ok(Self { buffer })
    }
}
/// Returns the shared [`Searcher`], constructing it on the first call.
///
/// The instance is stored in a function-local `once_cell::sync::OnceCell`, so
/// later calls hand back the same reference.
///
/// # Panics
///
/// Panics if `Searcher::new` returns an error.
pub fn global_searcher() -> &'static Searcher {
    static SEARCHER: OnceCell<Searcher> = OnceCell::new();
    let load = || Searcher::new().unwrap();
    SEARCHER.get_or_init(load)
}
/// Renders a searcher as `searcher_with_len <n>`, where `<n>` is the number of
/// bytes held in its buffer.
impl fmt::Display for Searcher {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let byte_len = self.buffer.len();
        f.write_fmt(format_args!("searcher_with_len {}", byte_len))
    }
}
/// Reads the two 4-byte values stored for `ip` in the global searcher's buffer.
///
/// The two highest bytes of `ip` select a slot: the slot index is
/// `il0 * VECTOR_INDEX_COLS + il1`, each slot is `VECTOR_INDEX_SIZE` bytes wide,
/// and the slots begin `HEADER_INFO_LENGTH` bytes into the buffer. The first four
/// bytes of the slot are returned as the first tuple element and the next four
/// bytes as the second, both decoded by `get_block_by_size`.
pub fn get_start_end_ptr(ip: u32) -> (usize, usize) {
// Highest byte of the address.
let il0= ((ip >> 24) & 0xFF) as usize;
let il0 = ((ip >> 24) & 0xFF) as usize;
// Second-highest byte of the address.
let il1 = ((ip >> 16) & 0xFF) as usize;
let idx = VECTOR_INDEX_SIZE * (il0 * VECTOR_INDEX_COLS + il1);
let start_point = HEADER_INFO_LENGTH + idx;
let start_ptr = get_block_by_size(&global_searcher().buffer, start_point, 4);
let end_ptr = get_block_by_size(&global_searcher().buffer, start_point + 4, 4);
let start_ptr = get_block_by_size(global_searcher().buffer(), start_point, 4);
let end_ptr = get_block_by_size(global_searcher().buffer(), start_point + 4, 4);
(start_ptr, end_ptr)
}
/// check https://mp.weixin.qq.com/s/ndjzu0BgaeBmDOCw5aqHUg for details
pub fn search_by_ip<T>(ip: T) -> Result<String, Box<dyn Error>>
where
T: ToUIntIP,
T: ToUIntIP + Display,
{
let ip = ip.to_u32_ip()?;
let (start_ptr, end_ptr) = get_start_end_ptr(ip);
@ -83,31 +36,25 @@ where
while left <= right {
let mid = (left + right) >> 1;
let offset = &start_ptr + mid * SEGMENT_INDEX_SIZE;
let offset = start_ptr + mid * SEGMENT_INDEX_SIZE;
let buffer_ip_value = buffer_value(offset, SEGMENT_INDEX_SIZE);
let start_ip = get_block_by_size(&buffer_ip_value, 0, 4);
if &ip < &(start_ip as u32) {
let start_ip = get_block_by_size(buffer_ip_value, 0, 4);
if ip < (start_ip as u32) {
right = mid - 1;
} else if &ip > &(get_block_by_size(&buffer_ip_value, 4, 4) as u32) {
} else if ip > (get_block_by_size(buffer_ip_value, 4, 4) as u32) {
left = mid + 1;
} else {
let data_length = get_block_by_size(&buffer_ip_value, 8, 2);
let data_offset = get_block_by_size(&buffer_ip_value, 10, 4);
let result = String::from_utf8(
buffer_value(data_offset, data_length)
.to_vec());
let data_length = get_block_by_size(buffer_ip_value, 8, 2);
let data_offset = get_block_by_size(buffer_ip_value, 10, 4);
let result = String::from_utf8(buffer_value(data_offset, data_length).to_vec());
return Ok(result?);
}
}
Err("not matched".into())
}
/// Returns the `length`-byte window of `bytes` that begins at `offset`.
///
/// # Panics
///
/// Panics if the window does not lie entirely within `bytes`.
pub fn start_end_buffer_value(bytes: &[u8], offset: usize, length: usize) -> &[u8] {
    let end = offset + length;
    &bytes[offset..end]
}
/// Returns the `length` bytes of the global searcher's buffer that start at `offset`.
///
/// The slice borrows from the `'static` searcher returned by `global_searcher`.
///
/// # Panics
///
/// Panics if `offset..offset + length` is not within the buffer.
pub fn buffer_value(offset: usize, length: usize) -> &'static [u8] {
&global_searcher().buffer[offset..offset + length]
&global_searcher().buffer()[offset..offset + length]
}
#[inline]
@ -117,8 +64,8 @@ where
usize: From<T>,
{
let mut result: usize = 0;
for (index, value) in bytes[offset..offset+length].iter().enumerate() {
result |= usize::from(value.clone()) << (index*8);
for (index, value) in bytes[offset..offset + length].iter().enumerate() {
result |= usize::from(value.clone()) << (index * 8);
}
result
}
@ -128,6 +75,8 @@ mod tests {
use std::net::Ipv4Addr;
use std::str::FromStr;
use std::thread;
use std::fs::File;
use std::io::Read;
use super::*;

View File

@ -0,0 +1,66 @@
use std::env;
use std::error::Error;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::fs::File;
use std::io::Read;
use std::path::Path;
use once_cell::sync::OnceCell;
/// Caching strategy a caller can request when building a [`Searcher`].
///
/// NOTE(review): the variant names suggest "no cache", "vector index only" and
/// "whole file", but nothing in this file gives them distinct behavior yet;
/// confirm the intended semantics before relying on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CachePolicy {
    Never,
    VecIndex,
    Full,
}

/// Holds the contents of an xdb file in memory.
pub struct Searcher {
    /// Exposed through [`Searcher::vec_cache`]; `new` currently leaves it empty.
    vec_cache: Vec<u8>,
    /// Every byte read from the xdb file; exposed through [`Searcher::buffer`].
    full_cache: Vec<u8>,
}

impl Searcher {
    /// Reads the xdb file into memory.
    ///
    /// * `xdb_filepath` - path of the xdb file. When `None`, the default
    ///   locations are probed (see `default_detect_xdb_file`).
    /// * `cache_policy` - accepted but not consulted yet: the whole file is
    ///   always loaded.
    ///
    /// # Errors
    ///
    /// Returns an error when no path is given and no default file exists, or
    /// when the file cannot be opened or read.
    pub fn new(
        xdb_filepath: Option<&str>,
        cache_policy: Option<CachePolicy>,
    ) -> Result<Self, Box<dyn Error>> {
        // Bind the argument so the signature stays stable until policies are implemented.
        let _ = cache_policy;
        // Own the path: the detected default is a fresh `String`, so a borrowed
        // `&str` into it could not outlive this expression.
        let xdb_filepath = match xdb_filepath {
            Some(path) => path.to_owned(),
            None => Searcher::default_detect_xdb_file()?,
        };
        println!("load xdb searcher file at {xdb_filepath}");
        let mut f = File::open(&xdb_filepath)?;
        let mut buffer = Vec::new();
        f.read_to_end(&mut buffer)?;
        Ok(Self {
            vec_cache: Vec::new(),
            full_cache: buffer,
        })
    }

    /// Probes `../data/ip2region.xdb`, `../../data/ip2region.xdb` and
    /// `../../../data/ip2region.xdb`, in that order, and returns the first path
    /// that exists.
    ///
    /// # Errors
    ///
    /// Returns an error when none of the three paths exists.
    fn default_detect_xdb_file() -> Result<String, Box<dyn Error>> {
        for recurse in 1..4 {
            let filepath = "../".repeat(recurse) + "data/ip2region.xdb";
            if Path::new(&filepath).exists() {
                return Ok(filepath);
            }
        }
        Err("default filepath not find the xdb file, so you must set xdb_filepath".into())
    }

    /// Returns the bytes of the whole xdb file.
    pub fn buffer(&self) -> &Vec<u8> {
        &self.full_cache
    }

    /// Returns the `vec_cache` bytes.
    pub fn vec_cache(&self) -> &Vec<u8> {
        &self.vec_cache
    }
}
/// global init searcher thread safely
pub fn global_searcher() -> &'static Searcher {
static SEARCHER: OnceCell<Searcher> = OnceCell::new();
SEARCHER.get_or_init(|| Searcher::new().unwrap())
}
/// Renders a searcher as `searcher_with_len <n>`, where `<n>` is the number of
/// bytes returned by [`Searcher::buffer`].
impl Display for Searcher {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // `buffer` is an accessor method on this type, not a field.
        write!(f, "searcher_with_len {}", self.buffer().len())
    }
}