Add Rust implementation.

This commit is contained in:
biluohc 2018-06-30 16:10:02 +08:00
parent 7fbe737e92
commit 8a48d41657
8 changed files with 675 additions and 0 deletions

8
.gitignore vendored
View File

@ -25,3 +25,11 @@ META-INF/
/binding/java/*.jar
/binding/c/testSearcher
# rust
Cargo.lock
target

17
Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
# Manifest for the Rust binding of ip2region.
[package]
name = "ip2region"
version = "0.1.0"
authors = ["biluohc <biluohc@qq.com>"]
# Paths that are packaged; the database file is listed because src/lazy.rs embeds it with include_bytes!.
include = ["src/**/*", "Cargo.toml", "data/ip2region.db", "binding/rust.md"]
# Command-line front end built from src/main.rs.
[[bin]]
name="ip2region"
path="src/main.rs"
[dependencies]
# Optional dependency: only pulled in when the `lazy` feature is enabled.
lazy_static ={ version = "^1", optional = true }
[features]
# `lazy` switches on the optional lazy_static dependency.
lazy = ["lazy_static"]

23
binding/rust.md Normal file
View File

@ -0,0 +1,23 @@
## Rust 客户端
## 用法
基本都在 `src/main.rs` 的 `overview` 函数里,
另外 `cargo doc --features lazy`可以看到所有`API`
## `lazy` feature 直接把DB打包到二进制
复制下面的 toml 添加依赖即可使用,其 api 是 `memory_search`
只是目前 DB足有3.2M,还是有些感人的。
```toml
[dependencies.ip2region]
git = "https://github.com/lionsoul2014/ip2region"
# git = "https://github.com/biluohc/ip2region"
version = "*"
features = ["lazy"]
```

32
src/error.rs Normal file
View File

@ -0,0 +1,32 @@
use std::{self, io, num, str};
/// Crate-wide result alias whose error type is [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Every failure the crate reports.
#[derive(Debug)]
pub enum Error {
    /// A fixed diagnostic message, for example `"not found"`.
    Str(&'static str),
    /// An I/O operation on the database file failed.
    Io(io::Error),
    /// Bytes that should have been text were not valid UTF-8.
    Utf8(str::Utf8Error),
    /// A number could not be parsed.
    Int(num::ParseIntError),
}

impl std::fmt::Display for Error {
    /// Writes the message itself for `Str` and a short prefix plus the inner
    /// error for the wrapping variants.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            Error::Str(msg) => f.write_str(msg),
            Error::Io(ref err) => write!(f, "io error: {}", err),
            Error::Utf8(ref err) => write!(f, "utf-8 error: {}", err),
            Error::Int(ref err) => write!(f, "integer parse error: {}", err),
        }
    }
}

impl std::error::Error for Error {
    /// Returns the wrapped error, if any, so callers can walk the chain.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match *self {
            Error::Str(_) => None,
            Error::Io(ref err) => Some(err),
            Error::Utf8(ref err) => Some(err),
            Error::Int(ref err) => Some(err),
        }
    }
}

impl From<&'static str> for Error {
    fn from(e: &'static str) -> Self {
        Error::Str(e)
    }
}

impl From<io::Error> for Error {
    fn from(e: io::Error) -> Self {
        Error::Io(e)
    }
}

impl From<str::Utf8Error> for Error {
    fn from(e: str::Utf8Error) -> Self {
        Error::Utf8(e)
    }
}

impl From<num::ParseIntError> for Error {
    fn from(e: num::ParseIntError) -> Self {
        Error::Int(e)
    }
}

16
src/lazy.rs Normal file
View File

@ -0,0 +1,16 @@
static DB_BYTES: &'static [u8] = include_bytes!("../data/ip2region.db");
lazy_static! {
static ref OWNED_IP_2_REGION: OwnedIp2Region = {
OwnedIp2Region {
db_bin_bytes: Cow::Borrowed(DB_BYTES),
first_index_ptr: get_u32(&DB_BYTES[..], 0),
total_blocks: (get_u32(&DB_BYTES[..], 4) - get_u32(&DB_BYTES[..], 0))
/ INDEX_BLOCK_LENGTH + 1,
}
};
}
pub fn memory_search(ip_str: &str) -> Result<IpInfo> {
OWNED_IP_2_REGION.memory_search(ip_str)
}

346
src/lib.rs Normal file
View File

@ -0,0 +1,346 @@
#[cfg(feature = "lazy")]
#[macro_use]
extern crate lazy_static;
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::{fmt, str};
mod error;
pub use error::{Error, Result};
mod owned;
#[cfg(feature = "lazy")]
pub use owned::memory_search;
pub use owned::{OwnedIp2Region, OwnedIpInfo};
// Size in bytes of one index entry: start ip at offset 0, end ip at offset 4, packed data pointer at offset 8.
const INDEX_BLOCK_LENGTH: u32 = 12;
// Number of header bytes `btree_search` reads, starting at file offset 8.
const TOTAL_HEADER_LENGTH: usize = 8192;
// Per-thread scratch buffer used by `binary_search` for index entries and data records.
thread_local!(static BUF: RefCell<[u8;256]> = RefCell::new([0;256]));
// Per-thread scratch buffer used by `btree_search` for the header, an index partition and the data record.
thread_local!(static BUF_BTREE: RefCell<[u8;TOTAL_HEADER_LENGTH]> = RefCell::new([0;TOTAL_HEADER_LENGTH]));
/// Borrowed view of one database record.
///
/// The string fields are slices of the record text, which is split on `|`.
// The `ISP` field keeps its upper-case name, hence the lint exemption.
#[allow(non_snake_case)]
#[derive(Debug, Default, Clone, PartialEq)]
pub struct IpInfo<'a> {
/// Integer read from the first four bytes of the record.
pub city_id: u32,
/// First `|`-separated column of the record text.
pub country: &'a str,
/// Second column.
pub region: &'a str,
/// Third column.
pub province: &'a str,
/// Fourth column.
pub city: &'a str,
/// Fifth column.
pub ISP: &'a str,
}
impl<'a> IpInfo<'a> {
    /// Builds a record view from the `|`-separated columns of a record.
    ///
    /// The first five entries of `fields` become `country`, `region`,
    /// `province`, `city` and `ISP`; any further entries are ignored.
    ///
    /// # Panics
    ///
    /// Panics when `fields` holds fewer than five entries.
    fn new(city_id: u32, fields: &[&'a str]) -> Self {
        if fields.len() < 5 {
            // Format arguments are passed straight to `panic!`; wrapping them in
            // `format!` is rejected by newer editions.
            panic!("invalid fields: {:?}", fields);
        }
        IpInfo {
            country: fields[0],
            region: fields[1],
            province: fields[2],
            city: fields[3],
            ISP: fields[4],
            city_id,
        }
    }

    /// Copies every borrowed column into an [`OwnedIpInfo`].
    pub fn to_owned(&self) -> OwnedIpInfo {
        OwnedIpInfo {
            city_id: self.city_id,
            country: self.country.to_owned(),
            region: self.region.to_owned(),
            province: self.province.to_owned(),
            city: self.city.to_owned(),
            ISP: self.ISP.to_owned(),
        }
    }
}
impl<'a> fmt::Display for IpInfo<'a> {
    /// Renders the record as `city_id|country|region|province|city|ISP`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.city_id)?;
        let columns = [
            self.country,
            self.region,
            self.province,
            self.city,
            self.ISP,
        ];
        for column in &columns {
            write!(f, "|{}", column)?;
        }
        Ok(())
    }
}
/// Decodes the text part of a data record into an [`IpInfo`].
///
/// `line` must be UTF-8 text with at least five `|`-separated columns.
///
/// # Errors
///
/// Returns an error when `line` is not valid UTF-8 or has fewer than five
/// columns, so a damaged record is reported instead of aborting the process.
fn get_ip_info(city_id: u32, line: &[u8]) -> Result<IpInfo> {
    let text = str::from_utf8(line)?;
    let fields = text.split('|').collect::<Vec<&str>>();
    // `IpInfo::new` panics on short input, so reject it here first.
    if fields.len() < 5 {
        return Err("invalid record(it does not have 5 fields)".into());
    }
    Ok(IpInfo::new(city_id, &fields))
}
/// Reads the four bytes starting at `offset` as a little-endian `u32`.
///
/// # Panics
///
/// Panics when `bytes` does not contain four bytes starting at `offset`.
fn get_u32(bytes: &[u8], offset: u32) -> u32 {
    let start = offset as usize;
    // Fold from the most significant byte (the last one) down to the first.
    bytes[start..start + 4]
        .iter()
        .rev()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
/// Converts a dotted-quad IPv4 string such as `"1.2.3.4"` into its `u32` value.
///
/// Empty components are skipped, so `"1.2.3.4."` is read as `"1.2.3.4"`.
///
/// # Errors
///
/// Returns an error when the string does not have exactly four non-empty
/// components, or when a component is not an integer in `0..=255`.
fn ip2u32(ip_str: &str) -> Result<u32> {
    let octets = ip_str
        .split('.')
        .filter(|part| !part.is_empty())
        .collect::<Vec<&str>>();
    if octets.len() != 4 {
        return Err("ip format error(it does not have 4 parts, like 1.1.1.1)".into());
    }
    let mut ip: u32 = 0;
    for octet in octets {
        // Parsing as `u8` rejects values above 255, which previously wrapped or
        // overflowed the accumulated sum.
        ip = (ip << 8) | u32::from(octet.parse::<u8>()?);
    }
    Ok(ip)
}
/// File-backed searcher: lookups seek and read from the open database file.
pub struct Ip2Region {
// Open handle to the database file.
db_file: File,
// Header index cached by the first `btree_search` call: the start ips ...
header_sip: Vec<u32>,
// ... and the index pointers that go with them.
header_ptr: Vec<u32>,
// Number of cached header entries; 0 means the header has not been loaded yet.
header_len: u32,
// Super block values cached by the first `binary_search` call.
first_index_ptr: u32,
last_index_ptr: u32,
// Number of index entries; 0 means the super block has not been read yet.
total_blocks: u32,
}
impl Ip2Region {
pub fn new(path: &str) -> io::Result<Self> {
let file = File::open(path)?;
Ok(Ip2Region {
db_file: file,
header_sip: Vec::new(),
header_ptr: Vec::new(),
header_len: 0,
first_index_ptr: 0,
last_index_ptr: 0,
total_blocks: 0,
})
}
pub fn to_owned(&mut self) -> Result<OwnedIp2Region> {
OwnedIp2Region::new2(&mut self.db_file).map_err(Error::Io)
}
pub fn binary_search(&mut self, ip_str: &str) -> Result<OwnedIpInfo> {
BUF.with(|buf| {
let mut buf = buf.borrow_mut();
if self.total_blocks == 0 {
self.db_file.seek(SeekFrom::Start(0))?;
self.db_file.read_exact(&mut buf[..8])?;
self.first_index_ptr = get_u32(&buf[..8], 0);
self.last_index_ptr = get_u32(&buf[..8], 4);
self.total_blocks =
(self.last_index_ptr - self.first_index_ptr) / INDEX_BLOCK_LENGTH + 1;
}
let ip = ip2u32(ip_str)?;
let mut h = self.total_blocks;
let (mut data_ptr, mut l) = (0u32, 0u32);
while l <= h {
let m = (l + h) >> 1;
let p = self.first_index_ptr + m * INDEX_BLOCK_LENGTH;
self.db_file.seek(SeekFrom::Start(p as u64))?;
self.db_file.read_exact(&mut buf[0..INDEX_BLOCK_LENGTH as usize])?;
let sip = get_u32(&buf[..INDEX_BLOCK_LENGTH as usize], 0);
if ip < sip {
h = m - 1;
} else {
let eip = get_u32(&buf[..INDEX_BLOCK_LENGTH as usize], 4);
if ip > eip {
l = m + 1;
} else {
data_ptr = get_u32(&buf[..INDEX_BLOCK_LENGTH as usize], 8);
break;
}
}
}
if data_ptr == 0 {
Err("not found")?;
}
let data_len = (data_ptr >> 24) & 0xff;
data_ptr = data_ptr & 0x00FFFFFF;
self.db_file.seek(SeekFrom::Start(data_ptr as u64))?;
self.db_file.read_exact(&mut buf[0..data_len as usize])?;
get_ip_info(
get_u32(&buf[..data_len as usize], 0),
&buf[4..data_len as usize],
).map(|i| i.to_owned())
})
}
pub fn btree_search(&mut self, ip_str: &str) -> Result<OwnedIpInfo> {
BUF_BTREE.with(|buf| {
let mut buf = buf.borrow_mut();
if self.header_len == 0 {
self.db_file.seek(SeekFrom::Start(8))?;
self.db_file.read_exact(&mut buf[0..TOTAL_HEADER_LENGTH])?;
let (mut i, mut idx) = (0, 0);
while i < TOTAL_HEADER_LENGTH {
let sip = get_u32(&buf[0..TOTAL_HEADER_LENGTH], i as u32);
let idx_ptr = get_u32(&buf[0..TOTAL_HEADER_LENGTH], i as u32 + 4);
if idx_ptr == 0 {
break;
}
self.header_sip.push(sip);
self.header_ptr.push(idx_ptr);
i += 8;
idx += 1;
}
self.header_len = idx
}
let ip = ip2u32(ip_str)?;
let mut h = self.header_len;
let (mut sptr, mut eptr, mut l) = (0u32, 0u32, 0u32);
while l <= h {
let m = (l + h) >> 1;
if m < self.header_len {
if ip == self.header_sip[m as usize] {
if m > 0 {
sptr = self.header_ptr[m as usize - 1];
eptr = self.header_ptr[m as usize];
} else {
sptr = self.header_ptr[m as usize];
eptr = self.header_ptr[m as usize + 1];
}
break;
}
if ip < self.header_sip[m as usize] {
if m == 0 {
sptr = self.header_ptr[m as usize];
eptr = self.header_ptr[m as usize + 1];
break;
} else if ip > self.header_sip[m as usize - 1] {
sptr = self.header_ptr[m as usize - 1];
eptr = self.header_ptr[m as usize];
break;
}
h = m - 1
} else {
if m == self.header_len - 1 {
println!("m/hl: {}/{}", m, self.header_len);
sptr = self.header_ptr[m as usize - 1];
eptr = self.header_ptr[m as usize];
break;
} else if ip <= self.header_sip[m as usize + 1] {
sptr = self.header_ptr[m as usize];
eptr = self.header_ptr[m as usize + 1];
break;
}
l = m + 1
}
}
}
if sptr == 0 {
Err("not found")?;
}
let block_len = eptr - sptr;
self.db_file.seek(SeekFrom::Start(sptr as u64))?;
let buf_size = (block_len + INDEX_BLOCK_LENGTH) as usize;
self.db_file.read_exact(&mut buf[..buf_size])?;
let mut data_ptr = 0;
h = block_len / INDEX_BLOCK_LENGTH;
l = 0;
while l <= h {
let m = (l + h) >> 1;
let p = m * INDEX_BLOCK_LENGTH;
let sip = get_u32(&buf[..buf_size], p);
if ip < sip {
h = m - 1;
} else {
let eip = get_u32(&buf[..buf_size], p + 4);
if ip > eip {
l = m + 1;
} else {
data_ptr = get_u32(&buf[..buf_size], p + 8);
break;
}
}
}
if data_ptr == 0 {
Err("not found")?;
}
let data_len = (data_ptr >> 24) & 0xff;
data_ptr = data_ptr & 0x00FFFFFF;
self.db_file.seek(SeekFrom::Start(data_ptr as u64))?;
self.db_file.read_exact(&mut buf[0..data_len as usize])?;
get_ip_info(
get_u32(&buf[..data_len as usize], 0),
&buf[4..data_len as usize],
).map(|i| i.to_owned())
})
}
}
// cargo test --features lazy -- --nocapture
#[cfg(test)]
mod tests {
    use super::*;

    const DB_PATH: &str = "data/ip2region.db";

    /// Every search strategy (and the embedded database when the `lazy` feature
    /// is enabled) must give the same answer for a handful of addresses.
    #[test]
    fn it_works() {
        let mut ip2 = Ip2Region::new(DB_PATH).unwrap();
        let ip2o = ip2.to_owned().unwrap();
        let samples = [
            "117.136.105.202",
            "47.95.47.253",
            "127.0.0.1",
            "10.0.0.1",
            "1.1.1.1",
        ];
        for ip in samples.iter().cloned() {
            #[cfg(feature = "lazy")]
            {
                println!("lazy__: {:?}", memory_search(ip));
                if ip2o.memory_search(ip).is_ok() {
                    assert_eq!(ip2o.memory_search(ip).unwrap(), memory_search(ip).unwrap());
                } else {
                    assert!(memory_search(ip).is_err());
                }
            }
            println!("memory: {:?}", ip2o.memory_search(ip));
            println!("binary: {:?}", ip2.binary_search(ip));
            println!("btree : {:?}", ip2.btree_search(ip));
            if ip2o.memory_search(ip).is_ok() {
                assert_eq!(
                    ip2o.memory_search(ip).unwrap().to_owned(),
                    ip2.binary_search(ip).unwrap()
                );
                assert_eq!(
                    ip2o.memory_search(ip).unwrap().to_owned(),
                    ip2.btree_search(ip).unwrap()
                );
            } else {
                assert!(ip2.binary_search(ip).is_err());
                assert!(ip2.btree_search(ip).is_err());
            }
            println!();
        }
    }
}

132
src/main.rs Normal file
View File

@ -0,0 +1,132 @@
extern crate ip2region;
const DB_PATH: &str = "data/ip2region.db";
use ip2region::*;
use std::env;
use std::io::{self, *};
use std::time::Instant;
// cargo run --release --features lazy
/// Entry point: prints the usage banner, then either runs `overview` (no
/// arguments) or an interactive prompt.
///
/// Arguments are `[db_file] [algorithm]`; a `db_file` of `.` selects `DB_PATH`
/// and the algorithm defaults to `memory`.
fn main() {
    let args = env::args().skip(1).collect::<Vec<String>>();
    println!(
        r#"ip2region cli test
+-----------------------------------------------------------------+
| ip2region [db_file] [algorithm]
| format  : [ip] [algorithm]
| overview: cargo run --release
| usage   : cargo run --release -- [db_file] [algorithm]
| exit    : quit or exit or Ctrl+C
+-----------------------------------------------------------------+"#
    );
    if args.is_empty() {
        overview();
        return;
    }

    let db_path = if args[0] != "." {
        args[0].as_str()
    } else {
        DB_PATH
    };
    let mut ip2 = Ip2Region::new(db_path).unwrap();
    let ip2o = ip2.to_owned().unwrap();
    let default_alg = args
        .get(1)
        .map(|name| name.to_lowercase())
        .unwrap_or_else(|| "memory".to_owned());

    let mut buf = String::with_capacity(256);
    loop {
        buf.clear();
        print!("ip2region>>");
        io::stdout().flush().unwrap();
        let line = match io::stdin().read_line(&mut buf) {
            // Zero bytes read means end of input; without this arm the prompt
            // would spin forever once stdin is closed.
            Ok(0) => {
                println!();
                break;
            }
            Ok(_) => buf.trim(),
            Err(e) => panic!("[Fatal]: Read String from Stdin Error: {:?}", e),
        };
        if line.is_empty() {
            continue;
        }
        if line == "quit" || line == "exit" {
            println!("[Info]: Thanks for your use, Bye.");
            break;
        }

        let mut words = line.split_whitespace();
        let ip = match words.next() {
            Some(ip) => ip,
            None => continue,
        };
        // A per-line algorithm overrides the default and is lower-cased the
        // same way as the command-line one.
        let alg = words
            .next()
            .map(|name| name.to_lowercase())
            .unwrap_or_else(|| default_alg.clone());

        let start = Instant::now();
        let res = match alg.as_str() {
            "memory" => ip2o.memory_search(ip).map(|o| o.to_owned()),
            "binary" => ip2.binary_search(ip),
            "btree" | "b-tree" => ip2.btree_search(ip),
            miss => {
                eprintln!("Not have the Algorithm: {:?}", miss);
                continue;
            }
        };
        // Whole seconds are included so a slow lookup is not under-reported.
        let elapsed = start.elapsed();
        let micros = elapsed.as_secs() * 1_000_000 + u64::from(elapsed.subsec_micros());
        match res {
            Ok(i) => {
                println!("[{:6} {:06} microseconds]: {}", alg, micros, i);
            }
            Err(e) => {
                eprintln!("[Error]: {:?}", e);
            }
        };
    }
}
/// Runs every search strategy over a fixed list of sample addresses and prints
/// the result of each lookup with the sub-second microseconds it took.
fn overview() {
    let mut searcher = Ip2Region::new(DB_PATH).unwrap();
    let in_memory = searcher.to_owned().unwrap();
    let samples = [
        "117.136.105.202",
        "47.95.47.253",
        "127.0.0.1",
        "10.0.0.1",
        "1.1.1.1",
    ];
    for ip in samples.iter() {
        let timer = Instant::now();
        let found = in_memory.memory_search(ip);
        let micros = timer.elapsed().subsec_micros();
        println!("memory {:06} microseconds: {:?}", micros, found);

        #[cfg(feature = "lazy")]
        {
            let timer = Instant::now();
            let found = memory_search(ip);
            let micros = timer.elapsed().subsec_micros();
            println!("lazy__ {:06} microseconds: {:?}", micros, found);
        }

        let timer = Instant::now();
        let found = searcher.binary_search(ip);
        let micros = timer.elapsed().subsec_micros();
        println!("binary {:06} microseconds: {:?}", micros, found);

        let timer = Instant::now();
        let found = searcher.btree_search(ip);
        let micros = timer.elapsed().subsec_micros();
        println!("btree {:06} microseconds: {:?}", micros, found);

        println!();
    }
}

101
src/owned.rs Normal file
View File

@ -0,0 +1,101 @@
use super::*;
use std::borrow::Cow;
#[cfg(feature = "lazy")]
include!("lazy.rs");
/// Owned counterpart of `IpInfo`: the same fields, held as `String`s.
// The `ISP` field keeps its upper-case name, hence the lint exemption.
#[allow(non_snake_case)]
#[derive(Debug, Default, Clone, PartialEq)]
pub struct OwnedIpInfo {
/// Same value as `IpInfo::city_id`.
pub city_id: u32,
/// Owned copy of `IpInfo::country`.
pub country: String,
/// Owned copy of `IpInfo::region`.
pub region: String,
/// Owned copy of `IpInfo::province`.
pub province: String,
/// Owned copy of `IpInfo::city`.
pub city: String,
/// Owned copy of `IpInfo::ISP`.
pub ISP: String,
}
impl OwnedIpInfo {
    /// Returns an [`IpInfo`] whose string fields borrow from `self`.
    pub fn as_ref<'a>(&'a self) -> IpInfo<'a> {
        IpInfo {
            ISP: self.ISP.as_str(),
            city: self.city.as_str(),
            province: self.province.as_str(),
            region: self.region.as_str(),
            country: self.country.as_str(),
            city_id: self.city_id,
        }
    }
}
impl fmt::Display for OwnedIpInfo {
    /// Formats exactly like the borrowed view returned by `as_ref`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let view = self.as_ref();
        fmt::Display::fmt(&view, f)
    }
}
/// Searcher that keeps the whole database image in memory.
pub struct OwnedIp2Region {
// Super block values: offset of the first index entry ...
first_index_ptr: u32,
// last_index_ptr: u32,
// ... and the number of index entries.
total_blocks: u32,
// The database image: borrowed when it is embedded through the `lazy` feature, owned when it was read from a file.
db_bin_bytes: Cow<'static, [u8]>,
}
impl OwnedIp2Region {
pub fn new(path: &str) -> io::Result<Self> {
let mut file = File::open(path)?;
Self::new2(&mut file)
}
pub(crate) fn new2(file: &mut File) -> io::Result<Self> {
let file_size = file.metadata()?.len();
let mut bytes = Vec::with_capacity(file_size as usize);
file.read_to_end(&mut bytes)?;
let first_index_ptr = get_u32(&bytes[..], 0);
let last_index_ptr = get_u32(&bytes[..], 4);
let total_blocks = (last_index_ptr - first_index_ptr) / INDEX_BLOCK_LENGTH + 1;
let db_bin_bytes = Cow::Owned(bytes);
Ok(OwnedIp2Region {
first_index_ptr,
total_blocks,
db_bin_bytes,
})
}
pub fn memory_search(&self, ip_str: &str) -> Result<IpInfo> {
let ip = ip2u32(ip_str)?;
let mut h = self.total_blocks;
let (mut data_ptr, mut l) = (0u32, 0u32);
while l <= h {
let m = (l + h) >> 1;
let p = self.first_index_ptr + m * INDEX_BLOCK_LENGTH;
let sip = get_u32(&self.db_bin_bytes[..], p);
if ip < sip {
h = m - 1;
} else {
let eip = get_u32(&self.db_bin_bytes[..], p + 4);
if ip > eip {
l = m + 1;
} else {
data_ptr = get_u32(&self.db_bin_bytes[..], p + 8);
break;
}
}
}
if data_ptr == 0 {
Err("not found")?;
}
let data_len = (data_ptr >> 24) & 0xff;
data_ptr = data_ptr & 0x00FFFFFF;
get_ip_info(
get_u32(&self.db_bin_bytes[..], data_ptr),
&self.db_bin_bytes[(data_ptr + 4) as usize..(data_ptr + data_len) as usize],
)
}
}