mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
add vectorIndex/Content cache policy impl
This commit is contained in:
parent
4b4984fce0
commit
08b1656e02
@ -21,7 +21,7 @@ func printHelp() {
|
||||
|
||||
func testSearch() {
|
||||
var err error
|
||||
var dbFile = ""
|
||||
var dbFile, cachePolicy = "", "vectorIndex"
|
||||
for i := 2; i < len(os.Args); i++ {
|
||||
r := os.Args[i]
|
||||
if len(r) < 5 {
|
||||
@ -41,13 +41,16 @@ func testSearch() {
|
||||
switch r[2:eIdx] {
|
||||
case "db":
|
||||
dbFile = r[eIdx+1:]
|
||||
case "cache-policy":
|
||||
cachePolicy = r[eIdx+1:]
|
||||
}
|
||||
}
|
||||
|
||||
if dbFile == "" {
|
||||
fmt.Printf("%s search [command options]\n", os.Args[0])
|
||||
fmt.Printf("options:\n")
|
||||
fmt.Printf(" --db string ip2region binary xdb file path\n")
|
||||
fmt.Printf(" --db string ip2region binary xdb file path\n")
|
||||
fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
|
||||
return
|
||||
}
|
||||
|
||||
@ -57,16 +60,20 @@ func testSearch() {
|
||||
return
|
||||
}
|
||||
|
||||
searcher, err := xdb.New(dbPath)
|
||||
// create the searcher with the cache policy setting
|
||||
searcher, err := createSearcher(dbPath, cachePolicy)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to create searcher: %s", err.Error())
|
||||
fmt.Printf("failed to create searcher: %s\n", err.Error())
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
searcher.Close()
|
||||
fmt.Printf("searcher test program exited, thanks for trying\n")
|
||||
}()
|
||||
|
||||
fmt.Println("ip2region xdb searcher test program, type `quit` to exit")
|
||||
fmt.Printf(`ip2region xdb searcher test program, cachePolicy: %s
|
||||
type 'quit' to exit
|
||||
`, cachePolicy)
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
for {
|
||||
fmt.Print("ip2region>> ")
|
||||
@ -87,16 +94,16 @@ func testSearch() {
|
||||
tStart := time.Now()
|
||||
region, err := searcher.SearchByStr(line)
|
||||
if err != nil {
|
||||
fmt.Printf("\x1b[0;31merr:%s\x1b[0m\n", err.Error())
|
||||
fmt.Printf("\x1b[0;31m{err: %s, ioCount: %d}\x1b[0m\n", err.Error(), searcher.GetIOCount())
|
||||
} else {
|
||||
fmt.Printf("\x1b[0;32m{region:%s, took:%s}\x1b[0m\n", region, time.Since(tStart))
|
||||
fmt.Printf("\x1b[0;32m{region: %s, ioCount: %d, took: %s}\x1b[0m\n", region, searcher.GetIOCount(), time.Since(tStart))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testBench() {
|
||||
var err error
|
||||
var dbFile, srcFile = "", ""
|
||||
var dbFile, srcFile, cachePolicy = "", "", "vectorIndex"
|
||||
for i := 2; i < len(os.Args); i++ {
|
||||
r := os.Args[i]
|
||||
if len(r) < 5 {
|
||||
@ -118,14 +125,17 @@ func testBench() {
|
||||
dbFile = r[eIdx+1:]
|
||||
case "src":
|
||||
srcFile = r[eIdx+1:]
|
||||
case "cache-policy":
|
||||
cachePolicy = r[eIdx+1:]
|
||||
}
|
||||
}
|
||||
|
||||
if dbFile == "" || srcFile == "" {
|
||||
fmt.Printf("%s bench [command options]\n", os.Args[0])
|
||||
fmt.Printf("options:\n")
|
||||
fmt.Printf(" --db string ip2region binary xdb file path\n")
|
||||
fmt.Printf(" --src string source ip text file path\n")
|
||||
fmt.Printf(" --db string ip2region binary xdb file path\n")
|
||||
fmt.Printf(" --src string source ip text file path\n")
|
||||
fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
|
||||
return
|
||||
}
|
||||
|
||||
@ -135,7 +145,11 @@ func testBench() {
|
||||
return
|
||||
}
|
||||
|
||||
searcher, err := xdb.New(dbPath)
|
||||
searcher, err := createSearcher(dbPath, cachePolicy)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to create searcher: %s\n", err.Error())
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
searcher.Close()
|
||||
}()
|
||||
@ -196,6 +210,29 @@ func testBench() {
|
||||
fmt.Printf("Bench finished, {total: %d, took: %s, cost: %d ns/op}\n", count, cost, cost.Nanoseconds()/int64(count))
|
||||
}
|
||||
|
||||
func createSearcher(dbPath string, cachePolicy string) (*xdb.Searcher, error) {
|
||||
switch cachePolicy {
|
||||
case "nil", "file":
|
||||
return xdb.NewWithFileOnly(dbPath)
|
||||
case "vectorIndex":
|
||||
vIndex, err := xdb.LoadVectorIndexFromFile(dbPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load vector index from `%s`: %w", dbPath, err)
|
||||
}
|
||||
|
||||
return xdb.NewWithVectorIndex(dbPath, vIndex)
|
||||
case "content":
|
||||
cBuff, err := xdb.LoadContentFromFile(dbPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load content from '%s': %w", dbPath, err)
|
||||
}
|
||||
|
||||
return xdb.NewWithBuffer(cBuff)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid cache policy `%s`, options: file/vectorIndex/content", cachePolicy)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
printHelp()
|
||||
|
||||
@ -25,33 +25,79 @@ type Searcher struct {
|
||||
handle *os.File
|
||||
|
||||
// header info
|
||||
header []byte
|
||||
header []byte
|
||||
ioCount int
|
||||
|
||||
// use it only when this feature enabled.
|
||||
// Preload the vector index will reduce the number of IO operations
|
||||
// thus speedup the search process
|
||||
vectorIndex [][]*VectorIndexBlock
|
||||
|
||||
// content buffer.
|
||||
// running with the whole xdb file cached
|
||||
contentBuff []byte
|
||||
}
|
||||
|
||||
func New(dbFile string) (*Searcher, error) {
|
||||
func baseNew(dbFile string, vIndex [][]*VectorIndexBlock, cBuff []byte) (*Searcher, error) {
|
||||
var err error
|
||||
|
||||
// content buff first
|
||||
if cBuff != nil {
|
||||
// check and autoload the vector index
|
||||
if vIndex == nil {
|
||||
vIndex, err = LoadVectorIndexFromBuff(cBuff)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load vector index from buff: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return &Searcher{
|
||||
vectorIndex: vIndex,
|
||||
contentBuff: cBuff,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// open the xdb binary file
|
||||
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Searcher{
|
||||
handle: handle,
|
||||
header: nil,
|
||||
|
||||
vectorIndex: nil,
|
||||
handle: handle,
|
||||
vectorIndex: vIndex,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *Searcher) Close() {
|
||||
err := s.handle.Close()
|
||||
func NewWithFileOnly(dbFile string) (*Searcher, error) {
|
||||
return baseNew(dbFile, nil, nil)
|
||||
}
|
||||
|
||||
func NewWithVectorIndex(dbFile string, vIndex [][]*VectorIndexBlock) (*Searcher, error) {
|
||||
return baseNew(dbFile, vIndex, nil)
|
||||
}
|
||||
|
||||
func NewWithBuffer(cBuff []byte) (*Searcher, error) {
|
||||
vIndex, err := LoadVectorIndexFromBuff(cBuff)
|
||||
if err != nil {
|
||||
return
|
||||
return nil, fmt.Errorf("load vector index from buff: %w", err)
|
||||
}
|
||||
|
||||
return baseNew("", vIndex, cBuff)
|
||||
}
|
||||
|
||||
func (s *Searcher) Close() {
|
||||
if s.handle != nil {
|
||||
err := s.handle.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GetIOCount return the global io count for the last search
|
||||
func (s *Searcher) GetIOCount() int {
|
||||
return s.ioCount
|
||||
}
|
||||
|
||||
// SearchByStr find the region for the specified ip string
|
||||
@ -66,31 +112,23 @@ func (s *Searcher) SearchByStr(str string) (string, error) {
|
||||
|
||||
// Search find the region for the specified long ip
|
||||
func (s *Searcher) Search(ip uint32) (string, error) {
|
||||
// reset the global ioCount
|
||||
s.ioCount = 0
|
||||
|
||||
// locate the segment index block based on the vector index
|
||||
var vIndex *VectorIndexBlock
|
||||
if s.vectorIndex != nil {
|
||||
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
|
||||
} else {
|
||||
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
|
||||
offset := l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
|
||||
pos, err := s.handle.Seek(int64(HeaderInfoLength+offset), 0)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("seek to vector index[%d][%d]: %w", l0, l1, err)
|
||||
}
|
||||
offset := HeaderInfoLength + l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
|
||||
|
||||
var buff = make([]byte, 8)
|
||||
rLen, err := s.handle.Read(buff)
|
||||
// read the vector index block
|
||||
var vIndexBuff = make([]byte, 8)
|
||||
err := s.read(int64(offset), vIndexBuff)
|
||||
vIndex, err = VectorIndexBlockDecode(vIndexBuff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read vector index at %d: %w", pos, err)
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return "", fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
vIndex, err = VectorIndexBlockDecode(buff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid vector index block at %d: %w", pos, err)
|
||||
return "", fmt.Errorf("read vector index block at %d: %w", offset, err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,25 +140,12 @@ func (s *Searcher) Search(ip uint32) (string, error) {
|
||||
for l <= h {
|
||||
m := (l + h) >> 1
|
||||
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
|
||||
_, err := s.handle.Seek(int64(p), 0)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("seek to segment block at %d: %w", p, err)
|
||||
}
|
||||
|
||||
rLen, err := s.handle.Read(buff)
|
||||
err := s.read(int64(p), buff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read segment index at %d: %w", p, err)
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return "", fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
// segIndex, err := SegmentIndexDecode(buff)
|
||||
// if err != nil {
|
||||
// return "", fmt.Errorf("invalid segment index block at %d: %w", p, err)
|
||||
// }
|
||||
// decode the data step by step to reduce the unnecessary calculations
|
||||
// decode the data step by step to reduce the unnecessary operations
|
||||
sip := binary.LittleEndian.Uint32(buff)
|
||||
if ip < sip {
|
||||
h = m - 1
|
||||
@ -141,20 +166,40 @@ func (s *Searcher) Search(ip uint32) (string, error) {
|
||||
}
|
||||
|
||||
// load and return the region data
|
||||
_, err := s.handle.Seek(int64(dataPtr), 0)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("seek to data block at %d: %w", dataPtr, err)
|
||||
}
|
||||
|
||||
var regionBuff = make([]byte, dataLen)
|
||||
rLen, err := s.handle.Read(regionBuff)
|
||||
err := s.read(int64(dataPtr), regionBuff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read region data at %d: %w", dataPtr, err)
|
||||
}
|
||||
|
||||
if rLen != dataLen {
|
||||
return "", fmt.Errorf("incomplete read: readed bytes should be %d", dataLen)
|
||||
return "", fmt.Errorf("read region at %d: %w", dataPtr, err)
|
||||
}
|
||||
|
||||
return string(regionBuff), nil
|
||||
}
|
||||
|
||||
// do the data read operation based on the setting.
|
||||
// content buffer first or will read from the file.
|
||||
// this operation will invoke the Seek for file based read.
|
||||
func (s *Searcher) read(offset int64, buff []byte) error {
|
||||
if s.contentBuff != nil {
|
||||
cLen := copy(buff, s.contentBuff[offset:])
|
||||
if cLen != len(buff) {
|
||||
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
} else {
|
||||
_, err := s.handle.Seek(offset, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek to %d: %w", offset, err)
|
||||
}
|
||||
|
||||
s.ioCount++
|
||||
rLen, err := s.handle.Read(buff)
|
||||
if err != nil {
|
||||
return fmt.Errorf("handle read: %w", err)
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -89,6 +89,24 @@ func LoadVectorIndexFromFile(dbFile string) ([][]*VectorIndexBlock, error) {
|
||||
return LoadVectorIndex(handle)
|
||||
}
|
||||
|
||||
// LoadVectorIndexFromBuff load vector index from content buffer
|
||||
func LoadVectorIndexFromBuff(cBuff []byte) ([][]*VectorIndexBlock, error) {
|
||||
var err error
|
||||
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
|
||||
for r := 0; r < VectorIndexRows; r++ {
|
||||
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
|
||||
for c := 0; c < VectorIndexCols; c++ {
|
||||
offset := HeaderInfoLength + r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
|
||||
vectorIndex[r][c], err = VectorIndexBlockDecode(cBuff[offset:])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vectorIndex, nil
|
||||
}
|
||||
|
||||
// LoadHeader load the header info from the specified handle
|
||||
func LoadHeader(handle *os.File) ([]byte, error) {
|
||||
_, err := handle.Seek(0, 0)
|
||||
|
||||
@ -29,6 +29,22 @@ func TestLoadContent(t *testing.T) {
|
||||
fmt.Printf("buff length: %d\n", len(buff))
|
||||
}
|
||||
|
||||
func TestLoadVectorIndexFromBuff(t *testing.T) {
|
||||
buff, err := LoadContentFromFile("../../../data/ip2region.xdb")
|
||||
if err != nil {
|
||||
fmt.Printf("failed to load xdb content: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
vIndex, err := LoadVectorIndexFromBuff(buff)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to load vector index from buff: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("buff length: %d, vIndex length: %d\n", len(buff), len(vIndex))
|
||||
}
|
||||
|
||||
func TestLoadHeader(t *testing.T) {
|
||||
buff, err := LoadHeaderFromFile("../../../data/ip2region.xdb")
|
||||
if err != nil {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user