ip2region/maker/golang/xdb/searcher.go
2022-06-17 15:23:02 +08:00

198 lines
5.2 KiB
Go

// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// ip2region database v2.0 searcher.
// this is part of the maker for testing and validate.
// please use the searcher in binding/golang for production use.
// And this is a Not thread safe implementation.
package xdb
import (
"encoding/binary"
"fmt"
"os"
)
type Searcher struct {
handle *os.File
// header info
header []byte
// use it only when this feature enabled.
// Preload the vector index will reduce the number of IO operations
// thus speedup the search process
vectorIndex [][]*VectorIndexBlock
}
func NewSearcher(dbFile string) (*Searcher, error) {
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
if err != nil {
return nil, err
}
return &Searcher{
handle: handle,
header: nil,
vectorIndex: nil,
}, nil
}
func (s *Searcher) Close() {
err := s.handle.Close()
if err != nil {
return
}
}
// LoadVectorIndex load and cache the vector index for search speedup.
// this will take up VectorIndexRows x VectorIndexCols x VectorIndexSize bytes memory.
func (s *Searcher) LoadVectorIndex() error {
// loaded already
if s.vectorIndex != nil {
return nil
}
// load all the vector index block
_, err := s.handle.Seek(HeaderInfoLength, 0)
if err != nil {
return fmt.Errorf("seek to vector index: %w", err)
}
var buff = make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
rLen, err := s.handle.Read(buff)
if err != nil {
return err
}
if rLen != len(buff) {
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
// decode the vector index blocks
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
for r := 0; r < VectorIndexRows; r++ {
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
for c := 0; c < VectorIndexCols; c++ {
offset := r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
vectorIndex[r][c], err = VectorIndexBlockDecode(buff[offset:])
if err != nil {
return fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
}
}
}
s.vectorIndex = vectorIndex
return nil
}
// ClearVectorIndex clear preloaded vector index cache
func (s *Searcher) ClearVectorIndex() {
s.vectorIndex = nil
}
// Search find the region for the specified ip address
func (s *Searcher) Search(ip uint32) (string, int, error) {
// locate the segment index block based on the vector index
var ioCount = 0
var vIndex *VectorIndexBlock
if s.vectorIndex != nil {
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
} else {
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
offset := l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
pos, err := s.handle.Seek(int64(HeaderInfoLength+offset), 0)
if err != nil {
return "", ioCount, fmt.Errorf("seek to vector index[%d][%d]: %w", l0, l1, err)
}
ioCount++
var buff = make([]byte, 8)
rLen, err := s.handle.Read(buff)
if err != nil {
return "", ioCount, fmt.Errorf("read vector index at %d: %w", pos, err)
}
if rLen != len(buff) {
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
vIndex, err = VectorIndexBlockDecode(buff)
if err != nil {
return "", ioCount, fmt.Errorf("invalid vector index block at %d: %w", pos, err)
}
}
//log.Printf("vIndex=%s", vIndex)
// binary search the segment index to get the region
var dataLen, dataPtr = 0, uint32(0)
var buff = make([]byte, SegmentIndexBlockSize)
var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
for l <= h {
// log.Printf("l=%d, h=%d", l, h)
m := (l + h) >> 1
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
// log.Printf("m=%d, p=%d", m, p)
_, err := s.handle.Seek(int64(p), 0)
if err != nil {
return "", ioCount, fmt.Errorf("seek to segment block at %d: %w", p, err)
}
ioCount++
rLen, err := s.handle.Read(buff)
if err != nil {
return "", ioCount, fmt.Errorf("read segment index at %d: %w", p, err)
}
if rLen != len(buff) {
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
// segIndex, err := SegmentIndexDecode(buff)
// if err != nil {
// return "", fmt.Errorf("invalid segment index block at %d: %w", p, err)
// }
// decode the data step by step to reduce the unnecessary calculations
sip := binary.LittleEndian.Uint32(buff)
if ip < sip {
h = m - 1
} else {
eip := binary.LittleEndian.Uint32(buff[4:])
if ip > eip {
l = m + 1
} else {
dataLen = int(binary.LittleEndian.Uint16(buff[8:]))
dataPtr = binary.LittleEndian.Uint32(buff[10:])
break
}
}
}
if dataLen == 0 {
return "", ioCount, nil
}
// load and return the region data
_, err := s.handle.Seek(int64(dataPtr), 0)
if err != nil {
return "", ioCount, fmt.Errorf("seek to data block at %d: %w", dataPtr, err)
}
ioCount++
var regionBuff = make([]byte, dataLen)
rLen, err := s.handle.Read(regionBuff)
if err != nil {
return "", ioCount, fmt.Errorf("read region data at %d: %w", dataPtr, err)
}
if rLen != dataLen {
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", dataLen)
}
return string(regionBuff), ioCount, nil
}