add vectorIndex/Content cache policy impl

This commit is contained in:
Lion 2022-06-20 15:14:33 +08:00
parent 4b4984fce0
commit 08b1656e02
4 changed files with 179 additions and 63 deletions

View File

@ -21,7 +21,7 @@ func printHelp() {
func testSearch() {
var err error
var dbFile = ""
var dbFile, cachePolicy = "", "vectorIndex"
for i := 2; i < len(os.Args); i++ {
r := os.Args[i]
if len(r) < 5 {
@ -41,13 +41,16 @@ func testSearch() {
switch r[2:eIdx] {
case "db":
dbFile = r[eIdx+1:]
case "cache-policy":
cachePolicy = r[eIdx+1:]
}
}
if dbFile == "" {
fmt.Printf("%s search [command options]\n", os.Args[0])
fmt.Printf("options:\n")
fmt.Printf(" --db string ip2region binary xdb file path\n")
fmt.Printf(" --db string ip2region binary xdb file path\n")
fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
return
}
@ -57,16 +60,20 @@ func testSearch() {
return
}
searcher, err := xdb.New(dbPath)
// create the searcher with the cache policy setting
searcher, err := createSearcher(dbPath, cachePolicy)
if err != nil {
log.Fatalf("failed to create searcher: %s", err.Error())
fmt.Printf("failed to create searcher: %s\n", err.Error())
return
}
defer func() {
searcher.Close()
fmt.Printf("searcher test program exited, thanks for trying\n")
}()
fmt.Println("ip2region xdb searcher test program, type `quit` to exit")
fmt.Printf(`ip2region xdb searcher test program, cachePolicy: %s
type 'quit' to exit
`, cachePolicy)
reader := bufio.NewReader(os.Stdin)
for {
fmt.Print("ip2region>> ")
@ -87,16 +94,16 @@ func testSearch() {
tStart := time.Now()
region, err := searcher.SearchByStr(line)
if err != nil {
fmt.Printf("\x1b[0;31merr:%s\x1b[0m\n", err.Error())
fmt.Printf("\x1b[0;31m{err: %s, ioCount: %d}\x1b[0m\n", err.Error(), searcher.GetIOCount())
} else {
fmt.Printf("\x1b[0;32m{region:%s, took:%s}\x1b[0m\n", region, time.Since(tStart))
fmt.Printf("\x1b[0;32m{region: %s, ioCount: %d, took: %s}\x1b[0m\n", region, searcher.GetIOCount(), time.Since(tStart))
}
}
}
func testBench() {
var err error
var dbFile, srcFile = "", ""
var dbFile, srcFile, cachePolicy = "", "", "vectorIndex"
for i := 2; i < len(os.Args); i++ {
r := os.Args[i]
if len(r) < 5 {
@ -118,14 +125,17 @@ func testBench() {
dbFile = r[eIdx+1:]
case "src":
srcFile = r[eIdx+1:]
case "cache-policy":
cachePolicy = r[eIdx+1:]
}
}
if dbFile == "" || srcFile == "" {
fmt.Printf("%s bench [command options]\n", os.Args[0])
fmt.Printf("options:\n")
fmt.Printf(" --db string ip2region binary xdb file path\n")
fmt.Printf(" --src string source ip text file path\n")
fmt.Printf(" --db string ip2region binary xdb file path\n")
fmt.Printf(" --src string source ip text file path\n")
fmt.Printf(" --cache-policy string cache policy: file/vectorIndex/content\n")
return
}
@ -135,7 +145,11 @@ func testBench() {
return
}
searcher, err := xdb.New(dbPath)
searcher, err := createSearcher(dbPath, cachePolicy)
if err != nil {
fmt.Printf("failed to create searcher: %s\n", err.Error())
return
}
defer func() {
searcher.Close()
}()
@ -196,6 +210,29 @@ func testBench() {
fmt.Printf("Bench finished, {total: %d, took: %s, cost: %d ns/op}\n", count, cost, cost.Nanoseconds()/int64(count))
}
func createSearcher(dbPath string, cachePolicy string) (*xdb.Searcher, error) {
switch cachePolicy {
case "nil", "file":
return xdb.NewWithFileOnly(dbPath)
case "vectorIndex":
vIndex, err := xdb.LoadVectorIndexFromFile(dbPath)
if err != nil {
return nil, fmt.Errorf("failed to load vector index from `%s`: %w", dbPath, err)
}
return xdb.NewWithVectorIndex(dbPath, vIndex)
case "content":
cBuff, err := xdb.LoadContentFromFile(dbPath)
if err != nil {
return nil, fmt.Errorf("failed to load content from '%s': %w", dbPath, err)
}
return xdb.NewWithBuffer(cBuff)
default:
return nil, fmt.Errorf("invalid cache policy `%s`, options: file/vectorIndex/content", cachePolicy)
}
}
func main() {
if len(os.Args) < 2 {
printHelp()

View File

@ -25,33 +25,79 @@ type Searcher struct {
handle *os.File
// header info
header []byte
header []byte
ioCount int
// use it only when this feature enabled.
// Preload the vector index will reduce the number of IO operations
// thus speedup the search process
vectorIndex [][]*VectorIndexBlock
// content buffer.
// running with the whole xdb file cached
contentBuff []byte
}
func New(dbFile string) (*Searcher, error) {
func baseNew(dbFile string, vIndex [][]*VectorIndexBlock, cBuff []byte) (*Searcher, error) {
var err error
// content buff first
if cBuff != nil {
// check and autoload the vector index
if vIndex == nil {
vIndex, err = LoadVectorIndexFromBuff(cBuff)
if err != nil {
return nil, fmt.Errorf("load vector index from buff: %w", err)
}
}
return &Searcher{
vectorIndex: vIndex,
contentBuff: cBuff,
}, nil
}
// open the xdb binary file
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
if err != nil {
return nil, err
}
return &Searcher{
handle: handle,
header: nil,
vectorIndex: nil,
handle: handle,
vectorIndex: vIndex,
}, nil
}
func (s *Searcher) Close() {
err := s.handle.Close()
func NewWithFileOnly(dbFile string) (*Searcher, error) {
return baseNew(dbFile, nil, nil)
}
func NewWithVectorIndex(dbFile string, vIndex [][]*VectorIndexBlock) (*Searcher, error) {
return baseNew(dbFile, vIndex, nil)
}
func NewWithBuffer(cBuff []byte) (*Searcher, error) {
vIndex, err := LoadVectorIndexFromBuff(cBuff)
if err != nil {
return
return nil, fmt.Errorf("load vector index from buff: %w", err)
}
return baseNew("", vIndex, cBuff)
}
func (s *Searcher) Close() {
if s.handle != nil {
err := s.handle.Close()
if err != nil {
return
}
}
}
// GetIOCount return the global io count for the last search
func (s *Searcher) GetIOCount() int {
return s.ioCount
}
// SearchByStr find the region for the specified ip string
@ -66,31 +112,23 @@ func (s *Searcher) SearchByStr(str string) (string, error) {
// Search find the region for the specified long ip
func (s *Searcher) Search(ip uint32) (string, error) {
// reset the global ioCount
s.ioCount = 0
// locate the segment index block based on the vector index
var vIndex *VectorIndexBlock
if s.vectorIndex != nil {
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
} else {
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
offset := l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
pos, err := s.handle.Seek(int64(HeaderInfoLength+offset), 0)
if err != nil {
return "", fmt.Errorf("seek to vector index[%d][%d]: %w", l0, l1, err)
}
offset := HeaderInfoLength + l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
var buff = make([]byte, 8)
rLen, err := s.handle.Read(buff)
// read the vector index block
var vIndexBuff = make([]byte, 8)
err := s.read(int64(offset), vIndexBuff)
vIndex, err = VectorIndexBlockDecode(vIndexBuff)
if err != nil {
return "", fmt.Errorf("read vector index at %d: %w", pos, err)
}
if rLen != len(buff) {
return "", fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
vIndex, err = VectorIndexBlockDecode(buff)
if err != nil {
return "", fmt.Errorf("invalid vector index block at %d: %w", pos, err)
return "", fmt.Errorf("read vector index block at %d: %w", offset, err)
}
}
@ -102,25 +140,12 @@ func (s *Searcher) Search(ip uint32) (string, error) {
for l <= h {
m := (l + h) >> 1
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
_, err := s.handle.Seek(int64(p), 0)
if err != nil {
return "", fmt.Errorf("seek to segment block at %d: %w", p, err)
}
rLen, err := s.handle.Read(buff)
err := s.read(int64(p), buff)
if err != nil {
return "", fmt.Errorf("read segment index at %d: %w", p, err)
}
if rLen != len(buff) {
return "", fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
// segIndex, err := SegmentIndexDecode(buff)
// if err != nil {
// return "", fmt.Errorf("invalid segment index block at %d: %w", p, err)
// }
// decode the data step by step to reduce the unnecessary calculations
// decode the data step by step to reduce the unnecessary operations
sip := binary.LittleEndian.Uint32(buff)
if ip < sip {
h = m - 1
@ -141,20 +166,40 @@ func (s *Searcher) Search(ip uint32) (string, error) {
}
// load and return the region data
_, err := s.handle.Seek(int64(dataPtr), 0)
if err != nil {
return "", fmt.Errorf("seek to data block at %d: %w", dataPtr, err)
}
var regionBuff = make([]byte, dataLen)
rLen, err := s.handle.Read(regionBuff)
err := s.read(int64(dataPtr), regionBuff)
if err != nil {
return "", fmt.Errorf("read region data at %d: %w", dataPtr, err)
}
if rLen != dataLen {
return "", fmt.Errorf("incomplete read: readed bytes should be %d", dataLen)
return "", fmt.Errorf("read region at %d: %w", dataPtr, err)
}
return string(regionBuff), nil
}
// do the data read operation based on the setting.
// content buffer first or will read from the file.
// this operation will invoke the Seek for file based read.
func (s *Searcher) read(offset int64, buff []byte) error {
if s.contentBuff != nil {
cLen := copy(buff, s.contentBuff[offset:])
if cLen != len(buff) {
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
} else {
_, err := s.handle.Seek(offset, 0)
if err != nil {
return fmt.Errorf("seek to %d: %w", offset, err)
}
s.ioCount++
rLen, err := s.handle.Read(buff)
if err != nil {
return fmt.Errorf("handle read: %w", err)
}
if rLen != len(buff) {
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
}
return nil
}

View File

@ -89,6 +89,24 @@ func LoadVectorIndexFromFile(dbFile string) ([][]*VectorIndexBlock, error) {
return LoadVectorIndex(handle)
}
// LoadVectorIndexFromBuff load vector index from content buffer
func LoadVectorIndexFromBuff(cBuff []byte) ([][]*VectorIndexBlock, error) {
var err error
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
for r := 0; r < VectorIndexRows; r++ {
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
for c := 0; c < VectorIndexCols; c++ {
offset := HeaderInfoLength + r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
vectorIndex[r][c], err = VectorIndexBlockDecode(cBuff[offset:])
if err != nil {
return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
}
}
}
return vectorIndex, nil
}
// LoadHeader load the header info from the specified handle
func LoadHeader(handle *os.File) ([]byte, error) {
_, err := handle.Seek(0, 0)

View File

@ -29,6 +29,22 @@ func TestLoadContent(t *testing.T) {
fmt.Printf("buff length: %d\n", len(buff))
}
func TestLoadVectorIndexFromBuff(t *testing.T) {
buff, err := LoadContentFromFile("../../../data/ip2region.xdb")
if err != nil {
fmt.Printf("failed to load xdb content: %s\n", err)
return
}
vIndex, err := LoadVectorIndexFromBuff(buff)
if err != nil {
fmt.Printf("failed to load vector index from buff: %s\n", err)
return
}
fmt.Printf("buff length: %d, vIndex length: %d\n", len(buff), len(vIndex))
}
func TestLoadHeader(t *testing.T) {
buff, err := LoadHeaderFromFile("../../../data/ip2region.xdb")
if err != nil {