use the raw []byte as vectorIndex to reduce memory alloc

This commit is contained in:
lion 2022-06-23 17:56:28 +08:00
parent 670cf1dcb3
commit 3b96003ca1
7 changed files with 127 additions and 276 deletions

View File

@ -174,7 +174,7 @@ func testBench() {
return
}
var count, tStart = 0, time.Now()
var count, tStart, costs = int64(0), time.Now(), int64(0)
var scanner = bufio.NewScanner(handle)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
@ -204,12 +204,15 @@ func testBench() {
mip := xdb.MidIP(sip, eip)
for _, ip := range []uint32{sip, xdb.MidIP(sip, mip), mip, xdb.MidIP(mip, eip), eip} {
sTime := time.Now()
region, err := searcher.Search(ip)
if err != nil {
fmt.Printf("failed to search ip '%s': %s\n", xdb.Long2IP(ip), err)
return
}
costs += time.Since(sTime).Nanoseconds()
// check the region info
if region != ps[2] {
fmt.Printf("failed Search(%s) with (%s != %s)\n", xdb.Long2IP(ip), region, ps[2])
@ -221,7 +224,8 @@ func testBench() {
}
cost := time.Since(tStart)
fmt.Printf("Bench finished, {cachePolicy: %s, total: %d, took: %s, cost: %d ns/op}\n", cachePolicy, count, cost, cost.Nanoseconds()/int64(count))
fmt.Printf("Bench finished, {cachePolicy: %s, total: %d, took: %s, cost: %d μs/op}\n",
cachePolicy, count, cost, costs/count/1000)
}
func createSearcher(dbPath string, cachePolicy string) (*xdb.Searcher, error) {

View File

@ -0,0 +1,36 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"encoding/binary"
"fmt"
)
// Header holds the fields decoded from the first 16 bytes of a header buffer.
// All fields are read as little-endian integers by NewHeader.
type Header struct {
	Version       uint16      // bytes 0-1
	IndexPolicy   IndexPolicy // bytes 2-3
	CreatedAt     uint32      // bytes 4-7
	StartIndexPtr uint32      // bytes 8-11
	EndIndexPtr   uint32      // bytes 12-15
}

// NewHeader decodes a Header from input.
//
// Only the first 16 bytes are consulted; anything after them is ignored.
// An error is returned when input holds fewer than 16 bytes.
func NewHeader(input []byte) (*Header, error) {
	if len(input) < 16 {
		return nil, fmt.Errorf("invalid input buffer")
	}

	le := binary.LittleEndian
	hdr := new(Header)
	hdr.Version = le.Uint16(input[0:])
	hdr.IndexPolicy = IndexPolicy(le.Uint16(input[2:]))
	hdr.CreatedAt = le.Uint32(input[4:])
	hdr.StartIndexPtr = le.Uint32(input[8:])
	hdr.EndIndexPtr = le.Uint32(input[12:])
	return hdr, nil
}

View File

@ -1,133 +0,0 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"encoding/binary"
"fmt"
"strings"
)
// IndexPolicy enumerates the index policies known to this package.
type IndexPolicy int

// Known index policies.
const (
	VectorIndexPolicy IndexPolicy = 1
	BTreeIndexPolicy  IndexPolicy = 2
)

// IndexPolicyFromString converts a policy name into its IndexPolicy value.
//
// The comparison is case-insensitive; "vector" and "btree" are the accepted
// names. Any other name yields VectorIndexPolicy together with a non-nil error.
func IndexPolicyFromString(str string) (IndexPolicy, error) {
	name := strings.ToLower(str)
	if name == "vector" {
		return VectorIndexPolicy, nil
	}
	if name == "btree" {
		return BTreeIndexPolicy, nil
	}
	return VectorIndexPolicy, fmt.Errorf("invalid policy '%s'", str)
}

// String returns the display name of the policy, or "unknown" for a value
// that is not one of the declared constants.
func (i IndexPolicy) String() string {
	if i == VectorIndexPolicy {
		return "VectorIndex"
	}
	if i == BTreeIndexPolicy {
		return "BtreeIndex"
	}
	return "unknown"
}
// SegmentIndexBlockSize is the encoded size, in bytes, of a SegmentIndexBlock.
const SegmentIndexBlockSize = 14

// SegmentIndexBlock is one segment index entry: an IP range together with the
// length and pointer of its data.
type SegmentIndexBlock struct {
	StartIP uint32
	EndIP   uint32
	DataLen uint16
	DataPtr uint32
}

// SegmentIndexDecode parses the first 14 bytes of input as a little-endian
// SegmentIndexBlock. It returns an error when input is shorter than that.
func SegmentIndexDecode(input []byte) (*SegmentIndexBlock, error) {
	if len(input) < SegmentIndexBlockSize {
		return nil, fmt.Errorf("input is less than 14 bytes")
	}

	le := binary.LittleEndian
	blk := new(SegmentIndexBlock)
	blk.StartIP = le.Uint32(input[0:])
	blk.EndIP = le.Uint32(input[4:])
	blk.DataLen = le.Uint16(input[8:])
	blk.DataPtr = le.Uint32(input[10:])
	return blk, nil
}

// Encode serializes the block into a freshly allocated 14-byte slice using
// the layout SegmentIndexDecode reads.
func (s *SegmentIndexBlock) Encode() []byte {
	le := binary.LittleEndian
	out := make([]byte, SegmentIndexBlockSize)
	le.PutUint32(out[0:], s.StartIP)
	le.PutUint32(out[4:], s.EndIP)
	le.PutUint16(out[8:], s.DataLen)
	le.PutUint32(out[10:], s.DataPtr)
	return out
}

// String renders the block's four fields as decimal numbers.
func (s *SegmentIndexBlock) String() string {
	const layout = "{sip: %d, eip: %d, len: %d, ptr: %d}"
	return fmt.Sprintf(layout, s.StartIP, s.EndIP, s.DataLen, s.DataPtr)
}
// ------------
// VectorIndexBlock is one vector index entry: a pair of pointers stored as two
// little-endian uint32 values.
type VectorIndexBlock struct {
	FirstPtr uint32
	LastPtr  uint32
}

// VectorIndexBlockDecode parses the first 8 bytes of input as a
// VectorIndexBlock. It returns an error when input is shorter than 8 bytes.
func VectorIndexBlockDecode(input []byte) (*VectorIndexBlock, error) {
	if len(input) < 8 {
		return nil, fmt.Errorf("input should be not less then 8 bytes")
	}

	return &VectorIndexBlock{
		FirstPtr: binary.LittleEndian.Uint32(input),
		LastPtr:  binary.LittleEndian.Uint32(input[4:]),
	}, nil
}

// Encode serializes the block into a freshly allocated 8-byte slice using the
// layout VectorIndexBlockDecode reads.
func (v VectorIndexBlock) Encode() []byte {
	var buff = make([]byte, 8)
	binary.LittleEndian.PutUint32(buff, v.FirstPtr)
	binary.LittleEndian.PutUint32(buff[4:], v.LastPtr)
	return buff
}

// String renders both pointers as decimal numbers.
func (v VectorIndexBlock) String() string {
	// The label previously read "FristPtr", a misspelling of the field name.
	return fmt.Sprintf("{FirstPtr: %d, LastPtr: %d}", v.FirstPtr, v.LastPtr)
}
// ------------
// Header wraps the raw header bytes. Each accessor decodes its field from
// data on every call; none of them checks the length of data first.
type Header struct {
	data []byte
}

// Version decodes the little-endian uint16 at bytes 0-1.
func (h *Header) Version() int {
	raw := binary.LittleEndian.Uint16(h.data[0:])
	return int(raw)
}

// IndexPolicy decodes the little-endian uint16 at bytes 2-3.
func (h *Header) IndexPolicy() IndexPolicy {
	raw := binary.LittleEndian.Uint16(h.data[2:])
	return IndexPolicy(raw)
}

// CreatedAt decodes the little-endian uint32 at bytes 4-7.
func (h *Header) CreatedAt() uint32 {
	field := h.data[4:]
	return binary.LittleEndian.Uint32(field)
}

// StartIndexPtr decodes the little-endian uint32 at bytes 8-11.
func (h *Header) StartIndexPtr() uint32 {
	field := h.data[8:]
	return binary.LittleEndian.Uint32(field)
}

// EndIndexPtr decodes the little-endian uint32 at bytes 12-15.
func (h *Header) EndIndexPtr() uint32 {
	field := h.data[12:]
	return binary.LittleEndian.Uint32(field)
}

View File

@ -18,12 +18,31 @@ import (
)
const (
HeaderInfoLength = 256
VectorIndexRows = 256
VectorIndexCols = 256
VectorIndexSize = 8
HeaderInfoLength = 256
VectorIndexRows = 256
VectorIndexCols = 256
VectorIndexSize = 8
SegmentIndexBlockSize = 14
)
// IndexPolicy enumerates the index policies known to this package.
type IndexPolicy int

// Known index policies.
const (
	VectorIndexPolicy IndexPolicy = 1
	BTreeIndexPolicy  IndexPolicy = 2
)

// String returns the display name of the policy, or "unknown" for a value
// that is not one of the declared constants.
func (i IndexPolicy) String() string {
	name := "unknown"
	switch {
	case i == VectorIndexPolicy:
		name = "VectorIndex"
	case i == BTreeIndexPolicy:
		name = "BtreeIndex"
	}
	return name
}
type Searcher struct {
handle *os.File
@ -34,28 +53,20 @@ type Searcher struct {
// use it only when this feature enabled.
// Preload the vector index will reduce the number of IO operations
// thus speedup the search process
vectorIndex [][]*VectorIndexBlock
vectorIndex []byte
// content buffer.
// running with the whole xdb file cached
contentBuff []byte
}
func baseNew(dbFile string, vIndex [][]*VectorIndexBlock, cBuff []byte) (*Searcher, error) {
func baseNew(dbFile string, vIndex []byte, cBuff []byte) (*Searcher, error) {
var err error
// content buff first
if cBuff != nil {
// check and autoload the vector index
if vIndex == nil {
vIndex, err = LoadVectorIndexFromBuff(cBuff)
if err != nil {
return nil, fmt.Errorf("load vector index from buff: %w", err)
}
}
return &Searcher{
vectorIndex: vIndex,
vectorIndex: nil,
contentBuff: cBuff,
}, nil
}
@ -76,17 +87,12 @@ func NewWithFileOnly(dbFile string) (*Searcher, error) {
return baseNew(dbFile, nil, nil)
}
func NewWithVectorIndex(dbFile string, vIndex [][]*VectorIndexBlock) (*Searcher, error) {
func NewWithVectorIndex(dbFile string, vIndex []byte) (*Searcher, error) {
return baseNew(dbFile, vIndex, nil)
}
func NewWithBuffer(cBuff []byte) (*Searcher, error) {
vIndex, err := LoadVectorIndexFromBuff(cBuff)
if err != nil {
return nil, fmt.Errorf("load vector index from buff: %w", err)
}
return baseNew("", vIndex, cBuff)
return baseNew("", nil, cBuff)
}
func (s *Searcher) Close() {
@ -119,30 +125,37 @@ func (s *Searcher) Search(ip uint32) (string, error) {
s.ioCount = 0
// locate the segment index block based on the vector index
var vIndex *VectorIndexBlock
var il0 = (ip >> 24) & 0xFF
var il1 = (ip >> 16) & 0xFF
var idx = il0*VectorIndexCols*VectorIndexSize + il1*VectorIndexSize
var sPtr, ePtr = uint32(0), uint32(0)
if s.vectorIndex != nil {
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
sPtr = binary.LittleEndian.Uint32(s.vectorIndex[idx:])
ePtr = binary.LittleEndian.Uint32(s.vectorIndex[idx+4:])
} else if s.contentBuff != nil {
sPtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx:])
ePtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx+4:])
} else {
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
offset := HeaderInfoLength + l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
// read the vector index block
var vIndexBuff = make([]byte, 8)
err := s.read(int64(offset), vIndexBuff)
vIndex, err = VectorIndexBlockDecode(vIndexBuff)
var buff = make([]byte, 8)
err := s.read(int64(HeaderInfoLength+idx), buff)
if err != nil {
return "", fmt.Errorf("read vector index block at %d: %w", offset, err)
return "", fmt.Errorf("read vector index block at %d: %w", HeaderInfoLength+idx, err)
}
sPtr = binary.LittleEndian.Uint32(buff)
ePtr = binary.LittleEndian.Uint32(buff[4:])
}
//fmt.Printf("vIndex=%s", vIndex)
// fmt.Printf("sPtr=%d, ePtr=%d", sPtr, ePtr)
// binary search the segment index to get the region
var dataLen, dataPtr = 0, uint32(0)
var buff = make([]byte, SegmentIndexBlockSize)
var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
var l, h = 0, int((ePtr - sPtr) / SegmentIndexBlockSize)
for l <= h {
m := (l + h) >> 1
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
p := sPtr + uint32(m*SegmentIndexBlockSize)
err := s.read(int64(p), buff)
if err != nil {
return "", fmt.Errorf("read segment index at %d: %w", p, err)

View File

@ -1,18 +0,0 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
// --
// @Author Lion <chenxin619315@gmail.com>
// @Date 2022/06/16
package xdb
import (
"fmt"
"testing"
)
// TestSearcher_Search is a placeholder: it prints a marker line and makes no
// assertions.
func TestSearcher_Search(t *testing.T) {
	// The message has no format verbs, so Print emits the same bytes as Printf.
	fmt.Print("search testing...")
}

View File

@ -53,68 +53,6 @@ func MidIP(sip uint32, eip uint32) uint32 {
return uint32((uint64(sip) + uint64(eip)) >> 1)
}
// LoadVectorIndex reads the vector index that follows the header in handle
// and decodes it into a VectorIndexRows x VectorIndexCols table.
//
// The handle is repositioned to HeaderInfoLength before reading. An error is
// returned when the seek or read fails, when fewer bytes than the full index
// are read, or when an entry cannot be decoded.
func LoadVectorIndex(handle *os.File) ([][]*VectorIndexBlock, error) {
	if _, err := handle.Seek(HeaderInfoLength, 0); err != nil {
		return nil, fmt.Errorf("seek to vector index: %w", err)
	}

	raw := make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
	n, err := handle.Read(raw)
	if err != nil {
		return nil, err
	}
	if n != len(raw) {
		return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(raw))
	}

	// Entries are stored row by row, so a single running offset visits them
	// in the same order as the row/column formula.
	table := make([][]*VectorIndexBlock, VectorIndexRows)
	pos := 0
	for row := range table {
		cells := make([]*VectorIndexBlock, VectorIndexCols)
		for col := range cells {
			blk, err := VectorIndexBlockDecode(raw[pos:])
			if err != nil {
				return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", row, col, err)
			}
			cells[col] = blk
			pos += VectorIndexSize
		}
		table[row] = cells
	}

	return table, nil
}
// LoadVectorIndexFromFile opens dbFile read-only and loads its vector index
// via LoadVectorIndex.
//
// The file is closed before returning; previously the handle was never closed,
// leaking one descriptor per call. An error is returned when the file cannot
// be opened or when LoadVectorIndex fails.
func LoadVectorIndexFromFile(dbFile string) ([][]*VectorIndexBlock, error) {
	handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
	}
	// The handle was opened read-only and nothing is written through it, so
	// the Close error is not reported.
	defer handle.Close()

	return LoadVectorIndex(handle)
}
// LoadVectorIndexFromBuff decodes the vector index from cBuff, starting
// HeaderInfoLength bytes into the buffer, into a
// VectorIndexRows x VectorIndexCols table.
//
// An error is returned when cBuff is too short to hold the header plus the
// full vector index (previously such a buffer could cause a slice-bounds
// panic), or when an entry cannot be decoded.
func LoadVectorIndexFromBuff(cBuff []byte) ([][]*VectorIndexBlock, error) {
	// Validate once up front so no cBuff[offset:] below can go out of range.
	need := HeaderInfoLength + VectorIndexRows*VectorIndexCols*VectorIndexSize
	if len(cBuff) < need {
		return nil, fmt.Errorf("content buffer too short for vector index: got %d bytes, need at least %d", len(cBuff), need)
	}

	var err error
	var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
	for r := 0; r < VectorIndexRows; r++ {
		vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
		for c := 0; c < VectorIndexCols; c++ {
			offset := HeaderInfoLength + r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
			vectorIndex[r][c], err = VectorIndexBlockDecode(cBuff[offset:])
			if err != nil {
				return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
			}
		}
	}

	return vectorIndex, nil
}
// LoadHeader load the header info from the specified handle
func LoadHeader(handle *os.File) (*Header, error) {
_, err := handle.Seek(0, 0)
@ -132,9 +70,7 @@ func LoadHeader(handle *os.File) (*Header, error) {
return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
}
return &Header{
data: buff,
}, nil
return NewHeader(buff)
}
// LoadHeaderFromFile load header info from the specified db file path
@ -149,9 +85,38 @@ func LoadHeaderFromFile(dbFile string) (*Header, error) {
// LoadHeaderFromBuff wrap the header info from the content buffer
func LoadHeaderFromBuff(cBuff []byte) (*Header, error) {
return &Header{
data: cBuff[0:256],
}, nil
return NewHeader(cBuff[0:256])
}
// LoadVectorIndex reads the raw vector index that follows the header in
// handle and returns it undecoded.
//
// The handle is repositioned to HeaderInfoLength before reading. The returned
// slice is VectorIndexRows*VectorIndexCols*VectorIndexSize bytes long. An
// error is returned when the seek or read fails, or when fewer bytes than
// that are read.
func LoadVectorIndex(handle *os.File) ([]byte, error) {
	if _, err := handle.Seek(HeaderInfoLength, 0); err != nil {
		return nil, fmt.Errorf("seek to vector index: %w", err)
	}

	raw := make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
	n, err := handle.Read(raw)
	switch {
	case err != nil:
		return nil, err
	case n != len(raw):
		return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(raw))
	}

	return raw, nil
}
// LoadVectorIndexFromFile opens dbFile read-only and returns its raw vector
// index as loaded by LoadVectorIndex.
//
// The file is closed before returning; previously the handle was never closed,
// leaking one descriptor per call. An error is returned when the file cannot
// be opened or when LoadVectorIndex fails.
func LoadVectorIndexFromFile(dbFile string) ([]byte, error) {
	handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
	}
	// The handle was opened read-only and nothing is written through it, so
	// the Close error is not reported.
	defer handle.Close()

	return LoadVectorIndex(handle)
}
// LoadContent load the whole xdb content from the specified file handle

View File

@ -34,22 +34,6 @@ func TestLoadContent(t *testing.T) {
fmt.Printf("buff length: %d\n", len(buff))
}
// TestLoadVectorIndexFromBuff loads the xdb content from the data file, decodes
// the vector index from it, and prints both lengths. Failures are printed
// rather than reported through t, so the test does not fail on error.
func TestLoadVectorIndexFromBuff(t *testing.T) {
	content, loadErr := LoadContentFromFile("../../../data/ip2region.xdb")
	if loadErr != nil {
		fmt.Printf("failed to load xdb content: %s\n", loadErr)
		return
	}

	index, indexErr := LoadVectorIndexFromBuff(content)
	if indexErr != nil {
		fmt.Printf("failed to load vector index from buff: %s\n", indexErr)
		return
	}

	fmt.Printf("buff length: %d, vIndex length: %d\n", len(content), len(index))
}
func TestLoadHeader(t *testing.T) {
header, err := LoadHeaderFromFile("../../../data/ip2region.xdb")
if err != nil {
@ -57,9 +41,9 @@ func TestLoadHeader(t *testing.T) {
return
}
fmt.Printf("Version : %d\n", header.Version())
fmt.Printf("IndexPolicy : %s\n", header.IndexPolicy().String())
fmt.Printf("CreatedAt : %d(%s)\n", header.CreatedAt(), time.Unix(int64(header.CreatedAt()), 0).Format(time.RFC3339))
fmt.Printf("StartIndexPtr : %d\n", header.StartIndexPtr())
fmt.Printf("EndIndexPtr : %d\n", header.EndIndexPtr())
fmt.Printf("Version : %d\n", header.Version)
fmt.Printf("IndexPolicy : %s\n", header.IndexPolicy.String())
fmt.Printf("CreatedAt : %d(%s)\n", header.CreatedAt, time.Unix(int64(header.CreatedAt), 0).Format(time.RFC3339))
fmt.Printf("StartIndexPtr : %d\n", header.StartIndexPtr)
fmt.Printf("EndIndexPtr : %d\n", header.EndIndexPtr)
}