mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
use the raw []byte as vectorIndex to reduce memory alloc
This commit is contained in:
parent
670cf1dcb3
commit
3b96003ca1
@ -174,7 +174,7 @@ func testBench() {
|
||||
return
|
||||
}
|
||||
|
||||
var count, tStart = 0, time.Now()
|
||||
var count, tStart, costs = int64(0), time.Now(), int64(0)
|
||||
var scanner = bufio.NewScanner(handle)
|
||||
scanner.Split(bufio.ScanLines)
|
||||
for scanner.Scan() {
|
||||
@ -204,12 +204,15 @@ func testBench() {
|
||||
|
||||
mip := xdb.MidIP(sip, eip)
|
||||
for _, ip := range []uint32{sip, xdb.MidIP(sip, mip), mip, xdb.MidIP(mip, eip), eip} {
|
||||
sTime := time.Now()
|
||||
region, err := searcher.Search(ip)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to search ip '%s': %s\n", xdb.Long2IP(ip), err)
|
||||
return
|
||||
}
|
||||
|
||||
costs += time.Since(sTime).Nanoseconds()
|
||||
|
||||
// check the region info
|
||||
if region != ps[2] {
|
||||
fmt.Printf("failed Search(%s) with (%s != %s)\n", xdb.Long2IP(ip), region, ps[2])
|
||||
@ -221,7 +224,8 @@ func testBench() {
|
||||
}
|
||||
|
||||
cost := time.Since(tStart)
|
||||
fmt.Printf("Bench finished, {cachePolicy: %s, total: %d, took: %s, cost: %d ns/op}\n", cachePolicy, count, cost, cost.Nanoseconds()/int64(count))
|
||||
fmt.Printf("Bench finished, {cachePolicy: %s, total: %d, took: %s, cost: %d μs/op}\n",
|
||||
cachePolicy, count, cost, costs/count/1000)
|
||||
}
|
||||
|
||||
func createSearcher(dbPath string, cachePolicy string) (*xdb.Searcher, error) {
|
||||
|
||||
36
binding/golang/xdb/header.go
Normal file
36
binding/golang/xdb/header.go
Normal file
@ -0,0 +1,36 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// @Author Lion <chenxin619315@gmail.com>
|
||||
// @Date 2022/06/16
|
||||
|
||||
package xdb
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Header struct {
|
||||
// data []byte
|
||||
Version uint16
|
||||
IndexPolicy IndexPolicy
|
||||
CreatedAt uint32
|
||||
StartIndexPtr uint32
|
||||
EndIndexPtr uint32
|
||||
}
|
||||
|
||||
func NewHeader(input []byte) (*Header, error) {
|
||||
if len(input) < 16 {
|
||||
return nil, fmt.Errorf("invalid input buffer")
|
||||
}
|
||||
|
||||
return &Header{
|
||||
Version: binary.LittleEndian.Uint16(input),
|
||||
IndexPolicy: IndexPolicy(binary.LittleEndian.Uint16(input[2:])),
|
||||
CreatedAt: binary.LittleEndian.Uint32(input[4:]),
|
||||
StartIndexPtr: binary.LittleEndian.Uint32(input[8:]),
|
||||
EndIndexPtr: binary.LittleEndian.Uint32(input[12:]),
|
||||
}, nil
|
||||
}
|
||||
@ -1,133 +0,0 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// @Author Lion <chenxin619315@gmail.com>
|
||||
// @Date 2022/06/16
|
||||
|
||||
package xdb
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IndexPolicy identifies which index layout is in use.
type IndexPolicy int

const (
	// VectorIndexPolicy is the vector index layout.
	VectorIndexPolicy IndexPolicy = 1
	// BTreeIndexPolicy is the b-tree index layout.
	BTreeIndexPolicy IndexPolicy = 2
)

// IndexPolicyFromString parses a policy name, ignoring case.
// "vector" and "btree" are accepted; any other value yields an error
// together with VectorIndexPolicy as the returned policy.
func IndexPolicyFromString(str string) (IndexPolicy, error) {
	switch strings.ToLower(str) {
	case "vector":
		return VectorIndexPolicy, nil
	case "btree":
		return BTreeIndexPolicy, nil
	default:
		return VectorIndexPolicy, fmt.Errorf("invalid policy '%s'", str)
	}
}

// String returns the display name of the policy, or "unknown" for a
// value that is not one of the declared constants.
func (i IndexPolicy) String() string {
	switch i {
	case VectorIndexPolicy:
		return "VectorIndex"
	case BTreeIndexPolicy:
		return "BtreeIndex"
	default:
		return "unknown"
	}
}
|
||||
|
||||
// SegmentIndexBlockSize is the encoded size, in bytes, of one
// SegmentIndexBlock: 4 (start ip) + 4 (end ip) + 2 (data length) +
// 4 (data pointer).
const SegmentIndexBlockSize = 14

// SegmentIndexBlock is one decoded segment index entry.
type SegmentIndexBlock struct {
	StartIP uint32
	EndIP   uint32
	DataLen uint16
	DataPtr uint32
}

// SegmentIndexDecode decodes a SegmentIndexBlock from the first
// SegmentIndexBlockSize bytes of input (little-endian). It returns an
// error when input is shorter than that.
func SegmentIndexDecode(input []byte) (*SegmentIndexBlock, error) {
	if len(input) < SegmentIndexBlockSize {
		return nil, fmt.Errorf("input is less than %d bytes", SegmentIndexBlockSize)
	}

	return &SegmentIndexBlock{
		StartIP: binary.LittleEndian.Uint32(input),
		EndIP:   binary.LittleEndian.Uint32(input[4:]),
		DataLen: binary.LittleEndian.Uint16(input[8:]),
		DataPtr: binary.LittleEndian.Uint32(input[10:]),
	}, nil
}

// Encode serializes the block into a freshly allocated
// SegmentIndexBlockSize-byte slice, using the layout that
// SegmentIndexDecode reads.
func (s *SegmentIndexBlock) Encode() []byte {
	buff := make([]byte, SegmentIndexBlockSize)
	binary.LittleEndian.PutUint32(buff, s.StartIP)
	binary.LittleEndian.PutUint32(buff[4:], s.EndIP)
	binary.LittleEndian.PutUint16(buff[8:], s.DataLen)
	binary.LittleEndian.PutUint32(buff[10:], s.DataPtr)
	return buff
}

// String formats the block's fields as decimal numbers for debugging.
func (s *SegmentIndexBlock) String() string {
	return fmt.Sprintf("{sip: %d, eip: %d, len: %d, ptr: %d}", s.StartIP, s.EndIP, s.DataLen, s.DataPtr)
}
|
||||
|
||||
// ------------
|
||||
|
||||
// VectorIndexBlock is one decoded vector index entry: a pair of
// pointers read from 8 consecutive little-endian bytes.
type VectorIndexBlock struct {
	FirstPtr uint32
	LastPtr  uint32
}

// vectorIndexBlockLen is the encoded size of a VectorIndexBlock in bytes.
const vectorIndexBlockLen = 8

// VectorIndexBlockDecode decodes a VectorIndexBlock from the first 8
// bytes of input. It returns an error when input is shorter than that.
func VectorIndexBlockDecode(input []byte) (*VectorIndexBlock, error) {
	if len(input) < vectorIndexBlockLen {
		return nil, fmt.Errorf("input should be not less than %d bytes", vectorIndexBlockLen)
	}

	return &VectorIndexBlock{
		FirstPtr: binary.LittleEndian.Uint32(input),
		LastPtr:  binary.LittleEndian.Uint32(input[4:]),
	}, nil
}

// Encode serializes the block into a freshly allocated 8-byte slice,
// using the layout that VectorIndexBlockDecode reads.
func (v VectorIndexBlock) Encode() []byte {
	buff := make([]byte, vectorIndexBlockLen)
	binary.LittleEndian.PutUint32(buff, v.FirstPtr)
	binary.LittleEndian.PutUint32(buff[4:], v.LastPtr)
	return buff
}

// String formats both pointers as decimal numbers for debugging.
func (v VectorIndexBlock) String() string {
	return fmt.Sprintf("{FirstPtr: %d, LastPtr: %d}", v.FirstPtr, v.LastPtr)
}
|
||||
|
||||
// ------------
|
||||
|
||||
type Header struct {
|
||||
data []byte
|
||||
}
|
||||
|
||||
func (h *Header) Version() int {
|
||||
return int(binary.LittleEndian.Uint16(h.data))
|
||||
}
|
||||
|
||||
func (h *Header) IndexPolicy() IndexPolicy {
|
||||
return IndexPolicy(binary.LittleEndian.Uint16(h.data[2:]))
|
||||
}
|
||||
|
||||
func (h *Header) CreatedAt() uint32 {
|
||||
return binary.LittleEndian.Uint32(h.data[4:])
|
||||
}
|
||||
|
||||
func (h *Header) StartIndexPtr() uint32 {
|
||||
return binary.LittleEndian.Uint32(h.data[8:])
|
||||
}
|
||||
|
||||
func (h *Header) EndIndexPtr() uint32 {
|
||||
return binary.LittleEndian.Uint32(h.data[12:])
|
||||
}
|
||||
@ -18,12 +18,31 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
HeaderInfoLength = 256
|
||||
VectorIndexRows = 256
|
||||
VectorIndexCols = 256
|
||||
VectorIndexSize = 8
|
||||
HeaderInfoLength = 256
|
||||
VectorIndexRows = 256
|
||||
VectorIndexCols = 256
|
||||
VectorIndexSize = 8
|
||||
SegmentIndexBlockSize = 14
|
||||
)
|
||||
|
||||
// IndexPolicy identifies which index layout is in use.
type IndexPolicy int

const (
	// VectorIndexPolicy is the vector index layout.
	VectorIndexPolicy IndexPolicy = 1
	// BTreeIndexPolicy is the b-tree index layout.
	BTreeIndexPolicy IndexPolicy = 2
)

// String returns the display name of the policy, or "unknown" for a
// value that is not one of the declared constants.
func (i IndexPolicy) String() string {
	switch i {
	case VectorIndexPolicy:
		return "VectorIndex"
	case BTreeIndexPolicy:
		return "BtreeIndex"
	default:
		return "unknown"
	}
}
|
||||
|
||||
type Searcher struct {
|
||||
handle *os.File
|
||||
|
||||
@ -34,28 +53,20 @@ type Searcher struct {
|
||||
// use it only when this feature enabled.
|
||||
// Preload the vector index will reduce the number of IO operations
|
||||
// thus speedup the search process
|
||||
vectorIndex [][]*VectorIndexBlock
|
||||
vectorIndex []byte
|
||||
|
||||
// content buffer.
|
||||
// running with the whole xdb file cached
|
||||
contentBuff []byte
|
||||
}
|
||||
|
||||
func baseNew(dbFile string, vIndex [][]*VectorIndexBlock, cBuff []byte) (*Searcher, error) {
|
||||
func baseNew(dbFile string, vIndex []byte, cBuff []byte) (*Searcher, error) {
|
||||
var err error
|
||||
|
||||
// content buff first
|
||||
if cBuff != nil {
|
||||
// check and autoload the vector index
|
||||
if vIndex == nil {
|
||||
vIndex, err = LoadVectorIndexFromBuff(cBuff)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load vector index from buff: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return &Searcher{
|
||||
vectorIndex: vIndex,
|
||||
vectorIndex: nil,
|
||||
contentBuff: cBuff,
|
||||
}, nil
|
||||
}
|
||||
@ -76,17 +87,12 @@ func NewWithFileOnly(dbFile string) (*Searcher, error) {
|
||||
return baseNew(dbFile, nil, nil)
|
||||
}
|
||||
|
||||
func NewWithVectorIndex(dbFile string, vIndex [][]*VectorIndexBlock) (*Searcher, error) {
|
||||
func NewWithVectorIndex(dbFile string, vIndex []byte) (*Searcher, error) {
|
||||
return baseNew(dbFile, vIndex, nil)
|
||||
}
|
||||
|
||||
func NewWithBuffer(cBuff []byte) (*Searcher, error) {
|
||||
vIndex, err := LoadVectorIndexFromBuff(cBuff)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load vector index from buff: %w", err)
|
||||
}
|
||||
|
||||
return baseNew("", vIndex, cBuff)
|
||||
return baseNew("", nil, cBuff)
|
||||
}
|
||||
|
||||
func (s *Searcher) Close() {
|
||||
@ -119,30 +125,37 @@ func (s *Searcher) Search(ip uint32) (string, error) {
|
||||
s.ioCount = 0
|
||||
|
||||
// locate the segment index block based on the vector index
|
||||
var vIndex *VectorIndexBlock
|
||||
var il0 = (ip >> 24) & 0xFF
|
||||
var il1 = (ip >> 16) & 0xFF
|
||||
var idx = il0*VectorIndexCols*VectorIndexSize + il1*VectorIndexSize
|
||||
var sPtr, ePtr = uint32(0), uint32(0)
|
||||
if s.vectorIndex != nil {
|
||||
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
|
||||
sPtr = binary.LittleEndian.Uint32(s.vectorIndex[idx:])
|
||||
ePtr = binary.LittleEndian.Uint32(s.vectorIndex[idx+4:])
|
||||
} else if s.contentBuff != nil {
|
||||
sPtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx:])
|
||||
ePtr = binary.LittleEndian.Uint32(s.contentBuff[HeaderInfoLength+idx+4:])
|
||||
} else {
|
||||
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
|
||||
offset := HeaderInfoLength + l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
|
||||
|
||||
// read the vector index block
|
||||
var vIndexBuff = make([]byte, 8)
|
||||
err := s.read(int64(offset), vIndexBuff)
|
||||
vIndex, err = VectorIndexBlockDecode(vIndexBuff)
|
||||
var buff = make([]byte, 8)
|
||||
err := s.read(int64(HeaderInfoLength+idx), buff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read vector index block at %d: %w", offset, err)
|
||||
return "", fmt.Errorf("read vector index block at %d: %w", HeaderInfoLength+idx, err)
|
||||
}
|
||||
|
||||
sPtr = binary.LittleEndian.Uint32(buff)
|
||||
ePtr = binary.LittleEndian.Uint32(buff[4:])
|
||||
}
|
||||
|
||||
//fmt.Printf("vIndex=%s", vIndex)
|
||||
// fmt.Printf("sPtr=%d, ePtr=%d", sPtr, ePtr)
|
||||
|
||||
// binary search the segment index to get the region
|
||||
var dataLen, dataPtr = 0, uint32(0)
|
||||
var buff = make([]byte, SegmentIndexBlockSize)
|
||||
var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
|
||||
var l, h = 0, int((ePtr - sPtr) / SegmentIndexBlockSize)
|
||||
for l <= h {
|
||||
m := (l + h) >> 1
|
||||
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
|
||||
p := sPtr + uint32(m*SegmentIndexBlockSize)
|
||||
err := s.read(int64(p), buff)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read segment index at %d: %w", p, err)
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// --
|
||||
// @Author Lion <chenxin619315@gmail.com>
|
||||
// @Date 2022/06/16
|
||||
|
||||
package xdb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestSearcher_Search is a placeholder: it only prints a banner and
// asserts nothing.
func TestSearcher_Search(t *testing.T) {
	fmt.Printf("search testing...")
}
|
||||
@ -53,68 +53,6 @@ func MidIP(sip uint32, eip uint32) uint32 {
|
||||
return uint32((uint64(sip) + uint64(eip)) >> 1)
|
||||
}
|
||||
|
||||
// LoadVectorIndex util function to load the vector index from the specified file handle
|
||||
func LoadVectorIndex(handle *os.File) ([][]*VectorIndexBlock, error) {
|
||||
// load all the vector index block
|
||||
_, err := handle.Seek(HeaderInfoLength, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("seek to vector index: %w", err)
|
||||
}
|
||||
|
||||
var buff = make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
|
||||
rLen, err := handle.Read(buff)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
// decode the vector index blocks
|
||||
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
|
||||
for r := 0; r < VectorIndexRows; r++ {
|
||||
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
|
||||
for c := 0; c < VectorIndexCols; c++ {
|
||||
offset := r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
|
||||
vectorIndex[r][c], err = VectorIndexBlockDecode(buff[offset:])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vectorIndex, nil
|
||||
}
|
||||
|
||||
// LoadVectorIndexFromFile load vector index from a specified file path
|
||||
func LoadVectorIndexFromFile(dbFile string) ([][]*VectorIndexBlock, error) {
|
||||
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
|
||||
}
|
||||
|
||||
return LoadVectorIndex(handle)
|
||||
}
|
||||
|
||||
// LoadVectorIndexFromBuff load vector index from content buffer
|
||||
func LoadVectorIndexFromBuff(cBuff []byte) ([][]*VectorIndexBlock, error) {
|
||||
var err error
|
||||
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
|
||||
for r := 0; r < VectorIndexRows; r++ {
|
||||
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
|
||||
for c := 0; c < VectorIndexCols; c++ {
|
||||
offset := HeaderInfoLength + r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
|
||||
vectorIndex[r][c], err = VectorIndexBlockDecode(cBuff[offset:])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vectorIndex, nil
|
||||
}
|
||||
|
||||
// LoadHeader load the header info from the specified handle
|
||||
func LoadHeader(handle *os.File) (*Header, error) {
|
||||
_, err := handle.Seek(0, 0)
|
||||
@ -132,9 +70,7 @@ func LoadHeader(handle *os.File) (*Header, error) {
|
||||
return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
return &Header{
|
||||
data: buff,
|
||||
}, nil
|
||||
return NewHeader(buff)
|
||||
}
|
||||
|
||||
// LoadHeaderFromFile load header info from the specified db file path
|
||||
@ -149,9 +85,38 @@ func LoadHeaderFromFile(dbFile string) (*Header, error) {
|
||||
|
||||
// LoadHeaderFromBuff wrap the header info from the content buffer
|
||||
func LoadHeaderFromBuff(cBuff []byte) (*Header, error) {
|
||||
return &Header{
|
||||
data: cBuff[0:256],
|
||||
}, nil
|
||||
return NewHeader(cBuff[0:256])
|
||||
}
|
||||
|
||||
// LoadVectorIndex util function to load the vector index from the specified file handle
|
||||
func LoadVectorIndex(handle *os.File) ([]byte, error) {
|
||||
// load all the vector index block
|
||||
_, err := handle.Seek(HeaderInfoLength, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("seek to vector index: %w", err)
|
||||
}
|
||||
|
||||
var buff = make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
|
||||
rLen, err := handle.Read(buff)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return nil, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
return buff, nil
|
||||
}
|
||||
|
||||
// LoadVectorIndexFromFile load vector index from a specified file path
|
||||
func LoadVectorIndexFromFile(dbFile string) ([]byte, error) {
|
||||
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open xdb file `%s`: %w", dbFile, err)
|
||||
}
|
||||
|
||||
return LoadVectorIndex(handle)
|
||||
}
|
||||
|
||||
// LoadContent load the whole xdb content from the specified file handle
|
||||
|
||||
@ -34,22 +34,6 @@ func TestLoadContent(t *testing.T) {
|
||||
fmt.Printf("buff length: %d\n", len(buff))
|
||||
}
|
||||
|
||||
func TestLoadVectorIndexFromBuff(t *testing.T) {
|
||||
buff, err := LoadContentFromFile("../../../data/ip2region.xdb")
|
||||
if err != nil {
|
||||
fmt.Printf("failed to load xdb content: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
vIndex, err := LoadVectorIndexFromBuff(buff)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to load vector index from buff: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("buff length: %d, vIndex length: %d\n", len(buff), len(vIndex))
|
||||
}
|
||||
|
||||
func TestLoadHeader(t *testing.T) {
|
||||
header, err := LoadHeaderFromFile("../../../data/ip2region.xdb")
|
||||
if err != nil {
|
||||
@ -57,9 +41,9 @@ func TestLoadHeader(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Version : %d\n", header.Version())
|
||||
fmt.Printf("IndexPolicy : %s\n", header.IndexPolicy().String())
|
||||
fmt.Printf("CreatedAt : %d(%s)\n", header.CreatedAt(), time.Unix(int64(header.CreatedAt()), 0).Format(time.RFC3339))
|
||||
fmt.Printf("StartIndexPtr : %d\n", header.StartIndexPtr())
|
||||
fmt.Printf("EndIndexPtr : %d\n", header.EndIndexPtr())
|
||||
fmt.Printf("Version : %d\n", header.Version)
|
||||
fmt.Printf("IndexPolicy : %s\n", header.IndexPolicy.String())
|
||||
fmt.Printf("CreatedAt : %d(%s)\n", header.CreatedAt, time.Unix(int64(header.CreatedAt), 0).Format(time.RFC3339))
|
||||
fmt.Printf("StartIndexPtr : %d\n", header.StartIndexPtr)
|
||||
fmt.Printf("EndIndexPtr : %d\n", header.EndIndexPtr)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user