add golang searcher binding

This commit is contained in:
lion 2022-06-17 14:18:44 +08:00
parent c6a7652d9e
commit 985dabd51c
8 changed files with 592 additions and 0 deletions

5
.gitignore vendored
View File

@ -38,6 +38,11 @@ META-INF/
/binding/c/testSearcher
# golang
/binding/golang/searcher
/binding/golang/dbsearcher
/binding/golang/golang
# rust
Cargo.lock
target

5
binding/golang/go.mod Normal file
View File

@ -0,0 +1,5 @@
module github.com/lionsoul2014/ip2region/binding/golang
go 1.17
require github.com/mitchellh/go-homedir v1.1.0

2
binding/golang/go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=

View File

@ -0,0 +1,104 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
package ip2region
import (
"encoding/binary"
"fmt"
"strings"
)
// IndexPolicy enumerates the index policies known to this package.
type IndexPolicy int

// Known index policies. The numeric values are fixed: vector is 1 and
// btree is 2.
const (
	VectorIndexPolicy IndexPolicy = iota + 1
	BTreeIndexPolicy
)

// IndexPolicyFromString maps a policy name to its IndexPolicy value.
// The comparison is case-insensitive; "vector" and "btree" are the only
// accepted names. Any other input yields an error together with
// VectorIndexPolicy as the placeholder result.
func IndexPolicyFromString(str string) (IndexPolicy, error) {
	name := strings.ToLower(str)
	if name == "vector" {
		return VectorIndexPolicy, nil
	}
	if name == "btree" {
		return BTreeIndexPolicy, nil
	}
	return VectorIndexPolicy, fmt.Errorf("invalid policy '%s'", str)
}

// String returns the display name of the policy, or "unknown" for a value
// that is not one of the declared policies.
func (i IndexPolicy) String() string {
	if i == VectorIndexPolicy {
		return "VectorIndex"
	}
	if i == BTreeIndexPolicy {
		return "BtreeIndex"
	}
	return "unknown"
}
// SegmentIndexBlockSize is the size in bytes of one encoded segment index
// block: two uint32 addresses, a uint16 length and a uint32 pointer.
const SegmentIndexBlockSize = 14

// SegmentIndexBlock is the decoded form of one segment index entry.
// Fields are stored little-endian in declaration order.
type SegmentIndexBlock struct {
	StartIP uint32
	EndIP   uint32
	DataLen uint16
	DataPtr uint32
}

// SegmentIndexDecode parses the first 14 bytes of input into a
// SegmentIndexBlock. Extra trailing bytes are ignored; a shorter input is
// reported as an error.
func SegmentIndexDecode(input []byte) (*SegmentIndexBlock, error) {
	if len(input) < SegmentIndexBlockSize {
		return nil, fmt.Errorf("input is less than 14 bytes")
	}

	le := binary.LittleEndian
	block := new(SegmentIndexBlock)
	block.StartIP = le.Uint32(input[0:4])
	block.EndIP = le.Uint32(input[4:8])
	block.DataLen = le.Uint16(input[8:10])
	block.DataPtr = le.Uint32(input[10:14])
	return block, nil
}

// Encode serializes the block into a freshly allocated 14-byte slice using
// the same little-endian layout SegmentIndexDecode reads.
func (s *SegmentIndexBlock) Encode() []byte {
	le := binary.LittleEndian
	out := make([]byte, SegmentIndexBlockSize)
	le.PutUint32(out[0:4], s.StartIP)
	le.PutUint32(out[4:8], s.EndIP)
	le.PutUint16(out[8:10], s.DataLen)
	le.PutUint32(out[10:14], s.DataPtr)
	return out
}

// String renders the block's four fields as decimal numbers for debugging.
func (s *SegmentIndexBlock) String() string {
	const layout = "{sip: %d, eip: %d, len: %d, ptr: %d}"
	return fmt.Sprintf(layout, s.StartIP, s.EndIP, s.DataLen, s.DataPtr)
}
// ------------
// VectorIndexBlock is the decoded form of one vector index entry: a pair
// of uint32 pointers stored little-endian, FirstPtr followed by LastPtr.
type VectorIndexBlock struct {
	FirstPtr uint32
	LastPtr  uint32
}

// VectorIndexBlockDecode parses the first 8 bytes of input into a
// VectorIndexBlock. Extra trailing bytes are ignored; a shorter input is
// reported as an error.
func VectorIndexBlockDecode(input []byte) (*VectorIndexBlock, error) {
	if len(input) < 8 {
		return nil, fmt.Errorf("input should be not less than 8 bytes")
	}

	return &VectorIndexBlock{
		FirstPtr: binary.LittleEndian.Uint32(input),
		LastPtr:  binary.LittleEndian.Uint32(input[4:]),
	}, nil
}

// Encode serializes the block into a freshly allocated 8-byte slice using
// the same little-endian layout VectorIndexBlockDecode reads.
func (v VectorIndexBlock) Encode() []byte {
	var buff = make([]byte, 8)
	binary.LittleEndian.PutUint32(buff, v.FirstPtr)
	binary.LittleEndian.PutUint32(buff[4:], v.LastPtr)
	return buff
}

// String renders both pointers as decimal numbers for debugging. The
// labels match the struct field names.
func (v VectorIndexBlock) String() string {
	return fmt.Sprintf("{FirstPtr: %d, LastPtr: %d}", v.FirstPtr, v.LastPtr)
}

View File

@ -0,0 +1,196 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// ip2region database v2.0 searcher.
// @Note this is NOT a thread-safe implementation.
package ip2region
import (
"encoding/binary"
"fmt"
"os"
)
// Layout constants of the xdb file as read by Searcher.
const (
// HeaderInfoLength is the file offset at which the vector index starts;
// both LoadVectorIndex and Search skip this many leading bytes.
HeaderInfoLength = 256
// VectorIndexRows is the number of rows in the vector index. Search picks
// the row from the most significant byte of the ip.
VectorIndexRows = 256
// VectorIndexCols is the number of columns in the vector index. Search
// picks the column from the second most significant byte of the ip.
VectorIndexCols = 256
// VectorIndexSize is the size in bytes of one encoded vector index block.
VectorIndexSize = 8
)
// Searcher performs region lookups against an opened ip2region xdb file.
type Searcher struct {
// handle is the opened database file that every lookup reads from.
handle *os.File
// header is reserved for the header info. New leaves it nil and nothing
// visible in this file fills it in.
header []byte
// vectorIndex is the optional in-memory copy of the vector index, indexed
// as [row][col]. It stays nil until LoadVectorIndex succeeds. When set,
// Search takes the vector index block from memory instead of reading it
// from the file, which saves one file read per lookup.
vectorIndex [][]*VectorIndexBlock
}
// New opens dbFile read-only and returns a Searcher bound to it. The header
// and the vector index cache start out empty. The error from opening the
// file is returned unchanged.
func New(dbFile string) (*Searcher, error) {
	file, err := os.Open(dbFile)
	if err != nil {
		return nil, err
	}

	searcher := new(Searcher)
	searcher.handle = file
	return searcher, nil
}
// Close closes the underlying database file. The method has no return
// value, so an error reported by the close is deliberately discarded.
func (s *Searcher) Close() {
	_ = s.handle.Close()
}
// LoadVectorIndex loads and caches the whole vector index to speed up
// searching. The raw index takes
// VectorIndexRows x VectorIndexCols x VectorIndexSize bytes in the file;
// each entry is decoded into its own VectorIndexBlock.
//
// Calling it again after a successful load is a no-op. On any error the
// cache is left untouched.
func (s *Searcher) LoadVectorIndex() error {
	// loaded already
	if s.vectorIndex != nil {
		return nil
	}

	// Read every vector index block with one positioned read. ReadAt either
	// fills the buffer or returns an error, so no separate short-read check
	// is needed, and the file offset of the handle is not moved.
	var buff = make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
	if _, err := s.handle.ReadAt(buff, HeaderInfoLength); err != nil {
		return fmt.Errorf("read vector index: %w", err)
	}

	// decode the vector index blocks
	var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
	for r := 0; r < VectorIndexRows; r++ {
		vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
		for c := 0; c < VectorIndexCols; c++ {
			offset := (r*VectorIndexCols + c) * VectorIndexSize
			block, err := VectorIndexBlockDecode(buff[offset : offset+VectorIndexSize])
			if err != nil {
				return fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
			}
			vectorIndex[r][c] = block
		}
	}

	// Publish the cache only after every block decoded successfully.
	s.vectorIndex = vectorIndex
	return nil
}
// ClearVectorIndex drops the vector index cached by LoadVectorIndex, so
// later searches read their vector index block from the file again.
func (s *Searcher) ClearVectorIndex() {
s.vectorIndex = nil
}
// Search finds the region for the specified ip address, given as a 32-bit
// integer whose most significant byte is the first octet.
//
// It returns the region string of the segment whose [start, end] range
// contains ip. When no segment matches, or the matching segment has a data
// length of zero, it returns an empty string and a nil error. Read and
// decode failures are returned wrapped with the file position involved.
//
// All file access uses positioned reads (ReadAt), which either fill the
// buffer or fail, and which do not move the file offset of the handle.
func (s *Searcher) Search(ip uint32) (string, error) {
	// The two most significant bytes of the ip select the vector index cell.
	l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF

	// locate the segment index block based on the vector index
	var vIndex *VectorIndexBlock
	if s.vectorIndex != nil {
		vIndex = s.vectorIndex[l0][l1]
	} else {
		pos := int64(HeaderInfoLength + l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize)
		vBuff := make([]byte, VectorIndexSize)
		if _, err := s.handle.ReadAt(vBuff, pos); err != nil {
			return "", fmt.Errorf("read vector index[%d][%d] at %d: %w", l0, l1, pos, err)
		}

		var err error
		vIndex, err = VectorIndexBlockDecode(vBuff)
		if err != nil {
			return "", fmt.Errorf("invalid vector index block at %d: %w", pos, err)
		}
	}

	// binary search the segment index to get the region
	var dataLen, dataPtr = 0, uint32(0)
	var buff = make([]byte, SegmentIndexBlockSize)
	var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
	for l <= h {
		m := (l + h) >> 1
		p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
		if _, err := s.handle.ReadAt(buff, int64(p)); err != nil {
			return "", fmt.Errorf("read segment index at %d: %w", p, err)
		}

		// Decode the fields one at a time instead of building a full
		// SegmentIndexBlock: most probes only need the start ip, or the
		// start and end ip, to decide which half to continue with.
		sip := binary.LittleEndian.Uint32(buff)
		if ip < sip {
			h = m - 1
			continue
		}

		eip := binary.LittleEndian.Uint32(buff[4:])
		if ip > eip {
			l = m + 1
			continue
		}

		dataLen = int(binary.LittleEndian.Uint16(buff[8:]))
		dataPtr = binary.LittleEndian.Uint32(buff[10:])
		break
	}

	// Nothing matched, or the matching segment carries no data.
	if dataLen == 0 {
		return "", nil
	}

	// load and return the region data
	var regionBuff = make([]byte, dataLen)
	if _, err := s.handle.ReadAt(regionBuff, int64(dataPtr)); err != nil {
		return "", fmt.Errorf("read region data at %d: %w", dataPtr, err)
	}

	return string(regionBuff), nil
}

View File

@ -0,0 +1,14 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
package ip2region
import (
"fmt"
"testing"
)
// TestSearcher_Search is a placeholder: it only prints a marker line and
// makes no assertions about Searcher yet.
func TestSearcher_Search(t *testing.T) {
	fmt.Print("search testing...")
}

View File

@ -0,0 +1,45 @@
package ip2region
import (
"encoding/binary"
"fmt"
"strconv"
"strings"
)
// CheckIP validates a dotted-decimal IPv4 address and converts it to its
// 32-bit integer form, with the first octet in the most significant byte.
//
// The input must consist of exactly four parts separated by ".", each a
// decimal integer between 0 and 255 without a sign prefix. Anything else
// yields an error naming the offending part by its zero-based position.
func CheckIP(ip string) (uint32, error) {
	var ps = strings.Split(ip, ".")
	if len(ps) != 4 {
		return 0, fmt.Errorf("invalid ip address `%s`", ip)
	}

	var buff = make([]byte, 4)
	for i, s := range ps {
		d, err := strconv.Atoi(s)
		if err != nil {
			return 0, fmt.Errorf("the %dth part `%s` is not an integer", i, s)
		}

		if d < 0 || d > 255 {
			return 0, fmt.Errorf("the %dth part `%s` should be an integer between 0 and 255", i, s)
		}

		// strconv.Atoi accepts a leading '+' or '-', so "+1" and "-0" get
		// this far even though they are not valid octets. s is known to be
		// non-empty here because Atoi rejects the empty string.
		if s[0] == '+' || s[0] == '-' {
			return 0, fmt.Errorf("the %dth part `%s` should not have a sign prefix", i, s)
		}

		buff[i] = byte(d)
	}

	// convert the ip to integer
	return binary.BigEndian.Uint32(buff), nil
}
// Long2IP formats a 32-bit integer ip as a dotted-decimal IPv4 string,
// taking the first octet from the most significant byte.
func Long2IP(ip uint32) string {
	return fmt.Sprintf("%d.%d.%d.%d", byte(ip>>24), byte(ip>>16), byte(ip>>8), byte(ip))
}
// MidIP returns the midpoint of sip and eip, rounded down, i.e.
// floor((sip+eip)/2) computed without overflowing 32 bits.
func MidIP(sip uint32, eip uint32) uint32 {
	// sip+eip == 2*(sip&eip) + (sip^eip), so halving can be done per term:
	// the shared bits count once and the differing bits are shifted down.
	shared := sip & eip
	differing := sip ^ eip
	return shared + differing>>1
}

221
binding/golang/main.go Normal file
View File

@ -0,0 +1,221 @@
package main
import (
"bufio"
"fmt"
"github.com/lionsoul2014/ip2region/binding/golang/ip2region"
"github.com/mitchellh/go-homedir"
"log"
"os"
"strings"
"time"
)
// printHelp writes the top-level usage text, listing the available
// commands, to standard output.
func printHelp() {
	usage := []string{
		"ip2region searcher 2.0\n",
		"searcher [command] [command options]\n",
		"Command: \n",
		" search search input test\n",
		" bench search bench test\n",
	}
	for _, line := range usage {
		fmt.Print(line)
	}
}
// testSearch implements the interactive `search` command.
//
// It reads the xdb path from a `--db=<path>` argument, opens a searcher on
// it and then repeatedly prompts for an ip address on standard input,
// printing the region found and the time the lookup took. The loop ends on
// `quit` or when standard input is exhausted; either way the searcher is
// closed before returning.
func testSearch() {
	var dbFile = ""
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		if len(r) < 5 {
			continue
		}

		if !strings.HasPrefix(r, "--") {
			continue
		}

		var eIdx = strings.Index(r, "=")
		if eIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:eIdx] {
		case "db":
			dbFile = r[eIdx+1:]
		}
	}

	if dbFile == "" {
		fmt.Printf("searcher search [command options]\n")
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary xdb file path\n")
		return
	}

	dbPath, err := homedir.Expand(dbFile)
	if err != nil {
		fmt.Printf("invalid xdb file path `%s`: %s\n", dbFile, err)
		return
	}

	searcher, err := ip2region.New(dbPath)
	if err != nil {
		log.Fatalf("failed to create searcher: %s", err.Error())
	}
	defer func() {
		searcher.Close()
		fmt.Printf("searcher test program exited, thanks for trying\n")
	}()

	fmt.Println("ip2region 2.0 searcher test program, type `quit` to exit")

	// A Scanner treats end of input as a normal end of the session, so the
	// deferred cleanup above still runs when stdin is closed or piped.
	scanner := bufio.NewScanner(os.Stdin)
	for {
		fmt.Print("ip2region>> ")
		if !scanner.Scan() {
			// Finish the pending prompt line before the exit message.
			fmt.Println()
			break
		}

		line := strings.TrimSpace(scanner.Text())
		if len(line) == 0 {
			continue
		}

		if line == "quit" {
			break
		}

		ip, err := ip2region.CheckIP(line)
		if err != nil {
			fmt.Printf("invalid ip address `%s`\n", line)
			continue
		}

		tStart := time.Now()
		region, err := searcher.Search(ip)
		if err != nil {
			fmt.Printf("\x1b[0;31merr:%s\x1b[0m\n", err.Error())
		} else {
			fmt.Printf("\x1b[0;32m{region:%s, took:%s}\x1b[0m\n", region, time.Since(tStart))
		}
	}

	// Scan stops silently on a read error; report it instead of hiding it.
	if err := scanner.Err(); err != nil {
		fmt.Printf("failed to read string: %s\n", err)
	}
}
// testBench implements the `bench` command.
//
// It reads the xdb path from `--db=<path>` and a source text file from
// `--src=<path>`. Every source line must look like `start|end|region`. For
// each line it searches five addresses spread over [start, end] and stops
// at the first lookup that fails or returns a region different from the one
// on the line. On success it prints the number of lookups, the total time
// and the average cost per lookup.
func testBench() {
	var dbFile, srcFile = "", ""
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		if len(r) < 5 {
			continue
		}

		if !strings.HasPrefix(r, "--") {
			continue
		}

		var eIdx = strings.Index(r, "=")
		if eIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:eIdx] {
		case "db":
			dbFile = r[eIdx+1:]
		case "src":
			srcFile = r[eIdx+1:]
		}
	}

	if dbFile == "" || srcFile == "" {
		fmt.Printf("searcher bench [command options]\n")
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary xdb file path\n")
		fmt.Printf(" --src string source ip text file path\n")
		return
	}

	dbPath, err := homedir.Expand(dbFile)
	if err != nil {
		fmt.Printf("invalid xdb file path `%s`: %s\n", dbFile, err)
		return
	}

	// Check the error before deferring Close: on failure the searcher is
	// nil and closing it would dereference a nil pointer.
	searcher, err := ip2region.New(dbPath)
	if err != nil {
		fmt.Printf("failed to create searcher: %s\n", err)
		return
	}
	defer searcher.Close()

	handle, err := os.Open(srcFile)
	if err != nil {
		fmt.Printf("failed to open source text file: %s\n", err)
		return
	}
	defer handle.Close()

	var count, tStart = 0, time.Now()
	var scanner = bufio.NewScanner(handle)
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		var l = strings.TrimSpace(scanner.Text())
		var ps = strings.SplitN(l, "|", 3)
		if len(ps) != 3 {
			fmt.Printf("invalid ip segment line `%s`\n", l)
			return
		}

		sip, err := ip2region.CheckIP(ps[0])
		if err != nil {
			fmt.Printf("check start ip `%s`: %s\n", ps[0], err)
			return
		}

		eip, err := ip2region.CheckIP(ps[1])
		if err != nil {
			fmt.Printf("check end ip `%s`: %s\n", ps[1], err)
			return
		}

		if sip > eip {
			fmt.Printf("start ip(%s) should not be greater than end ip(%s)\n", ps[0], ps[1])
			return
		}

		// Probe both ends, the middle and the two quarter points.
		mip := ip2region.MidIP(sip, eip)
		for _, ip := range []uint32{sip, ip2region.MidIP(sip, mip), mip, ip2region.MidIP(mip, eip), eip} {
			region, err := searcher.Search(ip)
			if err != nil {
				fmt.Printf("failed to search ip '%s': %s\n", ip2region.Long2IP(ip), err)
				return
			}

			// check the region info
			if region != ps[2] {
				fmt.Printf("failed Search(%s) with (%s != %s)\n", ip2region.Long2IP(ip), region, ps[2])
				return
			}

			count++
		}
	}

	// Scan stops silently on a read error, which would otherwise look like
	// a short but successful run.
	if err := scanner.Err(); err != nil {
		fmt.Printf("failed to read source text file: %s\n", err)
		return
	}

	cost := time.Since(tStart)

	// An empty source file performs no lookups; avoid dividing by zero.
	var perOp int64
	if count > 0 {
		perOp = cost.Nanoseconds() / int64(count)
	}
	fmt.Printf("Bench finished, {total: %d, took: %s, cost: %d ns/op}\n", count, cost, perOp)
}
// main dispatches to the command named by the first program argument,
// matched case-insensitively. With no argument, or an unrecognized one, it
// prints the usage text instead.
func main() {
	if len(os.Args) < 2 {
		printHelp()
		return
	}

	// set the log flag
	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

	commands := map[string]func(){
		"search": testSearch,
		"bench":  testBench,
	}
	run, known := commands[strings.ToLower(os.Args[1])]
	if !known {
		run = printHelp
	}
	run()
}