mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
Add golang xdb maker implementation
This commit is contained in:
parent
d746d5d4b2
commit
602dc9203b
@ -1,7 +1,34 @@
|
||||
# ip2region xdb golang 生成实现
|
||||
|
||||
# 程序编译
|
||||
通过如下方式编译得到 dbmaker 可执行程序:
|
||||
```
|
||||
# 切换到golang maker 根目录
|
||||
cd ./
|
||||
go build
|
||||
```
|
||||
编译成功后会在当前目录生成一个 dbmaker 的可执行文件
|
||||
|
||||
# 数据生成
|
||||
|
||||
通过 `dbmaker gen` 命令生成 ip2region.xdb 二进制文件:
|
||||
```bash
|
||||
➜ golang git:(v2.0_xdb) ✗ ./dbmaker gen
|
||||
dbmaker gen [command options]
|
||||
options:
|
||||
--src string source ip text file path
|
||||
--dst string destination binary db file path
|
||||
```
|
||||
|
||||
例如,使用默认的 data/ip.merge.txt 作为源数据,生成一个 ip2region.xdb 到当前目录:
|
||||
```
|
||||
./dbmaker gen --src=../../data/ip.merge.txt --dst=./ip2region.xdb
|
||||
# 会看到一堆输出,最终会看到类似如下输出表示运行结束
|
||||
...
|
||||
2022/06/16 16:38:48 maker.go:317: write done, with 13804 data blocks and (683591, 720221) index blocks
|
||||
2022/06/16 16:38:48 main.go:89: Done, elapsed: 33.615278847s
|
||||
```
|
||||
|
||||
# 数据查询
|
||||
|
||||
# bench 测试
|
||||
|
||||
3
maker/golang/go.mod
Normal file
3
maker/golang/go.mod
Normal file
@ -0,0 +1,3 @@
|
||||
module dbmaker
|
||||
|
||||
go 1.17
|
||||
2
maker/golang/go.sum
Normal file
2
maker/golang/go.sum
Normal file
@ -0,0 +1,2 @@
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
93
maker/golang/index.go
Normal file
93
maker/golang/index.go
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IndexPolicy identifies the index layout used when building an xdb file.
type IndexPolicy int

// Index policies understood by the maker.
const (
	VectorIndexPolicy IndexPolicy = 1
	BTreeIndexPolicy  IndexPolicy = 2
)

// IndexPolicyFromString maps a policy name to its IndexPolicy value.
// The name is matched case-insensitively against "vector" and "btree".
// For any other name it returns VectorIndexPolicy together with a
// non-nil error that quotes the rejected input.
func IndexPolicyFromString(str string) (IndexPolicy, error) {
	name := strings.ToLower(str)
	if name == "vector" {
		return VectorIndexPolicy, nil
	}
	if name == "btree" {
		return BTreeIndexPolicy, nil
	}
	return VectorIndexPolicy, fmt.Errorf("invalid policy '%s'", str)
}
|
||||
|
||||
// SegmentIndexBlockSize is the encoded size, in bytes, of one SegmentIndexBlock.
const SegmentIndexBlockSize = 14

// SegmentIndexBlock is one entry of the segment index: an inclusive ip range
// plus the length and file position of the region data it refers to.
type SegmentIndexBlock struct {
	StartIP uint32
	EndIP   uint32
	DataLen uint16
	DataPtr uint32
}

// SegmentIndexDecode parses the first 14 bytes of input as a little-endian
// segment index entry (start ip, end ip, data length, data ptr).
// It returns an error when input holds fewer than 14 bytes.
func SegmentIndexDecode(input []byte) (*SegmentIndexBlock, error) {
	if len(input) < 14 {
		return nil, fmt.Errorf("input is less than 14 bytes")
	}

	le := binary.LittleEndian
	block := new(SegmentIndexBlock)
	block.StartIP = le.Uint32(input[0:])
	block.EndIP = le.Uint32(input[4:])
	block.DataLen = le.Uint16(input[8:])
	block.DataPtr = le.Uint32(input[10:])
	return block, nil
}

// Encode serializes the entry into a fresh 14-byte little-endian buffer
// laid out as: start ip (4) | end ip (4) | data length (2) | data ptr (4).
func (s *SegmentIndexBlock) Encode() []byte {
	le := binary.LittleEndian
	out := make([]byte, 14)
	le.PutUint32(out[0:], s.StartIP)
	le.PutUint32(out[4:], s.EndIP)
	le.PutUint16(out[8:], s.DataLen)
	le.PutUint32(out[10:], s.DataPtr)
	return out
}

// String renders the entry with all four fields as decimal numbers.
func (s *SegmentIndexBlock) String() string {
	const layout = "{sip: %d, eip: %d, len: %d, ptr: %d}"
	return fmt.Sprintf(layout, s.StartIP, s.EndIP, s.DataLen, s.DataPtr)
}
|
||||
|
||||
// ------------
|
||||
|
||||
// VectorIndexBlock is one cell of the vector index. It stores the file
// position of the first segment index entry for the cell and the position
// just past the last one.
type VectorIndexBlock struct {
	FirstPtr uint32
	LastPtr  uint32
}

// VectorIndexBlockDecode parses the first 8 bytes of input as two
// little-endian uint32 values (first ptr, last ptr).
// It returns an error when input holds fewer than 8 bytes.
func VectorIndexBlockDecode(input []byte) (*VectorIndexBlock, error) {
	if len(input) < 8 {
		return nil, fmt.Errorf("input should be not less than 8 bytes")
	}

	return &VectorIndexBlock{
		FirstPtr: binary.LittleEndian.Uint32(input),
		LastPtr:  binary.LittleEndian.Uint32(input[4:]),
	}, nil
}

// Encode serializes the block into a fresh 8-byte little-endian buffer
// laid out as: first ptr (4) | last ptr (4).
func (v VectorIndexBlock) Encode() []byte {
	var buff = make([]byte, 8)
	binary.LittleEndian.PutUint32(buff, v.FirstPtr)
	binary.LittleEndian.PutUint32(buff[4:], v.LastPtr)
	return buff
}

// String renders both pointers as decimal numbers.
func (v VectorIndexBlock) String() string {
	return fmt.Sprintf("{FirstPtr: %d, LastPtr: %d}", v.FirstPtr, v.LastPtr)
}
|
||||
313
maker/golang/main.go
Normal file
313
maker/golang/main.go
Normal file
@ -0,0 +1,313 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// printHelp writes the top-level usage text, listing the available
// sub-commands, to standard output.
func printHelp() {
	usage := []string{
		"ip2region dbmaker 2.0\n",
		"dbmaker [command] [command options]\n",
		"Command: \n",
		" gen generate the binary db file\n",
		" search binary db search test\n",
		" bench binary db bench test\n",
	}
	for _, line := range usage {
		fmt.Print(line)
	}
}
|
||||
|
||||
func genDb() {
|
||||
var err error
|
||||
var srcFile, dstFile = "", ""
|
||||
var indexPolicy = VectorIndexPolicy
|
||||
for i := 2; i < len(os.Args); i++ {
|
||||
r := os.Args[i]
|
||||
if len(r) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Index(r, "--") != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var eIdx = strings.Index(r, "=")
|
||||
if eIdx < 0 {
|
||||
fmt.Printf("missing = for args pair '%s'\n", r)
|
||||
return
|
||||
}
|
||||
|
||||
switch r[2:eIdx] {
|
||||
case "src":
|
||||
srcFile = r[eIdx+1:]
|
||||
case "dst":
|
||||
dstFile = r[eIdx+1:]
|
||||
case "index":
|
||||
indexPolicy, err = IndexPolicyFromString(r[eIdx+1:])
|
||||
if err != nil {
|
||||
fmt.Printf("parse policy: %s", err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if srcFile == "" || dstFile == "" {
|
||||
fmt.Printf("dbmaker gen [command options]\n")
|
||||
fmt.Printf("options:\n")
|
||||
fmt.Printf(" --src string source ip text file path\n")
|
||||
fmt.Printf(" --dst string destination binary db file path\n")
|
||||
return
|
||||
}
|
||||
|
||||
// make the binary file
|
||||
tStart := time.Now()
|
||||
maker, err := NewMaker(indexPolicy, srcFile, dstFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to create maker: %s", err)
|
||||
}
|
||||
|
||||
err = maker.Init()
|
||||
if err != nil {
|
||||
log.Fatalf("failed Init: %s", err)
|
||||
}
|
||||
|
||||
err = maker.Start()
|
||||
if err != nil {
|
||||
log.Fatalf("failed Start: %s", err)
|
||||
}
|
||||
|
||||
err = maker.End()
|
||||
if err != nil {
|
||||
log.Fatalf("failed End: %s", err)
|
||||
}
|
||||
|
||||
log.Printf("Done, elapsed: %s\n", time.Since(tStart))
|
||||
}
|
||||
|
||||
func testSearch() {
|
||||
var err error
|
||||
var dbFile = ""
|
||||
for i := 2; i < len(os.Args); i++ {
|
||||
r := os.Args[i]
|
||||
if len(r) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Index(r, "--") != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var eIdx = strings.Index(r, "=")
|
||||
if eIdx < 0 {
|
||||
fmt.Printf("missing = for args pair '%s'\n", r)
|
||||
return
|
||||
}
|
||||
|
||||
switch r[2:eIdx] {
|
||||
case "db":
|
||||
dbFile = r[eIdx+1:]
|
||||
}
|
||||
}
|
||||
|
||||
if dbFile == "" {
|
||||
fmt.Printf("dbmaker test [command options]\n")
|
||||
fmt.Printf("options:\n")
|
||||
fmt.Printf(" --db string ip2region binary db file path\n")
|
||||
return
|
||||
}
|
||||
|
||||
searcher, err := NewSearcher(dbFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to create searcher: %s", err.Error())
|
||||
}
|
||||
defer func() {
|
||||
searcher.Close()
|
||||
fmt.Printf("test program exited, thanks for trying\n")
|
||||
}()
|
||||
|
||||
fmt.Println(`ip2region 2.0 test program, commands:
|
||||
loadIndex : load the vector index for search speedup.
|
||||
clearIndex: clear the vector index.
|
||||
quit : exit the test program`)
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
for {
|
||||
fmt.Print("ip2region>> ")
|
||||
str, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
log.Fatalf("failed to read string: %s", err)
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(strings.TrimSuffix(str, "\n"))
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// command interception and execution
|
||||
if line == "loadIndex" {
|
||||
err = searcher.LoadVectorIndex()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to load vector index: %s", err)
|
||||
}
|
||||
fmt.Printf("vector index cached\n")
|
||||
continue
|
||||
} else if line == "clearIndex" {
|
||||
searcher.ClearVectorIndex()
|
||||
fmt.Printf("vector index cleared\n")
|
||||
continue
|
||||
} else if line == "quit" {
|
||||
break
|
||||
}
|
||||
|
||||
ip, err := CheckIP(line)
|
||||
if err != nil {
|
||||
fmt.Printf("invalid ip address `%s`\n", line)
|
||||
continue
|
||||
}
|
||||
|
||||
tStart := time.Now()
|
||||
region, ioCount, err := searcher.Search(ip)
|
||||
if err != nil {
|
||||
fmt.Printf("\x1b[0;31m{err:%s, iocount:%d}\x1b[0m\n", err.Error(), ioCount)
|
||||
} else {
|
||||
fmt.Printf("\x1b[0;32m{region:%s, iocount:%d, took:%s}\x1b[0m\n", region, ioCount, time.Since(tStart))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testBench() {
|
||||
var err error
|
||||
var dbFile, srcFile = "", ""
|
||||
var ignoreError = false
|
||||
for i := 2; i < len(os.Args); i++ {
|
||||
r := os.Args[i]
|
||||
if len(r) < 5 {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Index(r, "--") != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var eIdx = strings.Index(r, "=")
|
||||
if eIdx < 0 {
|
||||
fmt.Printf("missing = for args pair '%s'\n", r)
|
||||
return
|
||||
}
|
||||
|
||||
switch r[2:eIdx] {
|
||||
case "db":
|
||||
dbFile = r[eIdx+1:]
|
||||
case "src":
|
||||
srcFile = r[eIdx+1:]
|
||||
case "ignore-error":
|
||||
v := r[eIdx+1:]
|
||||
if v == "true" || v == "1" {
|
||||
ignoreError = true
|
||||
} else if v == "false" || v == "0" {
|
||||
ignoreError = false
|
||||
} else {
|
||||
fmt.Printf("invalid value for ignore-error option, could be false/0 or true/1\n")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if dbFile == "" || srcFile == "" {
|
||||
fmt.Printf("dbmaker bench [command options]\n")
|
||||
fmt.Printf("options:\n")
|
||||
fmt.Printf(" --db string ip2region binary xdb file path\n")
|
||||
fmt.Printf(" --src string source ip text file path\n")
|
||||
fmt.Printf(" --ignore-error bool keep going if bench failed\n")
|
||||
return
|
||||
}
|
||||
|
||||
searcher, err := NewSearcher(dbFile)
|
||||
defer func() {
|
||||
searcher.Close()
|
||||
}()
|
||||
|
||||
handle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to open source text file: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
var count, errCount, tStart = 0, 0, time.Now()
|
||||
var scanner = bufio.NewScanner(handle)
|
||||
scanner.Split(bufio.ScanLines)
|
||||
for scanner.Scan() {
|
||||
var l = strings.TrimSpace(strings.TrimSuffix(scanner.Text(), "\n"))
|
||||
var ps = strings.SplitN(l, "|", 3)
|
||||
if len(ps) != 3 {
|
||||
fmt.Printf("invalid ip segment line `%s`\n", l)
|
||||
return
|
||||
}
|
||||
|
||||
sip, err := CheckIP(ps[0])
|
||||
if err != nil {
|
||||
fmt.Printf("check start ip `%s`: %s\n", ps[0], err)
|
||||
return
|
||||
}
|
||||
|
||||
eip, err := CheckIP(ps[1])
|
||||
if err != nil {
|
||||
fmt.Printf("check end ip `%s`: %s\n", ps[1], err)
|
||||
return
|
||||
}
|
||||
|
||||
if sip > eip {
|
||||
fmt.Printf("start ip(%s) should not be greater than end ip(%s)\n", ps[0], ps[1])
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("try to bench segment: `%s`\n", l)
|
||||
mip := MidIP(sip, eip)
|
||||
for _, ip := range []uint32{sip, MidIP(sip, mip), mip, MidIP(mip, eip), eip} {
|
||||
fmt.Printf("|-try to bench ip '%s' ... ", Long2IP(ip))
|
||||
region, _, err := searcher.Search(ip)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to search ip '%s': %s\n", Long2IP(ip), err)
|
||||
return
|
||||
}
|
||||
|
||||
// check the region info
|
||||
count++
|
||||
if region != ps[2] {
|
||||
errCount++
|
||||
fmt.Printf(" --[Failed] (%s != %s)\n", region, ps[2])
|
||||
if ignoreError == false {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
fmt.Printf(" --[Ok]\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("Bench finished, {count: %d, failed: %d, took: %s}\n", count, errCount, time.Since(tStart))
|
||||
}
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
printHelp()
|
||||
return
|
||||
}
|
||||
|
||||
// set the log flag
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
|
||||
switch strings.ToLower(os.Args[1]) {
|
||||
case "gen":
|
||||
genDb()
|
||||
case "search":
|
||||
testSearch()
|
||||
case "bench":
|
||||
testBench()
|
||||
default:
|
||||
printHelp()
|
||||
}
|
||||
}
|
||||
334
maker/golang/maker.go
Normal file
334
maker/golang/maker.go
Normal file
@ -0,0 +1,334 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// ----
|
||||
// ip2region database v2.0 structure
|
||||
//
|
||||
// +----------------+-------------------+---------------+--------------+
|
||||
// | header space | speed up index | data payload | block index |
|
||||
// +----------------+-------------------+---------------+--------------+
|
||||
// | 256 bytes | 512 KiB (fixed) | dynamic size | dynamic size |
|
||||
// +----------------+-------------------+---------------+--------------+
|
||||
//
|
||||
// 1. padding space : for header info like block index ptr, version, release date eg ... or any other temporary needs.
|
||||
// -- 2bytes: version number, a different version means a structure update, it is fixed to 2 for now
|
||||
// -- 2bytes: index algorithm code.
|
||||
// -- 4bytes: generate unix timestamp (version)
|
||||
// -- 4bytes: index block start ptr
|
||||
// -- 4bytes: index block end ptr
|
||||
//
|
||||
//
|
||||
// 2. data block : region or whatever data info.
|
||||
// 3. segment index block : binary index block.
|
||||
// 4. vector index block : fixed index info for block index search speed up.
|
||||
// space structure table:
|
||||
// -- 0 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
|
||||
// -- 1 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
|
||||
// -- 2 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
|
||||
// -- ...
|
||||
// -- 255 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
|
||||
//
|
||||
//
|
||||
// super block structure:
|
||||
// +-----------------------+----------------------+
|
||||
// | first index block ptr | last index block ptr |
|
||||
// +-----------------------+----------------------+
|
||||
//
|
||||
// data entry structure:
|
||||
// +--------------------+-----------------------+
|
||||
// | 2bytes (for desc) | dynamic length |
|
||||
// +--------------------+-----------------------+
|
||||
// data length whatever in bytes
|
||||
//
|
||||
// index entry structure
|
||||
// +------------+-----------+---------------+------------+
|
||||
// | 4bytes | 4bytes | 2bytes | 4 bytes |
|
||||
// +------------+-----------+---------------+------------+
|
||||
// start ip end ip data length data ptr
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Layout constants of the xdb binary file.
const (
	// VersionNo is the structure version number written to the header.
	VersionNo = 2

	// HeaderInfoLength is the size of the header space in bytes.
	HeaderInfoLength = 256

	// The vector index is a VectorIndexRows x VectorIndexCols table whose
	// cells take VectorIndexSize bytes each.
	VectorIndexRows = 256
	VectorIndexCols = 256
	VectorIndexSize = 8

	// VectorIndexLength is the total size of the vector index in bytes.
	VectorIndexLength = VectorIndexRows * VectorIndexCols * VectorIndexSize
)
|
||||
|
||||
type Maker struct {
|
||||
srcHandle *os.File
|
||||
dstHandle *os.File
|
||||
|
||||
indexPolicy IndexPolicy
|
||||
segments []*Segment
|
||||
regionPool map[string]uint32
|
||||
vectorIndex [VectorIndexCols][VectorIndexRows]VectorIndexBlock
|
||||
}
|
||||
|
||||
func NewMaker(policy IndexPolicy, srcFile string, dstFile string) (*Maker, error) {
|
||||
// open the source file with READONLY mode
|
||||
srcHandle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open source file `%s`: %w", srcFile, err)
|
||||
}
|
||||
|
||||
// open the destination file with Read/Write mode
|
||||
dstHandle, err := os.OpenFile(dstFile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open target file `%s`: %w", dstFile, err)
|
||||
}
|
||||
|
||||
return &Maker{
|
||||
srcHandle: srcHandle,
|
||||
dstHandle: dstHandle,
|
||||
|
||||
indexPolicy: policy,
|
||||
segments: []*Segment{},
|
||||
regionPool: map[string]uint32{},
|
||||
vectorIndex: [VectorIndexCols][VectorIndexRows]VectorIndexBlock{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *Maker) initDbHeader() error {
|
||||
log.Printf("try to init the db header ... ")
|
||||
|
||||
_, err := m.dstHandle.Seek(0, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// make and write the header space
|
||||
var header = make([]byte, 256)
|
||||
|
||||
// 1, version number
|
||||
binary.LittleEndian.PutUint16(header, uint16(VersionNo))
|
||||
|
||||
// 2, index policy code
|
||||
binary.LittleEndian.PutUint16(header, uint16(m.indexPolicy))
|
||||
|
||||
// 3, generate unix timestamp
|
||||
binary.LittleEndian.PutUint32(header[2:], uint32(time.Now().Unix()))
|
||||
|
||||
// 4, index block start ptr
|
||||
binary.LittleEndian.PutUint32(header[6:], uint32(0))
|
||||
|
||||
// 5, index block end ptr
|
||||
binary.LittleEndian.PutUint32(header[10:], uint32(0))
|
||||
|
||||
_, err = m.dstHandle.Write(header)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Maker) loadSegments() error {
|
||||
log.Printf("try to load the segments ... ")
|
||||
var last *Segment = nil
|
||||
var tStart = time.Now()
|
||||
|
||||
var scanner = bufio.NewScanner(m.srcHandle)
|
||||
scanner.Split(bufio.ScanLines)
|
||||
for scanner.Scan() {
|
||||
var l = strings.TrimSpace(strings.TrimSuffix(scanner.Text(), "\n"))
|
||||
log.Printf("load segment: `%s`", l)
|
||||
|
||||
var ps = strings.SplitN(l, "|", 3)
|
||||
if len(ps) != 3 {
|
||||
return fmt.Errorf("invalid ip segment line `%s`", l)
|
||||
}
|
||||
|
||||
sip, err := CheckIP(ps[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("check start ip `%s`: %s", ps[0], err)
|
||||
}
|
||||
|
||||
eip, err := CheckIP(ps[1])
|
||||
if err != nil {
|
||||
return fmt.Errorf("check end ip `%s`: %s", ps[1], err)
|
||||
}
|
||||
|
||||
if sip > eip {
|
||||
return fmt.Errorf("start ip(%s) should not be greater than end ip(%s)", ps[0], ps[1])
|
||||
}
|
||||
|
||||
if len(ps[2]) < 1 {
|
||||
return fmt.Errorf("empty region info in segment line `%s`", l)
|
||||
}
|
||||
|
||||
var seg = &Segment{
|
||||
StartIP: sip,
|
||||
EndIP: eip,
|
||||
Region: ps[2],
|
||||
}
|
||||
|
||||
// check the continuity of the data segment
|
||||
if last != nil {
|
||||
if last.EndIP+1 != seg.StartIP {
|
||||
return fmt.Errorf("discontinuous data segment: last.eip+1(%d) != seg.sip(%d, %s)", sip, eip, ps[0])
|
||||
}
|
||||
}
|
||||
|
||||
m.segments = append(m.segments, seg)
|
||||
last = seg
|
||||
}
|
||||
|
||||
log.Printf("all segments loaded, length: %d, elapsed: %s", len(m.segments), time.Since(tStart))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Init the db binary file
|
||||
func (m *Maker) Init() error {
|
||||
// init the db header
|
||||
err := m.initDbHeader()
|
||||
if err != nil {
|
||||
return fmt.Errorf("init db header: %w", err)
|
||||
}
|
||||
|
||||
// load all the segments
|
||||
err = m.loadSegments()
|
||||
if err != nil {
|
||||
return fmt.Errorf("load segments: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// refresh the vector index of the specified ip
|
||||
func (m *Maker) setVectorIndex(ip uint32, ptr uint32) {
|
||||
var viBlock = &m.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
|
||||
if viBlock.FirstPtr == 0 {
|
||||
viBlock.FirstPtr = ptr
|
||||
viBlock.LastPtr = ptr + SegmentIndexBlockSize
|
||||
} else {
|
||||
viBlock.LastPtr = ptr + SegmentIndexBlockSize
|
||||
}
|
||||
}
|
||||
|
||||
// Start to make the binary file
|
||||
func (m *Maker) Start() error {
|
||||
if len(m.segments) < 1 {
|
||||
return fmt.Errorf("empty segment list")
|
||||
}
|
||||
|
||||
// 1, write all the region/data to the binary file
|
||||
_, err := m.dstHandle.Seek(int64(HeaderInfoLength+VectorIndexLength), 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek to data first ptr: %w", err)
|
||||
}
|
||||
|
||||
log.Printf("try to write the data block ... ")
|
||||
for _, seg := range m.segments {
|
||||
log.Printf("try to write region '%s' ... ", seg.Region)
|
||||
ptr, has := m.regionPool[seg.Region]
|
||||
if has {
|
||||
log.Printf(" --[Cached] with ptr=%d", ptr)
|
||||
continue
|
||||
}
|
||||
|
||||
var region = []byte(seg.Region)
|
||||
if len(region) > 0xFFFF {
|
||||
return fmt.Errorf("too long region info `%s`: shoul be less than %d bytes", seg.Region, 0xFFFF)
|
||||
}
|
||||
|
||||
// get the first ptr of the next region
|
||||
pos, err := m.dstHandle.Seek(0, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek to current ptr: %w", err)
|
||||
}
|
||||
|
||||
_, err = m.dstHandle.Write(region)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write region '%s': %w", seg.Region, err)
|
||||
}
|
||||
|
||||
m.regionPool[seg.Region] = uint32(pos)
|
||||
log.Printf(" --[Added] with ptr=%d", pos)
|
||||
}
|
||||
|
||||
// 2, write the index block and cache the super index block
|
||||
log.Printf("try to write the segment index block ... ")
|
||||
var counter = 0
|
||||
for _, seg := range m.segments {
|
||||
dataPtr, has := m.regionPool[seg.Region]
|
||||
if !has {
|
||||
return fmt.Errorf("missing ptr cache for region `%s`", seg.Region)
|
||||
}
|
||||
|
||||
var dataLen = len(seg.Region)
|
||||
if dataLen < 1 {
|
||||
// @TODO: could this even be a case ?
|
||||
return fmt.Errorf("empty region info for segment '%s'", seg)
|
||||
}
|
||||
|
||||
var segList = seg.Split()
|
||||
log.Printf("try to index segment(%d splits) %s ...", len(segList), seg.String())
|
||||
for _, s := range segList {
|
||||
pos, err := m.dstHandle.Seek(0, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek to segment index block: %w", err)
|
||||
}
|
||||
|
||||
var sIndex = &SegmentIndexBlock{
|
||||
StartIP: s.StartIP,
|
||||
EndIP: s.EndIP,
|
||||
DataLen: uint16(dataLen),
|
||||
DataPtr: dataPtr,
|
||||
}
|
||||
|
||||
_, err = m.dstHandle.Write(sIndex.Encode())
|
||||
if err != nil {
|
||||
return fmt.Errorf("write segment index for '%s': %w", s.String(), err)
|
||||
}
|
||||
|
||||
log.Printf("|-segment index: %d, ptr: %d, segment: %s\n", counter, pos, s.String())
|
||||
m.setVectorIndex(s.StartIP, uint32(pos))
|
||||
counter++
|
||||
}
|
||||
}
|
||||
|
||||
// synchronized the vector index block
|
||||
_, err = m.dstHandle.Seek(int64(HeaderInfoLength), 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek vector index first ptr: %w", err)
|
||||
}
|
||||
|
||||
for i, l := range m.vectorIndex {
|
||||
for j, c := range l {
|
||||
_, err = m.dstHandle.Write(c.Encode())
|
||||
if err != nil {
|
||||
return fmt.Errorf("write vector index [%d][%d]: %w", i, j, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("write done, with %d data blocks and (%d, %d) index blocks", len(m.regionPool), len(m.segments), counter)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Maker) End() error {
|
||||
err := m.dstHandle.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = m.srcHandle.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
197
maker/golang/searcher.go
Normal file
197
maker/golang/searcher.go
Normal file
@ -0,0 +1,197 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// ---
|
||||
// ip2region database v2.0 searcher.
|
||||
// this is part of the maker for testing and validation.
|
||||
// please use the searcher in binding/golang for production use.
|
||||
// Note: this implementation is not thread safe.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
type Searcher struct {
|
||||
handle *os.File
|
||||
|
||||
// header info
|
||||
header []byte
|
||||
|
||||
// use it only when this feature enabled.
|
||||
// Preload the vector index will reduce the number of IO operations
|
||||
// thus speedup the search process
|
||||
vectorIndex [][]*VectorIndexBlock
|
||||
}
|
||||
|
||||
func NewSearcher(dbFile string) (*Searcher, error) {
|
||||
handle, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Searcher{
|
||||
handle: handle,
|
||||
header: nil,
|
||||
|
||||
vectorIndex: nil,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *Searcher) Close() {
|
||||
err := s.handle.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// LoadVectorIndex load and cache the vector index for search speedup.
|
||||
// this will take up VectorIndexRows x VectorIndexCols x VectorIndexSize bytes memory.
|
||||
func (s *Searcher) LoadVectorIndex() error {
|
||||
// loaded already
|
||||
if s.vectorIndex != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// load all the vector index block
|
||||
_, err := s.handle.Seek(HeaderInfoLength, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("seek to vector index: %w", err)
|
||||
}
|
||||
|
||||
var buff = make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
|
||||
rLen, err := s.handle.Read(buff)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
// decode the vector index blocks
|
||||
var vectorIndex = make([][]*VectorIndexBlock, VectorIndexRows)
|
||||
for r := 0; r < VectorIndexRows; r++ {
|
||||
vectorIndex[r] = make([]*VectorIndexBlock, VectorIndexCols)
|
||||
for c := 0; c < VectorIndexCols; c++ {
|
||||
offset := r*VectorIndexCols*VectorIndexSize + c*VectorIndexSize
|
||||
vectorIndex[r][c], err = VectorIndexBlockDecode(buff[offset:])
|
||||
if err != nil {
|
||||
return fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.vectorIndex = vectorIndex
|
||||
return nil
|
||||
}
|
||||
|
||||
// ClearVectorIndex clear preloaded vector index cache
|
||||
func (s *Searcher) ClearVectorIndex() {
|
||||
s.vectorIndex = nil
|
||||
}
|
||||
|
||||
// Search find the region for the specified ip address
|
||||
func (s *Searcher) Search(ip uint32) (string, int, error) {
|
||||
// locate the segment index block based on the vector index
|
||||
var ioCount = 0
|
||||
var vIndex *VectorIndexBlock
|
||||
if s.vectorIndex != nil {
|
||||
vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
|
||||
} else {
|
||||
l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
|
||||
offset := l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
|
||||
pos, err := s.handle.Seek(int64(HeaderInfoLength+offset), 0)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("seek to vector index[%d][%d]: %w", l0, l1, err)
|
||||
}
|
||||
|
||||
ioCount++
|
||||
var buff = make([]byte, 8)
|
||||
rLen, err := s.handle.Read(buff)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("read vector index at %d: %w", pos, err)
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
vIndex, err = VectorIndexBlockDecode(buff)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("invalid vector index block at %d: %w", pos, err)
|
||||
}
|
||||
}
|
||||
|
||||
//log.Printf("vIndex=%s", vIndex)
|
||||
// binary search the segment index to get the region
|
||||
var dataLen, dataPtr = 0, uint32(0)
|
||||
var buff = make([]byte, SegmentIndexBlockSize)
|
||||
var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
|
||||
for l <= h {
|
||||
// log.Printf("l=%d, h=%d", l, h)
|
||||
m := (l + h) >> 1
|
||||
p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
|
||||
// log.Printf("m=%d, p=%d", m, p)
|
||||
_, err := s.handle.Seek(int64(p), 0)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("seek to segment block at %d: %w", p, err)
|
||||
}
|
||||
|
||||
ioCount++
|
||||
rLen, err := s.handle.Read(buff)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("read segment index at %d: %w", p, err)
|
||||
}
|
||||
|
||||
if rLen != len(buff) {
|
||||
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
|
||||
}
|
||||
|
||||
// segIndex, err := SegmentIndexDecode(buff)
|
||||
// if err != nil {
|
||||
// return "", fmt.Errorf("invalid segment index block at %d: %w", p, err)
|
||||
// }
|
||||
// decode the data step by step to reduce the unnecessary calculations
|
||||
sip := binary.LittleEndian.Uint32(buff)
|
||||
if ip < sip {
|
||||
h = m - 1
|
||||
} else {
|
||||
eip := binary.LittleEndian.Uint32(buff[4:])
|
||||
if ip > eip {
|
||||
l = m + 1
|
||||
} else {
|
||||
dataLen = int(binary.LittleEndian.Uint16(buff[8:]))
|
||||
dataPtr = binary.LittleEndian.Uint32(buff[10:])
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if dataLen == 0 {
|
||||
return "", ioCount, nil
|
||||
}
|
||||
|
||||
// load and return the region data
|
||||
_, err := s.handle.Seek(int64(dataPtr), 0)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("seek to data block at %d: %w", dataPtr, err)
|
||||
}
|
||||
|
||||
ioCount++
|
||||
var regionBuff = make([]byte, dataLen)
|
||||
rLen, err := s.handle.Read(regionBuff)
|
||||
if err != nil {
|
||||
return "", ioCount, fmt.Errorf("read region data at %d: %w", dataPtr, err)
|
||||
}
|
||||
|
||||
if rLen != dataLen {
|
||||
return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", dataLen)
|
||||
}
|
||||
|
||||
return string(regionBuff), ioCount, nil
|
||||
}
|
||||
97
maker/golang/segment.go
Normal file
97
maker/golang/segment.go
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Segment struct {
|
||||
StartIP uint32
|
||||
EndIP uint32
|
||||
Region string
|
||||
}
|
||||
|
||||
func SegmentFrom(seg string) (*Segment, error) {
|
||||
var ps = strings.SplitN(seg, "|", 3)
|
||||
if len(ps) != 3 {
|
||||
return nil, fmt.Errorf("invalid ip segment `%s`", seg)
|
||||
}
|
||||
|
||||
sip, err := CheckIP(ps[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("check start ip `%s`: %s", ps[0], err)
|
||||
}
|
||||
|
||||
eip, err := CheckIP(ps[1])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("check end ip `%s`: %s", ps[1], err)
|
||||
}
|
||||
|
||||
if sip > eip {
|
||||
return nil, fmt.Errorf("start ip(%s) should not be greater than end ip(%s)", ps[0], ps[1])
|
||||
}
|
||||
|
||||
return &Segment{
|
||||
StartIP: sip,
|
||||
EndIP: eip,
|
||||
Region: ps[2],
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Split the segment based on the pre-two bytes
|
||||
func (s *Segment) Split() []*Segment {
|
||||
// 1, split the segment with the first byte
|
||||
var tList []*Segment
|
||||
var sByte1, eByte1 = (s.StartIP >> 24) & 0xFF, (s.EndIP >> 24) & 0xFF
|
||||
var nSip = s.StartIP
|
||||
for i := sByte1; i <= eByte1; i++ {
|
||||
sip := (i << 24) | (nSip & 0xFFFFFF)
|
||||
eip := (i << 24) | 0xFFFFFF
|
||||
if eip < s.EndIP {
|
||||
nSip = (i + 1) << 24
|
||||
} else {
|
||||
eip = s.EndIP
|
||||
}
|
||||
|
||||
// append the new segment (maybe)
|
||||
tList = append(tList, &Segment{
|
||||
StartIP: sip,
|
||||
EndIP: eip,
|
||||
// @Note: don't bother to copy the region
|
||||
/// Region: s.Region,
|
||||
})
|
||||
}
|
||||
|
||||
// 2, split the segments with the second byte
|
||||
var segList []*Segment
|
||||
for _, seg := range tList {
|
||||
base := seg.StartIP & 0xFF000000
|
||||
nSip := seg.StartIP
|
||||
sb2, eb2 := (seg.StartIP>>16)&0xFF, (seg.EndIP>>16)&0xFF
|
||||
for i := sb2; i <= eb2; i++ {
|
||||
sip := base | (i << 16) | (nSip & 0xFFFF)
|
||||
eip := base | (i << 16) | 0xFFFF
|
||||
if eip < seg.EndIP {
|
||||
nSip = 0
|
||||
} else {
|
||||
eip = seg.EndIP
|
||||
}
|
||||
|
||||
segList = append(segList, &Segment{
|
||||
StartIP: sip,
|
||||
EndIP: eip,
|
||||
Region: s.Region,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return segList
|
||||
}
|
||||
|
||||
func (s *Segment) String() string {
|
||||
return Long2IP(s.StartIP) + "|" + Long2IP(s.EndIP) + "|" + s.Region
|
||||
}
|
||||
73
maker/golang/util.go
Normal file
73
maker/golang/util.go
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Util function
|
||||
|
||||
// CheckIP validates a dotted-decimal IPv4 address and returns it as a
// big-endian uint32 (the first part ends up in the most significant byte).
//
// The address must consist of exactly four "."-separated parts, each an
// unsigned decimal integer between 0 and 255. An error naming the offending
// part (by zero-based index) is returned otherwise.
func CheckIP(ip string) (uint32, error) {
	var ps = strings.Split(ip, ".")
	if len(ps) != 4 {
		return 0, fmt.Errorf("invalid ip address `%s`", ip)
	}

	var buff [4]byte
	for i, s := range ps {
		d, err := strconv.Atoi(s)
		if err != nil {
			return 0, fmt.Errorf("the %dth part `%s` is not an integer", i, s)
		}

		if d < 0 || d > 255 {
			return 0, fmt.Errorf("the %dth part `%s` should be an integer bettween 0 and 255", i, s)
		}

		// strconv.Atoi accepts an explicit sign, so parts such as "+1" or
		// "-0" get past the range check above; they are not valid octets.
		// s cannot be empty here because Atoi rejects the empty string.
		if s[0] == '+' || s[0] == '-' {
			return 0, fmt.Errorf("the %dth part `%s` should not have a sign prefix", i, s)
		}

		buff[i] = byte(d)
	}

	// convert the ip to integer
	return binary.BigEndian.Uint32(buff[:]), nil
}
|
||||
|
||||
// Long2IP formats a uint32 address as dotted-decimal text, most significant
// byte first.
func Long2IP(ip uint32) string {
	octets := make([]string, 0, 4)
	for _, shift := range []uint{24, 16, 8, 0} {
		octets = append(octets, fmt.Sprintf("%d", (ip>>shift)&0xFF))
	}
	return strings.Join(octets, ".")
}
|
||||
|
||||
// MidIP returns the midpoint of sip and eip, rounded down. The sum is formed
// in 64 bits so that two large addresses cannot overflow.
func MidIP(sip uint32, eip uint32) uint32 {
	sum := uint64(sip) + uint64(eip)
	return uint32(sum / 2)
}
|
||||
|
||||
func CheckSegments(segList []*Segment) error {
|
||||
var last *Segment
|
||||
for _, seg := range segList {
|
||||
// sip must <= eip
|
||||
if seg.StartIP > seg.EndIP {
|
||||
return fmt.Errorf("segment `%s`: start ip should not be greater than end ip", seg.String())
|
||||
}
|
||||
|
||||
// check the continuity of the data segment
|
||||
if last != nil {
|
||||
if last.EndIP+1 != seg.StartIP {
|
||||
return fmt.Errorf("discontinuous segment `%s`: last.eip+1 != cur.sip", seg.String())
|
||||
}
|
||||
}
|
||||
|
||||
last = seg
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
31
maker/golang/util_test.go
Normal file
31
maker/golang/util_test.go
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSplitSegment(t *testing.T) {
|
||||
// var str = "1.1.0.0|1.3.3.24|中国|广东|深圳|电信"
|
||||
// var str = "0.0.0.0|1.255.225.254|0|0|0|内网IP|内网IP"
|
||||
var str = "28.201.224.0|29.34.191.255|美国|0|0|0|0"
|
||||
seg, err := SegmentFrom(str)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parser segment '%s': %s", str, err)
|
||||
}
|
||||
|
||||
fmt.Printf("idx: src, seg: %s\n", seg.String())
|
||||
var segList = seg.Split()
|
||||
err = CheckSegments(segList)
|
||||
if err != nil {
|
||||
t.Fatalf("check segments: %s", err.Error())
|
||||
}
|
||||
|
||||
for i, s := range segList {
|
||||
fmt.Printf("idx: %3d, seg: %s\n", i, s.String())
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user