Add golang xdb maker implementation

This commit is contained in:
lion 2022-06-16 16:44:02 +08:00
parent d746d5d4b2
commit 602dc9203b
10 changed files with 1170 additions and 0 deletions

View File

@ -1,7 +1,34 @@
# ip2region xdb golang 生成实现
# 程序编译
通过如下方式编译得到 dbmaker 可执行程序:
```
# 切换到golang maker 根目录
cd ./
go build
```
编译成功后会在当前目录生成一个 dbmaker 的可执行文件
# 数据生成
通过 `dbmaker gen` 命令生成 ip2region.xdb 二进制文件:
```bash
➜ golang git:(v2.0_xdb) ✗ ./dbmaker gen
dbmaker gen [command options]
options:
--src string source ip text file path
--dst string destination binary db file path
```
例如,使用默认的 data/ip.merge.txt 作为源数据,生成一个 ip2region.xdb 到当前目录:
```
./dbmaker gen --src=../../data/ip.merge.txt --dst=./ip2region.xdb
# 会看到一堆输出,最终会看到类似如下输出表示运行结束
...
2022/06/16 16:38:48 maker.go:317: write done, with 13804 data blocks and (683591, 720221) index blocks
2022/06/16 16:38:48 main.go:89: Done, elapsed: 33.615278847s
```
# 数据查询
# bench 测试

3
maker/golang/go.mod Normal file
View File

@ -0,0 +1,3 @@
module dbmaker
go 1.17

2
maker/golang/go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=

93
maker/golang/index.go Normal file
View File

@ -0,0 +1,93 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package main
import (
"encoding/binary"
"fmt"
"strings"
)
// IndexPolicy identifies the index algorithm recorded in the xdb header.
type IndexPolicy int

// Index policies understood by IndexPolicyFromString.
const (
	VectorIndexPolicy IndexPolicy = 1
	BTreeIndexPolicy  IndexPolicy = 2
)

// IndexPolicyFromString maps a case-insensitive policy name ("vector" or
// "btree") to its IndexPolicy. For any other name it returns
// VectorIndexPolicy together with a non-nil error.
func IndexPolicyFromString(str string) (IndexPolicy, error) {
	name := strings.ToLower(str)
	if name == "vector" {
		return VectorIndexPolicy, nil
	}
	if name == "btree" {
		return BTreeIndexPolicy, nil
	}
	return VectorIndexPolicy, fmt.Errorf("invalid policy '%s'", str)
}
// SegmentIndexBlockSize is the encoded size, in bytes, of one SegmentIndexBlock.
const SegmentIndexBlockSize = 14

// SegmentIndexBlock is one entry of the segment index: an IP range plus the
// length and file position of its region data.
type SegmentIndexBlock struct {
	StartIP uint32
	EndIP   uint32
	DataLen uint16
	DataPtr uint32
}

// SegmentIndexDecode parses the first 14 bytes of input as a little-endian
// SegmentIndexBlock. It returns an error when input is shorter than that.
func SegmentIndexDecode(input []byte) (*SegmentIndexBlock, error) {
	if len(input) < 14 {
		return nil, fmt.Errorf("input is less than 14 bytes")
	}

	le := binary.LittleEndian
	block := new(SegmentIndexBlock)
	block.StartIP = le.Uint32(input)
	block.EndIP = le.Uint32(input[4:])
	block.DataLen = le.Uint16(input[8:])
	block.DataPtr = le.Uint32(input[10:])
	return block, nil
}

// Encode serializes the block to its 14-byte little-endian layout:
// start ip (4), end ip (4), data length (2), data pointer (4).
func (s *SegmentIndexBlock) Encode() []byte {
	out := make([]byte, SegmentIndexBlockSize)
	le := binary.LittleEndian
	le.PutUint32(out[0:], s.StartIP)
	le.PutUint32(out[4:], s.EndIP)
	le.PutUint16(out[8:], s.DataLen)
	le.PutUint32(out[10:], s.DataPtr)
	return out
}

// String renders the block fields as decimal numbers for logging.
func (s *SegmentIndexBlock) String() string {
	const layout = "{sip: %d, eip: %d, len: %d, ptr: %d}"
	return fmt.Sprintf(layout, s.StartIP, s.EndIP, s.DataLen, s.DataPtr)
}
// ------------
// VectorIndexBlock is one cell of the vector index: the file positions that
// bound the run of segment index entries belonging to that cell.
type VectorIndexBlock struct {
	FirstPtr uint32
	LastPtr  uint32
}

// VectorIndexBlockDecode parses the first 8 bytes of input as a little-endian
// VectorIndexBlock. It returns an error when input is shorter than 8 bytes.
func VectorIndexBlockDecode(input []byte) (*VectorIndexBlock, error) {
	if len(input) < 8 {
		return nil, fmt.Errorf("input should be not less than 8 bytes")
	}

	return &VectorIndexBlock{
		FirstPtr: binary.LittleEndian.Uint32(input),
		LastPtr:  binary.LittleEndian.Uint32(input[4:]),
	}, nil
}

// Encode serializes the block as 8 little-endian bytes: FirstPtr then LastPtr.
func (v VectorIndexBlock) Encode() []byte {
	var buff = make([]byte, 8)
	binary.LittleEndian.PutUint32(buff, v.FirstPtr)
	binary.LittleEndian.PutUint32(buff[4:], v.LastPtr)
	return buff
}

// String renders both pointers as decimal numbers for logging.
func (v VectorIndexBlock) String() string {
	return fmt.Sprintf("{FirstPtr: %d, LastPtr: %d}", v.FirstPtr, v.LastPtr)
}

313
maker/golang/main.go Normal file
View File

@ -0,0 +1,313 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package main
import (
"bufio"
"fmt"
"log"
"os"
"strings"
"time"
)
// printHelp writes the top-level usage text, listing the available
// sub-commands, to standard output.
func printHelp() {
	fmt.Print(
		"ip2region dbmaker 2.0\n" +
			"dbmaker [command] [command options]\n" +
			"Command: \n" +
			" gen generate the binary db file\n" +
			" search binary db search test\n" +
			" bench binary db bench test\n",
	)
}
// genDb implements the `gen` sub-command: it reads the --src, --dst and
// --index options from os.Args and drives a Maker through Init, Start and End
// to produce the binary db file. Any Maker failure terminates the process.
func genDb() {
	var (
		err     error
		srcFile string
		dstFile string
		policy  = VectorIndexPolicy
	)

	for idx := 2; idx < len(os.Args); idx++ {
		arg := os.Args[idx]
		// only `--name=value` style arguments are considered
		if len(arg) < 5 || !strings.HasPrefix(arg, "--") {
			continue
		}

		eq := strings.Index(arg, "=")
		if eq < 0 {
			fmt.Printf("missing = for args pair '%s'\n", arg)
			return
		}

		name, value := arg[2:eq], arg[eq+1:]
		if name == "src" {
			srcFile = value
		} else if name == "dst" {
			dstFile = value
		} else if name == "index" {
			policy, err = IndexPolicyFromString(value)
			if err != nil {
				fmt.Printf("parse policy: %s", err.Error())
				return
			}
		}
	}

	if srcFile == "" || dstFile == "" {
		fmt.Printf("dbmaker gen [command options]\n")
		fmt.Printf("options:\n")
		fmt.Printf(" --src string source ip text file path\n")
		fmt.Printf(" --dst string destination binary db file path\n")
		return
	}

	// build the binary file and report how long it took
	began := time.Now()
	maker, err := NewMaker(policy, srcFile, dstFile)
	if err != nil {
		log.Fatalf("failed to create maker: %s", err)
	}

	if err = maker.Init(); err != nil {
		log.Fatalf("failed Init: %s", err)
	}

	if err = maker.Start(); err != nil {
		log.Fatalf("failed Start: %s", err)
	}

	if err = maker.End(); err != nil {
		log.Fatalf("failed End: %s", err)
	}

	log.Printf("Done, elapsed: %s\n", time.Since(began))
}
// testSearch implements the `search` sub-command: an interactive prompt that
// looks up IP addresses typed on standard input in the xdb file given by --db.
//
// Besides IP addresses the prompt accepts `loadIndex`, `clearIndex` and
// `quit`. Failures to open the db, read from stdin or load the vector index
// terminate the process.
func testSearch() {
	var err error
	var dbFile = ""
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		if len(r) < 5 {
			continue
		}

		if strings.Index(r, "--") != 0 {
			continue
		}

		var eIdx = strings.Index(r, "=")
		if eIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:eIdx] {
		case "db":
			dbFile = r[eIdx+1:]
		}
	}

	if dbFile == "" {
		// the sub-command is registered as `search` in main
		fmt.Printf("dbmaker search [command options]\n")
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary db file path\n")
		return
	}

	searcher, err := NewSearcher(dbFile)
	if err != nil {
		log.Fatalf("failed to create searcher: %s", err.Error())
	}
	defer func() {
		searcher.Close()
		fmt.Printf("test program exited, thanks for trying\n")
	}()

	fmt.Println(`ip2region 2.0 test program, commands:
loadIndex : load the vector index for search speedup.
clearIndex: clear the vector index.
quit : exit the test program`)
	reader := bufio.NewReader(os.Stdin)
	for {
		fmt.Print("ip2region>> ")
		str, err := reader.ReadString('\n')
		if err != nil {
			log.Fatalf("failed to read string: %s", err)
		}

		line := strings.TrimSpace(strings.TrimSuffix(str, "\n"))
		if len(line) == 0 {
			continue
		}

		// command interception and execution
		if line == "loadIndex" {
			err = searcher.LoadVectorIndex()
			if err != nil {
				log.Fatalf("failed to load vector index: %s", err)
			}
			fmt.Printf("vector index cached\n")
			continue
		} else if line == "clearIndex" {
			searcher.ClearVectorIndex()
			fmt.Printf("vector index cleared\n")
			continue
		} else if line == "quit" {
			break
		}

		ip, err := CheckIP(line)
		if err != nil {
			fmt.Printf("invalid ip address `%s`\n", line)
			continue
		}

		tStart := time.Now()
		region, ioCount, err := searcher.Search(ip)
		if err != nil {
			fmt.Printf("\x1b[0;31m{err:%s, iocount:%d}\x1b[0m\n", err.Error(), ioCount)
		} else {
			fmt.Printf("\x1b[0;32m{region:%s, iocount:%d, took:%s}\x1b[0m\n", region, ioCount, time.Since(tStart))
		}
	}
}
// testBench implements the `bench` sub-command: for every segment line of the
// source text file (--src) it searches five addresses of the segment (start,
// end and three points in between) in the xdb file (--db) and compares the
// returned region with the one in the source line.
//
// With --ignore-error=true a region mismatch is counted and the run goes on;
// otherwise the first mismatch stops the bench. Malformed input and search
// errors always stop it.
func testBench() {
	var dbFile, srcFile = "", ""
	var ignoreError = false
	for i := 2; i < len(os.Args); i++ {
		r := os.Args[i]
		if len(r) < 5 {
			continue
		}

		if strings.Index(r, "--") != 0 {
			continue
		}

		var eIdx = strings.Index(r, "=")
		if eIdx < 0 {
			fmt.Printf("missing = for args pair '%s'\n", r)
			return
		}

		switch r[2:eIdx] {
		case "db":
			dbFile = r[eIdx+1:]
		case "src":
			srcFile = r[eIdx+1:]
		case "ignore-error":
			v := r[eIdx+1:]
			if v == "true" || v == "1" {
				ignoreError = true
			} else if v == "false" || v == "0" {
				ignoreError = false
			} else {
				fmt.Printf("invalid value for ignore-error option, could be false/0 or true/1\n")
				return
			}
		}
	}

	if dbFile == "" || srcFile == "" {
		fmt.Printf("dbmaker bench [command options]\n")
		fmt.Printf("options:\n")
		fmt.Printf(" --db string ip2region binary xdb file path\n")
		fmt.Printf(" --src string source ip text file path\n")
		fmt.Printf(" --ignore-error bool keep going if bench failed\n")
		return
	}

	// A failed open returns a nil searcher; bail out before the deferred
	// Close can dereference it.
	searcher, err := NewSearcher(dbFile)
	if err != nil {
		fmt.Printf("failed to create searcher: %s\n", err)
		return
	}
	defer searcher.Close()

	handle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
	if err != nil {
		fmt.Printf("failed to open source text file: %s\n", err)
		return
	}
	defer handle.Close()

	var count, errCount, tStart = 0, 0, time.Now()
	var scanner = bufio.NewScanner(handle)
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		var l = strings.TrimSpace(strings.TrimSuffix(scanner.Text(), "\n"))
		var ps = strings.SplitN(l, "|", 3)
		if len(ps) != 3 {
			fmt.Printf("invalid ip segment line `%s`\n", l)
			return
		}

		sip, err := CheckIP(ps[0])
		if err != nil {
			fmt.Printf("check start ip `%s`: %s\n", ps[0], err)
			return
		}

		eip, err := CheckIP(ps[1])
		if err != nil {
			fmt.Printf("check end ip `%s`: %s\n", ps[1], err)
			return
		}

		if sip > eip {
			fmt.Printf("start ip(%s) should not be greater than end ip(%s)\n", ps[0], ps[1])
			return
		}

		fmt.Printf("try to bench segment: `%s`\n", l)
		mip := MidIP(sip, eip)
		for _, ip := range []uint32{sip, MidIP(sip, mip), mip, MidIP(mip, eip), eip} {
			fmt.Printf("|-try to bench ip '%s' ... ", Long2IP(ip))
			region, _, err := searcher.Search(ip)
			if err != nil {
				fmt.Printf("failed to search ip '%s': %s\n", Long2IP(ip), err)
				return
			}

			// check the region info
			count++
			if region != ps[2] {
				errCount++
				fmt.Printf(" --[Failed] (%s != %s)\n", region, ps[2])
				if !ignoreError {
					return
				}
			} else {
				fmt.Printf(" --[Ok]\n")
			}
		}
	}

	// Scan returns false on a read error as well as on EOF; tell them apart
	// so a truncated run is not reported as a finished bench.
	if err := scanner.Err(); err != nil {
		fmt.Printf("failed to scan source text file: %s\n", err)
		return
	}

	fmt.Printf("Bench finished, {count: %d, failed: %d, took: %s}\n", count, errCount, time.Since(tStart))
}
// main dispatches to the sub-command named by the first argument
// (case-insensitive) and prints the usage text when the command is missing
// or unknown.
func main() {
	if len(os.Args) < 2 {
		printHelp()
		return
	}

	// set the log flag
	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

	commands := map[string]func(){
		"gen":    genDb,
		"search": testSearch,
		"bench":  testBench,
	}
	if run, known := commands[strings.ToLower(os.Args[1])]; known {
		run()
		return
	}
	printHelp()
}

334
maker/golang/maker.go Normal file
View File

@ -0,0 +1,334 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ----
// ip2region database v2.0 structure
//
// +----------------+-------------------+---------------+--------------+
// | header space | speed up index | data payload | block index |
// +----------------+-------------------+---------------+--------------+
// | 256 bytes | 512 KiB (fixed) | dynamic size | dynamic size |
// +----------------+-------------------+---------------+--------------+
//
// 1. header space : for header info like block index ptr, version, release date, etc., or any other temporary needs.
// -- 2bytes: version number, different version means structure update, it fixed to 2 for now
// -- 2bytes: index algorithm code.
// -- 4bytes: generate unix timestamp (version)
// -- 4bytes: index block start ptr
// -- 4bytes: index block end ptr
//
//
// 2. data block : region or whatever data info.
// 3. segment index block : binary index block.
// 4. vector index block : fixed index info for block index search speed up.
// space structure table:
// -- 0 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- 1 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- 2 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- ...
// -- 255 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
//
//
// super block structure:
// +-----------------------+----------------------+
// | first index block ptr | last index block ptr |
// +-----------------------+----------------------+
//
// data entry structure:
// +--------------------+-----------------------+
// | 2bytes (for desc) | dynamic length |
// +--------------------+-----------------------+
// data length whatever in bytes
//
// index entry structure
// +------------+-----------+---------------+------------+
// | 4bytes | 4bytes | 2bytes | 4 bytes |
// +------------+-----------+---------------+------------+
// start ip end ip data length data ptr
package main
import (
"bufio"
"encoding/binary"
"fmt"
"log"
"os"
"strings"
"time"
)
// VersionNo is the structure version number written into the db header.
const VersionNo = 2

// HeaderInfoLength is the size in bytes of the header space at the start of the file.
const HeaderInfoLength = 256

// VectorIndexRows and VectorIndexCols are the dimensions of the vector index
// table; a cell is addressed by the first and second byte of an IP.
const VectorIndexRows = 256
const VectorIndexCols = 256

// VectorIndexSize is the encoded size in bytes of one vector index cell
// (two 4-byte pointers).
const VectorIndexSize = 8

// VectorIndexLength is the total size in bytes of the vector index area.
const VectorIndexLength = VectorIndexRows * VectorIndexCols * VectorIndexSize
// Maker builds the binary xdb file from a source text file of IP segments.
type Maker struct {
	// srcHandle is the source text file, opened read-only.
	srcHandle *os.File
	// dstHandle is the binary file being generated.
	dstHandle *os.File

	// indexPolicy is the index policy code recorded in the header.
	indexPolicy IndexPolicy
	// segments holds every segment parsed from the source file, in file order.
	segments []*Segment
	// regionPool maps a region string to the file position where its data was written.
	regionPool map[string]uint32
	// vectorIndex is the in-memory vector index table, indexed by the first
	// and then the second byte of a segment's start IP.
	vectorIndex [VectorIndexCols][VectorIndexRows]VectorIndexBlock
}
// NewMaker opens srcFile for reading and creates (or truncates) dstFile for
// read/write, and returns a Maker bound to both files.
//
// The returned error wraps the underlying open failure. When the destination
// cannot be opened, the already opened source file is closed before returning.
func NewMaker(policy IndexPolicy, srcFile string, dstFile string) (*Maker, error) {
	// open the source file with READONLY mode
	srcHandle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("open source file `%s`: %w", srcFile, err)
	}

	// open the destination file with Read/Write mode
	dstHandle, err := os.OpenFile(dstFile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		// Best-effort cleanup: the open failure is the error worth reporting,
		// so a close error on the read-only source is deliberately dropped.
		_ = srcHandle.Close()
		return nil, fmt.Errorf("open target file `%s`: %w", dstFile, err)
	}

	return &Maker{
		srcHandle: srcHandle,
		dstHandle: dstHandle,

		indexPolicy: policy,
		segments:    []*Segment{},
		regionPool:  map[string]uint32{},
		vectorIndex: [VectorIndexCols][VectorIndexRows]VectorIndexBlock{},
	}, nil
}
// initDbHeader writes the fixed-size header space at the start of the
// destination file. The header is little-endian and laid out as:
//
//	offset 0,  2 bytes: structure version number
//	offset 2,  2 bytes: index policy code
//	offset 4,  4 bytes: generation unix timestamp
//	offset 8,  4 bytes: index block start ptr (written as 0 here)
//	offset 12, 4 bytes: index block end ptr (written as 0 here)
//
// The remaining header bytes are zero.
func (m *Maker) initDbHeader() error {
	log.Printf("try to init the db header ... ")

	_, err := m.dstHandle.Seek(0, 0)
	if err != nil {
		return err
	}

	// make and write the header space
	var header = make([]byte, HeaderInfoLength)

	// 1, version number
	binary.LittleEndian.PutUint16(header, uint16(VersionNo))

	// 2, index policy code; it follows the version instead of overwriting it
	binary.LittleEndian.PutUint16(header[2:], uint16(m.indexPolicy))

	// 3, generate unix timestamp
	binary.LittleEndian.PutUint32(header[4:], uint32(time.Now().Unix()))

	// 4, index block start ptr
	binary.LittleEndian.PutUint32(header[8:], uint32(0))

	// 5, index block end ptr
	binary.LittleEndian.PutUint32(header[12:], uint32(0))

	_, err = m.dstHandle.Write(header)
	if err != nil {
		return err
	}

	return nil
}
// loadSegments reads the source file line by line, parses each
// `start ip|end ip|region` line into a Segment and appends it to m.segments.
//
// It returns an error for a malformed line, an invalid IP, a start IP greater
// than the end IP, an empty region, a segment that does not start right after
// the previous one ends, or a read failure on the source file.
func (m *Maker) loadSegments() error {
	log.Printf("try to load the segments ... ")
	var last *Segment
	var tStart = time.Now()

	var scanner = bufio.NewScanner(m.srcHandle)
	scanner.Split(bufio.ScanLines)
	for scanner.Scan() {
		var l = strings.TrimSpace(strings.TrimSuffix(scanner.Text(), "\n"))
		log.Printf("load segment: `%s`", l)

		var ps = strings.SplitN(l, "|", 3)
		if len(ps) != 3 {
			return fmt.Errorf("invalid ip segment line `%s`", l)
		}

		sip, err := CheckIP(ps[0])
		if err != nil {
			return fmt.Errorf("check start ip `%s`: %w", ps[0], err)
		}

		eip, err := CheckIP(ps[1])
		if err != nil {
			return fmt.Errorf("check end ip `%s`: %w", ps[1], err)
		}

		if sip > eip {
			return fmt.Errorf("start ip(%s) should not be greater than end ip(%s)", ps[0], ps[1])
		}

		if len(ps[2]) < 1 {
			return fmt.Errorf("empty region info in segment line `%s`", l)
		}

		var seg = &Segment{
			StartIP: sip,
			EndIP:   eip,
			Region:  ps[2],
		}

		// check the continuity of the data segment
		if last != nil && last.EndIP+1 != seg.StartIP {
			// report the two values that were actually compared
			return fmt.Errorf("discontinuous data segment: last.eip+1(%d) != seg.sip(%d, %s)", last.EndIP+1, sip, ps[0])
		}

		m.segments = append(m.segments, seg)
		last = seg
	}

	// Scan returns false on a read error as well as on EOF; a read error must
	// not be mistaken for a completely loaded file.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("scan source file: %w", err)
	}

	log.Printf("all segments loaded, length: %d, elapsed: %s", len(m.segments), time.Since(tStart))
	return nil
}
// Init prepares the maker: it writes the db header to the destination file
// and then loads every segment from the source file. The first failing step
// is returned, wrapped with the name of that step.
func (m *Maker) Init() error {
	if err := m.initDbHeader(); err != nil {
		return fmt.Errorf("init db header: %w", err)
	}

	if err := m.loadSegments(); err != nil {
		return fmt.Errorf("load segments: %w", err)
	}

	return nil
}
// setVectorIndex records the segment index entry located at ptr in the vector
// index cell selected by the first two bytes of ip. The first entry seen for a
// cell sets FirstPtr; every entry moves LastPtr to just past itself.
func (m *Maker) setVectorIndex(ip uint32, ptr uint32) {
	row, col := (ip>>24)&0xFF, (ip>>16)&0xFF
	cell := &m.vectorIndex[row][col]
	if cell.FirstPtr == 0 {
		cell.FirstPtr = ptr
	}
	cell.LastPtr = ptr + SegmentIndexBlockSize
}
// Start writes the body of the binary file from the loaded segments, in
// three steps:
//
//  1. the region data, starting right after the header and the vector index
//     area, with each distinct region string written once;
//  2. one segment index entry per split of every segment, written right after
//     the region data, while the in-memory vector index is updated;
//  3. the vector index table, written at its fixed position after the header.
//
// It returns an error when no segments are loaded, when a region is longer
// than 0xFFFF bytes, or when a seek or write on the destination file fails.
func (m *Maker) Start() error {
	if len(m.segments) < 1 {
		return fmt.Errorf("empty segment list")
	}

	// 1, write all the region/data to the binary file
	_, err := m.dstHandle.Seek(int64(HeaderInfoLength+VectorIndexLength), 0)
	if err != nil {
		return fmt.Errorf("seek to data first ptr: %w", err)
	}

	log.Printf("try to write the data block ... ")
	for _, seg := range m.segments {
		log.Printf("try to write region '%s' ... ", seg.Region)
		// a region already written is reused through its cached position
		ptr, has := m.regionPool[seg.Region]
		if has {
			log.Printf(" --[Cached] with ptr=%d", ptr)
			continue
		}

		// the data length is stored as a uint16 in the segment index entry
		var region = []byte(seg.Region)
		if len(region) > 0xFFFF {
			return fmt.Errorf("too long region info `%s`: shoul be less than %d bytes", seg.Region, 0xFFFF)
		}

		// get the first ptr of the next region
		pos, err := m.dstHandle.Seek(0, 1)
		if err != nil {
			return fmt.Errorf("seek to current ptr: %w", err)
		}

		_, err = m.dstHandle.Write(region)
		if err != nil {
			return fmt.Errorf("write region '%s': %w", seg.Region, err)
		}

		m.regionPool[seg.Region] = uint32(pos)
		log.Printf(" --[Added] with ptr=%d", pos)
	}

	// 2, write the index block and cache the super index block
	log.Printf("try to write the segment index block ... ")
	var counter = 0
	for _, seg := range m.segments {
		dataPtr, has := m.regionPool[seg.Region]
		if !has {
			return fmt.Errorf("missing ptr cache for region `%s`", seg.Region)
		}

		var dataLen = len(seg.Region)
		if dataLen < 1 {
			// @TODO: could this even be a case ?
			return fmt.Errorf("empty region info for segment '%s'", seg)
		}

		// every split gets its own index entry pointing at the same region data
		var segList = seg.Split()
		log.Printf("try to index segment(%d splits) %s ...", len(segList), seg.String())
		for _, s := range segList {
			// current file position, i.e. where this index entry is written
			pos, err := m.dstHandle.Seek(0, 1)
			if err != nil {
				return fmt.Errorf("seek to segment index block: %w", err)
			}

			var sIndex = &SegmentIndexBlock{
				StartIP: s.StartIP,
				EndIP:   s.EndIP,
				DataLen: uint16(dataLen),
				DataPtr: dataPtr,
			}

			_, err = m.dstHandle.Write(sIndex.Encode())
			if err != nil {
				return fmt.Errorf("write segment index for '%s': %w", s.String(), err)
			}

			log.Printf("|-segment index: %d, ptr: %d, segment: %s\n", counter, pos, s.String())
			m.setVectorIndex(s.StartIP, uint32(pos))
			counter++
		}
	}

	// 3, write the vector index table at its fixed position after the header
	_, err = m.dstHandle.Seek(int64(HeaderInfoLength), 0)
	if err != nil {
		return fmt.Errorf("seek vector index first ptr: %w", err)
	}

	for i, l := range m.vectorIndex {
		for j, c := range l {
			_, err = m.dstHandle.Write(c.Encode())
			if err != nil {
				return fmt.Errorf("write vector index [%d][%d]: %w", i, j, err)
			}
		}
	}

	log.Printf("write done, with %d data blocks and (%d, %d) index blocks", len(m.regionPool), len(m.segments), counter)
	return nil
}
// End closes the destination and the source file. Both files are always
// closed; when both closes fail, the destination error is returned because
// it is the one that can mean lost output.
func (m *Maker) End() error {
	dstErr := m.dstHandle.Close()
	srcErr := m.srcHandle.Close()
	if dstErr != nil {
		return dstErr
	}

	return srcErr
}

197
maker/golang/searcher.go Normal file
View File

@ -0,0 +1,197 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
// ---
// ip2region database v2.0 searcher.
// This is part of the maker and is meant for testing and validation.
// Please use the searcher in binding/golang for production use.
// Note: this implementation is not thread-safe.
package main
import (
"encoding/binary"
"fmt"
"os"
)
// Searcher looks up the region of an IP address in a binary xdb file.
type Searcher struct {
	// handle is the xdb file, opened read-only.
	handle *os.File

	// header info
	header []byte

	// vectorIndex is the optional in-memory copy of the vector index.
	// It is nil unless LoadVectorIndex has been called; when set, Search
	// reads the vector index cell from memory instead of from the file.
	vectorIndex [][]*VectorIndexBlock
}
// NewSearcher opens dbFile read-only and returns a Searcher on it. The
// header and the vector index cache start out unset. The open error is
// returned unchanged.
func NewSearcher(dbFile string) (*Searcher, error) {
	file, err := os.OpenFile(dbFile, os.O_RDONLY, 0600)
	if err != nil {
		return nil, err
	}

	searcher := new(Searcher)
	searcher.handle = file
	return searcher, nil
}
// Close closes the underlying db file. A close error is discarded, since
// the method has no way to report it.
func (s *Searcher) Close() {
	_ = s.handle.Close()
}
// LoadVectorIndex reads the whole vector index area from the db file and
// caches it in memory so Search can skip one file read per lookup.
// The cache takes VectorIndexRows x VectorIndexCols decoded blocks.
// Calling it again while the cache is set is a no-op.
func (s *Searcher) LoadVectorIndex() error {
	if s.vectorIndex != nil {
		// loaded already
		return nil
	}

	// the vector index area starts right after the header
	if _, err := s.handle.Seek(HeaderInfoLength, 0); err != nil {
		return fmt.Errorf("seek to vector index: %w", err)
	}

	raw := make([]byte, VectorIndexRows*VectorIndexCols*VectorIndexSize)
	n, err := s.handle.Read(raw)
	if err != nil {
		return err
	}
	if n != len(raw) {
		return fmt.Errorf("incomplete read: readed bytes should be %d", len(raw))
	}

	// decode the cells row by row; they are stored back to back
	table := make([][]*VectorIndexBlock, VectorIndexRows)
	offset := 0
	for r := range table {
		row := make([]*VectorIndexBlock, VectorIndexCols)
		for c := range row {
			block, err := VectorIndexBlockDecode(raw[offset:])
			if err != nil {
				return fmt.Errorf("decode vector index at [%d][%d]: %w", r, c, err)
			}
			row[c] = block
			offset += VectorIndexSize
		}
		table[r] = row
	}

	s.vectorIndex = table
	return nil
}
// ClearVectorIndex drops the in-memory vector index cache, so later
// searches read the vector index cell from the file again.
func (s *Searcher) ClearVectorIndex() {
	s.vectorIndex = nil
}
// Search finds the region for the specified ip address.
//
// It first gets the vector index cell selected by the first two bytes of ip
// (from the in-memory cache when loaded, otherwise from the file), then
// binary-searches the segment index entries bounded by that cell, and
// finally reads the region data the matching entry points to.
//
// It returns the region string, the number of file reads performed and an
// error. When no entry with a non-zero data length matches, it returns an
// empty region and a nil error.
func (s *Searcher) Search(ip uint32) (string, int, error) {
	// locate the segment index block based on the vector index
	var ioCount = 0
	var vIndex *VectorIndexBlock
	if s.vectorIndex != nil {
		vIndex = s.vectorIndex[(ip>>24)&0xFF][(ip>>16)&0xFF]
	} else {
		// position of the cell inside the vector index area
		l0, l1 := (ip>>24)&0xFF, (ip>>16)&0xFF
		offset := l0*VectorIndexCols*VectorIndexSize + l1*VectorIndexSize
		pos, err := s.handle.Seek(int64(HeaderInfoLength+offset), 0)
		if err != nil {
			return "", ioCount, fmt.Errorf("seek to vector index[%d][%d]: %w", l0, l1, err)
		}

		ioCount++
		var buff = make([]byte, 8)
		rLen, err := s.handle.Read(buff)
		if err != nil {
			return "", ioCount, fmt.Errorf("read vector index at %d: %w", pos, err)
		}

		if rLen != len(buff) {
			return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
		}

		vIndex, err = VectorIndexBlockDecode(buff)
		if err != nil {
			return "", ioCount, fmt.Errorf("invalid vector index block at %d: %w", pos, err)
		}
	}

	//log.Printf("vIndex=%s", vIndex)
	// binary search the segment index to get the region
	var dataLen, dataPtr = 0, uint32(0)
	var buff = make([]byte, SegmentIndexBlockSize)
	// l and h are entry numbers relative to vIndex.FirstPtr
	var l, h = 0, int((vIndex.LastPtr - vIndex.FirstPtr) / SegmentIndexBlockSize)
	for l <= h {
		// log.Printf("l=%d, h=%d", l, h)
		m := (l + h) >> 1
		p := vIndex.FirstPtr + uint32(m*SegmentIndexBlockSize)
		// log.Printf("m=%d, p=%d", m, p)
		_, err := s.handle.Seek(int64(p), 0)
		if err != nil {
			return "", ioCount, fmt.Errorf("seek to segment block at %d: %w", p, err)
		}

		ioCount++
		rLen, err := s.handle.Read(buff)
		if err != nil {
			return "", ioCount, fmt.Errorf("read segment index at %d: %w", p, err)
		}

		if rLen != len(buff) {
			return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", len(buff))
		}

		// segIndex, err := SegmentIndexDecode(buff)
		// if err != nil {
		// 	return "", fmt.Errorf("invalid segment index block at %d: %w", p, err)
		// }
		// decode the data step by step to reduce the unnecessary calculations
		sip := binary.LittleEndian.Uint32(buff)
		if ip < sip {
			h = m - 1
		} else {
			eip := binary.LittleEndian.Uint32(buff[4:])
			if ip > eip {
				l = m + 1
			} else {
				// sip <= ip <= eip: this entry covers the ip
				dataLen = int(binary.LittleEndian.Uint16(buff[8:]))
				dataPtr = binary.LittleEndian.Uint32(buff[10:])
				break
			}
		}
	}

	// nothing matched (or the matched entry carries no data)
	if dataLen == 0 {
		return "", ioCount, nil
	}

	// load and return the region data
	_, err := s.handle.Seek(int64(dataPtr), 0)
	if err != nil {
		return "", ioCount, fmt.Errorf("seek to data block at %d: %w", dataPtr, err)
	}

	ioCount++
	var regionBuff = make([]byte, dataLen)
	rLen, err := s.handle.Read(regionBuff)
	if err != nil {
		return "", ioCount, fmt.Errorf("read region data at %d: %w", dataPtr, err)
	}

	if rLen != dataLen {
		return "", ioCount, fmt.Errorf("incomplete read: readed bytes should be %d", dataLen)
	}

	return string(regionBuff), ioCount, nil
}

97
maker/golang/segment.go Normal file
View File

@ -0,0 +1,97 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"strings"
)
// Segment is an inclusive IP range, with both ends stored as 32-bit
// integers, together with the region info attached to it.
type Segment struct {
	StartIP uint32
	EndIP   uint32
	Region  string
}
// SegmentFrom parses a `start ip|end ip|region` line into a Segment.
// Everything after the second `|` is kept verbatim as the region.
// It returns an error when the line has fewer than three fields, when either
// IP is invalid, or when the start IP is greater than the end IP.
func SegmentFrom(seg string) (*Segment, error) {
	fields := strings.SplitN(seg, "|", 3)
	if len(fields) != 3 {
		return nil, fmt.Errorf("invalid ip segment `%s`", seg)
	}
	startText, endText, region := fields[0], fields[1], fields[2]

	sip, err := CheckIP(startText)
	if err != nil {
		return nil, fmt.Errorf("check start ip `%s`: %s", startText, err)
	}

	eip, err := CheckIP(endText)
	if err != nil {
		return nil, fmt.Errorf("check end ip `%s`: %s", endText, err)
	}

	if sip > eip {
		return nil, fmt.Errorf("start ip(%s) should not be greater than end ip(%s)", startText, endText)
	}

	parsed := &Segment{StartIP: sip, EndIP: eip, Region: region}
	return parsed, nil
}
// Split cuts the segment into consecutive pieces so that every piece lies
// within a single value of the first two IP bytes. Each returned piece
// carries the receiver's Region.
func (s *Segment) Split() []*Segment {
	// pass 1: cut on the first byte; these pieces are only intermediate, so
	// their Region is left empty
	var byFirst []*Segment
	firstLo, firstHi := (s.StartIP>>24)&0xFF, (s.EndIP>>24)&0xFF
	tail := s.StartIP & 0xFFFFFF
	for b := firstLo; b <= firstHi; b++ {
		prefix := b << 24
		piece := &Segment{StartIP: prefix | tail, EndIP: prefix | 0xFFFFFF}
		if piece.EndIP < s.EndIP {
			// more pieces follow; they start at the bottom of their range
			tail = 0
		} else {
			piece.EndIP = s.EndIP
		}
		byFirst = append(byFirst, piece)
	}

	// pass 2: cut every intermediate piece on the second byte
	var result []*Segment
	for _, part := range byFirst {
		top := part.StartIP & 0xFF000000
		low := part.StartIP & 0xFFFF
		secondLo, secondHi := (part.StartIP>>16)&0xFF, (part.EndIP>>16)&0xFF
		for b := secondLo; b <= secondHi; b++ {
			prefix := top | (b << 16)
			piece := &Segment{
				StartIP: prefix | low,
				EndIP:   prefix | 0xFFFF,
				Region:  s.Region,
			}
			if piece.EndIP < part.EndIP {
				low = 0
			} else {
				piece.EndIP = part.EndIP
			}
			result = append(result, piece)
		}
	}

	return result
}
// String formats the segment as `start ip|end ip|region`, with both IPs in
// dotted-decimal form.
func (s *Segment) String() string {
	return Long2IP(s.StartIP) + "|" + Long2IP(s.EndIP) + "|" + s.Region
}

73
maker/golang/util.go Normal file
View File

@ -0,0 +1,73 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package main
import (
"encoding/binary"
"fmt"
"strconv"
"strings"
)
// Util function
// CheckIP parses a dotted-decimal IPv4 address and returns it as a 32-bit
// integer with the first part in the most significant byte.
//
// It returns an error when ip does not have exactly four parts, when a part
// is not an integer, or when a part is not a plain unsigned number between
// 0 and 255.
func CheckIP(ip string) (uint32, error) {
	var ps = strings.Split(ip, ".")
	if len(ps) != 4 {
		return 0, fmt.Errorf("invalid ip address `%s`", ip)
	}

	var buff = make([]byte, 4)
	for i, s := range ps {
		d, err := strconv.Atoi(s)
		if err != nil {
			return 0, fmt.Errorf("the %dth part `%s` is not an integer", i, s)
		}

		// Atoi accepts a leading sign, so "+1" and "-0" would otherwise pass
		// the range check; s is non-empty here because Atoi succeeded.
		if d < 0 || d > 255 || s[0] == '+' || s[0] == '-' {
			return 0, fmt.Errorf("the %dth part `%s` should be an integer between 0 and 255", i, s)
		}

		buff[i] = byte(d)
	}

	// convert the ip to integer
	return binary.BigEndian.Uint32(buff), nil
}
// Long2IP converts a 32-bit integer to its dotted-decimal IPv4 form, with
// the most significant byte first.
func Long2IP(ip uint32) string {
	parts := make([]string, 0, 4)
	for shift := uint(32); shift > 0; {
		shift -= 8
		parts = append(parts, fmt.Sprintf("%d", (ip>>shift)&0xFF))
	}
	return strings.Join(parts, ".")
}
// MidIP returns the midpoint of sip and eip, rounded down. The sum is taken
// in 64 bits so it cannot wrap around.
func MidIP(sip uint32, eip uint32) uint32 {
	sum := uint64(sip) + uint64(eip)
	return uint32(sum / 2)
}
// CheckSegments verifies that every segment in segList has a start IP not
// greater than its end IP and that each segment starts exactly one address
// after the previous one ends. It returns an error describing the first
// segment that breaks either rule.
func CheckSegments(segList []*Segment) error {
	for i, cur := range segList {
		// sip must <= eip
		if cur.StartIP > cur.EndIP {
			return fmt.Errorf("segment `%s`: start ip should not be greater than end ip", cur.String())
		}

		// the first segment has no predecessor to be continuous with
		if i == 0 {
			continue
		}

		if prev := segList[i-1]; prev.EndIP+1 != cur.StartIP {
			return fmt.Errorf("discontinuous segment `%s`: last.eip+1 != cur.sip", cur.String())
		}
	}

	return nil
}

31
maker/golang/util_test.go Normal file
View File

@ -0,0 +1,31 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by a Apache2.0-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"testing"
)
// TestSplitSegment parses one sample segment line, splits it and checks that
// the resulting pieces are valid and continuous, printing each of them.
func TestSplitSegment(t *testing.T) {
	// other sample lines that can be swapped in:
	// "1.1.0.0|1.3.3.24|中国|广东|深圳|电信"
	// "0.0.0.0|1.255.225.254|0|0|0|内网IP|内网IP"
	const str = "28.201.224.0|29.34.191.255|美国|0|0|0|0"

	seg, err := SegmentFrom(str)
	if err != nil {
		t.Fatalf("failed to parser segment '%s': %s", str, err)
	}
	fmt.Printf("idx: src, seg: %s\n", seg.String())

	pieces := seg.Split()
	if err := CheckSegments(pieces); err != nil {
		t.Fatalf("check segments: %s", err.Error())
	}

	for idx, piece := range pieces {
		fmt.Printf("idx: %3d, seg: %s\n", idx, piece.String())
	}
}