add field-list support for self-defined region fields

This commit is contained in:
lion 2025-06-16 23:24:19 +08:00
parent c1808255e7
commit 78c2fab10a
3 changed files with 123 additions and 8 deletions

View File

@ -12,6 +12,8 @@ import (
"log/slog"
"os"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
@ -74,6 +76,78 @@ func applyLogLevel(logLevel string) error {
return nil
}
// pattern matches one field token: either a single non-negative decimal
// index ("3") or an inclusive index range ("2-5").
var pattern = regexp.MustCompile(`^(\d+(-\d+)?)$`)

// getFilterFields parses a comma-separated list of field indexes and
// inclusive index ranges (for example "0,2-4,7") into a sorted slice of
// distinct indexes.
//
// An empty fieldList yields an empty, non-nil slice and no error. An error is
// returned for an empty token, a token that is not a number or number range,
// a value that does not fit in an int, a range whose start is greater than
// its end, or an index that is selected more than once.
func getFilterFields(fieldList string) ([]int, error) {
	if len(fieldList) == 0 {
		return []int{}, nil
	}

	var fields []int
	// seen is keyed by the numeric index rather than the token text, so that
	// different spellings of the same value ("1" and "01") count as duplicates.
	seen := make(map[int]struct{})

	for _, f := range strings.Split(fieldList, ",") {
		f = strings.TrimSpace(f)
		if len(f) == 0 {
			return nil, fmt.Errorf("empty field index value `%s`", f)
		}

		ms := pattern.FindString(f)
		if len(ms) == 0 {
			return nil, fmt.Errorf("field `%s` is not a number or number range", f)
		}

		// The pattern allows at most one "-", so a single cut fully separates
		// the range bounds.
		lo, hi, isRange := strings.Cut(ms, "-")
		if !isRange {
			idx, err := strconv.Atoi(ms)
			if err != nil {
				return nil, fmt.Errorf("field index `%s` not an integer", f)
			}
			if _, ok := seen[idx]; ok {
				return nil, fmt.Errorf("duplicate option `%s`", f)
			}
			seen[idx] = struct{}{}
			fields = append(fields, idx)
			continue
		}

		start, err := strconv.Atoi(lo)
		if err != nil {
			return nil, fmt.Errorf("range start `%s` not an integer", lo)
		}
		end, err := strconv.Atoi(hi)
		if err != nil {
			return nil, fmt.Errorf("range end `%s` not an integer", hi)
		}
		if start > end {
			return nil, fmt.Errorf("index range start(%d) should <= end(%d)", start, end)
		}

		// Expand the inclusive range one index at a time so that overlaps with
		// earlier tokens are reported.
		for i := start; i <= end; i++ {
			if _, ok := seen[i]; ok {
				return nil, fmt.Errorf("duplicate option `%d`", i)
			}
			seen[i] = struct{}{}
			fields = append(fields, i)
		}
	}

	// Return the indexes in ascending order regardless of input order.
	sort.Ints(fields)
	return fields, nil
}
func genDb() {
var err error
var srcFile, dstFile = "", ""
@ -122,11 +196,15 @@ func genDb() {
return
}
slog.Info("field-list", "value", fieldList)
fields, err := getFilterFields(fieldList)
if err != nil {
slog.Error("failed to get filter fields", "error", err)
return
}
// make the binary file
tStart := time.Now()
maker, err := xdb.NewMaker(indexPolicy, srcFile, dstFile)
maker, err := xdb.NewMaker(indexPolicy, srcFile, dstFile, fields)
if err != nil {
fmt.Printf("failed to create %s\n", err)
return

View File

@ -19,9 +19,9 @@
// -- 4bytes: index block end ptr
//
//
// 2. data block : region or whatever data info.
// 3. segment index block : binary index block.
// 4. vector index block : fixed index info for block index search speed up.
// 2. data block: region or whatever data info.
// 3. segment index block: binary index block.
// 4. vector index block: fixed index info for block index search speedup.
// space structure table:
// -- 0 -> | 1st super block | 2nd super block | 3rd super block | ... | 255th super block
// -- 1 -> | 1st super block | 2nd super block | 3rd super block | ... | 255th super block
@ -54,6 +54,7 @@ import (
"fmt"
"log/slog"
"os"
"strings"
"time"
)
@ -69,13 +70,16 @@ type Maker struct {
srcHandle *os.File
dstHandle *os.File
// self-defined field indexes used to filter the region
fields []int
indexPolicy IndexPolicy
segments []*Segment
regionPool map[string]uint32
vectorIndex []byte
}
func NewMaker(policy IndexPolicy, srcFile string, dstFile string) (*Maker, error) {
func NewMaker(policy IndexPolicy, srcFile string, dstFile string, fields []int) (*Maker, error) {
// open the source file with READONLY mode
srcHandle, err := os.OpenFile(srcFile, os.O_RDONLY, 0600)
if err != nil {
@ -92,6 +96,9 @@ func NewMaker(policy IndexPolicy, srcFile string, dstFile string) (*Maker, error
srcHandle: srcHandle,
dstHandle: dstHandle,
// fields filter index
fields: fields,
indexPolicy: policy,
segments: []*Segment{},
regionPool: map[string]uint32{},
@ -133,6 +140,28 @@ func (m *Maker) initDbHeader() error {
return nil
}
// getFilteredRegion projects a "|"-separated region string onto the field
// indexes stored in m.fields.
//
// When m.fields is empty the region is returned unchanged. Otherwise the
// region is split on "|" and the fields at the configured indexes are joined
// back with "|", in the order the indexes appear in m.fields. An error is
// returned if an index is negative or is not smaller than the number of
// fields in the region.
func (m *Maker) getFilteredRegion(region string) (string, error) {
	if len(m.fields) == 0 {
		return region, nil
	}

	parts := strings.Split(region, "|")
	picked := make([]string, len(m.fields))
	for pos, idx := range m.fields {
		switch {
		case idx < 0:
			return "", fmt.Errorf("negative filter index %d", idx)
		case idx >= len(parts):
			return "", fmt.Errorf("field index %d exceeded the max length of %d", idx, len(parts))
		}
		picked[pos] = parts[idx]
	}

	return strings.Join(picked, "|"), nil
}
func (m *Maker) loadSegments() error {
slog.Info("try to load the segments ... ")
var last *Segment = nil
@ -146,6 +175,14 @@ func (m *Maker) loadSegments() error {
return err
}
// apply the field filter
region, err := m.getFilteredRegion(seg.Region)
if err != nil {
return err
}
// slog.Info("filtered", "region", region)
seg.Region = region
m.segments = append(m.segments, seg)
last = seg
return nil
@ -241,7 +278,7 @@ func (m *Maker) Start() error {
}
// @Note: data length should be the length of bytes.
// this works find cuz of the string feature (byte sequence) of golang.
// this works fine because of the string feature (byte sequence) of golang.
var dataLen = len(seg.Region)
if dataLen < 1 {
// @TODO: could this even be a case ?

View File

@ -94,7 +94,7 @@ func IterateSegments(handle *os.File, before func(l string), cb func(seg *Segmen
Region: ps[2],
}
// check and automatic merging the Consecutive Segments which means:
// check and automatic merging the Consecutive Segments, which means:
// 1, region info is the same
// 2, last.eip+1 = cur.sip
if last == nil {