feat:csharp 2.0 xdb maker

This commit is contained in:
Alan Lee 2022-08-08 21:52:16 +08:00
parent 294695be48
commit a77ba7f5be
12 changed files with 667 additions and 0 deletions

37
maker/csharp/.gitignore vendored Normal file
View File

@ -0,0 +1,37 @@
*.swp
*.*~
project.lock.json
.DS_Store
*.pyc
nupkg/
# Visual Studio Code
.vscode
# Rider
.idea
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
build/
bld/
[Bb]in/
[Oo]bj/
[Oo]ut/
msbuild.log
msbuild.err
msbuild.wrn
# Visual Studio 2015
.vs/

View File

@ -0,0 +1,23 @@
<!-- NUnit test project for the xdb maker: targets net6.0, is not packable, and references the IP2RegionMaker project under test. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>
<!-- Test framework, adapter, analyzers and coverage collector. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.1.0" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="NUnit.Analyzers" Version="3.3.0" />
<PackageReference Include="coverlet.collector" Version="3.1.2" />
</ItemGroup>
<!-- Project under test. -->
<ItemGroup>
<ProjectReference Include="..\IP2RegionMaker\IP2RegionMaker.csproj" />
</ItemGroup>
</Project>

View File

@ -0,0 +1 @@
global using NUnit.Framework;

View File

@ -0,0 +1,42 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace IP2RegionMaker.Test
{
/// <summary>
/// Tests for the IP conversion, segment parsing and segment splitting helpers.
/// </summary>
[TestFixture]
internal class UtilTest
{
    /// <summary>Parsing an address and formatting it back must return the original text.</summary>
    [TestCase("114.114.114.114")]
    public void TestIpAddressToUInt32(string value)
    {
        var number = XDB.Util.IpAddressToUInt32(value);

        Assert.That(XDB.Util.UInt32ToIpAddress(number), Is.EqualTo(value));
    }

    /// <summary>Formatting a number and parsing it back must return the original number.</summary>
    [TestCase(1920103026)]
    public void TestUInt32ToIpAddress(int value)
    {
        var number = (uint)value;
        var text = XDB.Util.UInt32ToIpAddress(number);

        Assert.That(XDB.Util.IpAddressToUInt32(text), Is.EqualTo(number));
    }

    /// <summary>
    /// Pins a known text/number pair so that a byte-order mistake made consistently
    /// in both directions (which a round trip cannot detect) is still caught.
    /// </summary>
    [TestCase("114.114.114.114", 1920103026u)]
    public void TestKnownIpAddressValue(string text, uint number)
    {
        Assert.That(XDB.Util.IpAddressToUInt32(text), Is.EqualTo(number));
        Assert.That(XDB.Util.UInt32ToIpAddress(number), Is.EqualTo(text));
    }

    /// <summary>
    /// Splitting must cover exactly the original range, stay continuous,
    /// keep every piece inside one (first byte, second byte) cell and keep the region.
    /// </summary>
    [TestCase("28.201.224.0|29.34.191.255|美国|0|0|0|0")]
    public void TestSplitSegment(string value)
    {
        var seg = XDB.Util.GetSegment(value);
        var segList = seg.Split();

        Assert.That(segList, Is.Not.Empty);
        Assert.DoesNotThrow(() => XDB.Util.CheckSegments(segList));
        Assert.That(segList[0].StartIP, Is.EqualTo(seg.StartIP));
        Assert.That(segList[^1].EndIP, Is.EqualTo(seg.EndIP));
        foreach (var item in segList)
        {
            // The upper 16 bits select the vector index cell, so they must not change inside a piece.
            Assert.That(item.StartIP >> 16, Is.EqualTo(item.EndIP >> 16));
            Assert.That(item.Region, Is.EqualTo(seg.Region));
            Console.WriteLine(item);
        }
    }
}
}

View File

@ -0,0 +1,10 @@
<!-- Console application (OutputType Exe) for the xdb maker; targets net6.0 with implicit usings and nullable reference types enabled. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@ -0,0 +1,72 @@
using IP2RegionMaker.XDB;
using System.Diagnostics;
// Command line entry point of the xdb maker.
// Recognised options use the form `--name=value`:
//   --src    source ip text file path (required)
//   --dst    destination binary xdb file path (required)
//   --index  name or numeric value of an IndexPolicy member (optional, defaults to VectorIndexPolicy)
// Arguments that do not match one of these options, or that carry an empty value, are ignored.
string srcFile = "", dstFile = "";
IndexPolicy indexPolicy = IndexPolicy.VectorIndexPolicy;
string[] aliases = { "--src", "--dst", "--index" };
foreach (var arg in args)
{
    var key = aliases.FirstOrDefault(x => arg.StartsWith($"{x}=", StringComparison.Ordinal));
    if (key is null)
    {
        continue;
    }

    // Everything after the first '=' is the value; it may itself contain '='.
    var value = arg[(key.Length + 1)..].Trim();
    if (value.Length == 0)
    {
        continue;
    }

    switch (key)
    {
        case "--src":
            srcFile = value;
            break;
        case "--dst":
            dstFile = value;
            break;
        case "--index":
            // TryParse also accepts numeric text for undefined values, hence the IsDefined check.
            if (Enum.TryParse<IndexPolicy>(value, true, out var parsedPolicy) && Enum.IsDefined(parsedPolicy))
            {
                indexPolicy = parsedPolicy;
            }
            else
            {
                // Interpolated on purpose: the composite-format overload rejects a "{arg}" placeholder.
                Console.WriteLine($"parse policy failed {arg}");
                return;
            }
            break;
    }
}

// Covers both "no arguments at all" and "one of the required options is missing",
// so the help text is printed exactly once.
if (string.IsNullOrEmpty(srcFile) || string.IsNullOrEmpty(dstFile))
{
    PrintHelp();
    return;
}

var stopwatch = Stopwatch.StartNew();
// Pass the policy selected on the command line instead of a hard-coded one.
Maker maker = new Maker(indexPolicy, srcFile, dstFile);
maker.Init();
maker.Build();
stopwatch.Stop();
Console.WriteLine($"Done, elapsed:{stopwatch.Elapsed.TotalMinutes}m");

// Prints the usage text.
void PrintHelp()
{
    Console.WriteLine($"ip2region xdb maker");
    Console.WriteLine("dotnet IP2RegionMaker.dll [command options]");
    Console.WriteLine("--src string source ip text file path");
    Console.WriteLine("--dst string destination binary xdb file path");
}

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Folder publish profile: publishes the Release | Any CPU build to bin\Release\net6.0\publish\ on the file system.
Publish profile reference: https://go.microsoft.com/fwlink/?LinkID=208121.
-->
<Project>
<PropertyGroup>
<Configuration>Release</Configuration>
<Platform>Any CPU</Platform>
<PublishDir>bin\Release\net6.0\publish\</PublishDir>
<PublishProtocol>FileSystem</PublishProtocol>
<_TargetId>Folder</_TargetId>
</PropertyGroup>
</Project>

View File

@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace IP2RegionMaker.XDB
{
/// <summary>
/// Index algorithm selector for the xdb maker.
/// The numeric values are explicit because they are part of the enum's contract.
/// </summary>
public enum IndexPolicy : int
{
    /// <summary>Vector index policy, numeric code 1.</summary>
    VectorIndexPolicy = 1,

    /// <summary>B-tree index policy, numeric code 2.</summary>
    BTreeIndexPolicy = 2,
}
}

View File

@ -0,0 +1,256 @@
// Copyright 2022 The Ip2Region Authors. All rights reserved.
// Use of this source code is governed by an Apache2.0-style
// license that can be found in the LICENSE file.
//
// @Author Alan Lee <lzh.shap@gmail.com>
// @Date 2022/8/8
// --- Ip2Region v2.0 data structure
//
// +----------------+--------------------------+---------------+--------------+
// | header space | vector speed up index | data payload | block index |
// +----------------+--------------------------+---------------+--------------+
// | 256 bytes | 512 KiB (fixed) | dynamic size | dynamic size |
// +----------------+--------------------------+---------------+--------------+
//
// 1. padding space : for header info such as the block index ptr, version, release date, etc., or any other temporary needs.
// -- 2bytes: version number, different version means structure update, it fixed to 2 for now
// -- 2bytes: index algorithm code.
// -- 4bytes: generate unix timestamp (version)
// -- 4bytes: index block start ptr
// -- 4bytes: index block end ptr
//
//
// 2. data block : region or whatever data info.
// 3. segment index block : binary index block.
// 4. vector index block : fixed index info for block index search speed up.
// space structure table:
// -- 0 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- 1 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- 2 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
// -- ...
// -- 255 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
//
//
// super block structure:
// +-----------------------+----------------------+
// | first index block ptr | last index block ptr |
// +-----------------------+----------------------+
//
// data entry structure:
// +--------------------+-----------------------+
// | 2bytes (for desc) | dynamic length |
// +--------------------+-----------------------+
// data length whatever in bytes
//
// index entry structure
// +------------+-----------+---------------+------------+
// | 4bytes | 4bytes | 2bytes | 4 bytes |
// +------------+-----------+---------------+------------+
// start ip end ip data length data ptr
using System.Text;
namespace IP2RegionMaker.XDB
{
/// <summary>
/// Builds an ip2region v2.0 xdb file (header, vector index, data block, segment index)
/// from a text source whose lines are parsed by <see cref="Util.GetSegment"/>.
/// </summary>
/// <remarks>
/// Intended call order: construct, <see cref="Init"/> once, then <see cref="Build"/> once.
/// <see cref="Init"/> closes the source stream after reading it and <see cref="Build"/> closes
/// the destination stream when it returns; <see cref="Dispose"/> releases whatever is still open.
/// All multi-byte fields are written little-endian regardless of the host byte order.
/// </remarks>
public class Maker : IDisposable
{
    const ushort VersionNo = 2;
    const int HeaderInfoLength = 256;
    const int VectorIndexRows = 256;
    const int VectorIndexCols = 256;
    const int VectorIndexSize = 8;
    const int SegmentIndexSize = 14;
    const int VectorIndexLength = VectorIndexRows * VectorIndexCols * VectorIndexSize;

    // Header layout: version (2 bytes) | index policy (2) | timestamp (4) | index start ptr (4) | index end ptr (4).
    const int HeaderIndexPtrOffset = 8;

    private readonly Stream _srcHandle;
    private readonly Stream _dstHandle;
    private readonly IndexPolicy _indexPolicy;
    private readonly List<Segment> _segments;
    private readonly Dictionary<string, uint> _regionPool;
    private readonly byte[] _vectorIndex;

    /// <summary>Opens the source file for reading and creates (or truncates) the destination file.</summary>
    /// <param name="indexPolicy">Index algorithm code stored in the header.</param>
    /// <param name="srcFile">Path of the source ip text file.</param>
    /// <param name="dstFile">Path of the xdb file to create.</param>
    public Maker(IndexPolicy indexPolicy, string srcFile, string dstFile)
    {
        _indexPolicy = indexPolicy;
        _segments = new List<Segment>();
        _regionPool = new Dictionary<string, uint>();
        _vectorIndex = new byte[VectorIndexLength];

        // Read-only access so that a write-protected source file can still be used.
        _srcHandle = File.Open(srcFile, FileMode.Open, FileAccess.Read, FileShare.Read);
        try
        {
            _dstHandle = File.Open(dstFile, FileMode.Create, FileAccess.Write, FileShare.None);
        }
        catch
        {
            // Do not leak the source handle when the destination cannot be created.
            _srcHandle.Dispose();
            throw;
        }
    }

    /// <summary>Closes both file streams. Safe to call more than once.</summary>
    public void Dispose()
    {
        _srcHandle.Dispose();
        _dstHandle.Dispose();
        GC.SuppressFinalize(this);
    }

    // Writes the 256-byte header at the start of the destination file.
    // The two index pointers stay zero here and are patched at the end of Build().
    private void InitDbHeader()
    {
        _dstHandle.Seek(0, SeekOrigin.Begin);
        var header = new byte[HeaderInfoLength];
        PutUInt16(header, 0, VersionNo);
        PutUInt16(header, 2, (ushort)_indexPolicy);
        // The timestamp field is 4 bytes wide, so the 64-bit unix time is narrowed explicitly.
        PutUInt32(header, 4, unchecked((uint)DateTimeOffset.UtcNow.ToUnixTimeSeconds()));
        _dstHandle.Write(header, 0, header.Length);
    }

    // Reads every line of the source into _segments and validates the list.
    // Disposing the reader also closes the source stream.
    private void LoadSegments()
    {
        Console.WriteLine("try to load the segments ... ");
        using var reader = new StreamReader(_srcHandle);
        string? line;
        while ((line = reader.ReadLine()) is not null)
        {
            _segments.Add(Util.GetSegment(line));
        }

        Util.CheckSegments(_segments);
        Console.WriteLine($"all segments loaded, length: {_segments.Count}");
    }

    /// <summary>Writes the file header and loads all segments from the source file.</summary>
    public void Init()
    {
        InitDbHeader();
        LoadSegments();
    }

    /// <summary>
    /// Writes the data block, the segment index block and the vector index, then stores the
    /// segment index start/end pointers in the header. The destination stream is closed on return.
    /// </summary>
    /// <exception cref="ArgumentException">A region is longer than 0xFFFF bytes when UTF-8 encoded.</exception>
    /// <exception cref="InvalidOperationException">A segment's region has no data pointer.</exception>
    public void Build()
    {
        // The data block starts right after the header and the fixed-size vector index.
        _dstHandle.Seek(HeaderInfoLength + VectorIndexLength, SeekOrigin.Begin);
        // leaveOpen is false: disposing the writer flushes and closes the destination stream.
        using var writer = new BinaryWriter(_dstHandle, Encoding.UTF8, false);

        Console.WriteLine("try to write the data block ... ");
        foreach (var seg in _segments)
        {
            Console.WriteLine($"try to write region {seg.Region}");
            if (_regionPool.TryGetValue(seg.Region, out var cachedPtr))
            {
                // Identical region text is stored once and shared by all its segments.
                Console.WriteLine($"--[Cached] with ptr={cachedPtr}");
                continue;
            }

            var region = Encoding.UTF8.GetBytes(seg.Region);
            if (region.Length > 0xFFFF)
            {
                throw new ArgumentException($"too long region info `{seg.Region}`: should be less than {0xFFFF} bytes");
            }

            var pos = _dstHandle.Position;
            writer.Write(region);
            _regionPool[seg.Region] = (uint)pos;
        }

        Console.WriteLine("try to write the segment index block ... ");
        var indexBuff = new byte[SegmentIndexSize];
        var counter = 0;
        long startPtr = -1;
        long endPtr = -1;
        foreach (var seg in _segments)
        {
            if (!_regionPool.TryGetValue(seg.Region, out var dataPtr))
            {
                throw new InvalidOperationException($"missing ptr cache for region `{seg.Region}`");
            }

            var datalen = Encoding.UTF8.GetByteCount(seg.Region);
            if (datalen < 1)
            {
                throw new ArgumentNullException(nameof(seg.Region));
            }

            var segList = seg.Split();
            Console.WriteLine($"try to index segment({segList.Count}) {seg} ...");
            foreach (var item in segList)
            {
                var pos = _dstHandle.Position;
                // Index entry: start ip (4) | end ip (4) | data length (2) | data ptr (4).
                PutUInt32(indexBuff, 0, item.StartIP);
                PutUInt32(indexBuff, 4, item.EndIP);
                PutUInt16(indexBuff, 8, (ushort)datalen);
                PutUInt32(indexBuff, 10, dataPtr);
                writer.Write(indexBuff);
                Console.WriteLine($"|-segment index: {counter}, ptr: {pos}, segment: {seg}");
                SetVectorIndex(item.StartIP, (uint)pos);
                counter++;
                if (startPtr == -1)
                {
                    startPtr = pos;
                }

                endPtr = pos;
            }
        }

        Console.WriteLine($"try to write the vector index block ... ");
        _dstHandle.Seek(HeaderInfoLength, SeekOrigin.Begin);
        writer.Write(_vectorIndex);

        Console.WriteLine("try to write the segment index ptr ... ");
        var ptrBuff = new byte[8];
        PutUInt32(ptrBuff, 0, unchecked((uint)startPtr));
        PutUInt32(ptrBuff, 4, unchecked((uint)endPtr));
        // The pointers live at header offset 8; writing them at offset 0 would
        // overwrite the version, index policy and timestamp fields.
        _dstHandle.Seek(HeaderIndexPtrOffset, SeekOrigin.Begin);
        writer.Write(ptrBuff);
        Console.WriteLine($"write done, dataBlocks: {_regionPool.Count}, indexBlocks: ({_segments.Count}, {counter}), indexPtr: ({startPtr}, {endPtr})");
    }

    // Records the index entry at `ptr` in the vector index cell selected by the two
    // high bytes of `ip`: the first entry seen fixes the cell's start pointer, and
    // every entry moves the cell's end pointer to just past itself.
    private void SetVectorIndex(uint ip, uint ptr)
    {
        var row = (int)((ip >> 24) & 0xFF);
        var col = (int)((ip >> 16) & 0xFF);
        var idx = (row * VectorIndexCols + col) * VectorIndexSize;
        if (GetUInt32(_vectorIndex, idx) == 0)
        {
            PutUInt32(_vectorIndex, idx, ptr);
        }

        PutUInt32(_vectorIndex, idx + 4, ptr + SegmentIndexSize);
    }

    // Little-endian helpers: BitConverter follows the host byte order, the file format does not.
    private static void PutUInt16(byte[] buffer, int offset, ushort value) =>
        System.Buffers.Binary.BinaryPrimitives.WriteUInt16LittleEndian(buffer.AsSpan(offset), value);

    private static void PutUInt32(byte[] buffer, int offset, uint value) =>
        System.Buffers.Binary.BinaryPrimitives.WriteUInt32LittleEndian(buffer.AsSpan(offset), value);

    private static uint GetUInt32(byte[] buffer, int offset) =>
        System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(buffer.AsSpan(offset));
}
}

View File

@ -0,0 +1,85 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace IP2RegionMaker.XDB
{
/// <summary>
/// An inclusive IPv4 range, with addresses held as 32-bit numbers, and the region text attached to it.
/// </summary>
public class Segment
{
    /// <summary>First address of the range (inclusive).</summary>
    public uint StartIP { get; set; }

    /// <summary>Last address of the range (inclusive).</summary>
    public uint EndIP { get; set; }

    /// <summary>Region text of the range; empty (never null) until assigned.</summary>
    public string Region { get; set; } = string.Empty;

    /// <summary>
    /// Splits this range into consecutive pieces so that every piece keeps the same
    /// first and second address byte from its start to its end.
    /// </summary>
    /// <returns>The pieces in ascending order, each carrying this segment's <see cref="Region"/>.</returns>
    public List<Segment> Split()
    {
        // Pass 1: cut wherever the first (most significant) byte changes.
        var firstByteRanges = new List<(uint Start, uint End)>();
        var firstLow = (StartIP >> 24) & 0xFF;
        var firstHigh = (EndIP >> 24) & 0xFF;
        var nextStart = StartIP;
        for (var b = firstLow; b <= firstHigh; b++)
        {
            var sip = (b << 24) | (nextStart & 0xFFFFFF);
            var eip = (b << 24) | 0xFFFFFF;
            if (eip < EndIP)
            {
                // More to come: the following piece starts at the bottom of the next first-byte block.
                nextStart = (b + 1) << 24;
            }
            else
            {
                eip = EndIP;
            }

            firstByteRanges.Add((sip, eip));
        }

        // Pass 2: cut each of those ranges wherever the second byte changes.
        var segList = new List<Segment>();
        foreach (var (rangeStart, rangeEnd) in firstByteRanges)
        {
            var prefix = rangeStart & 0xFF000000;
            var lowBits = rangeStart;
            var secondLow = (rangeStart >> 16) & 0xFF;
            var secondHigh = (rangeEnd >> 16) & 0xFF;
            for (var b = secondLow; b <= secondHigh; b++)
            {
                var sip = prefix | (b << 16) | (lowBits & 0xFFFF);
                var eip = prefix | (b << 16) | 0xFFFF;
                if (eip < rangeEnd)
                {
                    // Only the first piece keeps the original low 16 bits; later pieces start at x.y.0.0.
                    lowBits = 0;
                }
                else
                {
                    eip = rangeEnd;
                }

                segList.Add(new Segment
                {
                    StartIP = sip,
                    EndIP = eip,
                    Region = Region,
                });
            }
        }

        return segList;
    }

    /// <summary>Formats the segment as <c>start ip|end ip|region</c>.</summary>
    public override string ToString()
    {
        return $"{Util.UInt32ToIpAddress(StartIP)}|{Util.UInt32ToIpAddress(EndIP)}|{Region}";
    }
}
}

View File

@ -0,0 +1,77 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
namespace IP2RegionMaker.XDB
{
/// <summary>
/// Helpers for converting IPv4 addresses and for parsing and validating source segments.
/// </summary>
public static class Util
{
    /// <summary>Converts dotted IPv4 text to its 32-bit value (first octet in the highest byte).</summary>
    /// <param name="ipAddress">IPv4 address text, e.g. <c>114.114.114.114</c>.</param>
    /// <returns>The address as an unsigned 32-bit number.</returns>
    /// <exception cref="FormatException">The text is not a valid IP address, or is not an IPv4 address.</exception>
    public static uint IpAddressToUInt32(string ipAddress)
    {
        var address = IPAddress.Parse(ipAddress);
        // IPAddress.Parse also accepts IPv6, whose 16 address bytes cannot be represented here.
        if (address.AddressFamily != System.Net.Sockets.AddressFamily.InterNetwork)
        {
            throw new FormatException($"invalid IPv4 address `{ipAddress}`");
        }

        byte[] bytes = address.GetAddressBytes();
        // Explicit shifts keep the result independent of the host byte order.
        return ((uint)bytes[0] << 24) | ((uint)bytes[1] << 16) | ((uint)bytes[2] << 8) | bytes[3];
    }

    /// <summary>Converts a 32-bit IPv4 value (first octet in the highest byte) to dotted text.</summary>
    /// <param name="ipAddress">The address as an unsigned 32-bit number.</param>
    /// <returns>The dotted IPv4 text.</returns>
    public static string UInt32ToIpAddress(uint ipAddress)
    {
        byte[] bytes =
        {
            (byte)(ipAddress >> 24),
            (byte)(ipAddress >> 16),
            (byte)(ipAddress >> 8),
            (byte)ipAddress,
        };
        return new IPAddress(bytes).ToString();
    }

    /// <summary>Parses one <c>start ip|end ip|region</c> source line.</summary>
    /// <param name="line">The source line; everything after the second <c>|</c> is the region text.</param>
    /// <returns>The parsed segment.</returns>
    /// <exception cref="ArgumentException">
    /// The line has fewer than three fields, the start ip is greater than the end ip, or the region is empty.
    /// </exception>
    /// <exception cref="FormatException">One of the two addresses is not a valid IPv4 address.</exception>
    public static Segment GetSegment(string line)
    {
        ArgumentNullException.ThrowIfNull(line);

        var ps = line.Split("|", 3);
        if (ps.Length != 3)
        {
            throw new ArgumentException($"invalid ip segment line {line}");
        }

        var sip = IpAddressToUInt32(ps[0]);
        var eip = IpAddressToUInt32(ps[1]);
        if (sip > eip)
        {
            throw new ArgumentException($"start ip {ps[0]} should not be greater than end ip {ps[1]}");
        }

        if (string.IsNullOrEmpty(ps[2]))
        {
            throw new ArgumentException($"empty region info in segment line {line}");
        }

        return new Segment
        {
            StartIP = sip,
            EndIP = eip,
            Region = ps[2],
        };
    }

    /// <summary>
    /// Verifies that every segment is well-formed and that each segment starts exactly
    /// one address after the previous one ends.
    /// </summary>
    /// <param name="segments">Segments in the order they will be indexed.</param>
    /// <exception cref="ArgumentException">A segment is inverted, or two neighbours are not contiguous.</exception>
    public static void CheckSegments(List<Segment> segments)
    {
        ArgumentNullException.ThrowIfNull(segments);

        Segment? last = null;
        foreach (var seg in segments)
        {
            if (seg.StartIP > seg.EndIP)
            {
                throw new ArgumentException($"segment `{seg}`: start ip should not be greater than end ip");
            }

            if (last is not null)
            {
                // 64-bit arithmetic: with 32 bits, 255.255.255.255 + 1 would wrap to 0 and
                // wrongly accept a following segment that starts at 0.0.0.0.
                var expectedStart = (ulong)last.EndIP + 1;
                if (expectedStart != seg.StartIP)
                {
                    throw new ArgumentException($"discontinuous data segment: last.eip+1({expectedStart}) != seg.sip({seg.StartIP},#{seg})");
                }
            }

            last = seg;
        }
    }
}
}

36
maker/csharp/ReadMe.md Normal file
View File

@ -0,0 +1,36 @@
# ip2region xdb csharp 生成实现
# 编译安装
编译环境:[dotnet6.0](https://dotnet.microsoft.com/zh-cn/download/dotnet/6.0)
```bash
# cd 到 maker/csharp/IP2RegionMaker目录
dotnet publish -o ./bin
```
然后会在当前目录的 bin 目录下得到一个 IP2RegionMaker.dll 的打包文件。
# 数据生成
通过 `dotnet IP2RegionMaker.dll` 来生成 ip2region.xdb 二进制文件:
```bash
➜ csharp git:(master) ✗ dotnet IP2RegionMaker.dll
ip2region xdb maker
dotnet IP2RegionMaker.dll [command options]
--src string source ip text file path
--dst string destination binary xdb file path
```
例如,通过默认的 data/ip.merge.txt 原数据,在当前目录生成一个 ip2region.xdb 二进制文件:
```bash
➜ csharp git:(master) ✗ dotnet ./IP2RegionMaker/bin/IP2RegionMaker.dll --src=../../data/ip.merge.txt --dst=./ip2region.xdb
# 会看到一堆输出,最终会看到如下输出表示运行成功
...
...
...
write done, dataBlocks: 13804, indexBlocks: (683591, 720221), indexPtr: (982904, 11065984)
Done, elapsed:2.1966620833333335m
```
# 数据 查询/bench 测试
已经完成开发的 [binding](../../binding/) 都有查询和 bench 测试程序以及使用文档,你可以使用你熟悉的语言的 searcher 进行查询测试或者 bench 测试,来确认数据的正确性和完整性。