diff --git a/maker/csharp/.gitignore b/maker/csharp/.gitignore
new file mode 100644
index 0000000..0626272
--- /dev/null
+++ b/maker/csharp/.gitignore
@@ -0,0 +1,37 @@
+*.swp
+*.*~
+project.lock.json
+.DS_Store
+*.pyc
+nupkg/
+
+# Visual Studio Code
+.vscode
+
+# Rider
+.idea
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+build/
+bld/
+[Bb]in/
+[Oo]bj/
+[Oo]ut/
+msbuild.log
+msbuild.err
+msbuild.wrn
+
+# Visual Studio 2015
+.vs/
\ No newline at end of file
diff --git a/maker/csharp/IP2RegionMaker.Test/IP2RegionMaker.Test.csproj b/maker/csharp/IP2RegionMaker.Test/IP2RegionMaker.Test.csproj
new file mode 100644
index 0000000..289b828
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker.Test/IP2RegionMaker.Test.csproj
@@ -0,0 +1,23 @@
+
+
+
+    net6.0
+    enable
+    enable
+
+    false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/maker/csharp/IP2RegionMaker.Test/Usings.cs b/maker/csharp/IP2RegionMaker.Test/Usings.cs
new file mode 100644
index 0000000..cefced4
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker.Test/Usings.cs
@@ -0,0 +1 @@
+global using NUnit.Framework;
\ No newline at end of file
diff --git a/maker/csharp/IP2RegionMaker.Test/UtilTest.cs b/maker/csharp/IP2RegionMaker.Test/UtilTest.cs
new file mode 100644
index 0000000..7d57c6f
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker.Test/UtilTest.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace IP2RegionMaker.Test
+{
+    /// <summary>
+    /// Unit tests for the IP conversion and segment helpers in <c>XDB.Util</c>.
+    /// </summary>
+    [TestFixture]
+    internal class UtilTest
+    {
+        /// <summary>Dotted-quad text converts to its numeric value.</summary>
+        [TestCase("114.114.114.114", 1920103026u)]
+        public void TestIpAddressToUInt32(string value, uint expected)
+        {
+            Assert.That(XDB.Util.IpAddressToUInt32(value), Is.EqualTo(expected));
+        }
+
+        /// <summary>A numeric address renders back to dotted-quad text.</summary>
+        [TestCase(1920103026u, "114.114.114.114")]
+        public void TestUInt32ToIpAddress(uint value, string expected)
+        {
+            Assert.That(XDB.Util.UInt32ToIpAddress(value), Is.EqualTo(expected));
+        }
+
+        /// <summary>Split pieces are contiguous and cover exactly the original range.</summary>
+        [TestCase("28.201.224.0|29.34.191.255|美国|0|0|0|0")]
+        public void TestSplitSegment(string value)
+        {
+            var seg = XDB.Util.GetSegment(value);
+            var segList = seg.Split();
+
+            // CheckSegments throws when two neighbouring pieces are not contiguous.
+            Assert.DoesNotThrow(() => XDB.Util.CheckSegments(segList));
+            Assert.That(segList, Is.Not.Empty);
+            Assert.That(segList[0].StartIP, Is.EqualTo(seg.StartIP));
+            Assert.That(segList[^1].EndIP, Is.EqualTo(seg.EndIP));
+
+            foreach (var item in segList)
+            {
+                Console.WriteLine(item);
+            }
+        }
+    }
+}
diff --git a/maker/csharp/IP2RegionMaker/IP2RegionMaker.csproj b/maker/csharp/IP2RegionMaker/IP2RegionMaker.csproj
new file mode 100644
index 0000000..74abf5c
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/IP2RegionMaker.csproj
@@ -0,0 +1,10 @@
+
+
+    Exe
+    net6.0
+    enable
+    enable
+
+
+
diff --git a/maker/csharp/IP2RegionMaker/Program.cs b/maker/csharp/IP2RegionMaker/Program.cs
new file mode 100644
index 0000000..f37d4d8
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/Program.cs
@@ -0,0 +1,79 @@
+using IP2RegionMaker.XDB;
+using System.Diagnostics;
+
+// Command-line entry point: parses --src, --dst and --index, then builds the xdb file.
+
+string srcFile = "", dstFile = "";
+IndexPolicy indexPolicy = IndexPolicy.VectorIndexPolicy;
+
+if (args.Length < 2)
+{
+    // --src and --dst are both mandatory, so fewer than two arguments can never be valid.
+    PrintHelp();
+    return;
+}
+
+string[] aliases = { "--src", "--dst", "--index" };
+foreach (var arg in args)
+{
+    // Only arguments of the form `--key=value` with a known key are recognised.
+    var key = aliases.FirstOrDefault(x => arg.StartsWith($"{x}=", StringComparison.Ordinal));
+    if (string.IsNullOrEmpty(key))
+    {
+        continue;
+    }
+
+    var value = arg.Split("=", 2).LastOrDefault()?.Trim();
+
+    if (string.IsNullOrEmpty(value))
+    {
+        continue;
+    }
+
+    switch (key)
+    {
+        case "--src":
+            srcFile = value;
+            break;
+        case "--dst":
+            dstFile = value;
+            break;
+        case "--index":
+            // TryParse also accepts any numeric text, so additionally require a defined member.
+            if (!Enum.TryParse(value, true, out indexPolicy)
+                || !Enum.IsDefined(typeof(IndexPolicy), indexPolicy))
+            {
+                Console.WriteLine($"parse policy failed {arg}");
+                return;
+            }
+
+            break;
+    }
+}
+
+if (string.IsNullOrEmpty(srcFile) || string.IsNullOrEmpty(dstFile))
+{
+    PrintHelp();
+    return;
+}
+
+Stopwatch stopwatch = new Stopwatch();
+stopwatch.Start();
+
+// Honour the policy parsed from --index rather than always forcing the vector policy.
+Maker maker = new Maker(indexPolicy, srcFile, dstFile);
+maker.Init();
+maker.Build();
+
+stopwatch.Stop();
+Console.WriteLine($"Done, elapsed:{stopwatch.Elapsed.TotalMinutes}m");
+
+// Prints the command-line usage summary.
+void PrintHelp()
+{
+    Console.WriteLine($"ip2region xdb maker");
+    Console.WriteLine("dotnet IP2RegionMaker.dll [command options]");
+    Console.WriteLine("--src string source ip text file path");
+    Console.WriteLine("--dst string destination binary xdb file path");
+    Console.WriteLine("--index string index policy: VectorIndexPolicy (default) or BTreeIndexPolicy");
+}
\ No newline at end of file
diff --git a/maker/csharp/IP2RegionMaker/Properties/PublishProfiles/FolderProfile.pubxml b/maker/csharp/IP2RegionMaker/Properties/PublishProfiles/FolderProfile.pubxml
new file mode 100644
index 0000000..154e035
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/Properties/PublishProfiles/FolderProfile.pubxml
@@ -0,0 +1,13 @@
+
+
+
+
+    Release
+    Any CPU
+    bin\Release\net6.0\publish\
+    FileSystem
+    <_TargetId>Folder
+
+
\ No newline at end of file
diff --git a/maker/csharp/IP2RegionMaker/XDB/IndexPolicy.cs b/maker/csharp/IP2RegionMaker/XDB/IndexPolicy.cs
new file mode 100644
index 0000000..cd8096d
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/XDB/IndexPolicy.cs
@@ -0,0 +1,20 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace IP2RegionMaker.XDB
+{
+    /// <summary>
+    /// Index algorithm code that the maker records in the xdb header.
+    /// </summary>
+    public enum IndexPolicy
+    {
+        /// <summary>Vector index; the command-line tool's default.</summary>
+        VectorIndexPolicy = 1,
+
+        /// <summary>B-tree index.</summary>
+        BTreeIndexPolicy = 2,
+    }
+}
diff --git a/maker/csharp/IP2RegionMaker/XDB/Maker.cs b/maker/csharp/IP2RegionMaker/XDB/Maker.cs
new file mode 100644
index 0000000..372020a
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/XDB/Maker.cs
@@ -0,0 +1,256 @@
+// Copyright 2022 The Ip2Region Authors. All rights reserved.
+// Use of this source code is governed by a Apache2.0-style
+// license that can be found in the LICENSE file.
+//
+// @Author Alan Lee
+// @Date 2022/8/8
+
+// --- Ip2Region v2.0 data structure
+//
+// +----------------+--------------------------+---------------+--------------+
+// | header space   | vector speed up index    | data payload  | block index  |
+// +----------------+--------------------------+---------------+--------------+
+// | 256 bytes      | 512 KiB (fixed)          | dynamic size  | dynamic size |
+// +----------------+--------------------------+---------------+--------------+
+//
+// 1. padding space : for header info like block index ptr, version, release date eg ... or any other temporary needs.
+// -- 2bytes: version number, different version means structure update, it fixed to 2 for now
+// -- 2bytes: index algorithm code.
+// -- 4bytes: generate unix timestamp (version)
+// -- 4bytes: index block start ptr
+// -- 4bytes: index block end ptr
+//
+//
+// 2. data block : region or whatever data info.
+// 3. segment index block : binary index block.
+// 4. vector index block : fixed index info for block index search speed up.
+// space structure table:
+// -- 0   -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
+// -- 1   -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
+// -- 2   -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
+// -- ...
+// -- 255 -> | 1st super block | 2nd super block | 3rd super block | ... | 256th super block
+//
+//
+// super block structure:
+// +-----------------------+----------------------+
+// | first index block ptr | last index block ptr |
+// +-----------------------+----------------------+
+//
+// data entry structure:
+// +--------------------+-----------------------+
+// | 2bytes (for desc)  | dynamic length        |
+// +--------------------+-----------------------+
+//  data length          whatever in bytes
+//
+// index entry structure
+// +------------+-----------+---------------+------------+
+// | 4bytes     | 4bytes    | 2bytes        | 4 bytes    |
+// +------------+-----------+---------------+------------+
+//  start ip      end ip      data length     data ptr
+
+using System.Text;
+
+namespace IP2RegionMaker.XDB
+{
+    /// <summary>
+    /// Builds an xdb file, in the layout described at the top of this file, from a text file of
+    /// <c>start ip|end ip|region</c> lines.
+    /// </summary>
+    public class Maker : IDisposable
+    {
+        const ushort VersionNo = 2;
+        const int HeaderInfoLength = 256;
+        const int VectorIndexRows = 256;
+        const int VectorIndexCols = 256;
+        const int VectorIndexSize = 8;
+        const int SegmentIndexSize = 14;
+        const int VectorIndexLength = VectorIndexRows * VectorIndexCols * VectorIndexSize;
+
+        private readonly Stream _srcHandle;
+        private readonly Stream _dstHandle;
+        private readonly IndexPolicy _indexPolicy;
+        private readonly List<Segment> _segments;
+        private readonly Dictionary<string, uint> _regionPool;
+        private readonly byte[] _vectorIndex;
+
+        /// <summary>Opens the source file for reading and creates (or truncates) the destination file.</summary>
+        /// <param name="indexPolicy">Index algorithm code recorded in the header.</param>
+        /// <param name="srcFile">Path of the source segment text file.</param>
+        /// <param name="dstFile">Path of the xdb file to write.</param>
+        public Maker(IndexPolicy indexPolicy, string srcFile, string dstFile)
+        {
+            _indexPolicy = indexPolicy;
+
+            _srcHandle = File.OpenRead(srcFile);
+            _dstHandle = File.Open(dstFile, FileMode.Create);
+            _segments = new List<Segment>();
+            _regionPool = new Dictionary<string, uint>();
+            _vectorIndex = new byte[VectorIndexLength];
+        }
+
+        /// <summary>Closes both file handles; safe to call more than once.</summary>
+        public void Dispose()
+        {
+            _srcHandle.Dispose();
+            _dstHandle.Dispose();
+            GC.SuppressFinalize(this);
+        }
+
+        /// <summary>Writes the header with zeroed index pointers; Build() fills them in later.</summary>
+        private void InitDbHeader()
+        {
+            _srcHandle.Seek(0, SeekOrigin.Begin);
+            _dstHandle.Seek(0, SeekOrigin.Begin);
+
+            var header = new byte[HeaderInfoLength];
+            BitConverter.GetBytes(VersionNo).CopyTo(header, 0);
+            BitConverter.GetBytes((ushort)_indexPolicy).CopyTo(header, 2);
+            // The timestamp field is 4 bytes wide, but ToUnixTimeSeconds() returns a long.
+            BitConverter.GetBytes((uint)DateTimeOffset.UtcNow.ToUnixTimeSeconds()).CopyTo(header, 4);
+            // Bytes 8..15 (index block start/end ptr) stay zero until Build() knows them.
+
+            _dstHandle.Write(header, 0, header.Length);
+        }
+
+        /// <summary>Reads every segment line from the source file and validates the result.</summary>
+        private void LoadSegments()
+        {
+            Console.WriteLine("try to load the segments ... ");
+
+            using var reader = new StreamReader(_srcHandle);
+            string? line;
+            while ((line = reader.ReadLine()) != null)
+            {
+                // Tolerate blank lines such as a trailing newline at the end of the file.
+                if (string.IsNullOrWhiteSpace(line))
+                {
+                    continue;
+                }
+
+                _segments.Add(Util.GetSegment(line));
+            }
+
+            Util.CheckSegments(_segments);
+
+            Console.WriteLine($"all segments loaded, length: {_segments.Count}");
+        }
+
+        /// <summary>Writes the header and loads the source segments; call before Build().</summary>
+        public void Init()
+        {
+            InitDbHeader();
+            LoadSegments();
+        }
+
+        /// <summary>
+        /// Writes the region data block, the segment index block, the vector index and finally the
+        /// index block pointers in the header. The destination file is closed when this returns.
+        /// </summary>
+        public void Build()
+        {
+            _dstHandle.Seek(HeaderInfoLength + VectorIndexLength, SeekOrigin.Begin);
+            using var writer = new BinaryWriter(_dstHandle, Encoding.UTF8, false);
+
+            Console.WriteLine("try to write the data block ... ");
+            foreach (var seg in _segments)
+            {
+                Console.WriteLine($"try to write region {seg.Region}");
+                if (_regionPool.TryGetValue(seg.Region, out var cachedPtr))
+                {
+                    Console.WriteLine($"--[Cached] with ptr={cachedPtr}");
+                    continue;
+                }
+
+                var region = Encoding.UTF8.GetBytes(seg.Region);
+                if (region.Length > 0xFFFF)
+                {
+                    throw new ArgumentException($"too long region info `{seg.Region}`: should be less than {0xFFFF} bytes");
+                }
+
+                var pos = _dstHandle.Position;
+                writer.Write(region);
+                _regionPool[seg.Region] = (uint)pos;
+            }
+
+            Console.WriteLine("try to write the segment index block ... ");
+
+            var indexBuff = new byte[SegmentIndexSize];
+            var counter = 0;
+            long startPtr = -1;
+            long endPtr = -1;
+            foreach (var seg in _segments)
+            {
+                // TryGetValue so that a miss reports the region instead of a bare KeyNotFoundException.
+                if (!_regionPool.TryGetValue(seg.Region, out var dataPtr))
+                {
+                    throw new InvalidOperationException($"missing ptr cache for region `{seg.Region}`");
+                }
+
+                var datalen = Encoding.UTF8.GetByteCount(seg.Region);
+                if (datalen < 1)
+                {
+                    throw new ArgumentNullException(nameof(seg.Region));
+                }
+
+                var segList = seg.Split();
+                Console.WriteLine($"try to index segment({segList.Count}) {seg} ...");
+
+                foreach (var item in segList)
+                {
+                    var pos = _dstHandle.Position;
+
+                    BitConverter.GetBytes(item.StartIP).CopyTo(indexBuff, 0);
+                    BitConverter.GetBytes(item.EndIP).CopyTo(indexBuff, 4);
+                    BitConverter.GetBytes((ushort)datalen).CopyTo(indexBuff, 8);
+                    BitConverter.GetBytes(dataPtr).CopyTo(indexBuff, 10);
+                    writer.Write(indexBuff);
+
+                    Console.WriteLine($"|-segment index: {counter}, ptr: {pos}, segment: {seg}");
+                    SetVectorIndex(item.StartIP, (uint)pos);
+                    counter++;
+
+                    if (startPtr == -1)
+                    {
+                        startPtr = pos;
+                    }
+
+                    endPtr = pos;
+                }
+            }
+
+            Console.WriteLine($"try to write the vector index block ... ");
+            _dstHandle.Seek(HeaderInfoLength, SeekOrigin.Begin);
+            writer.Write(_vectorIndex);
+
+            Console.WriteLine("try to write the segment index ptr ... ");
+            BitConverter.GetBytes((uint)startPtr).CopyTo(indexBuff, 0);
+            BitConverter.GetBytes((uint)endPtr).CopyTo(indexBuff, 4);
+            // The pointers live at header offset 8; offset 0 holds version, policy and timestamp.
+            _dstHandle.Seek(8, SeekOrigin.Begin);
+            writer.Write(indexBuff, 0, 8);
+
+            Console.WriteLine($"write done, dataBlocks: {_regionPool.Count}, indexBlocks: ({_segments.Count}, {counter}), indexPtr: ({startPtr}, {endPtr})");
+        }
+
+        /// <summary>
+        /// Updates the vector index slot chosen by the two high bytes of the ip: the first entry seen
+        /// sets the slot's start pointer, every entry moves its end pointer just past that entry.
+        /// </summary>
+        private void SetVectorIndex(uint ip, uint ptr)
+        {
+            var il0 = (ip >> 24) & 0xFF;
+            var il1 = (ip >> 16) & 0xFF;
+            var idx = (int)(il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize);
+
+            // A start pointer of 0 marks a slot that has not been used yet.
+            if (BitConverter.ToUInt32(_vectorIndex, idx) == 0)
+            {
+                BitConverter.GetBytes(ptr).CopyTo(_vectorIndex, idx);
+            }
+
+            BitConverter.GetBytes(ptr + SegmentIndexSize).CopyTo(_vectorIndex, idx + 4);
+        }
+    }
+
+}
diff --git a/maker/csharp/IP2RegionMaker/XDB/Segment.cs b/maker/csharp/IP2RegionMaker/XDB/Segment.cs
new file mode 100644
index 0000000..f60ee19
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/XDB/Segment.cs
@@ -0,0 +1,97 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace IP2RegionMaker.XDB
+{
+    /// <summary>
+    /// An inclusive IPv4 range, held as unsigned 32-bit numbers, together with its region text.
+    /// </summary>
+    public class Segment
+    {
+        public uint StartIP { get; set; }
+
+        public uint EndIP { get; set; }
+
+        public string Region { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Splits this range so that the start and end address of every returned piece share their
+        /// first two bytes.
+        /// </summary>
+        /// <returns>The pieces in ascending address order, each carrying this segment's region.</returns>
+        public List<Segment> Split()
+        {
+            // Pass 1: cut the range wherever the first byte changes.
+            var tList = new List<Segment>();
+            var sByte = (StartIP >> 24) & 0xFF;
+            var eByte = (EndIP >> 24) & 0xFF;
+
+            var nSip = StartIP;
+            for (var i = sByte; i <= eByte; i++)
+            {
+                var sip = (i << 24) | (nSip & 0xFFFFFF);
+                var eip = (i << 24) | 0xFFFFFF;
+
+                if (eip < EndIP)
+                {
+                    nSip = (i + 1) << 24;
+                }
+                else
+                {
+                    eip = EndIP;
+                }
+
+                tList.Add(new Segment
+                {
+                    StartIP = sip,
+                    EndIP = eip,
+                });
+            }
+
+            // Pass 2: cut every first-byte piece wherever the second byte changes.
+            var segList = new List<Segment>();
+
+            foreach (var seg in tList)
+            {
+                var temp = seg.StartIP & 0xFF000000;
+                nSip = seg.StartIP;
+
+                sByte = (seg.StartIP >> 16) & 0xFF;
+                eByte = (seg.EndIP >> 16) & 0xFF;
+
+                for (var i = sByte; i <= eByte; i++)
+                {
+                    var sip = temp | (i << 16) | (nSip & 0xFFFF);
+                    var eip = temp | (i << 16) | 0xFFFF;
+
+                    if (eip < seg.EndIP)
+                    {
+                        nSip = 0;
+                    }
+                    else
+                    {
+                        eip = seg.EndIP;
+                    }
+
+                    segList.Add(new Segment
+                    {
+                        StartIP = sip,
+                        EndIP = eip,
+                        Region = Region,
+                    });
+                }
+            }
+
+            return segList;
+        }
+
+        /// <summary>Formats the segment as <c>start ip|end ip|region</c>.</summary>
+        public override string ToString()
+        {
+            return $"{Util.UInt32ToIpAddress(StartIP)}|{Util.UInt32ToIpAddress(EndIP)}|{Region}";
+        }
+    }
+}
diff --git a/maker/csharp/IP2RegionMaker/XDB/Util.cs b/maker/csharp/IP2RegionMaker/XDB/Util.cs
new file mode 100644
index 0000000..2dd2ccd
--- /dev/null
+++ b/maker/csharp/IP2RegionMaker/XDB/Util.cs
@@ -0,0 +1,98 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace IP2RegionMaker.XDB
+{
+    /// <summary>IPv4 conversion and segment parsing/validation helpers.</summary>
+    public static class Util
+    {
+        /// <summary>Converts dotted-quad IPv4 text to a number whose most significant byte is the first octet.</summary>
+        /// <exception cref="ArgumentException">The text parses as an address that is not IPv4.</exception>
+        public static uint IpAddressToUInt32(string ipAddress)
+        {
+            var address = IPAddress.Parse(ipAddress);
+
+            // IPAddress.Parse also accepts IPv6 text, whose 16 bytes cannot be represented in a uint.
+            if (address.AddressFamily != System.Net.Sockets.AddressFamily.InterNetwork)
+            {
+                throw new ArgumentException($"invalid ipv4 address `{ipAddress}`", nameof(ipAddress));
+            }
+
+            // Combine the octets explicitly so the result does not depend on the host byte order.
+            byte[] bytes = address.GetAddressBytes();
+            return ((uint)bytes[0] << 24) | ((uint)bytes[1] << 16) | ((uint)bytes[2] << 8) | bytes[3];
+        }
+
+        /// <summary>Converts a number produced by <see cref="IpAddressToUInt32"/> back to dotted-quad text.</summary>
+        public static string UInt32ToIpAddress(uint ipAddress)
+        {
+            byte[] bytes =
+            {
+                (byte)(ipAddress >> 24),
+                (byte)(ipAddress >> 16),
+                (byte)(ipAddress >> 8),
+                (byte)ipAddress,
+            };
+            return new IPAddress(bytes).ToString();
+        }
+
+        /// <summary>Parses one <c>start ip|end ip|region</c> line; the region may itself contain <c>|</c>.</summary>
+        /// <exception cref="ArgumentException">The line has fewer than three fields, start &gt; end, or the region is empty.</exception>
+        public static Segment GetSegment(string line)
+        {
+            var ps = line.Split("|", 3);
+
+            if (ps.Length != 3)
+            {
+                throw new ArgumentException($"invalid ip segment line {line}");
+            }
+
+            var sip = Util.IpAddressToUInt32(ps[0]);
+            var eip = Util.IpAddressToUInt32(ps[1]);
+
+            if (sip > eip)
+            {
+                throw new ArgumentException($"start ip {ps[0]} should not be greater than end ip {ps[1]}");
+            }
+
+            if (string.IsNullOrEmpty(ps[2]))
+            {
+                throw new ArgumentException($"empty region info in segment line {line}");
+            }
+
+            return new Segment
+            {
+                StartIP = sip,
+                EndIP = eip,
+                Region = ps[2],
+            };
+        }
+
+        /// <summary>Verifies that every segment is well-formed and starts right after the previous one ends.</summary>
+        /// <exception cref="ArgumentException">A segment has start &gt; end or does not continue the previous one.</exception>
+        public static void CheckSegments(List<Segment> segments)
+        {
+            Segment? last = null;
+
+            foreach (var seg in segments)
+            {
+                if (seg.StartIP > seg.EndIP)
+                {
+                    throw new ArgumentException($"segment `{seg}`: start ip should not be greater than end ip");
+                }
+
+                // Widen to ulong so that an end ip of 255.255.255.255 does not wrap around to 0.
+                if (last != null && (ulong)last.EndIP + 1 != seg.StartIP)
+                {
+                    throw new ArgumentException($"discontinuous data segment: last.eip+1({(ulong)last.EndIP + 1}) != seg.sip({seg.StartIP}, {seg})");
+                }
+
+                last = seg;
+            }
+        }
+    }
+}
diff --git a/maker/csharp/ReadMe.md b/maker/csharp/ReadMe.md
new file mode 100644
index 0000000..cddce55
--- /dev/null
+++ b/maker/csharp/ReadMe.md
@@ -0,0 +1,36 @@
+# ip2region xdb csharp 生成实现
+
+## 编译安装
+编译环境:[dotnet6.0](https://dotnet.microsoft.com/zh-cn/download/dotnet/6.0)
+```bash
+# cd 到 maker/csharp/IP2RegionMaker目录
+dotnet publish -o ./bin
+```
+
+然后会在当前目录的 bin 目录下得到一个 IP2RegionMaker.dll 的打包文件。
+
+# 数据生成
+
+通过 `dotnet IP2RegionMaker.dll` 来生成 ip2region.xdb 二进制文件:
+```bash
+➜ csharp git:(master) ✗ dotnet IP2RegionMaker.dll
+ip2region xdb maker
+dotnet IP2RegionMaker.dll [command options]
+--src string source ip text file path
+--dst string destination binary xdb file path
+```
+
+例如,通过默认的 data/ip.merge.txt 原数据,在当前目录生成一个 ip2region.xdb 二进制文件:
+```bash
+➜ csharp git:(master) ✗ dotnet ./IP2RegionMaker/bin/IP2RegionMaker.dll --src=../../data/ip.merge.txt --dst=./ip2region.xdb
+# 会看到一堆输出,最终会看到如下输出表示运行成功
+...
+...
+...
+write done, dataBlocks: 13804, indexBlocks: (683591, 720221), indexPtr: (982904, 11065984)
+Done, elapsed:2.1966620833333335m
+```
+
+# 数据 查询/bench 测试
+
+已经完成开发的 [binding](../../binding/) 都有查询和 bench 测试程序以及使用文档,你可以使用你熟悉的语言的 searcher 进行查询测试或者bench测试,来确认数据的正确性和完整性。
\ No newline at end of file