funcs to parse ip address (IPv4 and IPv6)

This commit is contained in:
lion 2025-10-19 17:26:17 +08:00
parent ffb632c3d0
commit 69eb94dcd8
2 changed files with 281 additions and 106 deletions

View File

@ -6,14 +6,28 @@
-- @Author Lion <chenxin619315@gmail.com> -- @Author Lion <chenxin619315@gmail.com>
-- @Date 2022/07/05 -- @Date 2022/07/05
-- set the package path package.path = "./?.lua" .. package.path
package.path = "./?.lua" package.cpath = "./?.so" .. package.cpath
package.cpath = "./?.so"
local xdb = require("xdb_searcher") local xdb = require("xdb_searcher")
---- ip checking testing function test_parse_ip()
print("--- testing check_ip and long2ip ... ") local ip_list = {
"1.0.0.0", "58.251.30.115", "192.168.1.100", "126.255.32.255", "219.xx.xx.11",
"::", "::1", "fffe::", "2c0f:fff0::", "2c0f:fff0::1", "2a02:26f7:c409:4001::",
"2fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "240e:982:e617:ffff:ffff:ffff:ffff:ffff", "::xx:ffff"
}
for _, ip_str in ipairs(ip_list) do
ip_bytes, err = xdb.parse_ip(ip_str)
if err ~= nil then
print(string.format("failed to parse ip address `%s`: %s", ip_str, err))
else
print(string.format("`%s`.bytes=%d", ip_str, #ip_bytes))
end
end
end
function test_check_ip()
local ip_list = { local ip_list = {
"1.2.3.4", "192.168.2.3", "120.24.78.129", "255.255.255.0", "1.2.3.4", "192.168.2.3", "120.24.78.129", "255.255.255.0",
"256.7.12.9", "12.56.78.320", "32.12.45.192", "222.221.220.219", "256.7.12.9", "12.56.78.320", "32.12.45.192", "222.221.220.219",
@ -35,10 +49,11 @@ for _, ip_src in ipairs(ip_list) do
end end
end end
end end
end
---- buffer loading test ---- buffer loading test
print("\n--- testing load header ... ") function test_load_header()
header, err = xdb.load_header("../../data/ip2region.xdb") header, err = xdb.load_header("../../data/ip2region_v4.xdb")
if err ~= nil then if err ~= nil then
print("failed to load header: ", err) print("failed to load header: ", err)
else else
@ -51,36 +66,40 @@ header: {
created_at: %d created_at: %d
start_index_ptr: %d start_index_ptr: %d
end_index_ptr: %d end_index_ptr: %d
ip_version: %d
runtime_ptr_bytes: %d
}]] }]]
print(string.format(tpl, print(string.format(tpl,
header["version"], header["index_policy"], header["version"], header["index_policy"],
header["created_at"], header["start_index_ptr"], header["end_index_ptr"]) header["created_at"], header["start_index_ptr"], header["end_index_ptr"],
) header["ip_version"], header["runtime_ptr_bytes"]
))
end
end end
function test_load_vector_index()
print("\n--- testing load vector index ... ") v_index, err = xdb.load_vector_index("../../data/ip2region_v4.xdb")
v_index, err = xdb.load_vector_index("../../data/ip2region.xdb")
if err ~= nil then if err ~= nil then
print("failed to load vector index: ", err) print("failed to load vector index: ", err)
else else
print("xdb vector index buffer loaded") print("xdb vector index buffer loaded")
end end
end
function test_load_content()
print("\n--- testing load content buffer ... ") c_buffer, err = xdb.load_content("../../data/ip2region_v4.xdb")
c_buffer, err = xdb.load_content("../../data/ip2region.xdb")
if err ~= nil then if err ~= nil then
print("failed to load content: ", err) print("failed to load content: ", err)
else else
print("xdb content buffer loaded") print("xdb content buffer loaded")
end end
end
print("\n--- testing search ... ") function test_search()
local ip_str = "1.2.3.4" local ip_str = "1.2.3.4"
searcher, err = xdb.new_with_file_only("../../data/ip2region.xdb") searcher, err = xdb.new_with_file_only("../../data/ip2region_v4.xdb")
local t_start = xdb.now() local t_start = xdb.now()
region, err = searcher:search(ip_str) region, err = searcher:search(ip_str)
if err ~= nil then if err ~= nil then
@ -92,7 +111,24 @@ else
print(string.format("searcher.tostring=%s", searcher)) print(string.format("searcher.tostring=%s", searcher))
end end
searcher:close() searcher:close()
end
print("") -- check and call the function
print(string.format("all tests done, elapsed %d μs", xdb.now() - s_time))
-- Test runner: look up the global test function named by the first command
-- line argument and call it, printing the elapsed time reported by xdb.now().
-- `arg` is only set by the standalone interpreter, so guard against nil.
local func_name = arg and arg[1]
if func_name == nil then
    print("please specified the function to test")
    return
end

-- Only accept real functions: a name such as `string` or `_VERSION` exists in
-- _G but is not callable and would otherwise abort with a runtime error.
local test_func = _G[func_name]
if type(test_func) ~= "function" then
    print(string.format("undefined function `%s` to call", func_name))
    return
end

local s_time = xdb.now()
print(string.format("+---calling test function %s ...", func_name))
test_func()
local cost_time = xdb.now() - s_time
print(string.format("|---done, elapsed %.3fμs", cost_time))

View File

@ -14,7 +14,8 @@ local VectorIndexSize = 8
local SegmentIndexSize = 14 local SegmentIndexSize = 14
local VectorIndexLength = 524288 local VectorIndexLength = 524288
local _M = {
local _xdb = {
-- xdb file handle -- xdb file handle
handle = nil, handle = nil,
@ -30,8 +31,8 @@ local _M = {
} }
-- index and to string attribute set -- index and to string attribute set
_M.__index = _M _xdb.__index = _xdb
_M.__tostring = function(self) _xdb.__tostring = function(self)
return "xdb searcher object (lua)" return "xdb searcher object (lua)"
end end
@ -39,7 +40,7 @@ end
-- construct functions -- construct functions
function newBase(dbPath, vIndex, cBuffer) function newBase(dbPath, vIndex, cBuffer)
local obj = setmetatable({}, _M) local obj = setmetatable({}, _xdb)
if cBuffer ~= nil then if cBuffer ~= nil then
obj.io_count = 0 obj.io_count = 0
obj.vector_index = nil obj.vector_index = nil
@ -56,15 +57,15 @@ function newBase(dbPath, vIndex, cBuffer)
return obj, nil return obj, nil
end end
function _M.new_with_file_only(dbPath) function _xdb.new_with_file_only(dbPath)
return newBase(dbPath, nil, nil) return newBase(dbPath, nil, nil)
end end
function _M.new_with_vector_index(dbPath, vIndex) function _xdb.new_with_vector_index(dbPath, vIndex)
return newBase(dbPath, vIndex, nil) return newBase(dbPath, vIndex, nil)
end end
function _M.new_with_buffer(cBuffer) function _xdb.new_with_buffer(cBuffer)
return newBase(nil, nil, cBuffer) return newBase(nil, nil, cBuffer)
end end
@ -72,7 +73,7 @@ end
-- object api impl, must call via ':' -- object api impl, must call via ':'
function _M:search(ip_src) function _xdb:search(ip_src)
-- check and convert string ip to long ip -- check and convert string ip to long ip
local t, ip = type(ip_src), 0 local t, ip = type(ip_src), 0
if t == nil then if t == nil then
@ -102,11 +103,11 @@ function _M:search(ip_src)
local idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize local idx = il0 * VectorIndexCols * VectorIndexSize + il1 * VectorIndexSize
local s_ptr, e_ptr = 0, 0 local s_ptr, e_ptr = 0, 0
if vector_index ~= nil then if vector_index ~= nil then
s_ptr = getLong(vector_index, idx + 1) s_ptr = le_getUint32(vector_index, idx + 1)
e_ptr = getLong(vector_index, idx + 5) e_ptr = le_getUint32(vector_index, idx + 5)
elseif content_buff ~= nil then elseif content_buff ~= nil then
s_ptr = getLong(content_buff, HeaderInfoLength + idx + 1) s_ptr = le_getUint32(content_buff, HeaderInfoLength + idx + 1)
e_ptr = getLong(content_buff, HeaderInfoLength + idx + 5) e_ptr = le_getUint32(content_buff, HeaderInfoLength + idx + 5)
else else
-- load from the file -- load from the file
buff, err = read_data(self, HeaderInfoLength + idx, SegmentIndexSize) buff, err = read_data(self, HeaderInfoLength + idx, SegmentIndexSize)
@ -114,8 +115,8 @@ function _M:search(ip_src)
return "", string.format("read buffer: %s", err) return "", string.format("read buffer: %s", err)
end end
s_ptr = getLong(buff, 1) s_ptr = le_getUint32(buff, 1)
e_ptr = getLong(buff, 5) e_ptr = le_getUint32(buff, 5)
end end
-- print(string.format("s_ptr: %d, e_ptr: %d", s_ptr, e_ptr)) -- print(string.format("s_ptr: %d, e_ptr: %d", s_ptr, e_ptr))
@ -133,16 +134,16 @@ function _M:search(ip_src)
return "", string.format("read segment index at %d", p) return "", string.format("read segment index at %d", p)
end end
sip = getLong(buff, 1) sip = le_getUint32(buff, 1)
if ip < sip then if ip < sip then
h = m - 1 h = m - 1
else else
eip = getLong(buff, 5) eip = le_getUint32(buff, 5)
if ip > eip then if ip > eip then
l = m + 1 l = m + 1
else else
data_len = getShort(buff, 9) data_len = le_getUint16(buff, 9)
data_ptr = getLong(buff, 11) data_ptr = le_getUint32(buff, 11)
break break
end end
end end
@ -166,7 +167,7 @@ end
-- read specified bytes from the specified index -- read specified bytes from the specified index
function _M:read(offset, length) function _xdb:read(offset, length)
-- local cache -- local cache
local content_buff = self.content_buff local content_buff = self.content_buff
local handle = self.handle local handle = self.handle
@ -191,11 +192,11 @@ function _M:read(offset, length)
return buff, nil return buff, nil
end end
function _M:get_io_count() function _xdb:get_io_count()
return self.io_count return self.io_count
end end
function _M:close() function _xdb:close()
if self.handle ~= nil then if self.handle ~= nil then
self.handle:close() self.handle:close()
end end
@ -206,7 +207,7 @@ end
-- static util functions -- static util functions
function _M.load_header(dbPath) function _xdb.load_header(dbPath)
local handle = io.open(dbPath, "r") local handle = io.open(dbPath, "r")
if handle == nil then if handle == nil then
return nil, string.format("failed to open xdb file `%s`", dbPath) return nil, string.format("failed to open xdb file `%s`", dbPath)
@ -226,16 +227,21 @@ function _M.load_header(dbPath)
handle:close() handle:close()
return { return {
["version"] = getShort(c, 1), ["version"] = le_getUint16(c, 1),
["index_policy"] = getShort(c, 3), ["index_policy"] = le_getUint16(c, 3),
["created_at"] = getLong(c, 5), ["created_at"] = le_getUint32(c, 5),
["start_index_ptr"] = getLong(c, 9), ["start_index_ptr"] = le_getUint32(c, 9),
["end_index_ptr"] = getLong(c, 13), ["end_index_ptr"] = le_getUint32(c, 13),
-- xdb 3.0 since IPv6 supporting
["ip_version"] = le_getUint16(c, 17),
["runtime_ptr_bytes"] = le_getUint16(c, 19),
["raw_data"] = c ["raw_data"] = c
}, nil }, nil
end end
function _M.load_vector_index(dbPath) function _xdb.load_vector_index(dbPath)
local handle = io.open(dbPath, "r") local handle = io.open(dbPath, "r")
if handle == nil then if handle == nil then
return nil, string.format("failed to open xdb file `%s`", dbPath) return nil, string.format("failed to open xdb file `%s`", dbPath)
@ -257,7 +263,7 @@ function _M.load_vector_index(dbPath)
return c, nil return c, nil
end end
function _M.load_content(dbPath) function _xdb.load_content(dbPath)
local handle = io.open(dbPath, "r") local handle = io.open(dbPath, "r")
if handle == nil then if handle == nil then
return nil, string.format("failed to open xdb file `%s`", dbPath) return nil, string.format("failed to open xdb file `%s`", dbPath)
@ -273,7 +279,9 @@ function _M.load_content(dbPath)
return c, nil return c, nil
end end
function _M.check_ip(ip_str) --- ip parse and compare
function _xdb.check_ip(ip_str)
local ip, id, v = 0, 1, 0 local ip, id, v = 0, 1, 0
local offset_arr = {24, 16, 8, 0} local offset_arr = {24, 16, 8, 0}
for p in string.gmatch(ip_str..".", "([%d]+)%.") do for p in string.gmatch(ip_str..".", "([%d]+)%.") do
@ -304,20 +312,146 @@ function _M.check_ip(ip_str)
return ip, nil return ip, nil
end end
function _M.long2ip(ip) function _xdb.long2ip(ip)
return string.format("%d.%d.%d.%d", (ip >> 24) & 0xFF, (ip >> 16) & 0xFF, (ip >> 8 ) & 0xFF, ip & 0xFF) return string.format("%d.%d.%d.%d", (ip >> 24) & 0xFF, (ip >> 16) & 0xFF, (ip >> 8 ) & 0xFF, ip & 0xFF)
end end
-- this is a bit weird, but we have to better choice for now --
function _M.now() -- parse ip string
return os.time() * 1e6 --
--- Split a string on a literal (non-pattern) separator.
-- Empty parts are preserved, so "a..b" split on "." yields {"a", "", "b"} and
-- a string without the separator yields a single-element list.
-- @param str string  the text to split
-- @param sep string  the literal separator; may be longer than one character
-- @return table  sequence of the parts in order of appearance
function split(str, sep)
    local ps, sIndex = {}, 1
    -- advance past the whole separator; never by less than one character so
    -- an empty separator still terminates
    local step = math.max(#sep, 1)
    while true do
        local mi = string.find(str, sep, sIndex, true)
        if mi == nil then
            -- no more separators: the remainder is the last part
            ps[#ps + 1] = string.sub(str, sIndex)
            break
        end

        -- string.sub returns "" when the separator sits right at sIndex
        ps[#ps + 1] = string.sub(str, sIndex, mi - 1)
        sIndex = mi + step
    end

    return ps
end
--- Parse a dotted-decimal IPv4 string into its 4-byte binary form.
-- Each of the four parts must consist of decimal digits only (surrounding
-- whitespace is tolerated) and lie in the range 0..255.
-- @param v4_str string  textual address such as "192.168.1.100"
-- @return string|nil  4-byte string, most significant octet first, or nil on failure
-- @return string|nil  error message on failure, nil on success
function _parse_ipv4_addr(v4_str)
    -- split on '.', keeping empty parts so that "1..3.4" still counts as four
    -- parts and is rejected by the per-part check below
    local ps = {}
    for s in (v4_str .. "."):gmatch("([^.]*)%.") do
        ps[#ps + 1] = s
    end

    if #ps ~= 4 then
        return nil, string.format("invalid ipv4 address `%s`", v4_str)
    end

    local bytes = {0x00, 0x00, 0x00, 0x00}
    for i, s in ipairs(ps) do
        -- digits only: a bare tonumber() would also accept "0x10", "1e2", ...
        local digits = s:match("^%s*(%d+)%s*$")
        if digits == nil then
            return nil, string.format("invalid ipv4 part `%s`, a valid number expected", s)
        end

        -- no explicit base here: an over-long digit string becomes a large
        -- float instead of wrapping around, so the range check still fires
        local v = tonumber(digits)
        if v < 0 or v > 255 then
            return nil, string.format("invalid ipv4 part `%s`, should >= 0 and <= 255", s)
        end

        bytes[i] = v
    end

    return string.char(table.unpack(bytes)), nil
end
--- Parse a textual IPv6 address into its 16-byte binary form.
-- Supports the full eight-group form and a single "::" abbreviation that
-- stands for one or more all-zero groups. Whitespace around the address and
-- around individual groups is ignored. Embedded dotted IPv4 tails are not
-- supported and are reported as an invalid hex part.
-- @param v6_str string  textual address such as "2c0f:fff0::1"
-- @return string|nil  16-byte string, most significant byte first, or nil on failure
-- @return string|nil  error message on failure, nil on success
function _parse_ipv6_addr(v6_str)
    -- Convert one side of the address (the text before or after "::", or the
    -- whole address when there is no "::") into a list of 16-bit integers.
    -- An empty side yields an empty list.
    local function parse_groups(part)
        local groups = {}
        if #part == 0 then
            return groups, nil
        end

        for s in (part .. ":"):gmatch("([^:]*):") do
            local g = s:match("^%s*(.-)%s*$")
            -- an empty group here means a stray single ':' (leading, trailing
            -- or next to the "::"), which is never valid
            if #g == 0 then
                return nil, string.format("invalid ipv6 address `%s`", v6_str)
            end
            if not g:match("^%x+$") then
                return nil, string.format("invalid ipv6 part `%s`, a valid hex number expected", s)
            end
            -- limit by digit count: tonumber(s, 16) wraps around on overflow,
            -- so a numeric range check alone could accept over-long groups
            if #g > 4 then
                return nil, string.format("invalid ipv6 part `%s` should >= 0 and <= 65535", s)
            end
            groups[#groups + 1] = tonumber(g, 16)
        end

        return groups, nil
    end

    local addr = v6_str:match("^%s*(.-)%s*$")
    local head, tail = addr, nil

    -- locate the (at most one) double colon and split the address around it
    local dc = string.find(addr, "::", 1, true)
    if dc ~= nil then
        -- searching from dc + 1 also catches ":::"
        if string.find(addr, "::", dc + 1, true) ~= nil then
            return nil, "invalid ipv6 address: multi double colon detected"
        end
        head, tail = addr:sub(1, dc - 1), addr:sub(dc + 2)
    end

    local h_groups, t_groups, err = nil, {}, nil
    h_groups, err = parse_groups(head)
    if err ~= nil then
        return nil, err
    end
    if tail ~= nil then
        t_groups, err = parse_groups(tail)
        if err ~= nil then
            return nil, err
        end
    end

    -- without "::" exactly 8 groups are required; with "::" it replaces at
    -- least one group, so at most 7 explicit groups may remain
    local total = #h_groups + #t_groups
    if (tail == nil and total ~= 8) or (tail ~= nil and total > 7) then
        return nil, string.format("invalid ipv6 address `%s`", v6_str)
    end

    local bytes = {
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00
    }

    -- groups before "::" fill from the front ...
    for i, v in ipairs(h_groups) do
        bytes[2 * i - 1] = (v >> 8) & 0xFF
        bytes[2 * i    ] = v & 0xFF
    end

    -- ... groups after "::" are right-aligned, the gap stays zero
    local first = 8 - #t_groups
    for i, v in ipairs(t_groups) do
        local g = first + i
        bytes[2 * g - 1] = (v >> 8) & 0xFF
        bytes[2 * g    ] = v & 0xFF
    end

    return string.char(table.unpack(bytes)), nil
end
--- Parse a textual IP address (IPv4 or IPv6) into its binary byte string.
-- The address family is chosen by the separators present: any ':' selects the
-- IPv6 parser, otherwise a '.' selects the IPv4 parser.
-- @param ip_str string  the address to parse
-- @return string|nil  the value returned by the selected parser, or nil on failure
-- @return string|nil  error message on failure
function _xdb.parse_ip(ip_str)
    -- report bad argument types through the nil, err convention instead of
    -- letting string.find raise on nil / table input
    if type(ip_str) ~= "string" then
        return nil, string.format("invalid ip address `%s`", tostring(ip_str))
    end

    if string.find(ip_str, ":", 1, true) ~= nil then
        return _parse_ipv6_addr(ip_str)
    end

    if string.find(ip_str, ".", 1, true) ~= nil then
        return _parse_ipv4_addr(ip_str)
    end

    return nil, string.format("invalid ip address `%s`", ip_str)
end
-- end ip parse
--
-- End of util functions -- End of util functions
--internal function to get a integer from a binary string --internal function to get a integer from a binary string
function getLong(buff, idx) function le_getUint32(buff, idx)
local i1 = (string.byte(string.sub(buff, idx, idx))) local i1 = (string.byte(string.sub(buff, idx, idx)))
local i2 = (string.byte(string.sub(buff, idx+1, idx+1)) << 8) local i2 = (string.byte(string.sub(buff, idx+1, idx+1)) << 8)
local i3 = (string.byte(string.sub(buff, idx+2, idx+2)) << 16) local i3 = (string.byte(string.sub(buff, idx+2, idx+2)) << 16)
@ -325,10 +459,15 @@ function getLong(buff, idx)
return (i1 | i2 | i3 | i4) return (i1 | i2 | i3 | i4)
end end
function getShort(buff, idx) function le_getUint16(buff, idx)
local i1 = (string.byte(string.sub(buff, idx, idx))) local i1 = (string.byte(string.sub(buff, idx, idx)))
local i2 = (string.byte(string.sub(buff, idx+1, idx+1)) << 8) local i2 = (string.byte(string.sub(buff, idx+1, idx+1)) << 8)
return (i1 | i2) return (i1 | i2)
end end
return _M -- this is a bit weird, but we have no better choice for now
function _xdb.now()
return os.time() * 1e6
end
return _xdb