mirror of
https://github.com/lionsoul2014/ip2region.git
synced 2025-12-08 19:25:22 +00:00
add in-memory search supports when the xdb file size exceeds 2^31 - 1
This commit is contained in:
parent
f9402614f6
commit
4996c0ff6a
@ -4,7 +4,7 @@
|
||||
|
||||
<groupId>org.lionsoul</groupId>
|
||||
<artifactId>ip2region</artifactId>
|
||||
<version>2.8.0</version>
|
||||
<version>2.8.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>ip2region</name>
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
|
||||
package org.lionsoul.ip2region;
|
||||
|
||||
import org.lionsoul.ip2region.xdb.LongByteArray;
|
||||
import org.lionsoul.ip2region.xdb.Searcher;
|
||||
|
||||
import java.io.*;
|
||||
@ -30,7 +31,7 @@ public class SearchTest {
|
||||
byte[] vIndex = Searcher.loadVectorIndexFromFile(dbPath);
|
||||
return Searcher.newWithVectorIndex(dbPath, vIndex);
|
||||
} else if ("content".equals(cachePolicy)) {
|
||||
byte[] cBuff = Searcher.loadContentFromFile(dbPath);
|
||||
LongByteArray cBuff = Searcher.loadContentFromFile(dbPath);
|
||||
return Searcher.newWithBuffer(cBuff);
|
||||
} else {
|
||||
throw new IOException("invalid cache policy `" + cachePolicy + "`, options: file/vectorIndex/content");
|
||||
|
||||
@ -6,8 +6,11 @@
|
||||
|
||||
package org.lionsoul.ip2region;
|
||||
|
||||
import org.lionsoul.ip2region.xdb.LongByteArray;
|
||||
import org.lionsoul.ip2region.xdb.Searcher;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class UtilTest {
|
||||
|
||||
public static void testIP2Long() {
|
||||
@ -34,9 +37,49 @@ public class UtilTest {
|
||||
System.out.printf("passed: ip=%s, ipAddr=%d, ip2=%s\n", ip, ipAddr, ip2);
|
||||
}
|
||||
|
||||
public static void testLongByteArray() {
|
||||
final LongByteArray byteArray = new LongByteArray();
|
||||
byteArray.append(new byte[]{0,0,0,0,0});
|
||||
byteArray.append(new byte[]{1,1,1,1,1});
|
||||
int counter = 2;
|
||||
for (int i = 0; i < 100; i++) {
|
||||
final byte[] buff = new byte[10];
|
||||
Arrays.fill(buff, (byte) counter);
|
||||
byteArray.append(buff);
|
||||
counter++;
|
||||
}
|
||||
|
||||
System.out.printf("1, byteArray.length: %d\n", byteArray.length());
|
||||
System.out.println("2, length copy test...");
|
||||
int[] length = new int[]{5, 10, 15, 20, 21, 22, 23, 25, 28, 29, 30, 40, 42, 44, 50, 60};
|
||||
for (int j : length) {
|
||||
final byte[] destBuff = new byte[j];
|
||||
byteArray.copy(0, destBuff, 0, destBuff.length);
|
||||
System.out.printf("copy(0,%d): \n", destBuff.length);
|
||||
for (byte b : destBuff) {
|
||||
System.out.print(b + " ");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
System.out.println("3, offset copy test...");
|
||||
int[] offset = new int[]{0, 5, 10, 15, 20, 21, 22, 23, 25, 28, 29, 30, 40, 42, 44, 50, 60};
|
||||
for (int j : offset) {
|
||||
final byte[] destBuff = new byte[11];
|
||||
byteArray.copy(j, destBuff, 0, destBuff.length);
|
||||
System.out.printf("copy(%d,%d): \n", j, destBuff.length);
|
||||
for (byte b : destBuff) {
|
||||
System.out.print(b + " ");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.print("testing IP2Long ... \n");
|
||||
testIP2Long();
|
||||
System.out.print("testing LongByteArray ... \n");
|
||||
testLongByteArray();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,140 @@
|
||||
// Copyright 2022 The Ip2Region Authors. All rights reserved.
|
||||
// Use of this source code is governed by a Apache2.0-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package org.lionsoul.ip2region.xdb;
|
||||
|
||||
// xdb byte buffer that is used instead of a plain byte array
|
||||
// when the size of the xdb file is greater than 2^31 - 1 bytes;
|
||||
// xdb file v4 is designed to be a maximum of 2^32 bytes in size.
|
||||
// @Author Leon <chenxin619315@gmail.com>
|
||||
// @Date 2025/08/22
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class LongByteArray {
|
||||
// byte buffer list
|
||||
private final List<byte[]> buffs = new ArrayList<byte[]>();
|
||||
private long length;
|
||||
|
||||
public LongByteArray() {
|
||||
this.length = 0;
|
||||
}
|
||||
|
||||
public LongByteArray(byte[] buff) {
|
||||
buffs.add(buff);
|
||||
length = buff.length;
|
||||
}
|
||||
|
||||
// append new buffer
|
||||
public void append(final byte[] buffer) {
|
||||
buffs.add(buffer);
|
||||
length += buffer.length;
|
||||
}
|
||||
|
||||
public long length() {
|
||||
return length;
|
||||
}
|
||||
|
||||
// internal method to determine the position of the specified offset
|
||||
private Position determinate(final long offset) {
|
||||
int index = 0, position = 0, buffLen = buffs.size();
|
||||
long curIndex = 0;
|
||||
for (index = 0; index < buffLen; index++) {
|
||||
final byte[] buff = buffs.get(index);
|
||||
if (curIndex + buff.length < offset) {
|
||||
curIndex += buff.length;
|
||||
continue;
|
||||
}
|
||||
|
||||
// matched and calc the position
|
||||
position = (int) (offset - curIndex);
|
||||
break;
|
||||
}
|
||||
|
||||
return new Position(index, position);
|
||||
}
|
||||
|
||||
// Copy from the current buffers to a specified buffer
|
||||
// from the specified offset with a specified length
|
||||
public byte[] copy(final long srcPos, final byte[] dest, final int destPos, final int length) {
|
||||
if (srcPos >= this.length) {
|
||||
throw new IndexOutOfBoundsException("srcPos exceed the maximum array length `" + this.length + "`");
|
||||
}
|
||||
|
||||
if (destPos + length > dest.length) {
|
||||
throw new IndexOutOfBoundsException("destPost+length exceed the maximum dest buffer length `" + dest.length + "`");
|
||||
}
|
||||
|
||||
final Position pos = determinate(srcPos);
|
||||
|
||||
// copy from the current buffer
|
||||
final byte[] hBuff = buffs.get(pos.index++);
|
||||
final int copyLen = Math.min(hBuff.length - pos.offset, length);
|
||||
System.arraycopy(hBuff, pos.offset, dest, destPos, copyLen);
|
||||
|
||||
// check and copy from the rest buffer?
|
||||
int sPos = destPos + copyLen;
|
||||
int left = length - copyLen;
|
||||
while (left > 0) {
|
||||
final byte[] tBuff = buffs.get(pos.index++);
|
||||
final int buffLen = tBuff.length;
|
||||
if (left <= buffLen) {
|
||||
System.arraycopy(tBuff, 0, dest, sPos, left);
|
||||
break;
|
||||
}
|
||||
|
||||
System.arraycopy(tBuff, 0, dest, sPos, buffLen);
|
||||
sPos += buffLen;
|
||||
left -= buffLen;
|
||||
}
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
// get a byte-buffer from the specified index with a specified length.
|
||||
// this method will allocate a new byte buffer with length = $length.
|
||||
public byte[] slice(long offset, int length) {
|
||||
if (offset + length > this.length) {
|
||||
throw new IndexOutOfBoundsException("offset+length exceed the maximum array length `" + this.length + "`");
|
||||
}
|
||||
|
||||
final byte[] buffer = new byte[length];
|
||||
return copy(offset, buffer, 0, length);
|
||||
}
|
||||
|
||||
// get a 4-bytes long integer from the specified index
|
||||
public long getIntLong(long offset) {
|
||||
final byte[] b = new byte[4];
|
||||
copy(offset, b, 0, 4);
|
||||
return (
|
||||
((b[0] & 0x000000FFL)) |
|
||||
((b[1] << 8) & 0x0000FF00L) |
|
||||
((b[2] << 16) & 0x00FF0000L) |
|
||||
((b[3] << 24) & 0xFF000000L)
|
||||
);
|
||||
}
|
||||
|
||||
public int getInt(long offset) {
|
||||
final byte[] b = new byte[4];
|
||||
copy(offset, b, 0, 4);
|
||||
return (
|
||||
((b[0] & 0x000000FF)) |
|
||||
((b[1] << 8) & 0x0000FF00) |
|
||||
((b[2] << 16) & 0x00FF0000) |
|
||||
((b[3] << 24) & 0xFF000000)
|
||||
);
|
||||
}
|
||||
|
||||
// position entry class
|
||||
public static class Position {
|
||||
public int index;
|
||||
public int offset;
|
||||
public Position(int index, int offset) {
|
||||
this.index = index;
|
||||
this.offset = offset;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -20,7 +20,10 @@ public class Searcher {
|
||||
public static final int VectorIndexSize = 8;
|
||||
public static final int SegmentIndexSize = 14;
|
||||
|
||||
// random access file handle for file based search
|
||||
// Linux max write / read bytes
|
||||
public static final int MAX_WRITE_BYTES = 0x7ffff000;
|
||||
|
||||
// random access file handle for file-based search
|
||||
private final RandomAccessFile handle;
|
||||
|
||||
private int ioCount = 0;
|
||||
@ -30,8 +33,10 @@ public class Searcher {
|
||||
// the minimal memory allocation.
|
||||
private final byte[] vectorIndex;
|
||||
|
||||
// xdb content buffer, used for in-memory search
|
||||
private final byte[] contentBuff;
|
||||
// xdb content buffer, used for in-memory search.
|
||||
// @Note: use the LongByteArray instead since 2025/08/22
|
||||
// private final byte[] contentBuff;
|
||||
private final LongByteArray contentBuff;
|
||||
|
||||
// --- static method to create searchers
|
||||
|
||||
@ -43,13 +48,13 @@ public class Searcher {
|
||||
return new Searcher(dbPath, vectorIndex, null);
|
||||
}
|
||||
|
||||
public static Searcher newWithBuffer(byte[] cBuff) throws IOException {
|
||||
public static Searcher newWithBuffer(LongByteArray cBuff) throws IOException {
|
||||
return new Searcher(null, null, cBuff);
|
||||
}
|
||||
|
||||
// --- End of creator
|
||||
|
||||
public Searcher(String dbFile, byte[] vectorIndex, byte[] cBuff) throws IOException {
|
||||
public Searcher(String dbFile, byte[] vectorIndex, LongByteArray cBuff) throws IOException {
|
||||
if (cBuff != null) {
|
||||
this.handle = null;
|
||||
this.vectorIndex = null;
|
||||
@ -90,8 +95,8 @@ public class Searcher {
|
||||
sPtr = getIntLong(vectorIndex, idx);
|
||||
ePtr = getIntLong(vectorIndex, idx + 4);
|
||||
} else if (contentBuff != null) {
|
||||
sPtr = getIntLong(contentBuff, HeaderInfoLength + idx);
|
||||
ePtr = getIntLong(contentBuff, HeaderInfoLength + idx + 4);
|
||||
sPtr = contentBuff.getIntLong(HeaderInfoLength + idx);
|
||||
ePtr = contentBuff.getIntLong(HeaderInfoLength + idx + 4);
|
||||
} else {
|
||||
final byte[] buff = new byte[VectorIndexSize];
|
||||
read(HeaderInfoLength + idx, buff);
|
||||
@ -141,15 +146,7 @@ public class Searcher {
|
||||
protected void read(long offset, byte[] buffer) throws IOException {
|
||||
// check the in-memory buffer first
|
||||
if (contentBuff != null) {
|
||||
// @TODO: reduce data copying, directly decode the data ?
|
||||
// @TODO: added by Leon at 2025/06/10, when offset is negative and the content byte is not going to work.
|
||||
// we need a better solution for the content buffer which is greater than (2^31 - 1 << 2)
|
||||
int int_idx = (int) offset;
|
||||
if (int_idx < 0) {
|
||||
throw new IOException("No content buffer policy for NOW since the xdb is too large, use file or vectorIndex instead");
|
||||
}
|
||||
|
||||
System.arraycopy(contentBuff, int_idx, buffer, 0, buffer.length);
|
||||
contentBuff.copy(offset, buffer, 0, buffer.length);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -199,20 +196,28 @@ public class Searcher {
|
||||
return vIndex;
|
||||
}
|
||||
|
||||
public static byte[] loadContent(RandomAccessFile handle) throws IOException {
|
||||
public static LongByteArray loadContent(RandomAccessFile handle) throws IOException {
|
||||
handle.seek(0);
|
||||
final byte[] buff = new byte[(int) handle.length()];
|
||||
int rLen = handle.read(buff);
|
||||
if (rLen != buff.length) {
|
||||
throw new IOException("incomplete read: read bytes should be " + buff.length);
|
||||
// check the length and do the buff load
|
||||
long toRead = handle.length();
|
||||
final LongByteArray byteArray = new LongByteArray();
|
||||
while (toRead > 0) {
|
||||
final byte[] buff = new byte[(int) Math.min(toRead, MAX_WRITE_BYTES)];
|
||||
final int rLen = handle.read(buff);
|
||||
if (rLen != buff.length) {
|
||||
throw new IOException("incomplete read: read bytes should be " + buff.length + ", got `" + rLen + "`");
|
||||
}
|
||||
|
||||
byteArray.append(buff);
|
||||
toRead -= rLen;
|
||||
}
|
||||
|
||||
return buff;
|
||||
return byteArray;
|
||||
}
|
||||
|
||||
public static byte[] loadContentFromFile(String dbPath) throws IOException {
|
||||
public static LongByteArray loadContentFromFile(String dbPath) throws IOException {
|
||||
final RandomAccessFile handle = new RandomAccessFile(dbPath, "r");
|
||||
final byte[] content = loadContent(handle);
|
||||
final LongByteArray content = loadContent(handle);
|
||||
handle.close();
|
||||
return content;
|
||||
}
|
||||
|
||||
@ -112,6 +112,7 @@ if ($handle === false) {
|
||||
}
|
||||
|
||||
$count = 0;
|
||||
$qx_count = 0;
|
||||
while (!feof($handle)) {
|
||||
$line = trim(fgets($handle, 1024));
|
||||
if (strlen($line) < 1) {
|
||||
@ -126,8 +127,13 @@ while (!feof($handle)) {
|
||||
|
||||
$count++;
|
||||
$region = $searcher->search($ip);
|
||||
echo $line, ", ", $region, "\n";
|
||||
$ss = explode('|', $region);
|
||||
if (strlen($ss[3]) > 1) {
|
||||
$qx_count++;
|
||||
}
|
||||
echo $line, ",", str_replace('|', ',', $region), "\n";
|
||||
}
|
||||
|
||||
fclose($handle);
|
||||
echo "qx_count: {$qx_count}";
|
||||
echo "Done, with {$count} IPs\n";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user