Maker is ready, with checking and merging of consecutive segments

This commit is contained in:
lion 2025-09-11 21:08:51 +08:00
parent 23b2446736
commit 4911bdcd41
5 changed files with 151 additions and 43 deletions

View File

@ -105,7 +105,7 @@
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.lionsoul.ip2region.MakerTest</mainClass>
<mainClass>org.lionsoul.ip2region.MakerApp</mainClass>
</transformer>
</transformers>
</configuration>

View File

@ -19,9 +19,9 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MakerTest {
public class MakerApp {
public final static Log log = Log.getLogger(MakerTest.class);
public final static Log log = Log.getLogger(MakerApp.class);
public final static Pattern p = Pattern.compile("^(\\d+(-\\d+)?)$");
public static void printHelp(String[] args) {
@ -164,7 +164,7 @@ public class MakerTest {
long tStart = System.currentTimeMillis();
final Maker maker = new Maker(version, indexPolicy, srcFile, dstFile, fields);
log.infof("Generating xdb with src=%s, dst=%s, logLevel=%s", srcFile, dstFile, logLevel);
Maker.log.setLevel(logLevel);
MakerApp.log.setLevel(logLevel);
maker.init();
maker.start();
maker.end();

View File

@ -152,55 +152,38 @@ public class Maker {
dstHandle.write(header);
}
// load all the segments
// load all the segments.
private void loadSegments() throws Exception {
log.infof("try to load the segments ... ");
final long tStart = System.currentTimeMillis();
Segment last = null;
String line;
Segment.iterate(srcFile, new Segment.IterateAction() {
private Segment last = null;
final FileInputStream fis = new FileInputStream(srcFile);
final BufferedReader br = new BufferedReader(new InputStreamReader(fis, bytesCharset));
while ((line = br.readLine()) != null) {
log.debugf("load segment `%s`", line);
final String[] ps = line.split("\\|", 3);
if (ps.length != 3) {
br.close();
throw new Exception("invalid ip segment line `"+ps[0]+"`");
@Override
public void before(String line) {
log.debugf("load segment: `%s`", line);
}
final byte[] sip = Util.parseIP(ps[0]);
final byte[] eip = Util.parseIP(ps[1]);
if (Util.ipCompare(sip, eip) > 0) {
br.close();
throw new Exception("start ip("+ps[0]+") should not be greater than end ip("+ps[1]+")");
}
if (ps[2].isEmpty()) {
br.close();
throw new Exception("empty region info in segment line `"+ps[2]+"`");
}
// ip version check
if (version.bytes != sip.length) {
br.close();
throw new InvalidInetAddressException("invalid ip segment(" + version.name + " expected)");
}
// check the continuity of the data segment
if (last != null) {
if (Util.ipCompare(Util.ipAddOne(last.endIP), sip) != 0) {
br.close();
throw new Exception("discontinuous data segment: last.eip+1("+sip+") != seg.sip("+eip+", "+ps[0]+")");
@Override
public void handle(Segment seg) throws Exception {
// ip version check
if (seg.startIP.length != version.bytes) {
throw new Exception("invalid ip segment("+version.name+" expected)");
}
if (last != null && !seg.after(last)) {
throw new Exception("discontinuous data segment: last.eip("
+ Util.ipToString(last.endIP)+")+1 != seg.sip("+ Util.ipToString(seg.startIP) + ", "+ seg.region +")");
}
// apply the field filtering
final String region = Util.regionFiltering(seg.region, fields);
segments.add(new Segment(seg.startIP, seg.endIP, region));
last = seg;
}
final Segment seg = new Segment(sip, eip, Util.regionFiltering(ps[2], this.fields));
segments.add(seg);
last = seg;
}
});
br.close();
log.infof("all segments loaded, length: %d, elapsed: %d ms", segments.size(), System.currentTimeMillis() - tStart);
}

View File

@ -7,6 +7,10 @@
package org.lionsoul.ip2region.xdb;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
@ -94,6 +98,11 @@ public class Segment {
return Util.ipCompare(ip, startIP) >= 0 && Util.ipCompare(ip, endIP) <= 0;
}
// Tell whether this segment begins immediately after the given one,
// that is, whether last.endIP + 1 compares equal to this segment's startIP.
public boolean after(final Segment last) {
    if (Util.ipCompare(Util.ipAddOne(last.endIP), this.startIP) != 0) {
        return false;
    }
    return true;
}
// parse the Segment from an input string
public static Segment parse(String input) throws Exception {
final String[] ps = input.trim().split("\\|", 3);
@ -110,4 +119,96 @@ public class Segment {
return new Segment(sip, eip, ps[2]);
}
// static class to handler the iterate callback
public static interface IterateAction {
public void before(final String line);
public void handle(final Segment seg) throws Exception;
}
// Convenience overload: wrap the given source file path in a File and
// delegate to iterate(File, IterateAction).
public static void iterate(final String srcFile, IterateAction action) throws Exception {
    final File source = new File(srcFile);
    iterate(source, action);
}
// Iterate the segments of the specified ip source file and hand them to the action.
//
// Each line is trimmed first; empty lines and lines starting with '#' are skipped.
// Every remaining line is reported through action.before, then split on '|' into
// "startIP|endIP|region" and validated. Adjacent segments that carry the same region
// and are directly consecutive (last.endIP + 1 == cur.startIP) are merged into one
// segment, so action.handle is called once per merged run and not once per line.
//
// srcFile: the utf-8 encoded ip source file to read.
// action:  the callback receiving the raw lines and the (merged) segments.
// Throws an Exception for a malformed line; anything thrown by the action or by the
// underlying reader is propagated unchanged. The reader is closed on every exit path.
public static void iterate(final File srcFile, IterateAction action) throws Exception {
    final FileInputStream fis = new FileInputStream(srcFile);
    final BufferedReader br = new BufferedReader(new InputStreamReader(fis, "utf-8"));
    try {
        // the pending segment: kept back until we know the next line cannot be merged into it
        Segment last = null;
        String line;
        while ((line = br.readLine()) != null) {
            final String l = line.trim();

            // ignore empty line and comment line
            if (l.length() < 1 || l.charAt(0) == '#') {
                continue;
            }

            // call the action.before
            action.before(l);

            // split the trimmed line (same as Segment.parse) so surrounding whitespace
            // never leaks into the start ip or the region info
            final String[] ps = l.split("\\|", 3);
            if (ps.length != 3) {
                throw new Exception("invalid ip segment line `"+ps[0]+"`");
            }

            final byte[] sip = Util.parseIP(ps[0]);
            final byte[] eip = Util.parseIP(ps[1]);
            if (sip.length != eip.length) {
                throw new Exception("invalid ip segment line `" + l + "`: sip/eip version not match");
            }

            if (Util.ipCompare(sip, eip) > 0) {
                throw new Exception("start ip("+ps[0]+") should not be greater than end ip("+ps[1]+")");
            }

            if (ps[2].isEmpty()) {
                // report the offending line: the region itself is empty here
                throw new Exception("empty region info in segment line `"+l+"`");
            }

            final Segment seg = new Segment(sip, eip, ps[2]);

            // first segment: nothing to compare with yet
            if (last == null) {
                last = seg;
                continue;
            }

            // check and automatically merge the consecutive segments, which means:
            // 1, region info is the same
            // 2, last.eip+1 = cur.sip
            if (last.region.equals(seg.region) && seg.after(last)) {
                // extend the pending segment in place by overwriting its end ip bytes
                System.arraycopy(seg.endIP, 0, last.endIP, 0, seg.endIP.length);
                continue;
            }

            // the pending segment is complete: pass it to action.handle
            action.handle(last);

            // reset the last
            last = seg;
        }

        // process the last pending segment
        if (last != null) {
            action.handle(last);
        }
    } finally {
        br.close();
    }
}
}

View File

@ -1,5 +1,8 @@
package org.lionsoul.ip2region.xdb;
import java.io.File;
import java.net.URL;
import org.junit.Test;
public class SegmentTest {
@ -36,4 +39,25 @@ public class SegmentTest {
}
}
}
// Smoke test for Segment.iterate: walks the mixed segments test data file and logs
// every segment handed to the callback. It makes no assertions; it only fails when
// the iteration itself throws.
@Test
public void testIterate() throws Exception {
    final URL res = getClass().getClassLoader().getResource("");
    if (res == null) {
        throw new Exception("unable to get the resource path");
    }

    // URL.getPath() keeps percent-encoding (e.g. %20 for a space), so convert
    // through a URI to obtain the real file system path.
    final File root = new File(res.toURI());

    // the data directory is looked up four levels above the class loader root
    final String base = root.getParentFile().getParentFile().getParentFile().getParent();
    Segment.iterate(base+"/data/segments.tests.mixed", new Segment.IterateAction() {
        @Override
        public void before(String line) {
            // log.debugf("load segment: `%s`", line);
        }

        @Override
        public void handle(Segment seg) throws Exception {
            log.infof("handle segment: `%s`", seg.toString());
        }
    });
}
}