fix GP-2: Improve the diff to better handle the case of whitespace

changes
This commit is contained in:
robin shine 2016-11-24 22:13:59 +08:00
parent 04d0340f11
commit dad2a1dccc
5 changed files with 52 additions and 3 deletions

View File

@ -165,6 +165,14 @@ public class DiffUtils {
return diffBlocks;
}
/**
* Checks the deleted lines and the inserted lines, and positions them so that a
* similar deleted line and inserted line (which indicates they are the same line with
* modifications) are displayed on the same row.
* @param deleteLines
* @param insertLines
* @return
*/
public static LinkedHashMap<Integer, LineDiff> align(
List<List<CmToken>> deleteLines, List<List<CmToken>> insertLines) {
LinkedHashMap<Integer, LineDiff> lineDiffs = new LinkedHashMap<>();

View File

@ -21,7 +21,12 @@ public abstract class AbstractTokenizer<S> implements Tokenizer {
StringStream stream = new StringStream(line);
while (!stream.eol()) {
String style = token(stream, state);
tokenizedLine.add(new CmToken(style, stream.current()));
CmToken token = new CmToken(style, stream.current());
List<CmToken> splitted = token.split();
if (splitted != null)
tokenizedLine.addAll(splitted);
else
tokenizedLine.add(token);
stream.start(stream.pos());
}
tokenizedLines.add(tokenizedLine);

View File

@ -1,6 +1,10 @@
package com.gitplex.commons.lang.tokenizers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
import com.gitplex.commons.lang.diff.DiffMatchPatch.Operation;
import com.gitplex.commons.util.StringUtils;
@ -167,5 +171,31 @@ public class CmToken implements Serializable {
else
return text;
}
/**
 * Breaks a whitespace token into a sequence of single-character tokens.
 * <p>
 * CodeMirror reports a run of consecutive spaces/tabs as one token, which gives a
 * poor diff. For example, if a line has three tabs:
 * \t\t\t
 * and one more tab is added:
 * \t\t\t\t
 * then the diff would be a deletion of the token "\t\t\t" plus an insertion of the
 * token "\t\t\t\t". Splitting the whitespace token into one token per character lets
 * the diff detect that only a single "\t" token was inserted.
 *
 * @return
 * 			one token per character of this token's text, each created with this token's
 * 			type, if this token is whitespace; <tt>null</tt> if this token is not
 * 			whitespace, in which case the caller should keep using this token as is
 */
@Nullable
public List<CmToken> split() {
	// Only whitespace tokens are split; null tells the caller there is nothing to split.
	if (!isWhitespace())
		return null;

	int length = text.length();
	List<CmToken> pieces = new ArrayList<>(length);
	for (int i = 0; i < length; i++)
		pieces.add(new CmToken(type, String.valueOf(text.charAt(i))));
	return pieces;
}
}

View File

@ -81,7 +81,12 @@ public abstract class AbstractTokenizerTest {
tokenizedLines.add(currentLine);
currentLine = new ArrayList<>();
} else {
currentLine.add(new CmToken(style, text));
CmToken token = new CmToken(style, text);
List<CmToken> splitted = token.split();
if (splitted != null)
currentLine.addAll(splitted);
else
currentLine.add(token);
}
}

View File

@ -81,7 +81,8 @@
background: #ffecec;
}
.text-diff>.body>tbody>tr>td.old .delete {
background-color: #F8C8C8;
background-color: #F8B8B8;
text-decoration: line-through;
}
.text-diff>.body>tbody>tr>td.new.number {
background: #dbffdb;