fix GP-2: Improve the diff to better handle the case of whitespace

changes
2025-12-08 18:26:30 +00:00 · 2016-11-24 22:13:59 +08:00 · 2016-11-24 22:13:59 +08:00 · dad2a1dccc
commit dad2a1dccc
parent 04d0340f11
5 changed files with 52 additions and 3 deletions
--- a/commons.lang/src/main/java/com/gitplex/commons/lang/diff/DiffUtils.java
+++ b/commons.lang/src/main/java/com/gitplex/commons/lang/diff/DiffUtils.java
@ -165,6 +165,14 @@ public class DiffUtils {
 		return diffBlocks;
 	}
 	
+	/**
+	 * This method checks deleted lines and inserted lines, and positions them so that 
+	 * a similar deleted line and inserted line (indicating they are the same line with 
+	 * modification) will be displayed on the same row
+	 * @param deleteLines
+	 * @param insertLines
+	 * @return
+	 */
 	public static LinkedHashMap<Integer, LineDiff> align(
 			List<List<CmToken>> deleteLines, List<List<CmToken>> insertLines) {
 		LinkedHashMap<Integer, LineDiff> lineDiffs = new LinkedHashMap<>();
--- a/commons.lang/src/main/java/com/gitplex/commons/lang/tokenizers/AbstractTokenizer.java
+++ b/commons.lang/src/main/java/com/gitplex/commons/lang/tokenizers/AbstractTokenizer.java
@ -21,7 +21,12 @@ public abstract class AbstractTokenizer<S> implements Tokenizer {
 			StringStream stream = new StringStream(line);
 			while (!stream.eol()) {
 				String style = token(stream, state);
-				tokenizedLine.add(new CmToken(style, stream.current()));
+				CmToken token = new CmToken(style, stream.current());
+				List<CmToken> splitted = token.split();
+				if (splitted != null)
+					tokenizedLine.addAll(splitted);
+				else
+					tokenizedLine.add(token);
 				stream.start(stream.pos());
 			}
 			tokenizedLines.add(tokenizedLine);
--- a/commons.lang/src/main/java/com/gitplex/commons/lang/tokenizers/CmToken.java
+++ b/commons.lang/src/main/java/com/gitplex/commons/lang/tokenizers/CmToken.java
@ -1,6 +1,10 @@
 package com.gitplex.commons.lang.tokenizers;

 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nullable;

 import com.gitplex.commons.lang.diff.DiffMatchPatch.Operation;
 import com.gitplex.commons.util.StringUtils;
@ -167,5 +171,31 @@ public class CmToken implements Serializable {
 		else
 			return text;
 	}
+
+	/**
+	 * Splits this token into single-character tokens if it is a whitespace token. CodeMirror 
+	 * returns consecutive spaces/tabs as a single token, which makes the diff less than ideal. 
+	 * For instance, if a line has three tabs:
+	 * \t\t\t
+	 * and another tab is added:
+	 * \t\t\t\t
+	 * the diff will be a deletion of token "\t\t\t" and an addition of token "\t\t\t\t". With 
+	 * one token per whitespace character, the diff is able to figure out that a single "\t" 
+	 * token is inserted. Each resulting token keeps the type of this token.
+	 * 
+	 * @return one token per character if isWhitespace() is true for this token, otherwise null
+	 */
+	@Nullable
+	public List<CmToken> split() {
+		if (isWhitespace()) {
+			List<CmToken> splitted = new ArrayList<>();
+			for (char ch: text.toCharArray()) {
+				splitted.add(new CmToken(type, String.valueOf(ch))); // same type, one character
+			}
+			return splitted;
+		} else {
+			return null; // not a whitespace token: nothing to split
+		}
+	}
 	
 }
--- a/commons.lang/src/test/java/com/gitplex/commons/lang/tokenizers/AbstractTokenizerTest.java
+++ b/commons.lang/src/test/java/com/gitplex/commons/lang/tokenizers/AbstractTokenizerTest.java
@ -81,7 +81,12 @@ public abstract class AbstractTokenizerTest {
 				tokenizedLines.add(currentLine);
 				currentLine = new ArrayList<>();
 			} else {
-				currentLine.add(new CmToken(style, text));
+				CmToken token = new CmToken(style, text);
+				List<CmToken> splitted = token.split();
+				if (splitted != null)
+					currentLine.addAll(splitted);
+				else
+					currentLine.add(token);
 			}
 		}
 		
--- a/server.web/src/main/java/com/gitplex/server/web/component/diff/blob/text/text-diff.css
+++ b/server.web/src/main/java/com/gitplex/server/web/component/diff/blob/text/text-diff.css
@ -81,7 +81,8 @@
 	background: #ffecec;
 }
 .text-diff>.body>tbody>tr>td.old .delete {
-	background-color: #F8C8C8;
+	background-color: #F8B8B8;
+	text-decoration: line-through;
 }
 .text-diff>.body>tbody>tr>td.new.number {
 	background: #dbffdb;