java文本对比工具源码8(java比较文本相似度)
lipiwang 2025-06-23 20:28 4 浏览 0 评论
/**
* Parse a textual representation of patches and return a List of Patch
* objects.
* @param textline Text representation of patches.
* @return List of Patch objects.
* @throws IllegalArgumentException If invalid input.
*/
public List<Patch> patch_fromText(String textline)
throws IllegalArgumentException {
List<Patch> patches = new LinkedList<Patch>();
if (textline.length() == 0) {
return patches;
}
List<String> textList = Arrays.asList(textline.split("\n"));
LinkedList<String> text = new LinkedList<String>(textList);
Patch patch;
Pattern patchHeader
= Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@#34;);
Matcher m;
char sign;
String line;
while (!text.isEmpty()) {
m = patchHeader.matcher(text.getFirst());
if (!m.matches()) {
throw new IllegalArgumentException(
"Invalid patch string: " + text.getFirst());
}
patch = new Patch();
patches.add(patch);
patch.start1 = Integer.parseInt(m.group(1));
if (m.group(2).length() == 0) {
patch.start1--;
patch.length1 = 1;
} else if (m.group(2).equals("0")) {
patch.length1 = 0;
} else {
patch.start1--;
patch.length1 = Integer.parseInt(m.group(2));
}
patch.start2 = Integer.parseInt(m.group(3));
if (m.group(4).length() == 0) {
patch.start2--;
patch.length2 = 1;
} else if (m.group(4).equals("0")) {
patch.length2 = 0;
} else {
patch.start2--;
patch.length2 = Integer.parseInt(m.group(4));
}
text.removeFirst();
while (!text.isEmpty()) {
try {
sign = text.getFirst().charAt(0);
} catch (IndexOutOfBoundsException e) {
// Blank line? Whatever.
text.removeFirst();
continue;
}
line = text.getFirst().substring(1);
line = line.replace("+", "%2B"); // decode would change all "+" to " "
try {
line = URLDecoder.decode(line, "UTF-8");
} catch (UnsupportedEncodingException e) {
// Not likely on modern system.
throw new Error("This system does not support UTF-8.", e);
} catch (IllegalArgumentException e) {
// Malformed URI sequence.
throw new IllegalArgumentException(
"Illegal escape in patch_fromText: " + line, e);
}
if (sign == '-') {
// Deletion.
patch.diffs.add(new Diff(Operation.DELETE, line));
} else if (sign == '+') {
// Insertion.
patch.diffs.add(new Diff(Operation.INSERT, line));
} else if (sign == ' ') {
// Minor equality.
patch.diffs.add(new Diff(Operation.EQUAL, line));
} else if (sign == '@') {
// Start of next patch.
break;
} else {
// WTF?
throw new IllegalArgumentException(
"Invalid patch mode '" + sign + "' in: " + line);
}
text.removeFirst();
}
}
return patches;
}
/**
* Class representing one diff operation.
*/
public static class Diff {
  /**
   * The kind of edit: INSERT, DELETE or EQUAL.
   */
  public Operation operation;
  /**
   * The text this edit refers to.
   */
  public String text;

  /**
   * Builds a diff from an operation and its text.
   * @param operation One of INSERT, DELETE or EQUAL.
   * @param text The text being applied.
   */
  public Diff(Operation operation, String text) {
    this.text = text;
    this.operation = operation;
  }

  /**
   * Renders this Diff for humans, with newlines shown as pilcrow signs.
   * @return text version.
   */
  @Override
  public String toString() {
    StringBuilder rendered = new StringBuilder("Diff(");
    rendered.append(this.operation)
        .append(",\"")
        .append(this.text.replace('\n', '\u00b6'))
        .append("\")");
    return rendered.toString();
  }

  /**
   * Combines the hashes of the operation and the text; a null field
   * contributes zero.
   * @return Hash value.
   */
  @Override
  public int hashCode() {
    int operationHash = (operation == null) ? 0 : operation.hashCode();
    int textHash = (text == null) ? 0 : text.hashCode();
    return operationHash + 31 * textHash;
  }

  /**
   * Two Diffs are equal when they are of the same class, carry the same
   * operation and have equal (or both null) text.
   * @param obj Another Diff to compare against.
   * @return true or false.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    Diff that = (Diff) obj;
    if (operation != that.operation) {
      return false;
    }
    return (text == null) ? (that.text == null) : text.equals(that.text);
  }
}
/**
* Class representing one patch operation.
*/
public static class Patch {
  public LinkedList<Diff> diffs;
  public int start1;
  public int start2;
  public int length1;
  public int length2;

  /**
   * Creates a patch whose diff list starts out empty.
   */
  public Patch() {
    this.diffs = new LinkedList<Diff>();
  }

  /**
   * Serializes the patch in a GNU-diff-like format.
   * Header: @@ -382,8 +481,9 @@
   * A non-empty range is printed with its start shifted up by one.
   * Each diff follows on its own line: a sign character, then the
   * URL-encoded text.
   * @return The GNU diff string.
   */
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder();
    out.append("@@ -")
        .append(formatCoords(this.start1, this.length1))
        .append(" +")
        .append(formatCoords(this.start2, this.length2))
        .append(" @@\n");
    // Escape the body of the patch with %xx notation.
    for (Diff aDiff : this.diffs) {
      switch (aDiff.operation) {
        case INSERT:
          out.append('+');
          break;
        case DELETE:
          out.append('-');
          break;
        case EQUAL:
          out.append(' ');
          break;
      }
      String encoded;
      try {
        encoded = URLEncoder.encode(aDiff.text, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Not likely on modern system.
        throw new Error("This system does not support UTF-8.", e);
      }
      // URLEncoder writes spaces as '+'; the patch format keeps them as ' '.
      out.append(encoded.replace('+', ' ')).append("\n");
    }
    return unescapeForEncodeUriCompatability(out.toString());
  }

  /**
   * Formats one side of the header: "start,0" for an empty range, a lone
   * start + 1 for a range of length one, otherwise "start + 1,length".
   */
  private static String formatCoords(int start, int length) {
    if (length == 0) {
      return start + ",0";
    }
    if (length == 1) {
      return Integer.toString(start + 1);
    }
    return (start + 1) + "," + length;
  }
}
/**
* Unescape selected chars for compatability with JavaScript's encodeURI.
* In speed critical applications this could be dropped since the
* receiving application will certainly decode these fine.
* Note that this function is case-sensitive. Thus "%3f" would not be
* unescaped. But this is ok because it is only called with the output of
* URLEncoder.encode which returns uppercase hex.
*
* Example: "%3F" -> "?", "%24" -> "$", etc.
*
* @param str The string to escape.
* @return The escaped string.
*/
private static String unescapeForEncodeUriCompatability(String str) {
return str.replace("%21", "!").replace("%7E", "~")
.replace("%27", "'").replace("%28", "(").replace("%29", ")")
.replace("%3B", ";").replace("%2F", "/").replace("%3F", "?")
.replace("%3A", ":").replace("%40", "@").replace("%26", "&")
.replace("%3D", "=").replace("%2B", "+").replace("%24", "#34;)
.replace("%2C", ",").replace("%23", "#");
}
}
相关推荐
- 前端 JavaScript 字符串中提取数字
-
varstr="4500元";varnum=parseInt(str);alert(num);//4500如果字符串前面有非数字字符,上面这种方法就不行了:var...
- 使用JavaScript如何获取网站网址(js如何获取浏览器信息)
-
在做网站开发时,我们有时候会获取当前页面的完整路径。在网页前端如何实现呢?请在网页脚本代码段中粘贴如下代码。functiongetRootPath(){//获取当前网址,...
- java文本对比工具源码8(java比较文本相似度)
-
/***ParseatextualrepresentationofpatchesandreturnaListofPatch*objects.*@paramtextline...
- JavaScript实现的9大排序算法(js排序方法)
-
笔试面试经常涉及各种算法,本文简要介绍常用的一些算法,并用JavaScript实现。1、插入排序1)算法简介插入排序(Insertion-Sort)的算法描述是一种简单直观的排序算法。它的工作原理是通...
- 使用函数化的Javascript代码编写方式
-
对于Javascript介绍想必大家都耳熟能详啦,基于函数化的编程语言,基于浏览器运行的编程语言,web开发语言,前端开发必备语言,blablabla...Javascript是一个非常灵...
- Js基础31:内置对象(js内置对象是什么意思)
-
js里面的对象分成三大类:内置对象ArrayDateMath宿主对象浏览器提供的对象(如bom、dom等等)自定义对象开发人员自己定义的对象内置对象——所谓内置对象,就是JavaScript自...
- js获取上传文件类型以及大小的方法
-
前端web上传文件时,需要在上传之前判断一下文件的类型以及文件的大小,HTML为前端的标记语言是无法做到这一点,只能使用javascript动态脚本代码来实现。js获取上传文件大小的方法示例代码:&...
- 黑客入门实践:如何绕过前端过滤上传文件
-
今天开始,我们就要开始学习具体的漏洞了,但是希望大家学完后,不要轻易"入侵"网站哦,当时测试环境下除外。今天的课程是关于“文件上传漏洞”,据安界网的老师介绍,文件上传漏洞仅次于命令执行...
- Java文件上传细讲(java文件夹上传)
-
什么是文件上传?文件上传就是把用户的信息保存起来。为什么需要文件上传?在用户注册的时候,可能需要用户提交照片。那么这张照片就应该要进行保存。免费学习资料获取方式上传组件(工具)为什么我们要使用上传工具...
- 鸿蒙上实现“翻译”功能(鸿蒙宴全文翻译)
-
本章节我们来制作中文翻译成英文的实例(运行在HarmonyOS上),通过HTTP去配合API进行实现。需求分析如下:文字输入HTTP协议使用文字翻译控件介绍①HTTP数据请求官方文档请求...
- JavaScript从入门到精通(javascript 入门教程)
-
前几天,我们学习了JavaScript的入门课程,但是要想做网站,仅仅学会入门是不够的,今后的几天,我将带领大家精通JavaScript,希望大家好好学习!JS内置对象String对象:字符串对象,提...
- 第15天|16天搞定前端,javascript语法篇(干货)
-
JavaScript是互联网上最流行的脚本语言,这门语言可用于HTML和web,可广泛用于服务器、PC、笔记本电脑、平板电脑和智能手机等设备。它是一个脚本语言,它是一个轻量级,但功能强大的编程...
- 通过js来实现打字效果(js如何输入)
-
有时候浏览网页经常会看见一些页面出现一些打字的效果,那么是怎么实现的呢?逻辑确定目标容器,在哪个容器进行输出确定输出内容,当前直接根据目标容器确定输出内容即可需要控制输出频率,因此需要循环输出完毕代码...
- 手把手教你学会一键还原混淆js原理
-
1.短变量名在以下示例代码中,我们将变量“customerName”替换为“a”:vara="JohnSmith";console.log(a);2.随机变量名在以下示例代码...
- sql中常用的字符串函数详解(sql字符串函数有哪些)
-
在日常开发中遇到处理最多的可能字符串要算其中一个了,什么替换啊截取啊大小写转换啊、删除空格啊等等,这些操作我们可以在前端操作,也可以直接在数据库的sql中操作,那么我们来看一下sql中处理字符串的...
你 发表评论:
欢迎- 一周热门
- 最近发表
- 标签列表
-
- maven镜像 (69)
- undefined reference to (60)
- zip格式 (63)
- oracle over (62)
- date_format函数用法 (67)
- 在线代理服务器 (60)
- shell 字符串比较 (74)
- x509证书 (61)
- localhost (65)
- java.awt.headless (66)
- syn_sent (64)
- settings.xml (59)
- 弹出窗口 (56)
- applicationcontextaware (72)
- my.cnf (73)
- httpsession (62)
- pkcs7 (62)
- session cookie (63)
- java 生成uuid (58)
- could not initialize class (58)
- beanpropertyrowmapper (58)
- word空格下划线不显示 (73)
- jar文件 (60)
- jsp内置对象 (58)
- makefile编写规则 (58)