百度360必应搜狗淘宝本站头条
当前位置:网站首页 > 技术资源 > 正文

java文本对比工具源码8(java比较文本相似度)

lipiwang 2025-06-23 20:28 4 浏览 0 评论

/**

* Parse a textual representation of patches and return a List of Patch

* objects.

* @param textline Text representation of patches.

* @return List of Patch objects.

* @throws IllegalArgumentException If invalid input.

*/

public List<Patch> patch_fromText(String textline)

throws IllegalArgumentException {

List<Patch> patches = new LinkedList<Patch>();

if (textline.length() == 0) {

return patches;

}

List<String> textList = Arrays.asList(textline.split("\n"));

LinkedList<String> text = new LinkedList<String>(textList);

Patch patch;

Pattern patchHeader

= Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@#34;);

Matcher m;

char sign;

String line;

while (!text.isEmpty()) {

m = patchHeader.matcher(text.getFirst());

if (!m.matches()) {

throw new IllegalArgumentException(

"Invalid patch string: " + text.getFirst());

}

patch = new Patch();

patches.add(patch);

patch.start1 = Integer.parseInt(m.group(1));

if (m.group(2).length() == 0) {

patch.start1--;

patch.length1 = 1;

} else if (m.group(2).equals("0")) {

patch.length1 = 0;

} else {

patch.start1--;

patch.length1 = Integer.parseInt(m.group(2));

}

patch.start2 = Integer.parseInt(m.group(3));

if (m.group(4).length() == 0) {

patch.start2--;

patch.length2 = 1;

} else if (m.group(4).equals("0")) {

patch.length2 = 0;

} else {

patch.start2--;

patch.length2 = Integer.parseInt(m.group(4));

}

text.removeFirst();

while (!text.isEmpty()) {

try {

sign = text.getFirst().charAt(0);

} catch (IndexOutOfBoundsException e) {

// Blank line? Whatever.

text.removeFirst();

continue;

}

line = text.getFirst().substring(1);

line = line.replace("+", "%2B"); // decode would change all "+" to " "

try {

line = URLDecoder.decode(line, "UTF-8");

} catch (UnsupportedEncodingException e) {

// Not likely on modern system.

throw new Error("This system does not support UTF-8.", e);

} catch (IllegalArgumentException e) {

// Malformed URI sequence.

throw new IllegalArgumentException(

"Illegal escape in patch_fromText: " + line, e);

}

if (sign == '-') {

// Deletion.

patch.diffs.add(new Diff(Operation.DELETE, line));

} else if (sign == '+') {

// Insertion.

patch.diffs.add(new Diff(Operation.INSERT, line));

} else if (sign == ' ') {

// Minor equality.

patch.diffs.add(new Diff(Operation.EQUAL, line));

} else if (sign == '@') {

// Start of next patch.

break;

} else {

// WTF?

throw new IllegalArgumentException(

"Invalid patch mode '" + sign + "' in: " + line);

}

text.removeFirst();

}

}

return patches;

}

/**
 * A single diff operation: an operation kind paired with the text it
 * applies to.
 */
public static class Diff {
  /**
   * One of: INSERT, DELETE or EQUAL.
   */
  public Operation operation;

  /**
   * The text associated with this diff operation.
   */
  public String text;

  /**
   * Build a diff from the given operation and text.
   * @param operation One of INSERT, DELETE or EQUAL.
   * @param text The text being applied.
   */
  public Diff(Operation operation, String text) {
    this.operation = operation;
    this.text = text;
  }

  /**
   * Render this Diff for humans; newlines in the text are shown as a
   * pilcrow sign.
   * @return text version.
   */
  @Override
  public String toString() {
    String shown = this.text.replace('\n', '\u00b6');
    StringBuilder out = new StringBuilder();
    out.append("Diff(").append(this.operation);
    out.append(",\"").append(shown).append("\")");
    return out.toString();
  }

  /**
   * Compute a hash from the operation and the text.
   * This function is not used by DMP.
   * @return Hash value.
   */
  @Override
  public int hashCode() {
    int operationHash = 0;
    if (operation != null) {
      operationHash = operation.hashCode();
    }
    int textHash = 0;
    if (text != null) {
      textHash = text.hashCode();
    }
    return operationHash + 31 * textHash;
  }

  /**
   * Two Diffs are equal when they have the same runtime class, the same
   * operation and equal (or both null) text.
   * @param obj Another Diff to compare against.
   * @return true or false.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    Diff that = (Diff) obj;
    if (operation != that.operation) {
      return false;
    }
    if (text == null) {
      return that.text == null;
    }
    return text.equals(that.text);
  }
}

/**
 * One patch: a list of diffs plus the start index and length of the
 * region it covers in each of the two texts.
 */
public static class Patch {
  public LinkedList<Diff> diffs;
  public int start1;
  public int start2;
  public int length1;
  public int length2;

  /**
   * Create a patch whose diff list is empty.
   */
  public Patch() {
    this.diffs = new LinkedList<Diff>();
  }

  /**
   * Emulate GNU diff's format.
   * Header: @@ -382,8 +481,9 @@
   * Indices are printed as 1-based, not 0-based.
   * @return The GNU diff string.
   */
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder();
    out.append("@@ -");
    out.append(formatCoords(this.start1, this.length1));
    out.append(" +");
    out.append(formatCoords(this.start2, this.length2));
    out.append(" @@\n");

    // One line per diff: an operation marker followed by the %xx-escaped
    // text.
    for (Diff entry : this.diffs) {
      switch (entry.operation) {
      case INSERT:
        out.append('+');
        break;
      case DELETE:
        out.append('-');
        break;
      case EQUAL:
        out.append(' ');
        break;
      }
      String encoded;
      try {
        encoded = URLEncoder.encode(entry.text, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Not likely on modern system.
        throw new Error("This system does not support UTF-8.", e);
      }
      // URLEncoder writes spaces as '+'; the patch format keeps them as
      // spaces.
      out.append(encoded.replace('+', ' '));
      out.append("\n");
    }
    return unescapeForEncodeUriCompatability(out.toString());
  }

  /**
   * Format one side of the header: "start,0" for an empty range, a bare
   * start+1 for a range of length one, otherwise "start+1,length".
   */
  private static String formatCoords(int start, int length) {
    if (length == 0) {
      return start + ",0";
    }
    if (length == 1) {
      return Integer.toString(start + 1);
    }
    return (start + 1) + "," + length;
  }
}

/**

* Unescape selected chars for compatability with JavaScript's encodeURI.

* In speed critical applications this could be dropped since the

* receiving application will certainly decode these fine.

* Note that this function is case-sensitive. Thus "%3f" would not be

* unescaped. But this is ok because it is only called with the output of

* URLEncoder.encode which returns uppercase hex.

*

* Example: "%3F" -> "?", "%24" -> "#34;, etc.

*

* @param str The string to escape.

* @return The escaped string.

*/

private static String unescapeForEncodeUriCompatability(String str) {

return str.replace("%21", "!").replace("%7E", "~")

.replace("%27", "'").replace("%28", "(").replace("%29", ")")

.replace("%3B", ";").replace("%2F", "/").replace("%3F", "?")

.replace("%3A", ":").replace("%40", "@").replace("%26", "&")

.replace("%3D", "=").replace("%2B", "+").replace("%24", "#34;)

.replace("%2C", ",").replace("%23", "#");

}

}

相关推荐

前端 JavaScript 字符串中提取数字

var str="4500元"; var num=parseInt(str); alert(num); //4500 如果字符串前面有非数字字符,上面这种方法就不行了:var...

使用JavaScript如何获取网站网址(js如何获取浏览器信息)

在做网站开发时,我们有时候会获取当前页面的完整路径。在网页前端如何实现呢?请在网页脚本代码段中粘贴如下代码。functiongetRootPath(){//获取当前网址,...

java文本对比工具源码8(java比较文本相似度)

/***ParseatextualrepresentationofpatchesandreturnaListofPatch*objects.*@paramtextline...

JavaScript实现的9大排序算法(js排序方法)

笔试面试经常涉及各种算法,本文简要介绍常用的一些算法,并用JavaScript实现。1、插入排序1)算法简介插入排序(Insertion-Sort)的算法描述是一种简单直观的排序算法。它的工作原理是通...

使用函数化的Javascript代码编写方式

对于Javascript介绍想必大家都耳熟能详啦,基于函数化的编程语言,基于浏览器运行的编程语言,web开发语言,前端开发必备语言,blablabla...Javascript是一个非常灵...

Js基础31:内置对象(js内置对象是什么意思)

js里面的对象分成三大类:内置对象ArrayDateMath宿主对象浏览器提供的对象(如bom、dom等等)自定义对象开发人员自己定义的对象内置对象——所谓内置对象,就是JavaScript自...

js获取上传文件类型以及大小的方法

前端web上传文件时,需要在上传之前判断一下文件的类型以及文件的大小,HTML为前端的标记语言是无法做到这一点,只能使用javascript动态脚本代码来实现。js获取上传文件大小的方法示例代码:&...

黑客入门实践:如何绕过前端过滤上传文件

今天开始,我们就要开始学习具体的漏洞了,但是希望大家学完后,不要轻易"入侵"网站哦,当然测试环境下除外。今天的课程是关于“文件上传漏洞”,据安界网的老师介绍,文件上传漏洞仅次于命令执行...

Java文件上传细讲(java文件夹上传)

什么是文件上传?文件上传就是把用户的信息保存起来。为什么需要文件上传?在用户注册的时候,可能需要用户提交照片。那么这张照片就应该要进行保存。免费学习资料获取方式上传组件(工具)为什么我们要使用上传工具...

鸿蒙上实现“翻译”功能(鸿蒙宴全文翻译)

本章节我们来制作中文翻译成英文的实例(运行在HarmonyOS上),通过HTTP去配合API进行实现。需求分析如下:文字输入HTTP协议使用文字翻译控件介绍①HTTP数据请求官方文档请求...

JavaScript从入门到精通(javascript 入门教程)

前几天,我们学习了JavaScript的入门课程,但是要想做网站,仅仅学会入门是不够的,今后的几天,我将带领大家精通JavaScript,希望大家好好学习!JS内置对象String对象:字符串对象,提...

第15天|16天搞定前端,javascript语法篇(干货)

JavaScript是互联网上最流行的脚本语言,这门语言可用于HTML和web,可广泛用于服务器、PC、笔记本电脑、平板电脑和智能手机等设备。它是一个脚本语言,它是一个轻量级,但功能强大的编程...

通过js来实现打字效果(js如何输入)

有时候浏览网页经常会看见一些页面出现一些打字的效果,那么是怎么实现的呢?逻辑确定目标容器,在哪个容器进行输出确定输出内容,当前直接根据目标容器确定输出内容即可需要控制输出频率,因此需要循环输出完毕代码...

手把手教你学会一键还原混淆js原理

1.短变量名在以下示例代码中,我们将变量“customerName”替换为“a”:vara="JohnSmith";console.log(a);2.随机变量名在以下示例代码...

sql中常用的字符串函数详解(sql字符串函数有哪些)

在日常开发中遇到处理最多的可能字符串要算其中一个了,什么替换啊截取啊大小写转换啊、删除空格啊等等,这些操作我们可以在前端操作,也可以直接在数据库的sql中操作,那么我们来看一下sql中处理字符串的...

取消回复欢迎 发表评论: