下载地址:https://files.cnblogs.com/files/heyang78/JsonAnalyzer20200518-01.zip

测试用例:https://www.cnblogs.com/heyang78/p/12911174.html

为什么创建此工程?

笔者在开发中曾遇到这样一项工作:某个Restful接口变更后,需要将新的Json文本与旧有文档进行对比矫正.当时最大的问题是两边都不是按字母序排列的,比较时只能上下翻找而无法一一比对,于是我产生了将Json文本同级节点按字母序排列的想法,继而将其实现.第一个版本见 https://www.cnblogs.com/heyang78/p/11973129.html ,当时实现了需求但未完善;今天(2020年5月18日)修正了原有数组解析不完全的bug,并调整简化了代码.

 解析效果展示:

原有文本:

{"data":[{"deliveryListId":"","shipperCode":"","shortShipperName":"RB","orderNo":"","deliveryOrder":,"receiverName":"吉田XXX","receiverTelNo":"","receiverAddress1":"東京都足立区足立1-1","receiverAddress2":"東京都足立区足立1-2","isCod":true,"billAmount":,"geocodingScore":,"latitudeJP":"56789.33","longitudeJP":"123456.33","latitude":"20180001.22","longitude":"20180001.33","vehicleId":"","orderDetails":[{"trackingNo":"","quantity":,"lapCount":null,"statusCode":null,"statusNameMobile":null},{"trackingNo":"","quantity":,"lapCount":,"statusCode":"","statusNameMobile":"配送準備中"},{"trackingNo":"","quantity":,"lapCount":,"statusCode":"","statusNameMobile":"持出し"},{"trackingNo":"","quantity":,"lapCount":,"statusCode":"","statusNameMobile":"配送準備中"},{"trackingNo":"","quantity":,"lapCount":,"statusCode":"","statusNameMobile":"配送準備中"}]}]}

解析后文本:

{
"data":[
{
"billAmount":,
"deliveryListId":"",
"deliveryOrder":,
"geocodingScore":,
"isCod":true,
"latitude":"20180001.22",
"latitudeJP":"56789.33",
"longitude":"20180001.33",
"longitudeJP":"123456.33",
"orderDetails":[
{
"lapCount":null,
"quantity":,
"statusCode":null,
"statusNameMobile":null,
"trackingNo":""
},
{
"lapCount":,
"quantity":,
"statusCode":"",
"statusNameMobile":"配送準備中",
"trackingNo":""
},
{
"lapCount":,
"quantity":,
"statusCode":"",
"statusNameMobile":"持出し",
"trackingNo":""
},
{
"lapCount":,
"quantity":,
"statusCode":"",
"statusNameMobile":"配送準備中",
"trackingNo":""
},
{
"lapCount":,
"quantity":,
"statusCode":"",
"statusNameMobile":"配送準備中",
"trackingNo":""
}
],
"orderNo":"",
"receiverAddress1":"東京都足立区足立1-1",
"receiverAddress2":"東京都足立区足立1-2",
"receiverName":"吉田XXX",
"receiverTelNo":"",
"shipperCode":"",
"shortShipperName":"RB",
"vehicleId":""
}
]
}

此工程的扩展意义:

编译过程中的分词、语法分析和构建语法树在此工程中均有体现,此工程也为后续的编译项目打下了基础.

核心类说明:

记号类,此类用于给Json文本中出现的七种文本分类:

package com.heyang;

/**
 * A single lexical token of a JSON text.
 *
 * <p>A token is either one of the six structural characters
 * (<code>{ } [ ] , :</code>) or a run of any other characters, which is
 * classified as {@link #TYPE_TEXT}. The token keeps its literal text and an
 * integer type code taken from the {@code TYPE_*} constants below.
 *
 * @author Heyang
 */
public class Token {
    /** Type code for <code>{</code>. */
    public static final int TYPE_OPEN_BRACE = 0;
    /** Type code for <code>}</code>. */
    public static final int TYPE_CLOSE_BRACE = 1;
    /** Type code for any non-structural text (keys, strings, numbers, literals). */
    public static final int TYPE_TEXT = 2;
    /** Type code for <code>,</code>. */
    public static final int TYPE_COMMA = 3;
    /** Type code for <code>:</code>. */
    public static final int TYPE_COLON = 4;
    /** Type code for <code>[</code>. */
    public static final int TYPE_OPEN_BRACKET = 5;
    /** Type code for <code>]</code>. */
    public static final int TYPE_CLOSE_BRACKET = 6;

    // NOTE: this declaration must stay on its own line. When it shares a line
    // with the "// ]" comment of TYPE_CLOSE_BRACKET it becomes part of that
    // comment and the class no longer compiles.
    private int type;
    private String text;

    /**
     * Creates a token from a single character.
     *
     * @param c    the character that forms the token text
     * @param type one of the {@code TYPE_*} constants
     */
    public Token(char c, int type) {
        this.text = String.valueOf(c);
        this.type = type;
    }

    /**
     * Creates a token from a word.
     *
     * @param word the token text
     * @param type one of the {@code TYPE_*} constants
     */
    public Token(String word, int type) {
        this.text = word;
        this.type = type;
    }

    /** @return the type code of this token */
    public int getType() {
        return type;
    }

    /** @param type the new type code */
    public void setType(int type) {
        this.type = type;
    }

    /** @return the literal text of this token */
    public String getText() {
        return text;
    }

    /** @param text the new literal text */
    public void setText(String text) {
        this.text = text;
    }
}

分词器类,此类用于将json变成记号:

package com.heyang;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

/**
 * Splits a JSON text into a flat list of {@link Token}s.
 *
 * <p>The six structural characters (<code>{ } [ ] , :</code>) each become a
 * token of their own; every other run of characters between them becomes one
 * {@link Token#TYPE_TEXT} token. Whitespace outside of string literals is
 * discarded. Everything between a pair of double quotes is kept verbatim, so
 * whitespace and structural characters inside a string value stay part of
 * that value.
 *
 * @author Heyang
 */
public class Lexer {
    /** Returned by {@link #structuralType(char)} for non-structural characters. */
    private static final int NOT_STRUCTURAL = -1;

    private List<Token> tokens;

    /**
     * Tokenizes the given text immediately; the result is available through
     * {@link #getTokenList()}.
     *
     * @param jsonTxt the JSON text to tokenize; must not be {@code null}
     */
    public Lexer(String jsonTxt) {
        tokens = new ArrayList<Token>();

        StringBuilder bundle = new StringBuilder(); // text collected since the last token
        boolean inString = false;                   // currently between two double quotes
        boolean escaped = false;                    // previous character was a backslash

        for (int i = 0; i < jsonTxt.length(); i++) {
            char c = jsonTxt.charAt(i);

            if (inString) {
                // Inside a string literal nothing is structural and nothing is skipped.
                bundle.append(c);
                if (escaped) {
                    escaped = false;
                } else if (c == '\\') {
                    escaped = true;
                } else if (c == '"') {
                    inString = false;
                }
                continue;
            }

            if (Character.isWhitespace(c)) {
                continue;
            }

            int type = structuralType(c);
            if (type != NOT_STRUCTURAL) {
                // Emit pending text first so tokens keep the order of the input.
                flushText(bundle);
                tokens.add(new Token(c, type));
            } else {
                if (c == '"') {
                    inString = true;
                }
                bundle.append(c);
            }
        }

        // Text that is not followed by a structural character must not be lost.
        flushText(bundle);
    }

    /**
     * Maps a structural character to its token type.
     *
     * @param c the character to classify
     * @return the matching {@code Token.TYPE_*} constant, or
     *         {@link #NOT_STRUCTURAL} when {@code c} is ordinary text
     */
    private static int structuralType(char c) {
        switch (c) {
        case '{':
            return Token.TYPE_OPEN_BRACE;
        case '}':
            return Token.TYPE_CLOSE_BRACE;
        case '[':
            return Token.TYPE_OPEN_BRACKET;
        case ']':
            return Token.TYPE_CLOSE_BRACKET;
        case ',':
            return Token.TYPE_COMMA;
        case ':':
            return Token.TYPE_COLON;
        default:
            return NOT_STRUCTURAL;
        }
    }

    /**
     * Appends the collected text as a TEXT token (if there is any) and
     * empties the buffer.
     *
     * @param bundle the buffer of pending text
     */
    private void flushText(StringBuilder bundle) {
        if (bundle.length() > 0) {
            tokens.add(new Token(bundle.toString(), Token.TYPE_TEXT));
            bundle.setLength(0);
        }
    }

    /**
     * @return the tokens in input order; this is the lexer's own list, not a copy
     */
    public List<Token> getTokenList() {
        return tokens;
    }

    /** Prints every token with its 1-based position to standard output. Just for test. */
    public void printTokens() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            System.out.println("#" + idx + " " + t.getText());
        }
    }

    /**
     * @return the concatenation of all token texts, i.e. the input without
     *         the whitespace that surrounded its tokens
     */
    public String getCompactJsonTxt() {
        StringBuilder sb = new StringBuilder();
        for (Token t : tokens) {
            sb.append(t.getText());
        }
        return sb.toString();
    }
}

节点类,这个类构成了语法树的节点:

package com.heyang;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

/**
 * A node of the JSON syntax tree.
 *
 * <p>A node has an optional key and one of three kinds of value: a plain
 * text value ({@link #Type_String}), an array of child nodes
 * ({@link #Type_Array}) or an object of child nodes ({@link #Type_List}).
 * {@link #toString()} renders the subtree with one element per line and the
 * members of every object sorted by key.
 *
 * @author Heyang
 */
public class Node implements Comparable<Node> {
    // The three value types
    public static final int Type_String = 1;
    public static final int Type_Array = 2;
    public static final int Type_List = 3;

    // Key is always a String; null for array elements and for the root
    private String key;
    private Node parent;

    // Exactly one of valueString / valueList is used, depending on valueType
    private int valueType;
    private String valueString;
    private List<Node> valueList;

    // Indent depth: number of ancestors between this node and the root
    private int depth;

    /** Creates an empty node with no key, no value type and depth 0. */
    public Node() {
    }

    /**
     * Creates a leaf node holding a text value.
     *
     * @param key   the key, or {@code null} for an array element
     * @param value the value text, rendered verbatim
     */
    public Node(String key, String value) {
        this.key = key;
        this.valueType = Type_String;
        this.valueString = value;
        this.depth = 0;
    }

    /**
     * Creates a container node with an empty child list.
     *
     * @param key  the key, or {@code null}
     * @param type {@link #Type_Array} or {@link #Type_List}
     */
    public Node(String key, int type) {
        this.key = key;
        this.valueType = type;
        this.valueList = new LinkedList<Node>();
    }

    /**
     * Appends a child, makes this node its parent and updates the indent
     * depth of the child and of everything below it. Does nothing when this
     * node has no child list (a leaf created with the text constructor).
     *
     * @param child the node to append
     */
    public void addChild(Node child) {
        if (valueList != null) {
            valueList.add(child);
            child.parent = this;

            // Children added earlier already carry depth + 1, so only the
            // subtree of the new child has to be updated.
            if (isContainer()) {
                child.depth = this.depth + 1;
                child.adjustDepth();
            }
        }
    }

    /** @return {@code true} when this node is an array or an object */
    private boolean isContainer() {
        return valueType == Type_List || valueType == Type_Array;
    }

    /** Recursively sets the depth of all descendants relative to this node. */
    private void adjustDepth() {
        if (isContainer()) {
            for (Node json : valueList) {
                json.depth = this.depth + 1;
                json.adjustDepth();
            }
        }
    }

    /**
     * Renders this subtree, one element per line, indented by depth.
     *
     * <p>Note that the children of an object node are sorted by key in place
     * as a side effect; array children keep their order.
     *
     * @return the formatted text
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        // key
        String tabs = getIndentSpace();
        sb.append(tabs);
        if (key != null) {
            sb.append(key);
            sb.append(":");
        }

        // value
        if (valueType == Type_String) {
            sb.append(valueString);
        } else if (valueType == Type_Array) {
            appendChildren(sb, tabs, "[", "]");
        } else if (valueType == Type_List) {
            Collections.sort(valueList);
            appendChildren(sb, tabs, "{", "}");
        }

        return sb.toString();
    }

    /**
     * Appends the opening bracket, every child on its own line (comma
     * separated) and the closing bracket at this node's indent.
     */
    private void appendChildren(StringBuilder sb, String tabs, String open, String close) {
        sb.append(open).append("\n");

        // Iterate instead of get(i): the child list is a linked list.
        int remaining = valueList.size();
        for (Node json : valueList) {
            remaining--;
            sb.append(json.toString());
            sb.append(remaining > 0 ? ",\n" : "\n");
        }

        sb.append(tabs).append(close);
    }

    /**
     * Orders nodes by key. Nodes without a key sort before nodes with a key
     * and compare equal to each other, so sorting never fails on them.
     *
     * @param other the node to compare with
     * @return negative, zero or positive per the {@link Comparable} contract
     */
    @Override
    public int compareTo(Node other) {
        String otherKey = other.key;
        if (this.key == null) {
            return otherKey == null ? 0 : -1;
        }
        if (otherKey == null) {
            return 1;
        }
        return this.key.compareTo(otherKey);
    }

    /** @return the indent prefix for this node: one space per depth level */
    private String getIndentSpace() {
        return String.join("", Collections.nCopies(this.depth, " "));
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public Node getParent() {
        return parent;
    }

    public void setParent(Node parent) {
        this.parent = parent;
    }

    /** @return the child list itself (not a copy), or {@code null} for a leaf */
    public List<Node> getValueList() {
        return valueList;
    }
}

节点树构建类,顾名思义,此类就是用来构建Node树的:

package com.heyang;

import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * JSON tree builder: turns a token list into a tree of {@link Node}s.
 *
 * <p>The first token is taken to be the opening bracket of the document and
 * decides whether the root is an object or an array; all following tokens
 * are consumed in a single pass.
 *
 * @author heyang
 *
 * 2020-05-18
 */
public class Builder {
    /**
     * Matches <code>"key":value</code>. The key may be any double-quoted
     * string (escaped quotes included), not only identifier-like names; the
     * value is everything after the first colon that follows the key.
     * Group 1 is the key including its quotes, group 2 is the value.
     */
    private static final Pattern KEY_VALUE_PATTERN =
            Pattern.compile("(\"(?:[^\"\\\\]|\\\\.)*\")[:](.+)", Pattern.DOTALL);

    private Node root;
    private int index;
    private List<Token> tokens;

    /**
     * Builds the tree for the given tokens.
     *
     * @param tokens the tokens of one JSON document, starting with its
     *               opening <code>{</code> or <code>[</code>
     */
    public Builder(List<Token> tokens) {
        this.tokens = tokens;
        this.index = 1; // token 0 is the root's own opening bracket

        boolean topLevelArray = !tokens.isEmpty()
                && tokens.get(0).getType() == Token.TYPE_OPEN_BRACKET;
        this.root = new Node(null, topLevelArray ? Node.Type_Array : Node.Type_List);

        addSubNode2(this.root);
    }

    /**
     * Consumes all remaining tokens, adding branches and leaves below the
     * given node.
     *
     * <p>The node currently being filled is tracked in a local variable: an
     * opening bracket descends into the new container, a closing bracket
     * moves back to the parent (or stays put at the root). Call depth is
     * therefore constant, whatever the size of the document.
     *
     * @param parent the node that receives the first parsed children
     */
    private void addSubNode2(Node parent) {
        if (parent == null) {
            return;
        }

        Node current = parent;
        // Tokens (text and colons) seen since the last structural token
        Stack<Token> stack = new Stack<Token>();

        while (index < this.tokens.size()) {
            Token token = tokens.get(index);
            index++;
            int type = token.getType();

            if (type == Token.TYPE_OPEN_BRACE) { // {
                current = openContainer(current, Node.Type_List, stack);
            } else if (type == Token.TYPE_OPEN_BRACKET) { // [
                current = openContainer(current, Node.Type_Array, stack);
            } else if (type == Token.TYPE_CLOSE_BRACE) { // }
                // Before '}' only a key/value pair is accepted.
                addPendingValue(current, stack, false);
                current = closeContainer(current);
            } else if (type == Token.TYPE_CLOSE_BRACKET) { // ]
                addPendingValue(current, stack, true);
                current = closeContainer(current);
            } else if (type == Token.TYPE_COMMA) { // ,
                addPendingValue(current, stack, true);
            } else {
                stack.push(token);
            }
        }
    }

    /**
     * Creates a container node, names it after the pending "key" and ":"
     * tokens when present, and attaches it to the parent.
     *
     * @return the new container, which becomes the node being filled
     */
    private Node openContainer(Node parent, int containerType, Stack<Token> stack) {
        Node container = new Node(null, containerType);

        if (stack.size() >= 2) {
            Token colonToken = stack.pop();
            Token keyToken = stack.pop();
            if (colonToken.getType() == Token.TYPE_COLON
                    && keyToken.getType() == Token.TYPE_TEXT) {
                container.setKey(keyToken.getText());
            }
        }
        stack.clear();

        parent.addChild(container);
        return container;
    }

    /**
     * @return the parent of the container that was just closed, or the
     *         container itself when it has no parent (surplus closing
     *         bracket at root level)
     */
    private Node closeContainer(Node current) {
        Node up = current.getParent();
        return up != null ? up : current;
    }

    /**
     * Turns the pending tokens into a leaf of {@code current} and clears
     * them. Text of the form <code>"key":value</code> becomes a keyed leaf;
     * any other non-empty text becomes a keyless leaf when
     * {@code allowBareValue} is set and is ignored otherwise.
     */
    private void addPendingValue(Node current, Stack<Token> stack, boolean allowBareValue) {
        String text = getTextInStack(stack);
        stack.clear();
        if (text.length() == 0) {
            return;
        }

        Matcher matcher = KEY_VALUE_PATTERN.matcher(text);
        if (matcher.matches()) {
            current.addChild(new Node(matcher.group(1), matcher.group(2)));
        } else if (allowBareValue) {
            current.addChild(new Node(null, text));
        }
    }

    /** @return the texts of the pending tokens, concatenated in input order */
    private String getTextInStack(Stack<Token> stack) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < stack.size(); i++) {
            Token t = stack.elementAt(i);
            sb.append(t.getText());
        }
        return sb.toString();
    }

    /** @return the root node of the built tree */
    public Node getRoot() {
        return root;
    }
}

入口类,这个类将以上四个类串联起来调用:

package com.heyang;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import com.heyang.util.BracketsBalanceChecker;
import com.heyang.util.CommonUtil;

/**
 * Command-line entry point: reads a JSON file, tokenizes it with
 * {@link Lexer}, builds the node tree with {@link Builder} and logs the
 * formatted result.
 */
public class EntryPoint {
    private final static Logger log = LoggerFactory.getLogger(EntryPoint.class);

    /**
     * Runs the analyzer.
     *
     * @param args {@code args[0]} is the path of the JSON file to analyze
     */
    public static void main(String[] args) {
        log.info("JsonAnalyzer started.");

        if (args.length < 1) {
            log.error("Please set json file path in arguments.");
            // Without a path there is nothing to do; reading args[0] would throw.
            return;
        }

        String filePath = args[0];
        log.info("Begin to read file:'{}'", filePath);

        try {
            // Read context from file
            String jsonTxt = CommonUtil.readTextFromFile(filePath);
            log.info("Raw json text=\n{}", jsonTxt);

            // Check brackets balance
            BracketsBalanceChecker bbc = new BracketsBalanceChecker();
            boolean balanced = bbc.isBracketsBalanced(jsonTxt);
            if (balanced) {
                log.info("The brackets in read content are balanced.");
            } else {
                // Only a warning: parsing is still attempted, as before.
                // NOTE(review): how the checker treats brackets inside string
                // values is not visible here - confirm before making this fatal.
                log.warn("The brackets in read content are NOT balanced; the formatted json may be wrong.");
            }

            // Parse json to tokens
            Lexer l = new Lexer(jsonTxt);
            log.info("Compact json text=\n{}", l.getCompactJsonTxt());

            // Build json node tree
            Builder b = new Builder(l.getTokenList());
            Node root = b.getRoot();
            log.info("Formatted json=\n{}", root.toString());
        } catch (Exception ex) {
            // Pass the exception itself so the stack trace is logged; the
            // message alone may be null (e.g. for a NullPointerException).
            log.error("JsonAnalyzer failed: " + ex.getMessage(), ex);
        } finally {
            log.info("JsonAnalyzer end.");
        }
    }
}

最后的感悟:

我在开发生涯中不止一次地遇到复杂的文本解析任务.做文本解析,第一反应往往是《编译原理》的那些东西,但笔者不是计算机科班出身,把网上推荐的龙书虎书鲸书买来一看头都要炸了,开发更是在一番斗争后搁置或是简化了. 多次后我终于想到,做文本解析并非一定要先啃下那些大部头书,用递归下降一样能完成任务,适合我的才是最好的.我先用递归下降的方式解析文本,由易到难,再辅以看书,编译原理的学习和开发任务就可以并行了.这比啃不动书而止步不前要强得多.编程就是这样,能动手才算真正掌握一门技能, 学东西就该学明白,钻透!纸上得来一知半解,最终还是要重新夯实!

--2020年5月18日--

最新文章

  1. java必备基础知识点
  2. 恶心的hadoop集群
  3. 转 Learning To Rank之LambdaMART的前世今生
  4. android关于AndroidManifest.xml详细分析
  5. swift学习(二)--基本运算符、字符串、集合操作
  6. 整理收藏一份PHP高级工程师的笔试…
  7. 读headFirst设计模式 - 策略模式
  8. MyEclipse 快捷键问题
  9. 软件性能测试技术树(三)----数据库(MySQL)性能
  10. IP通信基础学习第四周(下)
  11. JS(JavaScript)的进一步了解2(更新中···)
  12. Sigma Function
  13. .15-浅析webpack源码之WebpackOptionsApply模块-plugin事件流总览
  14. [django]python异步神器-celery
  15. Python使用xlwt模块 操作Excel文件
  16. 结构体访问成员变量什么时候该用“->”或者是"."呢?的困惑
  17. Qt 计算两个日前间隔天数
  18. zeptojs的一些别人的博客
  19. @Html.Partial,@Html.Action,@Html.RenderPartial,@Html.RenderAction [转]
  20. codeforces 658D D. Bear and Polynomials(数学)

热门文章

  1. JS学习第二天
  2. C++二分查找:lower_bound( )和upper_bound( )
  3. LeetCode 646 最长数对链详解
  4. BiLSTM:序列标注任务的标杆
  5. adb命令将抓包工具证书从用户目录移动至系统目录,解决反爬对于本地证书认证
  6. C++ Templates(1.3 多模板参数 Multiple Template Parameters)
  7. 冒泡排序(Bubble Sorting)
  8. Statistics and Samples in Distributional Reinforcement Learning
  9. IDEA创建动态Web项目
  10. 问题定位 | Peronca Xtrabackup 8.0近日踩坑总结 - xtrabackup 2.4和8.0区别