lucene学习-3 - 代码重构
2024-08-28 17:25:39
本节内容如标题所示:对上一节的代码进行重构,大体思路如下:
- 功能拆分;
- 创建必要的工具类;
两个工具类StringUtils和TxtUtils。
StringUtils,主要是获取当前系统的换行符:
package com.zhyea.util; public class StringUtils { public static final String NEWLINE = System.getProperty("line.separator"); }
TxtUtils,主要用于读取txt文件。这里使用了一个自定义类FileCharsetDetector来检测文件的编码格式(原文此处附有该类的超链接):
package com.zhyea.util; import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader; /**
* txt文件处理工具类
*
* @author robin
*
*/
public class TxtUtils {

    /**
     * Determines the charset name to use when decoding a txt file.
     * <p>
     * Delegates to {@code FileCharsetDetector.checkEncoding(File)}. A result of
     * {@code "windows-1252"} is reported as {@code "Unicode"}; any other result
     * is returned unchanged.
     *
     * @param file
     *            the txt file to inspect
     * @return the charset name reported for the file
     * @throws IOException
     *             propagated from the charset detector
     */
    public static String checkEncode(File file) throws IOException {
        String encode = FileCharsetDetector.checkEncoding(file);
        // Literal-first comparison: a null detector result must not raise a
        // NullPointerException here.
        return "windows-1252".equals(encode) ? "Unicode" : encode;
    }

    /**
     * Reads the whole content of a txt file into a string.
     * <p>
     * The file is decoded with the charset returned by
     * {@link #checkEncode(File)}. It is read line by line, and every line,
     * including the last one, is followed by {@code StringUtils.NEWLINE} in
     * the result.
     *
     * @param file
     *            the txt file to read
     * @return the decoded file content
     * @throws IOException
     *             if the file cannot be opened or read, or the detected
     *             charset is not supported
     */
    public static String readTxt(File file) throws IOException {
        String encode = checkEncode(file);
        // Open the stream before entering the try block so that the finally
        // clause never has to deal with a null resource. The previous version
        // closed a reader that could still be null, hiding the real failure
        // behind a NullPointerException.
        FileInputStream stream = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, encode));
            StringBuilder builder = new StringBuilder();
            String line;
            while (null != (line = reader.readLine())) {
                builder.append(line).append(StringUtils.NEWLINE);
            }
            return builder.toString();
        } finally {
            // Closing the underlying stream releases the file handle even
            // when the reader could not be constructed (unsupported charset).
            stream.close();
        }
    }
}
然后是拆分后的Lucene操作类:
package com.zhyea.doggie; import java.io.File;
import java.io.IOException; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.zhyea.util.TxtUtils;

/**
 * Static helpers wrapping the Lucene steps used by the indexing and search
 * examples: analyzer creation, index writing, index reading, searching and
 * printing results.
 */
public class DoggieLucene {

    /**
     * Most recently created analyzer; reused while the same analyzer class is
     * requested again.
     */
    private static Analyzer analyzer;

    /**
     * Returns an analyzer of the given class.
     * <p>
     * The last created instance is cached in a static field and returned
     * again when its class equals {@code clazz}; otherwise a new instance is
     * created through the class's no-argument constructor and replaces the
     * cached one.
     *
     * @param clazz
     *            analyzer class to instantiate
     * @return the cached or newly created analyzer
     * @throws InstantiationException
     *             if the class cannot be instantiated
     * @throws IllegalAccessException
     *             if the no-argument constructor is not accessible
     */
    public static Analyzer createAnalyzer(Class<?> clazz)
            throws InstantiationException, IllegalAccessException {
        if (null != analyzer && analyzer.getClass().equals(clazz)) {
            return analyzer;
        }
        return analyzer = (Analyzer) clazz.newInstance();
    }

    /**
     * Creates an index writer that stores its index in a file-system
     * directory.
     *
     * @param analyzer
     *            analyzer used by the writer
     * @param indexPath
     *            path of the directory holding the index
     * @return a new index writer; the caller is responsible for closing it
     * @throws IOException
     *             if the index directory or the writer cannot be opened
     */
    public static IndexWriter createIndexWriter(Analyzer analyzer,
            String indexPath) throws IOException {
        // Directory in which the index is stored
        Directory dir = FSDirectory.open(new File(indexPath));
        // Writer configuration
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,
                analyzer);
        return new IndexWriter(dir, config);
    }

    /**
     * Indexes the text files found directly inside a local directory.
     * <p>
     * For every regular file a document is added with two stored fields:
     * {@code path} (canonical file path, not tokenized) and {@code content}
     * (file text as read by {@code TxtUtils.readTxt}). Entries that are not
     * regular files, such as sub-directories, are skipped. The writer is
     * committed once after all files have been added.
     *
     * @param writer
     *            index writer receiving the documents
     * @param localDocPath
     *            path of the directory containing the text files
     * @throws IOException
     *             if {@code localDocPath} cannot be listed as a directory, or
     *             reading a file or writing the index fails
     */
    public static void addLocalDocument(IndexWriter writer, String localDocPath)
            throws IOException {
        File directory = new File(localDocPath);
        File[] files = directory.listFiles();
        if (null == files) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs; fail with a clear message instead of an NPE.
            throw new IOException("Cannot list documents in directory: "
                    + localDocPath);
        }
        for (File tmp : files) {
            if (!tmp.isFile()) {
                // Sub-directories and other non-regular entries cannot be
                // read as text.
                continue;
            }
            Document doc = new Document();
            doc.add(new StringField("path", tmp.getCanonicalPath(),
                    Field.Store.YES));
            doc.add(new TextField("content", TxtUtils.readTxt(tmp),
                    Field.Store.YES));
            writer.addDocument(doc);
        }
        // One commit for the whole batch instead of one per document.
        writer.commit();
    }

    /**
     * Opens an index reader on the index stored in a file-system directory.
     *
     * @param indexPath
     *            path of the directory holding the index
     * @return a new index reader; the caller is responsible for closing it
     * @throws IOException
     *             if the index cannot be opened
     */
    public static IndexReader createIndexReader(String indexPath)
            throws IOException {
        return DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    }

    /**
     * Creates an index searcher on top of an index reader.
     *
     * @param reader
     *            index reader to search
     * @return a new index searcher
     */
    public static IndexSearcher createIndexSearcher(IndexReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Runs a query and returns at most 10000 top hits.
     *
     * @param searcher
     *            searcher executing the query
     * @param query
     *            query to run
     * @return the top matching documents
     * @throws IOException
     *             if reading the index fails
     */
    public static TopDocs executeSearch(IndexSearcher searcher, Query query)
            throws IOException {
        return searcher.search(query, 10000);
    }

    /**
     * Prints each hit to standard output as its score followed by the stored
     * {@code path} field.
     *
     * @param docs
     *            search result to print
     * @param reader
     *            index reader used to load the stored documents
     * @throws IOException
     *             if a stored document cannot be loaded
     */
    public static void showResult(TopDocs docs, IndexReader reader)
            throws IOException {
        for (ScoreDoc tmp : docs.scoreDocs) {
            Document doc = reader.document(tmp.doc);
            System.out.println(tmp.score + " " + doc.get("path"));
        }
    }
}
拆的比较琐碎了,凑合看吧。
创建索引的类:
package com.zhyea.doggie; import java.io.IOException; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;
import com.zhyea.util.FileUtil;

/**
 * Example entry point that builds the index from the local text files.
 */
public class IndexTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /** Directory containing the text files to index. */
    String docPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\docs";

    /**
     * Builds the index and prints the stack trace of any exception.
     *
     * @param args
     *            not used
     */
    public static void main(String[] args) {
        IndexTest indexer = new IndexTest();
        try {
            indexer.createIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Obtains a {@code SmartChineseAnalyzer}, opens an index writer on
     * {@code indexPath} and indexes the files under {@code docPath}. Once the
     * writer has been opened it is always closed, even when indexing fails.
     *
     * @throws IOException
     *             if opening the writer, indexing or closing fails
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    private void createIndex() throws IOException,
            InstantiationException,
            IllegalAccessException {
        Analyzer chineseAnalyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
        IndexWriter indexWriter = DoggieLucene.createIndexWriter(chineseAnalyzer, indexPath);
        try {
            DoggieLucene.addLocalDocument(indexWriter, docPath);
        } finally {
            indexWriter.close();
        }
    }
}
执行搜索的类:
package com.zhyea.doggie; import java.io.IOException; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

/**
 * Example entry point that searches the previously built index.
 */
public class SearchTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /**
     * Runs the search and prints the stack trace of any exception.
     *
     * @param args
     *            not used
     */
    public static void main(String[] args) {
        SearchTest search = new SearchTest();
        try {
            search.executeSearch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@code indexPath}, parses the fixed query text
     * against the {@code content} field with a {@code SmartChineseAnalyzer},
     * runs it and prints the hits. Once the reader has been opened it is
     * always closed, even when the search fails.
     *
     * @throws ParseException
     *             if the query text cannot be parsed
     * @throws IOException
     *             if reading the index fails
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    public void executeSearch() throws ParseException,
            IOException,
            InstantiationException,
            IllegalAccessException {
        IndexReader indexReader = DoggieLucene.createIndexReader(indexPath);
        try {
            IndexSearcher indexSearcher = DoggieLucene.createIndexSearcher(indexReader);
            Analyzer chineseAnalyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            QueryParser parser = new QueryParser("content", chineseAnalyzer);
            Query parsed = parser.parse("杨过");
            TopDocs hits = DoggieLucene.executeSearch(indexSearcher, parsed);
            DoggieLucene.showResult(hits, indexReader);
        } finally {
            indexReader.close();
        }
    }
}
OK。
最新文章
- 【分布式】Zookeeper系统模型
- PHP设计模式笔记
- 论文笔记之:Visual Tracking with Fully Convolutional Networks
- 垂直的TextView
- 删除ArrayList中的元素
- 【策略】UVa 1344 - Tian Ji -- The Horse Racing(田忌赛马)
- CXF错误:Unsupported major.minor version 51.0,java.lang.UnsupportedClassVersionErro
- SpringSecurity 在MVC 中的简单使用(翻译的,稍加改动)
- <high performance web sites> 阅读小记
- qt 操作word
- Angularjs Directive(指令)机制
- Java 枚举7常见种用法(转)
- Applets的分析
- EChart 猜猜乐
- 2018-2019-3《Java程序设计》第二周学习总结
- Spring+SpringMVC重复加载配置文件问题
- 离屏Canvas — 使用Web Worker提高你的Canvas运行速度
- python计算两个数的百分比
- Vue.js $nextTick
- ab命令压力测试