package hello;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map.Entry; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;

/**
 * Small Lucene demo: builds an on-disk index from line-delimited JSON files and
 * then runs a single term query against it.
 *
 * <p>Every input file is read line by line and only the lines with an odd
 * zero-based index (2nd, 4th, ...) are parsed as a flat JSON object of string
 * values. NOTE(review): the data directory is named "ES", so this is presumably
 * the Elasticsearch bulk format where action lines and source lines alternate —
 * confirm against the data.
 *
 * <p>Errors are reported with {@code printStackTrace()} and never propagated,
 * matching the original behaviour of this demo.
 */
public class HelloLucene222 {

    /** Default location of the on-disk Lucene index. */
    private static final String INDEX_DIR = "C:\\exp\\test_data\\index";

    /** Default directory containing the JSON input files. */
    private static final String DATA_DIR = "C:\\exp\\test_data\\ES";

    /** Field searched by the no-argument {@link #search()}. */
    private static final String SEARCH_FIELD = "field-38";

    /** Query text used by the no-argument {@link #search()}. */
    private static final String SEARCH_TEXT = "tcholo";

    /** Maximum number of hits printed by the no-argument {@link #search()}. */
    private static final int MAX_HITS = 10;

    /** Shared JSON parser; created once instead of once per input file. */
    private static final Gson GSON = new Gson();

    /** Target type for one JSON line: a flat map of string keys to string values. */
    private static final TypeToken<HashMap<String, String>> EVENT_TYPE =
            new TypeToken<HashMap<String, String>>() {
            };

    /**
     * Rebuilds the index from the default data directory, then runs the default query.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HelloLucene222 hLucene = new HelloLucene222();
        hLucene.index();
        System.out.print("search ...\n");
        hLucene.search();
    }

    /** Builds the index at the default location from the default data directory. */
    public void index() {
        index(INDEX_DIR, DATA_DIR);
    }

    /**
     * Builds a fresh index (any existing index at {@code indexDir} is replaced)
     * from every regular file directly inside {@code dataDir}.
     *
     * @param indexDir directory in which the Lucene index is created
     * @param dataDir  directory whose files are indexed
     */
    public void index(String indexDir, String dataDir) {
        System.out.println("Indexing to directory begin...");
        pause(10);
        System.out.println("sleep OK");
        long start = System.currentTimeMillis();
        try {
            // List the inputs before opening the index: OpenMode.CREATE would
            // otherwise replace an existing index even when there is nothing to read.
            File[] inputs = new File(dataDir).listFiles();
            if (inputs == null) {
                throw new IOException("Cannot list data directory: " + dataDir);
            }
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // try-with-resources closes the writer first, then the directory,
            // on both the success and the failure path.
            try (Directory directory = FSDirectory.open(Paths.get(indexDir));
                    IndexWriter writer = new IndexWriter(directory, iwc)) {
                for (File f : inputs) {
                    if (!f.isFile()) {
                        // Sub-directories cannot be opened with FileReader.
                        continue;
                    }
                    indexFile(writer, f);
                    System.out.println("Indexing to directory '" + f.getAbsolutePath() + "'...");
                }
                long end = System.currentTimeMillis();
                System.out.println("add docment Took : " + ((end - start) / 1000.0));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("Took : " + ((end - start) / 1000.0));
        pause(1);
        System.out.println("sleep OK");
    }

    /** Runs the default query against the default index and prints the hits. */
    public void search() {
        search(INDEX_DIR, SEARCH_FIELD, SEARCH_TEXT, MAX_HITS);
    }

    /**
     * Parses {@code queryText} against {@code field} and prints
     * {@code filename[path]} for each of the top {@code maxHits} matching documents.
     *
     * @param indexDir  directory containing the Lucene index
     * @param field     default field used by the query parser
     * @param queryText query in the classic Lucene query syntax
     * @param maxHits   maximum number of hits to print
     */
    public void search(String indexDir, String field, String queryText, int maxHits) {
        long start = System.currentTimeMillis();
        // The reader and the directory are closed even if parsing or searching fails.
        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
                DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            QueryParser parser = new QueryParser(field, new StandardAnalyzer());
            Query query = parser.parse(queryText);
            TopDocs tdoc = isearcher.search(query, maxHits);
            for (ScoreDoc s : tdoc.scoreDocs) {
                // Resolve the hit to its stored fields.
                Document document = isearcher.doc(s.doc);
                System.out.println(document.get("filename") + "[" + document.get("path") + "]");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("Took : " + ((end - start) / 1000.0));
    }

    /** Adds one document to {@code writer} for every odd-indexed line of {@code f}. */
    private static void indexFile(IndexWriter writer, File f) throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(f))) {
            String line;
            int lineIndex = 0;
            while ((line = br.readLine()) != null) {
                if ((lineIndex & 1) == 1) {
                    HashMap<String, String> events = GSON.fromJson(line, EVENT_TYPE.getType());
                    // Gson yields null for an empty line or a literal JSON null.
                    if (events != null) {
                        writer.addDocument(toDocument(events, f));
                    }
                }
                lineIndex += 1;
            }
        }
    }

    /**
     * Converts one parsed JSON object into a Lucene document: every entry becomes
     * an analyzed, unstored text field, and the source file's name and absolute
     * path are each stored exactly once so that search results can display them.
     */
    private static Document toDocument(HashMap<String, String> events, File source) {
        Document doc = new Document();
        for (Entry<String, String> entry : events.entrySet()) {
            if (entry.getValue() == null) {
                // A JSON null has nothing to index, and Lucene rejects null field values.
                continue;
            }
            doc.add(new TextField(entry.getKey(), entry.getValue(), Field.Store.NO));
        }
        // NOTE(review): Field.Index is the file's existing (older) Lucene API; on
        // Lucene releases that no longer ship it, StringField is the replacement.
        doc.add(new Field("filename", source.getName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("path", source.getAbsolutePath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Sleeps for {@code millis} milliseconds, restoring the interrupt flag if interrupted. */
    private static void pause(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }
}

最新文章

  1. JavaScript 随机数
  2. poi获取合并单元格内的第一行第一列的值
  3. osquery An Operating System Instrumentation Framework
  4. sql附加数据库错误5120
  5. oracle 创建数据表空间和用户
  6. 【循序渐进学Python】1. Python基础知识
  7. flex 生命周期 ibm引用
  8. CentOS7.0下载各版本说明 新增Everything版
  9. myeclipse10 如何把代码预览的窗口去掉
  10. iOS随机颜色
  11. 基于 Spring MVC 的开源测试用例管理系统以及开发自测的实践
  12. Android在 普通类(非Activity,多数为Adapter) 中 传输数据为空值 解决方法 :在startActivity 用 intent传输数据
  13. 機器學習基石 (Machine Learning Foundations) 作业1 Q15-17的C++实现
  14. beta冲刺5-咸鱼
  15. SQLite新建数据库及txt文件(CSV文件)导入
  16. springboot整合shiro应用
  17. [蓝桥杯]ALGO-186.算法训练_P0501
  18. 【瞎搞题】gym226123 L. For the Honest Election
  19. Uncaught DOMException: Failed to execute 'removeChild' on 'Node': The node ……
  20. TokuDB的索引结构–分形树的实现

热门文章

  1. bash变量类型详解
  2. golang-random随机数
  3. 洛谷—— P2515 [HAOI2010]软件安装
  4. codeforces edu40
  5. iOS开发 清除电话号码中的其他符号
  6. 【转载】容器技术 & Docker & 与虚拟化的比较
  7. 【Todo】已经打开的页面需要清掉的坑
  8. iOS开发之计算两个日期的时间间隔
  9. 从头学起-CLR的执行模型
  10. 走入asp.net mvc不归路:[4]说说Action有哪些常见成员