import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test; import cn.itcast._domain.Article; public class HelloWorld { private static Directory directory; // 索引库文件夹
private static Analyzer analyzer; // 分词器 static {
try {
directory = FSDirectory.open(new File("./indexDir"));
analyzer = new StandardAnalyzer(Version.LUCENE_30);
} catch (IOException e) {
throw new RuntimeException(e);
}
} // 建立索引
@Test
public void testCreateIndex() throws Exception {
// 准备数据
Article article = new Article();
article.setId(1);
article.setTitle("准备Lucene的开发环境");
article.setContent("假设信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。"); // 放到索引库中
// 1, 把Article转为Document
Document doc = new Document();
String idStr = article.getId().toString();
doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED));
doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
doc.add(new Field("content", article.getContent(), Store.NO, Index.ANALYZED)); // 2, 把Document放到索引库中
IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.UNLIMITED);
indexWriter.addDocument(doc);
indexWriter.close();
} // 搜索
@Test
public void testSearch() throws Exception {
// 准备查询条件
String queryString = "lucene";
// String queryString = "hibernate"; // 运行搜索
List<Article> list = new ArrayList<Article>(); // ========================================================================================== // 1,把查询字符串转为Query对象(默认仅仅从title中查询)
QueryParser queryParser = new QueryParser(Version.LUCENE_30, "title", analyzer);
Query query = queryParser.parse(queryString); // 2,运行查询,得到中间结果
IndexSearcher indexSearcher = new IndexSearcher(directory); // 指定所用的索引库
TopDocs topDocs = indexSearcher.search(query, 100); // 最多返回前n条结果 int count = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs; // 3,处理结果
for (int i = 0; i < scoreDocs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
float score = scoreDoc.score; // 相关度得分
int docId = scoreDoc.doc; // Document的内部编号 // 依据编号拿到Document数据
Document doc = indexSearcher.doc(docId); // 把Document转为Article
String idStr = doc.getField("id").toString(); //doc.get("id");
String title = doc.get("title");
String content = doc.get("content"); // 等价于 doc.getField("content").stringValue(); Article article = new Article();
article.setId(Integer.parseInt(idStr));
article.setTitle(title);
article.setContent(content); list.add(article);
}
indexSearcher.close(); // ========================================================================================== // 显示结果
System.out.println("总结果数:" + list.size());
for (Article a : list) {
System.out.println("------------------------------");
System.out.println("id = " + a.getId());
System.out.println("title = " + a.getTitle());
System.out.println("content = " + a.getContent());
}
}
}

public class Article {

	private Integer id;
private String title;
private String content; public Integer getId() {
return id;
} public void setId(Integer id) {
this.id = id;
} public String getTitle() {
return title;
} public void setTitle(String title) {
this.title = title;
} public String getContent() {
return content;
} public void setContent(String content) {
this.content = content;
} }

最新文章

  1. excel小技巧
  2. html5学习笔记一
  3. Intent的七大组件——Android开发之路5
  4. Jqgrid入门-Jqgrid格式化数据(九)
  5. javascript取url参数的几种方法
  6. 论文摘抄 - FlumeJava
  7. Hacker(23)----破解常见文件密码
  8. OpenVPN多实例优化的思考过程
  9. 102 - kube-scheduler源码分析 - cobra-寻找scheduler组件启动函数
  10. Spring Data Redis 让 NoSQL 快如闪电(2)
  11. Lua保留指定小数位数
  12. 腾讯AlloyTeam正式发布omi-cli脚手架 - 创建网站无需任何配置
  13. TextView 链接显示及跳转
  14. centos 7.x开放端口
  15. Codex Delphi Expert
  16. linux查看文件被哪个进程占用?
  17. flask~数据库
  18. synchronized关键字的用法总结
  19. 获取token
  20. 较快的maven的settings.xml文件

热门文章

  1. ThreeJs 基础入门
  2. ogre3d环境配置与简单程序示例
  3. webdriver高级应用- 右键另存为下载文件
  4. python - 接口自动化测试实战 - case1 - 优化版
  5. python学习--Django mvc框架简介
  6. Educational Codeforces Round 20 C. Maximal GCD
  7. 九度oj 题目1007:奥运排序问题
  8. CSS编码规范(转)
  9. kali2 install Nessus
  10. APUE 学习笔记(四) 标准I/O库