public class WordCount {
static Directory directory;
// 创建分词器
static Analyzer analyzer = new IKAnalyzer();
static IndexWriterConfig config = new IndexWriterConfig(analyzer);
static IndexWriter writer;
static IndexReader reader;
static {
// 指定索引存放目录以及配置参数
try {
directory = FSDirectory.open(Paths.get("F:/luceneIndex"));
writer = new IndexWriter(directory, config);
} catch (IOException e) {
e.printStackTrace();
}
} public static void main(String[] args) {
indexCreate();
Map<String, Long> map = getTotalFreqMap();
Map<String, Long> sortMap = sortMapByValue(map);
Set<Entry<String, Long>> entrySet = sortMap.entrySet();
Iterator<Entry<String, Long>> iterator = entrySet.iterator();
while (iterator.hasNext()) {
Entry<String, Long> entry = iterator.next();
System.out.println(entry.getKey() + "----" + entry.getValue());
} } /**
* 创建索引
*/
public static void indexCreate() {
// 文件夹检测(创建索引前要保证目录是空的)
File file = new File("f:/luceneIndex");
if (!file.exists()) {
file.mkdirs();
} else {
try {
file.delete();
} catch (Exception e) {
e.printStackTrace();
}
} // 将采集的数据封装到Document中
Document doc = new Document();
FieldType ft = new FieldType();
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
ft.setStored(true);
ft.setStoreTermVectors(true);
ft.setTokenized(true);
// ft.setStoreTermVectorOffsets(true);
// ft.setStoreTermVectorPositions(true); // 读取文件内容(小文件,readFully)
File content = new File("f:/qz/twitter.txt");
try {
byte[] buffer = new byte[(int) content.length()];
IOUtils.readFully(new FileInputStream(content), buffer);
doc.add(new Field("twitter", new String(buffer), ft));
} catch (Exception e) {
e.printStackTrace();
} // 生成索引
try {
writer.addDocument(doc);
// 关闭
writer.close(); } catch (IOException e) {
e.printStackTrace();
}
} /**
* 获得词频map
*
* @throws ParseException
*/
public static Map<String, Long> getTotalFreqMap() {
Map<String, Long> map = new HashMap<String, Long>();
try {
reader = DirectoryReader.open(directory);
List<LeafReaderContext> leaves = reader.leaves();
for (LeafReaderContext leafReaderContext : leaves) {
LeafReader leafReader = leafReaderContext.reader(); Terms terms = leafReader.terms("twitter"); TermsEnum iterator = terms.iterator(); BytesRef term = null; while ((term = iterator.next()) != null) {
String text = term.utf8ToString();
map.put(text, iterator.totalTermFreq());
} }
reader.close();
return map;
} catch (IOException e) {
e.printStackTrace();
}
return null;
} /**
* 使用 Map按value进行排序
*
* @param map
* @return
*/
public static Map<String, Long> sortMapByValue(Map<String, Long> oriMap) {
if (oriMap == null || oriMap.isEmpty()) {
return null;
}
Map<String, Long> sortedMap = new LinkedHashMap<String, Long>(); List<Map.Entry<String, Long>> entryList = new ArrayList<Map.Entry<String, Long>>(oriMap.entrySet());
Collections.sort(entryList, new MapValueComparator()); Iterator<Map.Entry<String, Long>> iter = entryList.iterator();
Map.Entry<String, Long> tmpEntry = null;
while (iter.hasNext()) {
tmpEntry = iter.next();
sortedMap.put(tmpEntry.getKey(), tmpEntry.getValue());
}
return sortedMap;
}
} class MapValueComparator implements Comparator<Map.Entry<String, Long>> { @Override
public int compare(Entry<String, Long> me1, Entry<String, Long> me2) {
if (me1.getValue() == me2.getValue()) {
return ;
}
return me1.getValue() > me2.getValue() ? - : ;
// return me1.getValue().compareTo(me2.getValue());
}
}

Map 排序代码参考:https://www.cnblogs.com/zhujiabin/p/6164826.html

最新文章

  1. 基于NPOI的报表引擎——ExcelReport
  2. MongoDB(五)mongo语法和mysql语法对比学习
  3. django的跨站请求访问
  4. C#获取IP和主机名
  5. bc.34.B.Building Blocks(贪心)
  6. Cracking the Code Interview 4.3 Array to Binary Tree
  7. 用defy来潜水最终还是挂了........
  8. mysql 交叉表
  9. 在User Profile Service中配置AD的同步连接
  10. [转] linux系统文件流、文件描述符与进程间关系详解
  11. MySQL中同一时候存在创建和上次更新时间戳字段解决方法浅析
  12. Publish Web Site To IIS From VS
  13. Oracle免安装绿色版-PLSQL连接报12154
  14. python捕获Ctrl+C信号
  15. qhfl-6 购物车
  16. oracle listagg和wm_concat函数
  17. java ArrayList 迭代器快速失败源码分析
  18. springfox+swagger2生成API文档
  19. Unity3D工程全资源自动检测系统
  20. BZOJ 3101: N皇后 构造

热门文章

  1. amazeui学习笔记--css(常用组件1)--小徽章Badge
  2. 目标识别(object detection)中的 IoU(Intersection over Union)
  3. numpy,scipy,pandas 和 matplotlib
  4. (转) 25个必须记住的SSH命令
  5. Maven学习总结(16)——深入理解maven生命周期和插件
  6. windows SID
  7. iTestin云测工具
  8. 新手MFC学习之Socket练习
  9. sum()函数——MATLAB
  10. 恢复SLAVE上的某几张表的简要方法