lucene学习-3 - 代码重构

本节内容如标题所示：对上一节的代码进行重构，大体思路如下：

功能拆分；
创建必要的工具类；

两个工具类StringUtils和TxtUtils。

StringUtils，主要是获取当前系统的换行符：

package com.zhyea.util;

/**
 * String-related constants shared across the project.
 */
public class StringUtils {

    /**
     * Line separator of the current platform, read once from the
     * {@code line.separator} system property when this class is initialized.
     */
    public static final String NEWLINE =
            System.getProperty("line.separator");

}

TxtUtils，主要用于读取txt文件。这里使用了一个自定义类FileCharsetDetector来检测文件编码（原文此处附有该类的超链接）：

package com.zhyea.util;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

/**

 * txt文件处理工具类

 *

 * @author robin

 *

 */

/**
 * Utility methods for reading plain-text (.txt) files.
 *
 * @author robin
 */
public class TxtUtils {

    /**
     * Determines the charset name to use when decoding a text file.
     *
     * <p>Delegates to {@code FileCharsetDetector.checkEncoding(File)}. A result
     * of {@code "windows-1252"} is replaced by {@code "Unicode"}; any other
     * result is returned unchanged.
     * NOTE(review): the remapping presumably works around the detector
     * misreporting UTF-16 files -- confirm against FileCharsetDetector.
     *
     * @param file
     *            the text file to inspect
     * @return the charset name to decode the file with
     * @throws IOException
     *             propagated from the charset detector
     */
    public static String checkEncode(File file) throws IOException {
        String encode = FileCharsetDetector.checkEncoding(file);
        // Constant-first comparison: a null detector result is passed through
        // instead of raising a NullPointerException here.
        if ("windows-1252".equals(encode)) {
            return "Unicode";
        }
        return encode;
    }

    /**
     * Reads the whole content of a text file into a string.
     *
     * <p>The file is decoded with the charset reported by
     * {@link #checkEncode(File)}. Every line read is followed by
     * {@link StringUtils#NEWLINE}, so the original line terminators are
     * normalized and the result ends with a line separator whenever the file
     * contains at least one line.
     *
     * @param file
     *            the text file to read
     * @return the decoded file content
     * @throws IOException
     *             if the encoding cannot be determined, or the file cannot be
     *             opened or read
     */
    public static String readTxt(File file) throws IOException {
        FileInputStream input = null;
        BufferedReader reader = null;
        try {
            String encode = checkEncode(file);
            input = new FileInputStream(file);
            reader = new BufferedReader(new InputStreamReader(input, encode));
            StringBuilder builder = new StringBuilder();
            String line;
            while (null != (line = reader.readLine())) {
                builder.append(line).append(StringUtils.NEWLINE);
            }
            return builder.toString();
        } finally {
            // The reader may never have been created (encoding detection or
            // opening the file failed); closing it unconditionally would raise
            // a NullPointerException that hides the real error.
            if (null != reader) {
                // Closing the reader also closes the wrapped stream.
                reader.close();
            } else if (null != input) {
                // The stream was opened but the reader could not be built,
                // e.g. because the charset name was rejected.
                input.close();
            }
        }
    }

}

然后是拆分后的Lucene操作类：

package com.zhyea.doggie;

import java.io.File;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.StringField;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import com.zhyea.util.TxtUtils;

/**
 * Static helpers wrapping the basic Lucene operations used by this project:
 * creating analyzers, index writers, index readers and searchers, adding
 * local text files to an index, running a query and printing its hits.
 */
public class DoggieLucene {

    /** Maximum number of hits requested by {@code executeSearch}. */
    private static final int MAX_HITS = 10000;

    /**
     * Most recently created analyzer; reused while the same analyzer class
     * keeps being requested.
     */
    private static Analyzer analyzer;

    /**
     * Returns an analyzer of the given class.
     *
     * <p>The last analyzer created is cached: if its class equals
     * {@code clazz} it is returned as is, otherwise a new instance is created
     * through the no-argument constructor and replaces the cached one.
     *
     * @param clazz
     *            the analyzer class to instantiate
     * @return an analyzer instance of the given class
     * @throws InstantiationException
     *             if the class cannot be instantiated
     * @throws IllegalAccessException
     *             if the class or its no-argument constructor is not accessible
     */
    public static Analyzer createAnalyzer(Class<?> clazz)
            throws InstantiationException, IllegalAccessException {
        if (null != analyzer && analyzer.getClass().equals(clazz)) {
            return analyzer;
        }
        analyzer = (Analyzer) clazz.newInstance();
        return analyzer;
    }

    /**
     * Creates an index writer that stores its index in a file-system directory.
     *
     * @param analyzer
     *            the analyzer used to tokenize indexed text
     * @param indexPath
     *            path of the directory holding the index
     * @return a new index writer; the caller is responsible for closing it
     * @throws IOException
     *             if the index directory or the writer cannot be opened
     */
    public static IndexWriter createIndexWriter(Analyzer analyzer,
            String indexPath) throws IOException {
        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,
                    analyzer);
            return new IndexWriter(dir, config);
        } catch (IOException e) {
            // Nobody else holds the directory yet, so release it before
            // reporting the failure.
            closeAfterFailure(dir);
            throw e;
        }
    }

    /**
     * Indexes the regular files found directly inside a local directory.
     *
     * <p>Each file becomes one document with a stored {@code path} field
     * (canonical path, not tokenized) and a stored {@code content} field
     * (file text, tokenized). Sub-directories and other non-regular entries
     * are skipped. The writer is committed once after all files were added.
     *
     * @param writer
     *            the index writer receiving the documents
     * @param localDocPath
     *            path of the directory containing the text files
     * @throws IOException
     *             if the directory cannot be listed, a file cannot be read, or
     *             the index cannot be written
     */
    public static void addLocalDocument(IndexWriter writer, String localDocPath)
            throws IOException {
        File directory = new File(localDocPath);
        // listFiles() returns null when the path is not a directory or an
        // I/O error occurs; report that instead of failing with an NPE.
        File[] files = directory.listFiles();
        if (null == files) {
            throw new IOException("Cannot list files in directory: "
                    + localDocPath);
        }
        for (File tmp : files) {
            if (!tmp.isFile()) {
                // Directories cannot be read as text files.
                continue;
            }
            Document doc = new Document();
            doc.add(new StringField("path", tmp.getCanonicalPath(),
                    Field.Store.YES));
            doc.add(new TextField("content", TxtUtils.readTxt(tmp),
                    Field.Store.YES));
            writer.addDocument(doc);
        }
        // Committing is expensive; do it once for the whole batch rather than
        // after every single document.
        writer.commit();
    }

    /**
     * Creates an index reader over the index stored in a file-system directory.
     *
     * @param indexPath
     *            path of the directory holding the index
     * @return a new index reader; the caller is responsible for closing it
     * @throws IOException
     *             if the index directory or the reader cannot be opened
     */
    public static IndexReader createIndexReader(String indexPath)
            throws IOException {
        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            return DirectoryReader.open(dir);
        } catch (IOException e) {
            // Release the directory when no reader could be created on it.
            closeAfterFailure(dir);
            throw e;
        }
    }

    /**
     * Creates an index searcher on top of an index reader.
     *
     * @param reader
     *            the index reader to search
     * @return a new index searcher
     */
    public static IndexSearcher createIndexSearcher(IndexReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Runs a query and returns at most {@code 10000} top hits.
     *
     * @param searcher
     *            the searcher to run the query with
     * @param query
     *            the query to execute
     * @return the top matching documents
     * @throws IOException
     *             if the index cannot be read
     */
    public static TopDocs executeSearch(IndexSearcher searcher, Query query)
            throws IOException {
        return searcher.search(query, MAX_HITS);
    }

    /**
     * Prints the score and the stored {@code path} field of every hit to
     * standard output, one hit per line.
     *
     * @param docs
     *            the search result to display
     * @param reader
     *            the index reader used to load the stored documents
     * @throws IOException
     *             if a stored document cannot be loaded
     */
    public static void showResult(TopDocs docs, IndexReader reader)
            throws IOException {
        for (ScoreDoc hit : docs.scoreDocs) {
            Document doc = reader.document(hit.doc);
            System.out.println(hit.score + "  " + doc.get("path"));
        }
    }

    /**
     * Closes a directory while another exception is already being reported.
     * A failure to close is deliberately ignored so that it cannot replace
     * the original, more informative exception.
     */
    private static void closeAfterFailure(Directory dir) {
        try {
            dir.close();
        } catch (IOException ignored) {
            // The caller rethrows the exception that caused the cleanup.
        }
    }

}

拆的比较琐碎了，凑合看吧。

创建索引的类：

package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

import org.apache.lucene.index.IndexWriter;

import com.zhyea.util.FileUtil;

/**
 * Command-line entry point that builds the Lucene index from the local
 * document directory.
 */
public class IndexTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /** Directory containing the text files to index. */
    String docPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\docs";

    /**
     * Builds the index and prints the stack trace of any failure.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        IndexTest indexer = new IndexTest();
        try {
            indexer.createIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the index: obtains a {@code SmartChineseAnalyzer}, opens a
     * writer on {@code indexPath}, adds the files under {@code docPath} and
     * finally closes the writer, whether or not indexing succeeded.
     *
     * @throws IOException
     *             propagated from opening the writer, indexing, or closing
     * @throws InstantiationException
     *             propagated from creating the analyzer
     * @throws IllegalAccessException
     *             propagated from creating the analyzer
     */
    private void createIndex() throws IOException,
                                      InstantiationException,
                                      IllegalAccessException {
        IndexWriter indexWriter = null;
        try {
            Analyzer chineseAnalyzer =
                    DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            indexWriter = DoggieLucene.createIndexWriter(chineseAnalyzer, indexPath);
            DoggieLucene.addLocalDocument(indexWriter, docPath);
        } finally {
            if (indexWriter != null) {
                indexWriter.close();
            }
        }
    }

}

执行搜索的类：

package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.queryparser.classic.ParseException;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.TopDocs;

/**
 * Command-line entry point that runs a sample query against the Lucene index
 * and prints the hits.
 */
public class SearchTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /**
     * Runs the sample search and prints the stack trace of any failure.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        SearchTest search = new SearchTest();
        try {
            search.executeSearch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@code indexPath}, parses a fixed query against the
     * {@code content} field with a {@code SmartChineseAnalyzer}, executes it
     * and prints the result. The reader is closed afterwards, whether or not
     * the search succeeded.
     *
     * @throws ParseException
     *             propagated from parsing the query
     * @throws IOException
     *             propagated from opening, searching, or closing the index
     * @throws InstantiationException
     *             propagated from creating the analyzer
     * @throws IllegalAccessException
     *             propagated from creating the analyzer
     */
    public void executeSearch() throws ParseException,
                                       IOException,
                                       InstantiationException,
                                       IllegalAccessException {
        IndexReader indexReader = null;
        try {
            indexReader = DoggieLucene.createIndexReader(indexPath);
            IndexSearcher indexSearcher = DoggieLucene.createIndexSearcher(indexReader);
            Analyzer chineseAnalyzer =
                    DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            QueryParser parser = new QueryParser("content", chineseAnalyzer);
            Query parsedQuery = parser.parse("杨过");
            TopDocs hits = DoggieLucene.executeSearch(indexSearcher, parsedQuery);
            DoggieLucene.showResult(hits, indexReader);
        } finally {
            if (indexReader != null) {
                indexReader.close();
            }
        }
    }

}

OK。

巴特西

lucene学习-3 - 代码重构

最新文章

热门文章