用Solr的DataImportHandler导入CLOB字段很简单(自带ClobTransformer)。但是BLOB字段好像没有提供现成的转换方法,所以改了一下源码,重新编译后替换class文件,竟然成功了。

先把配置文件贴上

SCHEMA.XML

<?xml version="1.0" ?>
<schema name="test" version="1.1">
<!-- Index schema "test": the field types are declared first, then the fields that use them. -->
<types>
<!-- "string" is a solr.StrField. "standard" is a solr.TextField whose index-time and query-time
     analyzers are configured identically: standard tokenizer, stop-word filter (stopwords.txt,
     case-insensitive), then lower-casing. -->
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> <fieldType name="standard" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType> <fieldType name="ik" class="solr.TextField">
<!-- "ik" delegates all analysis to a single analyzer class, org.wltea.analyzer.lucene.IKAnalyzer.
     NOTE(review): presumably the IK Analyzer jar must be on Solr's classpath; confirm for your install. -->
<analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>
</fieldType> </types> <fields>
<!-- blogId is stored but not indexed. Every other field uses the "ik" type and is both indexed and stored. -->
<field name="blogId" type="string" indexed="false" stored="true" multiValued="false"/>
<field name="blogTitle" type="ik" indexed="true" stored="true" multiValued="false" />
<field name="blogAuthorName" type="ik" indexed="true" stored="true" multiValued="false" />
<field name="blogContent" type="ik" indexed="true" stored="true" multiValued="false" />
<field name="TITLE" type="ik" indexed="true" stored="true" />
<field name="TEXT" type="ik" indexed="true" stored="true" />
</fields>
<!-- blogTitle is the default search field, and the query parser's default operator is OR. -->
<defaultSearchField>blogTitle</defaultSearchField>
<solrQueryParser defaultOperator="OR"/> </schema>

这里的field只用到了blogContent一个。

SOLRCONFIG.XML

<?xml version="1.0" encoding="UTF-8" ?>
<config>
<!-- Minimal core configuration: update handler, request dispatcher, and the request handlers
     registered below (default search, /update, /admin/, /dataimport, /analysis/field). -->
<luceneMatchVersion>LUCENE_34</luceneMatchVersion>
<!-- The directory factory can be overridden through the solr.directoryFactory property;
     solr.StandardDirectoryFactory is the fallback written in the placeholder. -->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
<updateHandler class="solr.DirectUpdateHandler2" /> <requestDispatcher handleSelect="true" >
<!-- Remote streaming is disabled; multipart uploads are limited to 2048 KB. -->
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
</requestDispatcher> <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> <!-- the dataimport requestHandler -->
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
<!-- The default "config" parameter points this handler at db-data-config.xml. -->
<lst name="defaults">
<str name="config">db-data-config.xml</str>
</lst>
</requestHandler> <admin>
<defaultQuery>solr</defaultQuery>
</admin>
<!-- NOTE(review): unlockOnStartup=true together with lockType=simple presumably clears a leftover
     index lock when the core starts; confirm this is acceptable for your deployment. -->
<unlockOnStartup>true</unlockOnStartup>
<lockType>simple</lockType>
<!-- Field analysis handler, declared with startup="lazy". -->
<requestHandler name="/analysis/field"
startup="lazy"
class="solr.FieldAnalysisRequestHandler" /> </config>

db-data-config.xml

<dataConfig>
<!-- DataImportHandler configuration: selects BLOG_CONTENT from TB_ENT_BLOG and maps it to the
     blogContent field. -->
<!-- Data source "f1" (FieldStreamDataSource) is declared, but no entity in this file references it. -->
<dataSource name="f1" type="FieldStreamDataSource"/>
<!-- Unnamed JDBC data source for an Oracle instance on 127.0.0.1.
     NOTE(review): the user name and password are stored here in plain text. -->
<dataSource driver="oracle.jdbc.driver.OracleDriver"
url="jdbc:oracle:thin:@127.0.0.1:1521:orcl" user="HT" password="HT"/>
<document>
<!-- Rows pass through ClobTransformer, which converts the columns flagged clob="true" below. -->
<entity name="blog" query="SELECT BLOG_CONTENT from TB_ENT_BLOG" transformer="ClobTransformer">
<field column="BLOG_CONTENT" name="blogContent" clob="true"/>
</entity>
</document>
</dataConfig>

然后修改了ClobTransformer.java,使其同时支持BLOB格式。

package org.apache.solr.handler.dataimport;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * DataImportHandler transformer that replaces CLOB and BLOB column values with their text.
 *
 * <p>For every entity field declared with {@code clob="true"}, the value found in the row
 * (looked up by {@code sourceColName}, falling back to {@code column}) is converted:
 * a {@link Clob} is read through its character stream, and a {@link Blob} is decoded from its
 * binary stream. When the value is a {@link List}, each Clob/Blob element is converted and any
 * other element is left out of the resulting list.
 *
 * <p>BLOB bytes are decoded as UTF-8 unless the field carries a {@code blobCharset} attribute
 * naming another charset (for example {@code blobCharset="GBK"}). Line breaks inside the BLOB
 * are preserved, so words on adjacent lines are not fused together before analysis.
 *
 * <p>Read failures are reported through {@code DataImportHandlerException.wrapAndThrow}
 * rather than being printed and replaced by an empty string.
 */
public class ClobTransformer extends Transformer {
  public static final String CLOB = "clob";

  /** Optional field attribute naming the charset used to decode BLOB content. */
  private static final String BLOB_CHARSET = "blobCharset";

  /** Charset used for BLOB content when the field does not specify one. */
  private static final String DEFAULT_BLOB_CHARSET = "UTF-8";

  /** Error code passed to wrapAndThrow; the same literal the CLOB path has always used. */
  private static final int ERROR_CODE = 500;

  /**
   * Converts the CLOB/BLOB values of all fields flagged {@code clob="true"} into strings.
   *
   * @param aRow the row being imported; converted values are written back under the field's
   *     {@code column} name
   * @param context import context supplying the entity's field definitions
   * @return the same {@code aRow} instance, after in-place modification
   */
  public Object transformRow(Map<String, Object> aRow, Context context) {
    for (Map<String, String> field : context.getAllEntityFields()) {
      if (!"true".equals(field.get(CLOB))) {
        continue;
      }
      String column = field.get("column");
      String srcCol = field.get("sourceColName");
      if (srcCol == null) {
        srcCol = column;
      }
      String charsetName = field.get(BLOB_CHARSET);
      if (charsetName == null) {
        charsetName = DEFAULT_BLOB_CHARSET;
      }

      Object value = aRow.get(srcCol);
      if (value instanceof List) {
        List<String> results = new ArrayList<String>();
        for (Object input : (List<?>) value) {
          if (input instanceof Clob) {
            results.add(readFromClob((Clob) input));
          } else if (input instanceof Blob) {
            results.add(readFromBlob((Blob) input, charsetName));
          }
          // Elements of any other type are skipped, as before.
        }
        aRow.put(column, results);
      } else if (value instanceof Clob) {
        aRow.put(column, readFromClob((Clob) value));
      } else if (value instanceof Blob) {
        aRow.put(column, readFromBlob((Blob) value, charsetName));
      }
    }
    return aRow;
  }

  /**
   * Decodes the whole content of a BLOB into a string.
   *
   * @param blob the BLOB to read
   * @param charsetName name of the charset the BLOB bytes are encoded in
   * @return the decoded text, including any line terminators
   */
  private String readFromBlob(Blob blob, String charsetName) {
    InputStream in = null;
    try {
      in = blob.getBinaryStream();
      return readAll(new InputStreamReader(in, charsetName));
    } catch (SQLException e) {
      throw fail(e);
    } catch (IOException e) {
      // Also covers UnsupportedEncodingException for an unknown blobCharset.
      throw fail(e);
    } finally {
      // Covers the case where the reader could not be constructed around the stream.
      closeQuietly(in);
    }
  }

  /**
   * Reads the whole content of a CLOB into a string.
   *
   * @param clob the CLOB to read
   * @return the CLOB's text
   */
  private String readFromClob(Clob clob) {
    try {
      return readAll(clob.getCharacterStream());
    } catch (SQLException e) {
      throw fail(e);
    } catch (IOException e) {
      throw fail(e);
    }
  }

  /** Drains {@code reader} into a string and closes it, whether or not reading succeeds. */
  private static String readAll(Reader reader) throws IOException {
    try {
      StringBuilder sb = new StringBuilder();
      char[] buf = new char[1024];
      int len;
      while ((len = reader.read(buf)) != -1) {
        sb.append(buf, 0, len);
      }
      return sb.toString();
    } finally {
      closeQuietly(reader);
    }
  }

  /**
   * Reports {@code e} through the import handler's error mechanism.
   *
   * <p>wrapAndThrow is expected to throw; the returned exception exists so callers can write
   * {@code throw fail(e)} and so the failure is never silently dropped if it ever returns.
   */
  private static RuntimeException fail(Exception e) {
    DataImportHandlerException.wrapAndThrow(ERROR_CODE, e);
    return new IllegalStateException(e);
  }

  /** Closes {@code resource} if it is non-null, ignoring any failure to close. */
  private static void closeQuietly(java.io.Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (IOException ignored) {
      // The content has already been read (or the read has already failed); a close failure
      // must not mask that outcome.
    }
  }
}

这里加了一个readFromBlob方法,加了两个else if。异常的处理很粗糙。

这样替换class文件后,导入索引就正常了。在query "*:*" 页面的response中会出现所有blob内容。

如果response中没有blob字段,或者字段值显示为对象地址,就说明导入出错了。

最新文章

  1. 1Z0-053 争议题目解析683
  2. Effective java笔记(六),方法
  3. jquery表格动态增删改及取数据绑定数据完整方案
  4. CSS中常见的位置(position)属性
  5. 信息安全系统设计基础第一次实验报告 20135201&&20135306&&20135307
  6. 视频处理控件TVideoGrabber如何对屏幕进行录制/压缩
  7. C# chart,有关如何在鼠标移动到Series上时显示节点及数据 (有待继续更新)
  8. OpenJudge_cdqz 数据结构版块小结
  9. 我写了一起 Makefile(一)
  10. bitnami gitlab 安装
  11. Scala:枚举类型的用法
  12. StackExchange.Redis .net core Timeout performing 超时问题
  13. OO第二单元电梯线程系列总结作业
  14. python基础—字符串的常用函数“”
  15. [LeetCode] 577. Employee Bonus_Easy tag: SQL
  16. python基础之函数名的使用,闭包以及迭代器
  17. PHP生成GIF动态图片验证码
  18. HDU_1022
  19. 异常:java.lang.IllegalArgumentException: Control character in cookie value or attribute.
  20. zabbix时间不同步

热门文章

  1. css3动画使用技巧之—JQ配合css3实现轮播之animation-delay应用
  2. chgrp命令
  3. PHP学习笔记(2) - 对PHP的印象
  4. oracle安装,配置,启动
  5. xml转array
  6. KMP模板与讲解
  7. 【web安全】第四弹:防火墙技术笔记
  8. iOS --- 取整数
  9. ie8下jquery读取当前点击的标签位置错误,原因是里面有内容写了text-indent:-9999px
  10. scanf格式控制符