1.引用读取PDF文件组件

FontBox-0.1.0-dev.dll

IKVM.GNU.Classpath.dll

IKVM.Runtime.dll

PDFBox-0.7.3.dll

2.添加 Office 组件引用(Microsoft.Office.Interop.Word、Microsoft.Office.Interop.Excel),用于读取 Word 和 Excel 文件内容,具体添加步骤此处略过

3.添加盘古分词

PanGu.dll

PanGu.HighLight.dll

PanGu.Lucene.Analyzer.dll

4.添加Lucene.net 引用

Lucene.Net.dll

5.创建索引库

   #region  同步资料到索引库
/// <summary>
/// Resources waiting to be synchronised into the index library.
/// Filled by <c>InitTaskQueue</c> and drained by <c>ThreadInvoke</c>.
/// </summary>
private static Queue<ResourcesModel> TaskQueue = new Queue<ResourcesModel>();

/// <summary>
/// Timer tick handler: loads pending resources into the task queue and logs the outcome.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void tmResSync_Tick(object sender, EventArgs e)
{
    try
    {
        // Read the pending rows into the queue.
        InitTaskQueue();
        LogHelper.writeLog("WinFrom【同步数据索引库读取数据到队列】成功!!");
    }
    catch (Exception loadError)
    {
        // A failed load is logged and swallowed here rather than rethrown.
        string detail = string.Concat("WinFrom【同步数据索引库读取数据到队列】:", loadError.Message);
        LogHelper.writeErrLog(detail);
    }
}
/// <summary>
/// Starts the background worker thread that runs <c>ThreadInvoke</c>.
/// </summary>
public void ServiceStart()
{
    // Background thread: it does not keep the process alive on exit.
    var worker = new Thread(new ThreadStart(ThreadInvoke)) { IsBackground = true };
    worker.Start();
}
/// <summary>
/// Worker loop: repeatedly takes one resource from the task queue and passes it to
/// <c>CreateResIndex.CreateIndex</c>; sleeps briefly when the queue is empty.
/// Never returns. Exceptions from a single iteration are logged and the loop continues.
/// </summary>
public void ThreadInvoke()
{
    // NOTE(review): the original sleep interval literal was lost when this code was
    // published; 1000 ms is an assumed value - confirm against the real source.
    const int idlePollMilliseconds = 1000;

    while (true)
    {
        try
        {
            ResourcesModel res = null;
            bool hasItem = false;

            // Check and dequeue under the same lock so the count cannot change in between.
            lock (TaskQueue)
            {
                if (TaskQueue.Count > 0)
                {
                    res = TaskQueue.Dequeue();
                    hasItem = true;
                }
            }

            if (hasItem)
            {
                // Index the resource outside the lock so producers are not blocked.
                new CreateResIndex().CreateIndex(res);
            }
            else
            {
                Thread.Sleep(idlePollMilliseconds);
            }
        }
        catch (Exception ex)
        {
            LogHelper.writeErrLog("WinFrom【同步数据索引库出错】:"+ex.ToString());
        }
    }
}
/// <summary>
/// Reads every row of the resource view that has an upload time and is not yet indexed
/// (<c>IsIndex=0</c>), converts each row to a <c>ResourcesModel</c> and enqueues it.
/// </summary>
/// <remarks>
/// NOTE(review): nothing here marks a row as queued, so a row that is still unindexed
/// when this method runs again is enqueued again.
/// </remarks>
public void InitTaskQueue()
{
    // Read the resource-centre data.
    var query = new CreateResIndex().Get_View_CreateResIndex(" and uploadTime is not null and IsIndex=0 ");
    if (query == null)
    {
        return;
    }

    for (int i = 0; i < query.Rows.Count; i++)
    {
        var row = query.Rows[i];
        var model = new ResourcesModel();
        model.ID = row["ID"].ToString();
        model.FileName = ReadCellText(row["FileName"]);
        model.FilePath = ReadCellText(row["FilePath"]);
        model.CreaetBy = ReadCellText(row["UserName"]);
        model.Types = ReadCellText(row["Name"]);
        model.TypeId = ReadCellText(row["Type"]);
        model.SimpleDesc = ReadCellText(row["SimpleDesc"]);
        model.Title = ReadCellText(row["Title"]);
        model.Tags = ReadCellText(row["Tag"]);

        // NOTE(review): the OP literals were lost when this code was published. They are
        // reconstructed from the ResourcesModel.OP comment (delete=0, add=1, modify=2):
        // deleted rows get "0", everything else "1" - confirm against the real source.
        object isDelCell = row["IsDel"];
        // Convert from the raw cell so both bit and integer columns are accepted.
        bool isDeleted = ReadCellText(isDelCell) != "" && Convert.ToBoolean(isDelCell);
        model.OP = isDeleted ? "0" : "1";

        object uploadCell = row["uploadTime"];
        // Invariant culture keeps the stored date text independent of the machine's calendar.
        model.UploadTime = ReadCellText(uploadCell) != ""
            ? Convert.ToDateTime(uploadCell).ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)
            : "";

        // The worker thread dequeues under lock (TaskQueue); enqueue under the same lock.
        lock (TaskQueue)
        {
            TaskQueue.Enqueue(model);
        }
    }
}

/// <summary>Returns the cell value as text; a null or DBNull cell yields an empty string.</summary>
private static string ReadCellText(object cell)
{
    return cell == null || cell == DBNull.Value ? "" : cell.ToString();
}
#endregion
  #region  ResourcesModel
/// <summary>
/// Plain data carrier describing one resource to be added to or removed from the index.
/// All values are carried as strings.
/// </summary>
public class ResourcesModel
{
    public ResourcesModel()
    {
    }

    /// <summary>
    /// Identifier.
    /// </summary>
    public string ID { get; set; }

    /// <summary>
    /// Title.
    /// </summary>
    public string Title { get; set; }

    /// <summary>
    /// Tags.
    /// </summary>
    public string Tags { get; set; }

    /// <summary>
    /// Creator.
    /// </summary>
    public string CreaetBy { get; set; }

    /// <summary>
    /// Upload time.
    /// </summary>
    public string UploadTime { get; set; }

    /// <summary>
    /// Category.
    /// </summary>
    public string Types { get; set; }

    /// <summary>
    /// Short description.
    /// </summary>
    public string SimpleDesc { get; set; }

    /// <summary>
    /// Content.
    /// </summary>
    public string ContextDesc { get; set; }

    /// <summary>
    /// Operation marker: delete=0, add=1, modify=2.
    /// </summary>
    public string OP { get; set; }

    /// <summary>
    /// Category id.
    /// </summary>
    public string TypeId { get; set; }

    /// <summary>
    /// File path.
    /// </summary>
    public string FilePath { get; set; }

    /// <summary>
    /// File name.
    /// </summary>
    public string FileName { get; set; }
}
#endregion
  #region 读取文件
/// <summary>
/// Extracts plain text from resource files (Word, Excel, PDF and text files) so the
/// text can be indexed. The reader is chosen from the file extension.
/// </summary>
public class ReadFilesTxt
{
    /// <summary>
    /// Reads a text file with the system default encoding; if the decoded text contains
    /// the marker "GB2312" (any casing) the file is read again as GB2312.
    /// </summary>
    /// <param name="path">Full path of the text file.</param>
    /// <returns>The file content.</returns>
    public string ResumeTxt(string path)
    {
        string str;
        using (StreamReader reader = new StreamReader(path, System.Text.Encoding.Default))
        {
            str = reader.ReadToEnd();
        }

        // If the decoded text mentions GB2312, decode the file again using GB2312.
        if (str.IndexOf("GB2312", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            using (StreamReader gbReader = new StreamReader(path, System.Text.Encoding.GetEncoding("GB2312")))
            {
                str = gbReader.ReadToEnd();
            }
        }
        return str;
    }

    /// <summary>
    /// Opens a Word document read-only through Office interop and returns its text.
    /// The document and the Word application are closed even if reading fails.
    /// </summary>
    /// <param name="path">Full path of the .doc/.docx file.</param>
    private string ResumeWord(string path)
    {
        object missing = System.Reflection.Missing.Value;
        object readOnly = true;
        object docPathp = path;
        Microsoft.Office.Interop.Word.Application wordApp = new Microsoft.Office.Interop.Word.Application();
        Microsoft.Office.Interop.Word.Document wordDoc = null;
        try
        {
            wordDoc = wordApp.Documents.Open(ref docPathp,
                ref missing,
                ref readOnly,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing,
                ref missing);
            return wordDoc.Content.Text;
        }
        finally
        {
            // Always shut Word down, otherwise a failed read leaves the process running.
            try
            {
                if (wordDoc != null)
                {
                    wordDoc.Close(ref missing, ref missing, ref missing);
                }
            }
            finally
            {
                wordApp.Quit(ref missing, ref missing, ref missing);
            }
        }
    }

    /// <summary>
    /// Opens an Excel workbook read-only through Office interop and concatenates the
    /// values of the cells in rows 1..UsedRange.Rows.Count and columns
    /// 1..UsedRange.Columns.Count of every worksheet.
    /// The workbook and the Excel application are closed even if reading fails.
    /// </summary>
    /// <param name="path">Full path of the .xls/.xlsx file.</param>
    private string ResumeExcel(string path)
    {
        StringBuilder text = new StringBuilder();
        object readOnly = true;
        object missing = System.Reflection.Missing.Value;

        // Create the Application object.
        Microsoft.Office.Interop.Excel.Application xApp = new Microsoft.Office.Interop.Excel.Application();
        Microsoft.Office.Interop.Excel.Workbook xBook = null;
        try
        {
            xApp.Visible = false;
            // Get the Workbook object.
            xBook = xApp.Workbooks._Open(path,
                missing, readOnly, missing, missing,
                missing, missing, missing, missing,
                missing, missing, missing, missing);

            int count = xBook.Sheets.Count;
            // Interop sheet, row and column indexes are 1-based.
            for (int k = 0; k < count; k++)
            {
                Microsoft.Office.Interop.Excel.Worksheet xSheet = xBook.Sheets[k + 1] as Microsoft.Office.Interop.Excel.Worksheet;
                if (xSheet == null)
                {
                    // Not a worksheet (for example a chart sheet): nothing to read.
                    continue;
                }

                Microsoft.Office.Interop.Excel.Range used = xSheet.UsedRange;
                int rcount = used.Rows.Count;
                int ccount = used.Columns.Count;
                for (int m = 0; m < rcount; m++)
                {
                    for (int n = 0; n < ccount; n++)
                    {
                        object cellValue = ((Microsoft.Office.Interop.Excel.Range)xSheet.Cells[m + 1, n + 1]).Value2;
                        text.Append(Convert.ToString(cellValue));
                    }
                }
            }
            return text.ToString();
        }
        finally
        {
            // Always shut Excel down, otherwise a failed read leaves the process running.
            try
            {
                if (xBook != null)
                {
                    xBook.Close(missing, missing, missing);
                }
            }
            finally
            {
                xApp.Quit();
            }
        }
    }

    /// <summary>
    /// Extracts the text of a PDF file with PDFBox and closes the document afterwards.
    /// </summary>
    /// <param name="path">Full path of the .pdf file.</param>
    public string ResumePDF(string path)
    {
        org.pdfbox.pdmodel.PDDocument doc = org.pdfbox.pdmodel.PDDocument.load(path);
        try
        {
            org.pdfbox.util.PDFTextStripper pdfStripper = new org.pdfbox.util.PDFTextStripper();
            return pdfStripper.getText(doc);
        }
        finally
        {
            doc.close();
        }
    }

    /// <summary>
    /// Reads several files and returns their concatenated text.
    /// </summary>
    /// <param name="ResourceRoute">Root directory that is prepended to every relative path.</param>
    /// <param name="path">Semicolon-separated list of relative file paths; empty entries are skipped.</param>
    /// <returns>
    /// The concatenated text. If reading a file fails the error is logged, the remaining
    /// files are not read, and the text collected so far is returned.
    /// </returns>
    public string GetReadContext(string ResourceRoute, string path)
    {
        StringBuilder sb = new StringBuilder();
        try
        {
            if (!string.IsNullOrEmpty(path))
            {
                string[] paths = path.Split(';');
                for (int i = 0; i < paths.Length; i++)
                {
                    if (!string.IsNullOrEmpty(paths[i]))
                    {
                        sb.Append(ReadBySuffix(ResourceRoute, paths[i]));
                    }
                }
            }
        }
        catch (Exception ex)
        {
            LogHelper.writeErrLog( "【读取文件出错:文件名称:" + path + " 】 错误消息:" + ex.Message.ToString());
        }
        return sb.ToString();
    }

    /// <summary>
    /// Reads a single file and returns its text.
    /// </summary>
    /// <param name="ResourceRoute">Root directory that is prepended to the relative path.</param>
    /// <param name="lpath">Relative path of the file.</param>
    /// <returns>The file text; an empty string for an unsupported extension or when reading fails (the failure is logged).</returns>
    public string GetReadContextSingle(string ResourceRoute, string lpath)
    {
        StringBuilder sb = new StringBuilder();
        try
        {
            if (!string.IsNullOrEmpty(lpath))
            {
                sb.Append(ReadBySuffix(ResourceRoute, lpath));
            }
        }
        catch (Exception ex)
        {
            LogHelper.writeErrLog("【读取文件出错:文件名称:" + ResourceRoute + lpath + " 】 错误消息:" + ex.Message.ToString());
        }
        return sb.ToString();
    }

    /// <summary>
    /// Picks the reader that matches the file extension (compared case-insensitively)
    /// and returns the extracted text; unsupported extensions yield an empty string.
    /// </summary>
    private string ReadBySuffix(string resourceRoute, string relativePath)
    {
        // Text after the last '.'; when there is no '.', the whole string is used.
        string suffix = relativePath.Substring(relativePath.LastIndexOf('.') + 1).ToLowerInvariant();
        string fullPath = resourceRoute + relativePath;

        if ("doc" == suffix || "docx" == suffix)
        {
            return ResumeWord(fullPath);
        }
        // The original compared "xlsx" with the whole path, so .xlsx files were never read.
        if ("xls" == suffix || "xlsx" == suffix)
        {
            return ResumeExcel(fullPath);
        }
        if ("pdf" == suffix)
        {
            return ResumePDF(fullPath);
        }
        if ("txt" == suffix)
        {
            return ResumeTxt(fullPath);
        }
        return string.Empty;
    }
}
#endregion
 #region  创建索引
/// <summary>
/// Maintains the Lucene.Net full-text index of resources: adds or removes the documents
/// of one resource, flags indexed rows in the database, and resolves which resource
/// types are indexed.
/// </summary>
public class CreateResIndex
{
    /// <summary>Directory of the index files, read from the "pathIndex" app setting.</summary>
    public static string IndexPath = ConfigurationManager.AppSettings["pathIndex"];

    /// <summary>Root directory of the resource files, read from the "ResourceRoute" app setting.</summary>
    public static string ResourceRoute = ConfigurationManager.AppSettings["ResourceRoute"];

    #region Properties

    /// <summary>
    /// PanGu word-segmentation analyzer; a new instance on every access.
    /// </summary>
    protected Analyzer NewPanGuAnalyzer
    {
        get { return new PanGuAnalyzer(); }
    }

    /// <summary>
    /// Lucene.Net directory opened on <see cref="IndexPath"/>; a new instance on every access.
    /// </summary>
    public FSDirectory DirectoryLuce
    {
        get
        {
            return FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
        }
    }

    #endregion

    #region Index creation

    /// <summary>
    /// Applies one resource to the index.
    /// When <c>res.OP</c> is "0" the documents with the resource id are deleted. Otherwise,
    /// if the resource type is one of the indexed types, the existing documents are
    /// replaced by one document for the resource itself plus one document per attached
    /// file. Ids that were processed successfully are flagged through
    /// <see cref="ModifyResIndex"/>.
    /// </summary>
    /// <param name="res">Resource to index; null only optimizes and closes the index.</param>
    public void CreateIndex(ResourcesModel res)
    {
        // Create the index directory.
        if (!System.IO.Directory.Exists(IndexPath))
        {
            System.IO.Directory.CreateDirectory(IndexPath);
        }

        // Load the type list before the writer takes the index lock, so a database
        // failure cannot leave the index locked.
        List<string> listIsdex = GetResourceTypePublicResources();
        List<string> modifyindex = new List<string>();

        // One directory instance for all calls instead of a new one per property access.
        FSDirectory directory = DirectoryLuce;
        bool isUpdate = IndexReader.IndexExists(directory);
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            // Clear a lock left behind by an earlier run.
            IndexWriter.Unlock(directory);
        }

        IndexWriter writer = new IndexWriter(directory, NewPanGuAnalyzer, !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            if (res != null)
            {
                string id = (res.ID ?? "").Trim();

                // NOTE(review): the literal compared with OP was lost when this code was
                // published; "0" is reconstructed from the ResourcesModel.OP comment
                // (delete=0) - confirm against the real source.
                if (res.OP == "0")
                {
                    writer.DeleteDocuments(new Term("ID", id));
                    modifyindex.Add(id);
                    LogHelper.writeLog("【删除索引编号】 【ID:" + id + "】");
                }
                else if (IsPublicResources(listIsdex, (res.TypeId ?? "").Trim()))
                {
                    // Remove the previous documents first so a re-index does not duplicate them.
                    writer.DeleteDocuments(new Term("ID", id));
                    IndexResource(writer, res, id, modifyindex);
                }
            }
            writer.Optimize();
        }
        finally
        {
            // Always release the index lock, even when indexing fails.
            writer.Close();
        }
        ModifyResIndex(modifyindex);
    }

    /// <summary>
    /// Adds the document for the resource itself and one document per attached file.
    /// A failure is logged and the id is then not added to <paramref name="indexedIds"/>.
    /// </summary>
    private void IndexResource(IndexWriter writer, ResourcesModel res, string id, List<string> indexedIds)
    {
        string path = res.FilePath;
        string title = res.Title ?? "";
        string creator = res.CreaetBy ?? "";
        string uploadTime = res.UploadTime;
        string types = res.Types ?? "";
        string simpleDesc = res.SimpleDesc ?? "";
        string tags = res.Tags ?? "";
        string fileName = res.FileName;
        try
        {
            AddIndex(writer, id, title, tags, simpleDesc, "", types, uploadTime, creator, fileName);
            if (!string.IsNullOrEmpty(path))
            {
                string[] paths = path.Split(';');
                string[] pname = (fileName ?? "").Split(';');
                for (int i = 0; i < paths.Length; i++)
                {
                    if (string.IsNullOrEmpty(paths[i]))
                    {
                        continue;
                    }
                    string lpath = paths[i];
                    // The name list may be shorter than the path list.
                    string lname = i < pname.Length ? pname[i] : "";
                    string contextDesc = new ReadFilesTxt().GetReadContextSingle(ResourceRoute, lpath);
                    string newFileName = GetFileName(lpath, lname);
                    // As in the original: the extracted file text is stored in the
                    // SimpleDesc field and the file path in the ContextDesc field.
                    AddIndex(writer, id, newFileName, tags, contextDesc, lpath, types, uploadTime, creator, fileName);
                }
            }
            LogHelper.writeLog("【添加索引编号】 【ID:" + id + "】");
            indexedIds.Add(id);
        }
        catch (Exception ex)
        {
            LogHelper.writeLog("【添加索引失败】 【ID:" + id + "】:" + ex.Message.ToString());
        }
    }

    /// <summary>
    /// Adds one document to the index. Every field is stored; ID and UploadTime are
    /// indexed without analysis, CreaetBy is stored only, the rest are analyzed.
    /// Null values are written as empty strings. Exceptions propagate to the caller
    /// with their original stack trace.
    /// </summary>
    public void AddIndex(IndexWriter writer, string ID, string Title, string Tags, string SimpleDesc, string ContextDesc, string Types, string UploadTime, string CreaetBy,string FileName)
    {
        Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
        doc.Add(StoredField("ID", ID, Lucene.Net.Documents.Field.Index.NOT_ANALYZED));
        doc.Add(StoredField("Title", Title, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("Tags", Tags, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("SimpleDesc", SimpleDesc, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("FileName", FileName, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("ContextDesc", ContextDesc, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("Types", Types, Lucene.Net.Documents.Field.Index.ANALYZED));
        doc.Add(StoredField("UploadTime", UploadTime, Lucene.Net.Documents.Field.Index.NOT_ANALYZED));
        doc.Add(StoredField("CreaetBy", CreaetBy, Lucene.Net.Documents.Field.Index.NO));
        writer.AddDocument(doc);
    }

    /// <summary>Creates a stored field, replacing a null value with an empty string.</summary>
    private static Lucene.Net.Documents.Field StoredField(string name, string value, Lucene.Net.Documents.Field.Index indexMode)
    {
        return new Lucene.Net.Documents.Field(name, value ?? "", Lucene.Net.Documents.Field.Store.YES, indexMode);
    }

    /// <summary>
    /// Returns <paramref name="FileName"/> when it is non-empty; otherwise the part of
    /// <paramref name="objfilepath"/> after its last ',' with every ';' removed.
    /// Returns an empty string when both are null or empty.
    /// </summary>
    public string GetFileName(object objfilepath, object FileName)
    {
        if (FileName != null && FileName.ToString() != "")
        {
            return FileName.ToString();
        }
        if (objfilepath != null && objfilepath.ToString() != "")
        {
            string filePath = objfilepath.ToString();
            // NOTE(review): the offset literal was lost when this code was published;
            // + 1 (skip the separator itself) is assumed - confirm.
            return filePath.Substring(filePath.LastIndexOf(',') + 1).Replace(";", "");
        }
        return "";
    }

    #endregion

    #region Database access

    /// <summary>
    /// Queries the Res_View_createResIndex view.
    /// </summary>
    /// <param name="whereStr">
    /// SQL fragment appended verbatim after "where 1=1"; it must never contain untrusted input.
    /// </param>
    /// <returns>The first result table; an empty table when the query fails (the error is logged) or returns no table.</returns>
    public DataTable Get_View_CreateResIndex(string whereStr)
    {
        string sql = " Select * From Res_View_createResIndex where 1=1 " + whereStr;
        DataTable dt = new DataTable();
        try
        {
            DataSet ds = Ruihua.Common.DbHelperSQL.Query(sql);
            if (ds != null && ds.Tables != null && ds.Tables.Count > 0)
            {
                dt = ds.Tables[0];
            }
        }
        catch (Exception ex)
        {
            LogHelper.writeLog("【 获取中心资料库数据错误】:" + ex.ToString());
        }
        return dt;
    }

    /// <summary>
    /// Sets IsIndex=1 on the ResourceInfoNew rows whose id is in <paramref name="list"/>.
    /// Does nothing when the list is null or empty.
    /// </summary>
    public void ModifyResIndex(List<string> list)
    {
        if (list == null)
        {
            return;
        }
        string sql = " update ResourceInfoNew set IsIndex=1 where id in ({0}) ";
        // '-1' is a leading placeholder so each id can simply be appended with a comma.
        StringBuilder sb = new StringBuilder("'-1'");
        LogHelper.writeLog("【更新索引编号开始】:" + string.Join(",", list.ToArray()));
        if (list.Count > 0)
        {
            for (int i = 0; i < list.Count; i++)
            {
                // Double any single quote so an id cannot break out of the SQL literal.
                sb.Append(",'" + (list[i] ?? "").Replace("'", "''") + "'");
            }
            sql = string.Format(sql, sb.ToString());
            int result = Ruihua.Common.DbHelperSQL.ExecuteSql(sql);
            LogHelper.writeLog("【更新索引编号结束:" + result.ToString() + "】:" + string.Join(",", list.ToArray()));
        }
    }

    /// <summary>
    /// Tells whether <paramref name="Id"/> is one of the type ids in <paramref name="list"/>.
    /// </summary>
    /// <returns>True when the list contains the id; false otherwise or when the list is null.</returns>
    public bool IsPublicResources(List<string> list, string Id)
    {
        return list != null && list.Contains(Id);
    }

    /// <summary>
    /// Returns the ids of the root resource types and all of their descendants, read from
    /// the ResourceType table. The result is kept in the default memory cache for 30 minutes.
    /// </summary>
    public List<string> GetResourceTypePublicResources()
    {
        ObjectCache cache = MemoryCache.Default;
        List<string> cached = cache["ResourceType"] as List<string>;
        if (cached != null)
        {
            return cached;
        }

        List<string> publicresource = new List<string>();
        string sql = "select *From ResourceType ";
        DataSet ds = Ruihua.Common.DbHelperSQL.Query(sql);
        if (ds != null && ds.Tables != null && ds.Tables.Count > 0)
        {
            DataTable dt = ds.Tables[0];
            // NOTE(review): root rows are matched by ParentID == "" exactly as published;
            // the literal may originally have been "0" (all-digit literals were lost) - confirm.
            AddListString(ref publicresource, dt, "");
        }

        CacheItemPolicy policy = new CacheItemPolicy();
        policy.AbsoluteExpiration = DateTimeOffset.Now.AddSeconds(1800.0); // evicted after 60*30 seconds
        cache.Set("ResourceType", publicresource, policy);
        return publicresource;
    }

    /// <summary>
    /// Appends to <paramref name="list"/> the TID of every row whose ParentID equals
    /// <paramref name="Id"/>, followed recursively by that row's descendants.
    /// Ids that are already in the list are skipped, which also stops the recursion
    /// if the parent links form a cycle.
    /// </summary>
    public void AddListString(ref List<string> list, DataTable dt, string Id)
    {
        var children = from row in dt.AsEnumerable()
                       where row.Field<string>("ParentID") == Id
                       select row;
        foreach (var item in children)
        {
            string tid = (item.Field<string>("TID") ?? "").Trim();
            if (list.Contains(tid))
            {
                continue;
            }
            list.Add(tid);
            AddListString(ref list, dt, tid);
        }
    }

    #endregion
}
#endregion

最新文章

  1. 基于git diff进行的eslint代码检测
  2. Memcached,你懂的
  3. PHP 单例模式代码片段
  4. Hashtable、Dictionary和List 谁效率更高
  5. [Redux] Filtering Redux State with React Router Params
  6. 均价 和 最新价格 是啥意思 什么是MACD DIFF DEA 指标?
  7. 列表checkbox全选
  8. JSU省赛队员选拔赛个人赛1(Coin Change、Fibbonacci Number、Max Num、单词数、无限的路、叠筐)
  9. HDU 3613 Best Reward 正反两次扩展KMP
  10. 【前端】:css
  11. 使用CSS3动画实现绚丽的照片墙效果
  12. mybatis-XML配置文件
  13. Nordic nRF51/nRF52开发环境搭建
  14. [转]XModem协议
  15. win2008 401 - 未授权: 由于凭据无效,访问被拒绝。解决方法
  16. vue this.$router.push和this.$route.path的区别
  17. 微信小程序模板中使用循环
  18. LeetCode题解之Find All Duplicates in an Array
  19. Android Fragment的使用(转载)
  20. 含有按钮的ScrollView在iOS8中无法滚动的解决办法 | ScrollView with UIControl/UIButton subviews not scrollable under iOS 8

热门文章

  1. iOS开发系列-NSURLConnection
  2. LeetCode 206.反转链表(Python3)
  3. 【期望DP】[poj2096]Collecting Bugs
  4. 串口通信中,QString 、QByteArray 转化需要注意的问题
  5. es6 + 笔记整理
  6. [JZOJ4788] 【NOIP2016提高A组模拟9.17】序列
  7. 带权二分图——KM算法hdu2255 poj3565
  8. Hystrix容错处理
  9. JS 作用域、原型链
  10. PHP函数高级(二)