You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

132 lines
5.5 KiB

2 years ago
  1. using log4net;
  2. using Lucene.Net.Analysis;
  3. using Lucene.Net.Index;
  4. using Lucene.Net.Store;
  5. using System.Collections.Generic;
  6. using System.IO;
  7. using System.Threading;
  8. using System.Web.Hosting;
  9. namespace WebApp
  10. {
  /// <summary>
  /// Process-wide singleton that collects <see cref="IndexTask"/> items and applies them to the
  /// Lucene.Net index stored under <c>~/Index</c> from a single background thread.
  /// </summary>
  /// <remarks>
  /// Producers call <see cref="AddArticle"/> / <see cref="UpdateArticle"/> from any thread; the
  /// worker started by <see cref="Start"/> drains the queue in batches. All access to the queue is
  /// serialized through a private lock because <see cref="Queue{T}"/> is not thread-safe.
  /// NOTE(review): document creation is still commented out (see <c>ApplyTask</c>), so at the
  /// moment a task only removes existing documents for non-Add task types.
  /// </remarks>
  public class IndexManager
  {
      /// <summary>Name of the index field/term that holds the task id.</summary>
      private const string IdFieldName = "id";

      /// <summary>How long the worker sleeps when the queue is empty, in milliseconds.</summary>
      private const int IdlePollMilliseconds = 2000;

      /// <summary>The single shared instance.</summary>
      public static readonly IndexManager Instance = new IndexManager();

      private static readonly string IndexPath = HostingEnvironment.MapPath("~/Index");
      private static readonly ILog log = LogManager.GetLogger(typeof(IndexManager));

      /// <summary>Pending tasks; only touched while holding <see cref="queueLock"/>.</summary>
      private readonly Queue<IndexTask> indexQueue = new Queue<IndexTask>();

      /// <summary>Guards every read and write of <see cref="indexQueue"/>.</summary>
      private readonly object queueLock = new object();

      /// <summary>0 until <see cref="Start"/> has launched the worker, 1 afterwards.</summary>
      private int workerStarted;

      private IndexManager()
      { }

      // Explicit (empty) static constructor kept from the original singleton declaration.
      static IndexManager()
      { }

      /// <summary>
      /// Launches the background worker thread that applies queued tasks to the index.
      /// Calls after the first one are ignored.
      /// </summary>
      public void Start()
      {
          // Only one worker may exist: the worker force-unlocks the index before opening a writer,
          // so a second worker could end up writing to the same index concurrently.
          if (Interlocked.Exchange(ref workerStarted, 1) != 0)
          {
              return;
          }

          var thread = new Thread(WatchIndexTask)
          {
              IsBackground = true
          };
          thread.Start();
          log.Debug("IndexManager has been lunched successfully!");
      }

      /// <summary>
      /// Queues <paramref name="task"/> as an <see cref="TaskTypeEnum.Add"/> task.
      /// </summary>
      /// <param name="task">Task to queue; its <see cref="IndexTask.TaskType"/> is overwritten.</param>
      /// <exception cref="System.ArgumentNullException"><paramref name="task"/> is null.</exception>
      public void AddArticle(IndexTask task)
      {
          Enqueue(task, TaskTypeEnum.Add);
      }

      /// <summary>
      /// Queues <paramref name="task"/> as an <see cref="TaskTypeEnum.Update"/> task.
      /// </summary>
      /// <param name="task">Task to queue; its <see cref="IndexTask.TaskType"/> is overwritten.</param>
      /// <exception cref="System.ArgumentNullException"><paramref name="task"/> is null.</exception>
      public void UpdateArticle(IndexTask task)
      {
          Enqueue(task, TaskTypeEnum.Update);
      }

      /// <summary>Stamps the task with its type and appends it to the queue under the lock.</summary>
      private void Enqueue(IndexTask task, TaskTypeEnum taskType)
      {
          if (task == null)
          {
              throw new System.ArgumentNullException(nameof(task));
          }

          task.TaskType = taskType;
          lock (queueLock)
          {
              indexQueue.Enqueue(task);
          }
      }

      /// <summary>Atomically removes and returns every queued task (empty array if none).</summary>
      private IndexTask[] TakePendingTasks()
      {
          lock (queueLock)
          {
              var pending = indexQueue.ToArray();
              indexQueue.Clear();
              return pending;
          }
      }

      /// <summary>
      /// Worker loop: takes all pending tasks, applies them as one batch, and sleeps while idle.
      /// </summary>
      private void WatchIndexTask()
      {
          while (true)
          {
              var batch = TakePendingTasks();
              if (batch.Length == 0)
              {
                  Thread.Sleep(IdlePollMilliseconds);
                  continue;
              }

              try
              {
                  ProcessBatch(batch);
              }
              catch (ThreadAbortException)
              {
                  // The thread is being torn down (e.g. app domain unload); do not report it as
                  // an indexing failure.
                  throw;
              }
              catch (System.Exception ex)
              {
                  // An unhandled exception here would end the worker (and, on a plain thread,
                  // the process). Log it and keep serving later tasks instead.
                  log.Error("Failed to apply a batch of index tasks; the batch has been dropped.", ex);
              }
          }
      }

      /// <summary>
      /// Opens the index directory and a writer, applies every task in <paramref name="batch"/>,
      /// and always releases both again, even when a task fails.
      /// </summary>
      private static void ProcessBatch(IndexTask[] batch)
      {
          // Location where the index files are stored.
          using (var directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory()))
          {
              // Whether an index already exists in the directory.
              var isUpdate = IndexReader.IndexExists(directory);
              log.Debug($"The status of index : {isUpdate}");

              // If the index is still locked (for example because the program exited abnormally
              // while indexing), unlock it first. Lucene.Net locks the index before writing and
              // unlocks it on close. This must not run from several threads; it only recovers
              // an index that was accidentally left locked forever.
              if (isUpdate && IndexWriter.IsLocked(directory))
              {
                  log.Debug("The index is existed, need to unlock.");
                  IndexWriter.Unlock(directory); // Forced unlock; to be improved.
              }

              // Writer arguments: index directory, PanGu analyzer for tokenizing, create a new
              // index when none exists yet, no field length limit. Opening the writer locks the
              // index files.
              using (var writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate,
                  IndexWriter.MaxFieldLength.UNLIMITED))
              {
                  log.Debug($"Total number of task : {batch.Length}");
                  foreach (var task in batch)
                  {
                      ApplyTask(writer, task);
                  }
              } // Disposing the writer unlocks the index files.
          } // The directory must be closed as well, otherwise the indexed results cannot be found.

          log.Debug("The index library has been closed!");
      }

      /// <summary>Applies a single task to the open <paramref name="writer"/>.</summary>
      private static void ApplyTask(IndexWriter writer, IndexTask task)
      {
          var id = task.TaskId;

          // TODO: document creation is currently disabled; the intended implementation is kept
          // below for reference.
          //ArticleService articleService = new ArticleService();
          //Article article = articleService.GetById(id);
          //if (article == null)
          //{
          //    return;
          //}
          //// One Document corresponds to one record.
          //Document document = new Document();
          //// Each Document has its own fields; field names are free-form and values are strings.
          //// Field.Store.YES also stores the original text, so no extra database lookup is needed.
          //document.Add(new Field("id", id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
          //// Fields that take part in full-text search use Field.Index.ANALYZED.
          //// Field.Index.ANALYZED: store the tokenized content; required for fuzzy queries.
          //// WITH_POSITIONS_OFFSETS: also store the distances between the tokens.
          //document.Add(new Field("title", article.Title, Field.Store.YES, Field.Index.ANALYZED,
          //    Field.TermVector.WITH_POSITIONS_OFFSETS));
          //document.Add(new Field("msg", article.Msg, Field.Store.YES, Field.Index.ANALYZED,
          //    Field.TermVector.WITH_POSITIONS_OFFSETS));

          if (task.TaskType != TaskTypeEnum.Add)
          {
              // Avoid duplicate entries: remove documents already indexed under this id
              // (like "delete from t where id = i"); deletes nothing when none exist.
              writer.DeleteDocuments(new Term(IdFieldName, id.ToString()));
          }

          // Write the document to the index.
          //writer.AddDocument(document);
          log.Debug($"Index {id} has been writen to index library!");
      }
  }
  /// <summary>
  /// A single unit of work for the index worker: which record to process and how.
  /// </summary>
  public class IndexTask
  {
      /// <summary>
      /// Identifier of the record this task refers to.
      /// NOTE(review): presumably the article id — confirm against the callers.
      /// </summary>
      public long TaskId
      {
          get;
          set;
      }

      /// <summary>Kind of operation requested for the record.</summary>
      public TaskTypeEnum TaskType
      {
          get;
          set;
      }
  }
  /// <summary>
  /// Operation kinds an <c>IndexTask</c> can carry.
  /// </summary>
  public enum TaskTypeEnum
  {
      /// <summary>The record is new to the index.</summary>
      Add = 0,

      /// <summary>The record replaces one that may already be indexed.</summary>
      Update = 1
  }
  115. }