参考:https://www.cnblogs.com/kubidemanong/p/10834993.html

/// <summary>
/// A single node of the character trie: holds its own character, an
/// end-of-word flag, and a map from next character to child node.
/// </summary>
public class TreeNode
{
    /// <summary>The character this node represents.</summary>
    public char Char;

    /// <summary>True when a stored word ends exactly at this node.</summary>
    public bool IsEnd;

    /// <summary>Scratch slot for the input index at which a match ended; not used by this class itself.</summary>
    public int WordEndAt;

    // Children keyed by their character.
    private readonly Dictionary<char, TreeNode> children = new Dictionary<char, TreeNode>();

    /// <summary>Creates a childless, non-terminal node for <paramref name="c"/>.</summary>
    /// <param name="c">Character represented by the new node.</param>
    public TreeNode(char c)
    {
        Char = c;
        IsEnd = false;
    }

    /// <summary>Reports whether a child exists for <paramref name="ch"/>.</summary>
    /// <param name="ch">Character to look up.</param>
    /// <returns>True if a child node is registered under that character.</returns>
    public bool ContainChar(char ch) => children.ContainsKey(ch);

    /// <summary>Looks up the child registered for <paramref name="c"/>.</summary>
    /// <param name="c">Character to look up.</param>
    /// <returns>The child node, or null when there is none.</returns>
    public TreeNode GetChild(char c)
    {
        TreeNode found;
        return children.TryGetValue(c, out found) ? found : null;
    }

    /// <summary>Returns the child for <paramref name="ch"/>, creating and registering it first if it is missing.</summary>
    /// <param name="ch">Character of the child to fetch or create.</param>
    /// <returns>The existing or newly created child node.</returns>
    public TreeNode AddNode(char ch)
    {
        TreeNode existing;
        if (children.TryGetValue(ch, out existing))
        {
            return existing;
        }

        TreeNode created = new TreeNode(ch);
        children.Add(ch, created);
        return created;
    }
}
/// <summary>
/// Sensitive-word filter backed by a character trie. Words are registered
/// with <see cref="AddNode"/>; <see cref="Filter"/> returns a copy of its
/// input in which every detected word is replaced by '*' characters.
/// Matching is done on an invariant lower-cased copy of the input.
/// </summary>
public class Trie
{
    // Punctuation/whitespace that may appear inside a disguised word and that
    // also counts as a word separator for English text.
    private const string SkipCharList = " `-=[]\\',.·/~!@#$%^&*()_+{}|:\"<>?*\r\n";

    private readonly HashSet<char> SkipCharSet;     // characters ignored while matching
    private readonly HashSet<char> SeparateCharSet; // common word separators

    /// <summary>Root of the trie; pass it to <see cref="AddNode"/> to register a word.</summary>
    public TreeNode Root { get; }

    /// <summary>Creates an empty filter with the built-in skip/separator character sets.</summary>
    public Trie()
    {
        Root = new TreeNode(' ');
        SkipCharSet = new HashSet<char>(SkipCharList);
        SeparateCharSet = new HashSet<char>(SkipCharList);
    }

    /// <summary>
    /// Inserts <paramref name="word"/> below <paramref name="node"/>, one trie
    /// level per character, and marks the last node as a word end. Characters
    /// are lower-cased (invariant) because <see cref="Filter"/> matches against
    /// a lower-cased copy of its input. An empty word is ignored.
    /// </summary>
    /// <param name="node">Node to insert under (normally <see cref="Root"/>).</param>
    /// <param name="word">Word to register.</param>
    /// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
    public void AddNode(TreeNode node, string word)
    {
        if (node == null)
        {
            throw new System.ArgumentNullException(nameof(node));
        }
        if (word == null)
        {
            throw new System.ArgumentNullException(nameof(word));
        }
        if (word.Length == 0)
        {
            return;
        }

        TreeNode current = node;
        foreach (char ch in word)
        {
            current = current.AddNode(char.ToLowerInvariant(ch));
        }
        current.IsEnd = true;
    }

    // True when c is one of the ignorable characters.
    private bool IsSkipChar(char c)
    {
        return SkipCharSet.Contains(c);
    }

    // True when ch is an ASCII letter (a-z or A-Z).
    private bool IsEnglishChar(char ch)
    {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    // True when ch is a separator character.
    private bool Separator(char ch)
    {
        return SeparateCharSet.Contains(ch);
    }

    // True when pos is the first character of a word: the start of the text,
    // a non-separator right after a separator, or an English letter right
    // after a non-English character.
    private bool IsWordBegin(string word, int pos)
    {
        if (pos == 0)
        {
            return true;
        }
        if (pos >= word.Length)
        {
            return false;
        }

        char previous = word[pos - 1];
        char current = word[pos];
        return (Separator(previous) && !Separator(current))
            || (!IsEnglishChar(previous) && IsEnglishChar(current));
    }

    // True when pos is the last character of a word: the end of the text,
    // a non-separator right before a separator, or an English letter right
    // before a non-English character.
    private bool IsWordEnd(string word, int pos)
    {
        if (pos == word.Length - 1)
        {
            return true;
        }
        if (pos >= word.Length)
        {
            return false;
        }

        char current = word[pos];
        char next = word[pos + 1];
        return (!Separator(current) && Separator(next))
            || (IsEnglishChar(current) && !IsEnglishChar(next));
    }

    // Walks the trie from `first` (the node already matched for word[begin])
    // and returns the index of the last input character that belongs to the
    // longest match, or -1 when no registered word was recognised.
    // All scan state is local, so the trie itself is never modified here.
    private int FindMatchEnd(string word, int begin, TreeNode first)
    {
        TreeNode cursor = first;
        // A one-character word is already complete at `begin`.
        TreeNode matchNode = first.IsEnd ? first : null;
        int matchEnd = first.IsEnd ? begin : -1;

        for (int index = begin + 1; index < word.Length; ++index)
        {
            char ch = word[index];
            TreeNode child = cursor.GetChild(ch);

            if (IsSkipChar(ch))
            {
                // Skip characters are followed when the word contains them
                // (e.g. a space inside a phrase) and ignored otherwise.
                if (child != null)
                {
                    cursor = child;
                }
                continue;
            }

            if (child != null)
            {
                cursor = child;
                if (cursor.IsEnd)
                {
                    // Remember this match but keep going for a longer one.
                    matchNode = cursor;
                    matchEnd = index;
                }
                continue;
            }

            // No trie edge: only a repeat of the previous character is
            // tolerated (so inputs like "fuccccccck" are still caught).
            if (ch != word[index - 1])
            {
                break;
            }

            // Extend the masked range only when the repeated character is the
            // tail of the recorded match itself, not of a longer candidate
            // that has not completed.
            if (object.ReferenceEquals(cursor, matchNode))
            {
                matchEnd = index;
            }
        }

        return matchEnd;
    }

    /// <summary>
    /// Returns a copy of <paramref name="filterWord"/> with every detected
    /// sensitive word replaced by '*' (one per original character).
    /// </summary>
    /// <remarks>
    /// A candidate starting with a non-English character is matched strictly,
    /// i.e. at any position. A candidate starting with an English letter must
    /// start and end on word boundaries: with "av" registered, "have" is left
    /// untouched, whereas strict matching would turn "avoid" into "**oid".
    /// </remarks>
    /// <param name="filterWord">Text to filter.</param>
    /// <returns>The filtered text; same length as the input.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="filterWord"/> is null.</exception>
    public string Filter(string filterWord)
    {
        if (filterWord == null)
        {
            throw new System.ArgumentNullException(nameof(filterWord));
        }

        // Invariant lower-casing keeps matching independent of the current culture.
        string word = filterWord.ToLowerInvariant();
        StringBuilder result = new StringBuilder(filterWord);

        int begin = 0;
        while (begin < word.Length)
        {
            char ch = word[begin];
            bool isStrict = !IsEnglishChar(ch);
            bool isWordBegin = isStrict || IsWordBegin(word, begin);

            // A match never starts on a skip character.
            if (isWordBegin && !IsSkipChar(ch))
            {
                TreeNode first = Root.GetChild(ch);
                if (first != null)
                {
                    int matchEnd = FindMatchEnd(word, begin, first);
                    // English matches must also finish on a word boundary.
                    if (matchEnd >= 0 && (isStrict || IsWordEnd(word, matchEnd)))
                    {
                        for (int i = begin; i <= matchEnd; ++i)
                        {
                            result[i] = '*';
                        }
                        // Resume right after the masked range.
                        begin = matchEnd;
                    }
                }
            }

            ++begin;
        }

        return result.ToString();
    }
}

测试用例:

 /// <summary>Console demo: registers a few sample words and prints filtered sample sentences.</summary>
 class Program
 {
     static void Main(string[] args)
     {
         Trie trie = new Trie();

         // Words to register, in insertion order.
         string[] sensitiveWords = { "fuc", "fuc bitch", "fuck", "bitch", "屠杀" };
         foreach (string sensitiveWord in sensitiveWords)
         {
             trie.AddNode(trie.Root, sensitiveWord);
         }

         // Sample inputs; each filtered result is written on its own line.
         string[] samples =
         {
             "是哦fuckkkkkk山大的撒bi\tch",
             "have world fuc bitch",
             "1218fuck1"
         };
         foreach (string sample in samples)
         {
             Console.WriteLine(trie.Filter(sample));
         }

         // Keep the console window open until a key is pressed.
         Console.ReadKey();
     }
 }

结果:

最新文章

  1. MFC编程入门之二十七(常用控件:图片控件PictureControl)
  2. yii2 如何在页面底部加载css和js
  3. JavaMail入门第二篇 创建邮件
  4. vs2015 使用 visual studio on line 在线版本控制
  5. soapUI参数
  6. SORT_AREA_RETAINED_SIZE
  7. 译文链接:http://www.codeceo.com/article/10-truth-programmer-must-know.html
  8. 点击返回键退出popupwindow的方法
  9. Docker学习笔记 — Docker私有仓库搭建【转载】
  10. Windows 常用消息及含义
  11. 十二个 ASP.NET Core 例子——1.1版本 EF MySql快速搭建
  12. Dynamics CRM2013 ScLib::AccessCheckEx failed
  13. Linux学习之文件系统权限及表示
  14. getting-started-with-mqtt
  15. javascript常用积累
  16. Elasticsearch利用scroll查询获取所有数据
  17. 基于DB的编程
  18. PL/SQL之存储过程和函数
  19. 欢迎来怼-----Beta冲刺贡献分数分配结果
  20. oracle oci 调用 1

热门文章

  1. C# 类型转换相关
  2. QSS学习
  3. (原创)odoo中字段默认值的获取顺序
  4. java-Stream流的常见方法
  5. K8S群集调度器
  6. JAVA框架知识
  7. docker停止所有窗容器
  8. usb 2.0枚举过程
  9. gitee上传VS2022已有项目
  10. 在windows上搭建spark遇到的问题