一、什麼是敏感詞過濾? 敏感詞過濾是一種處理網路內容的技術,可以檢測和過濾出網路中的敏感/違禁辭彙。它通過給定的關鍵字或字元串,判斷網路內容是否包含某些敏感信息,從而防止違反法律法規的信息流通。 通常,可以使用兩種方法來過濾敏感詞: 黑名單過濾:即定義一個黑名單,將所有敏感詞記錄在其中,然後對輸入 ...
一、什麼是敏感詞過濾?
敏感詞過濾是一種處理網路內容的技術,可以檢測和過濾出網路中的敏感/違禁辭彙。它通過給定的關鍵字或字元串,判斷網路內容是否包含某些敏感信息,從而防止違反法律法規的信息流通。
通常,可以使用兩種方法來過濾敏感詞:
- 黑名單過濾:即定義一個黑名單,將所有敏感詞記錄在其中,然後對輸入的文本進行對比,如果發現有敏感詞,就將其過濾掉。
- 白名單過濾:即定義一個白名單,將所有不敏感的辭彙記錄在其中,然後對輸入的文本進行對比,如果發現有不在白名單中的辭彙,就將其過濾掉。
二、ToolGood.Words是什麼?
ToolGood.Words是一款高性能非法詞(敏感詞)檢測組件,附帶繁體簡體互換,支持全形半形互換,獲取拼音首字母,獲取拼音字母,拼音模糊搜索等功能。
ToolGood.Words的源碼網站:https://github.com/toolgood/ToolGood.Words
三、在Visual Studio中安裝ToolGood.Words
3.1、右鍵項目解決方案,選擇“管理NuGet程式包”,如下圖所示:
3.2、切換到“瀏覽”選項卡,搜索“ToolGood.Words”並安裝:
安裝完之後最好重新編譯生成項目
四、創建“subContentCheck”類
敏感/違禁辭彙因特殊內容不便上傳,可自行在網站上查找
using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;
namespace WebApplication1 //放在自己項目中時,需要更換為自己的命名空間
{
/// <summary>
/// Serializable container for a list of prohibited keywords.
/// </summary>
public class keywords
{
    /// <summary>
    /// The prohibited keywords. Not initialized by this class, so it is
    /// <c>null</c> until a caller assigns a list.
    /// </summary>
    public List<string> IllegalKeywords
    {
        get;
        set;
    }
}
/// <summary>
/// Serializable container for a list of prohibited URLs.
/// </summary>
public class urlwords
{
    /// <summary>
    /// The prohibited URLs. Not initialized by this class, so it is
    /// <c>null</c> until a caller assigns a list.
    /// </summary>
    public List<string> IllegalUrls
    {
        get;
        set;
    }
}
/// <summary>
/// Checks submitted content against a list of sensitive/prohibited words and URLs
/// that is loaded from two text files under the web root.
/// </summary>
/// <remarks>
/// <see cref="FilterWithChar"/> and <see cref="FindSensitiveKey"/> report failures as the
/// one-character strings "0", "1" and "2" through their normal return value.
/// NOTE(review): <see cref="FilterWithChar"/> returns the filtered text on success, so an
/// input whose filtered form is literally "0", "1" or "2" cannot be told apart from a
/// status code by the caller.
/// </remarks>
public class subContentCheck
{
    /// <summary>
    /// Location of the sensitive-word list, appended to the web root path.
    /// </summary>
    private const string DictionaryPath = "/sensitiveWords/sensitiveWords.txt";

    /// <summary>
    /// Location of the prohibited link/site/URL list, appended to the web root path.
    /// </summary>
    private const string UrlsPath = "/sensitiveWords/IllegalUrls.txt";

    /// <summary>
    /// Hosting environment; only its <c>WebRootPath</c> is read.
    /// </summary>
    private readonly IHostingEnvironment _hostingEnv;

    /// <summary>
    /// The loaded sensitive entries (words first, then URLs).
    /// </summary>
    public string[] Words { get; set; }

    /// <summary>
    /// Stores the hosting environment and immediately loads the word lists from disk.
    /// </summary>
    /// <param name="hostingEnv">Hosting environment that supplies the web root path.</param>
    public subContentCheck(IHostingEnvironment hostingEnv)
    {
        _hostingEnv = hostingEnv;
        InitDictionary();
    }

    /// <summary>
    /// Loads the sensitive-word file and the prohibited-URL file into <see cref="Words"/>.
    /// </summary>
    /// <remarks>
    /// Every line of each file is split on '|' and empty entries are dropped.
    /// <see cref="Words"/> is only replaced after both files were read, so a failed reload
    /// (for example a missing file) leaves the previously loaded list untouched.
    /// </remarks>
    public void InitDictionary()
    {
        // Plain concatenation on purpose: both relative paths start with '/', which
        // Path.Combine would treat as rooted and therefore drop the web root.
        string wordsPath = _hostingEnv.WebRootPath + DictionaryPath;
        string urlPath = _hostingEnv.WebRootPath + UrlsPath;

        List<string> entries = ReadEntries(wordsPath);
        entries.AddRange(ReadEntries(urlPath));

        Words = entries.ToArray();
    }

    /// <summary>
    /// Reads a UTF-8 text file and returns every non-empty '|'-separated entry, in file order.
    /// </summary>
    /// <param name="path">Full path of the file to read.</param>
    /// <returns>The entries found; empty when the file has no non-empty entry.</returns>
    private static List<string> ReadEntries(string path)
    {
        List<string> entries = new List<string>();
        foreach (string line in System.IO.File.ReadAllLines(path, System.Text.Encoding.UTF8))
        {
            foreach (string entry in line.Split('|'))
            {
                if (!string.IsNullOrEmpty(entry))
                {
                    entries.Add(entry);
                }
            }
        }
        return entries;
    }

    /// <summary>
    /// Creates a <see cref="WordsSearch"/> and loads <see cref="Words"/> into it.
    /// </summary>
    /// <param name="wordsSearch">The created searcher; only usable when the method returns true.</param>
    /// <returns>true when the keywords were set; false when setting them threw.</returns>
    private bool TryCreateSearch(out WordsSearch wordsSearch)
    {
        wordsSearch = new WordsSearch();
        try
        {
            wordsSearch.SetKeywords(Words);
            return true;
        }
        catch (Exception)
        {
            // Deliberately swallowed: the public methods report this as status "0".
            return false;
        }
    }

    /// <summary>
    /// Replaces sensitive words in the given text.
    /// </summary>
    /// <param name="sourceText">The text to filter.</param>
    /// <param name="replaceChar">Character passed to the replacement call; defaults to '*'.</param>
    /// <returns>
    /// "2" when <paramref name="sourceText"/> is null or empty;
    /// "0" when loading the keywords failed;
    /// "1" when the replacement call threw;
    /// otherwise the string returned by <c>WordsSearch.Replace</c>.
    /// </returns>
    public string FilterWithChar(string sourceText, char replaceChar = '*')
    {
        if (string.IsNullOrEmpty(sourceText))
        {
            return "2";
        }

        if (!TryCreateSearch(out WordsSearch wordsSearch))
        {
            return "0";
        }

        try
        {
            return wordsSearch.Replace(sourceText, replaceChar);
        }
        catch (Exception)
        {
            // Deliberately swallowed: callers receive the status code instead.
            return "1";
        }
    }

    /// <summary>
    /// Determines whether the given text contains any sensitive/prohibited entry.
    /// </summary>
    /// <param name="sourceText">The text to inspect.</param>
    /// <returns>
    /// "2" when <paramref name="sourceText"/> is null or empty;
    /// "0" when loading the keywords failed;
    /// "1" when the lookup threw;
    /// "3" when the text contains a sensitive entry;
    /// "4" when it does not.
    /// </returns>
    public string FindSensitiveKey(string sourceText)
    {
        if (string.IsNullOrEmpty(sourceText))
        {
            return "2";
        }

        if (!TryCreateSearch(out WordsSearch wordsSearch))
        {
            return "0";
        }

        try
        {
            return wordsSearch.ContainsAny(sourceText) ? "3" : "4";
        }
        catch (Exception)
        {
            // Deliberately swallowed: callers receive the status code instead.
            return "1";
        }
    }

    /// <summary>
    /// Serializes a list to JSON, echoes it to the console and writes it to
    /// "&lt;current directory&gt;/&lt;filename&gt;.json" using the GB2312 encoding.
    /// </summary>
    /// <param name="jsonData">Keyword data; serialized when it is not null.</param>
    /// <param name="urlJsonData">URL data; serialized only when <paramref name="jsonData"/> is null.</param>
    /// <param name="filename">File name without the ".json" extension.</param>
    public static void Write(List<keywords> jsonData, List<urlwords> urlJsonData, string filename)
    {
        // Path.Combine picks the platform's separator; a hard-coded "\\" only works on Windows.
        string strFileName = System.IO.Path.Combine(Directory.GetCurrentDirectory(), filename + ".json");

        string listJson = jsonData != null
            ? JsonConvert.SerializeObject(jsonData)
            : JsonConvert.SerializeObject(urlJsonData);
        Console.WriteLine(listJson);

        // GB2312 is only resolvable once the code-pages provider is registered.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        // FileMode.Create truncates an existing file. OpenOrCreate would keep the old
        // bytes beyond the new content and leave invalid JSON when the new text is shorter.
        using (FileStream fs = new FileStream(strFileName, FileMode.Create, System.IO.FileAccess.ReadWrite, FileShare.ReadWrite))
        using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("GB2312")))
        {
            sw.WriteLine(listJson);
        }
    }
}
}
五、寫API介面
/// <summary>
/// Masks sensitive words in the submitted text and returns the outcome as JSON.
/// </summary>
/// <param name="sourctText">The text to be masked.</param>
/// <returns>
/// A JSON object with <c>code</c>, <c>msg</c> and <c>resultStr</c>. Codes:
/// 200 success (<c>resultStr</c> holds the filtered text), 210 keyword setup failed,
/// 220 unexpected exception (including a word list that cannot be loaded),
/// 230 empty input, 240 replacement failed, 260 the checker reported empty input.
/// </returns>
[HttpPost]
public IActionResult sensitive_words_replace2(string sourctText)
{
    // Reject empty input first: creating the checker reads the word-list files from disk.
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要替換的文本內容為空!", resultStr = "" });
    }

    try
    {
        // Created inside the try so that a missing or unreadable word-list file is
        // answered with code 220 instead of escaping as an unhandled exception.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FilterWithChar(sourctText);

        // NOTE(review): "0"/"1"/"2" are in-band status codes of FilterWithChar, so a text
        // whose filtered form is exactly one of them is reported as an error here.
        int resCode;
        string resMsg;
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resMsg = "設置違禁詞時發生錯誤,請聯繫管理員!";
                break;
            case "1":
                resCode = 240;
                resMsg = "敏感內容替換時發生錯誤!";
                break;
            case "2":
                resCode = 260;
                resMsg = "需要替換的文本內容為空!";
                break;
            default:
                resCode = 200;
                resMsg = "敏感詞替換請求成功!";
                break;
        }

        // Status codes are never echoed back as content.
        if (resCode != 200)
        {
            resultStr = "";
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感內容替換時發生錯誤!", resultStr = "" });
    }
}
/// <summary>
/// Reports whether the submitted text contains sensitive/prohibited words.
/// </summary>
/// <param name="sourctText">The text to be checked.</param>
/// <returns>
/// A JSON object with <c>code</c>, <c>msg</c> and <c>resultStr</c>. Codes:
/// 200 no sensitive word found (<c>resultStr</c> is "4"), 270 sensitive word found,
/// 210 keyword setup failed, 220 unexpected exception (including a word list that
/// cannot be loaded), 230 empty input, 240 lookup failed, 260 the checker reported
/// empty input. <c>resultStr</c> is empty for every code other than 200.
/// </returns>
[HttpPost]
public IActionResult whether_sensitive_words(string sourctText)
{
    // Reject empty input first: creating the checker reads the word-list files from disk.
    if (string.IsNullOrEmpty(sourctText))
    {
        // This endpoint checks rather than replaces, so the message matches the one used for code 260.
        return Json(new { code = 230, msg = "需要判斷的文本內容為空!", resultStr = "" });
    }

    try
    {
        // Created inside the try so that a missing or unreadable word-list file is
        // answered with code 220 instead of escaping as an unhandled exception.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string resultStr = strCheck.FindSensitiveKey(sourctText);

        int resCode;
        string resMsg;
        switch (resultStr)
        {
            case "0":
                resCode = 210;
                resMsg = "設置違禁詞時發生錯誤,請聯繫管理員!";
                break;
            case "1":
                resCode = 240;
                resMsg = "敏感內容匹配時發生錯誤!";
                break;
            case "2":
                resCode = 260;
                resMsg = "需要判斷的文本內容為空!";
                break;
            case "3":
                resCode = 270;
                resMsg = "內容中含有敏感/違禁詞!";
                break;
            default:
                resCode = 200;
                resMsg = "內容中不含敏感/違禁詞!";
                break;
        }

        // Only the success response carries the checker's raw status string.
        if (resCode != 200)
        {
            resultStr = "";
        }

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感內容匹配時發生錯誤!", resultStr = "" });
    }
}
六、前端封裝JS方法
/**
 * Asks the backend to mask sensitive/prohibited words and writes the masked text
 * into a form element.
 *
 * The request is asynchronous: the masked text is delivered by setting the value of
 * `#boxid` inside the success callback. The return value is whatever `resultStr`
 * holds when `$.ajax` returns, which is the initial empty string unless the
 * callback already ran.
 *
 * @param {string} sourctText Text to be masked.
 * @param {string} boxid id attribute of the element whose value receives the masked text.
 * @param {object} layui Layui instance; `layui.layer.alert` shows error dialogs.
 * @returns {string} `resultStr` at the time `$.ajax` returns (see above).
 */
function sensitive_words_replace(sourctText, boxid, layui) {
    let resultStr = "";
    // Status codes the backend uses to signal a failure.
    const failureCodes = [210, 220, 230, 240, 260];
    const alertOptions = { icon: 5, title: "溫馨提示", closeBtn: 0 };

    $.ajax({
        url: "/Home/sensitive_words_replace2", // backend endpoint
        dataType: "JSON",
        type: "POST",
        data: {
            "sourctText": sourctText
        },
        success: function (res) {
            // The code may arrive as a number or as a numeric string.
            const resCode = Number(res.code);
            if (failureCodes.includes(resCode)) {
                // Close the loading layer once the response has arrived.
                layer.closeAll();
                resultStr = res.resultStr;
                layui.layer.alert(res.msg, alertOptions);
            } else if (resCode === 200) {
                resultStr = res.resultStr;
                $("#" + boxid).val(resultStr);
                // Close the loading layer once the response has arrived.
                layer.closeAll();
            }
        },
        error: function (jqXHR, textStatus, errorThrown) {
            // Close the loading layer once the response has arrived.
            layer.closeAll();
            // The first argument is the jqXHR object, not text; build the message
            // from the status strings jQuery passes alongside it.
            const detail = errorThrown || textStatus || "unknown error";
            layui.layer.alert("請求失敗:" + detail, alertOptions);
        }
    });
    return resultStr;
}
/**
 * Asks the backend whether the text contains sensitive/prohibited words.
 *
 * The request is sent synchronously (`async: false`) so that the answer is available
 * when the function returns.
 *
 * @param {string} sourctText Text to be checked.
 * @param {string} boxid Not used by this function; kept so the signature matches
 *   `sensitive_words_replace`.
 * @param {object} layui Layui instance; `layui.layer.alert` shows error dialogs.
 * @returns {boolean} `true` when the backend answers with code 270 (sensitive word
 *   found); `false` for code 200, for any failure code, and when the request fails.
 */
function whether_sensitive_words(sourctText, boxid, layui) {
    let resultBool = false;
    // Status codes the backend uses to signal a failure.
    const failureCodes = [210, 220, 230, 240, 260];
    const alertOptions = { icon: 5, title: "溫馨提示", closeBtn: 0 };

    $.ajax({
        url: "/Home/whether_sensitive_words", // backend endpoint
        dataType: "JSON",
        type: "POST",
        async: false, // must be synchronous, otherwise the function returns before the callback sets the result
        data: {
            "sourctText": sourctText
        },
        success: function (res) {
            // The code may arrive as a number or as a numeric string.
            const resCode = Number(res.code);
            if (failureCodes.includes(resCode)) {
                resultBool = false;
                layui.layer.alert(res.msg, alertOptions);
            } else if (resCode === 270) {
                resultBool = true;
            } else if (resCode === 200) {
                resultBool = false;
                // Close the loading layer once the response has arrived.
                layer.closeAll();
            }
        },
        error: function (jqXHR, textStatus, errorThrown) {
            // The first argument is the jqXHR object, not text; build the message
            // from the status strings jQuery passes alongside it.
            const detail = errorThrown || textStatus || "unknown error";
            layui.layer.alert("請求失敗:" + detail, alertOptions);
        }
    });
    return resultBool;
}
本文來自博客園,作者:TomLucas,轉載請註明原文鏈接:https://www.cnblogs.com/lucasDC/p/17255906.html