Java使用DFA算法实现过滤多家公司自定义敏感字功能详解

作者:袖梨 2022-06-29

背景

最近通讯业务有个需求:需要让多家客户公司各自自定义敏感词,并按各自的规则过滤文本。这里选择用DFA算法来实现,不过和传统的DFA写法不太一样。

模式图

直接上代码

/**
 * Sensitive-word filter for several companies, built on one shared character trie (DFA).
 *
 * <p>Data layout of {@link #currentMap}: every node is a map whose {@link Character} keys lead to
 * child nodes. In addition, each node reached by at least one keyword of a company carries the
 * {@link String} key {@code "companyId" + id} with the value {@code "0"} (a keyword of that
 * company passes through this node) or {@code "1"} (a keyword of that company ends here).
 *
 * <p>Reconstruction note: the published listing had lost its loop bodies, so the scanning logic
 * below was rebuilt. Two choices could not be read from the listing and were made deliberately:
 * matching is longest-match at each position, and {@link #repword} writes a single {@code "*"}
 * in place of each keyword occurrence.
 *
 * <p>Original author: 姚旭民 (2017-08).
 */
public class KeywordFilter {

  /** Text written in place of each keyword occurrence by {@link #repword}. */
  private static final String REPLACEMENT = "*";

  /** Node marker: a keyword of the company continues through this node. */
  private static final String PASS_THROUGH = "0";

  /** Node marker: a keyword of the company ends at this node. */
  private static final String TERMINAL = "1";

  /** Root of the shared trie; see the class comment for the layout. */
  public static Map<Object, Object> currentMap = new ConcurrentHashMap<>();

  /**
   * Former shared traversal cursor.
   *
   * @deprecated no longer read or written; traversal now uses local variables so that
   *     concurrent calls cannot overwrite each other's position. Kept only so existing
   *     references still compile.
   */
  @Deprecated
  public static Map<Object, Object> nowhash = null;

  /**
   * Former shared child-node holder.
   *
   * @deprecated no longer read or written; kept only so existing references still compile.
   */
  @Deprecated
  public static Object wordMap;

  // Utility class: not instantiable.
  private KeywordFilter() {
  }

  /** Builds the per-company marker key stored on trie nodes. */
  private static String getKey(int companyId) {
    return "companyId" + companyId;
  }

  /** Removes every keyword of every company. */
  public static void clear() {
    currentMap.clear();
  }

  /**
   * Registers the keywords of one company.
   *
   * <p>Each keyword is trimmed; {@code null} and blank entries are skipped. Calling this method
   * again for the same company adds to the keywords already registered.
   *
   * @param companyId company the keywords belong to
   * @param keywords keywords to register; {@code null} is treated as an empty list
   */
  public static void saveKeywords(int companyId, List<String> keywords) {
    if (keywords == null) {
      return;
    }
    String key = getKey(companyId);
    for (String raw : keywords) {
      if (raw == null) {
        continue;
      }
      String keyword = raw.trim();
      if (keyword.isEmpty()) {
        continue;
      }
      Map<Object, Object> node = currentMap;
      for (int j = 0; j < keyword.length(); j++) {
        node = childOrCreate(node, keyword.charAt(j));
        // putIfAbsent: never downgrade an existing "1" (a shorter keyword ending here) to "0".
        node.putIfAbsent(key, PASS_THROUGH);
      }
      node.put(key, TERMINAL);
    }
  }

  /**
   * Replaces the company's keywords found in a text.
   *
   * @param companyId company whose keywords apply
   * @param txt text to filter; {@code null} is treated as the empty string
   * @return a two-element list: index 0 is the text with each keyword occurrence replaced by
   *     {@code "*"}; index 1 is the distinct keywords found, in order of first occurrence,
   *     joined by {@code ","} (the empty string when none were found)
   */
  public static List<String> repword(int companyId, String txt) {
    String text = txt == null ? "" : txt;
    String key = getKey(companyId);
    StringBuilder filtered = new StringBuilder(text.length());
    java.util.Set<String> hits = new java.util.LinkedHashSet<>();
    int i = 0;
    while (i < text.length()) {
      int len = matchLength(text, key, i);
      if (len > 0) {
        hits.add(text.substring(i, i + len));
        filtered.append(REPLACEMENT);
        i += len;
      } else {
        filtered.append(text.charAt(i));
        i++;
      }
    }
    List<String> result = new ArrayList<>(2);
    result.add(filtered.toString());
    result.add(String.join(",", hits));
    return result;
  }

  /**
   * Returns the length of the longest keyword of the company that starts at {@code begin}.
   *
   * <p>Deliberately private; do not widen it without a reason.
   *
   * @return the match length in characters, or 0 when no keyword starts at {@code begin}
   */
  private static int checkKeyWords(String txt, int companyId, int begin) {
    return matchLength(txt, getKey(companyId), begin);
  }

  /** Walks the trie from {@code begin} and reports the longest match for the marker key. */
  private static int matchLength(String txt, String key, int begin) {
    Map<Object, Object> node = currentMap;
    int longest = 0;
    for (int i = begin; i < txt.length(); i++) {
      node = child(node, txt.charAt(i));
      if (node == null) {
        break;
      }
      Object state = node.get(key);
      if (state == null) {
        // The node exists only for other companies' keywords.
        break;
      }
      if (TERMINAL.equals(state)) {
        longest = i - begin + 1;
      }
    }
    return longest;
  }

  /** Returns the child node for {@code c}, or {@code null} when there is none. */
  @SuppressWarnings("unchecked")
  private static Map<Object, Object> child(Map<Object, Object> node, char c) {
    return (Map<Object, Object>) node.get(c);
  }

  /** Returns the child node for {@code c}, creating it when missing. */
  @SuppressWarnings("unchecked")
  private static Map<Object, Object> childOrCreate(Map<Object, Object> node, char c) {
    return (Map<Object, Object>)
        node.computeIfAbsent(Character.valueOf(c), k -> new ConcurrentHashMap<Object, Object>());
  }

  /**
   * Lists the company's keywords contained in a text.
   *
   * <p>The text is scanned as-is. Keywords are compared literally, so characters that are
   * special in regular expressions need no escaping.
   *
   * @param txt text to inspect
   * @param companyId company whose keywords apply
   * @return the distinct keywords found, in order of first occurrence, joined by {@code ","};
   *     {@code null} when the text is {@code null} or contains no keyword
   */
  public static String getTxtKeyWords(String txt, int companyId) {
    if (txt == null) {
      return null;
    }
    String key = getKey(companyId);
    java.util.Set<String> hits = new java.util.LinkedHashSet<>();
    int i = 0;
    while (i < txt.length()) {
      int len = matchLength(txt, key, i);
      if (len > 0) {
        hits.add(txt.substring(i, i + len));
        i += len;
      } else {
        i++;
      }
    }
    return hits.isEmpty() ? null : String.join(",", hits);
  }

  /**
   * Tells whether a text contains at least one keyword of the company.
   *
   * <p>Static because the class cannot be instantiated.
   *
   * @param txt text to inspect; {@code null} yields {@code false}
   * @param companyId company whose keywords apply
   */
  public static boolean isKeyWords(String txt, int companyId) {
    if (txt == null) {
      return false;
    }
    for (int i = 0; i < txt.length(); i++) {
      if (checkKeyWords(txt, companyId, i) > 0) {
        return true;
      }
    }
    return false;
  }

  /** Small demonstration: registers four keywords for company 1 and filters a sample text. */
  public static void main(String[] arg) {
    List<String> keywords = new ArrayList<>();
    keywords.add("傻×");
    keywords.add("汉奸");
    keywords.add("草");
    keywords.add("草泥马");
    KeywordFilter.saveKeywords(1, keywords);
    String txt = "是傻×汉奸傻A傻B傻C傻D汉奸傻×草泥马";
    List<String> list = repword(1, txt);
    System.out.println("文中包含的敏感字为:" + list.get(1));
    System.out.println("原文:" + txt);
    System.out.println("敏感字过滤后:" + list.get(0));
  }
}

相关文章

精彩推荐