Filtering HTML tags in Java using regular expressions

  • 2020-05-17 05:30:11
  • OfStack

Recently, during development, I needed to show a short introduction for each article (that is, its first 200 characters), stored in a hidden field. Someone may ask why the text is not simply truncated on the server side: it is because the content coming from the editor contains HTML tags, so the server first has to strip those tags with a regular expression before cutting the text. I searched online the other day and found that someone had already written a class for this, so I am sharing it here; please bear with me...


import java.util.regex.Matcher; 
import java.util.regex.Pattern; 
 
/**
 * HTML-related regular-expression utilities: escaping markup characters, stripping tags,
 * and rewriting a tag into a custom form built from one of its attributes.
 *
 * <p>The expressions used here are a lightweight heuristic, not an HTML parser: a tag is
 * taken to run from {@code <} to the next {@code >}, so a {@code >} inside an attribute
 * value ends the match early.
 *
 * <p>Copyright: Copyright (c) 2006
 *
 * @author hejian
 * @version 1.0
 * @createtime 2006-10-16
 */
public class HtmlRegexpUtil {

    /** Matches anything that starts with {@code <} and ends with the next {@code >}. */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<([^>]*)>");

    /** Regex source for an IMG tag; kept for reference, not used inside this class. */
    private static final String REGXP_FOR_IMG_TAG = "<\\s*img\\s+([^>]*)\\s*>";

    /** Regex source for the SRC attribute of an IMG tag; kept for reference, not used inside this class. */
    private static final String REGXP_FOR_IMG_TAG_SRC_ATTRIB = "src=\"([^\"]+)\"";

    /**
     * Creates a utility instance; only needed for the instance methods
     * {@link #replaceTag(String)} and {@link #hasSpecialChars(String)}.
     */
    public HtmlRegexpUtil() {
    }

    /**
     * Escapes the characters {@code <}, {@code >}, {@code "} and {@code &} as HTML entities
     * so that markup is displayed literally instead of being interpreted.
     * Single quotes are not escaped.
     *
     * @param input text to escape; may be {@code null}
     * @return the escaped text, or {@code input} itself (possibly {@code null}) when it
     *         contains none of the four characters
     */
    public String replaceTag(String input) {
        if (!hasSpecialChars(input)) {
            return input;
        }
        StringBuilder filtered = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
                case '<':
                    filtered.append("&lt;");
                    break;
                case '>':
                    filtered.append("&gt;");
                    break;
                case '"':
                    filtered.append("&quot;");
                    break;
                case '&':
                    filtered.append("&amp;");
                    break;
                default:
                    filtered.append(c);
            }
        }
        return filtered.toString();
    }

    /**
     * Tells whether the text contains any of {@code <}, {@code >}, {@code "} or {@code &}.
     *
     * @param input text to inspect; may be {@code null}
     * @return {@code true} if at least one of the four characters is present;
     *         {@code false} for {@code null} or empty input
     */
    public boolean hasSpecialChars(String input) {
        if (input == null) {
            return false;
        }
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c == '<' || c == '>' || c == '"' || c == '&') {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every span that starts with {@code <} and ends with the next {@code >}.
     *
     * @param str text to strip; may be {@code null}
     * @return the text without those spans, or {@code null} when {@code str} is {@code null}
     */
    public static String filterHtml(String str) {
        if (str == null) {
            return null;
        }
        return HTML_TAG_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Removes every opening (or self-closing) occurrence of the given tag, with or without
     * attributes, ignoring case. Closing tags such as {@code </p>} are left untouched.
     *
     * @param str text to strip; may be {@code null}
     * @param tag tag name to remove; it is inserted into a regular expression as-is, so
     *            regex metacharacters in it keep their regex meaning
     * @return the text without the matching tags, or {@code null} when {@code str} is {@code null}
     */
    public static String fiterHtmlTag(String str, String tag) {
        if (str == null) {
            return null;
        }
        // After the name, the rest of the tag is optional but must begin with whitespace or
        // '/', so "b" matches <b> and <b class="x"> but not <br>.
        String regxp = "<\\s*(?:" + tag + ")(?:[\\s/][^>]*)?>";
        Pattern pattern = Pattern.compile(regxp, Pattern.CASE_INSENSITIVE);
        return pattern.matcher(str).replaceAll("");
    }

    /**
     * Replaces each occurrence of a tag that carries attributes with
     * {@code startTag + attributeValue + endTag}, where {@code attributeValue} is the
     * double-quoted value of {@code tagAttrib} inside that tag. Matching ignores case.
     * A matching tag that lacks the attribute is removed. Tags written without any
     * attributes are not matched and stay as they are.
     *
     * <p>Example: with {@code beforeTag="img"}, {@code tagAttrib="src"},
     * {@code startTag="[img]"} and {@code endTag="[/img]"}, the text
     * {@code <img src="a.png">} becomes {@code [img]a.png[/img]}.
     *
     * @param str       text to process; may be {@code null}
     * @param beforeTag name of the tag to replace; inserted into a regular expression as-is
     * @param tagAttrib name of the attribute whose value is kept; inserted into a regular
     *                  expression as-is
     * @param startTag  text emitted before the attribute value
     * @param endTag    text emitted after the attribute value
     * @return the processed text, or {@code null} when {@code str} is {@code null}
     */
    public static String replaceHtmlTag(String str, String beforeTag,
            String tagAttrib, String startTag, String endTag) {
        if (str == null) {
            return null;
        }
        Pattern patternForTag = Pattern.compile(
                "<\\s*(?:" + beforeTag + ")\\s+([^>]*)\\s*>", Pattern.CASE_INSENSITIVE);
        Pattern patternForAttrib = Pattern.compile(
                "(?:" + tagAttrib + ")=\"([^\"]+)\"", Pattern.CASE_INSENSITIVE);

        Matcher matcherForTag = patternForTag.matcher(str);
        // StringBuffer rather than StringBuilder: Matcher.appendReplacement only accepts a
        // StringBuilder from Java 9 on.
        StringBuffer sb = new StringBuffer();
        while (matcherForTag.find()) {
            String replacement = "";
            Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag.group(1));
            if (matcherForAttrib.find()) {
                replacement = startTag + matcherForAttrib.group(1) + endTag;
            }
            // Quote the replacement so '$' and '\' in attribute values (for example URLs or
            // Windows paths) are copied literally instead of being read as group references.
            matcherForTag.appendReplacement(sb, Matcher.quoteReplacement(replacement));
        }
        matcherForTag.appendTail(sb);
        return sb.toString();
    }
}

Related articles: