JAVA过滤html标签的方法
例子
正则
<table width="620" align="center" border="0" cellpadding="1" cellspacing="1" style="background:#FB7"> <tr> <td width="464" height="27" bgcolor="#FFE7CE"> 代码如下</td> <td width="109" align="center" bgcolor="#FFE7CE" style="cursor:pointer;" onclick="doCopy('copy5512')">复制代码</td> </tr> <tr> <td height="auto" colspan="2" valign="top" bgcolor="#FFFFFF" style="padding:10px;" class="copyclass" id=copy5512>re="<(\\s)*script[^>]*>([\\s\\S](?!<script))*?<\\/script>";
例子
<table width="620" align="center" border="0" cellpadding="1" cellspacing="1" style="background:#FB7"> <tr> <td width="464" height="27" bgcolor="#FFE7CE"> 代码如下</td> <td width="109" align="center" bgcolor="#FFE7CE" style="cursor:pointer;" onclick="doCopy('copy1996')">复制代码</td> </tr> <tr> <td height="auto" colspan="2" valign="top" bgcolor="#FFFFFF" style="padding:10px;" class="copyclass" id=copy1996>public class FilterHTMLTags {
/** Matches a complete script element (opening tag, body, closing tag), ignoring case. */
private static final java.util.regex.Pattern HTML_TEXT_SCRIPT = java.util.regex.Pattern.compile(
        "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches a complete style element (opening tag, body, closing tag), ignoring case. */
private static final java.util.regex.Pattern HTML_TEXT_STYLE = java.util.regex.Pattern.compile(
        "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches any remaining tag: '<', one or more non-'>' characters, then '>'. */
private static final java.util.regex.Pattern HTML_TEXT_TAG = java.util.regex.Pattern.compile(
        "<[^>]+>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/**
 * Strips HTML markup from a string and returns the remaining text.
 *
 * <p>Processing order: script elements (including their content) are removed first,
 * then style elements (including their content), then every remaining tag. Finally
 * each {@code &nbsp;} entity is replaced with a regular space. Other entities are
 * left as they are.
 *
 * @param inputString text that may contain HTML tags; may be {@code null}
 * @return the text with markup removed, or an empty string when
 *         {@code inputString} is {@code null}
 */
public static String HtmlText(String inputString) {
    // Earlier versions swallowed the NullPointerException and returned "";
    // keep that result for null, but make it explicit.
    if (inputString == null) {
        return "";
    }

    // Script and style elements go first so that their bodies are dropped too,
    // not just their tags.
    String text = HTML_TEXT_SCRIPT.matcher(inputString).replaceAll("");
    text = HTML_TEXT_STYLE.matcher(text).replaceAll("");
    text = HTML_TEXT_TAG.matcher(text).replaceAll("");

    // Non-breaking-space entity -> plain space (literal replacement, no regex needed).
    return text.replace("&nbsp;", " ");
}
}
例子
过滤URL网址,邮箱地址,html标签,JS代码,各种转义字符
<table width="620" align="center" border="0" cellpadding="1" cellspacing="1" style="background:#FB7"> <tr> <td width="464" height="27" bgcolor="#FFE7CE"> 代码如下</td> <td width="109" align="center" bgcolor="#FFE7CE" style="cursor:pointer;" onclick="doCopy('copy4227')">复制代码</td> </tr> <tr> <td height="auto" colspan="2" valign="top" bgcolor="#FFFFFF" style="padding:10px;" class="copyclass" id=copy4227>
public static final String Upset = " ";
public static String killTags(String news) {
String s = news.replaceAll("amp;", "").replaceAll("<","<").replaceAll(">", ">");
Pattern pattern = Pattern.compile("<(span)?\\sstyle.*?style>|(span)?\\sstyle=.*?>", Pattern.DOTALL);
Matcher matcher = pattern.matcher(s);
String str = matcher.replaceAll("");
Pattern pattern2 = Pattern.compile("(<[^>] >)",Pattern.DOTALL);
Matcher matcher2 = pattern2.matcher(str);
String strhttp = matcher2.replaceAll(" ");
String regEx = "(((http|https|ftp)(\\s)*((\\:)|:))(\\s)*(//|//)(\\s)*)?"
"([\\sa-zA-Z0-9(\\.|.)(\\s)*\\-] ((\\:)|(:)[\\sa-zA-Z0-9(\\.|.)&%\\$\\-] )*@(\\s)*)?"
"("
"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
"(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
"(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
"(\\.|.)(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
"|([\\sa-zA-Z0-9\\-] (\\.|.)(\\s)*)*[\\sa-zA-Z0-9\\-] (\\.|.)(\\s)*[\\sa-zA-Z]*"
")"
"((\\s)*(\\:)|(:)(\\s)*[0-9] )?"
"(/(\\s)*[^/][\\sa-zA-Z0-9\\.\\,\\?\\'\\\\/\\ &%\\$\\=~_\\-@]*)*";
Pattern p1 = Pattern.compile(regEx,Pattern.DOTALL);
Matcher matchhttp = p1.matcher(strhttp);
String strnew = matchhttp.replaceAll("").replaceAll("(if[\\s]*\\(|else|elseif[\\s]*\\().*?;", " ");
Pattern patterncomma = Pattern.compile("(&[^;] ;)",Pattern.DOTALL);
Matcher matchercomma = patterncomma.matcher(strnew);
String strout = matchercomma.replaceAll(" ");
String answer = strout.replaceAll("[\\pP‘’“”]", " ")
.replaceAll("\r", " ").replaceAll("\n", " ")
.replaceAll("\\s", " ").replaceAll(Upset, "");
return answer;
}
您可能感兴趣的文章:
php正则过滤html特殊字符
php删除字符串中html标签的函数
php用strip_tags完整去除所有html标签的实例分享
php删除html标签及字符串中html标签的代码
php 防注入的一段代码(过滤参数)
php利用正则过滤链接、标签,空格,换行符程序
php正则过滤html标签、空格、换行符等的代码示例
php过滤html标记的函数strip_tags用法举例(图文)
PHP删除HTMl标签的代码
php去掉html标签函数代码