/// <summary>
/// Removes HTML markup from a string and decodes a small set of common character entities.
/// </summary>
/// <remarks>
/// The work is done with an ordered list of case-insensitive regular-expression replacements,
/// followed by removal of any remaining angle brackets and CR/LF pairs. This is a best-effort
/// text extractor, not an HTML parser or a sanitizer.
/// </remarks>
/// <param name="strhtml">Source text that contains HTML.</param>
/// <returns>The text with the markup removed; an empty string when the input is null or empty.</returns>
public static string striphtml(string strhtml)
{
    // Nothing to strip. This also avoids the ArgumentNullException that Regex.Replace
    // throws when it is handed a null input.
    if (string.IsNullOrEmpty(strhtml))
    {
        return string.Empty;
    }

    // Applied in order: patterns[i] is replaced with replacements[i].
    // The two arrays must stay the same length.
    string[] patterns =
    {
        // Script element on a single line ('.' does not match a line feed here,
        // so a script block that spans lines is not removed as a unit).
        @"<script[^>]*?>.*?</script>",
        // Any opening/closing tag, with optional namespace prefix and attributes.
        // \7 is a back-reference to the attribute's opening quote character.
        // NOTE(review): the nested quantifiers may backtrack heavily on malformed
        // tags; consider a match timeout if the input is untrusted.
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // A line break together with the whitespace that follows it.
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // Any other numeric entity is dropped.
        @"&#(\d+);",
        // End of an HTML comment becomes a line break so that the next pattern,
        // which needs a trailing line feed, can consume the comment.
        @"-->",
        @"<!--.*\n"
    };

    string[] replacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",        // a non-breaking space becomes a plain space so words stay separated
        "\u00A1",   // inverted exclamation mark, chr(161)
        "\u00A2",   // cent sign, chr(162)
        "\u00A3",   // pound sign, chr(163)
        "\u00A9",   // copyright sign, chr(169)
        "",
        "\r\n",
        ""
    };

    string output = strhtml;
    for (int i = 0; i < patterns.Length; i++)
    {
        // The static overload lets the Regex class reuse already-parsed patterns
        // from its internal cache instead of rebuilding them on every call.
        output = Regex.Replace(output, patterns[i], replacements[i], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: Replace returns a new string, so the result has to be
    // assigned back or the clean-up silently does nothing.
    output = output.Replace("<", "");
    output = output.Replace(">", "");
    output = output.Replace("\r\n", "");

    return output;
}