/// <summary>
/// Converts an HTML fragment into plain text.
/// </summary>
/// <remarks>
/// Processing order:
/// <list type="number">
/// <item><description>script and style elements are removed together with their content;</description></item>
/// <item><description>td tags become a tab, br and li tags a carriage return, and div, tr and p tags two carriage returns;</description></item>
/// <item><description>every remaining tag is deleted;</description></item>
/// <item><description>a fixed set of character entities is decoded and any other short entity is dropped;</description></item>
/// <item><description>line feeds become carriage returns, then runs of breaks and tabs are tidied up
/// (at most two consecutive carriage returns and four consecutive tabs remain).</description></item>
/// </list>
/// The output uses a bare carriage return ("\r") as its line separator.
/// </remarks>
/// <param name="htmlStr">The HTML text to convert. May be null or empty.</param>
/// <returns>
/// The plain-text rendering; an empty string when <paramref name="htmlStr"/> is null or empty;
/// or <paramref name="htmlStr"/> unchanged when a regular expression operation fails.
/// </returns>
public static string Html2Text(string htmlStr)
{
    if (string.IsNullOrEmpty(htmlStr))
    {
        return "";
    }

    try
    {
        string result = htmlStr;

        // Drop script and style elements including their bodies. The match is lazy and
        // Singleline so that each element is removed on its own (text between two
        // separate blocks survives) and bodies spanning several lines are covered.
        System.Text.RegularExpressions.RegexOptions blockOptions =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase |
            System.Text.RegularExpressions.RegexOptions.Singleline;
        result = System.Text.RegularExpressions.Regex.Replace(
            result, @"<( )*script([^>])*>.*?<( )*/( )*script( )*>", string.Empty, blockOptions);
        result = System.Text.RegularExpressions.Regex.Replace(
            result, @"<( )*style([^>])*>.*?<( )*/( )*style( )*>", string.Empty, blockOptions);

        // Table cells are separated by a tab.
        result = ReplaceIgnoringCase(result, @"<( )*td([^>])*>", "\t");

        // Line breaks for <br> (including the self-closing forms <br/> and <br />) and <li>.
        result = ReplaceIgnoringCase(result, @"<( )*br( )*/?( )*>", "\r");
        result = ReplaceIgnoringCase(result, @"<( )*li( )*>", "\r");

        // Paragraph breaks (two line breaks) for <div>, <tr> and <p>.
        result = ReplaceIgnoringCase(result, @"<( )*div([^>])*>", "\r\r");
        result = ReplaceIgnoringCase(result, @"<( )*tr([^>])*>", "\r\r");
        result = ReplaceIgnoringCase(result, @"<( )*p([^>])*>", "\r\r");

        // Remove everything else that is enclosed in angle brackets.
        result = ReplaceIgnoringCase(result, @"<[^>]*>", string.Empty);

        // Decode the supported entities. This must happen after tag removal, otherwise
        // text produced from &lt; / &gt; would itself be stripped as a tag.
        result = ReplaceIgnoringCase(result, @"&nbsp;", " ");
        result = ReplaceIgnoringCase(result, @"&bull;", " * ");
        result = ReplaceIgnoringCase(result, @"&lsaquo;", "<");
        result = ReplaceIgnoringCase(result, @"&rsaquo;", ">");
        result = ReplaceIgnoringCase(result, @"&trade;", "(tm)");
        result = ReplaceIgnoringCase(result, @"&frasl;", "/");
        result = ReplaceIgnoringCase(result, @"&lt;", "<");
        result = ReplaceIgnoringCase(result, @"&gt;", ">");
        result = ReplaceIgnoringCase(result, @"&copy;", "(c)");
        result = ReplaceIgnoringCase(result, @"&reg;", "(r)");

        // Drop any other entity. NOTE: the pattern accepts any 2-6 characters between
        // '&' and ';', so it also removes non-entity text of that shape (e.g. "& b;").
        result = ReplaceIgnoringCase(result, @"&(.{2,6});", string.Empty);

        // Use a single kind of line break from here on.
        result = result.Replace("\n", "\r");

        // Remove spaces caught between breaks/tabs so that the runs become contiguous.
        result = ReplaceIgnoringCase(result, "(\r)( )+(\r)", "\r\r");
        result = ReplaceIgnoringCase(result, "(\t)( )+(\t)", "\t\t");
        result = ReplaceIgnoringCase(result, "(\t)( )+(\r)", "\t\r");
        result = ReplaceIgnoringCase(result, "(\r)( )+(\t)", "\r\t");

        // Tabs alone between two breaks are redundant; several tabs after a break become one.
        result = ReplaceIgnoringCase(result, "(\r)(\t)+(\r)", "\r\r");
        result = ReplaceIgnoringCase(result, "(\r)(\t)+", "\r\t");

        // Cap runs at two line breaks and four tabs. A quantifier does this in one pass
        // instead of repeatedly replacing ever longer literal strings.
        result = System.Text.RegularExpressions.Regex.Replace(result, "\r{3,}", "\r\r");
        result = System.Text.RegularExpressions.Regex.Replace(result, "\t{5,}", "\t\t\t\t");

        return result;
    }
    catch (System.ArgumentException)
    {
        // Best effort: hand the input back untouched rather than failing the caller.
        return htmlStr;
    }
    catch (System.TimeoutException)
    {
        // Covers regular-expression match timeouts; same best-effort fallback.
        return htmlStr;
    }
}

/// <summary>
/// Replaces every case-insensitive match of <paramref name="pattern"/> in
/// <paramref name="input"/> with <paramref name="replacement"/>.
/// </summary>
private static string ReplaceIgnoringCase(string input, string pattern, string replacement)
{
    return System.Text.RegularExpressions.Regex.Replace(
        input, pattern, replacement, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
|