匹配单层 HTML 标签的小 demo，应该能匹配大多数 HTML 字符串。多层（嵌套）的 HTML 标签解析不出来。可能有小 bug，我抛砖引玉一下，哈哈。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ResolveHtmlText
{
    /// <summary>
    /// Small demo that extracts the inner text of single-layer (non-nested)
    /// HTML elements from a string using regular expressions.
    /// Nested elements are not resolved: only elements whose content contains
    /// no further tags are matched.
    /// </summary>
    class Program
    {
        // Matches one non-nested element: an opening tag (optionally carrying
        // attributes whose values are "name: value;" lists delimited by ‘),
        // content without '<' or '>', and a closing tag.
        // The pattern is kept exactly as published. Note that inside a
        // character class only the leading '^' negates; the later '^' are
        // literal carets, so '^' itself is excluded as well.
        // NOTE(review): the attribute delimiter is U+2018 (‘) throughout this
        // listing; it may originally have been an ASCII apostrophe — confirm.
        private const string SingleLayerElementPattern =
            @"<\s*[a-zA-Z0-9]+\s*[a-zA-Z]+\s*=\s*‘\s*[a-zA-Z]"
            + @"+\s*:\s*[^<^>];?‘\s*>[^<^>]"
            + @"*<\s*/\s*[a-zA-Z0-9]+\s*>|<\s*[a-zA-Z0-9]"
            + @"+\s*(\s*[a-zA-Z-]+\s*=\s*‘(\s*[a-zA-Z-]+\s*:"
            + @"\s*[^:^;^<^>]+\s*;\s*)*(\s*[a-zA-Z-]+\s*:\s*"
            + @"[^:^;^<^>]+\s*)\s*;?\s*‘\s*)*"
            + @"\s*>[^<^>]*<\s*/\s*[a-zA-Z0-9]+\s*>";

        // Built once instead of on every loop iteration.
        private static readonly Regex ElementRegex = new Regex(SingleLayerElementPattern);

        // Captures the text between the first '>' and the last '<' of an element.
        private static readonly Regex InnerTextRegex = new Regex(@">(.+)<");

        /// <summary>
        /// Runs the demo on a hard-coded sample: prints the original string,
        /// each matched element, and the concatenated inner text, then waits
        /// for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string text = @" <span style=‘color:#1F497D‘><span>y<span></span> <span style=‘color:#1F497D;‘>1</span> <span style=‘color:#1F497D;background-color:#123456‘>2</span><span style=‘color:#1F497D;background-color:#123456;text-align:center‘>3</span> <span style=‘color:#1F497D;background-color:#123456;text-align:center;‘>4</span> <span style=‘color:#1F497D;background-color:#123456;text-align:center;tt-l: 134;‘>5</span>ggjf<a>123456</a>";
            Console.WriteLine("原字符串:" + text);

            string normalized = NormalizeMarkup(text);
            string extracted = ExtractInnerText(normalized);

            Console.WriteLine("解析出的结果:" + extracted);
            Console.ReadLine();
        }

        /// <summary>
        /// Rewrites double quotes to the ‘ delimiter the pattern expects and
        /// decodes the handful of HTML entities this demo cares about.
        /// </summary>
        /// <param name="text">Raw HTML fragment.</param>
        /// <returns>The fragment with quotes unified and entities decoded.</returns>
        private static string NormalizeMarkup(string text)
        {
            text = text.Replace("\"", "‘");
            text = text.Replace("&quot;", "‘");
            // NOTE(review): the published listing shows Replace(" ", ""), which
            // looks like a decoded "&nbsp;" given the neighbouring entity
            // replacements — confirm against the original source.
            text = text.Replace("&nbsp;", "");
            text = text.Replace("&lt;", "<"); // decode the escaped '<'
            text = text.Replace("&gt;", ">"); // decode the escaped '>'
            return text;
        }

        /// <summary>
        /// Finds every single-layer element in <paramref name="text"/>, prints
        /// it, and concatenates the non-empty inner texts in document order.
        /// </summary>
        /// <param name="text">Normalized HTML fragment.</param>
        /// <returns>The concatenated inner text of all matched elements.</returns>
        private static string ExtractInnerText(string text)
        {
            StringBuilder sb = new StringBuilder();

            foreach (Match element in ElementRegex.Matches(text))
            {
                string temp = element.Value;
                Console.WriteLine("临时值:" + temp);

                // Regex.Match never returns null; a failed match is reported
                // through Success. Elements with empty content do not match
                // (".+" needs at least one character) and are skipped.
                Match inner = InnerTextRegex.Match(temp);
                if (inner.Success)
                {
                    sb.Append(inner.Groups[1].Value);
                }
            }

            return sb.ToString();
        }
    }
}
时间: 2024-10-19 20:35:49