由于使用了一款编辑器,数据库中保存的数据会夹杂着一些HTML标签,之后导出的数据中也就会出现各种不同的HTML标签,严重影响用户的视觉体验(主要是自己都看不下去了)……
下面是我将DataTable进行重新组装,清除HTML标签的方法:
/// <summary>
/// Rebuilds the given columns of a DataTable as string columns whose values have
/// had HTML markup stripped. The table is modified in place and returned.
/// </summary>
/// <param name="new_dt">Table to clean. May be null, in which case null is returned.</param>
/// <param name="parms">Names of the columns to clean (one or more), e.g. "name", "age".</param>
/// <returns>The same DataTable instance, with the listed columns replaced by cleaned string columns.</returns>
/// <exception cref="ArgumentException">A listed column does not exist in the table.</exception>
public DataTable GetNewTable(DataTable new_dt, params object[] parms)
{
    // Decodes a single matched entity. Doing every entity in ONE regex pass avoids
    // double-decoding: with sequential replaces, "&amp;lt;" became "&lt;" and then "<".
    MatchEvaluator fnDecodeEntity = (Match m) =>
    {
        switch (m.Groups[1].Value.ToLowerInvariant())
        {
            case "quot":
            case "#34":
                return "\"";
            case "amp":
            case "#38":
                return "&";
            case "lt":
            case "#60":
                return "<";
            case "gt":
            case "#62":
                return ">";
            case "nbsp":
            case "#160":
                return "";
            case "iexcl":
            case "#161":
                return "\u00A1";
            case "cent":
            case "#162":
                return "\u00A2";
            case "pound":
            case "#163":
                return "\u00A3";
            case "copy":
            case "#169":
                return "\u00A9";
            default:
                // Any other numeric entity is dropped.
                return "";
        }
    };

    // Strips HTML markup from a cell value and returns HTML-encoded plain text.
    Func<object, string> fnClearHtml = (object obj) =>
    {
        var HtmlStr = obj + "";
        HtmlStr = HtmlStr.Replace("\r\n", "");

        // Remove script/style elements together with their content. Singleline lets
        // '.' cross a bare "\n" that the Replace above did not remove.
        HtmlStr = Regex.Replace(HtmlStr, @"<script.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        HtmlStr = Regex.Replace(HtmlStr, @"<style.*?</style>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove the remaining tags and comment delimiters.
        HtmlStr = Regex.Replace(HtmlStr, @"<.*?>", "", RegexOptions.IgnoreCase);
        HtmlStr = Regex.Replace(HtmlStr, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        HtmlStr = Regex.Replace(HtmlStr, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
        HtmlStr = Regex.Replace(HtmlStr, @"-->", "", RegexOptions.IgnoreCase);
        HtmlStr = Regex.Replace(HtmlStr, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode the known entities and drop all other numeric ones, in a single pass.
        HtmlStr = Regex.Replace(
            HtmlStr,
            @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
            fnDecodeEntity,
            RegexOptions.IgnoreCase);

        // Drop any angle brackets that are left (including freshly decoded ones).
        HtmlStr = HtmlStr.Replace("<", "");
        HtmlStr = HtmlStr.Replace(">", "");
        HtmlStr = HtmlStr.Replace("\r\n", "");

        // HttpUtility.HtmlEncode is static, so this also works when there is no
        // current HttpContext (background thread, console tool, unit test).
        HtmlStr = HttpUtility.HtmlEncode(HtmlStr).Trim();
        return HtmlStr;
    };

    if (new_dt == null || parms == null)
    {
        return new_dt;
    }

    foreach (var p in parms)
    {
        var pp = p + "";

        // Validate before touching the table so a bad name cannot leave a stray column behind.
        var source = new_dt.Columns[pp];
        if (source == null)
        {
            throw new ArgumentException("Column '" + pp + "' does not belong to the table.", "parms");
        }

        pp = source.ColumnName;          // keep the table's own spelling of the name
        var ordinal = source.Ordinal;    // remember the position so it can be restored

        // A unique temporary name cannot collide with an existing column
        // (the old p + "1" scheme failed when e.g. "name" and "name1" both existed).
        var cleaned = new_dt.Columns.Add("__clean_" + Guid.NewGuid().ToString("N"), typeof(string));

        foreach (DataRow dr in new_dt.Rows)
        {
            dr[cleaned] = fnClearHtml(dr[source]);
        }

        new_dt.Columns.Remove(pp);
        cleaned.ColumnName = pp;

        // Columns.Add appends at the end; move the cleaned column back to where
        // the original column was so the column order is unchanged.
        cleaned.SetOrdinal(ordinal);
    }

    return new_dt;
}
其中fnClearHtml函数就是专门清除HTML标签的函数。但是小主在写下面删除列的代码时发现:new_dt.Columns.Remove(pp)这句代码似乎只是删除了该列的数据以及该列的名字,这个列还会以Column1或Column2等其他名称出现,列的总数还是那么多……
希望看到小主文言文的大神,能给小主一些解答。
小主感激不尽。。。。
时间: 2024-11-09 11:27:30