// Walks pages 1..129 of one forum thread, extracts the body of every post whose
// enclosing <table> contains the author marker text, and appends the bodies to a
// local HTML file, one <h4> heading per page. Any exception aborts the whole run
// and its message is shown in a message box.
try
{
    const string outputPath = "1.html";
    const string threadUrl = "http://bbs.fobshanghai.com/viewthread.php?tid=3885995&extra=&page=";

    for (int page = 1; page < 130; page++)
    {
        // GetHtmls is defined outside this snippet; presumably it downloads the URL
        // and decodes the response as GBK — TODO confirm the meaning of the two empty arguments.
        var html = GetHtmls(threadUrl + page, "", "", "gbk");

        // Page heading is written even when no post on the page matches.
        File.AppendAllText(outputPath, string.Format("<h4>第{0}页</h4><hr>", page));

        // Each candidate post is taken to be one lazily-matched <table>...</table span.
        foreach (Match table in Regex.Matches(html, @"<table[\s\S]+?</table"))
        {
            var tableHtml = table.Groups[0].Value;

            // Keep only tables that carry the marker text (presumably the link to the
            // thread author's profile — verify against the page markup).
            if (!tableHtml.Contains("鱼骨的个人空间"))
            {
                continue;
            }

            var body = Regex.Match(tableHtml, @"t_msgfont"">([\s\S]+?)</div>\s+<br");

            // A table can contain the marker without a post body; skip it instead of
            // appending an empty <p></p> to the output.
            if (!body.Success)
            {
                continue;
            }

            // Strip the "[<i> 本帖最后由 ... 编辑 </i>]" note that the pattern below targets.
            var content = Regex.Replace(body.Groups[1].Value, @"\[<i>\s*本帖最后由.+?编辑\s*</i>\]", "");

            File.AppendAllText(outputPath, "<p>" + content + "</p>");
        }
    }

    MessageBox.Show("over");
}
catch (Exception ex)
{
    MessageBox.Show(ex.Message);
}
看到这帖子不错 http://bbs.fobshanghai.com/viewthread.php?tid=3885995&extra=&page=1
写了一段代码进行采集,看起来方便多了。
时间: 2024-10-12 23:00:41