C# 取html <data>内容

  private void button1_Click(object sender, EventArgs e)
        {
            string strSource = GetHttpWebRequest("http://www.******.aspx");

            //匹配出表格内容
            Regex rx = new Regex("<table width=\"936\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\" bgcolor=\"#FFB91F\" align=\"center\" style=\"color:Black;\" id=\"panel\" >" + @"([\S\s]*?)" + "</table>", RegexOptions.Compiled | RegexOptions.IgnoreCase);
            MatchCollection matchs = rx.Matches(strSource);
            if (matchs.Count > 0)
            {
                strSource = matchs[0].Value;
                string pattern = "<tr align=\"center\" bgcolor=\"#@all\">@all<td height=\"32\" bgcolor=\"#@all\">(.*)</td>@all<td height=\"28\" bgcolor=\"#@all\">(.*)</td>@all<td bgcolor=\"#@all\">@allchkResult(.*);</script></td>@all</tr>";
                pattern = pattern.Replace("@all", @"[\S\s]*?");
                rx = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);
                //将匹配出的数据放入DataTable
                DataRow drow;
                matchs = rx.Matches(strSource);

                //MessageBox.Show(matchs[0].Groups[1].Value);
                for (int i = 0; i < matchs.Count; i++)
                {
                    listBox1.Items.Add(matchs[i].Groups[1].Value + "|" + matchs[i].Groups[2].Value + "|" + matchs[i].Groups[3].Value.Replace(",","").Replace("‘","").Replace("(","").Replace(")",""));

                }
            }
        }

带条件的取

  private void GetData(int cout)
        {
            string postData2;
            HttpWebRequest requestScore = (HttpWebRequest)WebRequest.Create("http://www.******.aspx");
           // postData2 = "__VIEWSTATE=%2FwEPDwUJNzc3MTAxMzU5ZGRoqAvv8WszDJmdGj4cP0O2gODj8g%3D%3D&soundshow=&reloadshow=&CurrentPageIndex="+cout.ToString();
            byte[] data  = Encoding.ASCII.GetBytes(postData2);
            requestScore.Method = "Post";
            requestScore.ContentType = "application/x-www-form-urlencoded";
            requestScore.ContentLength = data.Length;
            requestScore.KeepAlive = true;

            //使用登陆的cookies通过接下来的验证
            //requestScore.CookieContainer = container;
            Stream stream = requestScore.GetRequestStream();
            stream.Write(data, 0, data.Length);
            stream.Close();
            HttpWebResponse responseSorce = (HttpWebResponse)requestScore.GetResponse();
            StreamReader reader = new StreamReader(responseSorce.GetResponseStream(), Encoding.Default);
            string strSource = reader.ReadToEnd();
            Regex rx = new Regex("<table width=\"936\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\" bgcolor=\"#FFB91F\" align=\"center\" style=\"color:Black;\" id=\"panel\" >" + @"([\S\s]*?)" + "</table>", RegexOptions.Compiled | RegexOptions.IgnoreCase);
            //<table width="936" border="0" cellpadding="0" cellspacing="1" bgcolor="#FFB91F" align="center" style="color:Black;" id="panel" >
            MatchCollection matchs = rx.Matches(strSource);
            if (matchs.Count > 0)
            {
                strSource = matchs[0].Value;
                string pattern = "<tr align=\"center\" bgcolor=\"#@all\">@all<td height=\"32\" bgcolor=\"#@all\">(.*)</td>@all<td height=\"28\" bgcolor=\"#@all\">(.*)</td>@all<td bgcolor=\"#@all\">@allchkResult(.*);</script></td>@all</tr>";
                pattern = pattern.Replace("@all", @"[\S\s]*?");
                rx = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);
                //将匹配出的数据放入DataTable
                DataRow drow;
                matchs = rx.Matches(strSource);

                //MessageBox.Show(matchs[0].Groups[1].Value);
                for (int i = 0; i < matchs.Count; i++)
                {
                    listBox1.Items.Add(matchs[i].Groups[1].Value + "|" + matchs[i].Groups[2].Value + "|" + matchs[i].Groups[3].Value.Replace(",", "").Replace("‘", "").Replace("(", "").Replace(")", ""));
                    One.Add(Convert.ToInt32( matchs[i].Groups[1].Value), matchs[i].Groups[2].Value + "|" + matchs[i].Groups[3].Value.Replace(",", "").Replace("‘", "").Replace("(", "").Replace(")", ""));
                    //插入数据库

                }
            }
        }

主页 www.yundll.com

时间： 2024-08-11 14:21:29

C# 取html <data>内容的相关文章

从kepware定时取web api内容

1 using System; 2 using System.Collections.Generic; 3 using System.ComponentModel; 4 using System.Data; 5 using System.Drawing; 6 using System.Linq; 7 using System.Text; 8 using System.Windows.Forms; 9 using Newtonsoft.Json.Linq; 10 using Newtonsoft.

快速抓取某个网站内容方法

是不是有人相抓取网页上面的内容,放到别的网站上面.下面我给大家介绍一种最常用的方法: 用HtmlAgilityPack 组件. public String GetHtml() { string url = "http://t.news.fx168.com/"; HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest; using (HttpWebResponse response = request.G

C#抓取网页HTML内容

网上很多内容采集工具,今天就自己试着写一个,发现C#可以轻松的抓去网页的内容,进而通过正则来分离出自己感兴趣的数据.下面是抓去网页内容的代码: using System; using System.Collections.Generic; using System.Linq; using System.Web; using System.Net; using System.Text; using System.IO; using System.Text.RegularExpressions; n

freecms使用jsoup和quartz抓取其他网站内容

这里提到了freecms,其实抓取和freecms没什么关系,主要还是靠jsoup jsoup里提供了html解析和读取的方法,集成了httprequest,可以从网络和本地读取,支持非闭合标签等. csdn中有比较详细的介绍 http://blog.csdn.net/column/details/jsoup.html 一般抓取页面的内容,都是后台进行的,多数是周期抓取,那么肯定要有调度的方法,包括 Timer,Scheduler, Quartz 以及 JCron Tab等等.这篇文件介绍和对比

PHPcurl抓取AJAX异步内容(转载)

PHPcurl抓取AJAX异步内容其实抓ajax异步内容的页面和抓普通的页面区别不大.ajax只不过是做了一次异步的http请求,只要使用firebug类似的工具,找到请求的后端服务url和传值的参数,然后对该url传递参数进行抓取即可. 利用Firebug的网络工具如果抓去的是页面,则内容中没有显示的数据,是一堆JS代码. Code $cookie_file=tempnam('./temp','cookie'); $ch = curl_init(); $url1 = "http://www

Oracle bbed 实用示例-----修改Data内容、恢复delete的rows

bbed 可以在db open 状态来进行修改,但是建议在做任何修改操作之前先shutdown db. 这样避免checkpoint 进程重写bbed 对block 的修改. 也避免oracle 在bbed 修改完成之前读block 或者申明block 为corrupt. 一. 示例: 修改Data内容 1.1连接bbed [[email protected] ~]$ bbed parfile=/u01/app/oracle/bbed/bbed.par Password: BBED: Relea

file_get_contents抓取远程URL内容

/** * POST URL * @param $url * @param null $post * @return false / string */ public static function UrlPost($url, $post = null, $timeout = 3) { if (is_array($post)) { ksort($post); $content = http_build_query($post); $content_length = strlen($content

Winform实现抓取web页面内容的方法

本文以一个非常简单的实例讲述了Winform实现抓取web页面内容的方法,代码简洁易懂,非常实用!分享给大家供大家参考. 具体实现代码如下: WebRequest request = WebRequest.Create("http://1.bjapp.sinaapp.com/play.php?a=" + PageUrl); WebResponse response = request.GetResponse(); Stream resStream = response.GetRespo

抓取网页文本内容

使用的是WebRequest类,在这以http://novel.hongxiu.com/a/1036665/10425842.html为例. 代码如下: using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.IO; using System.Net; using System.Text; u