数据抓取的一个类，包含一些常用的方法

原文:数据抓取的一个类，包含一些常用的方法

using System;
using System.Configuration;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace XXX
{
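    /// <summary>
    /// Helper methods for scraping web pages: HTTP GET/POST requests, cookie setup,
    /// string splitting/cutting/replacing, text-file read/write, and stripping anchor tags.
    /// </summary>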
    public class Func
    {
        public CookieContainer myCookieContainer = new CookieContainer();

/// <summary>
        /// Parses a "name=value; name2=value2" cookie string and adds each pair to
        /// <c>myCookieContainer</c> for the given domain.
        /// </summary>
        /// <param name="cookieStr">Semicolon-separated cookie pairs. Null or empty adds nothing.</param>
        /// <param name="domain">Domain assigned to every cookie that is added.</param>
        /// <remarks>
        /// Segments that are blank or have no name before the first '=' are skipped
        /// (for example the empty segment produced by a trailing ';').
        /// </remarks>
        public void SetCookie(string cookieStr, string domain)
        {
            if (string.IsNullOrEmpty(cookieStr))
            {
                return;
            }

            foreach (string segment in cookieStr.Split(';'))
            {
                // Split on the FIRST '=' only, so values that themselves contain '='
                // (base64 padding, nested key=value payloads) are kept intact.
                int separatorIndex = segment.IndexOf('=');
                if (separatorIndex <= 0)
                {
                    continue;
                }

                string name = segment.Substring(0, separatorIndex).Trim();
                if (name.Length == 0)
                {
                    continue;
                }

                string value = segment.Substring(separatorIndex + 1).Trim();

                Cookie ck = new Cookie(name, value);
                ck.Domain = domain;
                myCookieContainer.Add(ck);
            }
        }

/// <summary>
        /// Downloads a page with an HTTP GET, decoding the body with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="PageUrl">Address of the page to fetch.</param>
        /// <returns>Whatever the two-argument <c>GetPage</c> overload returns for this URL.</returns>
        public string GetPage(string PageUrl)
        {
            Encoding systemDefault = Encoding.Default;
            string pageText = GetPage(PageUrl, systemDefault);
            return pageText;
        }

/// <summary>
        /// Downloads a page with an HTTP GET, sending the cookies held in
        /// <c>myCookieContainer</c>, and decodes the body with the given encoding.
        /// </summary>
        /// <param name="PageUrl">Address of the page to fetch.</param>
        /// <param name="encoding">Encoding used to decode the response body.</param>
        /// <returns>
        /// The response body as text, or an empty string if anything fails
        /// (the exception message is passed to <c>Log.WriteError</c>).
        /// </returns>
        public string GetPage(string PageUrl, Encoding encoding)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(PageUrl);
                request.CookieContainer = myCookieContainer;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";

                // using-blocks release the connection even when reading throws.
                using (WebResponse response = request.GetResponse())
                using (Stream resStream = response.GetResponseStream())
                {
                    // Setting ReadTimeout on a stream that does not support timeouts throws,
                    // which would turn an otherwise successful download into "".
                    if (resStream.CanTimeout)
                    {
                        resStream.ReadTimeout = 8000;
                    }

                    using (StreamReader sr = new StreamReader(resStream, encoding))
                    {
                        return sr.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort fetch: callers receive "" on failure rather than an exception.
                Log.WriteError(ex.Message);
                return "";
            }
        }

/// <summary>
        /// Sends <paramref name="postData"/> as an "application/x-www-form-urlencoded" HTTP POST
        /// and returns the response body decoded with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="PageUrl">Address to post to.</param>
        /// <param name="postData">Already form-encoded request body.</param>
        /// <returns>The response body as text.</returns>
        /// <remarks>
        /// Unlike <c>GetPage</c>, failures are not caught here; exceptions from the request
        /// propagate to the caller.
        /// </remarks>
        public string PostPage(string PageUrl, string postData)
        {
            // UTF-8 yields the same bytes as ASCII for ASCII input, but does not silently
            // replace non-ASCII characters with '?'.
            byte[] data = Encoding.UTF8.GetBytes(postData);

            HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(PageUrl);
            myRequest.Method = "POST";
            myRequest.ContentType = "application/x-www-form-urlencoded";
            myRequest.ContentLength = data.Length;
            // Share the cookie container with GetPage so cookies added via SetCookie are sent
            // on POST requests too.
            myRequest.CookieContainer = myCookieContainer;

            // Send the data.
            using (Stream requestStream = myRequest.GetRequestStream())
            {
                requestStream.Write(data, 0, data.Length);
            }

            // Get the response; using-blocks release the connection even if reading throws.
            using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
            using (StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.Default))
            {
                return reader.ReadToEnd();
            }
        }

/// <summary>
        /// Splits <paramref name="mystr"/> on every occurrence of the string <paramref name="splitstr"/>.
        /// </summary>
        /// <param name="mystr">Text to split.</param>
        /// <param name="splitstr">Separator string; must be non-empty.</param>
        /// <returns>The pieces between separators, including empty pieces.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="splitstr"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="splitstr"/> is empty.</exception>
        public string[] StrSplit(string mystr, string splitstr)
        {
            // An empty separator would make String.Split fall back to splitting on whitespace,
            // so reject it explicitly.
            if (splitstr == null)
            {
                throw new ArgumentNullException("splitstr");
            }
            if (splitstr.Length == 0)
            {
                throw new ArgumentException("Separator cannot be empty.", "splitstr");
            }

            // Split on the separator string directly rather than substituting a sentinel
            // character first, which broke input that already contained that character.
            return mystr.Split(new string[] { splitstr }, StringSplitOptions.None);
        }

/// <summary>
        /// Returns the text of <paramref name="mystr"/> that lies between the first occurrence of
        /// <paramref name="str1"/> and the next occurrence of <paramref name="str2"/> after it.
        /// </summary>
        /// <param name="mystr">Text to search.</param>
        /// <param name="str1">Start marker; an empty string means "from the beginning".</param>
        /// <param name="str2">End marker; an empty string means "to the end".</param>
        /// <returns>
        /// The text between the markers, or the string "-1" when a non-empty marker is not found.
        /// </returns>
        public string Cutstr(string mystr, string str1, string str2)
        {
            int strstart = 0;
            if (str1 != "")
            {
                // Ordinal search keeps the match length equal to str1.Length, which the
                // offset arithmetic below depends on.
                strstart = mystr.IndexOf(str1, StringComparison.Ordinal);
                if (strstart == -1)
                {
                    // Bail out before searching for str2: continuing with strstart == -1 could
                    // pass an out-of-range start index to IndexOf and throw.
                    return "-1";
                }
            }

            int contentStart = strstart + str1.Length;
            int strend = mystr.Length;
            if (str2 != "")
            {
                strend = mystr.IndexOf(str2, contentStart, StringComparison.Ordinal);
                if (strend == -1)
                {
                    return "-1";
                }
            }

            return mystr.Substring(contentStart, strend - contentStart);
        }

/// <summary>
        /// Replaces only the first occurrence of <paramref name="oldstr"/> in
        /// <paramref name="mystr"/> with <paramref name="newstr"/>.
        /// </summary>
        /// <param name="mystr">Text to search.</param>
        /// <param name="oldstr">Text to look for.</param>
        /// <param name="newstr">Replacement text.</param>
        /// <returns>
        /// The text with the first occurrence replaced, or <paramref name="mystr"/> unchanged
        /// when <paramref name="oldstr"/> does not occur.
        /// </returns>
        public string ReplaceFirst(string mystr, string oldstr, string newstr)
        {
            // Ordinal search: the removal below uses oldstr.Length, so the match must be
            // exactly that many characters long; it also finds control characters such as '\n'
            // that a culture-sensitive search can miss.
            int oldindex = mystr.IndexOf(oldstr, StringComparison.Ordinal);
            if (oldindex < 0)
            {
                return mystr;
            }

            return mystr.Remove(oldindex, oldstr.Length).Insert(oldindex, newstr);
        }

/// <summary>
        /// Writes <paramref name="content"/> to the file at <paramref name="pathstr"/> using
        /// <see cref="Encoding.UTF8"/>, creating the file or overwriting an existing one.
        /// </summary>
        /// <param name="pathstr">Path of the file to write.</param>
        /// <param name="content">Text to write.</param>
        public void writetxt(string pathstr, string content)
        {
            // using-blocks close the file handle even if the write throws.
            using (FileStream fs = new FileStream(pathstr, FileMode.Create, FileAccess.Write))
            using (StreamWriter sw = new StreamWriter(fs, Encoding.UTF8))
            {
                sw.Write(content);
            }
        }

/// <summary>
        /// Reads the whole file at <paramref name="pathstr"/> as text using <see cref="Encoding.UTF8"/>.
        /// </summary>
        /// <param name="pathstr">Path of an existing file to read.</param>
        /// <returns>The complete file content.</returns>
        public string readtxt(string pathstr)
        {
            // using-blocks close the file handle even if the read throws.
            using (FileStream fs = new FileStream(pathstr, FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
            {
                return sr.ReadToEnd();
            }
        }

/// <summary>
        /// Matches an anchor element that has an href attribute (double-quoted, single-quoted
        /// or unquoted) and captures its inner content in the "text" group.
        /// </summary>
        private static readonly Regex AnchorTagPattern = new Regex(
            @"<a[^>]*href=(""(?<href>[^""]*)""|'(?<href>[^']*)'|(?<href>[^\s>]*))[^>]*>(?<text>.*?)</a>",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>
        /// Removes hyperlinks from markup: every &lt;a href=...&gt;...&lt;/a&gt; element is
        /// replaced by its inner content.
        /// </summary>
        /// <param name="str">Markup to process.</param>
        /// <returns>The markup with anchor elements replaced by their inner content.</returns>
        public string FilterLink(string str)
        {
            // One regex substitution replaces each match in place, instead of re-scanning the
            // whole string with String.Replace once per match.
            return AnchorTagPattern.Replace(str, "${text}");
        }

}
}

数据抓取的一个类，包含一些常用的方法

时间： 2024-10-24 17:19:11

数据抓取的一个类，包含一些常用的方法

数据抓取的一个类，包含一些常用的方法的相关文章

大数据抓取采集框架(摘抄至http://blog.jobbole.com/46673/)

Hibernate学习---第十一节：Hibernate之数据抓取策略&批量抓取

Android MaoZhuaWeiBo 好友动态信息列表数据抓取 -3

实现多进程爬虫的数据抓取

Ajax异步数据抓取

爬虫---selenium动态网页数据抓取

第四章爬虫进阶之动态网页数据抓取

Python爬虫新手教程：微医挂号网医生数据抓取

Fiddler微信公众号列表、浏览/查看量、评论数据抓取