C# 网页图片采集

http://blog.csdn.net/a237428367/article/details/5987832

using System;

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Windows.Forms;
namespace ImageCollect
{
    /// <summary>
    /// Loads a web page in a <see cref="WebBrowser"/> control and downloads every
    /// image referenced by an <c>img</c> element into a local directory.
    /// </summary>
    public class GatherPic
    {
        private string savePath;
        private string getUrl;
        private WebBrowser wb;
        private int iImgCount;

        /// <summary>
        /// Stores the page address and the target directory; no work is started here.
        /// </summary>
        /// <param name="sWebUrl">Address of the page whose images are collected.</param>
        /// <param name="sSavePath">Directory the images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts navigating to the configured page. The images are collected later,
        /// when the browser raises <c>DocumentCompleted</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> (after showing a message box) when no URL was supplied,
        /// otherwise <c>true</c>.
        /// </returns>
        public bool start()
        {
            if (getUrl == null || getUrl.Trim().Equals(""))
            {
                MessageBox.Show("哪来的虾米连网址都没输！");
                return false;
            }

            this.wb = new WebBrowser();
            // Attach the handler before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>
        /// Handler for <c>WebBrowser.DocumentCompleted</c>. Only the completion of the
        /// top-level document triggers the image search; completions reported for
        /// embedded frames (whose URL differs from the document URL) are ignored.
        /// </summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (wb.Document == null || e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Enumerates all <c>img</c> elements of the loaded document and saves each
        /// one into the configured directory. Does nothing when no document is loaded.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;
            foreach (HtmlElement elem in elemColl)
            {
                string sImgUrl = elem.GetAttribute("src");
                // A failed download returns 0 and must not stop the remaining images.
                SaveImageFromWeb(sImgUrl, this.savePath);
            }
        }

        /// <summary>
        /// Downloads a single remote image into a directory.
        /// </summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Directory the file is written to.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the URL is unusable, the response is not
        /// an image, or the download or the file write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            if (string.IsNullOrEmpty(imgUrl) || string.IsNullOrEmpty(path))
            {
                return 0;
            }

            Uri uri;
            if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out uri))
            {
                return 0;
            }

            // Only http/https produce an HttpWebRequest; any other scheme would make the cast below throw.
            if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
            {
                return 0;
            }

            string fileName = BuildFileName(uri);
            if (fileName == null)
            {
                return 0;
            }

            string target = Path.Combine(path, fileName);

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = 10000;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(target, FileMode.Create))
                    {
                        // Copy until end of stream instead of trusting ContentLength, which is
                        // -1 when the server does not send it and may exceed what is delivered.
                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                return 0;
            }
            catch (UnauthorizedAccessException)
            {
                return 0;
            }
        }

        /// <summary>
        /// Derives a local file name from the last segment of the URL path (the query
        /// string is not part of it). Returns <c>null</c> when the path has no last segment.
        /// </summary>
        private static string BuildFileName(Uri uri)
        {
            string absolutePath = uri.AbsolutePath;
            string name = absolutePath.Substring(absolutePath.LastIndexOf('/') + 1);
            if (name.Length == 0)
            {
                return null;
            }

            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return name;
        }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Windows.Forms;
namespace ImageCollect
{
    /// <summary>
    /// Loads a web page in a <see cref="WebBrowser"/> control and downloads every
    /// image referenced by an <c>img</c> element into a local directory.
    /// </summary>
    public class GatherPic
    {
        private string savePath;
        private string getUrl;
        private WebBrowser wb;
        private int iImgCount;

        /// <summary>
        /// Stores the page address and the target directory; no work is started here.
        /// </summary>
        /// <param name="sWebUrl">Address of the page whose images are collected.</param>
        /// <param name="sSavePath">Directory the images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts navigating to the configured page. The images are collected later,
        /// when the browser raises <c>DocumentCompleted</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> (after showing a message box) when no URL was supplied,
        /// otherwise <c>true</c>.
        /// </returns>
        public bool start()
        {
            if (getUrl == null || getUrl.Trim().Equals(""))
            {
                MessageBox.Show("哪来的虾米连网址都没输！");
                return false;
            }

            this.wb = new WebBrowser();
            // Attach the handler before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>
        /// Handler for <c>WebBrowser.DocumentCompleted</c>. Only the completion of the
        /// top-level document triggers the image search; completions reported for
        /// embedded frames (whose URL differs from the document URL) are ignored.
        /// </summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (wb.Document == null || e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Enumerates all <c>img</c> elements of the loaded document and saves each
        /// one into the configured directory. Does nothing when no document is loaded.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;
            foreach (HtmlElement elem in elemColl)
            {
                string sImgUrl = elem.GetAttribute("src");
                // A failed download returns 0 and must not stop the remaining images.
                SaveImageFromWeb(sImgUrl, this.savePath);
            }
        }

        /// <summary>
        /// Downloads a single remote image into a directory.
        /// </summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Directory the file is written to.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the URL is unusable, the response is not
        /// an image, or the download or the file write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            if (string.IsNullOrEmpty(imgUrl) || string.IsNullOrEmpty(path))
            {
                return 0;
            }

            Uri uri;
            if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out uri))
            {
                return 0;
            }

            // Only http/https produce an HttpWebRequest; any other scheme would make the cast below throw.
            if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
            {
                return 0;
            }

            string fileName = BuildFileName(uri);
            if (fileName == null)
            {
                return 0;
            }

            string target = Path.Combine(path, fileName);

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = 10000;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(target, FileMode.Create))
                    {
                        // Copy until end of stream instead of trusting ContentLength, which is
                        // -1 when the server does not send it and may exceed what is delivered.
                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                return 0;
            }
            catch (UnauthorizedAccessException)
            {
                return 0;
            }
        }

        /// <summary>
        /// Derives a local file name from the last segment of the URL path (the query
        /// string is not part of it). Returns <c>null</c> when the path has no last segment.
        /// </summary>
        private static string BuildFileName(Uri uri)
        {
            string absolutePath = uri.AbsolutePath;
            string name = absolutePath.Substring(absolutePath.LastIndexOf('/') + 1);
            if (name.Length == 0)
            {
                return null;
            }

            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return name;
        }
    }
}

调用方法

[c-sharp] view plain copy print ?

// Usage: create the collector with the page address and the target directory, then start it.
// NOTE(review): the WebBrowser control is documented to require an STA thread; the
// completion event presumably also needs a running message loop — confirm for your host.
GatherPic g = new GatherPic("http://www.baidu.com", "E:/XXX");
g.start();

=====================================================

在web项目中使用WebBrowser类-----给网站抓图

最近做一个WEB项目，其中要求有个功能就是程序能网页抓图，举个例子：在test.aspx页面上放一个TextBox和一个Button，TextBox用来输入要抓取的网页地址，然后按了Button之后，服务器要对前面输入的网址进行抓图，然后显示出来。我把抓图的业务逻辑做成一个类：

using System;
using System.Data;
using System.Windows.Forms;
using System.Drawing;

/// <summary>
/// WebSnap2: loads a web page in a <see cref="WebBrowser"/> control and renders it
/// into a bitmap.
/// </summary>
/// <remarks>
/// The WebBrowser control is documented to work only on single-threaded-apartment
/// (STA) threads. NOTE(review): ASP.NET request threads are normally not STA, so a
/// web project presumably has to call this class from a dedicated STA thread — confirm.
/// </remarks>
public class WebSnap2
{
    public WebSnap2()
    {
    }

    /// <summary>
    /// Loads the page and returns a snapshot of it.
    /// </summary>
    /// <param name="Url">Address of the page to capture.</param>
    /// <returns>A bitmap of the rendered page; the caller owns it and should dispose it.</returns>
    /// <exception cref="InvalidOperationException">The page has no document body to measure.</exception>
    public Bitmap StartSnap(string Url)
    {
        WebBrowser myWB = this.GetPage(Url);
        try
        {
            return this.SnapWeb(myWB);
        }
        finally
        {
            // Release the browser even when taking the snapshot fails.
            myWB.Dispose();
        }
    }

    /// <summary>
    /// Creates a browser without scroll bars, navigates to the address and pumps
    /// window messages until the control reports <c>Complete</c>.
    /// </summary>
    private WebBrowser GetPage(string Url)
    {
        WebBrowser myWB = new WebBrowser();
        try
        {
            myWB.ScrollBarsEnabled = false;
            myWB.Navigate(Url);
            // NOTE(review): there is no timeout — a page that never reaches Complete
            // keeps this loop running.
            while (myWB.ReadyState != WebBrowserReadyState.Complete)
            {
                System.Windows.Forms.Application.DoEvents();
            }

            return myWB;
        }
        catch
        {
            // The caller never receives the control on failure, so dispose it here.
            myWB.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Resizes the browser to the scroll size of the document body (plus a 10-pixel
    /// margin in each dimension) and draws it into a new bitmap of that size.
    /// </summary>
    private Bitmap SnapWeb(WebBrowser wb)
    {
        HtmlDocument hd = wb.Document;
        if (hd == null || hd.Body == null)
        {
            throw new InvalidOperationException("The page has no document body to capture.");
        }

        int height = Convert.ToInt32(hd.Body.GetAttribute("scrollHeight")) + 10;
        int width = Convert.ToInt32(hd.Body.GetAttribute("scrollWidth")) + 10;
        wb.Height = height;
        wb.Width = width;

        Bitmap bmp = new Bitmap(width, height);
        try
        {
            Rectangle rec = new Rectangle(0, 0, width, height);
            wb.DrawToBitmap(bmp, rec);
            return bmp;
        }
        catch
        {
            // Do not leak the bitmap when rendering fails.
            bmp.Dispose();
            throw;
        }
    }
}

然后在test.aspx的button_click事件里面调用：

        // Capture the page entered in TextBox1 and send it back to the client as a JPEG.
        WebSnap2 ws = new WebSnap2();
        using (Bitmap bmp = ws.StartSnap(TextBox1.Text))
        using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
        {
            bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
            Response.ContentType = "image/jpeg";
            // ToArray() returns only the bytes written; GetBuffer() would also send
            // the unused tail of the internal buffer.
            Response.BinaryWrite(ms.ToArray());
        }

时间： 2024-10-06 14:12:34

C# 网页图片采集

C# 网页图片采集的相关文章

3D图片采集与展示（SurfaceView 自适应 Camera, 录制视频，抽取帧）

C#图片采集软件自动翻页自动分类（收集美图必备工具）

一个咸鱼的Python爬虫之路（三）：爬取网页图片

网页图片的尺寸、分辨率、物理尺寸的理解。

java 抓取网页图片

Python -- 网络编程 -- 抓取网页图片 -- 图虫网

使用ScrapySharp快速从网页中采集数据

Python3简单爬虫抓取网页图片

Python爬虫网页图片