//当点击读取的时候,抓取网页源代码
// URL text taken from comboBox1 by the "read page source" handler.
private string wangzhi;

// Page source most recently downloaded by the "read page source" handler.
private string respHtml;
/// <summary>
/// Handles the "read page source" menu click: downloads the document at the URL typed
/// into <c>comboBox1</c>, stores the text in <c>respHtml</c> and shows it in <c>textBox2</c>.
/// </summary>
/// <param name="sender">The menu item that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void 读取网页源代码ToolStripMenuItem_Click(object sender, EventArgs e)
{
    textBox2.Clear(); // start from an empty view before every read
    wangzhi = comboBox1.Text;

    string error = null;
    try
    {
        // Use the base WebRequest/WebResponse types so a non-HTTP scheme does not fail on a cast.
        WebRequest rep = WebRequest.Create(wangzhi);

        // Dispose the response, its stream and the reader even when reading fails,
        // otherwise the underlying connection stays open.
        using (WebResponse resp = rep.GetResponse())
        using (Stream body = resp.GetResponseStream())
        using (StreamReader sr = new StreamReader(body, DetectEncoding(resp.ContentType)))
        {
            respHtml = sr.ReadToEnd();
        }
    }
    catch (UriFormatException ex) { error = ex.Message; }   // empty or malformed URL
    catch (NotSupportedException ex) { error = ex.Message; } // unregistered URI scheme
    catch (WebException ex) { error = ex.Message; }          // DNS, connection or HTTP error
    catch (IOException ex) { error = ex.Message; }           // stream broke while reading

    if (error != null)
    {
        // Report the failure instead of letting it escape the event handler;
        // respHtml keeps whatever was read before.
        MessageBox.Show("读取网页失败: " + error);
        return;
    }

    textBox2.Text = respHtml;

    // Remember the URL in the drop-down, but only once.
    if (!comboBox1.Items.Contains(wangzhi))
    {
        comboBox1.Items.Add(wangzhi);
    }
}

/// <summary>
/// Picks the text encoding for a response from its Content-Type header value.
/// </summary>
/// <param name="contentType">Raw Content-Type header value; may be null or empty.</param>
/// <returns>
/// The encoding named by the header's <c>charset=</c> parameter when it is present and
/// recognised; otherwise <see cref="Encoding.Default"/>.
/// </returns>
private static Encoding DetectEncoding(string contentType)
{
    const string marker = "charset=";

    if (!string.IsNullOrEmpty(contentType))
    {
        int start = contentType.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (start >= 0)
        {
            string name = contentType.Substring(start + marker.Length);

            // Drop any parameters that follow the charset value.
            int end = name.IndexOf(';');
            if (end >= 0)
            {
                name = name.Substring(0, end);
            }

            name = name.Trim().Trim('"', '\'');
            if (name.Length > 0)
            {
                try
                {
                    return Encoding.GetEncoding(name);
                }
                catch (ArgumentException)
                {
                    // Unknown charset name: fall back to the default below.
                }
            }
        }
    }

    return Encoding.Default;
}
// Extract the <img> tags from the previously downloaded page source and save each image to drive E:.

// Number of <img> tags found by the most recent extraction.
public int num = 0;

/// <summary>
/// Handles the "read images from page source" menu click: lists every <c>&lt;img&gt;</c> tag
/// found in <c>respHtml</c> in <c>listView1</c>, then downloads each double-quoted
/// <c>src</c> to <c>E://&lt;index&gt;.jpg</c> and appends one result row per image.
/// </summary>
/// <param name="sender">The menu item that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void 从网页源码中读取图片ToolStripMenuItem_Click(object sender, EventArgs e)
{
    listView1.Columns.Clear();
    listView1.Items.Clear();
    listView1.Columns.Add("链接地址和图片地址", 700);

    string result = respHtml;
    if (result == null)
    {
        // Nothing has been downloaded yet; Regex.Matches would throw on null input.
        num = 0;
        MessageBox.Show("请先读取网页源代码");
        return;
    }

    // Find every <img ... src=...> tag in the page source.
    MatchCollection mc = Regex.Matches(result, @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""‘]?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""‘<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
    foreach (Match match in mc)
    {
        listView1.Items.Add(match.Value);
    }
    num = listView1.Items.Count; // one row per <img> tag at this point

    // Base address for relative src values; stays null when the stored URL is not absolute.
    Uri pageUri;
    if (!Uri.TryCreate(wangzhi, UriKind.Absolute, out pageUri))
    {
        pageUri = null;
    }

    // Built once: pulls the double-quoted src value out of a single tag.
    Regex reg = new Regex(@"<img.*?src=""(?<src>[^""]*)""[^>]*>", RegexOptions.IgnoreCase);

    for (int i = 0; i < num; i++)
    {
        // Read the tag from the match list rather than back out of the list view.
        foreach (Match mm in reg.Matches(mc[i].Value))
        {
            string src = mm.Groups["src"].Value;
            string path = "E://" + i.ToString() + ".jpg";

            Uri imageUri;
            if (!TryResolveImageUri(pageUri, src, out imageUri))
            {
                listView1.Items.Add(src + " 图片下载失败: 无效的图片地址");
                continue;
            }

            string error;
            if (TryDownloadImage(imageUri, path, out error))
            {
                listView1.Items.Add(path + "图片保存成功!");
            }
            else
            {
                // One failed image must not stop the rest, and is reported against its own URL.
                listView1.Items.Add(src + " 图片下载失败: " + error);
            }
        }
    }

    // Shown once, after every image has been attempted.
    MessageBox.Show("本网页图片读取完毕");
}

/// <summary>
/// Turns a <c>src</c> attribute value into an absolute URI.
/// </summary>
/// <param name="pageUri">Address of the page the tag came from, or null when unknown.</param>
/// <param name="src">Raw <c>src</c> attribute value.</param>
/// <param name="imageUri">The absolute URI on success.</param>
/// <returns>True when <paramref name="src"/> could be resolved to an absolute URI.</returns>
private static bool TryResolveImageUri(Uri pageUri, string src, out Uri imageUri)
{
    if (pageUri != null)
    {
        // Handles absolute, relative and protocol-relative (//host/...) values.
        return Uri.TryCreate(pageUri, src, out imageUri);
    }

    return Uri.TryCreate(src, UriKind.Absolute, out imageUri);
}

/// <summary>
/// Downloads <paramref name="source"/> and writes the bytes to <paramref name="path"/>.
/// </summary>
/// <param name="source">Absolute address of the image.</param>
/// <param name="path">Destination file; an existing file is replaced.</param>
/// <param name="error">Failure description when the method returns false; otherwise null.</param>
/// <returns>True when the file was written completely.</returns>
private static bool TryDownloadImage(Uri source, string path, out string error)
{
    error = null;
    try
    {
        WebRequest req = WebRequest.Create(source);

        // using blocks release the connection and the file handle on failure as well.
        using (WebResponse res = req.GetResponse())
        using (Stream reader = res.GetResponseStream())
        // FileMode.Create truncates an existing file, so a shorter download cannot leave
        // stale bytes from a previous, longer file at the end.
        using (FileStream writer = new FileStream(path, FileMode.Create, FileAccess.Write))
        {
            // Copy in fixed-size chunks: Read returns how many bytes it actually delivered
            // (0 at end of stream), and exactly that many are written out.
            byte[] buff = new byte[8192];
            int c;
            while ((c = reader.Read(buff, 0, buff.Length)) > 0)
            {
                writer.Write(buff, 0, c);
            }
        }

        return true;
    }
    catch (NotSupportedException ex) { error = ex.Message; }       // e.g. data: URIs
    catch (WebException ex) { error = ex.Message; }                // network or HTTP error
    catch (IOException ex) { error = ex.Message; }                 // missing drive, disk error
    catch (UnauthorizedAccessException ex) { error = ex.Message; } // destination not writable

    return false;
}
} // closes an enclosing scope opened outside this excerpt (presumably the form class)
} // closes an enclosing scope opened outside this excerpt (presumably the namespace)
// Time: 2024-11-05 18:52:00