.net语言获取网页的源代码

1. 通过 HttpWebRequest 发送请求、HttpWebResponse 接收响应,获取网页源代码。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    /// <summary>
    /// Console sample: requests a web page with <see cref="HttpWebRequest"/> and
    /// prints the response body (the page source) to the console.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Sends a GET request with browser-like headers, writes the returned
        /// page source to standard output and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Build the web request.
            string url = "https://baike.baidu.com/item/vs/14494077?fr=aladdin";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            // Host header
            request.Host = "baike.baidu.com";
            // Referer header
            request.Referer = "https://www.baidu.com/link?url=Bu_CrEdTSBhrOMDJ8onbirSI0bsUbWXp7VWNkbcbnqkS4FUeXFs8uDTsRxeQUkL8JSW19X6TRs-0D7bAAh-LQa&wd=&eqid=8d3d33880007de2d000000065db2aa9a";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36";

            // Server response. The using block releases the connection on every
            // path, including the early return below.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Only continue when the status code is 200 OK.
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                using (Stream body = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(body))
                {
                    Console.WriteLine(sr.ReadToEnd());
                }
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}

2. 通过 HttpWebRequest 发送请求、HttpWebResponse 接收响应,下载文件(图片、音频、视频)。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    /// <summary>
    /// Console sample: downloads a file (image, audio, video, ...) with
    /// <see cref="HttpWebRequest"/> and saves it to the working directory.
    /// </summary>
    class Program
    {
        // NOTE(review): nothing in this class reads or assigns this property;
        // it is kept only so the class surface stays unchanged.
        public static FileMode Filemode { get; private set; }

        /// <summary>
        /// Sends a GET request for the file URL and copies the response body
        /// into "1.jpg".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Build the web request.
            // URL of the file to download; change it to fetch a different file.
            string url = "http://b-ssl.duitang.com/uploads/blog/201312/04/20131204184148_hhXUT.jpeg";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36";

            // Server response. The using block releases the connection on every
            // path, including the early return below.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Only continue when the status code is 200 OK.
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                // The output file is created (or truncated) only after the
                // status check has passed.
                using (FileStream fs = new FileStream("1.jpg", FileMode.Create))
                using (Stream body = response.GetResponseStream())
                {
                    body.CopyTo(fs);
                }
            }
        }
    }
}

3. 封装一个用于获取网页源码和下载图片的工具类

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ImgFormsApplication
{
    /// <summary>
    /// Helper for fetching a page's source and for downloading files (such as
    /// images) over HTTP with <see cref="HttpWebRequest"/>.
    /// </summary>
    public class ImgDownLoadUtil
    {
        // Pool of User-Agent strings; one is picked at random for each request.
        private static readonly String[] UserAgents =
        {
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
            "Opera/8.0 (Windows NT 5.1; U; en)",
            "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
            "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36"
        };

        // One shared generator: creating a new Random per call can yield the
        // same value for calls made in quick succession when the default seed
        // is time-based. Random is not thread-safe, so access is serialized.
        private static readonly Random UaRandom = new Random();
        private static readonly object UaLock = new object();

        /// <summary>
        /// Requests <paramref name="URL"/> and returns the response body stream
        /// (for example, the source of a web page).
        /// </summary>
        /// <param name="URL">Absolute URL to request.</param>
        /// <returns>
        /// The response stream when the status code is 200 OK; otherwise null.
        /// The caller owns the returned stream and must dispose it.
        /// </returns>
        public static Stream DownLoadFile(String URL)
        {
            HttpWebRequest request = CreateGetRequest(URL);

            // Server response.
            HttpWebResponse response = (HttpWebResponse)request.GetResponse();
            // Only hand out the stream when the status code is 200 OK.
            if (response.StatusCode != HttpStatusCode.OK)
            {
                // Nothing is returned to the caller, so release the response here.
                response.Close();
                return null;
            }

            return response.GetResponseStream();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="URL"/> into a local file.
        /// </summary>
        /// <param name="URL">Absolute URL of the source file.</param>
        /// <param name="fileName">Path of the file to create (overwritten if it exists).</param>
        /// <param name="referer">Value to send in the Referer header.</param>
        /// <returns>
        /// true when the response status is 200 OK and the body was written to
        /// <paramref name="fileName"/>; false when the status is not 200 OK.
        /// </returns>
        public static Boolean DownLoadFile(String URL, String fileName, String referer)
        {
            HttpWebRequest request = CreateGetRequest(URL);
            // Host header, taken from the parsed request URI (host plus any
            // explicit port) instead of splitting the raw URL string.
            request.Host = request.RequestUri.Authority;
            // Referer header
            request.Referer = referer;

            // Server response; disposed on every path, including the early return.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Only save the body when the status code is 200 OK.
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return false;
                }

                using (FileStream fs = new FileStream(fileName, FileMode.Create))
                using (Stream body = response.GetResponseStream())
                {
                    body.CopyTo(fs);
                }
            }

            return true;
        }

        /// <summary>
        /// Creates a GET request for <paramref name="URL"/> with the settings
        /// shared by both download methods.
        /// </summary>
        /// <param name="URL">Absolute URL to request.</param>
        /// <returns>The configured, not yet sent, request.</returns>
        private static HttpWebRequest CreateGetRequest(String URL)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            request.UserAgent = GetUA();
            return request;
        }

        /// <summary>
        /// Picks a random entry from the User-Agent pool.
        /// </summary>
        /// <returns>A User-Agent header value.</returns>
        private static String GetUA()
        {
            lock (UaLock)
            {
                return UserAgents[UaRandom.Next(0, UserAgents.Length)];
            }
        }
    }
}

测试代码1:获取网页源码并显示在文本控件中

// Fetch the page source and show it in the ImgInfo control.
Stream stream = ImgDownLoadUtil.DownLoadFile("https://www.cnblogs.com/1906859953Lucas/p/9027165.html");
// DownLoadFile returns null when the status is not 200 OK; constructing a
// StreamReader over null would throw, so only read when a stream came back.
if (stream != null)
{
    using (StreamReader sr = new StreamReader(stream))
    {
        ImgInfo.Text = sr.ReadToEnd();
    }
}

测试代码2:下载图片并保存为本地文件

// Download a sample image into "2.jpg", sending an empty referer.
String imageUrl = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1572006664637&di=6a73de4ab0d0092d2cc3e01bd2ecd93f&imgtype=0&src=http%3A%2F%2Fwx2.sinaimg.cn%2Fcrop.0.0.1797.1009.1000%2F005NLzplly1fvf2rfe838j31jm0s2gv8.jpg";
String targetFile = "2.jpg";
String emptyReferer = "";
ImgDownLoadUtil.DownLoadFile(imageUrl, targetFile, emptyReferer);

  

原文地址:https://www.cnblogs.com/x-zhoulin/p/11742993.html

时间: 2024-11-06 03:32:10

.net语言获取网页的源代码的相关文章

DIV+CSS+JS仿Select下拉表单网页特效源代码下载

DIV+CSS+JS仿Select下拉表单 原文:DIV+CSS+JS仿Select下拉表单网页特效源代码下载 源代码下载地址:http://www.zuidaima.com/share/1550463331830784.htm <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <

JS禁止别人查看网页的源代码

<html> <head> <title>JS禁止别人查看网页的源代码丨石家庄展柜制作|石家庄叉车</title> <script> function clear(){ Source=document.body.firstChild.data; document.open(); document.close(); document.title="没有源码"; document.body.innerHTML=Source; }&

防止别人查看你网页的源代码

彻底禁止查看网页源代码屏蔽掉右键.复制.选择的完整代码:  <script> document.oncontextmenu=new Function("return false") document.onselectstart=new Function("return false") </script>  选择文字或者点右键看看 或者 <body oncontextmenu="return false" onsele

iOS项目开发实战——使用异步请求获取网页HTML源代码

在网络编程中,异步请求是用的最多的请求方式,与同步相比,不会造成用户界面的卡死,会有较好的用户体验.现在我们使用Swift在iOS项目中获取某个网页的HTML源代码. override func viewDidLoad() { super.viewDidLoad() //异步请求 NSURLConnection.sendAsynchronousRequest(NSURLRequest(URL: NSURL(string: "http://www.baidu.com")!), queue

iOS项目开发实战——iOS网络编程获取网页Html源代码

如今我们身处互联网的时代,任何一个软件或是App,都会或多或少与网络打交道,并不断发生数据交互.一个没有涉及网络编程的应用会显得比较low,这里我们将会开始使用Swift开发iOS应用,并且主要来实现网络操作方面的功能. 这里的需求是获取某个网页的Html源代码,即从网上获取数据.具体实现如下: (1)创建一个iOS项目,Language选择Swift.然后在ViewController.swift中实现如下代码: override func viewDidLoad() { super.view

【转】防止别人查看你网页的源代码

原文网址:http://zhidao.baidu.com/link?url=nBeZZyd1TkttVQv4Fq92VdneQ2KRRe7MU0cXN65iviEB1b4SC-JlQCmMU8U4c3jW3S5Y2mHXe_Ls77kp0-NNr_ 彻底禁止查看网页源代码屏蔽掉右键.复制.选择的完整代码: <script> document.oncontextmenu=new Function("return false") document.onselectstart=n

(转载)分享一个昨天写的,3GQQ登录及取回sid的php源代码,内含post/get访问网页的源代码。

<?php //3gQQ登录,1qq,2密码,3返回sid,4返回验证码地址. function qqlogin($qq,$mm,&$returnsid,&$yzmurl){ $ym=get('http://pt.3g.qq.com/g/s?aid=nLogin'); $sid=text_midtext($ym,'sid=','&'); $vdata=text_midtext($ym,'?vdata=','"'); $submitarr=array( 'login_

2048网页版源代码

<!DOCTYPE html> <html> <head> <title>2048</title> <meta charset="utf-8"/> <style> #gridPanel{ width:480px; height:480px; margin:0 auto; background-color:#bbada0; border-radius:10px; position:relative; }

C#控制台 WebClient下载一个网页的源代码

1 代码 1 using System; 2 using System.Collections.Generic; 3 using System.Linq; 4 using System.Net; 5 using System.Text; 6 using System.Threading.Tasks; 7 8 namespace ConsoleApplication7 9 { 10 class Program 11 { 12 static void Main(string[] args) 13 {