httpClient get方式抓取数据

/*
   * 爬取网页信息
   */
   /**
    * Downloads the page at {@code url} with an HTTP GET and returns its body as one string.
    *
    * <p>The body is decoded as GBK and read line by line; line terminators are
    * not kept, so the result is the plain concatenation of every line. If the
    * response has no entity, a failure reminder is sent via {@code Remind} and
    * {@code null} is returned. Exceptions raised while talking to the server are
    * printed to standard error and also result in {@code null}.
    *
    * @param url address of the page to fetch
    * @return the page content without line breaks, or {@code null} when the
    *         response had no entity or the request failed
    */
   private static String pickData(String url) {
       CloseableHttpClient httpclient = HttpClients.createDefault();
       try {
           HttpGet httpget = new HttpGet(url);
           CloseableHttpResponse response = httpclient.execute(httpget);
           try {
               HttpEntity entity = response.getEntity();
               if (entity == null) {
                   // No body to read: raise the failure reminder and give up.
                   String content = "热门综艺节目抓取失败,请检查";
                   ErrorLog el = new ErrorLog();
                   Remind remind = new Remind();
                   remind.remind(el.getVerietyLog(), content);
                   return null;
               }

               InputStream in = entity.getContent();
               BufferedReader br = new BufferedReader(new InputStreamReader(in, "gbk"));
               try {
                   // StringBuilder avoids re-copying the whole page for every line,
                   // which repeated String concatenation would do.
                   StringBuilder page = new StringBuilder();
                   String line;
                   while ((line = br.readLine()) != null) {
                       page.append(line);
                   }
                   return page.toString();
               } finally {
                   // Closing the reader also closes the entity's content stream.
                   br.close();
               }
           } finally {
               response.close();
           }
       } catch (ClientProtocolException e) {
           e.printStackTrace();
       } catch (ParseException e) {
           e.printStackTrace();
       } catch (IOException e) {
           e.printStackTrace();
       } finally {
           // Close the client to release its connections.
           try {
               httpclient.close();
           } catch (IOException e) {
               e.printStackTrace();
           }
       }
       return null;
   }

/*
   * 使用jsoup解析网页信息
   */
   /**
    * Parses {@code html} with jsoup and builds a {@code Variety} from the
    * elements whose class is {@code keyword}.
    *
    * <p>The text of the first child of each of the first three such elements is
    * stored as hot1, hot2 and hot3; when fewer than three are present the
    * remaining slots are set to {@code null}. The number of matched elements is
    * printed to standard output. If nothing matches, a failure reminder is sent
    * via {@code Remind} and {@code null} is returned.
    *
    * @param html page markup to parse
    * @return the populated {@code Variety}, or {@code null} when no
    *         {@code keyword} element was found
    */
   private static Variety analyzeHTMLByString(String html) {
       Variety variety = new Variety();
       String[] topNames = new String[3];
       Elements keywords = Jsoup.parse(html).getElementsByClass("keyword");
       System.out.println(keywords.size());
       String content = "热门综艺节目抓取失败,请检查";
       ErrorLog el = new ErrorLog();

       if (keywords.size() == 0) {
           new Remind().remind(el.getVerietyLog(), content);
           return null;
       }

       // Take at most three entries; unfilled slots stay null.
       int limit = Math.min(keywords.size(), 3);
       for (int idx = 0; idx < limit; idx++) {
           topNames[idx] = keywords.get(idx).child(0).text();
       }

       variety.setHot1(topNames[0]);
       variety.setHot2(topNames[1]);
       variety.setHot3(topNames[2]);
       return variety;
   }

时间： 2024-10-15 22:22:58

httpClient get方式抓取数据

httpClient get方式抓取数据的相关文章

Requests库抓取数据

使用java开源工具httpClient及jsoup抓取解析网页数据

python自然语言处理1——从网络抓取数据

使用Apache Flume抓取数据（1）

PHP Curl模拟登录并抓取数据

分布式爬虫：使用Scrapy抓取数据

php curl模拟登陆抓取数据

测试开发Python培训：抓取新浪微博抓取数据-技术篇

[Python爬虫] 之三：Selenium 调用IEDriverServer 抓取数据