通过文本或url扫描下载文件

  1 package com.xxxx;
  2
  3 import java.io.BufferedInputStream;
  4 import java.io.BufferedReader;
  5 import java.io.File;
  6 import java.io.FileNotFoundException;
  7 import java.io.FileOutputStream;
  8 import java.io.IOException;
  9 import java.io.InputStreamReader;
 10 import java.net.MalformedURLException;
 11 import java.net.URL;
 12 import java.util.ArrayList;
 13 import java.util.List;
 14 import java.util.regex.Matcher;
 15 import java.util.regex.Pattern;
 16
 17 public class GetImage {
 18
 19     public int getCharacterPosition(String string,int numb){
 20         //这里是获取"#"符号的位置
 21         Matcher slashMatcher = Pattern.compile("/").matcher(string);
 22         int mIdx = 0;
 23         while(slashMatcher.find()) {
 24            mIdx++;
 25            //当"#"符号第二次出现的位置
 26            if(mIdx == numb){
 27               break;
 28            }
 29         }
 30         return slashMatcher.start();
 31     }
 32
 33
 34
 35
 36
 37     /**
 38      * 下载文件(图片、压缩包等文件都可以下载)
 39      * @param httpUrl
 40      * eg:http://www.xxxx.com/uploadfiles/123.rar
 41      */
 42     public void getHtmlFile(String httpUrl) {
 43     URL url;
 44     BufferedInputStream in;
 45     FileOutputStream file;
 46     try {
 47        System.out.println("取网络文件");
 48        //获取子目录
 49        String unitPath = httpUrl.substring(getCharacterPosition(httpUrl,3) ,httpUrl.lastIndexOf("/"));
 50        String fileName = httpUrl.substring(httpUrl.lastIndexOf("/"));
 51        String filePath = "F:\\FocuSimple"+unitPath+"\\";
 52        File up = new File(filePath);
 53         if(!up.exists()){    //判断文件夹是否不存在
 54             up.mkdirs();
 55         }
 56
 57        url = new URL(httpUrl);
 58
 59        in = new BufferedInputStream(url.openStream());
 60
 61        file = new FileOutputStream(new File(filePath+fileName));
 62        int t;
 63        while ((t = in.read()) != -1) {
 64        file.write(t);
 65        }
 66        file.close();
 67        in.close();
 68       System.out.println("文件获取成功");
 69     } catch (MalformedURLException e) {
 70        e.printStackTrace();
 71     } catch (FileNotFoundException e) {
 72       e.printStackTrace();
 73     } catch (IOException e) {
 74        e.printStackTrace();
 75     }
 76     }
 77
 78     public String getHtmlCode(String httpUrl) throws IOException {
 79     String content ="";
 80     URL uu = new URL(httpUrl); // 创建URL类对象
 81     BufferedReader ii = new BufferedReader(new InputStreamReader(uu
 82         .openStream())); // //使用openStream得到一输入流并由此构造一个BufferedReader对象
 83     String input;
 84     while ((input = ii.readLine()) != null) { // 建立读取循环,并判断是否有读取值
 85        content += input;
 86     }
 87     ii.close();
 88     return content;
 89     }
 90     public static List<String> getImageSrc(String htmlCode) {
 91         List<String> imageSrcList = new ArrayList<String>();
 92 //        Pattern p = Pattern.compile("<img\\b[^>]*\\bsrc\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*>", Pattern.CASE_INSENSITIVE);
 93         Pattern p = Pattern.compile("src\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*", Pattern.CASE_INSENSITIVE);
 94         Matcher m = p.matcher(htmlCode);
 95         String quote = null;
 96         String src = null;
 97         while (m.find()) {
 98             quote = m.group(1);
 99             src = (quote == null || quote.trim().length() == 0) ? m.group(2).split("\\s+")[0] : m.group(2);
100             imageSrcList.add(src);
101             System.out.println("src"+src);
102         }
103         return imageSrcList;
104     }
105
106     public void get(String url,String text) throws IOException {
107
108     String searchImgReg = "(?x)(src|SRC|background|BACKGROUND)=(‘|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")";
109     String searchImgReg2 = "(?x)(src|SRC|background|BACKGROUND)=(‘|\")(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")";
110     String content  = "";
111     if(text == null){
112         content = this.getHtmlCode(url);
113     }else{
114         content = text;
115     }
116     System.out.println("内容:"+content);
117
118     Pattern p = Pattern.compile("src\\b\\s*=\\s*(‘|\")?([^‘\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*", Pattern.CASE_INSENSITIVE);
119     Matcher m = p.matcher(content);
120     String quote = null;
121     String src = null;
122     while (m.find()) {
123         quote = m.group(1);
124         src = (quote == null || quote.trim().length() == 0) ? m.group(2).split("\\s+")[0] : m.group(2);
125         this.getHtmlFile(url+src);
126     }
127
128     Pattern pattern = Pattern.compile(searchImgReg);
129     Matcher matcher = pattern.matcher(content);
130     while (matcher.find()) {
131        System.out.println("图片路径1:"+matcher.group(3));
132       this.getHtmlFile(url+matcher.group(3));
133
134     }
135
136     pattern = Pattern.compile(searchImgReg2);
137     matcher = pattern.matcher(content);
138     while (matcher.find()) {
139        System.out.println("图片路径1:"+matcher.group(3));
140       this.getHtmlFile(matcher.group(3));
141
142     }
143     // searchImgReg =
144     // "(?x)(src|SRC|background|BACKGROUND)=(‘|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))(‘|\")";
145     }
146     public static void main(String[] args) throws IOException {
147         GetImage gcp = new GetImage();
148         gcp.get("http://www.123rf.com.cn/#baidu01",null);
149         gcp.get(null,"<img src=\"/images/ico/logo.png\">");
150         gcp.getHtmlFile("http://www.xxxx.com/uploadfiles/123.rar");
151     }
152 }
时间: 2024-11-10 22:45:52

通过文本或url扫描下载文件的相关文章

java 从网络Url中下载文件

转自:http://blog.csdn.net/xb12369/article/details/40543649 /** * 从网络Url中下载文件 * @param urlStr * @param fileName * @param savePath * @throws IOException */ public static void downLoadFromUrl(String urlStr,String fileName,String savePath) throws IOExcepti

QTP 通过URL地址下载文件到本地(转)

While automating, you may come across situations where you need to download a file on clicking a link. This generally involves a lot of User Interface (GUI) overhead like syncing the download box, clicking the buttons, managing the Save As box, et

防止通过URL下载文件

网页中向用户提供了ppt文件的下载功能,前提是只有登录的用户才能下载,所以需要禁止通过URL对文件进行下载. 自己定义一个文件下载类. using System; using System.Data; using System.Configuration; using System.Web; using System.Web.Security; using System.Web.UI; using System.Web.UI.WebControls; using System.Web.UI.We

java中如何设置下载文件

如果想要设置某一url为下载文件的方法如下 需要设置文件响应类型,使用response.setContentType,比如jpeg格式的图片.如果想要访问该页面时出现下载保存的窗口,使用response.setHeader("Content-Disposition", "attachment;filename="+filename.getName())函数.也可以是pdf文件,或者其他格式的,如果想要查找浏览器可以识别什么格式的文件,需要到服务器的配置文件中找,比如

python多线程下载文件

从文件中读取图片url和名称,将url中的文件下载下来.文件中每一行包含一个url和文件名,用制表符隔开. 1.使用requests请求url并下载文件 def download(img_url, img_name): with closing(requests.get(img_url, stream=True)) as r: with open(os.path.join(out_dir, img_name), 'wb') as f: for data in r.iter_content(102

java按http地址列表下载文件队列

本文实现的功能是通过url列表下载文件队列,http url地址如:http://172.16.53.187:8080/LiveDownServer/Media/DownloadFile?path=E%3A%2Fvideofiles%2Ftest123%2Fdevelop%2F2015%2F06%2F12%2Fts%2F12%2F20150612124328.ts 路径经过了编码,可以利用java自带函数java.net.URLDecoder.decode进行解码,解码之后添加到url列表利用H

java通过http方式下载文件

package com.qiyi; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URL; import java.net.URLConnection; import java.util.List; import java.util.Map; import java.io

Windows从web下载文件的几种方式

最近搞app自动化测试,需要自动从网页上下载apk用于测试,顺便学习了几种从web下载文件的方式. 一.PowerShell DownloadFile 命令 PowerShell 是一种Windows原生的脚本语言,使用很方便,可以实现很多复杂的功能. 首先启动PowerShell,可以在cmd里执行 start powershell,也可以按住shift键右击选择打开powershell 启动powershell后输入如下命令: $client = new-object System.Net.

网络下载文件

/** * 从网络Url中下载文件 * @param urlStr * @param fileName * @param savePath * @throws IOException */ public static void downLoadFromUrl(String urlStr,String fileName,String savePath) throws IOException{ URL url = new URL(urlStr); HttpURLConnection conn = (