cvpr2015列表已经出来提供下载,文件太多可以用迅雷或者wget直接下载
http://www.cv-foundation.org/openaccess/CVPR2015.py
但是这样全部下载还得一个一个翻看,于是乎写了个小程序专门选择自己方向的论文。
需要htmlparser解析网页代码,下载地址http://sourceforge.net/projects/htmlparser/files/htmlparser/
还有为了方便,直接把paper网页保存在D盘了。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
public class DownCVPR {
/*
* 建立个目录 D:/CVPR2015/Segmentation/
* 页面保存位置
* String htmlName="D:\\CVPR 2015 Open Access Repository.htm";
*/
static String saveDir="D:/CVPR2015/Segmentation/";
public static String openFile( String szFileName ) {
try {
BufferedReader bis = new BufferedReader(new InputStreamReader(new FileInputStream( new File(szFileName))) );
String szContent="";
String szTemp;
while ( (szTemp = bis.readLine()) != null) {
szContent+=szTemp+"\n";
}
bis.close();
return szContent;
}
catch( Exception e ) {
return "";
}
}
static NodeFilter lnkFilter = new NodeFilter() {
public boolean accept(Node node) {
if(node instanceof LinkTag)
return true;
return false;
}
};
private static void downloadNet(String urlString, String filename)
{
System.out.println("Begin to download " + urlString + "...");
int byteread = 0;
try
{
URL url = new URL(urlString);
URLConnection conn = url.openConnection();
InputStream inStream = conn.getInputStream();
FileOutputStream fs = new FileOutputStream(saveDir + filename);
byte[] buffer = new byte[1204];
while ((byteread = inStream.read(buffer)) != -1)
{
fs.write(buffer, 0, byteread);
}
fs.flush();
fs.close();
System.out.println("Success to download file...");
}
catch (Exception e)
{
e.printStackTrace();
}
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
String htmlName="D:\\CVPR 2015 Open Access Repository.htm";
String content=openFile(htmlName);
String paperPath[];
String paperName[];
String papertype="Segmentation";
int paperNum=0;
try {
Parser parser = new Parser(content);
// AndFilter filter = new AndFilter(new TagNameFilter("div"),
// new HasAttributeFilter("id"));
TagNameFilter filter=new TagNameFilter("dt");
NodeList nodes = parser.parse(filter);
System.out.println(nodes.size());
paperNum=nodes.size();
paperPath=new String[paperNum];
paperName=new String[paperNum];
for(int i=0;i<nodes.size();i++)
{
Node n=nodes.elementAt(i);
Node cn=n.getLastChild();
TagNode tn=new TagNode();
tn.setText(cn.toHtml());
//System.out.println(cn.getFirstChild().getText());
paperName[i]=cn.getFirstChild().getText();
paperPath[i]=tn.getAttribute("href").replaceAll(".html", ".pdf");
paperPath[i]=paperPath[i].substring(paperPath[i].lastIndexOf("/")+1);
// paperName[i]=path.substring(path.lastIndexOf("/")+1, path.indexOf(".html"));
// System.out.println(paperName[i]);
}
String pre="http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/";
for(int i=0;i<paperPath.length;i++){
//String name=paperPath[i].substring(paperPath[i].lastIndexOf("/")+1, paperPath[i].indexOf(".html"));
if(paperName[i].indexOf(papertype)>=0||paperName[i].indexOf(papertype.toLowerCase())>=0){
String name=name=paperPath[i].substring(paperPath[i].lastIndexOf("/")+1);
System.out.println(paperName[i]);
downloadNet(pre+paperPath[i], name) ;
}
else{
continue;
}
System.out.println(i);
}
} catch (ParserException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
时间: 2024-10-06 00:12:08