1.图片下载
/// <summary>
/// Console crawler that walks the album pages of a fixed image site, one worker
/// thread per category, and saves the first image found on each page to disk.
/// </summary>
public class Program
{
    // Root of the site being crawled; page URLs are built as
    // {BaseUrl}{category}/{albumId}.html or {BaseUrl}{category}/{albumId}_{page}.html.
    private const string BaseUrl = "http://www.mm8mm8.com/";

    // Folder that receives the downloaded files (created on demand).
    private const string DownloadDirectory = @"C:\down\";

    // Timeout applied to every HTTP request, in milliseconds (5 s).
    private const int RequestTimeoutMs = 1000 * 5;

    // Album ids are visited from FirstAlbumId down to (but not including) LastAlbumIdExclusive.
    private const int FirstAlbumId = 9014;
    private const int LastAlbumIdExclusive = 10;

    // Upper bound on the number of pages probed per album.
    private const int MaxPagesPerAlbum = 20;

    // Matches <img ...> tags and captures the src value in the "imgUrl" group.
    // The optional opening quote may be a double or a single ASCII quote.
    // Built once and shared: constructing it for every page is wasted work.
    private static readonly Regex ImgTagRegex = new Regex(
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Starts one download thread per category, then blocks on console input so
    /// the process stays alive while the workers run.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string[] categories = { "model", "sexy", "belle", "stars" };

        foreach (string category in categories)
        {
            Thread worker = new Thread(DownLoad);
            worker.Start(category);
        }

        Console.Read();
    }

    /// <summary>
    /// Crawls every album of one category and saves the first image of each page.
    /// Errors are logged to the console and never abort the crawl.
    /// </summary>
    /// <param name="category">
    /// Category path segment; typed as <see cref="object"/> so the method can be
    /// used as a parameterized thread start.
    /// </param>
    public static void DownLoad(object category)
    {
        for (int albumId = FirstAlbumId; albumId > LastAlbumIdExclusive; albumId--)
        {
            for (int page = 0; page < MaxPagesPerAlbum; page++)
            {
                string url = BuildPageUrl(category, albumId, page);

                try
                {
                    List<string> imageUrls = GetHtmlImageUrlList(FetchText(url));

                    if (imageUrls.Count == 0)
                    {
                        // A page without images ends this album; move on to the next one.
                        Log(" 当前网址:" + url + " 未发现图片");
                        break;
                    }

                    string fileName = category + "_" + albumId + "_" + (page + 1) + ".jpg";
                    SaveImage(url, imageUrls[0], fileName);
                }
                catch (Exception ex)
                {
                    // Best effort: a failed page is reported and the next page is tried.
                    Log(" 当前网址:" + url + " 错误信息:" + ex.Message);
                }
            }
        }
    }

    /// <summary>
    /// Returns the src value of every img tag found in the given HTML.
    /// </summary>
    /// <param name="sHtmlText">HTML source to scan.</param>
    /// <returns>
    /// The captured image URLs in document order; an empty list when the input
    /// is null, empty, or contains no img tag.
    /// </returns>
    public static List<string> GetHtmlImageUrlList(string sHtmlText)
    {
        List<string> sUrlList = new List<string>();

        if (string.IsNullOrEmpty(sHtmlText))
        {
            return sUrlList;
        }

        foreach (Match match in ImgTagRegex.Matches(sHtmlText))
        {
            sUrlList.Add(match.Groups["imgUrl"].Value);
        }

        return sUrlList;
    }

    // Builds the URL of one album page; page 0 has no "_{page}" suffix.
    private static string BuildPageUrl(object category, int albumId, int page)
    {
        if (page == 0)
        {
            return BaseUrl + category + "/" + albumId + ".html";
        }

        return BaseUrl + category + "/" + albumId + "_" + page + ".html";
    }

    // Downloads a page and returns its body as text. The response, stream and
    // reader are disposed so the underlying connection is released for reuse.
    private static string FetchText(string url)
    {
        var request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = RequestTimeoutMs;

        using (var response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    // Downloads one image and saves it under DownloadDirectory; failures are
    // logged and swallowed so the caller can continue with the next page.
    private static void SaveImage(string pageUrl, string imageUrl, string fileName)
    {
        try
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(DownloadDirectory);

            string localFile = DownloadDirectory + fileName;

            // Resolve against the page URL so relative src values work too;
            // an already-absolute src is returned unchanged.
            Uri imageUri = new Uri(new Uri(pageUrl), imageUrl);

            var imageRequest = (HttpWebRequest)WebRequest.Create(imageUri);
            imageRequest.Timeout = RequestTimeoutMs;

            // The stream must outlive the Image, hence the nesting order:
            // the image is disposed first, then the stream, then the response.
            using (var imageResponse = (HttpWebResponse)imageRequest.GetResponse())
            using (Stream imageStream = imageResponse.GetResponseStream())
            using (Image image = Image.FromStream(imageStream))
            {
                image.Save(localFile);
            }

            Log(" 图片:" + fileName + " 已经下载 存入磁盘位置:" + localFile);
        }
        catch (Exception e)
        {
            Log(" 当前图片:" + fileName + " 错误信息:" + e.Message);
        }
    }

    // Writes a console line prefixed with the current local time.
    private static void Log(string message)
    {
        Console.WriteLine("时间:" + DateTime.Now + message);
    }
}
时间: 2024-10-26 22:39:27