// Import modules.
const http = require('http');
const fs = require('fs');
const cheerio = require('cheerio');
const iconv = require('iconv-lite');
const request = require('request');
const async = require('async');

// Directory the downloaded images are written to.
const IMAGE_DIR = 'images/jiandan';

const urlList = []; // List of page URLs to crawl.
let id = 0; // Counter of scheduled image downloads.

// The listing pages all share one URL format, so the addresses are simply
// built by appending the page number.
for (let page = 193; page > 190; page--) {
  urlList.push(`http://jandan.net/ooxx/page-${page}`);
}

/**
 * Wraps a node-style callback so that only the first invocation goes through.
 * Several stream events can report the same failure, and a completion
 * callback must not be invoked more than once.
 *
 * @param {Function} fn - Callback to protect.
 * @returns {Function} Wrapper that forwards its arguments to `fn` once.
 */
function callOnce(fn) {
  let called = false;
  return (...args) => {
    if (called) {
      return;
    }
    called = true;
    fn(...args);
  };
}

/**
 * Downloads one listing page and collects the `href` of every
 * `.view_img_link` element found in it.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {Function} callback - Node-style callback `(err, links)`;
 *   `links` is an array of href strings.
 */
function getPages(url, callback) {
  const done = callOnce(callback);

  http
    .get(url, (res) => {
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(chunk);
      });
      res.on('error', done);
      res.on('end', () => {
        // If the site were GBK-encoded, decode it accordingly here;
        // otherwise the text may come out garbled.
        const html = iconv.decode(Buffer.concat(chunks), 'utf8');
        // cheerio parses the fetched page with a jQuery-like syntax.
        const $ = cheerio.load(html, { decodeEntities: false });
        const links = [];
        $('.view_img_link').each((_, element) => {
          const href = $(element).attr('href');
          // Elements without an href would otherwise yield `undefined`.
          if (href) {
            links.push(href);
          }
        });
        done(null, links);
      });
    })
    // Without this handler a connection failure is an unhandled 'error'
    // event and the callback would never fire.
    .on('error', done);
}

/**
 * Downloads every image referenced by `links` into IMAGE_DIR, at most five
 * at a time.
 *
 * @param {Array<Array<string>>} links - One array of image links per crawled
 *   page; the arrays are merged before downloading.
 */
function saveImage(links) {
  // `recursive` also creates the parent `images` directory and does not
  // throw when the directory already exists.
  fs.mkdirSync(IMAGE_DIR, { recursive: true });

  // `links` is a two-dimensional array (one array of image addresses per
  // page), so it has to be flattened first; empty entries are dropped.
  const allLinks = [].concat(...(links || [])).filter(Boolean);
  console.log(`length:${allLinks.length}`);

  async.mapLimit(
    allLinks,
    5,
    (link, callback) => {
      id++;
      // `id` acts as a counter that is printed once the image is saved.
      requestAndwrite(link, callback, id);
    },
    (err, results) => {
      if (err) {
        console.log(err);
      } else {
        console.log(results);
      }
    }
  );
}

/**
 * Downloads a single image and writes it into IMAGE_DIR, named after the
 * last path segment of `link`.
 *
 * A failed download is logged and reported to `callback` as a `null` result
 * rather than as an error, so one broken image does not abort the batch.
 *
 * @param {string} link - Image address, usually protocol-relative (`//host/…`).
 * @param {Function} callback - Node-style callback `(err, result)`; always
 *   invoked exactly once.
 * @param {number} id - Sequence number printed when the file has been saved.
 */
function requestAndwrite(link, callback, id) {
  // Only protocol-relative links need the scheme prepended.
  const url = link.startsWith('//') ? `http:${link}` : link;
  let settled = false;

  const finish = (result) => {
    if (settled) {
      return;
    }
    settled = true;
    callback(null, result);
  };
  const fail = (error) => {
    console.log(error);
    finish(null);
  };

  request.head(url, (err) => {
    if (err) {
      fail(err);
      return;
    }

    const writer = fs.createWriteStream(`${IMAGE_DIR}/${link.split('/').pop()}`);
    request(url)
      .on('error', (error) => {
        fail(error);
        // pipe() does not close the destination when the source fails.
        writer.destroy();
      })
      .pipe(writer)
      .on('error', fail)
      .on('close', () => {
        if (settled) {
          return;
        }
        console.log(id);
        finish('result');
      });
  });
}

// Crawl the listing pages (at most five concurrently), then download the
// images they reference.
async.mapLimit(urlList, 5, getPages, (err, links) => {
  if (err) {
    console.log(err);
    return;
  }
  saveImage(links);
});
// Time: 2024-10-19 02:57:56