import os
import re

import requests

# Chart page that lists the songs of a NetEase Cloud Music toplist.
TOPLIST_URL = 'https://music.163.com/discover/toplist'

# "Outer link" template used to fetch the audio for a given song id.
OUTER_URL = "http://music.163.com/song/media/outer/url?id={}.mp3"

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Cookie values copied from a browser session's request headers.
# NOTE(review): these look like session cookies from 2019 and have presumably
# expired -- replace them with values from your own browser if the page comes
# back without any song links.
_COMMON_COOKIE = (
    '_iuqxldmzr_=32; '
    '_ntes_nnid=c1a1f8011496ad9a9cf4125e13111bc2,1556978947753; '
    '_ntes_nuid=c1a1f8011496ad9a9cf4125e13111bc2; '
    'WM_NI=%2FEGH7stoomIYf6K2wfK7TSSLdIPiBFgfjjPjJxO35FVwTKDRvPPmfzcPddFc2SQSQerqkYFmJJmSaK5TSd7DUMAi8nRZd7RPxMaR96d6GRwpLRFSCf9iYF82ks4STQJAYVI%3D; '
    'WM_NIKE=9ca17ae2e6ffcda170e2e6eeccc565f498bad5d33fa7ac8eb3c84e879b9bbbb8638f8eafa5e843f78f98b1bc2af0fea7c3b92aed9b89a9b679b8b1a18ecc53a6ea85d3f66e89e99cb4d248a897a08ac14ea5baa68bd3628aa881d4b84e96a99cacc75ab690a08ee572a190ac85c569f28d9fb4d46fb2afa5bac5678a96a3b4e441f78c9d85b65097938f8abb669286acd8d15cbc93afb3bc3ea8ea89b3f67c908e8ab4c26efbb797a7bb3aa9adfcb4d75b8a9f97b7cc37e2a3; '
    'WM_TID=DbDOaS3RScRAEVFBFQd4zMAPWKNpUWbt'
)

# Cookie sent when no toplist id is given (default chart page).
_DEFAULT_COOKIE = (
    'JSESSIONID-WYYY=Ha%2BrEImmcrumfCIKnkYrI2SOtTKhbHrAherOOQOVXv%2BTbE1mW00vhy4p98e0njMQJO7pFkDmOzj%5CT9WmD9KcGtwhiFy%2F77HPhV%2Ffm7h1qXhFcFl6fukX6%5CvQ%2FSZ%2FDrpP0ZntGp9PG%2BqFPXbKm18fgPlIS%5CJ0T61Yp4IeW9O5F4x6lZ%2Bs%3A1556980747731; '
    + _COMMON_COOKIE
)

# Cookie sent when a specific toplist id is requested.
_TOPLIST_COOKIE = (
    _COMMON_COOKIE
    + '; playerid=16630313; '
    'JSESSIONID-WYYY=AAVK8lO93%2FgGNN%2BIEUvHH2%2FGsvPEYMaBB75JDi1e%5C%2BUyCv58%2Fkof0oCcdkThmIQk3s%2FS1a894JNDFJSgf4Xi7yWt%2FHdxpy9KKA2t7t3TRqKPAJ6uXJhYrT8GGaQeSaV81kpvy8C92GRjFYe34bYRDEOoyYuiHYw3l%5CKlkvHyS8SuY101%3A1556982487771'
)

# Captures (song id, link text) from every <li><a href="/song?id=N">..</a></li>.
_SONG_LINK_RE = re.compile(r'<li><a href="/song\?id=(\d+)">(.+?)</a></li>')

# Characters that cannot appear in a Windows file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _parse_cookie_header(cookie):
    """Turn a ``name=value; name=value`` cookie header into a dict."""
    jar = {}
    for pair in cookie.split("; "):
        # partition() splits on the first "=" only, so values that themselves
        # contain "=" are kept whole; fragments without "=" are skipped.
        name, sep, value = pair.strip().partition("=")
        if sep:
            jar[name] = value
    return jar


def _safe_filename(name):
    """Replace characters that are invalid in file names with ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name).strip()


def get_song_id(id=None):
    """Fetch a toplist page and return the songs it links to.

    Args:
        id: Toplist id to request. When falsy, the default toplist page is
            fetched. (The name shadows the builtin ``id`` but is kept so
            existing keyword callers continue to work.)

    Returns:
        A list of ``(song_id, song_name)`` string tuples, in page order.
        Empty when the page contains no matching links.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
    """
    if not id:
        cookie = _DEFAULT_COOKIE
        params = None
    else:
        cookie = _TOPLIST_COOKIE
        # The id goes only into the query parameters; the URL itself stays
        # clean so the id is not sent twice.
        params = {'id': str(id)}

    response = requests.get(
        url=TOPLIST_URL,
        headers=HEADERS,
        cookies=_parse_cookie_header(cookie),
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return _SONG_LINK_RE.findall(response.text)


def download_song(song_name, song_id, save_dir="H:music"):
    """Download one song through the outer link and save it as an mp3.

    Args:
        song_name: Title used for the output file name; characters that are
            invalid in file names are replaced with ``_``.
        song_id: Song id inserted into the outer-link URL.
        save_dir: Directory to write into; created if missing. Defaults to
            the original hard-coded location.

    Returns:
        True if a file was written, False if the response was an HTML page
        and was therefore skipped.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        OSError: if the directory or file cannot be created.
    """
    singer_url = OUTER_URL.format(song_id)
    response = requests.get(url=singer_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # NOTE(review): presumably the service answers with an HTML page when a
    # track is unavailable; never store such a page under an .mp3 name.
    content_type = response.headers.get("Content-Type", "")
    if content_type.startswith("text/html"):
        return False

    os.makedirs(save_dir, exist_ok=True)
    target = "{}/{}.mp3".format(save_dir, _safe_filename(song_name))
    with open(target, "wb") as f:
        f.write(response.content)
    return True


def main():
    """Download every song of the configured toplist, one file per song."""
    for song_id, song_name in get_song_id(id=2006508653):
        try:
            saved = download_song(song_name, song_id)
        except (requests.RequestException, OSError) as exc:
            # One bad track should not abort the rest of the batch.
            print("failed: {} ({}): {}".format(song_name, song_id, exc))
            continue
        if not saved:
            print("skipped (no audio returned): {} ({})".format(song_name, song_id))


if __name__ == '__main__':
    main()
这段代码的核心就在 singer_url = "http://music.163.com/song/media/outer/url?id={}.mp3".format(song_id) 这一行,这是一个外链。
实话说,我不知道怎么获取这种外链,不知道从哪里能弄到。 这是我在网上找到的。
实现思路:
这一排是 排行榜, 每一个里面都有一个id:
他的 Request Headers 里面, 有cookie。 在代码里面添加上就好了。 最下面的
id: 2250011882 添加到 params 里面。
获取这个 页面之后。 用正则,拿到每个 li 标签里 a 标签的 href 属性。 就是每首歌的id
用这个 id 跟外链组合。 就能爬到想要的数据了。
原文地址:https://www.cnblogs.com/chengege/p/10810385.html
时间: 2024-10-10 00:13:01