2015-6-2
今天尝试把昨天在 GitHub 上看到的一个下载网易云音乐歌单歌曲的脚本看懂并加以修改。
GitHub 地址:https://github.com/keli/netease-music
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Download every track of a NetEase Cloud Music playlist into a folder.

Usage: <script> <playlist id>
"""
import base64
import hashlib
import json
import os
import random
import sys

try:
    import urllib2  # Python 2
except ImportError:  # Python 3: the same API lives in urllib.request
    import urllib.request as urllib2

# Set cookie: install a global opener so that every urllib2.urlopen() call
# below sends these two headers.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.append(('Cookie', 'appver=2.0.2'))
cookie_opener.addheaders.append(('Referer', 'http://music.163.com'))
urllib2.install_opener(cookie_opener)


def encrypted_id(id):
    """Return the obfuscated path segment derived from a song's dfsId.

    Each byte of ``id`` is XOR-ed with a repeating fixed key, the result is
    MD5-hashed, and the 16-byte digest is encoded as URL-safe base64
    ('/' becomes '_', '+' becomes '-').

    Args:
        id: The dfsId as text or bytes.  The parameter name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        A 24-character native string (it always ends in '==').
    """
    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    if not isinstance(id, bytes):
        id = id.encode('utf-8')
    key_len = len(key)
    mixed = bytearray(
        byte ^ key[i % key_len] for i, byte in enumerate(bytearray(id))
    )
    digest = hashlib.md5(bytes(mixed)).digest()
    result = base64.urlsafe_b64encode(digest)
    if not isinstance(result, str):  # Python 3 returns bytes
        result = result.decode('ascii')
    return result


def get_playlist(playlist_id):
    """Fetch the playlist detail JSON and return its 'result' object.

    Args:
        playlist_id: Playlist id, interpolated into the API URL with %s.

    Returns:
        The value stored under the 'result' key of the decoded response.

    Raises:
        KeyError: If the response has no 'result' key.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read().decode('utf-8'))
    finally:
        # Close the connection even when reading or decoding fails.
        resp.close()
    return data['result']


def save_track(track, folder, position):
    """Download one track into ``folder`` unless the file already exists.

    Args:
        track: One element of the playlist's 'tracks' list.
        folder: Destination directory (must already exist).
        position: 1-based index of the track in the playlist; currently
            not used for the file name.

    Raises:
        KeyError, TypeError: If the track has no usable 'hMusic' entry.
    """
    # NOTE: this lookup fails for tracks that carry no 'hMusic' data.
    hmusic = track['hMusic']
    fname = hmusic['name'] + '.' + hmusic['extension']
    fname = fname.replace('/', '_')  # '/' would be read as a directory
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        return
    print("Downloading %s ..." % fpath)
    dfsId = str(hmusic['dfsId'])
    # NOTE: this URL is built but never used; the download below reads
    # track['mp3Url'] instead.
    url = 'http://m%d.music.126.net/%s/%s.%s' % (
        random.randrange(1, 3), encrypted_id(dfsId), dfsId,
        hmusic['extension'])
    resp = urllib2.urlopen(track['mp3Url'])
    try:
        data = resp.read()
    finally:
        resp.close()
    with open(fpath, 'wb') as mp3:
        mp3.write(data)


def download_playlist(playlist_id, folder='.'):
    """Download all tracks of a playlist into ``folder``/<playlist name>.

    Args:
        playlist_id: Playlist id passed on to get_playlist().
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    playlist = get_playlist(playlist_id)
    folder = os.path.join(folder, playlist['name'])
    if not os.path.exists(folder):
        os.makedirs(folder)
    for idx, track in enumerate(playlist['tracks']):
        save_track(track, folder, idx + 1)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: %s <playlist id>" % sys.argv[0])
        sys.exit(1)
    download_playlist(sys.argv[1])
这边是对 cookie 的处理。addheaders 之前没有见过,用 help 查询居然也没有查到(它其实是 opener 对象上的一个列表属性,而不是方法),但确实有用。
# Install a global opener so that every later urllib2.urlopen() call sends
# the Cookie and Referer headers below.
cookie_opener = urllib2.build_opener()
cookie_opener.addheaders.append(('Cookie', 'appver=2.0.2'))
cookie_opener.addheaders.append(('Referer', 'http://music.163.com'))
urllib2.install_opener(cookie_opener)
这一段其实是没有用的,但是挺好奇这段代码里面函数的作用,过段时间再去学一下
def encrypted_id(id):
    """Return the obfuscated path segment derived from a song's dfsId.

    Each byte of ``id`` is XOR-ed with a repeating fixed key, the result is
    MD5-hashed, and the 16-byte digest is encoded as URL-safe base64
    ('/' becomes '_', '+' becomes '-').

    Args:
        id: The dfsId as text or bytes.  The parameter name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        A 24-character native string (it always ends in '==').
    """
    # Imported here so the snippet does not depend on the removed ``md5``
    # module or on the Python-2-only 'base64' string codec.
    import base64
    import hashlib

    key = bytearray(b'3go8&$8*3*3h0k(2)2')
    if not isinstance(id, bytes):
        id = id.encode('utf-8')
    key_len = len(key)
    mixed = bytearray(
        byte ^ key[i % key_len] for i, byte in enumerate(bytearray(id))
    )
    digest = hashlib.md5(bytes(mixed)).digest()
    # urlsafe_b64encode performs the '/' -> '_' and '+' -> '-' substitution.
    result = base64.urlsafe_b64encode(digest)
    if not isinstance(result, str):  # Python 3 returns bytes
        result = result.decode('ascii')
    return result
下面这段代码是发挥主要作用的,但是这个脚本是原作者三个月前写的,网易云音乐应该有一些变化
def get_playlist(playlist_id):
    """Fetch the playlist detail JSON and return its 'result' object.

    Args:
        playlist_id: Playlist id, interpolated into the API URL with %s.

    Returns:
        The value stored under the 'result' key of the decoded response.

    Raises:
        KeyError: If the response has no 'result' key.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read().decode('utf-8'))
    finally:
        # Close the connection even when reading or decoding fails.
        resp.close()
    return data['result']


def save_track(track, folder, position):
    """Download one track into ``folder`` unless the file already exists.

    Args:
        track: One element of the playlist's 'tracks' list.
        folder: Destination directory (must already exist).
        position: 1-based index of the track in the playlist; currently
            not used for the file name.

    Raises:
        KeyError, TypeError: If the track has no usable 'hMusic' entry.
    """
    # NOTE: this lookup fails for tracks that carry no 'hMusic' data.
    hmusic = track['hMusic']
    fname = hmusic['name'] + '.' + hmusic['extension']
    fname = fname.replace('/', '_')  # '/' would be read as a directory
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        return
    print("Downloading %s ..." % fpath)
    dfsId = str(hmusic['dfsId'])
    # NOTE: this URL is built but never used; the download below reads
    # track['mp3Url'] instead.
    url = 'http://m%d.music.126.net/%s/%s.%s' % (
        random.randrange(1, 3), encrypted_id(dfsId), dfsId,
        hmusic['extension'])
    resp = urllib2.urlopen(track['mp3Url'])
    try:
        data = resp.read()
    finally:
        resp.close()
    with open(fpath, 'wb') as mp3:
        mp3.write(data)


def download_playlist(playlist_id, folder='.'):
    """Download all tracks of a playlist into ``folder``/<playlist name>.

    Args:
        playlist_id: Playlist id passed on to get_playlist().
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    playlist = get_playlist(playlist_id)
    folder = os.path.join(folder, playlist['name'])
    if not os.path.exists(folder):
        os.makedirs(folder)
    for idx, track in enumerate(playlist['tracks']):
        save_track(track, folder, idx + 1)
首先是JSON,我之前并没有学过JSON,XML也只是昨天看了一小会儿
http://music.163.com/api/playlist/detail?id=4566307 打开准备下载的歌单
用 Firebug 看 JSON 挺清晰,结构一目了然;在 Chrome 里看就感觉所有内容都挤在一起。
看代码也知道歌曲的链接在tracks里面
我按着原作者的代码调试 发现一直卡在
name = track['hMusic']['name']
后来调了半天,才去看 JSON 数据,发现 tracks 里面并不是每首歌都有 hMusic 这个字段
后来直接改成 name = track['name']
而且再仔细看JSON 发现代码有好几处不对的地方 最后改成了这样
def get_playlist(playlist_id):
    """Fetch the playlist detail JSON, print its name, return 'result'.

    Args:
        playlist_id: Playlist id, interpolated into the API URL with %s.

    Returns:
        The value stored under the 'result' key of the decoded response.

    Raises:
        KeyError: If the response has no 'result' (or 'name') key.
    """
    url = 'http://music.163.com/api/playlist/detail?id=%s' % playlist_id
    resp = urllib2.urlopen(url)
    try:
        data = json.loads(resp.read().decode('utf-8'))
    finally:
        # Close the connection even when reading or decoding fails.
        resp.close()
    print(data['result']['name'])
    return data['result']


def save_track(track, folder, position):
    """Download one track as '<name><position>.mp3', skipping failures.

    A URLError or a socket timeout is reported by printing the exception
    type and the track is skipped, so the caller's loop can continue with
    the next one.

    Args:
        track: One element of the playlist's 'tracks' list; its 'name'
            and 'mp3Url' entries are read.
        folder: Destination directory (must already exist).
        position: 1-based index of the track, appended to the file name.
    """
    # Needed by the ``except socket.timeout`` clause below; the script's
    # import list does not include it.
    import socket

    name = track['name']
    # name = track['hMusic']['name']
    print(name)
    fname = name + str(position) + '.mp3'
    # fname = name + str(position) + '.' + track['hMusic']['extension']
    fname = fname.replace('/', '_')  # '/' would be read as a directory
    fpath = os.path.normpath(os.path.join(folder, fname))
    if os.path.exists(fpath):
        return
    print("Downloading %s ..." % fpath)
    # dfsId = str(track['hMusic']['dfsId'])
    # url = 'http://m%d.music.126.net/%s/%s.%s' % (random.randrange(1, 3), encrypted_id(dfsId), dfsId, track['hMusic']['extension'])
    try:
        resp = urllib2.urlopen(track['mp3Url'], timeout=5)
        try:
            data = resp.read()
        finally:
            # Release the connection even when read() times out.
            resp.close()
    except urllib2.URLError as e:
        print(type(e))  # author's note: this clause did not catch the timeout
    except socket.timeout as e:
        print(type(e))  # author's note: this clause caught the timeout
    else:
        # Only reached when the download succeeded, so ``data`` is bound.
        with open(fpath, 'wb') as mp3:
            mp3.write(data)


def download_playlist(playlist_id, folder='.'):
    """Download all tracks of a playlist into ``folder``/<playlist name>.

    Args:
        playlist_id: Playlist id passed on to get_playlist().
        folder: Parent directory; a sub-directory named after the playlist
            is created inside it when missing.
    """
    playlist = get_playlist(playlist_id)
    folder = os.path.join(folder, playlist['name'])
    if not os.path.exists(folder):
        os.makedirs(folder)
    for idx, track in enumerate(playlist['tracks']):
        print('begin save')
        save_track(track, folder, idx + 1)
注释掉的部分就是更改的地方
在下载歌曲的时候,经常会卡在一个地方,然后知道urlopen可以设置timeout 就是超时时间,
然后我的想法是超时的话就报错然后继续下载下一首
但是不是很熟悉try except 试了好久
试过好几个版本
# Attempt 1 (does not work): the file is written after the try statement.
try:
    resp = urllib2.urlopen(track['mp3Url'], timeout=5)
    data = resp.read()
    resp.close()
except urllib2.URLError as e:
    print(type(e))  # author's note: not caught here
    pass
except socket.timeout as e:
    print(type(e))  # author's note: caught here
    pass
# This runs even after an exception was handled above, in which case
# ``data`` was never assigned.
with open(fpath, 'wb') as mp3:
    mp3.write(data)
# Attempt 2: handlers without ``pass`` and the write moved into ``else``.
# NOTE(review): as reconstructed here this behaves like the final version;
# the failure the author saw presumably came from the indentation that was
# lost in this listing -- unverified.
try:
    resp = urllib2.urlopen(track['mp3Url'], timeout=5)
    data = resp.read()
    resp.close()
except urllib2.URLError as e:
    print(type(e))  # author's note: not caught here
except socket.timeout as e:
    print(type(e))  # author's note: caught here
else:
    with open(fpath, 'wb') as mp3:
        mp3.write(data)
# Attempt 3 (does not work): only urlopen() is inside the try statement.
try:
    resp = urllib2.urlopen(track['mp3Url'], timeout=5)
except urllib2.URLError as e:
    print(type(e))  # author's note: not caught here
    pass
except socket.timeout as e:
    print(type(e))  # author's note: caught here
    pass
# Reached even when urlopen() raised, in which case ``resp`` was never
# assigned; read() is also outside the try, so a timeout while reading is
# not handled at all.
data = resp.read()
resp.close()
with open(fpath, 'wb') as mp3:
    mp3.write(data)
总之都是代码报错,报错了也没有继续运行下去
后来改成这样
# Final version: the whole download is inside ``try`` and the file is only
# written in ``else``, i.e. when no exception occurred.
try:
    resp = urllib2.urlopen(track['mp3Url'], timeout=5)
    try:
        data = resp.read()
    finally:
        # Release the connection even when read() times out.
        resp.close()
except urllib2.URLError as e:
    print(type(e))  # author's note: not caught here
    pass
except socket.timeout as e:
    print(type(e))  # author's note: caught here
    pass
else:
    with open(fpath, 'wb') as mp3:
        mp3.write(data)
就行了=。=, 还是得再好好看看异常处理那一块
总之这就是一下午的学习,虽然下歌什么的对我并没有什么卵用
时间: 2024-10-14 13:05:25