获取罗辑思维每天的微信语音(python)
by 伍雪颖
一直喜欢听罗辑思维,不过每天去点那个微信公众号很麻烦,而且每天只听一分钟也不太过瘾,于是今天突然想把它们全部爬下来,合并起来,找时间一次全听完,岂不更好。
于是,开动:
上网找了下,发现已经有一个网站有mp3,于是去爬它的数据(也可以直接去爬罗辑思维官网的数据http://www.ljsw.cc)
http://www.ljsw.cc/forum-39-1.html
获取title和mp3的url:
#coding=utf-8
import re
import urllib2

# Output file shared by the crawler functions in this script; it receives the
# title / mp3-URL records that getLuojiContent writes.
# open() is used instead of the Python-2-only file() builtin.
f = open('luoji.txt', 'w')
def getHtmlCode(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request with ``urllib2.urlopen``.

    Returns:
        Whatever ``read()`` on the response returns (the undecoded page body).

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``read()`` propagates
        unchanged to the caller.
    """
    from contextlib import closing

    # The response is closed explicitly so that crawling many pages in a row
    # does not leave one open connection behind per request.
    with closing(urllib2.urlopen(url)) as response:
        return response.read()
# Compiled once at import time instead of on every call.
# NOTE(review): the pattern ends with a literal space, so each captured title
# stops at its first space. Presumably the original pattern ended at a closing
# HTML tag that was lost when this code was pasted -- TODO confirm.
_TITLE_RE = re.compile(r'xst">(.+?) ')


def getTitle(htmlString):
    """Return the titles found in ``htmlString``.

    A title is the shortest non-empty run of characters that follows the
    text ``xst">`` and is terminated by a space.

    Args:
        htmlString: HTML source to search.

    Returns:
        A list of captured title strings, in document order (empty when
        nothing matches).
    """
    return _TITLE_RE.findall(htmlString)
# Compiled once at import time instead of on every call.
# The dot before "mp3" is escaped: unescaped it matched ANY character, so
# text such as "http://host/notmp3'" was reported as an mp3 link and the
# caller then rebuilt a URL that never existed in the page.
_MP3_RE = re.compile(r"http(.+?)\.mp3'")


def getMp3Url(htmlString):
    """Return the middle part of every single-quoted mp3 link in ``htmlString``.

    A link is text of the form ``http<middle>.mp3'``. Only ``<middle>`` is
    captured; callers rebuild the full address as
    ``'http' + middle + '.mp3'``.

    Args:
        htmlString: HTML source to search.

    Returns:
        A list of the captured ``<middle>`` strings, in document order
        (empty when nothing matches).
    """
    return _MP3_RE.findall(htmlString)
def getLuojiContent(url):
    """Scrape one listing page and record each thread's title and mp3 URL.

    For every (title, thread URL) pair found on the listing page, the thread
    page is fetched and searched for an mp3 link. When one is found, the line
    ``<title>-<mp3 url>`` is appended to the module-level output file ``f``.
    Titles and mp3 URLs are also printed as progress output.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        Any error from fetching a page propagates to the caller.
    """
    htmlCode = getHtmlCode(url)
    titles = getTitle(htmlCode)
    # NOTE(review): getUrl is not defined in the visible part of this file.
    # Presumably it returns the thread-page URLs from the listing HTML in the
    # same order as the titles -- confirm.
    urls = getUrl(htmlCode)
    # zip() stops at the shorter list, so a title/URL count mismatch no longer
    # raises IndexError halfway through the page.
    for title, threadUrl in zip(titles, urls):
        print(title)
        contents = getMp3Url(getHtmlCode(threadUrl))
        if not contents:
            # No mp3 on this thread page. Nothing is written, so the output
            # file never gets a dangling "<title>-" without a newline that
            # would merge with the next record.
            continue
        mp3Url = 'http' + contents[0] + '.mp3'
        print(mp3Url)
        # Written in one call so a failure mid-entry cannot leave half a line.
        f.write(title + '-' + mp3Url + '\n')
if __name__ == '__main__':
    # Crawl listing pages 1..37. A failure on one page is reported and the
    # crawl moves on to the next page.
    try:
        for i in range(1, 38):
            print(str(i))
            url = 'http://www.ljsw.cc/forum-39-' + str(i) + '.html'
            try:
                getLuojiContent(url)
            except Exception as e:
                # "except Exception" rather than a bare "except:", so Ctrl-C
                # (KeyboardInterrupt) still stops the crawl. The exception is
                # shown so the cause of a failed page is not lost.
                print(str(i) + ': error!' + ' ' + repr(e))
            else:
                print('finished: ' + str(i))
    finally:
        # Flush and close the output file even when the crawl is interrupted.
        f.close()
下载mp3文件:
#coding=utf-8
import os
import re
import subprocess
import urllib2

# Download every mp3 listed in luoji.txt with curl and rename each file to
# "<title>.mp3". Each record in the file is "<title>-<url>".
with open("luoji.txt") as records:
    for line in records:
        line = line.rstrip('\n')
        # Split at the "-http" boundary instead of a fixed column: the old
        # slice line[11:] was only right for titles of exactly 10 characters,
        # and dropping the last character assumed a trailing newline.
        sep = line.find('-http')
        if sep < 0:
            # Blank or malformed record: nothing to download.
            continue
        title = line[:sep]
        url = line[sep + 1:]
        # Argument list instead of a shell string, so characters in the URL
        # are never interpreted by the shell.
        if subprocess.call(['curl', '-O', url]) != 0:
            # Without this check a failed download made os.rename raise and
            # aborted all remaining downloads.
            print('download failed: ' + url)
            continue
        # curl -O saves under the last path component of the URL.
        name = url.split('/')[-1]
        os.rename(name, title + '.mp3')
合并mp3文件:
from glob import iglob
import shutil
import os

# Directory holding the individual mp3 files to concatenate.
PATH = r'mp3'

# Concatenate every mp3 under PATH into a single luoji.mp3 by appending the
# raw bytes of each file in turn.
with open('luoji.mp3', 'wb') as destination:
    # iglob yields names in arbitrary order; sorting makes the order of the
    # merged audio deterministic (by file name).
    for filename in sorted(iglob(os.path.join(PATH, '*.mp3'))):
        # Each source file is closed as soon as it has been copied, instead
        # of leaving one open handle per input file.
        with open(filename, 'rb') as source:
            shutil.copyfileobj(source, destination)
搞定,于是可以一个个听,也可以合起来听
所有python代码:
所有mp3文件:
链接: http://pan.baidu.com/s/1nt5L7Pf 密码: 5mrg