电源

python取百度歌曲

2019-07-14 01:12发布生成海报

站内文章 / 电源技术

12568 0

不务正业学了一点 Python，没什么可练手的，就写了个抓取百度歌曲的脚本。代码很粗糙，也有 bug：它只生成一个方便用迅雷下载的 HTML 页面，没时间继续完善，贴在这里备忘。注意 urllib 使用的是 IE 的网络设置：如果在 IE 里设置了代理服务器，urllib 也会走这个代理。
top50：http://top.baidu.com/mp3.html
import urllib
import os
import logging
from sgmllib import SGMLParser

class top50(SGMLParser):
    """Collect the song-search links from Baidu's top-50 mp3 chart page.

    Feed the chart page's HTML to an instance; afterwards ``top50urls``
    holds, in document order, the ``href`` values of every ``<a>`` tag whose
    first ``href`` contains ``wstsearch``.
    """

    def reset(self):
        """Reset the parser and start a fresh, per-instance URL list.

        ``SGMLParser`` calls ``reset()`` at instantiation time, so the list
        exists as soon as the parser is constructed.  Keeping it on the
        instance (instead of a class-level ``[]``) stops separate parsers
        from accumulating into one shared list.
        """
        SGMLParser.reset(self)
        self.top50urls = []

    def start_a(self, attrs):
        """Handle an opening ``<a>`` tag.

        attrs -- list of ``(name, value)`` pairs for the tag's attributes.
        """
        href = [v for k, v in attrs if k == 'href']
        # Plain substring test: ``find(...) > 0`` would skip an href that
        # *starts* with the marker (find() returns 0 there).
        if href and 'wstsearch' in href[0]:
            self.top50urls.extend(href)

# Build top50.html: one numbered hyperlink per song on Baidu's top-50 mp3
# chart, so the files can be fetched with a download manager (Thunder)
# instead of being downloaded by this script.
#
# NOTE(review): the HTML tags inside this script's string literals were
# stripped when the listing was published, which left unterminated strings,
# a no-op '&' -> '&' replace and an empty find() pattern.  The markup below
# is reconstructed from the surviving fragments -- confirm it against the
# author's original if that is available.

# Prefix of the first hyperlink to the mp3 file on a song's search page.
_MP3_LINK_PREFIX = 'http://220.181.27.54'


def _entry(href, number, label):
    """Return one numbered hyperlink line for top50.html."""
    return ('<a href="' + href + '">' + str(number) + '.' + '\t' + label
            + '</a><br>\n')


logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(message)s',
                    filename='download_baidu.log',
                    filemode='w')

number = 0
f = open('top50.html', 'w')
try:
    # try/finally so the page is flushed and closed even if a fetch raises.
    f.write('<html>\n')
    f.write('<head><title>Top 50 mp3</title></head>\n'
            '<body>\n'
            '<h1>Top 50 mp3</h1>\n'
            '<p>(press mouse right key and select thunder to download them)'
            '</p>\n')

    print('connecting baidu.com...')
    html50 = urllib.urlopen('http://top.baidu.com/mp3.html').read()
    i50 = top50()
    i50.feed(html50)
    print('we get %d songs' % len(i50.top50urls))

    for raw_href in i50.top50urls:
        number = number + 1

        # The chart hrefs may still be HTML-escaped and may contain spaces.
        search_url = raw_href.replace('&amp;', '&').replace(' ', '%20')
        html1 = urllib.urlopen(search_url).read()
        logging.info(search_url)

        # Step 1: the first link to the mp3 redirect page.
        start = html1.find(_MP3_LINK_PREFIX)
        end = html1.find('"', start)
        redirect_url = html1[start:end].replace(' ', '%20')
        if start < 0 or len(redirect_url) < 10:
            # Baidu returned a page without the expected link.
            # NOTE(review): the href used for error rows was lost; the page
            # being processed is linked so it can be opened by hand.
            logging.debug(html1)
            f.write(_entry(search_url, number, 'baidu.com error!!!'))
            print('%d baidu.com error!!!' % number)
            continue

        # Step 2: the redirect page's first href is the mp3 itself.
        html2 = urllib.urlopen(redirect_url).read()
        start = html2.find('href')
        end = html2.find('"', start + 10)
        url = html2[start + 6:end]
        if start < 0 or len(url) > 100:
            # No href, or an implausibly long one: treat as a Baidu error.
            logging.debug(html2)
            f.write(_entry(redirect_url, number, 'baidu.com error!!!'))
            print('%d baidu.com error!!!' % number)
            continue

        # The song name follows the href; end + 18 skips what is presumably
        # '" target="_blank">' (18 characters).
        # NOTE(review): the closing-tag pattern was stripped from the
        # listing; '</a>' is assumed.
        end2 = html2.find('</a>', end)
        name = html2[end + 18:end2]

        # The mp3 could be downloaded here with urllib, but the page is
        # meant to be handed to Thunder instead.
        f.write(_entry(url, number, name))
        print('%d %s' % (number, name))

    f.write('</body>\n</html>\n')
finally:
    f.close()

# Open the generated page with the program associated with .html files.
os.system('top50.html')

轻音乐：http://list.mp3.baidu.com/list/qingyinyue.html#top19
class topmusic(SGMLParser):
    """Collect the song-search links from a Baidu music list page.

    Feed the list page's HTML to an instance; afterwards ``topurls`` holds,
    in document order, the ``href`` values of every ``<a>`` tag whose first
    ``href`` contains ``508&word``.
    """

    def reset(self):
        """Reset the parser and start a fresh, per-instance URL list.

        ``SGMLParser`` calls ``reset()`` at instantiation time, so the list
        exists as soon as the parser is constructed.  Keeping it on the
        instance (instead of a class-level ``[]``) stops separate parsers
        from accumulating into one shared list.
        """
        SGMLParser.reset(self)
        self.topurls = []

    def start_a(self, attrs):
        """Handle an opening ``<a>`` tag.

        attrs -- list of ``(name, value)`` pairs for the tag's attributes.
        """
        href = [v for k, v in attrs if k == 'href']
        # Plain substring test: ``find(...) > 0`` would skip an href that
        # *starts* with the marker (find() returns 0 there).
        if href and '508&word' in href[0]:
            self.topurls.extend(href)

python取百度歌曲

Top 50 mp3

Ta的文章更多 >>

热门文章

python取百度歌曲

Top 50 mp3

Ta的文章 更多 >>

热门文章

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

Ta的文章更多 >>