【Python】抓取电影海报并下载

2019-04-15 17:26发布

#coding=utf-8
"""Scrape movie posters from a dianying.2345.com listing page and download them.

BeautifulSoup lookups this script relies on:

    soup.find_all(class_='v_picConBox mt15')
    tag.find("div", {"class": "pic"})
    pic.img['data-src']
    title = text.span.em.a['title']
"""
import os
import re
from urllib.parse import urljoin
from urllib.request import urlretrieve

import requests
from bs4 import BeautifulSoup

# Listing page that is scraped for posters.
LIST_URL = 'http://dianying.2345.com/list/kehuan------.html'


def geturl(url):
    """Fetch *url* and return the page parsed by BeautifulSoup (lxml parser).

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-success HTTP status.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of parsing an error page as if it were the listing.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')


def _safe_name(name):
    """Return *name* with characters that are invalid in file names replaced by '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', name)


def _parse_movies(soup):
    """Return a list of ``[title, img_url]`` pairs found in the parsed page.

    Returns an empty list when the expected list container is not present.
    """
    containers = soup.find_all(class_='v_picConBox mt15')
    if not containers:
        # Page layout changed or the request returned something unexpected.
        return []

    movies = []
    for tag in containers[0].find_all('li'):
        pic = tag.find("div", {"class": "pic"})
        text = tag.find("div", {"class": "txtPadding"})
        # Skip <li> entries that are not complete movie cards.
        if pic is None or text is None:
            continue
        img_url = pic.img['data-src']
        title = text.span.em.a['title']
        movies.append([title, img_url])
    return movies


def main():
    """Scrape the listing page and save every poster into a per-album directory."""
    # Scrape movie posters.
    soup = geturl(LIST_URL)
    # The album (directory) name is the part of the page title before the first '_'.
    bookAlbum = _safe_name(soup.title.string.split('_')[0])

    movies = _parse_movies(soup)
    print(movies)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(bookAlbum, exist_ok=True)

    for title, img_url in movies:
        # NOTE: the '.png' extension is fixed regardless of the actual image format.
        filename = os.path.join(bookAlbum, _safe_name(title) + '.png')
        print(filename)
        # urljoin resolves protocol-relative ('//host/path'), absolute and
        # relative image URLs against the page URL; urlretrieve opens and
        # writes the target file itself, so no separate open() is needed.
        urlretrieve(urljoin(LIST_URL, img_url), filename)


if __name__ == '__main__':
    main()