Python3 抓取网页中的图片
2019-04-15 18:08发布
生成海报
import urllib.request
import socket
import re
import sys
import os
# Folder that downloaded images are written to. It is a raw string so the
# Windows backslash separators are taken literally instead of being read as
# escape sequences.
targetDir = r"C:\Users\elqstux\Desktop\pic"
def destFile(path, targetDirectory=None):
    """Return the local file path under which the image at *path* is saved.

    The destination folder is created (including missing parent folders)
    if it does not exist yet.

    Args:
        path: URL or slash-separated path of the image; the text after the
            last '/' is used as the local file name.
        targetDirectory: Folder to save into. Defaults to the module-level
            ``targetDir`` when omitted.

    Returns:
        The destination folder joined with the image's file name.
    """
    directory = targetDir if targetDirectory is None else targetDirectory
    # makedirs also creates missing parents, and exist_ok avoids an error
    # when the folder is already there.
    os.makedirs(directory, exist_ok=True)
    # rsplit copes with a path that contains no '/' at all (the whole string
    # is then the file name), where rindex('/') would raise ValueError.
    fileName = path.rsplit('/', 1)[-1]
    return os.path.join(directory, fileName)
def findImageLinks(html):
    """Return the distinct jpg/png/gif URLs found in *html*.

    Args:
        html: Decoded text of a web page.

    Returns:
        A list of http/https URLs ending in ``.jpg``, ``.png`` or ``.gif``,
        without duplicates, in the order they first appear.
    """
    # The URL body excludes whitespace, quotes and angle brackets so a match
    # cannot run past the end of an HTML attribute value.
    pattern = r'''https?:[^\s"'<>]*?\.(?:jpg|png|gif)'''
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # download order is the same on every run.
    return list(dict.fromkeys(re.findall(pattern, html)))


if __name__ == "__main__":
    hostname = "http://www.douban.com"
    req = urllib.request.Request(hostname)
    # The with-block closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
    # Decode the bytes rather than calling str() on them: str(bytes) yields
    # the "b'...'" repr with escape sequences, not the page text. URLs are
    # ASCII, so replacing undecodable bytes elsewhere is harmless.
    html = contentBytes.decode("utf-8", errors="replace")
    for link in findImageLinks(html):
        print(link)
        try:
            urllib.request.urlretrieve(link, destFile(link))
        except OSError as err:
            # One unreachable image should not abort the remaining downloads.
            print("failed to download %s: %s" % (link, err))
import urllib.request
import socket
import re
import sys
import os
# Folder that downloaded images are written to. It is a raw string so the
# Windows backslash separator is taken literally instead of being read as an
# escape sequence.
targetDir = r"H:\pic"
def destFile(path, targetDirectory=None):
    """Return the local file path under which the image at *path* is saved.

    The destination folder is created (including missing parent folders)
    if it does not exist yet.

    Args:
        path: URL or slash-separated path of the image; it is split at the
            last '/' and the trailing part is used as the local file name.
        targetDirectory: Folder to save into. Defaults to the module-level
            ``targetDir`` when omitted.

    Returns:
        The destination folder joined with the image's file name.
    """
    directory = targetDir if targetDirectory is None else targetDirectory
    # makedirs also creates missing parents, and exist_ok avoids an error
    # when the folder is already there.
    os.makedirs(directory, exist_ok=True)
    # The path is split on '/'; rsplit copes with a path that has no '/'
    # (the whole string is then the file name), where rindex would raise.
    fileName = path.rsplit('/', 1)[-1]
    return os.path.join(directory, fileName)
# The pattern has two nested pairs of parentheses and therefore two groups:
# the outer group is the whole image URL, the inner group is its extension.
# The URL body excludes whitespace, quotes and angle brackets so a match
# cannot run past the end of an HTML attribute value.
imageUrlPattern = re.compile(r'''(https?:[^\s"'<>]*?\.(jpg|png|gif))''')

if __name__ == "__main__":
    hostname = "http://www.douban.com/"
    req = urllib.request.Request(hostname)
    # The with-block closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
    # Decode the bytes rather than calling str() on them: str(bytes) yields
    # the "b'...'" repr with escape sequences, not the page text.
    html = contentBytes.decode("utf-8", errors="replace")
    # findall returns a list; only the text captured by the groups appears in
    # it, so each item is a (url, extension) tuple.
    match = imageUrlPattern.findall(html)
    for picname, picType in match:
        print(picname)
        print(picType)
'''
输出:
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g111328-1.jpg
jpg
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g197523-19.jpg
jpg
http://img3.douban.com/pics/blank.gif
gif
...
'''
转载来源:
http://blog.csdn.net/wangyangkobe/article/details/8712121
打开微信“扫一扫”,打开网页后点击屏幕右上角分享按钮