爬虫day2(代理ip访问、人人网登录:手动添加cookie、实时存储cookie)

2019-04-14 08:44发布

通过代理ip访问

# Access a page through a proxy IP.
from urllib import request

# Proxy mapping: URL scheme -> proxy server address.
proxy = {
    'http': 'http://219.141.153.41:80'
}
# Handler that stores the proxy settings.
handler = request.ProxyHandler(proxy)
# Opener bound to the handler above; requests opened through it use the proxy.
opener = request.build_opener(handler)
# Page to request.
url = 'http://www.baidu.com/s?wd=ip'
# Fetch the response. The with-block closes the connection as soon as the
# body has been read instead of leaving it open until garbage collection.
with opener.open(url) as response:
    html_bytes = response.read()
print(html_bytes)

人人网登录:登录之后获取cookie,手动添加

# Renren login: obtain the cookie after logging in and add it by hand.
# Steps: set the url, build headers that include the cookie, fetch the
# response, then store it.
from urllib import request

url = 'http://www.renren.com/966927992'
# Request headers carrying the manually copied cookie.
headers = {
    'cookie': 'anonymid=jkt0b8ab-q2jf6z; depovince=GW; _r01_=1; JSESSIONID=abcGJgLjV4-TIaBij_1uw; ick_login=9064892c-3ad2-43ae-b884-ed4ebbab6249; jebecookies=868d7d34-c453-40ee-93e8-197b43bdc86a|||||; _de=B996A2060AAF2DDA74DFAFD510163D3F; p=c48fa2c085eb307ad984daa9625fe72b2; first_login_flag=1; ln_uact=18510556963; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; t=41d3adf9f7f90cd5b1d7bcfd4e3daf3b2; societyguester=41d3adf9f7f90cd5b1d7bcfd4e3daf3b2; id=966927992; xnsid=c97d475f; ver=7.0; loginfrom=null; jebe_key=a34f3f96-cded-4ab0-b18b-def8f814944a%7C65e0c5286f0503e29e3484234097e54d%7C1534217862022%7C1%7C1534217873290; wp_fold=0'
}
req = request.Request(url, headers=headers)
# The with-block closes the HTTP response once the body has been read.
with request.urlopen(req) as response:
    html_bytes = response.read()
# Write the page to a file.
with open('renren.html', 'wb') as f:
    f.write(html_bytes)

人人网登录 操作储存cookie

# Renren login: let a handler store the cookies.
# Steps: create a cookie container, create a handler that stores cookies
# in it, then build an opener from that handler.
from urllib import request, parse
from http import cookiejar
import json

# Cookie container object.
cookie_object = cookiejar.CookieJar()
# Handler that stores cookies in the container.
handler = request.HTTPCookieProcessor(cookie_object)
# Opener built on the cookie handler; both requests below go through it.
opener = request.build_opener(handler)
# Login endpoint.
url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2018721441132'
# Login form fields.
form = {
    'email': '18510556963',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '95cb2a1d59b918e0d16ab5d3535fb40103e4b546e651a3e3c99b91876927c78a',
    'rkey': 'a7bccfbafd7ee702247450942dff5611',
    'f': 'http%3A%2F%2Fwww.renren.com%2F966927992',
}
# POST data must be passed as bytes: dict -> str -> bytes.
form_bytes = parse.urlencode(form).encode('utf-8')
# Send the login request; the with-block closes the response after reading.
with opener.open(url, form_bytes) as response:
    html_bytes = response.read()
# Parse the JSON reply: str -> dict.
res_dict = json.loads(html_bytes.decode('utf-8'))
# NOTE(review): assumes the login reply contains a 'home_Url' key; a
# rejected login would raise KeyError here -- confirm against the live API.
home_url = res_dict['home_Url']
# Visit the home page through the same opener.
with opener.open(home_url) as response:
    html_bytes = response.read()
print(html_bytes.decode('utf-8'))

人人网登录 封装类

# Renren login: GET/POST helpers wrapped in a class.
from http import cookiejar
from urllib import request, parse
from urllib.error import HTTPError, URLError
import json


class session(object):
    """HTTP client whose requests all share one cookie jar.

    Every ``get``/``post`` call goes through the same opener, so cookies
    set by one response are stored and available to later requests.
    """

    def __init__(self):
        # Cookie container object.
        cookie_object = cookiejar.CookieJar()
        # Handler that stores cookies in the container.
        handler = request.HTTPCookieProcessor(cookie_object)
        # Opener built on the cookie handler.
        self.opener = request.build_opener(handler)

    def get(self, url, headers=None):
        """GET ``url`` through this session's opener; return the body bytes."""
        return get(url, headers, self.opener)

    def post(self, url, form=None, headers=None):
        """POST ``form`` to ``url`` through this session's opener.

        Returns the body bytes. An empty or missing ``form`` results in a
        GET request (see ``urlrequests``).
        """
        return post(url, form, headers, self.opener)


def get(url, headers=None, opener=None):
    """GET ``url`` and return the response body as bytes (``b''`` on error)."""
    return urlrequests(url, headers=headers, opener=opener)


def post(url, form, headers=None, opener=None):
    """POST ``form`` to ``url`` and return the body as bytes (``b''`` on error)."""
    return urlrequests(url, form, headers=headers, opener=opener)


def urlrequests(url, form=None, headers=None, opener=None):
    """Send a GET or POST request and return the raw response body.

    Args:
        url: Address to request.
        form: Mapping of form fields. When truthy the request is a POST
            with the url-encoded form as its body; otherwise it is a GET.
        headers: Request headers. When ``None`` a default browser
            ``User-Agent`` is sent. Caller-supplied headers replace the
            default entirely (no ``User-Agent`` is added to them).
        opener: Optional opener used to send the request; when falsy,
            ``urllib.request.urlopen`` is used.

    Returns:
        The response body as ``bytes``. On ``URLError`` (which includes
        ``HTTPError``) the error is printed and ``b''`` is returned.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    # Fall back to the default headers only when the caller supplied none.
    if headers is None:
        headers = {
            'User-Agent': user_agent
        }
    html_bytes = b''
    try:
        if form:
            # POST: dict -> str -> bytes.
            form_str = parse.urlencode(form, encoding='utf-8')
            form_bytes = form_str.encode('utf-8')
            req = request.Request(url, data=form_bytes, headers=headers)
        else:
            # GET
            req = request.Request(url, headers=headers)
        open_url = opener.open if opener else request.urlopen
        # The with-block closes the response as soon as the body is read.
        with open_url(req) as response:
            html_bytes = response.read()
    except URLError as e:
        # HTTPError is a subclass of URLError, so this one clause reports
        # both kinds of failure.
        print(e)
    return html_bytes

有道翻译 加盐破解

# Youdao Translate: reproduce the salted request signature.
# The translation is triggered by ajax as a POST request.
# In the browser devtools: tick "preserve log", click the XHR
# (XMLHttpRequest) "translate..." entry and read its url, headers and
# form data.
# Compare several requests to see which form fields change, then build
# the form accordingly.
# Search the site's JS and port the variable computations to Python.
# Finally call post().
from get_post import post
import time
import random
import json


def Md5(str):
    """Return the hexadecimal MD5 digest of a text string.

    The parameter name shadows the builtin ``str``; it is kept so that
    existing callers are unaffected.
    """
    import hashlib
    # Create the md5 object.
    hl = hashlib.md5()
    # The text must be encoded first: hl.update(str) on un-encoded text
    # fails with "Unicode-objects must be encoded before hashing".
    hl.update(str.encode(encoding='utf-8'))
    return hl.hexdigest()


def fanyi(key):
    """Translate ``key`` with the Youdao web endpoint and return the result.

    Builds the salted ``sign`` field the same way the site's JS does,
    posts the form, and returns the ``tgt`` text of the first entry in
    ``translateResult``.
    """
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # 'Accept-Encoding' ('gzip, deflate') and 'Content-Length' ('223') from
    # the captured request are intentionally not sent.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'OUTFOX_SEARCH_USER_ID=-493176930@10.168.8.63; OUTFOX_SEARCH_USER_ID_NCOO=38624120.26076847; SESSION_FROM_COOKIE=unknown; JSESSIONID=aaabYcV4ZOU-JbQUha2uw; ___rl__test__cookies=1534210912076',
        'Host': 'fanyi.youdao.com',
        'Origin': 'http://fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # JS: r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))
    # getTime() is in milliseconds and the random part is an integer 0..9,
    # so scale time.time() (seconds) by 1000 and draw from 0..9.
    salt = str(int(time.time() * 1000) + random.randint(0, 9))
    # JS: sign = o = u.md5(S + n + r + D)
    #   S = "fanyideskweb"
    #   n = key
    #   r = salt
    #   D = "ebSeFb%=XZ%T[KZ)c(sy!"
    sign = Md5("fanyideskweb" + key + salt + "ebSeFb%=XZ%T[KZ)c(sy!")
    form = {
        'i': key,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false',
    }
    html_bytes = post(url, form, headers)
    # Parse the JSON reply: str -> dict.
    res_dict = json.loads(html_bytes.decode('utf-8'))
    # Return the translated text.
    return res_dict['translateResult'][0][0]['tgt']


print(fanyi('今天天气好晴朗'))