selenium + chrome + firefox + webdriver 遇到的坑
坑一、linux中启动webdriver时报错
测试代码为:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Minimal reproduction: launching Firefox through Selenium and loading a page.
# In the environment described by this article, webdriver.Firefox() raises
# "WebDriverException: Process unexpectedly closed with status 1"
# (see the traceback that follows).
from selenium import webdriver

driver = webdriver.Firefox()
driver.get("https://www.baidu.com")
运行报错信息如下:
Traceback (most recent call last):
  File "maimai_web.py", line 14, in <module>
    driver = webdriver.Firefox()
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py", line 152, in __init__
    keep_alive=True)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 98, in __init__
    self.start_session(desired_capabilities, browser_profile)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 188, in start_session
    response = self.execute(Command.NEW_SESSION, parameters)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
    self.error_handler.check_response(response)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status 1
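处理方法:报错通常是因为linux服务器没有图形界面(没有可用的显示设备),Firefox启动后立即退出。用pyvirtualdisplay先创建一个虚拟显示(需要先安装xvfb和pyvirtualdisplay),再启动浏览器: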
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pyvirtualdisplay import Display
from selenium import webdriver

# Start a virtual display before launching the browser; this is the change
# that makes webdriver.Firefox() start successfully in this article's setup.
display = Display(visible=0, size=(1920, 1080))
display.start()
try:
    driver = webdriver.Firefox()
    try:
        driver.get("https://www.baidu.com")
    finally:
        # Always close the browser, otherwise the Firefox/geckodriver
        # processes stay alive after the script ends.
        driver.quit()
finally:
    # Shut the virtual display down even if the browser failed to start.
    display.stop()
结果:
运行ok,搞定!
坑二、webdriver实例化报错
采用多线程调用webdriver的时候,偶尔会出现这样的错误:selenium.common.exceptions.WebDriverException: Message: connection refused
Exception in thread Thread-2:
Traceback (most recent call last):
  File "/usr/local/python3.6/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/python3.6/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "maimai_tran_account_driver.py", line 591, in debug
    t = TrainAccount(count,lock)
  File "maimai_tran_account_driver.py", line 32, in __init__
    self.chrome = webdriver.Firefox()
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/firefox/webdriver.py", line 152, in __init__
    keep_alive=True)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 98, in __init__
    self.start_session(desired_capabilities, browser_profile)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 188, in start_session
    response = self.execute(Command.NEW_SESSION, parameters)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
    self.error_handler.check_response(response)
  File "/usr/local/python3.6/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: connection refused
处理方法:查看geckodriver.log中的具体报错信息,根据日志定位原因。
坑三、模拟器被反爬
原因是通过webdriver打开页面时,页面的js会检测webdriver字段(例如navigator.webdriver),一旦检测到该字段,就会被当作爬虫处理,应对策略如下。
工具:mitmproxy做代理,把响应(对应的js文件)里面的webdriver替换为别的字段
部分代码如下:
# Rewrite the detection script in flight: for the matching JS bundle, replace
# every "webdriver" in the response body with "userAgent" so the page's check
# no longer reads the webdriver field.
# NOTE(review): `flow` is presumably the HTTP flow passed to a mitmproxy
# `response(flow)` hook -- the enclosing function is not shown in the article.
if "/_next/static/js/common_pdd" in flow.request.url:
    flow.response.text = flow.response.text.replace("webdriver", "userAgent")
坑四、滑动验证码验证失败
同样的代码,chromedriver下验证码可以通过,firefox即使把滑块滑动到正确位置也报失败。最后发现原因是firefox拖动滑块的速度太慢,被识别为机器操作。解决方法:增大滑动的速度。附上滑动验证的部分代码,如下:
def crack_geetest(self, max_retry=10):
    """Try to solve the GeeTest slider captcha shown in ``self.driver``.

    Each attempt screenshots the page, crops the captcha image, asks
    ``self.dama2`` for the gap position, drags the slider along a generated
    track (overshoot, pull back, small wobble, release) and reads the
    result banner.

    Reads ``self.driver``, ``self.logger``, ``self.dama2`` and
    ``self.account['user_id']``; writes screenshot/captcha PNG files into
    the current directory.

    :param max_retry: maximum number of attempts to make.
    :return: ``True`` as soon as the result text contains '秒的速度超过' or the
        current URL no longer contains 'passport.lagou.com/login/login';
        ``False`` after ``max_retry`` failed attempts.
    """
    driver = self.driver
    log = self.logger
    log.info("process handle geetest captcha...")

    def get_position():
        """Return the captcha image's page coordinates.

        :return: tuple ``(top, bottom, left, right)``.
        """
        img = driver.find_element_by_xpath('//div[@class="geetest_canvas_img geetest_absolute"]')
        # NOTE(review): presumably gives the captcha time to finish rendering
        # before its geometry is read -- confirm.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_geetest_image(name):
        """Screenshot the page and crop out the captcha image.

        :param name: suffix used in the saved captcha file name.
        :return: tuple ``(captcha image object, captcha file name)``.
        """
        full_img_path = './zhilian_screenshot_{}.png'.format(self.account['user_id'])
        driver.save_screenshot(filename=full_img_path)
        image = Image.open(fp=full_img_path, mode='r')
        top, bottom, left, right = get_position()
        print('验证码位置:({},{},{},{})'.format(left, top, right, bottom))
        # The element position is page-relative; subtract the vertical scroll
        # offset before cropping the screenshot.
        t = driver.execute_script('var q=document.documentElement.scrollTop; return q;')
        print('验证码位置:({},{},{},{})'.format(left, top - int(t), right, bottom - int(t)))
        print('p--->>>', t)
        captcha = image.crop((left, top - int(t), right, bottom - int(t)))
        captcha_file_name = './zhilian_captcha_{}_{}.png'.format(self.account['user_id'], name)
        captcha.save(captcha_file_name)
        return captcha, captcha_file_name

    def get_slider():
        """Return the slider button element."""
        slider = driver.find_element_by_xpath('//div[@class="geetest_slider_button"]')
        return slider

    def get_gap(captcha_file_name):
        """Return the x offset of the gap as reported by ``self.dama2``.

        :param captcha_file_name: path of the cropped captcha image.
        :return: x coordinate (int) of the first point in the decode result.
        """
        # NOTE(review): 6137 is presumably the captcha-type code expected by
        # the decoding service -- confirm.
        res = self.dama2.decode_captcha(6137, captcha_file_name)
        print(res)
        # Example result: ('b800b4f6-0d9a-40e2-a972-d87c91582b46', [(176, 101)])
        return int(res[1][0][0])

    def calculate_tracks(distance):
        """Build the drag track for moving the slider by ``distance`` pixels.

        The forward track deliberately overshoots by a random 16-26 pixels;
        the back track then pulls the slider back by exactly that amount.

        :param distance: horizontal distance to the gap, in pixels.
        :return: dict with 'forward_tracks' (per-step offsets to the right)
            and 'back_tracks' (15 non-positive per-step offsets).
        """

        def generate_rand(n, sum_v):
            """Return ``n`` random non-positive ints that sum to ``-sum_v``."""
            vector = [random.randint(1, 3) for _ in range(n)]
            total = sum(vector)
            vector = [int(i / total * sum_v) for i in vector]
            # int() truncates, so hand the missing remainder out one pixel
            # at a time to random positions.
            shortfall = sum_v - sum(vector)
            for _ in range(shortfall):
                vector[random.randint(0, n - 1)] += 1
            return [-i for i in vector]

        back_dis = random.randint(16, 26)
        distance += back_dis  # overshoot first, then slide back at the end
        v = 0
        t = 0.2
        forward_tracks = []
        current = 0
        mid = distance * 3 / 5
        while current < distance:
            # Accelerate over the first 3/5 of the way, then brake.
            a = 2 if current < mid else -3
            s = v * t + 0.5 * a * (t ** 2)
            if s <= 0:
                # Braking used up all the speed just short of the target
                # (happens when the braking phase ends exactly at the target);
                # cover the remainder in one step instead of drifting
                # backwards through further iterations.
                forward_tracks.append(round(distance - current))
                break
            v = v + a * t
            current += s
            forward_tracks.append(round(s))
        # Slide back to the exact position; the offsets total -back_dis.
        back_tracks = generate_rand(15, back_dis)
        return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}

    def move_to_gap(slider, tracks):
        """Drag the slider to the gap along the given tracks.

        :param slider: slider element to drag.
        :param tracks: dict produced by ``calculate_tracks``.
        :return: None
        """
        ActionChains(driver).click_and_hold(slider).perform()
        # Move forward (past the gap).
        for i in tracks['forward_tracks']:
            ActionChains(driver).move_by_offset(i, 0).perform()
        # Move back.
        time.sleep(0.5)
        for i in tracks['back_tracks']:
            ActionChains(driver).move_by_offset(i, 0).perform()
        # Wobble a few pixels left and right.
        random_sc = random.randint(3, 8)
        ActionChains(driver).move_by_offset(0 - random_sc, 0).perform()
        time.sleep(0.5)
        ActionChains(driver).move_by_offset(random_sc, 0).perform()
        # Release.
        time.sleep(0.5)
        ActionChains(driver).release().perform()

    def crack(retry=0):
        """Run one attempt and return the text of the result banner.

        :param retry: attempt number, used in the result screenshot name.
        :return: ``textContent`` of the ``geetest_result_title`` element.
        """
        # Grab the captcha image.
        print('get_geetest_image')
        captcha_obj, captcha_file_name = get_geetest_image('2')
        gap = get_gap(captcha_file_name)
        log.info('缺口位置:{}'.format(gap))
        print('缺口位置:{}'.format(gap))
        # Subtract the slider's starting offset.
        BORDER = 29
        gap -= BORDER
        # Build the drag track.
        track = calculate_tracks(gap)
        log.info('滑动轨迹:{}'.format(track))
        print('滑动轨迹:{}'.format(track))
        # Drag the slider.
        slider = get_slider()
        move_to_gap(slider, track)
        driver.save_screenshot('./zhilian_capresult_{}_{}.png'.format(self.account['user_id'], retry))
        time.sleep(3)
        result = driver.find_element_by_xpath('//div[@class="geetest_result_title"]').get_attribute('textContent')
        log.info(result)
        print(result)
        return result

    # Attempts are numbered 1..max_retry inclusive, so exactly max_retry
    # attempts are made before giving up.
    for attempt in range(1, max_retry + 1):
        log.info(f'{attempt}/{max_retry} crack geetest.')
        success = crack(attempt)
        if '秒的速度超过' in success or 'passport.lagou.com/login/login' not in driver.current_url:
            log.info("crack succeeded!")
            print("crack succeeded!")
            return True
        log.info("crack failed, retry:{}/{}".format(attempt, max_retry))
        if '拖动滑块将悬浮图像正确拼合' in success:
            # The puzzle was not matched: request a fresh captcha.
            driver.find_element_by_xpath('//a[@class="geetest_refresh_1"]').click()
        time.sleep(5)

    log.info("max retry reached, return False")
    return False
来源:https://blog.csdn.net/wenq_yang/article/details/81258932
原文地址:https://www.cnblogs.com/alex-13/p/12019764.html