"""
Scrape product listings from JD.com search results.

Request URL:
    https://www.jd.com/

Product fields of interest:
    1. Product detail page URL
    2. Product name
    3. Product price
    4. Number of reviews
    5. Seller (listed in the original notes; not extracted by this script)
"""
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def _describe_good(good):
    """Return the formatted text record for one product element."""
    # Product detail link
    good_url = good.find_element(
        By.CSS_SELECTOR, '.p-img a').get_attribute('href')

    # Product name (multi-line names are flattened onto one line)
    good_name = good.find_element(
        By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

    # Product price
    good_price = good.find_element(
        By.CLASS_NAME, 'p-price').text.replace("\n", ":")

    # Number of reviews
    good_commit = good.find_element(
        By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

    return f'''
商品链接: {good_url}
商品名称: {good_name}
商品价格: {good_price}
评价人数: {good_commit}
\n
'''


def _scrape_page(driver):
    """Print every product on the current page and append it to jd.txt."""
    # Scroll down via JS so that all product entries get loaded.
    js_code = '''
    window.scrollTo(0,5000);
    '''
    driver.execute_script(js_code)

    # Give the page time to load the additional data.
    time.sleep(2)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once per page instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            good_content = _describe_good(good)
            print(good_content)
            f.write(good_content)


def _go_to_next_page(driver):
    """Click the "next page" button; return False if there is no next page."""
    try:
        next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    except NoSuchElementException:
        return False

    # NOTE(review): assumes the site marks the button with a "disabled"
    # class on the last page -- confirm against the live markup.
    if 'disabled' in (next_tag.get_attribute('class') or '').split():
        return False

    next_tag.click()
    time.sleep(2)
    return True


def get_good(driver):
    """Scrape all result pages reachable from the current search page.

    For every page, each product's link, name, price and review count is
    printed and appended to ``jd.txt``; then the "next page" button is
    clicked. Scraping stops when no usable next-page button is found.

    Pages are processed in a loop rather than by recursion, so the call
    depth does not grow with the number of pages and the browser window is
    closed exactly once.

    Args:
        driver: A Selenium WebDriver currently showing a search result page.
            Its window is always closed before this function returns, also
            when an error occurs.
    """
    try:
        while True:
            _scrape_page(driver)
            if not _go_to_next_page(driver):
                break
    finally:
        driver.close()


def main():
    """Ask for a search keyword, search JD.com for it and scrape the results."""
    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        # 1. Open the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the keyword into the search box and press ENTER.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except BaseException:
        # get_good() has not taken over yet, so close the window here.
        driver.close()
        raise

    # get_good() closes the window itself when it is done.
    get_good(driver)


if __name__ == '__main__':
    main()
# Original article: https://www.cnblogs.com/lweiser/p/11047871.html
# Time: 2024-10-12 21:01:48