用python的requests、lxml、pandas、selenium编写一段代码。以https://www.sciencedirect.com/journal/the-lancet为初始界面,等待10秒,用selenium点击该页面上的链接(class="anchor js-volume volume-issue-text anchor-default"),跳转后等待10秒,用selenium点击该页面上的链接(class="switch-check switch-small js-previews-switch"),等待10秒,获取该页面上文章的标题、摘要、作者。建立excel表格,并将结果导入excel表格。
时间: 2023-03-20 20:01:56 浏览: 74
import requests
from lxml import etree
import pandas as pd
import time
from itertools import zip_longest

# Entry page, the fixed pause (in seconds) between navigation steps, and the
# spreadsheet the results are written to.
JOURNAL_URL = 'https://www.sciencedirect.com/journal/the-lancet'
PAUSE_SECONDS = 10
OUTPUT_FILE = 'result.xlsx'

# CSS selectors, listed in the order the script uses them.
# NOTE(review): the tag names (a / h2 / p / div) were chosen by the original
# author and have not been checked against the live page markup -- confirm.
VOLUME_LINK_SELECTOR = 'a.anchor.js-volume.volume-issue-text.anchor-default'
PREVIEW_SWITCH_SELECTOR = 'a.switch-check.switch-small.js-previews-switch'
TITLE_SELECTOR = 'h2.article-title'
ABSTRACT_SELECTOR = 'p.Para'
AUTHOR_SELECTOR = 'div.Authors'


def build_table(titles, abstracts, authors):
    """Combine the scraped columns into a single DataFrame.

    The three lists are paired purely by position. When they differ in
    length the shorter ones are padded with empty strings, because building
    a DataFrame from a dict of unequal-length lists raises ``ValueError``.

    Args:
        titles: Article title strings.
        abstracts: Abstract paragraph strings.
        authors: Author line strings.

    Returns:
        A DataFrame with the columns ``title``, ``abstract`` and ``author``
        and one row per position of the longest input list.
    """
    rows = list(zip_longest(titles, abstracts, authors, fillvalue=''))
    return pd.DataFrame(rows, columns=['title', 'abstract', 'author'])


def scrape_articles(url=JOURNAL_URL, pause=PAUSE_SECONDS):
    """Open *url* in Chrome, follow the two clicks, and collect article text.

    Steps: load the page, click the first volume/issue link, click the first
    article-preview switch, then read the text of every title, abstract and
    author element. The script sleeps *pause* seconds after each step.

    Args:
        url: Page to open first.
        pause: Seconds to sleep after the page load and after each click.

    Returns:
        A ``(titles, abstracts, authors)`` tuple of lists of strings.
    """
    # Selenium is imported here rather than at module level so that
    # build_table() stays importable on machines without a browser stack.
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # implicitly_wait() only sets the element-lookup timeout; an actual
        # pause needs time.sleep().
        time.sleep(pause)

        for selector in (VOLUME_LINK_SELECTOR, PREVIEW_SWITCH_SELECTOR):
            matches = driver.find_elements(By.CSS_SELECTOR, selector)
            # Click only the first match: after a click navigates or
            # re-renders the page, the remaining element handles are stale.
            # A missing element is skipped so the run stays best-effort.
            if matches:
                matches[0].click()
            time.sleep(pause)

        titles, abstracts, authors = (
            [element.text
             for element in driver.find_elements(By.CSS_SELECTOR, selector)]
            for selector in (TITLE_SELECTOR, ABSTRACT_SELECTOR, AUTHOR_SELECTOR)
        )
        return titles, abstracts, authors
    finally:
        # Always release the browser process, even when a step above fails.
        driver.quit()


def main():
    """Scrape the journal page and write the results to ``result.xlsx``."""
    titles, abstracts, authors = scrape_articles()
    dataframe = build_table(titles, abstracts, authors)
    dataframe.to_excel(OUTPUT_FILE)


if __name__ == '__main__':
    main()