```python
from jieba import posseg
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape teacher information
url = 'http://lxy.hzau.edu.cn/szll/jsml.htm'
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')
teacher_nodes = soup.select('td a')
teacher_info = []
for node in teacher_nodes:
    teacher_name = node.text.strip()
    teacher_link = 'http://lxy.hzau.edu.cn/' + node['href']
    teacher_info.append((teacher_name, teacher_link))

# Tidy the data and save it as a CSV file
df = pd.DataFrame({'teacher_name': [name for name, _ in teacher_info],
                   'teacher_link': [link for _, link in teacher_info]})
df.to_csv('Info.csv', encoding='utf-8-sig', index=False)
```

Please refactor this code in R.
Here is the code refactored in R:
```R
library(rvest)
library(dplyr)
library(readr)

# Scrape the teacher listing page
url <- 'http://lxy.hzau.edu.cn/szll/jsml.htm'
response <- read_html(url, encoding = 'utf-8')

teacher_nodes <- response %>%
  html_nodes('td a')

teacher_info <- teacher_nodes %>%
  purrr::map(function(node) {
    teacher_name <- node %>% html_text(trim = TRUE)
    teacher_link <- node %>% html_attr('href') %>% paste0('http://lxy.hzau.edu.cn/', .)
    list(teacher_name = teacher_name, teacher_link = teacher_link)
  })

# Tidy the data and save it as a CSV file
df <- teacher_info %>%
  purrr::map_df(~ as.list(.)) %>%
  select(teacher_name, teacher_link)

# write_excel_csv() prepends a UTF-8 BOM, matching Python's 'utf-8-sig' encoding
write_excel_csv(df, 'Info.csv')
```
This version uses the rvest package for fetching and parsing the page, the purrr package for mapping over the matched nodes, the dplyr package for tidying the data, and readr's write_excel_csv() to save the result as a CSV with a UTF-8 BOM (the equivalent of Python's utf-8-sig).
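Since rvest's html_text() and html_attr() are vectorised over a node set, the purrr mapping step can also be dropped entirely. Below is a minimal alternative sketch, assuming the same 'td a' structure on the page:

```R
library(rvest)
library(readr)

# Parse the page once and select all teacher links
page  <- read_html('http://lxy.hzau.edu.cn/szll/jsml.htm', encoding = 'utf-8')
nodes <- html_nodes(page, 'td a')

# html_text()/html_attr() return one value per node, so no explicit loop is needed
df <- data.frame(
  teacher_name = html_text(nodes, trim = TRUE),
  teacher_link = paste0('http://lxy.hzau.edu.cn/', html_attr(nodes, 'href')),
  stringsAsFactors = FALSE
)

# write_excel_csv() adds a UTF-8 BOM so Excel reads the Chinese names correctly
write_excel_csv(df, 'Info.csv')
```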