```python
from jieba import posseg
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape teacher information
url = 'http://lxy.hzau.edu.cn/szll/jsml.htm'
response = requests.get(url)
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')
teacher_nodes = soup.select('td a')
teacher_info = []
for node in teacher_nodes:
    teacher_name = node.text.strip()
    teacher_link = 'http://lxy.hzau.edu.cn/' + node['href']
    teacher_info.append((teacher_name, teacher_link))

# Tidy the data and save it as a CSV file
df = pd.DataFrame({'teacher_name': [name for name, _ in teacher_info],
                   'teacher_link': [link for _, link in teacher_info]})
df.to_csv('Info.csv', encoding='utf-8-sig', index=False)
```

Please refactor this code in R.
Here is the code refactored in R:
```R
library(rvest)
library(dplyr)
library(readr)

# Scrape the teacher listing page
url <- 'http://lxy.hzau.edu.cn/szll/jsml.htm'
response <- read_html(url, encoding = 'utf-8')

teacher_nodes <- response %>%
  html_nodes('td a')

teacher_info <- teacher_nodes %>%
  purrr::map(function(node) {
    teacher_name <- node %>% html_text(trim = TRUE)
    teacher_link <- node %>% html_attr('href') %>% paste0('http://lxy.hzau.edu.cn/', .)
    list(teacher_name = teacher_name, teacher_link = teacher_link)
  })

# Tidy the data and save it as a CSV file
df <- teacher_info %>%
  purrr::map_df(~ as.list(.)) %>%
  select(teacher_name, teacher_link)

# write_excel_csv() prepends a UTF-8 BOM, matching Python's 'utf-8-sig' encoding
write_excel_csv(df, 'Info.csv')
```
This version uses the rvest package for fetching and parsing the page, the purrr package for mapping over the matched nodes, the dplyr package for tidying the data, and readr's write_excel_csv() to save the result as a CSV with a UTF-8 BOM (the equivalent of Python's utf-8-sig).
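Since rvest's html_text() and html_attr() are vectorised over a node set, the purrr mapping step can also be dropped entirely. Below is a minimal alternative sketch, assuming the same 'td a' structure on the page:

```R
library(rvest)
library(readr)

# Parse the page once and select all teacher links
page  <- read_html('http://lxy.hzau.edu.cn/szll/jsml.htm', encoding = 'utf-8')
nodes <- html_nodes(page, 'td a')

# html_text()/html_attr() return one value per node, so no explicit loop is needed
df <- data.frame(
  teacher_name = html_text(nodes, trim = TRUE),
  teacher_link = paste0('http://lxy.hzau.edu.cn/', html_attr(nodes, 'href')),
  stringsAsFactors = FALSE
)

# write_excel_csv() adds a UTF-8 BOM so Excel reads the Chinese names correctly
write_excel_csv(df, 'Info.csv')
```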