帮我写一个爬取大众点评高评分店铺的爬虫
时间: 2024-09-27 10:08:46 浏览: 33
创建一个大众点评高分店铺爬虫需要遵循几个步骤。下面提供一个基本的 Python 爬虫框架，使用 requests 和 BeautifulSoup 库来抓取数据。请注意，直接爬取网站可能违反其服务条款，并可能受到法律限制，因此在实际操作前，请确保你有权限这样做，或者改用官方 API。
```python
import requests
from bs4 import BeautifulSoup
# Target URL template. The two `{}` placeholders are filled with str.format
# by get_high_score_reviews (city first, then category).
# Example given by the author: https://www.dianping.com/shop/most-recommended/beijing
# NOTE(review): that example has a single path segment after
# "most-recommended", while this template has two -- confirm the real URL
# scheme before relying on it.
url = 'https://www.dianping.com/shop/most-recommended/{}/{}'
def get_high_score_reviews(city, category):
    """Fetch a listing page and extract rating/description pairs from it.

    The module-level ``url`` template is formatted with ``city`` and
    ``category``, requested with a browser-like User-Agent, and the returned
    HTML is scanned for ``<div class="score">`` elements.

    Args:
        city: Value substituted into the first placeholder of ``url``.
        category: Value substituted into the second placeholder of ``url``.

    Returns:
        A list of dicts with the keys ``'rating'`` (the ``title`` attribute of
        the first ``<span>`` inside the score element) and ``'review'`` (the
        stripped text of the element following the score element). Entries
        that do not have that shape are skipped. An empty list is returned,
        after printing a message, when the response status is not 200.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection problems, including the timeout set below.
    """
    # Browser-like User-Agent; some sites inspect this header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(
        url.format(city, category), headers=headers, timeout=10
    )

    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'lxml')

    high_reviews = []
    # NOTE(review): the 'score' class and the sibling layout are assumptions
    # about the page markup carried over from the original snippet -- verify
    # them against the real page.
    for score in soup.find_all('div', class_='score'):
        span = score.find('span')
        rating = span.get('title') if span is not None else None
        shop_info = score.find_next_sibling()
        # Skip entries that do not match the expected layout instead of
        # crashing on a None lookup part-way through the page.
        if rating is None or shop_info is None:
            continue
        high_reviews.append({
            'rating': rating,
            'review': shop_info.text.strip(),
        })
    return high_reviews
# Example run: query one city/category pair and print every result.
city, category = "北京", "餐饮"  # replace the category with whichever one you need
high_reviews = get_high_score_reviews(city, category)
for review in high_reviews:
    # One output line per entry: the rating followed by the descriptive text.
    print(f"店铺评分:{review['rating']} - {review['review']}")
```
阅读全文