以淘宝为例,写一段具有防反爬功能,能够爬取各种商品价格和对应优惠券的爬虫
时间: 2023-05-31 08:05:11 浏览: 42
import requests
import re
import time
import random
from fake_useragent import UserAgent
# Build randomized request headers so that successive requests do not all
# present the same browser identity (a basic anti-anti-crawling measure).
def random_header() -> dict:
    """Return HTTP request headers with a randomly chosen User-Agent.

    Returns:
        A dict with ``User-Agent``, ``Accept-Encoding`` and
        ``Accept-Language`` entries, ready to be passed as the ``headers``
        argument of ``requests.get``.
    """
    ua = UserAgent()
    return {
        "User-Agent": ua.random,
        # "br" is deliberately not advertised: requests/urllib3 can only
        # decode Brotli when an optional Brotli package is installed. Without
        # it a Brotli-compressed body would come back undecoded and the
        # text-based extraction downstream would silently find nothing.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    }
# Fetch a page and extract item prices together with their coupons.
def get_price_coupon(url, timeout=10):
    """Download ``url`` and return the (price, coupon) pairs found in its HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of ``(price, coupon)`` string tuples. ``price`` is the numeric
        text captured after ``"view_price"``; ``coupon`` is the text captured
        after ``"coupon_info"`` with the trailing "元" removed. The list is
        empty when either pattern has no match.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=random_header(), timeout=timeout)
    html = response.text

    # Look for fragments of the form "view_price":"<number>".
    price_list = re.findall(r'"view_price":"([\d\.]+)"', html)

    # Look for fragments of the form "coupon_info":"<text>元". The captured
    # value is restricted to non-quote characters so that an empty or 元-less
    # coupon field cannot make the match run on into later, unrelated fields.
    coupon_list = re.findall(r'"coupon_info":"([^"\n]*?)元"', html)

    # NOTE(review): prices and coupons are paired purely by position and zip
    # stops at the shorter list, so an item without a coupon shifts every
    # later pairing. Confirm against the real page structure.
    return list(zip(price_list, coupon_list))
def main():
    """Fetch a sample Taobao item page and print its (price, coupon) pairs."""
    url = "https://item.taobao.com/item.htm?spm=a21bo.2017.201867-links-0.1.5af911d9EyW0aN&id=632853802929&sku_properties=1627207:1741988831"
    print(get_price_coupon(url))


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()