import requests import parsel import csv f = open('射击.csv', mode='w', encoding='utf-8-sig', newline='') csv_writer = csv.DictWriter(f, fieldnames=['游戏名字', '游戏地址']) csv_writer.writeheader() for page in range(1, 300): url = 'https://www.4399.com/flash_fl/4_1.htm'.format(page) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36' } response = requests.get(url=url, headers=headers) response.encoding = response.apparent_encoding selector = parsel.Selector(response.text) lis = selector.css('#classic li') for li in lis: dit ={} data_url = li.css('a::attr(href)').get() new_url = 'http://www.4399.com' + data_url.replace('http://','/') dit['游戏地址'] = new_url title = li.css('img::attr(alt)').get() dit['游戏名字'] = title print(title, new_url) csv_writer.writerow(dit)怎么改成用xpath的
时间: 2024-04-28 21:26:22 浏览: 139
你可以将 `selector = parsel.Selector(response.text)` 改为 `selector = etree.HTML(response.text)`,然后使用 XPath 语法来解析 HTML 页面,下面是修改后的代码:
```
import csv
from urllib.parse import urljoin

import requests
from lxml import etree
# Scrape the 4399 game listing pages and write every game's name and URL
# to a CSV file, one row per <li> found under the element with id "classic".

# Site root used to turn the hrefs found in the listing into absolute URLs.
BASE_URL = 'http://www.4399.com'
# Listing page template; "{}" is replaced by the page number. The original
# string had no placeholder, so .format(page) was a no-op and the very same
# page was downloaded on every iteration.
# NOTE(review): assumes the site paginates as 4_1.htm, 4_2.htm, ... - confirm.
LIST_URL = 'https://www.4399.com/flash_fl/4_{}.htm'
# Request headers never change, so build them once instead of once per page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}

# "with" guarantees the CSV file is flushed and closed even when a request
# or a parse step raises halfway through the crawl.
with open('射击.csv', mode='w', encoding='utf-8-sig', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=['游戏名字', '游戏地址'])
    csv_writer.writeheader()
    for page in range(1, 300):
        url = LIST_URL.format(page)
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url=url, headers=headers, timeout=10)
        response.encoding = response.apparent_encoding
        selector = etree.HTML(response.text)
        # Guard against a missing tree (presumably possible when the body has
        # no parsable markup - verify against the installed lxml version).
        lis = selector.xpath('//*[@id="classic"]/li') if selector is not None else []
        if not lis:
            # No game entries on this page: treat it as the end of the listing
            # instead of requesting the remaining page numbers for nothing.
            break
        for li in lis:
            # xpath() returns a list; it is empty when the node is absent, so
            # check before indexing instead of crashing with IndexError.
            hrefs = li.xpath('a/@href')
            titles = li.xpath('a/img/@alt')
            if not hrefs or not titles:
                continue
            # urljoin resolves relative hrefs against the site root and leaves
            # absolute or protocol-relative hrefs intact; the original string
            # concatenation produced malformed URLs for those.
            new_url = urljoin(BASE_URL, hrefs[0])
            title = titles[0]
            dit = {'游戏名字': title, '游戏地址': new_url}
            print(title, new_url)
            csv_writer.writerow(dit)
```
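这里先用 lxml 的 `etree.HTML` 解析页面,再用 XPath 表达式 `//*[@id="classic"]/li` 取出所有 `li` 节点并保存到 `lis` 中;随后遍历每个 `li`,在其内部用相对路径 `a/@href` 和 `a/img/@alt` 分别获取游戏地址和游戏名称。注意 `xpath()` 返回的是列表,因此要取第一个元素才能得到具体的值。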
阅读全文