import requests from lxml import etree import csy headers = { User-Agent’: 'ozilla/5.0 (Windows NT 10.0: WOW64) ppleWebKit/537.36 (KHTL,lik f = open('益智.csy’, mode='w’, encoding='utf-8-sig’, newline=’csv_writer = csv.DictWriter(f,fieldnames=[,游戏名称’,’游戏地址’])csy_writer.writeheader. for i in range(1,123): fi= 1: wrl = 'https ://ww.4399.com/flash fl/5 1.htm' html = requests.get(url,headers=headers].content doc = etree.HTML(html)name = doc.xpath('//* [@class="list affix cf~]/li/a/text()')url 1 = doc.xpath('//*[@class=~list affix cf]/li/a/@href')#data {ist = for i in range(len(name)) : dit = 4 dit[’游戏名称’] = name [i]new_url ='https://u.4399.com’ + url 1[i]for id in new l:dit[’游戏地址’] = id #data list append(dit) csy writer.writerowidit] else: wrl = 'https://wuw.4399.com/flash fl/more 5 .htm'.format(str(i))html = requests.get(url,headers=headers].content doc = etree.HTL(html) name = doc.xpath('//*[@class=~list affix cf~]/li/a/text()')url 1 = doc.xpath('//*[@class=list affix cf~]/li/a/@href') for i in range(len(name)) : dit =几 dit[’游戏名称’] = name[i] new_url ='https://www.4399.com’ + wrl 1[il for id in new url: dit[’游戏地址’] = id #data jist aopendidit! csv_writer.writerow(dit) #orintidata fist! print(name,id)爬取重复
时间: 2024-02-29 13:55:13 浏览: 120
这段代码存在一些明显的错误,如缺少冒号、引号不匹配、缩进错误等。以下是修复后的代码:
```python
import csv
from urllib.parse import urljoin

import requests
from lxml import etree
# Crawl the 4399 puzzle-game listing pages (pages 1-122) and save every game's
# name and address to a UTF-8 (with BOM) CSV file, one row per distinct game.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Site root used to resolve the hrefs found on the listing pages.
base_url = 'https://www.4399.com'

# Select the anchor elements themselves (not their text and href separately),
# so each game's name and address are always read from the same <a> node.
link_xpath = ('//*[contains(@class, "list") and contains(@class, "affix") '
              'and contains(@class, "cf")]/li/a')

# Addresses already written; prevents the same game being recorded twice.
seen_urls = set()

with open('益智.csv', mode='w', encoding='utf-8-sig', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=['游戏名称', '游戏地址'])
    csv_writer.writeheader()

    # A distinct loop variable per level: the page counter must not be
    # clobbered by the per-game loop below.
    for page in range(1, 123):
        if page == 1:
            # NOTE(review): the first page appears to use its own URL pattern
            # ("5_1.htm"), unlike the "more_5_N.htm" pages -- confirm on site.
            url = 'https://www.4399.com/flash_fl/5_1.htm'
        else:
            url = 'https://www.4399.com/flash_fl/more_5_{}.htm'.format(page)

        # A timeout keeps one stalled connection from hanging the whole crawl.
        html = requests.get(url, headers=headers, timeout=10).content
        if not html:
            continue
        doc = etree.HTML(html)
        if doc is None:
            continue

        for link in doc.xpath(link_xpath):
            href = link.get('href')
            game_name = ''.join(link.xpath('text()')).strip()
            if not href or not game_name:
                continue

            # urljoin handles both relative and already-absolute hrefs.
            game_url = urljoin(base_url, href)
            if game_url in seen_urls:
                continue
            seen_urls.add(game_url)

            # Store the whole URL string; iterating over it would yield
            # single characters and keep only the last one.
            data = {'游戏名称': game_name, '游戏地址': game_url}
            csv_writer.writerow(data)
            print(data)
```
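需要注意的是,运行前请重点核对以下几处:(1)`new_url` 本身已经是完整的字符串,不应再用 `for` 去遍历它,否则遍历到的是单个字符,写入 CSV 的"游戏地址"只会是最后一个字符;(2)原代码中第一页的地址是 `flash_fl/5_1.htm`(带下划线),不要写成 `51.htm`;(3)内层循环不要与外层翻页循环共用变量 `i`;(4)若要解决"爬取重复"的问题,可以用一个集合记录已写入的游戏地址,写入前先判断是否已存在。此外,Python 依赖缩进表示代码结构,复制代码时请确保缩进完整。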
阅读全文