import requests from lxml import etree import csv headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36' } f = open('4399排行.csv', mode='w', encoding='utf-8-sig', newline='') csv_writer = csv.DictWriter(f, fieldnames=[ '游戏名称','日期','类型']) csv_writer.writeheader() for i in range(1,11): if i == 1: url = 'https://www.4399.com/flash/game100.htm' html = requests.get(url,headers=headers).content doc = etree.HTML(html) name = doc.xpath('//*[@id="list"]/li/div/a/text()') kind = doc.xpath('//*[@id="list"]/li/span/a/text()') date = doc.xpath('//*[@id="list"]/li/span/text()') data_list = [] for i in range(len(name)): dit = {} dit['游戏名称'] = name[i] dit['类型'] = kind[i] dit['日期'] = date[i] data_list.append(dit) csv_writer.writerow(dit) else: url = 'https://www.4399.com/flash/game100_{}.htm'.format(str(i)) html = requests.get(url,headers=headers).content doc = etree.HTML(html) name = doc.xpath('//*[@id="list"]/li/div/a/text()') kind = doc.xpath('//*[@id="list"]/li/span/a/text()') date = doc.xpath('//*[@id="list"]/li/span/text()') data_list = [] for i in range(len(name)): dit = {} dit['游戏名称'] = name[i] dit['类型'] = kind[i] dit['日期'] = date[i] data_list.append(dit) csv_writer.writerow(dit) print(data_list) 怎么改才能使爬取的时间只保存到月份?
时间: 2024-04-28 13:26:01 浏览: 11
可以在爬取日期的代码处,对日期进行处理,只保留到月份。可以使用Python的datetime模块来实现。具体代码如下:
```python
import requests
from lxml import etree
import csv
from datetime import datetime
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}

OUTPUT_PATH = '4399排行.csv'
FIELDNAMES = ['游戏名称', '日期', '类型']
FIRST_PAGE_URL = 'https://www.4399.com/flash/game100.htm'
PAGE_URL_TEMPLATE = 'https://www.4399.com/flash/game100_{}.htm'
PAGE_COUNT = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever


def page_url(page):
    """Return the ranking-page URL for the 1-based page number *page*.

    Page 1 uses the bare ``game100.htm`` address; later pages carry the
    page number as a ``_N`` suffix.
    """
    if page == 1:
        return FIRST_PAGE_URL
    return PAGE_URL_TEMPLATE.format(page)


def to_month(raw_date):
    """Reduce a ``YYYY-MM-DD`` date string to ``YYYY-MM``.

    Surrounding whitespace is stripped before parsing. Text that is not a
    ``YYYY-MM-DD`` date is returned stripped but otherwise unchanged, so a
    single odd cell does not abort the whole crawl.
    """
    text = raw_date.strip()
    try:
        return datetime.strptime(text, '%Y-%m-%d').strftime('%Y-%m')
    except ValueError:
        return text


def build_rows(names, kinds, dates):
    """Combine the three scraped text lists into CSV row dicts.

    Whitespace-only date nodes are dropped before pairing because they can
    never be a date. ``zip`` stops at the shortest list, so a length
    mismatch yields fewer rows instead of raising ``IndexError``.
    """
    real_dates = [text for text in dates if text.strip()]
    return [
        {'游戏名称': name, '类型': kind, '日期': to_month(date)}
        for name, kind, date in zip(names, kinds, real_dates)
    ]


def fetch_page_rows(page):
    """Download ranking page *page* and return its rows as a list of dicts."""
    html = requests.get(
        page_url(page), headers=headers, timeout=REQUEST_TIMEOUT
    ).content
    doc = etree.HTML(html)
    names = doc.xpath('//*[@id="list"]/li/div/a/text()')
    kinds = doc.xpath('//*[@id="list"]/li/span/a/text()')
    dates = doc.xpath('//*[@id="list"]/li/span/text()')
    return build_rows(names, kinds, dates)


def main():
    """Crawl every ranking page, write the rows to the CSV and print them."""
    data_list = []
    # The context manager guarantees the CSV is flushed and closed even if
    # a request or the parsing fails part-way through.
    with open(OUTPUT_PATH, mode='w', encoding='utf-8-sig', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        csv_writer.writeheader()
        for page in range(1, PAGE_COUNT + 1):
            rows = fetch_page_rows(page)
            csv_writer.writerows(rows)
            data_list.extend(rows)
    # Rows from all pages are accumulated so the final print shows the
    # complete result rather than only the last page.
    print(data_list)


if __name__ == '__main__':
    main()
```
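在上述代码中,我们先使用datetime模块中的strptime方法,按`'%Y-%m-%d'`格式把原始日期字符串解析为datetime对象,再使用strftime方法按`'%Y-%m'`格式将其输出为字符串,这样就可以实现只保存到月份的效果。需要注意的是,如果网页中取到的日期文本带有多余的空白字符,或者格式不是`YYYY-MM-DD`,strptime会抛出ValueError,因此最好先用strip()清理文本,或者对解析失败的情况做异常处理。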