import os.path import gzip import pickle import os import numpy as np import urllib url_base = 'http://yann.lecun.com/exdb/mnist/' key_file = { 'train_img':'train-images-idx3-ubyte.gz', 'train_label':'train-labels-idx1-ubyte.gz', 'test_img':'t10k-images-idx3-ubyte.gz', 'test_label':'t10k-labels-idx1-ubyte.gz' } dataset_dir = os.path.dirname(os.path.abspath("_file_")) save_file = dataset_dir + "/mnist.pkl" train_num=60000 test_num=10000 img_dim=(1,28,28) img_size=784 def _download(file_name): file_path = dataset_dir+"/"+file_name if os.path.exists(file_path): return print("Downloading"+file_name+" ... ") urllib.request.urlretrieve(url_base + file_name,file_path) print("Done") def download_mnist(): for v in key_file.values(): _download(v) def _load_label(file_name): file_path = dataset_dir+ "/" +file_name print("Converting" + file_name +"to Numpy Array ...") with gzip.open(file_path,'rb') as f: labels = np.frombuffer(f.read(),np.uint8,offset=8) print("Done") return labels def _load_img(file_name): file_path=dataset_dir+"/"+file_name print("Converting"+file_name+"to Numpy Array ...") with gzip.open(file_path,'rb') as f: data = np.frombuffer(f.read(),np.uint8,offset=16) data = data.reshape(-1,img_size) print("Done") return data def _convert_numpy(): dataset = {} dataset['train_img'] = _load_img(key_file['train_img']) dataset['train_label'] = _load_label(key_file['train_label']) dataset['test_img'] = _load_img(key_file['test_img']) dataset['test_label'] = _load_label(key_file['test_label']) return dataset def init_mnist(): download_mnist() dataset = _convert_numpy() print("Creating pickle file ...") with open(save_file,'wb') as f: pickle.dump(dataset,f,-1) print("Done") if __name__ =='__main__': init_mnist()
时间: 2024-04-03 21:31:26 浏览: 318
这段代码是用于下载MNIST数据集并将数据集转换成Numpy数组格式的函数。MNIST数据集是一个手写数字识别数据集,包含了60000张训练图片和10000张测试图片。在函数中,首先定义了数据集的下载地址和四个文件的名称,然后定义了四个函数用于下载和转换数据集。其中,_load_label和_load_img函数用于将标签和图片数据转换成Numpy数组格式,并且在转换过程中使用了gzip库解压缩数据。_convert_numpy函数用于将四个Numpy数组合成一个字典类型的数据集。最后,init_mnist函数用于下载数据集并将数据集转换成Numpy数组格式,并使用pickle库将数据集保存到本地文件中。如果MNIST数据集已经下载并保存到本地文件中,则直接加载本地文件中的数据集。
阅读全文