```python
# 划分区间
# 训练集历史区间、中间区间、标签区间
train_history_field = off_train[off_train['date_received'].isin(pd.date_range('2016/3/2', periods=60))]  # [20160302,20160501)
train_middle_field = off_train[off_train['date'].isin(pd.date_range('2016/5/1', periods=15))]  # [20160501,20160516)
train_label_field = off_train[off_train['date_received'].isin(pd.date_range('2016/5/16', periods=31))]  # [20160516,20160616)
# 验证集历史区间、中间区间、标签区间
validate_history_field = off_train[off_train['date_received'].isin(pd.date_range('2016/1/16', periods=60))]  # [20160116,20160316)
validate_middle_field = off_train[off_train['date'].isin(pd.date_range('2016/3/16', periods=15))]  # [20160316,20160331)
validate_label_field = off_train[off_train['date_received'].isin(pd.date_range('2016/3/31', periods=31))]  # [20160331,20160501)
# 测试集历史区间、中间区间、标签区间
test_history_field = off_train[off_train['date_received'].isin(pd.date_range('2016/4/17', periods=60))]  # [20160417,20160616)
test_middle_field = off_train[off_train['date'].isin(pd.date_range('2016/6/16', periods=15))]  # [20160616,20160701)
test_label_field = off_test.copy()  # [20160701,20160801)
```
将其改为一个函数
时间: 2024-03-22 10:37:27 浏览: 56
titanic_train.zip_python学习文件_titanic train_titanic数据集_train_trai
可以将这段代码封装成一个函数,以便在需要时可以直接调用。以下是一个可能的实现:
```python
import pandas as pd
def _rows_in_window(frame, column, start, periods):
    """Return the rows of *frame* whose *column* matches one of *periods* daily dates from *start*."""
    # NOTE(review): isin() compares against the exact timestamps produced by
    # pd.date_range, so this assumes the column holds datetime values without
    # a time-of-day component -- confirm against the caller's preprocessing.
    return frame[frame[column].isin(pd.date_range(start, periods=periods))]


def split_dataset(off_train, off_test):
    """Split the data into history / middle / label windows for three datasets.

    History and label windows are selected on the ``date_received`` column,
    middle windows on the ``date`` column. All windows are half-open date
    intervals, written below as ``[start, end)``.

    Args:
        off_train: DataFrame with ``date_received`` and ``date`` columns.
        off_test: DataFrame used as the test label window; it is copied,
            not filtered.

    Returns:
        A 9-tuple of DataFrames in this order:
        (train_history, train_middle, train_label,
         validate_history, validate_middle, validate_label,
         test_history, test_middle, test_label).
    """
    # Training set: history, middle and label windows.
    train_history_field = _rows_in_window(
        off_train, 'date_received', '2016/3/2', 60)    # [20160302, 20160501)
    train_middle_field = _rows_in_window(
        off_train, 'date', '2016/5/1', 15)             # [20160501, 20160516)
    train_label_field = _rows_in_window(
        off_train, 'date_received', '2016/5/16', 31)   # [20160516, 20160616)

    # Validation set: history, middle and label windows.
    validate_history_field = _rows_in_window(
        off_train, 'date_received', '2016/1/16', 60)   # [20160116, 20160316)
    validate_middle_field = _rows_in_window(
        off_train, 'date', '2016/3/16', 15)            # [20160316, 20160331)
    validate_label_field = _rows_in_window(
        off_train, 'date_received', '2016/3/31', 31)   # [20160331, 20160501)

    # Test set: history and middle windows come from off_train; the label
    # window is the whole of off_test.
    test_history_field = _rows_in_window(
        off_train, 'date_received', '2016/4/17', 60)   # [20160417, 20160616)
    test_middle_field = _rows_in_window(
        off_train, 'date', '2016/6/16', 15)            # [20160616, 20160701)
    # Copy so the returned frame is independent of the caller's off_test.
    test_label_field = off_test.copy()                 # [20160701, 20160801)

    return (train_history_field, train_middle_field, train_label_field,
            validate_history_field, validate_middle_field, validate_label_field,
            test_history_field, test_middle_field, test_label_field)
```
调用该函数时,需要传入 `off_train` 和 `off_test` 两个数据集作为参数,函数将返回九个数据集,分别对应训练集、验证集和测试集的历史区间、中间区间和标签区间。
阅读全文