# Request from the page text (translated): explain this code.
# Page metadata: time 2023-06-08 09:04:57, views 95.


def Agg(Feature):
    """Add grouped aggregate statistics of ``Feature`` as new columns.

    For each grouping key in ``year``, ``month``, ``WeekOfYear`` and ``day``
    the statistics of ``Feature`` are computed on ``df_train`` only and then
    mapped, through the key column, onto both ``df_train`` and ``df_test``.
    Both frames therefore receive identically named columns of the form
    ``{Feature}_Agg_{key}_{stat}``.

    Args:
        Feature: Name of the ``df_train`` column to aggregate.

    Side effects:
        Mutates the module-level ``df_train`` and ``df_test`` in place.
        A key value that does not occur in ``df_train`` maps to NaN.
    """
    quantile_levels = [0.10, 0.25, 0.75, 0.90]
    simple_stats = ('mean', 'median', 'std', 'min', 'max', 'sum')

    for feat_1 in ['year', 'month', 'WeekOfYear', 'day']:
        # Every statistic comes from the training frame, so group once per
        # key and compute each statistic once, instead of rebuilding the
        # same groupby for every statistic and every dataset.
        grouped = df_train.groupby(feat_1)[Feature]
        stats = {
            'mean': grouped.mean(),
            'median': grouped.median(),
            'std': grouped.std(),
            'min': grouped.min(),
            'max': grouped.max(),
            'sum': grouped.sum(),
            'var': grouped.var(),
            'skew': grouped.skew(),
        }
        quantiles = {n: grouped.quantile(n) for n in quantile_levels}

        prefix = f'{Feature}_Agg_{feat_1}'
        for dataset in [df_train, df_test]:
            keys = dataset[feat_1]
            # Column creation order is kept the same as the original code.
            for stat in simple_stats:
                dataset[f'{prefix}_{stat}'] = keys.map(stats[stat])
            dataset[f'{prefix}_range'] = (
                dataset[f'{prefix}_max'] - dataset[f'{prefix}_min']
            )
            dataset[f'{prefix}_var'] = keys.map(stats['var'])
            dataset[f'{prefix}_skew'] = keys.map(stats['skew'])
            for n in quantile_levels:
                dataset[f'{prefix}_quantile_{n}'] = keys.map(quantiles[n])


feats = ['meantemp']
for feat in feats:
    Agg(feat)

print('处理删除前的特性: ', df_train.shape)

# Find columns of df_train whose contents exactly duplicate an earlier
# column. Only later columns need to be checked: an earlier identical column
# would already have caused the current one to be recorded in `dup`.
cols = df_train.columns
dup = []
for i, feat_1 in enumerate(tqdm(cols)):
    if feat_1 in dup:
        continue
    for feat_2 in cols[i + 1:]:
        if feat_2 in dup:
            continue
        if df_train[feat_1].equals(df_train[feat_2]):
            dup.append(feat_2)

# Drop the duplicates from BOTH frames. Dropping them from df_train only
# left them in df_test, so the loop below raised KeyError on df_train[feat]
# and the two frames ended up with different feature sets.
df_train.drop(columns=dup, inplace=True)
df_test.drop(columns=dup, inplace=True, errors='ignore')

# Drop columns that hold a single distinct (non-NaN) value in either frame.
# Iterate over a snapshot because columns are removed inside the loop.
for feat in tqdm(list(df_test.columns)):
    if feat not in df_train.columns:
        # Column exists only in df_test; there is nothing to compare against.
        continue
    if df_train[feat].nunique() == 1 or df_test[feat].nunique() == 1:
        df_train.drop(feat, inplace=True, axis=1)
        df_test.drop(feat, inplace=True, axis=1)
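这段代码对特征做分组聚合:对 feats 中的每个特征(此处只有 meantemp),分别按年(year)、月(month)、一年中的第几周(WeekOfYear)、日(day)分组,在训练集 df_train 上计算均值、中位数、标准差、最小值、最大值、总和、极差(最大值减最小值)、方差、偏度以及 0.10、0.25、0.75、0.90 分位数,再通过分组键把这些统计量映射成新列,同时添加到训练集 df_train 和测试集 df_test 中(统计量只来自训练集,测试集只是按键取值)。最后做两步特征清理:删除以训练集为准判定的、内容完全相同的重复列;删除在训练集或测试集中只有一个取值的常量列。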
阅读全文