# 优惠券15天内核销率 feature_Coupon[prefixs+'15_consume_rate'] = list(map(lambda x,y: x/y if y!=0 else 0, feature_Coupon[prefixs + '15days_consume_cnt'], feature_Coupon[prefixs + 'received_cnt'])) # 优惠券15天内被核销的最大时间间隔 t6 = data[data['label']==1].copy() t6['time_gap'] = (t6['date'] - t6['date_received']).map(lambda x:x.total_seconds()/(24*60*60)) pivot = pd.pivot_table(t6, index=keys4, values='time_gap', aggfunc=np.max) pivot = pd.DataFrame(pivot).rename(columns={'time_gap':prefixs + '15days_max_time_gap'}).reset_index() feature_Coupon = pd.merge(feature_Coupon, pivot, on=keys4, how='left')请用groupby(函数改写)
时间: 2024-03-22 18:37:02 浏览: 108
可以使用groupby函数对数据进行分组并聚合,改写上述代码如下:
```
# 优惠券15天内核销率
feature_Coupon[prefixs+'15_consume_rate'] = feature_Coupon.apply(lambda x: x[prefixs + '15days_consume_cnt'] / x[prefixs + 'received_cnt'] if x[prefixs + 'received_cnt'] != 0 else 0, axis=1)
# 优惠券15天内被核销的最大时间间隔
t6 = data[data['label'] == 1].copy()
t6['time_gap'] = (t6['date'] - t6['date_received']).dt.days
pivot = t6.groupby(keys4)['time_gap'].max().reset_index()
pivot = pivot.rename(columns={'time_gap': prefixs + '15days_max_time_gap'})
feature_Coupon = pd.merge(feature_Coupon, pivot, on=keys4, how='left')
```
其中,对于优惠券15天内核销率的计算,可以使用apply函数对每一行数据进行计算。对于优惠券15天内被核销的最大时间间隔的计算,可以使用groupby函数对keys4进行分组,并对每组数据的time_gap列进行最大值的聚合。
相关问题
def get_label_feature_of_date(label_field): """提取标记区间日期相关特征""" # 源数据 data = label_field.copy() data['Coupon_id'] = data['Coupon_id'].map(int) # 将Coupon_id列中float类型的元素转换为int类型,因为列中存在np.nan即空值会让整列的元素变为float data['Date_received'] = data['Date_received'].map(int) # 将Date_received列中float类型的元素转换为int类型,因为列中存在np.nan即空值会让整列的元素变为float # 返回的特征数据集 feature = data.copy() feature['Week_received'] = feature['date_received'].map(lambda x: x.isoweekday()) # 星期几,星期一为1,星期天为7 feature['is_weekend_reveived'] = feature['Week_received'].map(lambda x: 1 if x == 6 or x == 7 else 0) # 判断领券日是否为休息日 feature['Month_received'] = feature['date_received'].map(lambda x: x.month)# 月份 feature = pd.concat([feature, pd.get_dummies(feature['Week_received'], prefix='Week_received')], axis=1) # one-hot离散星期几 feature.index = range(len(feature)) # 重置index # 返回 return feature解释
这段代码的主要作用是提取标记区间日期相关的特征,将标记区间内的数据作为输入。具体来说,该函数首先对输入数据进行复制,然后将Coupon_id和Date_received两列的数据类型从float转换为int,以便后续的处理。接着,通过lambda函数分别计算领券日是星期几、是否为休息日、月份等特征,并将星期几进行one-hot编码。最后,通过concat()函数将编码后的特征数据集与原始数据集进行合并,并通过reset_index()函数重置数据集的行索引。最终将特征数据集作为输出返回。
SELECT sum(t.pay_amount)+sum(t.promotion_amount)+SUM(t.author_coupon_subsidy)+SUM(t.actual_zt_pay_promotion)+SUM(t.actual_zr_pay_promotion)+SUM(t.refund_amount)+SUM(t.jylp_wp_amount)+SUM(t.post_amount)+SUM(t.promotion_amount_yf) as 'b_amount',\n" + "SUM(t.platform_service_fee)+SUM(t.commission)+SUM(t.channel_fee)+SUM(t.colonel_service_fee)+SUM(t.channel_promotion_fee)+SUM(t.other_sharing_amount)+SUM(t.yfx_amount)+SUM(t.jylp_yp_amount) as 's_amount'\n" + "from dy_settlement_push_input t where t.deleted = '0'
这是一条 SQL 查询语句,目的是查询 dy_settlement_push_input 表中满足条件(deleted = '0')的记录中,各种金额字段的总和。具体来说,该语句计算了以下金额:
1. pay_amount:支付金额
2. promotion_amount:促销优惠金额
3. author_coupon_subsidy:优惠券补贴金额
4. actual_zt_pay_promotion:直通车支付优惠金额
5. actual_zr_pay_promotion:直冲支付优惠金额
6. refund_amount:退款金额
7. jylp_wp_amount:加油礼品卡支付金额
8. post_amount:邮费金额
9. promotion_amount_yf:促销优惠金额(应付)
10. platform_service_fee:平台服务费
11. commission:佣金
12. channel_fee:渠道费用
13. colonel_service_fee:团长服务费
14. channel_promotion_fee:渠道促销费用
15. other_sharing_amount:其他分摊金额
16. yfx_amount:邮费险金额
17. jylp_yp_amount:加油礼品卡优惠金额
最终会返回两个金额总和,分别是 b_amount 和 s_amount。
阅读全文