def pre(dataset):
    """Data preprocessing.

    Works on a copy of ``dataset`` and derives feature columns from the
    ``Discount_rate``, ``Distance``, ``Date_received`` and (when present)
    ``Date`` columns.

    Args:
        dataset: pandas DataFrame with at least the columns
            ``Discount_rate``, ``Distance`` and ``Date_received``.

    Returns:
        A new DataFrame with the added columns ``is_manjian``,
        ``discount_rate``, ``min_cost_of_manjian``, ``null_distance``,
        ``date_received`` and, only when ``Date`` exists, ``date`` and
        ``label``. The input frame is not modified.
    """
    # Source data: never mutate the caller's frame.
    data = dataset.copy()

    def parse_discount(raw):
        """Return (is_manjian, discount_rate, min_cost) for one raw value."""
        text = str(raw)
        if ':' not in text:
            # Plain discount rate: no minimum spend, marked with -1.
            return 0, float(raw), -1
        parts = text.split(':')
        threshold = float(parts[0])
        # "threshold:reduction" is converted to an equivalent discount rate.
        return 1, (threshold - float(parts[1])) / threshold, int(parts[0])

    # Discount handling: parse every value once, then fan out into columns.
    parsed = [parse_discount(raw) for raw in data['Discount_rate']]
    data['is_manjian'] = [flag for flag, _, _ in parsed]
    data['discount_rate'] = [rate for _, rate, _ in parsed]
    data['min_cost_of_manjian'] = [cost for _, _, cost in parsed]

    # Distance handling: missing distances become -1. Assign the result back
    # instead of using a chained in-place fillna, which does not reliably
    # update the frame under pandas Copy-on-Write.
    data['Distance'] = data['Distance'].fillna(-1)
    data['null_distance'] = (data['Distance'] == -1).astype(int)

    # Date handling.
    data['date_received'] = pd.to_datetime(data['Date_received'], format='%Y%m%d')
    if 'Date' in data.columns:
        data['date'] = pd.to_datetime(data['Date'], format='%Y%m%d')
        # Labelling: 1 when the coupon was used within 15 days of being
        # received, otherwise 0. A missing date compares as False, so it
        # yields 0. Only possible when 'Date' exists.
        elapsed_days = (data['date'] - data['date_received']).dt.total_seconds() / (60 * 60 * 24)
        data['label'] = (elapsed_days <= 15).astype(int)
    return data


# Request from the original post: please refactor this code to lower its
# similarity (plagiarism-check) rate.
时间: 2024-04-05 13:34:44 浏览: 94
def preprocess_data(dataset):
    """Build model features from raw coupon records.

    Args:
        dataset: pandas DataFrame containing ``Discount_rate``, ``Distance``
            and ``Date_received``; ``Date`` is optional.

    Returns:
        A copy of ``dataset`` with these columns added:
        ``is_manjian`` (1 when ``Discount_rate`` contains ``:``),
        ``discount_rate`` (float rate; ``a:b`` becomes ``(a - b) / a``),
        ``min_cost_of_manjian`` (``a`` of ``a:b``, else -1),
        ``null_distance`` (1 when ``Distance`` was missing),
        ``date_received`` (parsed datetime), and, only when ``Date`` is
        present, ``date`` and ``label`` (1 when ``date - date_received`` is
        at most 15 days).
    """
    def _to_rate(value):
        # "a:b" is turned into the equivalent rate; anything else is
        # already a rate.
        text = str(value)
        if ':' not in text:
            return float(value)
        parts = text.split(':')
        threshold = float(parts[0])
        return (threshold - float(parts[1])) / threshold

    def _min_cost(value):
        # Spending threshold of an "a:b" coupon; -1 marks "no threshold".
        text = str(value)
        return int(text.split(':')[0]) if ':' in text else -1

    data = dataset.copy()
    raw_discount = data['Discount_rate']
    data['is_manjian'] = raw_discount.apply(lambda x: 1 if ':' in str(x) else 0)
    data['discount_rate'] = raw_discount.apply(_to_rate)
    data['min_cost_of_manjian'] = raw_discount.apply(_min_cost)

    # Assign back rather than calling fillna(inplace=True) on the selected
    # column: the chained in-place form does not reliably write through to
    # `data` under pandas Copy-on-Write.
    data['Distance'] = data['Distance'].fillna(-1)
    data['null_distance'] = (data['Distance'] == -1).astype(int)

    data['date_received'] = pd.to_datetime(data['Date_received'], format='%Y%m%d')
    if 'Date' in data.columns:
        data['date'] = pd.to_datetime(data['Date'], format='%Y%m%d')
        # The label needs 'date', so it is only produced when 'Date' exists.
        # A missing date compares as False and therefore yields 0.
        data['label'] = ((data['date'] - data['date_received']).dt.days <= 15).astype(int)
    return data
阅读全文
相关推荐

















