给一段用 Python 的 pandas 合并并清洗数据的代码
时间: 2023-09-23 09:05:30 浏览: 114
### 回答1:
import pandas as pd

# Load the two input tables.
df1 = pd.read_csv('data1.csv')
df2 = pd.read_csv('data2.csv')

# Left-join df2 onto df1 using the shared 'id' column, then replace every
# missing value in the joined table with 0.
df_merged = pd.merge(df1, df2, how='left', on='id').fillna(0)
### 回答2:
import pandas as pd
# Load the two input files.
df1 = pd.read_csv('data1.csv')
df2 = pd.read_csv('data2.csv')

# Stack the two tables, then clean in a single chain: drop duplicate rows,
# drop rows containing missing values, renumber the index from 0, and rename
# the columns.
df = (
    pd.concat([df1, df2])
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
    .rename(columns={'old_col1': 'new_col1', 'old_col2': 'new_col2'})
)

# Cast the renamed columns to their target dtypes.
df = df.astype({'new_col1': int, 'new_col2': float})

# Derive a label column: 'A' where new_col1 is greater than 5, otherwise 'B'.
df['new_col3'] = df['new_col1'].map(lambda value: 'A' if value > 5 else 'B')

# Keep only the rows that satisfy both column conditions.
df = df.loc[df['new_col1'].gt(3) & df['new_col2'].lt(10)]

# Summary statistics over the filtered rows.
max_val = df['new_col2'].max()
mean_val = df['new_col2'].mean()

# Print the cleaned table and the statistics.
print(df)
print('Mean value:', mean_val)
print('Max value:', max_val)
### 回答3:
import pandas as pd
# Load the two input files.
data1 = pd.read_csv('file1.csv')
data2 = pd.read_csv('file2.csv')

# Stack the two tables on top of each other.
merged_data = pd.concat([data1, data2])

# Clean in a single chain: drop duplicate rows, replace missing values with 0,
# and renumber the index from 0.
cleaned_data = (
    merged_data
    .drop_duplicates()
    .fillna(0)
    .reset_index(drop=True)
)

# Write the cleaned table to disk without the index column.
cleaned_data.to_csv('cleaned_data.csv', index=False)
阅读全文