
In[3]:
In[4]:
In[5]:
In[6]:
In[7]:
In[8]:
| | 年龄 | 所有权 | 学历 | 受教育年限 | 婚姻 | 职业 | 家庭成员 | 人种 | 性别 | 国家 | 收入 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39 | State gov | Bachelors | 13 | Never married | Adm clerical | Notin family | White | Male | United States | <=50K |
| 1 | 50 | Self emp notinc | Bachelors | 13 | Married civ spouse | Exec managerial | Husband | White | Male | United States | <=50K |
| 2 | 38 | Private | HSgrad | 9 | Divorced | Handlers cleaners | Notin family | White | Male | United States | <=50K |
| 3 | 53 | Private | 11th | 7 | Married civ spouse | Handlers cleaners | Husband | Black | Male | United States | <=50K |
| 4 | 28 | Private | Bachelors | 13 | Married civ spouse | Prof specialty | Wife | Black | Female | Cuba | <=50K |
# Show every row whose '所有权' value is exactly the '?' placeholder.
df2[df2['所有权'].eq('?')]
# Replace the '?' placeholder with 'Others' in every column except '年龄' and
# '受教育年限' (both hold numbers in the preview above, so the `.str`
# accessor would presumably not apply to them -- confirm against the loader).
for column in df2.columns.to_list():
    if column in ('年龄', '受教育年限'):
        continue
    # regex=False makes the literal match explicit: '?' is a regex
    # metacharacter, and the default value of `regex` differs between
    # pandas versions (True before 2.0, False from 2.0 on).
    df2.loc[:, column] = df2[column].str.replace('?', 'Others', regex=False)
df2
# Bin edges for the age groups. With right=False each bin is left-closed:
# [-1, 20), [20, 30), ..., [90, 101).
area = [-1, 20, 30, 40, 50, 60, 70, 80, 90, 101]
labels = [
    '0-20', '20-30', '30-40', '40-50', '50-60',
    '60-70', '70-80', '80-90', '90-100',
]
df2['年龄段'] = pd.cut(df2['年龄'], bins=area, right=False, labels=labels)

# Reorder the columns so that '年龄段' sits immediately after '年龄'.
new_columns = [name for name in df2.columns.to_list() if name != '年龄段']
new_columns.insert(new_columns.index('年龄') + 1, '年龄段')
df2 = df2.reindex(columns=new_columns)
评论1