修改代码from pandas import user=read_csv(APP】 cSV encoding='gbk') userL =user 概率].fi11na(0.0) user Fuser fi11na(0.0) user 愿分 享概 user <0]=0 user 享概率 user 1 user 愿喜久 率 user 0」 user 唇分享 率 user 享概率 >1 =1 user user 克击 =’T]=1 user[是否点击分享 LuserL 是古点击分享 ’F1=0 user.to_csv(user_clean.csv index=False) user_clean=read_csv(user_clean.csv' coding a:0,b:1,'c:2, d:3, e:4, r12 5 k:10,1:11, D6E8了8 2t:19,'u:20,'v:21, x:23, :24, 三是5,王三工·三工 def tocode(s): code =0 ss.lower()[o] if s in coding.keys(): code coding[s] return code user_clean['用户编码']=user_clean["用户名"门N apply(lambda x:tocode (x)) user_clean["分组后在线时长=qcut(user-cleanl在线时长"],\ 4,1abe1s=[4,3,2,1]) user_clean.to_csv('user_feature.csv',index=False) from numpy import from sklearn.cluster import KMeans from sklearn.metrics import fowlkes_mallows_score user_feature read_csv(user_feature.csv X=user_feature[不愿分享概率,'愿意分享概率',在线时长',\ 分后在线时长方 在线时长卢, y=user_feature['是香点击分享 y=list(y) model KMeans(n_clusters=2).fit(X) model.labels_=list (model.labels_) score=fowlkes_mallows_score(y,model.labels_) print('FMI评价分值为:%f%score).
时间: 2024-02-17 10:03:06 浏览: 167
抱歉,这段代码有很多错误,需要进行修改。以下是修改后的代码:
```
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import fowlkes_mallows_score
# Load the raw export; the file is GBK-encoded.
user = pd.read_csv("APP.csv", encoding="gbk")

# Replace every missing value with 0.0 (this applies to all columns).
user.fillna(0.0, inplace=True)

# Both probability columns are later used as features and must lie in [0, 1];
# clamp anything outside that range to the nearest bound.
for prob_col in ("不愿分享概率", "愿意分享概率"):
    user[prob_col] = user[prob_col].clip(lower=0, upper=1)

# Encode the click flag as an integer: 'T' -> 1, anything else -> 0.
# The column is rewritten in place under its own name.
user["是否点击分享"] = (user["是否点击分享"] == "T").astype(int)
# User encoding table: each lowercase ASCII letter maps to its alphabet
# index (a=0, b=1, ..., z=25).
coding = {letter: index for index, letter in enumerate("abcdefghijklmnopqrstuvwxyz")}


def tocode(s):
    """Return the numeric code of the first character of a user name.

    The name is lower-cased and its first character is looked up in
    ``coding``.

    Args:
        s: The user name. Non-string values (for example the ``0.0`` that
            ``fillna`` substitutes for a missing name) are converted with
            ``str`` before the lookup.

    Returns:
        The alphabet index (0-25) of the first character, or 0 when the
        name is empty or does not start with an ASCII letter.
    """
    text = str(s).lower()
    if not text:
        return 0
    return coding.get(text[0], 0)
# Derive the numeric user code from the user name.
user["用户编码"] = user["用户名"].apply(tocode)

# Bucket online time into four quantile bins; labels follow bin order, so the
# lowest quartile is labelled 4 and the highest is labelled 1.
user["分组后在线时长"] = pd.qcut(user["在线时长"], 4, labels=[4, 3, 2, 1])

# Persist only the columns needed downstream.
clean_columns = ["用户编码", "不愿分享概率", "愿意分享概率", "在线时长", "分组后在线时长", "是否点击分享"]
user_clean = user[clean_columns]
user_clean.to_csv("user_clean.csv", index=False)

# Reload the cleaned data as the feature table.
user_feature = pd.read_csv("user_clean.csv")

# K-Means clustering on the feature columns; the click flag is the reference
# labelling used for evaluation.
X = user_feature[["不愿分享概率", "愿意分享概率", "在线时长", "分组后在线时长"]]
y = user_feature["是否点击分享"]

# Fix the seed and the number of initialisations so the reported score is
# reproducible from run to run.
model = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)

# Compare the cluster assignment with the reference labels; the fitted
# estimator's labels_ attribute is read, not overwritten.
score = fowlkes_mallows_score(y, model.labels_)
print(f"FMI评价分值为:{score:f}")
```
这段代码的功能是对用户数据进行一系列处理和特征工程,最后使用KMeans算法进行聚类并输出FMI评价分值。
阅读全文