请优化下面这段代码:
n=4
df = pd.DataFrame({'group': list('aabbabbbababaababbba'), 'value': [1,2,np.nan,2,4,np.nan,9,2,np.nan,3,7,6,8,np.nan,6,np.nan,np.nan,0,6,5]})
ndfa=df[df["group"] == "a"]
ndfb=df[df["group"] == "b"]
movingaverage1=[]
movingaverage2=[]
len1=len(ndfa["value"])
len2=len(ndfb["value"])
for i in range(1,len1+1):
    if i<=n:
        if True in np.array(np.isnan((ndfa[:1])["value"])):
            movingaverage1.append(0)
        else:
            sub_ndfa=ndfa[:i]
            sub_ndfa_withoutNaN=sub_ndfa[pd.notnull((sub_ndfa["value"]))]["value"]
            movingaverage1.append((sub_ndfa_withoutNaN.copy()).mean())
    else:
        sub_ndfa=ndfa[i-n:i]
        sub_ndfa_withoutNaN=sub_ndfa[pd.notnull((sub_ndfa["value"]))]["value"]
        movingaverage1.append((sub_ndfa_withoutNaN.copy()).mean())
for i in range(1,len2+1):
    if i<=n:
        if True in np.array(np.isnan((ndfb[:1])["value"])):
            movingaverage2.append("0")
        else:
            sub_ndfb=ndfb[:i]
            sub_ndfb_withoutNaN=sub_ndfb[pd.notnull((sub_ndfb["value"]))]["value"]
            movingaverage2.append((sub_ndfb_withoutNaN.copy()).mean())
    else:
        sub_ndfb=ndfb[i-n:i]
        sub_ndfb_withoutNaN=sub_ndfb[pd.notnull((sub_ndfb["value"]))]["value"]
        movingaverage2.append((sub_ndfb_withoutNaN.copy()).mean())
#确定顺序
astation=[]
bstation=[]
nlist=[]
c=0
d=0
e=0
for i in df["group"]:
    if i=="a":
        astation.append(c)
    else:
        bstation.append(c)
    c+=1
for i in range(0,len1+len2):
    if i in astation:
        nlist.append(movingaverage1[d])
        d+=1
    else:
        nlist.append(movingaverage2[e])
        e+=1
flist=pd.Series(nlist)
print(flist)
时间: 2023-05-22 13:06:43 浏览: 154
可以优化的地方:
1. 首先可以将 ndfa 和 ndfb 的重复代码合并:
ndfa=df[df["group"] == "a"]
ndfb=df[df["group"] == "b"]
可以改成:
# Build the GroupBy object once, then pull out each group's rows by its label.
ndf_grouped = df.groupby("group")
ndfa = ndf_grouped.get_group("a")
ndfb = ndf_grouped.get_group("b")
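这样只需对 df 分组一次;不过仍然要逐组调用 get_group,若要真正支持任意多个组别,应直接在 groupby 对象上进行计算(例如 df.groupby("group")["value"].transform(...)),而不是为每个组单独建立变量。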
2. 对于计算滑动平均的部分,可以使用 rolling 方法:
# Rolling window of n rows within each group. min_periods=1 makes the window
# emit a mean as soon as it holds at least one non-NaN value; NaN entries are
# left out of the mean, and an all-NaN window yields NaN.
movingaverage1 = ndfa["value"].rolling(n, min_periods=1).mean().tolist()
movingaverage2 = ndfb["value"].rolling(n, min_periods=1).mean().tolist()
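这样可以避免使用循环,代码更简洁,运行速度也更快。rolling(n, min_periods=1).mean() 会自动忽略窗口内的 NaN,与原代码先用 pd.notnull 过滤再求均值的结果一致;但需要注意,原代码在某组第一个值为 NaN 时,会把该组前 n 个结果直接置为 0(b 组为字符串 "0"),这一特殊处理在 rolling 写法中不再保留,对应位置会得到普通的滑动平均值(窗口内全为 NaN 时为 NaN)。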
优化后的代码如下:
# Per-group moving average over the last n rows, ignoring NaN values, with the
# results also re-assembled in the original row order (as the question's
# `flist` did).
import numpy as np
import pandas as pd

# Window length: how many rows of the same group feed each average.
n = 4

df = pd.DataFrame({'group': list('aabbabbbababaababbba'), 'value': [1,2,np.nan,2,4,np.nan,9,2,np.nan,3,7,6,8,np.nan,6,np.nan,np.nan,0,6,5]})

ndf_grouped = df.groupby("group")
ndfa = ndf_grouped.get_group("a")
ndfb = ndf_grouped.get_group("b")

# transform() applies the rolling mean inside every group and returns a Series
# aligned with df's index, so any number of groups is handled in one pass and
# no manual re-ordering of the per-group results is needed.
# min_periods=1 yields a mean as soon as one non-NaN value is in the window;
# a window containing only NaN yields NaN.
# NOTE: unlike the question's loop, a group whose first value is NaN is NOT
# forced to 0 / "0" for its first n rows; plain rolling-mean semantics apply.
rolling_mean = ndf_grouped["value"].transform(
    lambda values: values.rolling(n, min_periods=1).mean()
)

# Per-group lists, in each group's own row order.
movingaverage1 = rolling_mean.loc[ndfa.index].tolist()
movingaverage2 = rolling_mean.loc[ndfb.index].tolist()
print(movingaverage1)
print(movingaverage2)

# Combined result in the original row order, as a plain unnamed Series with a
# fresh 0..len-1 index (mirrors `pd.Series(nlist)` from the question).
flist = pd.Series(rolling_mean.tolist())
print(flist)
阅读全文