优化以下代码 df_in_grown_ebv = pd.read_table(open(r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\生长性能育种值N72分组 (7).txt"), delim_whitespace=True, encoding="gb18030", header=None) df_in_breed_ebv = pd.read_table(open(r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\繁殖性能育种值N72分组 (7).txt"), delim_whitespace=True, encoding="gb18030", header=None) # df_in_grown_Phenotype.columns = ['个体号', '活仔EBV', '21d窝重EBV', '断配EBV'] # df_in_breed_Phenotype.columns = ['个体号', '115EBV', '饲料转化率EBV', '瘦肉率EBV', '眼肌EBV', '背膘EBV'] df_in_breed_ebv.columns = ['个体号', '活仔EBV', '21d窝重EBV', '断配EBV'] df_in_grown_ebv.columns = ['个体号', '115daysEBV', '饲料转化率EBV', '瘦肉率EBV', '眼肌EBV', '背膘EBV'] NBA_mean = np.mean(df_in_breed_ebv['活仔EBV']) NBA_std = np.std(df_in_breed_ebv['活仔EBV']) days_mean = np.mean(df_in_grown_ebv['115daysEBV']) days_std = np.std(df_in_grown_ebv['115daysEBV']) fcr_mean = np.mean(df_in_grown_ebv['饲料转化率EBV']) fcr_std = np.std(df_in_grown_ebv['饲料转化率EBV']) output = pd.merge(df_in_grown_ebv, df_in_breed_ebv, how='inner', left_on='个体号', right_on='个体号') # output['计算长白母系指数'] = 0.3 * (NBA - NBA_mean)/NBA_std - 0.3 * (days - days_mean)/days_std - 0.3 * (fcr-fcr_mean)/fcr_std + 0.1 * (pcl-pcl_mean)/pcl_std output['计算长白母系指数'] = 0.29 * (df_in_breed_ebv['活仔EBV'] - NBA_mean)/NBA_std - 0.58 * (df_in_grown_ebv['115daysEBV']- days_mean)/days_std - 0.13 * (df_in_grown_ebv['饲料转化率EBV']-fcr_mean)/fcr_std MLI_mean = np.mean(output['计算长白母系指数']) MLI_std = np.std(output['计算长白母系指数']) output['校正长白母系指数'] = 25 * ((output['计算长白母系指数'] - MLI_mean)/MLI_std) + 100 output.to_excel(r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\权重3-N72权重指数_20230602.xlsx",index=False) print(NBA_mean) print(NBA_std) print(days_mean) print(days_std) print(fcr_mean) print(fcr_std) print(MLI_mean) print(MLI_std)
时间: 2024-01-26 18:03:19 浏览: 65
可以优化的地方有:
1. 文件路径可以使用变量存储,避免多次重复输入文件路径。
2. 读取文件时可以用 usecols 指定需要读取的列,避免读取不必要的列;注意所选列数必须与随后赋给 columns 的列名数量一致,否则赋列名时会报长度不匹配的错误。
3. 计算指数时可以将计算公式提取为一个函数,避免重复代码。
4. 输出结果时可以使用变量存储,避免多次输入输出文件路径。
以下是优化后的代码:
```python
import pandas as pd
import numpy as np
# File locations, each written once so a path only has to be edited in one place.
grown_file = r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\生长性能育种值N72分组 (7).txt"
breed_file = r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\繁殖性能育种值N72分组 (7).txt"
output_file = r"C:\Users\荆晓燕\Desktop\20230515分品种计算育种值\权重3-N72权重指数_20230602.xlsx"

# Column names for each input table; the number of columns read below is
# derived from these lists so the two can never get out of step.
breed_columns = ['个体号', '活仔EBV', '21d窝重EBV', '断配EBV']
grown_columns = ['个体号', '115daysEBV', '饲料转化率EBV', '瘦肉率EBV', '眼肌EBV', '背膘EBV']

# Read the whitespace-separated, header-less tables.
# sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True.
# NOTE(review): assumes the growth file has at least six columns and the
# reproduction file at least four -- confirm against the actual files.
df_in_grown_ebv = pd.read_table(
    grown_file,
    usecols=list(range(len(grown_columns))),
    sep=r"\s+",
    encoding="gb18030",
    header=None,
)
df_in_breed_ebv = pd.read_table(
    breed_file,
    usecols=list(range(len(breed_columns))),
    sep=r"\s+",
    encoding="gb18030",
    header=None,
)

# Attach the column names (lengths now match the columns that were read).
df_in_breed_ebv.columns = breed_columns
df_in_grown_ebv.columns = grown_columns
# Index formula
def calculate_index(breed_ebv, grown_ebv, NBA_mean, NBA_std, days_mean, days_std,
                    fcr_mean, fcr_std, *, nba_weight=0.29, days_weight=0.58,
                    fcr_weight=0.13):
    """Return the weighted combination of three standardized EBV columns.

    Each trait is standardized as ``(value - mean) / std`` using the
    statistics supplied by the caller. The '活仔EBV' term is added, while the
    '115daysEBV' and '饲料转化率EBV' terms are subtracted.

    Args:
        breed_ebv: Table providing the '活仔EBV' column.
        grown_ebv: Table providing the '115daysEBV' and '饲料转化率EBV'
            columns. The result follows pandas index alignment, so both
            tables should share the same row index.
        NBA_mean, NBA_std: Mean and standard deviation for '活仔EBV'.
        days_mean, days_std: Mean and standard deviation for '115daysEBV'.
        fcr_mean, fcr_std: Mean and standard deviation for '饲料转化率EBV'.
        nba_weight: Weight of the '活仔EBV' term (added).
        days_weight: Weight of the '115daysEBV' term (subtracted).
        fcr_weight: Weight of the '饲料转化率EBV' term (subtracted).

    Returns:
        The element-wise index values, one per row.
    """
    NBA = breed_ebv['活仔EBV']
    days = grown_ebv['115daysEBV']
    fcr = grown_ebv['饲料转化率EBV']
    return (
        nba_weight * (NBA - NBA_mean) / NBA_std
        - days_weight * (days - days_mean) / days_std
        - fcr_weight * (fcr - fcr_mean) / fcr_std
    )
# Per-trait mean and standard deviation, taken over the input tables as
# loaded (i.e. before the merge below), using numpy's default settings.
NBA_mean, NBA_std = (
    np.mean(df_in_breed_ebv['活仔EBV']),
    np.std(df_in_breed_ebv['活仔EBV']),
)
days_mean, days_std = (
    np.mean(df_in_grown_ebv['115daysEBV']),
    np.std(df_in_grown_ebv['115daysEBV']),
)
fcr_mean, fcr_std = (
    np.mean(df_in_grown_ebv['饲料转化率EBV']),
    np.std(df_in_grown_ebv['饲料转化率EBV']),
)

# Keep only the animals present in both tables, then compute the raw index
# on the merged rows so every term refers to the same animal.
output = df_in_grown_ebv.merge(df_in_breed_ebv, how='inner', on='个体号')
output['计算长白母系指数'] = calculate_index(
    output[['活仔EBV']],
    output[['115daysEBV', '饲料转化率EBV']],
    NBA_mean,
    NBA_std,
    days_mean,
    days_std,
    fcr_mean,
    fcr_std,
)

# Rescale the raw index to mean 100 and standard deviation 25.
MLI_mean, MLI_std = (
    np.mean(output['计算长白母系指数']),
    np.std(output['计算长白母系指数']),
)
output['校正长白母系指数'] = 25 * ((output['计算长白母系指数'] - MLI_mean) / MLI_std) + 100

# Write the merged table, including both index columns, to the Excel file.
output.to_excel(output_file, index=False)

# Report the statistics, one value per line, in a fixed order.
for statistic in (
    NBA_mean,
    NBA_std,
    days_mean,
    days_std,
    fcr_mean,
    fcr_std,
    MLI_mean,
    MLI_std,
):
    print(statistic)
```
阅读全文