```python
# -*- coding: utf-8 -*-
import os
import matplotlib.pyplot as plt
import sys

def extract_data(rbp_name):
    data = []
    subfolders = ['lncRNA', 'miRNA', 'mRNA', 'snoRNA', 'snRNA', 'tRNA']
    for subfolder in subfolders:
        folder_path = os.path.join(rbp_name, subfolder, '3.normalization')
        ctrl_file = os.path.join(folder_path, f'Ctrl_{subfolder}_rpm.txt')
        rep1_file = os.path.join(folder_path, f'rep1_{subfolder}_rpm.txt')
        rep2_file = os.path.join(folder_path, f'rep2_{subfolder}_rpm.txt')
        ctrl_data = []
        rep1_data = []
        rep2_data = []
        with open(ctrl_file, 'r') as f:
            for line in f:
                ctrl_data.append(line.strip().split('\t')[1])
        with open(rep1_file, 'r') as f:
            for line in f:
                rep1_data.append(line.strip().split('\t')[1])
        with open(rep2_file, 'r') as f:
            for line in f:
                rep2_data.append(line.strip().split('\t')[1])
        data.append(ctrl_data)
        data.append(rep1_data)
        data.append(rep2_data)
    return data

def create_boxplot(rbp_name, data):
    fig, ax = plt.subplots()
    ax.boxplot(data)
    subfolders = ['lncRNA', 'miRNA', 'mRNA', 'snoRNA', 'snRNA', 'tRNA']
    x_labels = [f'Ctrl_{subfolder}', f'rep1_{subfolder}', f'rep2_{subfolder}'] * len(subfolders)
    ax.set_xticklabels(x_labels, rotation=45)
    plt.savefig(os.path.join(rbp_name, f'{rbp_name}_boxplot.pdf'))
    plt.close()

rbp_name = sys.argv[1]
# 提取数据
data = extract_data(rbp_name)
# 创建箱线图
create_boxplot(rbp_name, data)
```
请对其进行优化
时间: 2024-04-16 10:26:59 浏览: 187
为了优化这段代码,有几个方面可以考虑:
1. 精简文件读取代码:在 `extract_data` 函数中,每个子目录下的三个文件(Ctrl、rep1、rep2)各被打开并逐行读取一次,同样的读取逻辑重复写了三遍。可以在打开文件后用一条表达式一次性收集所需的列并存入列表,以减少重复代码(文件的打开次数本身不会因此减少)。
2. 使用列表推导式:在 `extract_data` 函数中,可以使用列表推导式来简化代码,减少重复的代码行数。
3. 保持绘图部分与数据处理部分分离:原代码已将数据处理和绘图拆分为两个独立的函数,应保留这一结构以保证可读性和可维护性;此外,可以把脚本入口放进 `if __name__ == "__main__":` 中,使该文件被其他模块导入时不会直接执行。
下面是优化后的代码示例:
```python
import os
import matplotlib.pyplot as plt
import sys
def _read_rpm_column(path):
    """Return the second tab-separated column of *path* as a list of floats.

    Blank lines are skipped. If the first non-blank row holds a non-numeric
    value it is treated as a header and skipped; a non-numeric value on any
    later row raises ValueError.
    """
    values = []
    first_row = True
    with open(path, 'r') as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            stripped = raw_line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    f'{path}:{line_number}: expected at least two '
                    f'tab-separated columns'
                )
            try:
                # Box-plot statistics need numbers, not the raw strings.
                values.append(float(fields[1]))
            except ValueError:
                if not first_row:
                    raise ValueError(
                        f'{path}:{line_number}: non-numeric value '
                        f'{fields[1]!r}'
                    ) from None
            first_row = False
    return values


def extract_data(rbp_name):
    """Collect the second-column values for every RNA class under *rbp_name*.

    For each subfolder in a fixed list of RNA classes, the files
    ``Ctrl_<class>_rpm.txt``, ``rep1_<class>_rpm.txt`` and
    ``rep2_<class>_rpm.txt`` inside ``<rbp_name>/<class>/3.normalization``
    are read, in that order.

    Args:
        rbp_name: Path of the directory that contains the per-class
            subfolders.

    Returns:
        A list of 18 lists of floats: Ctrl, rep1, rep2 for each RNA class,
        in the order the classes are listed below.

    Raises:
        OSError: If one of the expected files cannot be opened.
        ValueError: If a data row has fewer than two columns or a
            non-numeric second column.
    """
    subfolders = ['lncRNA', 'miRNA', 'mRNA', 'snoRNA', 'snRNA', 'tRNA']
    samples = ('Ctrl', 'rep1', 'rep2')
    data = []
    for subfolder in subfolders:
        folder_path = os.path.join(rbp_name, subfolder, '3.normalization')
        for sample in samples:
            file_path = os.path.join(
                folder_path, f'{sample}_{subfolder}_rpm.txt'
            )
            data.append(_read_rpm_column(file_path))
    return data
def create_boxplot(rbp_name, data):
    """Draw one box per data series and save the figure as a PDF.

    The figure is written to ``<rbp_name>/<rbp_name>_boxplot.pdf``.

    Args:
        rbp_name: Directory the PDF is written into; also used as the file
            name prefix.
        data: Sequence of numeric series, one per box. The tick labels
            assume 18 series ordered as Ctrl, rep1, rep2 for each RNA
            class in the list below.
    """
    subfolders = ['lncRNA', 'miRNA', 'mRNA', 'snoRNA', 'snRNA', 'tRNA']
    # One label per sample per RNA class. The previous expression used a
    # name that is not defined in this function and repeated the same
    # three labels for every class.
    x_labels = [
        f'{sample}_{subfolder}'
        for subfolder in subfolders
        for sample in ('Ctrl', 'rep1', 'rep2')
    ]

    fig, ax = plt.subplots()
    ax.boxplot(data)
    ax.set_xticklabels(x_labels, rotation=45)
    fig.savefig(os.path.join(rbp_name, f'{rbp_name}_boxplot.pdf'))
    # Close this figure explicitly so repeated calls do not accumulate
    # open figures.
    plt.close(fig)
if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError traceback when
    # the directory argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <rbp_name>')
    rbp_name = sys.argv[1]
    # Extract the data
    data = extract_data(rbp_name)
    # Create the box plot
    create_boxplot(rbp_name, data)
```
这样,代码更加简洁,可读性更好;需要注意的是,文件的打开和读取次数与原来相同,因此运行效率基本不变。
阅读全文