import os
import sys

# Raw strings keep every backslash literal. In a normal string "\1" is an
# octal escape, so the original output path contained chr(1) instead of the
# intended file name "1.fasta".
fasta_file = r"E:\泛基因组分析\ORF\ORF_xml\PRRSV.fasta"
new_id_file = r"E:\泛基因组分析\ORF\ORF_xml\prr.txt"
output_file = r"E:\泛基因组分析\ORF\ORF_xml\1.fasta"

if not os.path.exists(fasta_file):
    print("Error: Fasta file does not exist!")
    sys.exit(1)
if not os.path.exists(new_id_file):
    print("Error: New id file does not exist!")
    sys.exit(1)

# One replacement ID per line; position in the list is the 1-based ID number.
try:
    with open(new_id_file, "r", encoding="utf-8") as f:
        new_ids = [line.strip() for line in f]
except (OSError, UnicodeDecodeError):
    print("Error: Failed to read new id file!")
    sys.exit(1)

try:
    with open(fasta_file, "r", encoding="utf-8") as f:
        lines = f.readlines()
except (OSError, UnicodeDecodeError):
    print("Error: Failed to read fasta file!")
    sys.exit(1)

new_lines = []
for line in lines:
    if line.startswith(">"):
        # Find the index of the current ID in the new-ID list.
        id_str = line.strip().lstrip(">")
        if id_str.isdecimal():
            index = int(id_str) - 1
            # Without this check "0" would wrap around to the last entry and
            # a too-large number would crash with an IndexError.
            if not 0 <= index < len(new_ids):
                print("Error: Id index out of range in new id file! ({})".format(id_str))
                sys.exit(1)
        else:
            try:
                index = new_ids.index(id_str)
            except ValueError:
                print("Error: Id not found in new id file! ({})".format(id_str))
                sys.exit(1)
        # Replace the header with the new ID.
        new_lines.append(">{}\n".format(new_ids[index]))
    else:
        new_lines.append(line)

# Write the rewritten FASTA to a new file, using the same encoding it was
# read with so non-ASCII IDs survive the round trip.
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(new_lines)
时间: 2024-01-02 20:05:16 浏览: 128
这个代码看起来已经很完整了,不过还是有一些可以改进的地方:
1. 在文件路径中使用“\\”或者“/”代替“\”(或者使用原始字符串 r"..."),因为“\”是转义字符,容易出现错误;例如“\1.fasta”中的“\1”会被解析为八进制转义字符,导致输出文件路径错误。
2. 可以添加一些输入输出路径的提示信息,方便用户操作。
3. 可以添加一个函数来判断文件是否存在,避免代码重复。
4. 可以将代码分解成多个函数,提高代码的可读性和可维护性。
下面是一个改进版本的代码,你可以参考一下:
```python
import os
def check_file(file_path):
    """Report whether *file_path* names an existing regular file.

    Prints an error message containing the path when the check fails.

    Args:
        file_path: Path to test.

    Returns:
        True if the path exists and is a file, otherwise False.
    """
    # isfile() rather than exists(): a directory also "exists" but cannot be
    # opened for reading, so accepting it would only defer the failure.
    if not os.path.isfile(file_path):
        print("Error: File does not exist! ({})".format(file_path))
        return False
    return True
def read_file(file_path):
    """Read a UTF-8 text file and return its lines.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of the file's lines with their line endings kept, or None if
        the file could not be opened or decoded (an error message is printed
        in that case).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.readlines()
    # OSError covers missing/unreadable files; ValueError covers
    # UnicodeDecodeError and malformed paths (e.g. an embedded NUL). A bare
    # except would also swallow KeyboardInterrupt and SystemExit.
    except (OSError, ValueError):
        print("Error: Failed to read file! ({})".format(file_path))
        return None
def replace_seq_id(fasta_file, new_id_file, output_file):
    """Rewrite the header lines of a FASTA file using IDs from a list file.

    Every line of *fasta_file* that starts with ">" is treated as a header.
    A header whose ID consists only of decimal digits is read as a 1-based
    line number into *new_id_file* and replaced by the ID found on that line.
    Any other header ID must itself appear in *new_id_file*. All remaining
    lines are copied unchanged. The result is written to *output_file* as
    UTF-8.

    Args:
        fasta_file: Path of the FASTA file to read.
        new_id_file: Path of the text file holding one new ID per line.
        output_file: Path the rewritten FASTA is written to.

    Raises:
        SystemExit: With status 1 (after an error message has been printed)
            if an input file is missing or unreadable, a numeric ID is out of
            range, or a non-numeric ID is not listed in *new_id_file*.
    """
    if not check_file(fasta_file):
        raise SystemExit(1)
    if not check_file(new_id_file):
        raise SystemExit(1)

    raw_ids = read_file(new_id_file)
    if raw_ids is None:
        raise SystemExit(1)
    # read_file keeps line endings, while header IDs are compared stripped;
    # normalise once here so the .index() lookup below can actually match.
    new_ids = [entry.strip() for entry in raw_ids]

    lines = read_file(fasta_file)
    if lines is None:
        raise SystemExit(1)

    new_lines = []
    for line in lines:
        if not line.startswith(">"):
            new_lines.append(line)
            continue

        # Find the index of the current ID in the new-ID list.
        id_str = line.strip().lstrip(">")
        if id_str.isdecimal():
            index = int(id_str) - 1
            # Without this check "0" would wrap around to the last entry and
            # a too-large number would crash with an IndexError.
            if not 0 <= index < len(new_ids):
                print("Error: Id index out of range in new id file! ({})".format(id_str))
                raise SystemExit(1)
        else:
            try:
                index = new_ids.index(id_str)
            except ValueError:
                print("Error: Id not found in new id file! ({})".format(id_str))
                raise SystemExit(1)

        # Replace the header with the new ID.
        new_lines.append(">{}\n".format(new_ids[index]))

    # Write the rewritten FASTA to the new file, in the same encoding the
    # inputs were read with so non-ASCII IDs survive the round trip.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(new_lines)
if __name__ == "__main__":
    # Ask for the three paths in order (input FASTA, ID list, output file),
    # then run the replacement.
    prompts = (
        "Please input fasta file path: ",
        "Please input new id file path: ",
        "Please input output file path: ",
    )
    fasta_file, new_id_file, output_file = [input(prompt) for prompt in prompts]
    replace_seq_id(fasta_file, new_id_file, output_file)
```
这个版本的代码中,添加了三个函数分别用来检查文件是否存在、读取文件内容和替换序列ID。在主函数中,通过输入文件路径来执行替换操作。
阅读全文