import os fasta_file = "E:\泛基因组分析\ORF\ORF_xml\PRRSV.fasta" new_id_file = "E:\泛基因组分析\ORF\ORF_xml\prr.txt" if not os.path.exists(fasta_file): print("Error: Fasta file does not exist!") exit() if not os.path.exists(new_id_file): print("Error: New id file does not exist!") exit() try: with open(new_id_file, "r",encoding="utf-8") as f: new_ids = [line.strip() for line in f] except: print("Error: Failed to read new id file!") exit() try: with open(fasta_file, "r",encoding="utf-8") as f: lines = f.readlines() except: print("Error: Failed to read fasta file!") exit() new_lines = [] for line in lines: if line.startswith(">"): # 获取当前id在新id列表中的索引 id_str = line.strip().lstrip(">") if id_str.isdigit(): index = int(id_str) - 1 else: try: index = new_ids.index(id_str) except ValueError: print("Error: Id not found in new id file! ({})".format(id_str)) exit() # 替换为新id new_lines.append(">{}\n".format(new_ids[index])) else: new_lines.append(line) # 将替换后的fasta文件写入新文件 with open("E:\泛基因组分析\ORF\ORF_xml\1.fasta", "w") as f: f.writelines(new_lines)

时间: 2024-01-02 19:05:16 浏览: 26

这个代码看起来已经很完整了，不过还是有一些可以改进的地方： 1. 在文件路径中使用“\\”或者“/”代替“\”，因为“\”是转义字符，容易出现错误。 2. 可以添加一些输入输出路径的提示信息，方便用户操作。 3. 可以添加一个函数来判断文件是否存在，避免代码重复。 4. 可以将代码分解成多个函数，提高代码的可读性和可维护性。下面是一个改进版本的代码，你可以参考一下： ```python import os def check_file(file_path): if not os.path.exists(file_path): print("Error: File does not exist! ({})".format(file_path)) return False return True def read_file(file_path): try: with open(file_path, "r", encoding="utf-8") as f: lines = f.readlines() return lines except: print("Error: Failed to read file! ({})".format(file_path)) return None def replace_seq_id(fasta_file, new_id_file, output_file): if not check_file(fasta_file): exit() if not check_file(new_id_file): exit() new_ids = read_file(new_id_file) if new_ids is None: exit() lines = read_file(fasta_file) if lines is None: exit() new_lines = [] for line in lines: if line.startswith(">"): # 获取当前id在新id列表中的索引 id_str = line.strip().lstrip(">") if id_str.isdigit(): index = int(id_str) - 1 else: try: index = new_ids.index(id_str) except ValueError: print("Error: Id not found in new id file! ({})".format(id_str)) exit() # 替换为新id new_lines.append(">{}\n".format(new_ids[index].strip())) else: new_lines.append(line) # 将替换后的fasta文件写入新文件 with open(output_file, "w") as f: f.writelines(new_lines) if __name__ == "__main__": fasta_file = input("Please input fasta file path: ") new_id_file = input("Please input new id file path: ") output_file = input("Please input output file path: ") replace_seq_id(fasta_file, new_id_file, output_file) ``` 这个版本的代码中，添加了三个函数分别用来检查文件是否存在、读取文件内容和替换序列ID。在主函数中，通过输入文件路径来执行替换操作。

相关推荐

CSDN会员

开通CSDN年卡参与万元壕礼抽奖

海量 VIP免费资源千本正版电子书商城会员专享价千门课程&专栏

全年可省5,000元立即开通