import sys
import os


def read_annotations(path):
    """Return {gene_id: [annotation columns]} parsed from a tab-separated file.

    Column 1 is the gene ID; every remaining column is kept as annotation.
    Blank lines are skipped.
    """
    annotations = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if not fields[0]:
                continue
            annotations[fields[0]] = fields[1:]
    return annotations


def read_gene_names(path):
    """Return {gene_id: gene_name} parsed from a tab-separated file.

    Column 1 is the gene ID and column 2 the gene name. Blank lines are
    skipped; a line that has an ID but no name column is recorded as "NA".
    """
    gene_names = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if not fields[0]:
                continue
            gene_names[fields[0]] = fields[1] if len(fields) > 1 else "NA"
    return gene_names


def main(argv=None):
    """Merge gene names with their annotations into one tab-separated file.

    Args:
        argv: [annotation_file, gene_name_file, output_file]; defaults to
            ``sys.argv[1:]``.

    Every gene listed in the gene-name file produces one output row:
    ``gene_id <TAB> gene_name <TAB> annotation columns``. Genes with no
    annotation get "NA" in place of the annotation columns.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit("usage: script.py <annotation_file> <gene_name_file> <output_file>")
    annotation_path, gene_name_path, output_path = args

    annotations = read_annotations(annotation_path)
    gene_names = read_gene_names(gene_name_path)

    with open(output_path, "w") as fout:
        for gene_id, gene_name in gene_names.items():
            # Look the values up per key: reusing the loop variables left over
            # from parsing would repeat the last parsed line on every row.
            columns = annotations.get(gene_id)
            # The annotation is a list of columns, so join it before writing;
            # concatenating the list itself to a str raises TypeError.
            annotation = "\t".join(columns) if columns else "NA"
            fout.write("\t".join((gene_id, gene_name, annotation)) + "\n")


if __name__ == "__main__":
    main()
时间: 2024-04-28 17:25:29 浏览: 83
这是一段 Python 代码,它的作用是将两个文件中的数据进行合并,并输出到一个新的文件中。其中,sys.argv[1]、sys.argv[2] 和 sys.argv[3] 是命令行参数,分别代表输入文件1的路径、输入文件2的路径和输出文件的路径。代码中,首先打开输入文件1和输入文件2,并将它们的数据存储到字典 dic1 和字典 dic2 中。然后,对于字典 dic2 中的每一个键,如果它也存在于字典 dic1 中,就将它们的值(即注释信息和基因名)一起输出到输出文件中;如果它不存在于字典 dic1 中,则将其基因名和 "NA" 输出到输出文件中。最后,关闭所有文件句柄。
相关问题
import sys import os f1 = open(sys.argv[1],'r') f2 = open(sys.argv[2],'r') fout = open(sys.argv[3],'w') dic1 = {} for line1 in f1: tmp1 = line1.strip().split("\t") Gene_ID = tmp1[0] anno = tmp1[1:] dic1[Gene_ID] = anno dic2 = {} for line2 in f2: tmp2 = line2.strip().split("\t") Gene_ID = tmp2[0] gene_name = tmp2[1] dic2[Gene_ID] = gene_name for key in dic2: if key in dic1: fout.write(key + "\t" + dic2[key] + "\t" + dic1[key] + "\n") else: fout.write(key + "\t" + dic2[key] + "\t" + "NA" + "\n" ) f1.close() f2.close() fout.close(),这段代码报错信息为:Traceback (most recent call last): File "/public/work/Personal/wuxu/3_blast/ninanjie/pipei.sh", line 22, in <module> fout.write(key + "\t" + dic2[key] + "\t" + dic1[key] + "\n") TypeError: can only concatenate str (not "list") to str,请修改上述代码
根据报错信息可以看到,TypeError 出现在 fout.write 这一行(Traceback 中的第 22 行):字符串不能与列表直接拼接。这是因为 dic1 的值来自 tmp1[1:],是一个列表,而拼接时没有把它转换成字符串。最简单的修改是把 dic1[key] 改为 dic1[key][0],即只取列表中的第一个元素进行拼接;如果需要保留全部注释列,可以改用 "\t".join(dic1[key])。修改后的代码如下:
```
import sys
import os
def load_annotations(path):
    """Return {gene_id: [annotation columns]} parsed from a tab-separated file.

    Column 1 is the gene ID; the remaining columns are kept as a list.
    Blank lines are skipped.
    """
    annotations = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if not fields[0]:
                continue
            annotations[fields[0]] = fields[1:]
    return annotations


def load_gene_names(path):
    """Return {gene_id: gene_name} parsed from a tab-separated file.

    Column 1 is the gene ID and column 2 the gene name. Blank lines are
    skipped; a line that has an ID but no name column is recorded as "NA".
    """
    gene_names = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if not fields[0]:
                continue
            gene_names[fields[0]] = fields[1] if len(fields) > 1 else "NA"
    return gene_names


def main(argv=None):
    """Write gene ID, gene name and first annotation column for each gene.

    Args:
        argv: [annotation_file, gene_name_file, output_file]; defaults to
            ``sys.argv[1:]``.

    Every gene listed in the gene-name file produces one output row. Genes
    that are missing from the annotation file, or that have no annotation
    columns there, get "NA" as their annotation.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit("usage: script.py <annotation_file> <gene_name_file> <output_file>")
    annotation_path, gene_name_path, output_path = args

    annotations = load_annotations(annotation_path)
    gene_names = load_gene_names(gene_name_path)

    with open(output_path, "w") as fout:
        for gene_id, gene_name in gene_names.items():
            columns = annotations.get(gene_id)
            # Only the first annotation column is written. The list may be
            # empty (ID-only line), so guard before indexing. To keep every
            # annotation column instead, use "\t".join(columns).
            annotation = columns[0] if columns else "NA"
            fout.write("\t".join((gene_id, gene_name, annotation)) + "\n")


if __name__ == "__main__":
    main()
```
import sys import os f1 = open(sys.argv[1],'r') f2 = open(sys.argv[2],'r') f3 = open(sys.argv[3],'r') fout = open(sys.argv[4],'w') dic1 = {} for line1 in f1: tmp1 = line1.strip().split() Gene_ID = tmp1[0] # anno = tmp1[1:] # dic1[Gene_ID] = anno dic2 = {} for line2 in f2: tmp2 = line2.strip().split("\t") Gene_ID = tmp2[0] gene_name = tmp2[6] dic2[Gene_ID] = gene_name dic3 = {} for line3 in f3: tmp3 = line3.strip().split("\t") new_name = tmp3[0] raw_name = tmp3[1] dic3[new_name] = raw_name for key in dic2: if key in dic1: fout.write(key + "\t" + dic2[key] + "\n") else: fout.write(key + "\t" + dic2[key] + "\t" + "NA" + "\n" ) f1.close() f2.close() fout.close()修改代码,其中f1中只有一列数据,需要根据f1文件的内容去f2文件检索找到对应的第7列信息,由于第7列每行的字符串由逗号分割,需去掉逗号并进行换行成为新的一列,再根据新的一列信息去和f3第1列匹配,最后打印出f3中与匹配结果对应的第二列信息
import sys
import os
def load_candidate_names(path):
    """Return {gene_id: [names]} from a tab-separated table.

    Column 1 is the gene ID and column 7 holds a comma-separated list of
    names. Each list is split into individual names; empty entries are
    dropped. Lines with fewer than seven columns or an empty ID are skipped.
    """
    candidates = {}
    with open(path, "r") as handle:
        for line in handle:
            # Strip only the line ending: a full strip() would also remove
            # trailing tabs and so drop trailing empty columns.
            fields = line.rstrip("\r\n").split("\t")
            gene_id = fields[0].strip()
            if len(fields) < 7 or not gene_id:
                continue
            names = (name.strip() for name in fields[6].split(","))
            candidates[gene_id] = [name for name in names if name]
    return candidates


def load_raw_names(path):
    """Return {new_name: raw_name} from columns 1 and 2 of a tab-separated file.

    Lines with fewer than two columns are skipped.
    """
    raw_names = {}
    with open(path, "r") as handle:
        for line in handle:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2 or not fields[0]:
                continue
            raw_names[fields[0].strip()] = fields[1].strip()
    return raw_names


def main(argv=None):
    """Resolve each gene ID to the raw names of its comma-separated aliases.

    Args:
        argv: [id_file, table_file, rename_file, output_file]; defaults to
            ``sys.argv[1:]``.

    For every gene ID in ``id_file`` (one per line) the names from column 7
    of ``table_file`` are looked up one by one in ``rename_file``. One output
    row is written per name: ``gene_id <TAB> name <TAB> raw_name``, with "NA"
    when the name has no entry. IDs without any name get ``gene_id NA NA``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 4:
        sys.exit(
            "usage: script.py <id_file> <table_file> <rename_file> <output_file>"
        )
    id_path, table_path, rename_path, output_path = args

    candidates = load_candidate_names(table_path)
    raw_names = load_raw_names(rename_path)

    with open(id_path, "r") as ids, open(output_path, "w") as fout:
        for line in ids:
            gene_id = line.strip()
            if not gene_id:
                continue
            names = candidates.get(gene_id)
            if not names:
                fout.write(gene_id + "\t" + "NA" + "\t" + "NA" + "\n")
                continue
            # Match every name separately; treating the whole comma-separated
            # column as a single key would never hit the rename table.
            for name in names:
                raw_name = raw_names.get(name, "NA")
                fout.write(gene_id + "\t" + name + "\t" + raw_name + "\n")


if __name__ == "__main__":
    main()
阅读全文