import os
header = ["[REF]", "[QRY]", "[Sequences]", "TotalSeqs", "AlignedSeqs", "UnalignedSeqs", "[Bases]", "TotalBases","AlignedBases", "UnalignedBases", "[Alignments]", "1-to-1", "TotalLength", "AvgLength", "AvgIdentity","M-to-M", "TotalLength", "AvgLength", "AvgIdentity", "[Feature Estimates]", "Breakpoints", "Relocations", "Translocations", "Inversions", "Insertions", "InsertionSum", "InsertionAvg", "TandemIns", "TandemInsSum","TandemInsAvg", "[SNPs]", "TotalSNPs"]
data = {}
for file_name in os.listdir("/public/work/Personal/wuxu/qiantao_17"):
    if file_name.endswith(".report"):
        with open(os.path.join("/public/work/Personal/wuxu/qiantao_17", file_name), "r") as f:
            for line in f:
                split_line = line.strip().split()
                if len(split_line) < 2:
                    continue
                key, value = split_line[:2]
                if key not in data:
                    data[key] = header if key == "[REF]" else [value]
                else:
                    data[key].append(value)
# 输出表头
print("file_name", *header, sep="\t")
# 输出数据
for file_name in os.listdir("/public/work/Personal/wuxu/qiantao_17"):
    if file_name.endswith(".report"):
        for i in range(len(data.get("[REF]", []))):
            row = [file_name] + [data.get(key, [""] * len(data.get("[REF]", [])))[i] for key in header]
            print(*row, sep="\t")

请修改以上代码，使输出结果的第一列每一行为每个 .report 文件的文件名，其余列为对应的数据。
时间: 2023-08-23 15:06:30 浏览: 121
import os

# Directory that is scanned for "*.report" files when run as a script.
REPORT_DIR = "/public/work/Personal/wuxu/qiantao_17"

# Column names of the output table, in output order.  Some names occur twice
# (e.g. "TotalLength" after "1-to-1" and again after "M-to-M"); build_row()
# matches them to the report by occurrence.
# NOTE(review): the names look like the layout of a MUMmer dnadiff report --
# confirm against a real input file.
header = [
    "[REF]", "[QRY]",
    "[Sequences]", "TotalSeqs", "AlignedSeqs", "UnalignedSeqs",
    "[Bases]", "TotalBases", "AlignedBases", "UnalignedBases",
    "[Alignments]",
    "1-to-1", "TotalLength", "AvgLength", "AvgIdentity",
    "M-to-M", "TotalLength", "AvgLength", "AvgIdentity",
    "[Feature Estimates]", "Breakpoints", "Relocations", "Translocations",
    "Inversions", "Insertions", "InsertionSum", "InsertionAvg",
    "TandemIns", "TandemInsSum", "TandemInsAvg",
    "[SNPs]", "TotalSNPs",
]


def parse_report(lines):
    """Collect the key/value pairs of one report.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        A dict mapping the first whitespace-separated token of a line to the
        list of second tokens seen for it, in file order.  Lines with fewer
        than two tokens are ignored.
    """
    values = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 2:
            continue
        values.setdefault(fields[0], []).append(fields[1])
    return values


def build_row(values, columns):
    """Return one table row (without the file-name cell) for one report.

    Args:
        values: mapping produced by parse_report().
        columns: column names; a name that appears for the k-th time in
            ``columns`` receives the k-th value recorded for that name.

    Returns:
        A list with exactly ``len(columns)`` strings.  Columns that have no
        matching value are filled with "" so every row stays aligned with
        the header.
    """
    occurrences = {}
    row = []
    for column in columns:
        nth = occurrences.get(column, 0)
        occurrences[column] = nth + 1
        found = values.get(column, [])
        # Bracketed names are treated as headings rather than measurements
        # (assumption about the report layout), so their cells stay empty.
        if column.startswith("[") or nth >= len(found):
            row.append("")
        else:
            row.append(found[nth])
    return row


def main(report_dir=REPORT_DIR):
    """Print a tab-separated table with one row per ``*.report`` file.

    The first cell of each row is the file name without its ".report"
    suffix; the remaining cells follow ``header``.

    Args:
        report_dir: directory to scan; defaults to REPORT_DIR.
    """
    print("file_name", *header, sep="\t")
    suffix = ".report"
    # os.listdir() gives no ordering guarantee; sort for reproducible output.
    for file_name in sorted(os.listdir(report_dir)):
        if not file_name.endswith(suffix):
            continue
        with open(os.path.join(report_dir, file_name), "r") as f:
            values = parse_report(f)
        # Strip only the suffix so names such as "a.v2.report" stay distinct.
        label = file_name[:-len(suffix)]
        print(label, *build_row(values, header), sep="\t")


if __name__ == "__main__":
    main()
修改后的代码对文件名进行了处理：输出数据时，第一列只输出文件名的前缀部分（即去掉了后缀 ".report"），其余各列输出对应的数据。其余部分的代码和原来的代码基本相同。
阅读全文