```python
import os

header = ["[REF]", "[QRY]", "[Sequences]", "TotalSeqs", "AlignedSeqs", "UnalignedSeqs",
          "[Bases]", "TotalBases","AlignedBases", "UnalignedBases", "[Alignments]",
          "1-to-1", "TotalLength", "AvgLength", "AvgIdentity","M-to-M", "TotalLength",
          "AvgLength", "AvgIdentity", "[Feature Estimates]", "Breakpoints", "Relocations",
          "Translocations", "Inversions", "Insertions", "InsertionSum", "InsertionAvg",
          "TandemIns", "TandemInsSum","TandemInsAvg", "[SNPs]", "TotalSNPs"]
data = {}
for file_name in os.listdir("/public/work/Personal/wuxu/qiantao_17"):
    if file_name.endswith(".report"):
        # print(file_name, end="\n")
        with open(os.path.join("/public/work/Personal/wuxu/qiantao_17", file_name), "r") as f:
            for line in f:
                split_line = line.strip().split()
                if len(split_line) < 2:  # 检查是否有足够的元素
                    continue  # 没有则忽略该行
                key, value = split_line[:2]
                if key not in data:
                    # 创建一个包含标题行的列表,以避免在处理某些键时出现“Index out of range”错误
                    data[key] = header if key == "[REF]" else [value]
                else:
                    data[key].append(value)
# 输出表头
#print(*header, sep="\t")
print("file_name", *header, sep="\t")
# 输出数据
for file_name in os.listdir("/public/work/Personal/wuxu/qiantao_17"):
    if file_name.endswith(".report"):
        print(file_name, end="\n")
        for i in range(len(data.get("[REF]", []))):
            row = [data.get(key, [""] * len(data.get("[REF]", [])))[i] for key in header]
            print(*row, sep="\t")
```

修改脚本,使file_name作为键,*row作为键值,输出内容为每行第一列为file_name,其余列为*row
时间: 2023-07-03 16:33:43 浏览: 68
你可以在处理每个文件时,将所有的数据存储在一个以文件名为键的字典中,然后在输出时,遍历该字典,输出每个文件名及其对应的数据行。
可以按如下修改代码:
```python
import os
# Column labels of the output table, in output order.  Some labels
# ("TotalLength", "AvgLength", "AvgIdentity") appear twice on purpose; the
# n-th occurrence here is matched with the n-th occurrence in a report.
header = ["[REF]", "[QRY]", "[Sequences]", "TotalSeqs", "AlignedSeqs", "UnalignedSeqs",
          "[Bases]", "TotalBases", "AlignedBases", "UnalignedBases", "[Alignments]",
          "1-to-1", "TotalLength", "AvgLength", "AvgIdentity", "M-to-M", "TotalLength",
          "AvgLength", "AvgIdentity", "[Feature Estimates]", "Breakpoints", "Relocations",
          "Translocations", "Inversions", "Insertions", "InsertionSum", "InsertionAvg",
          "TandemIns", "TandemInsSum", "TandemInsAvg", "[SNPs]", "TotalSNPs"]

# Directory that is scanned for "*.report" files.
REPORT_DIR = "/public/work/Personal/wuxu/qiantao_17"


def parse_report(lines):
    """Group the values of one report by the label that starts each line.

    Each line is split on whitespace.  The first token is the label and the
    second token is its value; any further tokens are ignored.  Lines with
    fewer than two tokens (blank lines, labels standing alone) are skipped.

    Args:
        lines: Iterable of text lines, e.g. an open file object.

    Returns:
        A dict mapping each label to the list of values found for it, in
        the order the lines were read.  A label that occurs several times
        in the report therefore keeps every one of its values.
    """
    values = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 2:
            continue
        label, value = fields[:2]
        # A fresh list per label: never reuse ``header`` as storage, or
        # parsing would silently grow the header row.
        values.setdefault(label, []).append(value)
    return values


def build_row(values, columns=None):
    """Lay out the parsed values of one report in column order.

    Args:
        values: Mapping of label -> list of values, as returned by
            ``parse_report``.
        columns: Column labels to emit; defaults to ``header``.

    Returns:
        A list with exactly one cell per column.  The n-th occurrence of a
        label in ``columns`` receives the n-th value recorded for that
        label; a column without a matching value gets an empty string.
    """
    if columns is None:
        columns = header
    used = {}  # label -> how many of its values have been placed so far
    row = []
    for label in columns:
        nth = used.get(label, 0)
        used[label] = nth + 1
        found = values.get(label, [])
        row.append(found[nth] if nth < len(found) else "")
    return row


def collect_reports(directory=REPORT_DIR):
    """Read every "*.report" file in ``directory``.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A dict mapping each report's file name to its row of cells (see
        ``build_row``).  File names are visited in sorted order so the
        output order does not depend on the order ``os.listdir`` returns.

    Raises:
        OSError: If the directory or one of the report files cannot be read.
    """
    data = {}
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith(".report"):
            continue
        with open(os.path.join(directory, file_name), "r") as f:
            data[file_name] = build_row(parse_report(f))
    return data


def main():
    """Print a tab-separated table with one line per report file."""
    data = collect_reports()
    # Header line
    print("file_name", *header, sep="\t")
    # Data lines: file name first, then that file's cells
    for file_name, row in data.items():
        print(file_name, *row, sep="\t")


if __name__ == "__main__":
    main()
```
这样修改后,输出的每一行第一列为文件名,其余列为该文件对应的数据。
阅读全文