import os import pandas as pd path = 'public/work/Personal/wuxu/TF_TR' result = {} for root, dirs, files in os.walk(path): for file in files: if file.endswith('.txt'): filepath = os.path.join(root, file) data = pd.read_csv(filepath, sep='\t', header=None) data = data[data[2] == 'TF'] for index, row in data.iterrows(): value = row[1] if value not in result: result[value] = 1 else: result[value] += 1 # 将结果输出到文件中 with open('output.txt', 'w') as f: for key, value in result.items(): f.write(f'{key}\t{value}\n') 修改上述代码,以txt结尾的文件名为表头,以f1文件的第一列为每行的表头,循环遍历txt文件, 如果txt文件的第三列为“TF”,且与其对应的第二列信息在f1文件中,则统计出其出现的次数,写在对应行列里
时间: 2024-02-14 08:18:38 浏览: 76
import os
import pandas as pd
# Directory scanned (non-recursively) for the tab-separated input files.
path = 'public/work/Personal/wuxu/TF_TR'
# Reference list stored inside `path`; its first column supplies the row labels.
F1_FILENAME = 'f1.txt'
# Destination of the count matrix, relative to the current working directory.
OUTPUT_FILENAME = 'output.txt'


def count_tf_per_file(directory, f1_filename=F1_FILENAME):
    """Count, per data file, how often each reference ID occurs in a "TF" row.

    Every ``*.txt`` file directly inside ``directory`` (except the reference
    file itself) is read as a header-less, tab-separated table. A row is
    counted when its third column equals ``"TF"`` and its second column is one
    of the IDs listed in the first column of the reference file.

    Args:
        directory: Folder holding the reference file and the data files.
        f1_filename: Name of the reference file inside ``directory``.

    Returns:
        A ``(row_labels, column_labels, counts)`` tuple where ``row_labels``
        are the unique reference IDs in file order, ``column_labels`` are the
        data file names without extension (sorted), and ``counts`` maps
        ``row_label -> {column_label: occurrences}`` with a 0 entry for every
        combination that never occurs.

    Raises:
        FileNotFoundError: If ``directory`` or the reference file is missing.
        pandas.errors.EmptyDataError: If the reference file is empty.
    """
    # Read everything as text so IDs compare equal regardless of how pandas
    # would otherwise infer the column type in each individual file.
    f1_data = pd.read_csv(
        os.path.join(directory, f1_filename), sep='\t', header=None, dtype=str
    )
    # dict.fromkeys de-duplicates while keeping the reference file's order.
    row_labels = list(dict.fromkeys(f1_data[0].dropna()))
    allowed = set(row_labels)

    # The reference file also ends in ".txt"; it must not be treated as data.
    # Sorting makes the column order reproducible (os.listdir order is not).
    txt_files = sorted(
        name for name in os.listdir(directory)
        if name.endswith('.txt') and name != f1_filename
    )
    column_labels = [os.path.splitext(name)[0] for name in txt_files]

    # Pre-fill with zeros so reference IDs that never occur still get a row.
    counts = {label: dict.fromkeys(column_labels, 0) for label in row_labels}

    for name, column in zip(txt_files, column_labels):
        try:
            data = pd.read_csv(
                os.path.join(directory, name), sep='\t', header=None, dtype=str
            )
        except pd.errors.EmptyDataError:
            continue  # an empty file simply contributes no counts
        if data.shape[1] < 3:
            continue  # no third column, hence no "TF" rows to count

        tf_ids = data.loc[data[2] == 'TF', 1]
        matched = tf_ids[tf_ids.isin(allowed)]
        for label, occurrences in matched.value_counts().items():
            counts[label][column] = int(occurrences)

    return row_labels, column_labels, counts


def write_count_table(row_labels, column_labels, counts,
                      output_path=OUTPUT_FILENAME):
    """Write the count matrix as a tab-separated table.

    The first line holds an empty corner cell followed by the column labels;
    each following line holds a row label followed by its counts. Lines carry
    no trailing tab, so every line has exactly ``len(column_labels) + 1``
    fields.

    Args:
        row_labels: Row labels in output order.
        column_labels: Column labels in output order.
        counts: Mapping ``row_label -> {column_label: occurrences}``.
        output_path: File to create or overwrite.
    """
    lines = ['\t'.join(['', *column_labels])]
    lines.extend(
        '\t'.join([label, *(str(counts[label][column]) for column in column_labels)])
        for label in row_labels
    )
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')


def main():
    """Build the TF count matrix for `path` and write it to OUTPUT_FILENAME."""
    write_count_table(*count_tf_per_file(path))


if __name__ == '__main__':
    main()
阅读全文