"" base64编解码处理 用于处理某些被加密的敏感词库 author : @h-j-13 time : 2018-7-18 """ import os import base64 def decode64file(path_file): """解码base64加密的文件""" with open(path_file, 'rb') as f: str_set = set() for line in f: s = line.strip() # strip()方法消除多余的空格之类的特殊字符 if s.endswith(b'Cg=='): s = s.replace(b'Cg==', b'') str_set.add(base64.b64decode(s)) return str_set train_data_url = r"C:\Users\曹福滨\Downloads\tc-corpus-\answer" def get_all_file_by_path(path=train_data_url): """获取某个目录下的所有训练文件""" file_path = [] dir_list = os.listdir(train_data_url) for d in dir_list: file_path.extend(map(lambda x: train_data_url + d + '/' + x, os.listdir(train_data_url + d))) return file_path def decode_file2utf8(file_path): """将文件从GB2312编码解码为utf8文件""" decode_error = False file_data = [] with open(file_path, 'r') as f: for l in f.readlines(): try: tmp = l.encode('gbk').decode('utf8') except Exception as e: decode_error = True tmp = '' file_data.append(tmp) if decode_error: os.remove(file_path) else: with open(file_path, 'w') as f: f.writelines(file_data) if __name__ == '__main__': for p in get_all_file_by_path(): decode_file2utf8(p)
时间: 2023-04-05 20:03:45 浏览: 167
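这段代码用于解码 base64 编码的文件,主要用来处理一些被加密的敏感词库。作者是 @h-j-13,时间是 2018 年 7 月 18 日。代码使用了 Python 的 os 和 base64 模块,其中 decode64file 函数接受一个文件路径作为参数,逐行读取文件内容并进行 base64 解码,最终返回一个由解码后的字节串(bytes)组成的集合。此外,代码还包含 get_all_file_by_path(收集训练目录下各子目录中的文件路径)和 decode_file2utf8(尝试转换文件编码,转换失败时删除该文件)两个函数。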
阅读全文