用Python获取一个通用规范汉字文件(TGhanzi.txt)中的每个汉字的拼音、部首名称的拼音、末笔画(最后一笔)名称的拼音,并写入文件
时间: 2024-05-19 13:13:04 浏览: 46
# 导入需要的库
import os
import re
# Path of the input file holding the standard Chinese characters.
file_path = "TGhanzi.txt"
# Output file: one "<character>:<pinyin>" line per record.
pinyin_file_path = "pinyin.txt"
# Output file: one "<character>:<pinyin of the radical name>" line per record.
bushou_file_path = "bushou.txt"
# Output file: one "<character>:<pinyin of the last-stroke name>" line per record.
mo_file_path = "mo.txt"
# A record is a single non-space character followed by three
# whitespace-separated fields.  The closing `(?!\S)` only requires that the
# fourth field is not followed by another non-space character, so the last
# record still matches when the file has no trailing newline (a mandatory
# trailing `\s+` would silently drop it).
# NOTE(review): assumes the input already carries the three pinyin columns
# after each character -- confirm against the real TGhanzi.txt layout.
re_pattern = r"(\S)\s+(\S+)\s+(\S+)\s+(\S+)(?!\S)"
pattern = re.compile(re_pattern)


def extract_records(text):
    """Return every four-field record found in *text*.

    Args:
        text: Full contents of the character file.

    Returns:
        A list of 4-tuples of strings, one per match of ``pattern``, in the
        order they occur in *text*.  The list is empty when nothing matches.
    """
    return pattern.findall(text)


def write_field(path, records, index):
    """Write ``"<record[0]>:<record[index]>"`` lines to *path*.

    Args:
        path: Destination file; it is created or truncated.
        records: Iterable of tuples as returned by ``extract_records``.
        index: Position of the field to pair with the character (field 0).
    """
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(f"{record[0]}:{record[index]}\n" for record in records)


def main():
    """Read the character file and write the three per-field output files."""
    with open(file_path, "r", encoding="utf-8") as f:
        hanzi_content = f.read()
    result = extract_records(hanzi_content)
    # Fields 1..3 go to the pinyin, radical-name and last-stroke files.
    write_field(pinyin_file_path, result, 1)
    write_field(bushou_file_path, result, 2)
    write_field(mo_file_path, result, 3)


if __name__ == "__main__":
    main()