python pdf批量转txt
时间: 2023-09-06 20:09:13 浏览: 200
可以使用 Python 的 pdfminer 库(目前仍在维护的版本是 pdfminer.six,可通过 `pip install pdfminer.six` 安装)将 PDF 文件批量转换为 txt 文件。
以下是一个示例代码,它可以将指定文件夹中的所有PDF文件转换为txt文件:
```python
import os
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
def pdf_to_txt(pdf_path, txt_path):
    """Extract the text of one PDF file and write it to a text file.

    Every page returned by ``PDFPage.get_pages`` for the PDF is run through
    a ``PDFPageInterpreter`` whose output device is a ``TextConverter``
    writing to ``txt_path``. The target file is created, or overwritten if
    it already exists.

    Args:
        pdf_path: Path of the PDF file to read.
        txt_path: Path of the text file to write.

    Raises:
        OSError: If ``pdf_path`` cannot be opened for reading or
            ``txt_path`` cannot be opened for writing.
    """
    resource_manager = PDFResourceManager()
    laparams = LAParams()
    # Open the PDF first so that a missing/unreadable input does not leave
    # an empty output file behind. The output is a binary stream and the
    # converter is handed an explicit 'utf-8' codec.
    with open(pdf_path, 'rb') as pdf_file, open(txt_path, 'wb') as txt_file:
        device = TextConverter(
            resource_manager, txt_file, codec='utf-8', laparams=laparams
        )
        try:
            interpreter = PDFPageInterpreter(resource_manager, device)
            for page in PDFPage.get_pages(pdf_file):
                interpreter.process_page(page)
        finally:
            # Close the converter even when a page fails to parse.
            device.close()
if __name__ == '__main__':
    # Batch driver: convert every PDF in pdf_folder to a same-named .txt
    # file in txt_folder.
    pdf_folder = './pdf_folder/'
    txt_folder = './txt_folder/'
    pdf_suffix = '.pdf'

    # Opening an output file inside a missing directory raises
    # FileNotFoundError, so make sure the target folder exists up front.
    os.makedirs(txt_folder, exist_ok=True)

    for file_name in os.listdir(pdf_folder):
        # Compare case-insensitively so that e.g. 'REPORT.PDF' is converted.
        if not file_name.lower().endswith(pdf_suffix):
            continue
        pdf_path = os.path.join(pdf_folder, file_name)
        # Swap only the trailing extension; str.replace would also rewrite
        # any '.pdf' occurring earlier in the file name.
        txt_name = file_name[:-len(pdf_suffix)] + '.txt'
        txt_path = os.path.join(txt_folder, txt_name)
        pdf_to_txt(pdf_path, txt_path)
```
将以上代码保存为一个 Python 文件并运行,即可将 `./pdf_folder/` 文件夹中的所有 PDF 文件转换为 txt 文件,并保存到 `./txt_folder/` 文件夹中。运行前请确保 `./pdf_folder/` 已存在并放有待转换的 PDF 文件。
阅读全文