Traceback (most recent call last): File "C:/Users/郑紫晗/Desktop/测试.py", line 8, in <module> doc.add_paragraph(pdf_text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\document.py", line 56, in add_paragraph return self._body.add_paragraph(text, style) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\blkcntnr.py", line 37, in add_paragraph paragraph.add_run(text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\text\paragraph.py", line 37, in add_run run.text = text File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\text\run.py", line 163, in text self._r.text = text File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\text\run.py", line 104, in text _RunContentAppender.append_to_run_from_text(self, text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\text\run.py", line 134, in append_to_run_from_text appender.add_text(text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\text\run.py", line 143, in add_text self.flush() File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\text\run.py", line 165, in flush self._r.add_t(text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\text\run.py", line 41, in add_t t = self._add_t(text=text) File "C:\Users\郑紫晗\AppData\Local\Programs\Python\Python39\lib\site-packages\docx\oxml\xmlchemy.py", line 273, in _add_child setattr(child, key, value) File "src\lxml\etree.pyx", line 1042, in lxml.etree._Element.text.__set__ File "src\lxml\apihelpers.pxi", line 748, in lxml.etree._setNodeText File "src\lxml\apihelpers.pxi", line 736, in lxml.etree._createTextNode File "src\lxml\apihelpers.pxi", line 1541, in lxml.etree._utf8 ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters
时间: 2023-06-28 19:10:34 浏览: 182
Linux 下 pip3 报错“File “/usr/bin/pip3”, line 9, in <module> from pip import main ImportError: cannot import name 'main'”处理
5星 · 资源好评率100%
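这个错误提示表明你传递给 docx 库的文本中包含 XML 不允许的字符，如 NULL 字节或其他控制字符。你需要先对文本进行处理，去除其中的非法字符后再传递给 docx 库。可以使用 Python 的字符串处理函数，如 replace() 函数来去除非法字符。例如：pdf_text = pdf_text.replace('\x00', '') 可以去除文本中的 NULL 字节（注意 replace() 返回的是新字符串，必须重新赋值才会生效）。如果文本中还含有其他控制字符，可以用正则表达式一并去除，例如：pdf_text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', pdf_text)，它会保留 XML 允许的制表符、换行符和回车符。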
阅读全文