Windows系统下python实现从Word中读取全部内容包含图片,并放入新的Word中
时间: 2024-05-15 16:19:14 浏览: 231
要实现从Word中读取内容(包含图片)并放入新的Word中,只需要使用 python-docx 库(安装命令为 `pip install python-docx`,导入时的模块名为 `docx`),不需要其他第三方库。下面是一个基本的示例代码:
```
import logging
from io import BytesIO

from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.image.image import Image
from docx.image.exceptions import UnrecognizedImageError
from docx.text.paragraph import Paragraph
logger = logging.getLogger(__name__)


def add_picture(paragraph, picture_path):
    """Append an inline picture to ``paragraph`` and center the paragraph.

    The picture is inserted through python-docx's public ``Run.add_picture``
    API instead of hand-built XML, so the image part, its relationship and
    the drawing markup are all created by the library.

    Args:
        paragraph: The ``docx.text.paragraph.Paragraph`` receiving the picture.
        picture_path: Path to an image file, or a binary file-like object
            containing the image bytes.

    Returns:
        The inline shape created for the picture, or ``None`` when the image
        format is not recognized and the picture was skipped.
    """
    run = paragraph.add_run()
    try:
        shape = run.add_picture(picture_path)
    except UnrecognizedImageError:
        # Best effort: one unsupported image must not abort the whole copy,
        # but leave a trace instead of skipping it silently.
        logger.warning(
            "Skipping picture with unrecognized format: %r", picture_path
        )
        return None
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    return shape


# Read the original document
doc = Document('original.docx')
# Create the new document
new_doc = Document()

# Iterate Paragraph proxies: iterating doc.element.body yields raw XML
# elements, which are never Paragraph instances, so nothing would be copied.
# NOTE: only body-level paragraphs are handled; tables, headers and footers
# are not copied.
for paragraph in doc.paragraphs:
    source_style = paragraph.style
    style_name = source_style.name if source_style is not None else None
    # Fall back to the default style when the new document's template does
    # not define a style with this name (add_paragraph would raise KeyError).
    if style_name is None or style_name not in new_doc.styles:
        style_name = None

    # Copy the paragraph text
    new_paragraph = new_doc.add_paragraph(paragraph.text, style_name)

    # Copy the pictures embedded in the paragraph. Embedded images are
    # referenced by an a:blip element nested inside w:drawing, so search the
    # whole run subtree. Pictures are appended after the paragraph text.
    for run in paragraph.runs:
        for rel_id in run._element.xpath('.//a:blip/@r:embed'):
            image_part = doc.part.related_parts[rel_id]
            # The image lives inside the .docx package, not on disk, so hand
            # its bytes over as an in-memory stream.
            add_picture(new_paragraph, BytesIO(image_part.blob))

new_doc.save('new.docx')
```
这个示例代码首先读取了原始文档,然后创建一个新的文档。它遍历原始文档中的所有段落,并复制它们到新文档中。在复制段落时,它会处理其中的图片,将它们添加到新文档中。处理图片时,它使用了add_picture函数,该函数会将图片添加到指定的段落中。需要注意的是,该示例只处理正文中的段落及其内嵌图片,表格、页眉、页脚等内容不会被复制。
阅读全文