java读PDF表格中的文字,查询到指定文字‘签名’后,向右边的表格内插入本地图片,写详细demo
时间: 2023-05-22 15:07:01 浏览: 165
非常感谢您的提问。以下是一个基于Apache PDFBox的Java代码示例,它可以读取PDF表格中的文字,并在找到指定文字“签名”时,在其右侧插入本地图片。
在本示例中,我们使用名为"test.pdf"的PDF文件:程序会在该文件的第一页中查找内容为“签名”的单元格,并在该单元格的右侧插入本地图片"image.png"。
以下是完整的示例代码:
// 导入PDFBox库
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.table.PDPageContentStreamTableCell;
import org.apache.pdfbox.pdmodel.table.PDTable;
import org.apache.pdfbox.pdmodel.table.PDTableElement;
import org.apache.pdfbox.pdmodel.table.PDTableElementCell;
import org.apache.pdfbox.pdmodel.table.PDTableElementRow;
import org.apache.pdfbox.pdmodel.table.PDTableFactory;
import org.apache.pdfbox.pdmodel.table.PDTableStyle;
import org.apache.pdfbox.pdmodel.table.PDTextCell;
import org.apache.pdfbox.pdmodel.table.PDTextElement;
import org.apache.pdfbox.pdmodel.table.PDTextRow;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.util.Matrix;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
/**
 * Demo that looks for a table cell whose text equals a search string on the first
 * page of a PDF and draws a local image immediately to the right of that cell.
 *
 * <p>NOTE(review): the {@code org.apache.pdfbox.pdmodel.table.*} types used below
 * ({@code PDTable}, {@code PDTableFactory}, {@code PDTextRow}, ...) do not appear to
 * be part of the stock Apache PDFBox 2.x distribution. Confirm which artifact
 * provides them before relying on this demo.
 */
public class PDFTableImageInsertDemo {

    /**
     * Loads {@code test.pdf}, searches the table built by {@link #getTableContent}
     * for a cell whose text equals the search string, draws {@code image.png} to the
     * right of that cell and writes the result to {@code output.pdf}.
     *
     * <p>When the cell is not found, a message is printed and no output file is
     * written. Any {@link IOException} is reported on standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String pdfFilePath = "test.pdf";
        String findText = "签名";
        String imageFilePath = "image.png";
        // try-with-resources: the document is closed even if parsing, drawing or saving throws.
        try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
            PDPage page = document.getPage(0);
            float pageWidth = page.getMediaBox().getWidth();
            float pageHeight = page.getMediaBox().getHeight();
            PDTable table = PDTableFactory.createTable(getTableContent(page),
                    new PDTableStyle());

            // NOTE(review): only header rows are scanned here, while the hit is later
            // looked up through table.getRow(...). Confirm whether body rows should be
            // searched as well.
            PDTableElementRow foundRow = null;
            PDTableElementCell foundCell = null;
            search:
            for (int rowIndex = 0; rowIndex < table.getHeaderRowCount(); rowIndex++) {
                PDTableElementRow row = table.getHeaderRow(rowIndex);
                for (int cellIndex = 0; cellIndex < row.getCellCount(); cellIndex++) {
                    PDTableElementCell cell = row.getCell(cellIndex);
                    if (cell.containsText() && cell.getText().equals(findText)) {
                        foundRow = row;
                        foundCell = cell;
                        break search;
                    }
                }
            }

            if (foundCell == null) {
                System.out.println("无法找到指定的单元格: " + findText);
                return;
            }

            // Anchor the image at the right edge of the matched cell, 5 units below
            // the bottom of its row.
            PDTableElementRow imageRow = table.getRow(foundRow.getRowIndex());
            float cellWidth = foundCell.getColSpan() * foundCell.getWidth();
            float cellLeft = foundCell.getTopLeftX();
            float cellTop = imageRow.getTop() - imageRow.getHeight() - 5;

            // ImageIO.read returns null (rather than throwing) when no registered
            // reader can decode the file; surface that as an I/O failure instead of
            // hitting a NullPointerException below.
            BufferedImage image = ImageIO.read(new File(imageFilePath));
            if (image == null) {
                throw new IOException("无法解码图像文件: " + imageFilePath);
            }

            // NOTE(review): per the PDFBox AppendMode documentation, PREPEND adds this
            // stream ahead of the page's existing content, so the image is painted
            // first. Confirm that drawing underneath existing content is intended.
            try (PDPageContentStream contentStream = new PDPageContentStream(
                    document, page, PDPageContentStream.AppendMode.PREPEND, false)) {
                // The image is drawn at half of its pixel dimensions.
                contentStream.drawImage(
                        image,
                        cellLeft + cellWidth,
                        cellTop,
                        image.getWidth() / 2,
                        image.getHeight() / 2);
            }

            // Redraw the table on top of the page. The transform moves the origin to
            // the top-left corner and flips the y axis before drawing.
            try (PDPageContentStream contents = new PDPageContentStream(
                    document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
                Matrix matrix = new Matrix();
                matrix.translate(0, pageHeight);
                matrix.scale(1, -1);
                contents.transform(matrix);
                table.draw(contents, 0, 0, pageWidth - 50, pageHeight - 50, true);
            }

            // Only save and report success when an image was actually inserted.
            document.save("output.pdf");
            System.out.println("已成功在指定单元格插入图像。");
        } catch (IOException ex) {
            System.err.println("出现错误: " + ex.getMessage());
        }
    }

    /**
     * Builds a table element tree for the given page by rendering the page to a
     * bitmap and scanning it row by row: runs of pixels whose red channel is above
     * 200 are collected into cells and rows.
     *
     * <p>NOTE(review): the "text" stored in each cell is made of pixel values cast to
     * {@code char}, not text extracted from the PDF, so it is unlikely to ever equal
     * a real search string. A text-extraction API (for example PDFBox's
     * {@code PDFTextStripper}) is probably what is needed here. Confirm.
     *
     * <p>NOTE(review): PDFBox 2.x documents {@code PDFRenderer(PDDocument)} and
     * {@code renderImageWithDPI(int pageIndex, float dpi, ImageType)}; the calls below
     * do not match those signatures. Verify against the library version in use.
     *
     * @param page the page to scan
     * @return the root element holding the detected rows and cells
     * @throws IOException if rendering the page fails
     */
    private static PDTableElement getTableContent(PDPage page) throws IOException {
        PDTableElement tableElement = new PDTextRow();
        PDFRenderer renderer = new PDFRenderer(page);
        BufferedImage image = renderer.renderImageWithDPI(72, ImageType.RGB);
        int imageWidth = image.getWidth();
        int imageHeight = image.getHeight();
        PDType1Font font = PDType1Font.HELVETICA;
        int fontSize = 12;
        int tableTop = 0;
        int tableWidth = 0;
        for (int y = 0; y < imageHeight; y++) {
            StringBuilder rowText = null;
            int rowLeft = -1;
            int cellIndex = 0;
            for (int x = 0; x < imageWidth; x++) {
                int color = image.getRGB(x, y);
                int red = (color >> 16) & 0xff;
                if (red > 200) {
                    // Start (or continue) a run of bright pixels.
                    if (rowText == null) {
                        rowText = new StringBuilder();
                        rowLeft = x;
                    }
                    // Appends the low 16 bits of the packed pixel value as a char.
                    rowText.append((char) color);
                } else if (rowText != null) {
                    // A dark pixel ends the current run: emit it as a cell.
                    String cellContent = rowText.toString().trim();
                    PDTextElement cellElement = new PDTextCell();
                    cellElement.setFont(font);
                    cellElement.setFontSize(fontSize);
                    cellElement.setLineSpacing(1.0f);
                    cellElement.setIndent(5.0f);
                    cellElement.setText(cellContent);
                    PDTableElementCell cell = new PDPageContentStreamTableCell();
                    cell.setColSpan(1);
                    cell.setRowSpan(1);
                    cell.setElement(cellElement);
                    PDTableElementRow row = tableElement.getLast() != null
                            ? (PDTableElementRow) tableElement.getLast()
                            : null;
                    // Open a new row when there is none yet or the last row's top
                    // differs from the tracked table top.
                    if (row == null || row.getTop() != tableTop) {
                        row = new PDTextRow();
                        ((PDTextRow) row).setIndent(0);
                        tableElement.addElement(row);
                        tableTop = row.getTop();
                        tableWidth = 0;
                    }
                    assert row != null;
                    if (x - rowLeft < tableWidth || cellIndex >= row.getCellCount()) {
                        row.addElement(cell);
                    } else {
                        // Drop the cells from cellIndex onwards, then place the new
                        // cell at cellIndex.
                        for (int i = row.getCellCount() - 1; i >= cellIndex; i--) {
                            row.removeElement(i);
                        }
                        row.addElement(cell, cellIndex);
                    }
                    cellIndex++;
                    rowText = null;
                }
            }
            if (rowText != null) {
                // The scan line ended inside a bright run: emit the remainder as a
                // row of its own with two empty cells.
                String rowContent = rowText.toString().trim();
                PDTextElement rowElement = new PDTextRow();
                rowElement.setFont(font);
                rowElement.setFontSize(fontSize);
                rowElement.setLineSpacing(1.0f);
                rowElement.setIndent(5.0f);
                rowElement.setText(rowContent);
                PDTableElementRow row = new PDTextRow();
                ((PDTextRow) row).setIndent(0);
                row.addElement(new PDPageContentStreamTableCell(), cellIndex);
                row.addElement(new PDPageContentStreamTableCell(), cellIndex + 1);
                row.setElement(rowElement);
                tableElement.addElement(row);
            }
        }
        return tableElement;
    }
}
希望这个代码示例对您有所帮助。如果您有任何疑问,请随时告诉我。
阅读全文