java html转excel,保留原有框架
时间: 2024-01-01 21:05:09 浏览: 141
java实现 html转Excel
5星 · 资源好评率100%
如果您需要将 HTML 文件中的表格以及表格外的其他内容都转换为 Excel 文件,并保留原有的框架,可以组合使用 jsoup(解析 HTML)、Apache POI(生成 Excel)、iText XML Worker(HTML 转 PDF)和 Apache PDFBox(PDF 渲染为图像)来实现。以下是一个简单的示例代码:
```java
import com.itextpdf.tool.xml.XMLWorkerHelper;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.ClientAnchor;
import org.apache.poi.ss.usermodel.CreationHelper;
import org.apache.poi.ss.usermodel.Drawing;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Converts {@code example.html} into the workbook {@code example.xls}.
 *
 * <p>Each direct child of the HTML {@code <body>} is handled in document order:
 * a {@code <table>} is copied cell by cell into sheet rows, and any other
 * element is rendered to a PNG (via a temporary PDF) and anchored in its own row.
 */
public class HtmlToExcelConverter {

    /** Height, in points, given to every row that holds a rendered image. */
    private static final float IMAGE_ROW_HEIGHT_POINTS = 200f;

    /**
     * Runs the conversion with the hard-coded input and output paths.
     *
     * @param args unused
     * @throws Exception if reading the HTML, rendering an element, or writing the workbook fails
     */
    public static void main(String[] args) throws Exception {
        String htmlFilePath = "example.html";
        String excelFilePath = "example.xls";

        // Parse the HTML and release the file handle as soon as parsing is done.
        Document document;
        try (FileInputStream htmlStream = new FileInputStream(htmlFilePath)) {
            document = Jsoup.parse(htmlStream, "UTF-8", "");
        }

        // try-with-resources guarantees the workbook is closed even when a step fails.
        try (Workbook workbook = new HSSFWorkbook()) {
            // Shared cell style: black font.
            CellStyle style = workbook.createCellStyle();
            Font font = workbook.createFont();
            font.setColor(IndexedColors.BLACK.getIndex());
            style.setFont(font);

            Sheet sheet = workbook.createSheet("Sheet1");

            int rowIndex = 0;
            for (Element element : document.body().children()) {
                if ("table".equals(element.tagName())) {
                    rowIndex = writeTable(sheet, element, rowIndex, style);
                } else {
                    writeElementAsImage(sheet, element, rowIndex++);
                }
            }

            try (FileOutputStream outputStream = new FileOutputStream(excelFilePath)) {
                workbook.write(outputStream);
            }
        }
        System.out.println("Excel 文件已生成!");
    }

    /**
     * Copies every {@code <tr>} of the table into consecutive sheet rows.
     *
     * @return the index of the first row after the table
     */
    private static int writeTable(Sheet sheet, Element table, int firstRowIndex, CellStyle style) {
        int rowIndex = firstRowIndex;
        for (Element tableRow : table.getElementsByTag("tr")) {
            Row row = sheet.createRow(rowIndex++);
            // Select header cells too; matching only <td> would leave header rows empty.
            Elements cells = tableRow.select("th, td");
            for (int j = 0; j < cells.size(); j++) {
                row.createCell(j).setCellValue(cells.get(j).text());
                row.getCell(j).setCellStyle(style);
            }
        }
        return rowIndex;
    }

    /** Renders a non-table element to a PNG through a temporary PDF and places it in the given row. */
    private static void writeElementAsImage(Sheet sheet, Element element, int rowIndex) throws Exception {
        String tempPdfFilePath = "temp.pdf";
        String tempPngFilePath = tempPdfFilePath.replace(".pdf", ".png");
        try {
            HtmlToPdfConverter.convertHtmlToPdf(element.outerHtml(), tempPdfFilePath);
            PdfToImageConverter.convertPdfToImage(tempPdfFilePath);
            insertImageIntoExcel(sheet, rowIndex, tempPngFilePath);
        } finally {
            // Best-effort cleanup, also when a conversion step failed.
            new File(tempPdfFilePath).delete();
            new File(tempPngFilePath).delete();
        }
    }

    /**
     * Adds the PNG at {@code imagePath} to the sheet, anchored to the first column of
     * {@code rowIndex}, and sets that row's height to a fixed value.
     *
     * @param sheet target sheet
     * @param rowIndex zero-based row that receives the picture
     * @param imagePath path of the PNG file to embed
     * @throws Exception if the image file cannot be read
     */
    private static void insertImageIntoExcel(Sheet sheet, int rowIndex, String imagePath) throws Exception {
        // Read the whole file; available() plus a single read() may return only part of it.
        byte[] imageBytes = Files.readAllBytes(Paths.get(imagePath));
        int pictureId = sheet.getWorkbook().addPicture(imageBytes, Workbook.PICTURE_TYPE_PNG);

        CreationHelper helper = sheet.getWorkbook().getCreationHelper();
        Drawing drawing = sheet.createDrawingPatriarch();
        ClientAnchor anchor = helper.createClientAnchor();
        anchor.setCol1(0);
        anchor.setRow1(rowIndex);
        anchor.setCol2(1);
        anchor.setRow2(rowIndex + 1);
        drawing.createPicture(anchor, pictureId);

        // Image rows are never created by the table path, so getRow() returns null here
        // unless the row is created first.
        Row row = sheet.getRow(rowIndex);
        if (row == null) {
            row = sheet.createRow(rowIndex);
        }
        row.setHeightInPoints(IMAGE_ROW_HEIGHT_POINTS);
    }
}
class HtmlToPdfConverter {
public static void convertHtmlToPdf(String html, String pdfFilePath) throws Exception {
PDDocument document = new PDDocument();
PDPage page = new PDPage();
document.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(document, page);
InputStream inputStream = new ByteArrayInputStream(html.getBytes("UTF-8"));
XMLWorkerHelper.getInstance().parseXHtml(contentStream, document, inputStream);
contentStream.close();
document.save(pdfFilePath);
document.close();
}
}
/** Renders the first page of a PDF file to a PNG image using PDFBox. */
class PdfToImageConverter {

    /**
     * Renders page 0 of the PDF at {@code pdfFilePath} and writes it as a PNG next to it,
     * using the same path with {@code .pdf} replaced by {@code .png}.
     *
     * @param pdfFilePath path of the PDF to render
     * @throws Exception if the PDF cannot be loaded or rendered, or the image cannot be written
     */
    public static void convertPdfToImage(String pdfFilePath) throws Exception {
        File pngFile = new File(pdfFilePath.replace(".pdf", ".png"));
        // try-with-resources closes the document even when rendering or writing fails.
        try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            BufferedImage image = pdfRenderer.renderImage(0);
            ImageIO.write(image, "png", pngFile);
        }
    }
}
```
上述代码中,我们先读取 HTML 文件中的内容,并遍历 `<body>` 下的每个元素。如果元素是表格,则将表格内容逐行写入 Excel 工作表;如果元素不是表格,则先将该元素转换为 PDF 文件,再将 PDF 的第一页渲染为 PNG 图像,并将图像插入到 Excel 工作表中。需要注意的是,示例将图像所在行的行高固定为 200 磅,您可以根据实际图像大小调整该值。另外,HTML 转 PDF 由 iText(XML Worker)完成,PDF 转 PNG 由 Apache PDFBox 完成。
需要将上述代码中的 `example.html` 替换为您要转换的 HTML 文件的路径,将 `example.xls` 替换为您要生成的 Excel 文件的路径。另外,您还需要在项目中添加以下依赖项(示例代码还使用了 jsoup 解析 HTML,请确保项目中同时引入了 `org.jsoup:jsoup`):
```xml
<!-- Excel (.xls) generation -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- PDF loading and page rendering -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.24</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.24</version>
</dependency>
<!-- HTML to PDF conversion (iText 5 + XML Worker) -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.13</version>
</dependency>
<!-- HTML parsing: the example imports org.jsoup.* -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
```
阅读全文