使用 POI 将 Word 文档转换为 HTML，支持 doc 和 docx 两种格式，转换后可以保留图片和样式。
需要的jar包:
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>xdocreport</artifactId> <version>1.0.6</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency>
转换的代码:
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.xwpf.converter.core.BasicURIResolver; import org.apache.poi.xwpf.converter.core.FileImageExtractor; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.OutputStreamWriter; import java.util.List; public class Test { //doc转换为html void docToHtml() throws Exception { String sourceFileName = "C:\\doc\\test.doc"; String targetFileName = "C:\\html\\test.html"; String imagePathStr = "C:\\html\\image\\"; HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); //设置图片的相对路径 wordToHtmlConverter.setPicturesManager((a, b, suggestedName, d, e) -> "image" + File.separator + suggestedName); wordToHtmlConverter.processDocument(wordDocument); List<Picture> pics = wordDocument.getPicturesTable().getAllPictures(); for (Picture pic : pics) { //生成图片 pic.writeImageContent(new FileOutputStream(imagePathStr + pic.suggestFullFileName())); } Document htmlDocument = wordToHtmlConverter.getDocument(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(new File(targetFileName)); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); 
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } //docx转换为html public void docxToHtml() throws Exception { String sourceFileName = "C:\\doc\\test.doc"; String targetFileName = "C:\\html\\test.html"; String imagePathStr = "C:\\html\\image\\"; FileOutputStream fileOutputStream = null; OutputStreamWriter outputStreamWriter = null; try { XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName)); XHTMLOptions options = XHTMLOptions.create(); //存放图片的文件夹 options.setExtractor(new FileImageExtractor(new File(imagePathStr))); //html中图片的路径 options.URIResolver(new BasicURIResolver("image")); fileOutputStream = new FileOutputStream(targetFileName); outputStreamWriter = new OutputStreamWriter(fileOutputStream, "utf-8"); XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance(); xhtmlConverter.convert(document, outputStreamWriter, options); } finally { if (outputStreamWriter != null) { outputStreamWriter.close(); } if (fileOutputStream != null) { fileOutputStream.close(); } } } }
完整的源代码见 http://git.oschina.net/xiaoyun_studio/xiaoyun-studio ，路径：src/main/java/studio/xiaoyun/common/tool/POITool.java
时间: 2024-11-01 18:18:48