此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。
所需jar包如下:
1.PoiUtil.java
package com.wzh.poi;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Static helpers that convert binary Excel workbooks ({@code HSSFWorkbook})
 * and binary Word documents ({@code HWPFDocument}) to HTML files with
 * Apache POI's HTML converters.
 *
 * @date 2015-3-16 17:22:05
 * @author y
 */
public class PoiUtil {

    /** Encoding used both to serialize the HTML and to write it to disk. */
    private static final String HTML_ENCODING = "UTF-8";

    /**
     * Converts an Excel workbook to a single HTML file.
     *
     * <p>Column headers and row numbers are suppressed, and the
     * {@code <h2>SheetN</h2>} headings for N = 1..5 are stripped from the
     * generated markup.
     *
     * @param fileName   path of the workbook to read
     * @param outputFile path of the HTML file to write
     * @throws FileNotFoundException if {@code fileName} cannot be opened
     * @throws IOException if the workbook cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerConfigurationException if no serializer can be created
     * @throws TransformerException if the HTML DOM cannot be serialized
     */
    public static void excelToHtml(String fileName, String outputFile)
            throws FileNotFoundException, IOException, ParserConfigurationException,
            TransformerConfigurationException, TransformerException {
        Document htmlDocument;
        InputStream is = new FileInputStream(fileName);
        try {
            HSSFWorkbook excelBook = new HSSFWorkbook(is);
            ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            ethc.setOutputColumnHeaders(false);
            ethc.setOutputRowNumbers(false);
            ethc.processWorkbook(excelBook);
            htmlDocument = ethc.getDocument();
        } finally {
            // The original never closed this stream, leaking a file handle per call.
            is.close();
        }

        String htmlStr = serializeToHtml(htmlDocument)
                .replace("<h2>Sheet1</h2>", "")
                .replace("<h2>Sheet2</h2>", "")
                .replace("<h2>Sheet3</h2>", "")
                .replace("<h2>Sheet4</h2>", "")
                .replace("<h2>Sheet5</h2>", "");
        writeFile(htmlStr, outputFile);
    }

    /**
     * Converts a Word document to an HTML file and extracts its pictures.
     *
     * <p>Each picture is written, under the file name POI suggests for it,
     * into the directory that contains {@code outputFile}. The pictures
     * manager returns the suggested name unchanged, so the picture files are
     * placed next to the HTML file rather than in the process working
     * directory.
     *
     * @param fileName   path of the Word document to read
     * @param outputFile path of the HTML file to write
     * @throws IOException if the document cannot be read or a picture cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if the HTML DOM cannot be serialized
     */
    public static void wordToHtml(String fileName, String outputFile)
            throws IOException, ParserConfigurationException, TransformerException {
        // May be null for a root path; File(File, String) then falls back to the bare name.
        File pictureDir = new File(outputFile).getAbsoluteFile().getParentFile();

        Document htmlDocument;
        InputStream is = new FileInputStream(fileName);
        try {
            HWPFDocument wordDoc = new HWPFDocument(is);
            WordToHtmlConverter wthc = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wthc.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] bytes, PictureType pt, String suggestedName,
                        float f, float f1) {
                    // The picture bytes are written below; only the name is handed back here.
                    return suggestedName;
                }
            });
            wthc.processDocument(wordDoc);

            List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
            if (null != pics) {
                for (Picture pic : pics) {
                    FileOutputStream pictureOut =
                            new FileOutputStream(new File(pictureDir, pic.suggestFullFileName()));
                    try {
                        pic.writeImageContent(pictureOut);
                    } finally {
                        pictureOut.close();
                    }
                }
            }
            htmlDocument = wthc.getDocument();
        } finally {
            is.close();
        }

        writeFile(serializeToHtml(htmlDocument), outputFile);
    }

    /**
     * Writes {@code content} to {@code path} as UTF-8, replacing any existing file.
     *
     * <p>I/O failures are logged at {@code SEVERE} and not rethrown, so a
     * normal return does not guarantee that the file was written.
     *
     * @param content text to write
     * @param path    destination file path
     */
    public static void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            fos = new FileOutputStream(new File(path));
            bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_ENCODING));
            bw.write(content);
        } catch (IOException ex) {
            // Also covers FileNotFoundException and UnsupportedEncodingException.
            Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            try {
                if (null != bw) {
                    // Closing the writer flushes it and closes the wrapped stream.
                    bw.close();
                } else if (null != fos) {
                    fos.close();
                }
            } catch (IOException ex) {
                Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    /**
     * Serializes a DOM document as indented HTML and returns it as a string.
     *
     * <p>The bytes are decoded with the same encoding the serializer was told
     * to emit; decoding with the platform default charset would corrupt
     * non-ASCII text on JVMs whose default is not UTF-8.
     */
    private static String serializeToHtml(Document htmlDocument)
            throws TransformerException, UnsupportedEncodingException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        return out.toString(HTML_ENCODING);
    }
}
2.Test.java
import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

/**
 * Manual driver for {@link PoiUtil#excelToHtml(String, String)}.
 *
 * @author y
 */
public class Test {

    /**
     * Converts {@code t2.xls} to {@code test.html}; a conversion failure is
     * logged at {@code SEVERE} instead of being propagated.
     *
     * @param args the command line arguments (not used)
     */
    public static void main(String[] args) {
        final String sourceWorkbook = "t2.xls";
        final String targetPage = "test.html";

        // Capture whichever checked failure occurs so there is one logging site.
        Exception failure = null;
        try {
            PoiUtil.excelToHtml(sourceWorkbook, targetPage);
        } catch (IOException ioProblem) {
            failure = ioProblem;
        } catch (ParserConfigurationException parserProblem) {
            failure = parserProblem;
        } catch (TransformerException transformProblem) {
            failure = transformProblem;
        }

        if (failure != null) {
            Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, failure);
        }
    }
}
时间: 2024-10-12 09:26:39