word导出html实现在线预览

所需的 Maven 依赖如下（已通过实际编译验证，各依赖的版本必须相互匹配）：  <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>org.apache.poi.xwpf.converter.core</artifactId> <version>1.0.5</version> </dependency>  <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId> <version>1.0.5</version> </dependency>  <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-io</artifactId> <version>1.3.2</version> </dependency>


<dependency>

        <groupId>org.apache.poi</groupId>

        <artifactId>poi-scratchpad</artifactId>

        <version>3.17</version>

    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->

    <dependency>

        <groupId>org.apache.commons</groupId>

        <artifactId>commons-collections4</artifactId>

        <version>4.0</version>

    </dependency>
<dependency>

        <groupId>org.apache.poi</groupId>

        <artifactId>poi</artifactId>

        <version>3.17</version>

    </dependency>

    <!-- <dependency>

        <groupId>org.apache.poi</groupId>

        <artifactId>poi-ooxml-schemas</artifactId>

        <version>3.16</version>

    </dependency> -->

    <!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->

    <dependency>

        <groupId>org.apache.xmlbeans</groupId>

        <artifactId>xmlbeans</artifactId>

        <version>2.6.0</version>

    </dependency>
<dependency>

        <groupId>org.apache.poi</groupId>

        <artifactId>poi-ooxml</artifactId>

        <version>3.14</version>

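    </dependency><!-- 注意：所有 POI 相关依赖（poi、poi-scratchpad、poi-ooxml 等）的版本号必须保持一致，否则会出现 ClassNotFoundException: org.apache.poi.wp.usermodel.Paragraph 之类的错误。上面列出的 poi-ooxml 为 3.14，而 poi / poi-scratchpad 为 3.17，两者并不一致，建议核对后统一为同一版本。 -->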

    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->

    <dependency>

        <groupId>org.apache.commons</groupId>

        <artifactId>commons-lang3</artifactId>

        <version>3.4</version>

    </dependency>

package com.zyhao.openec.excel.utils;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

public class WordToHtml {
public static void main(String[] args) {
try {
wordToHtml("E:\me\2.docx", "E:\me\", "123.html");
// wordToHtml("E:\me\2.doc", "E:\me\", "12.html");
} catch (Exception e) {
e.printStackTrace();
}
}

public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws Exception {
        convert2Html(wordPath, htmlPath, newFilename);
}    

public static void writeFile(String content, String path) throws Exception {
    FileOutputStream fos = null;
    BufferedWriter bw = null;
    try {
        File file = new File(path);
        fos = new FileOutputStream(file);
        bw = new BufferedWriter(new OutputStreamWriter(fos));
        bw.write(content);
    } catch (FileNotFoundException fnfe) {
        fnfe.printStackTrace();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    } finally {
        try {
            if (bw != null)
                bw.close();
            if (fos != null)
                fos.close();
        } catch (IOException ie) {
        }
    }
}    

/**
 * 将word转换成html
 * 支持 .doc and .docx
 * @param fileName word文件名
 * @param outPutFilePath html存储路径
 * @param newFileName html名
 * @throws Exception
 */
public static void convert2Html(String fileName, String outPutFilePath,String newFileName)
        throws Exception {
    String substring = fileName.substring(fileName.lastIndexOf(".")+1);
    ByteArrayOutputStream out = new ByteArrayOutputStream();

    /**
     * word2007和word2003的构建方式不同，
     * 前者的构建方式是xml，后者的构建方式是dom树。
     * 文件的后缀也不同，前者后缀为.docx，后者后缀为.doc
     * 相应的，apache.poi提供了不同的实现类。
     */
    if("docx".equals(substring)){

// writeFile(new String("<html><head> <meta http-equiv=\"content-type\" content=\"text/html\" charset=\"utf-8\"/></head>对不起，.docx格式的word文档，暂时不能生成预览</html>".getBytes("utf-8")), outPutFilePath+newFileName);

        //step 1 : load DOCX into XWPFDocument
        InputStream inputStream = new FileInputStream(new File(fileName));
        XWPFDocument document = new XWPFDocument(inputStream);

        //step 2 : prepare XHTML options
        final String imageUrl = "";
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl)));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);
        options.URIResolver(new IURIResolver() {

// @Override 重写的方法，加上这个报错，你看看是啥问题
public String resolve(String uri) {
return imageUrl + uri;
}
});

        //step 3 : convert XWPFDocument to XHTML
        XHTMLConverter.getInstance().convert(document, out, options);

    }else{
        HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());

         wordToHtmlConverter.setPicturesManager( new PicturesManager()
         {
             public String savePicture( byte[] content,
                     PictureType pictureType, String suggestedName,
                     float widthInches, float heightInches )
             {
                 return suggestedName;
             }
         } );
        wordToHtmlConverter.processDocument(wordDocument);
        //save pictures
        List pics=wordDocument.getPicturesTable().getAllPictures();  

        if(pics!=null&&!pics.isEmpty()){

            for(int i=0;i<pics.size();i++){
                Picture pic = (Picture)pics.get(i);
                System.out.println();
                try {
                    pic.writeImageContent(new FileOutputStream(outPutFilePath
                            + pic.suggestFullFileName()));
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }
        Document htmlDocument = wordToHtmlConverter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);    

        TransformerFactory tf = TransformerFactory.newInstance();    //这个应该是转换成xml的
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
    }    

    out.close();
    writeFile(new String(out.toByteArray()), outPutFilePath+newFileName);
}

}

原文地址：http://blog.51cto.com/17099933344/2087457

时间： 2024-10-24 09:43:42

word导出html实现在线预览

word导出html实现在线预览的相关文章

asp.net word ecxel类型文件在线预览

带进度的多文件上传(支持上传.doc后缀的word文档并在线预览)

Aspose office （Excel,Word,PPT）,PDF 在线预览

word转pdf swf 在线预览

怎么实现word，excel在线预览

Atitit.office word  excel  ppt pdf 的web在线预览方案与html转换方案 attilax 总结

[Asp.net]常见word，excel，ppt，pdf在线预览方案(转)

JSP实现word文档的上传，在线预览，下载

[Asp.net]常见word，excel，ppt，pdf在线预览方案