读取word文件
import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; public class WordDemo { public static void main(String[] args) { StringBuffer readWord = readWord("f:/app代码.docx"); System.out.println(readWord.toString()); } public static StringBuffer readWord(String path) { String s = ""; try { if (path.endsWith(".doc")) { InputStream is = new FileInputStream(new File(path)); WordExtractor ex = new WordExtractor(is); s = ex.getText(); } else if (path.endsWith("docx")) { OPCPackage opcPackage = POIXMLDocument.openPackage(path); POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); s = extractor.getText(); } else { System.out.println("传入的word文件不正确:" + path); } } catch (Exception e) { e.printStackTrace(); } StringBuffer bf = new StringBuffer(s); return bf; } }
需要jar包
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.17</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.17</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.17</version> </dependency>
时间: 2024-12-21 11:03:08