对于用JAVA访问EXCEL文件, 常用的组件有jxl和POI. jxl目前已无版本更新, 并且仅能访问EXCEL2007以前的文件格式(xls), 对xlsx文件格式目前不支持, 故而推荐使用POI.
POI的官方下载地址为: http://poi.apache.org/
本质上来说, EXCEL2007文档(xlsx)是一个由若干XML文档打包而成的ZIP压缩包, 所以POI对EXCEL文件读操作有两种方式, 一种是DOM(用户模式), 一种是SAX(事件模式). DOM对EXCEL操作简单, 易上手, 但是占用较多的内存, 尤其是表格超过10000行时, 经常会出现内存溢出问题. 虽然可以改变JVM启动参数, 加大内存, 但不能从根本上解决问题, 所以当需要读取较大的EXCEL文件时, 我们采用第二种方式(SAX)更为合理.
对于EXCEL文档写操作也面临同样问题, 如果写入的行数过多, 也会遇到内存溢出. 但是写操作解决大文件的办法比较简单, 设置写缓存就可以了(例如使用POI的SXSSFWorkbook). 不过因为EXCEL的SHEET有最大行数的限制(xls格式为65536行, xlsx格式为1048576行), 所以如果行数超过这个限制, 需要创建新的SHEET. 以下是对EXCEL大文件读写操作的示例.
import java.io.InputStream; import java.util.Date; import java.util.Iterator; import java.util.Map; import java.util.regex.Pattern; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; public class TestExcelReader { public static void main(String[] args) throws Exception { String path = "c://cache//test.xlsx"; ExcelHandler handler = new ExcelHandler(); handler.process(path); } public static class ExcelHandler extends DefaultHandler { public ExcelHandler() {} private StylesTable stylesTable; private CellDataType dataType; private SharedStringsTable sst; private Map<String, String> datas; private short dataFormat; private String dataFormatString; private boolean tagString; private boolean tElement = false; private String content; private Coordinate coordinate; @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { super.startElement(uri, localName, qName, attributes); if ("c".equals(qName)) { this.setDataType(attributes); coordinate = getCoordinate(attributes.getValue("r")); } else if ("t".equals(qName)) { tElement = true; } else { tElement = false; } content = ""; } @Override public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); if (tagString) { int idx = Integer.parseInt(content); content = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); } if (tElement) { 
String value = content.trim(); setDataValue(datas, value, true); tElement = false; } if ("v".equals(qName)) { setDataValue(datas, content.trim(), false); } if ("row".equals(qName)) { // process the row datas; System.out.println(datas); datas.clear(); } } public void process(String path) { InputStream is = null; OPCPackage pkg = null; try { pkg = OPCPackage.open(path); XSSFReader reader = new XSSFReader(pkg); stylesTable = reader.getStylesTable(); SharedStringsTable sst = reader.getSharedStringsTable(); this.sst = sst; Iterator<InputStream> it = reader.getSheetsData(); if (it.hasNext()) { is = it.next(); InputSource source = new InputSource(is); XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); parser.setContentHandler(this); parser.parse(source); } } catch(Throwable ex) { ex.printStackTrace(); } finally { try { if (is != null) is.close(); if (pkg != null) pkg.close(); } catch(Throwable e) {} } } public void setDataValue(Map<String, String> datas, String value, boolean t) { String column = coordinate.getColumnName(); if (t) { datas.put(column, value); } else { switch (dataType) { case BOOL: char first = value.charAt(0); datas.put(column, first == ‘0‘ ? 
"否":"是"); break; case ERROR: String err = "\"ERROR:" + value.toString() + ‘"‘; datas.put(column, err); break; case FORMULA: datas.put(column, value); break; case INLINESTR: XSSFRichTextString rts = new XSSFRichTextString(value.toString()); String inline = rts.toString(); datas.put(column, inline); break; case SSTINDEX: String sidx = value.toString(); try { int idx = Integer.parseInt(sidx); XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx)); String sstindex = rtss.toString(); datas.put(column, sstindex); rtss = null; } catch (NumberFormatException ex) { ex.printStackTrace(); datas.put(column, value); } break; case NUMBER: Double db = Double.parseDouble(value); datas.put(column, String.valueOf(db)); break; case DATE: double number = Double.parseDouble(value); Date date = DateUtil.getJavaDate(number); datas.put(column, date.toString()); break; default: datas.put(column, ""); break; } } } public void setDataType(Attributes attributes) { String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); dataType = CellDataType.NUMBER; if ("b".equals(cellType)) { dataType = CellDataType.BOOL; } else if ("e".equals(cellType)) { dataType = CellDataType.ERROR; } else if ("inlineStr".equals(cellType)) { dataType = CellDataType.INLINESTR; } else if ("s".equals(cellType)) { dataType = CellDataType.SSTINDEX; } else if ("str".equals(cellType)) { dataType = CellDataType.FORMULA; } if (cellStyleStr != null) { int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); dataFormat = style.getDataFormat(); dataFormatString = style.getDataFormatString(); if (dataFormatString == null) { dataType = CellDataType.NULL; dataFormatString = BuiltinFormats.getBuiltinFormat(dataFormat); } if (dataFormat > 0) { if (!isScientificReg(dataFormatString)) { dataType = CellDataType.DATE; } } } } } public static interface Coordinate { String getColumnName(); int getLine(); } public static Coordinate 
getCoordinate(String row) { Coordinate coordinate = null; if (row != null && !"".equals(row)) { int last = -1; for(int i = 0; i < row.length(); i++) { if (Character.isDigit(row.charAt(i))) { last = i; break; } } String sline = last != -1 ? row.substring(last):"0"; final String column = last != -1 ? row.substring(0, last):row; final int line = Integer.parseInt(sline); coordinate = new Coordinate() { @Override public String getColumnName() { return column; } @Override public int getLine() { return line; } }; } if (coordinate == null) { coordinate = new Coordinate() { @Override public String getColumnName() { return ""; } @Override public int getLine() { return 0; } }; } return coordinate; } public static boolean isScientificReg(String str) { boolean result = false; if (str != null) { String reg = "0.[0]+[Ee][+-]00"; Pattern pattern = Pattern.compile(reg); result = pattern.matcher(str).matches(); } return result; } public static enum CellDataType { BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL } }
时间: 2024-10-10 05:32:14