目标:读取execl的第一个sheet,并传入不需要读取的表头的行数,返回该execl里所有数据的list
解析共有2种:1、DOM 2、SAX
1 import java.io.File; 2 import java.io.IOException; 3 import java.io.InputStream; 4 import java.text.DecimalFormat; 5 import java.util.ArrayList; 6 import java.util.Iterator; 7 import java.util.List; 8 import java.util.regex.Matcher; 9 import java.util.regex.Pattern; 10 11 import org.apache.commons.logging.Log; 12 import org.apache.commons.logging.LogFactory; 13 import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 14 import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 15 import org.apache.poi.openxml4j.opc.OPCPackage; 16 import org.apache.poi.ss.usermodel.Cell; 17 import org.apache.poi.ss.usermodel.Row; 18 import org.apache.poi.ss.usermodel.Sheet; 19 import org.apache.poi.ss.usermodel.Workbook; 20 import org.apache.poi.ss.usermodel.WorkbookFactory; 21 import org.apache.poi.xssf.eventusermodel.XSSFReader; 22 import org.apache.poi.xssf.model.SharedStringsTable; 23 import org.apache.poi.xssf.usermodel.XSSFRichTextString; 24 import org.xml.sax.Attributes; 25 import org.xml.sax.ContentHandler; 26 import org.xml.sax.InputSource; 27 import org.xml.sax.SAXException; 28 import org.xml.sax.XMLReader; 29 import org.xml.sax.helpers.DefaultHandler; 30 import org.xml.sax.helpers.XMLReaderFactory; 31 32 /** 33 * 基于XSSF and SAX (Event API) 34 * 读取excel的第一个Sheet的内容 35 * @author yzl 36 * 37 */ 38 public class ReadExcelUtils { 39 private int headCount = 0; 40 private List<List<String>> list = new ArrayList<List<String>>(); 41 private static final Log log = LogFactory.getLog(ReadExcelUtils.class); 42 43 /** 44 * 通过文件流构建DOM进行解析 45 * @param ins 46 * @param headRowCount 47 * @return 48 * @throws InvalidFormatException 49 * @throws IOException 50 */ 51 public List<List<String>> processDOMReadSheet(InputStream ins,int headRowCount) throws InvalidFormatException, IOException { 52 Workbook workbook = WorkbookFactory.create(ins); 53 return this.processDOMRead(workbook, headCount); 54 } 55 56 /** 57 * 采用DOM的形式进行解析 58 * @param filename 59 * @param headRowCount 60 * @return 61 * @throws IOException 62 * @throws InvalidFormatException 63 * @throws Exception 64 */ 65 public List<List<String>> processDOMReadSheet(String filename,int headRowCount) throws InvalidFormatException, IOException { 66 Workbook workbook = WorkbookFactory.create(new File(filename)); 67 return this.processDOMRead(workbook, headCount); 68 } 69 70 /** 71 * 采用SAX进行解析 72 * @param filename 73 * @param headRowCount 74 * @return 75 * @throws OpenXML4JException 76 * @throws IOException 77 * @throws SAXException 78 * @throws Exception 79 */ 80 public List<List<String>> processSAXReadSheet(String filename,int headRowCount) throws IOException, OpenXML4JException, SAXException { 81 headCount = headRowCount; 82 83 OPCPackage pkg = OPCPackage.open(filename); 84 XSSFReader r = new XSSFReader( pkg ); 85 SharedStringsTable sst = r.getSharedStringsTable(); 86 XMLReader parser = fetchSheetParser(sst); 87 88 Iterator<InputStream> sheets = r.getSheetsData(); 89 InputStream sheet = sheets.next(); 90 InputSource sheetSource = new InputSource(sheet); 91 parser.parse(sheetSource); 92 sheet.close(); 93 94 log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size()); 95 96 return list; 97 } 98 99 private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { 100 XMLReader parser = 101 XMLReaderFactory.createXMLReader( 102 "org.apache.xerces.parsers.SAXParser" 103 ); 104 ContentHandler handler = new SheetHandler(sst); 105 parser.setContentHandler(handler); 106 return parser; 107 } 108 109 /** 110 * SAX 解析excel 111 */ 112 private class SheetHandler extends DefaultHandler { 113 private SharedStringsTable sst; 114 private String lastContents; 115 private boolean nextIsString; 116 private boolean isNullCell; 117 //读取行的索引 118 private int rowIndex = 0; 119 //是否重新开始了一行 120 private boolean curRow = false; 121 private List<String> rowContent; 122 123 private SheetHandler(SharedStringsTable sst) { 124 this.sst = sst; 125 } 126 127 public void startElement(String uri, String localName, String name, 128 Attributes attributes) throws SAXException { 129 //节点的类型 130 //System.out.println("---------begin:" + name); 131 if(name.equals("row")){ 132 rowIndex++; 133 } 134 //表头的行直接跳过 135 if(rowIndex > headCount){ 136 curRow = true; 137 // c => cell 138 if(name.equals("c")) { 139 String cellType = attributes.getValue("t"); 140 if(null == cellType){ 141 isNullCell = true; 142 }else{ 143 if(cellType.equals("s")) { 144 nextIsString = true; 145 } else { 146 nextIsString = false; 147 } 148 isNullCell = false; 149 } 150 } 151 // Clear contents cache 152 lastContents = ""; 153 } 154 } 155 156 public void endElement(String uri, String localName, String name) 157 throws SAXException { 158 //System.out.println("-------end:"+name); 159 if(rowIndex > headCount){ 160 if(nextIsString) { 161 int idx = Integer.parseInt(lastContents); 162 lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); 163 nextIsString = false; 164 } 165 if(name.equals("v")) { 166 //System.out.println(lastContents); 167 if(curRow){ 168 //是新行则new一行的对象来保存一行的值 169 if(null==rowContent){ 170 rowContent = new ArrayList<String>(); 171 } 172 rowContent.add(lastContents); 173 } 174 }else if(name.equals("c") && isNullCell){ 175 if(curRow){ 176 //是新行则new一行的对象来保存一行的值 177 if(null==rowContent){ 178 rowContent = new ArrayList<String>(); 179 } 180 rowContent.add(null); 181 } 182 } 183 184 isNullCell = false; 185 186 if("row".equals(name)){ 187 list.add(rowContent); 188 curRow = false; 189 rowContent = null; 190 } 191 } 192 193 } 194 195 public void characters(char[] ch, int start, int length) 196 throws SAXException { 197 lastContents += new String(ch, start, length); 198 } 199 } 200 201 /** 202 * DOM的形式解析execl 203 * @param workbook 204 * @param headRowCount 205 * @return 206 * @throws InvalidFormatException 207 * @throws IOException 208 */ 209 private List<List<String>> processDOMRead(Workbook workbook,int headRowCount) throws InvalidFormatException, IOException { 210 headCount = headRowCount; 211 212 Sheet sheet = workbook.getSheetAt(0); 213 //行数 214 int endRowIndex = sheet.getLastRowNum(); 215 216 Row row = null; 217 List<String> rowList = null; 218 219 for(int i=headCount; i<=endRowIndex; i++){ 220 rowList = new ArrayList<String>(); 221 row = sheet.getRow(i); 222 for(int j=0; j<row.getLastCellNum();j++){ 223 if(null==row.getCell(j)){ 224 rowList.add(null); 225 continue; 226 } 227 int dataType = row.getCell(j).getCellType(); 228 if(dataType == Cell.CELL_TYPE_NUMERIC){ 229 DecimalFormat df = new DecimalFormat("0.####################"); 230 rowList.add(df.format(row.getCell(j).getNumericCellValue())); 231 }else if(dataType == Cell.CELL_TYPE_BLANK){ 232 rowList.add(null); 233 }else if(dataType == Cell.CELL_TYPE_ERROR){ 234 rowList.add(null); 235 }else{ 236 //这里的去空格根据自己的情况判断 237 String valString = row.getCell(j).getStringCellValue(); 238 Pattern p = Pattern.compile("\\s*|\t|\r|\n"); 239 Matcher m = p.matcher(valString); 240 valString = m.replaceAll(""); 241 //去掉狗日的不知道是啥东西的空格 242 if(valString.indexOf(" ")!=-1){ 243 valString = valString.substring(0, valString.indexOf(" ")); 244 } 245 246 rowList.add(valString); 247 } 248 } 249 250 list.add(rowList); 251 } 252 log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size()); 253 254 return list; 255 } 256 257 @SuppressWarnings("unused") 258 public static void main(String[] args) throws Exception { 259 ReadExcelUtils howto = new ReadExcelUtils(); 260 String fileName = "f:/test.xlsx"; 261 List<List<String>> list = howto.processSAXReadSheet(fileName,2); 262 263 ReadExcelUtils h = new ReadExcelUtils(); 264 String fileName1 = "f:/test.xls"; 265 List<List<String>> result = h.processDOMReadSheet(fileName1,2); 266 } 267 }
时间: 2024-10-12 08:49:18