// ExcelReaderUtils.java
  1. package com.example.poiaddr.util;
  2. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  3. import org.apache.poi.ss.usermodel.*;
  4. import java.io.File;
  5. import java.io.FileInputStream;
  6. import java.io.IOException;
  7. import java.util.*;
  8. import org.apache.poi.util.IOUtils;
  9. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  10. import java.io.FileOutputStream;
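/**
 * Utilities for parsing .xlsx documents into {@code List<Map<String, Object>>}, writing such
 * lists back to .xlsx, and splitting large workbooks into smaller files.
 */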
  15. public class ExcelReaderUtils {
  16. public static void writeToExcel(List<Map<String, Object>> dataList, String filePath) throws IOException {
  17. // 创建一个新的工作簿(对应一个Excel文件)
  18. Workbook workbook = new XSSFWorkbook();
  19. // 创建一个工作表
  20. Sheet sheet = workbook.createSheet("Sheet1");
  21. // 写入表头(从Map的键中获取列名)
  22. if (!dataList.isEmpty()) {
  23. Map<String, Object> firstMap = dataList.get(0);
  24. int colIndex = 0;
  25. Row headerRow = sheet.createRow(0);
  26. for (String key : firstMap.keySet()) {
  27. Cell cell = headerRow.createCell(colIndex++);
  28. cell.setCellValue(key);
  29. }
  30. }
  31. // 逐行写入数据
  32. int rowIndex = 1;
  33. for (Map<String, Object> dataMap : dataList) {
  34. Row dataRow = sheet.createRow(rowIndex++);
  35. int colIndex = 0;
  36. for (Object value : dataMap.values()) {
  37. Cell cell = dataRow.createCell(colIndex++);
  38. if (value instanceof String) {
  39. cell.setCellValue((String) value);
  40. } else if (value instanceof Integer) {
  41. cell.setCellValue((Integer) value);
  42. } else if (value instanceof Double) {
  43. cell.setCellValue((Double) value);
  44. } else if (value instanceof Boolean) {
  45. cell.setCellValue((Boolean) value);
  46. } else {
  47. cell.setCellValue(value != null ? value.toString() : "");
  48. }
  49. }
  50. }
  51. // 将工作簿写入到文件
  52. try (FileOutputStream outputStream = new FileOutputStream(filePath)) {
  53. workbook.write(outputStream);
  54. } finally {
  55. workbook.close();
  56. }
  57. }
  58. public static List<Map<String, Object>> readExcel(String filePath) throws IOException {
  59. List<Map<String, Object>> resultList = new ArrayList<>();
  60. FileInputStream fis = new FileInputStream(filePath);
  61. IOUtils.setByteArrayMaxOverride(400000000);
  62. // 创建工作簿对象,用于代表整个Excel文件
  63. Workbook workbook = WorkbookFactory.create(fis);
  64. // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
  65. Sheet sheet = workbook.getSheetAt(0);
  66. // 获取表头行
  67. Row headerRow = sheet.getRow(0);
  68. int headerSize = headerRow.getLastCellNum();
  69. // 遍历数据行(从第二行开始,第一行是表头)
  70. for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
  71. Row currentRow = sheet.getRow(rowIndex);
  72. Map<String, Object> rowMap = new HashMap<>();
  73. for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
  74. Cell headerCell = headerRow.getCell(cellIndex);
  75. Cell currentCell = currentRow.getCell(cellIndex);
  76. String headerValue = getCellValue(headerCell).toString();
  77. Object currentValue = getCellValue(currentCell);
  78. rowMap.put(headerValue, currentValue);
  79. }
  80. resultList.add(rowMap);
  81. }
  82. workbook.close();
  83. fis.close();
  84. return resultList;
  85. }
  86. public static List<Map<String, Object>> readExcel(File file) throws IOException {
  87. List<Map<String, Object>> resultList = new ArrayList<>();
  88. FileInputStream fis = new FileInputStream(file);
  89. IOUtils.setByteArrayMaxOverride(400000000);
  90. // 创建工作簿对象,用于代表整个Excel文件
  91. Workbook workbook = WorkbookFactory.create(fis);
  92. // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
  93. Sheet sheet = workbook.getSheetAt(0);
  94. // 获取表头行
  95. Row headerRow = sheet.getRow(0);
  96. int headerSize = headerRow.getLastCellNum();
  97. // 遍历数据行(从第二行开始,第一行是表头)
  98. for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
  99. Row currentRow = sheet.getRow(rowIndex);
  100. Map<String, Object> rowMap = new HashMap<>();
  101. for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
  102. Cell headerCell = headerRow.getCell(cellIndex);
  103. Cell currentCell = currentRow.getCell(cellIndex);
  104. String headerValue = getCellValue(headerCell).toString();
  105. Object currentValue = getCellValue(currentCell);
  106. rowMap.put(headerValue, currentValue);
  107. }
  108. resultList.add(rowMap);
  109. }
  110. workbook.close();
  111. fis.close();
  112. return resultList;
  113. }
  114. public static void splitExcelByRows(String inputFilePath, int rowsPerFile) throws IOException {
  115. FileInputStream inputStream = new FileInputStream(new File(inputFilePath));
  116. IOUtils.setByteArrayMaxOverride(400000000);
  117. Workbook workbook = new XSSFWorkbook(inputStream);
  118. Sheet sheet = workbook.getSheetAt(0);
  119. int totalRows = sheet.getLastRowNum() + 1;
  120. int fileCount = (totalRows / rowsPerFile) + (totalRows % rowsPerFile == 0? 0 : 1);
  121. for (int i = 0; i < fileCount; i++) {
  122. Workbook newWorkbook = new XSSFWorkbook();
  123. Sheet newSheet = newWorkbook.createSheet("Sheet1");
  124. int startRow = i * rowsPerFile;
  125. int endRow = Math.min((i + 1) * rowsPerFile, totalRows);
  126. for (int rowIndex = startRow; rowIndex < endRow; rowIndex++) {
  127. Row sourceRow = sheet.getRow(rowIndex);
  128. Row newRow = newSheet.createRow(rowIndex - startRow);
  129. if (sourceRow!= null) {
  130. for (int cellIndex = 0; cellIndex < sourceRow.getLastCellNum(); cellIndex++) {
  131. Cell sourceCell = sourceRow.getCell(cellIndex);
  132. Cell newCell = newRow.createCell(cellIndex);
  133. if (sourceCell!= null) {
  134. switch (sourceCell.getCellType()) {
  135. case STRING:
  136. newCell.setCellValue(sourceCell.getStringCellValue());
  137. break;
  138. case NUMERIC:
  139. newCell.setCellValue(sourceCell.getNumericCellValue());
  140. break;
  141. case BOOLEAN:
  142. newCell.setCellValue(sourceCell.getBooleanCellValue());
  143. break;
  144. // 可以根据实际情况添加更多的类型处理,比如日期等
  145. default:
  146. newCell.setCellValue("");
  147. }
  148. }
  149. }
  150. }
  151. }
  152. String outputFilePath = getOutputFilePath(inputFilePath, i);
  153. FileOutputStream outputStream = new FileOutputStream(outputFilePath);
  154. newWorkbook.write(outputStream);
  155. outputStream.close();
  156. newWorkbook.close();
  157. }
  158. workbook.close();
  159. inputStream.close();
  160. }
  161. private static String getOutputFilePath(String inputFilePath, int index) {
  162. String baseName = inputFilePath.substring(0, inputFilePath.lastIndexOf('.'));
  163. String extension = inputFilePath.substring(inputFilePath.lastIndexOf('.'));
  164. return baseName + "_part_" + (index + 1) + extension;
  165. }
  166. private static Map<String, String> updateTableHeader() {
  167. Map<String, String> table1colToName = new HashMap<>();
  168. table1colToName.put("ID", "序列号");
  169. table1colToName.put("TASKID", "任务编号");
  170. table1colToName.put("BANLIRESULT_12345", "办理结果");
  171. table1colToName.put("APPEAL_EXPLAIN", "诉求认定说明");
  172. table1colToName.put("DESCRIPTION_12345", "反馈结论");
  173. table1colToName.put("NOT_REASON", "未联原因");
  174. table1colToName.put("CASEVALUATION_12345", "是否满意");
  175. table1colToName.put("VIEWINFO", "现场查看");
  176. table1colToName.put("DISPATCHNOTE", "派遣备注");
  177. table1colToName.put("ENDNOTE", "结案备注");
  178. table1colToName.put("PARTSN", "部件编号");
  179. table1colToName.put("WORKGRID", "工作网格");
  180. table1colToName.put("SPEICALSIGN", "案件特殊标识(特征要素)");
  181. table1colToName.put("CANCLETIME", "作废时间");
  182. table1colToName.put("CONTACTINFO", "联系方式");
  183. table1colToName.put("ACCEPTTIME", "接单时间(最后一次主责部门处理完成时间)");
  184. table1colToName.put("LASTCONTACTTIME", "首次联系截止时间(最后主责)");
  185. table1colToName.put("INFOSOURCENAME", "来源名");
  186. table1colToName.put("INFOTYPENAME", "案件属性名");
  187. table1colToName.put("INFOBCNAME", "大类名");
  188. table1colToName.put("INFOSCNAME", "小类名");
  189. table1colToName.put("INFOZCNAME", "子类名");
  190. table1colToName.put("STREETNAME", "街道名");
  191. table1colToName.put("COMMUNITYNAME", "居委名");
  192. table1colToName.put("WORKGRIDCODE", "责任网格");
  193. table1colToName.put("EXECUTEDEPTNAME", "最后主责部门");
  194. table1colToName.put("REPORTER", "反映人");
  195. table1colToName.put("REPORTDEPTNAME", "渠道来源");
  196. table1colToName.put("SYNCTIME", "更新时间");
  197. table1colToName.put("WP_SOURCE", "工单来源");
  198. table1colToName.put("CASESN", "案卷编号");
  199. table1colToName.put("INFOSOURCEID", "问题来源");
  200. table1colToName.put("DISCOVERTIME", "发现时间");
  201. table1colToName.put("PERCREATETIME", "受理时间(最后一次受理时间)");
  202. table1colToName.put("CREATETIME", "立案时间(最后一次立案时间)");
  203. table1colToName.put("DISPATCHTIME", "派遣时间(最后一次派遣时间)");
  204. table1colToName.put("SOLVINGTIME", "处理完成时间(最后一次主责部门处理完成时间)");
  205. table1colToName.put("TELASKTIME", "回访时间(最后一次回访时间)");
  206. table1colToName.put("ENDTIME", "结案时间");
  207. table1colToName.put("STREETCODE", "街道编号");
  208. table1colToName.put("COMMUNITYCODE", "村、居村编码");
  209. table1colToName.put("GRIDCODE", "万米网格编码");
  210. table1colToName.put("COORDX", "X坐标");
  211. table1colToName.put("COORDY", "Y坐标");
  212. table1colToName.put("ADDRESS", "发生地址");
  213. table1colToName.put("INFOTYPEID", "问题类型编码(0,value:部件;1:事件;...)");
  214. table1colToName.put("INFOBCCODE", "问题大类编号");
  215. table1colToName.put("INFOSCCODE", "问题小类编号");
  216. table1colToName.put("INFOZCCODE", "问题子类编码");
  217. table1colToName.put("INFOATCODE", "问题管理要点编码");
  218. table1colToName.put("DESCRIPTION", "问题描述");
  219. table1colToName.put("STATUS", "T_INFO_MAIN表状态");
  220. table1colToName.put("DEPTCODE", "立案部门");
  221. table1colToName.put("EXECUTEDEPTCODE", "最后一次主责部门");
  222. table1colToName.put("INSERTDEPTCODE", "收集部门");
  223. table1colToName.put("KEEPERSN", "上报监督员编号");
  224. table1colToName.put("INSERTUSER", "记录添加操作员(收集人)");
  225. table1colToName.put("URGENTDEGREE", "紧急程度(0:一般;1:紧急)");
  226. table1colToName.put("APPROACH", "12345工单处理方式/案卷类型(0:转办; /1:督办; /2:回访复核)");
  227. table1colToName.put("SIMILARCASESN", "相关案件编号");
  228. table1colToName.put("SERVICETYPE", "业务类型");
  229. table1colToName.put("ISANONYMITY", "是否匿名");
  230. table1colToName.put("USEREVALUATE", "用户评价/满意度(0,value:满意; /1,value:基本满意; /2,value:不满意)");
  231. table1colToName.put("ALLMIDDLETIME", "整体案卷黄灯开始时间(根据紧急程度middle计算的)");
  232. table1colToName.put("ALLIMPORTANTTIME", "整体案卷橙灯开始时间(根据紧急程度important计算的)");
  233. table1colToName.put("ALLENDTIME", "整体案卷截止时间(红灯开始时间)");
  234. table1colToName.put("MIDDLESOLVINGTIME", "处理阶段黄灯开始时间");
  235. table1colToName.put("IMPORTANTSOLVINGTIME", "处理阶段橙灯开始时间");
  236. table1colToName.put("LASTSOLVINGTIME", "处理阶段红灯开始时间(处理截止时间)");
  237. table1colToName.put("CALLBACK_FLAG", "12345回访复核单状态标识(0:否,1:是)");
  238. table1colToName.put("URGE_COUNT", "12345催单次数(12345催单时带过来的催单次数)");
  239. table1colToName.put("DU_LIMIT", "12345督办时限(天)");
  240. table1colToName.put("CASEEND", "是否自行处置");
  241. table1colToName.put("BANLIRESULT", "12345办理结果(0,value:解决,1,value:未解决,2,value:部分解决,3,value:不办理退单)");
  242. table1colToName.put("ENDRESULT", "结案评价");
  243. table1colToName.put("VERIFYRESULT", "最后的核实结果(1,value:属实,0,value:不属实)");
  244. table1colToName.put("CHECKRESULT", "最后的核查结果(1,value:完成,0,value:未完成)");
  245. table1colToName.put("PRIORITYAREA", "重要区域");
  246. table1colToName.put("CONTACTMODE", "反映人联系方式");
  247. table1colToName.put("BACKCOUNT", "退单次数(案件所有主责部门累加)");
  248. table1colToName.put("HESHICOUNT", "核实次数");
  249. table1colToName.put("HECHACOUNT", "核查次数");
  250. table1colToName.put("HUIFANGCOUNT", "回访次数");
  251. table1colToName.put("HASLEADTYPECOUNT", "领导督办次数");
  252. table1colToName.put("HASTENTYPECOUNT", "催办过的次数");
  253. table1colToName.put("HOTLINESN", "12319编号,延伸为外系统管理单号");
  254. table1colToName.put("JHPT_UPDATE_TIME", "JHPT_UPDATE_TIME");
  255. table1colToName.put("JHPT_DELETE", "JHPT_UPDATE_TIME");
  256. table1colToName.put("DEPTNAME", "部门名");
  257. table1colToName.put("STATUSNAME", "状态名");
  258. return table1colToName;
  259. }
  260. private static String getCellValueAsString(Cell cell) {
  261. switch (cell.getCellType()) {
  262. case STRING:
  263. return cell.getStringCellValue();
  264. case NUMERIC:
  265. if (DateUtil.isCellDateFormatted(cell)) {
  266. return cell.getDateCellValue().toString();
  267. } else {
  268. return String.valueOf(cell.getNumericCellValue());
  269. }
  270. case BOOLEAN:
  271. return String.valueOf(cell.getBooleanCellValue());
  272. case FORMULA:
  273. try {
  274. return cell.getCellFormula();
  275. } catch (Exception e) {
  276. return "";
  277. }
  278. default:
  279. return "";
  280. }
  281. }
  282. /**
  283. * 替换表头
  284. */
  285. private static void ModifyExcelHeaderJExcelApi(){
  286. try {
  287. // 读取现有的Excel文件
  288. FileInputStream file = new FileInputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\tableHeader.xlsx");
  289. Workbook workbook = new XSSFWorkbook(file);
  290. Map<String, String> table1colToName = updateTableHeader();
  291. // 遍历每个工作表
  292. for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
  293. Sheet sheet = workbook.getSheetAt(sheetIndex);
  294. // 获取表头行(假设第一行是表头)
  295. Row headerRow = sheet.getRow(0);
  296. if (headerRow!= null) {
  297. // 遍历每个表头单元格进行替换
  298. // 遍历表头行中的每个单元格
  299. for (Cell cell : headerRow) {
  300. if (cell!= null) {
  301. String oldHeaderValue = getCellValueAsString(cell);
  302. if (table1colToName.containsKey(oldHeaderValue)) {
  303. cell.setCellValue(table1colToName.get(oldHeaderValue));
  304. }
  305. }
  306. }
  307. }
  308. }
  309. // 保存修改后的Excel文件
  310. FileOutputStream outputStream = new FileOutputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\outPutTableHeader.xlsx");
  311. workbook.write(outputStream);
  312. outputStream.close();
  313. file.close();
  314. } catch (IOException e) {
  315. e.printStackTrace();
  316. }
  317. }
  318. private static void XlsxFileSplitByRowRange(String filePath, int tag) {
  319. try {
  320. // 读取原始xlsx文件
  321. Workbook workbook = new XSSFWorkbook(new File(filePath));
  322. Sheet sheet = workbook.getSheetAt(0);
  323. int totalRows = sheet.getLastRowNum();
  324. int rowsPerFile = 10000; // 每个分割文件包含的行数
  325. for (int i = 0; i <= totalRows; i += rowsPerFile) {
  326. // 创建新的工作簿用于保存分割后的行数据
  327. Workbook newWorkbook = new XSSFWorkbook();
  328. Sheet newSheet = newWorkbook.createSheet();
  329. int endRow = Math.min(i + rowsPerFile, totalRows + 1);
  330. for (int j = i; j < endRow; j++) {
  331. Row row = sheet.getRow(j);
  332. if (row != null) {
  333. Row newRow = newSheet.createRow(j - i);
  334. for (Cell cell : row) {
  335. if (cell != null) {
  336. Cell newCell = newRow.createCell(cell.getColumnIndex());
  337. // 复制单元格类型和值
  338. newCell.setCellType(cell.getCellType());
  339. if (cell.getCellType() == CellType.STRING) {
  340. newCell.setCellValue(cell.getStringCellValue());
  341. } else if (cell.getCellType() == CellType.NUMERIC) {
  342. newCell.setCellValue(cell.getNumericCellValue());
  343. } else if (cell.getCellType() == CellType.BOOLEAN) {
  344. newCell.setCellValue(cell.getBooleanCellValue());
  345. }
  346. }
  347. }
  348. }
  349. }
  350. // 将分割后的行数据保存为新的xlsx文件
  351. FileOutputStream outputStream = new FileOutputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\output_" + tag + "_" + (i / rowsPerFile) + ".xlsx");
  352. newWorkbook.write(outputStream);
  353. outputStream.close();
  354. newWorkbook.close();
  355. }
  356. workbook.close();
  357. } catch (IOException | InvalidFormatException e) {
  358. e.printStackTrace();
  359. }
  360. }
  361. private static Object getCellValue(Cell cell) {
  362. if (cell == null) {
  363. return null;
  364. }
  365. CellType cellType = cell.getCellType();
  366. switch (cellType) {
  367. case STRING:
  368. return cell.getStringCellValue();
  369. case NUMERIC:
  370. if (DateUtil.isCellDateFormatted(cell)) {
  371. return cell.getDateCellValue();
  372. } else {
  373. return cell.getNumericCellValue();
  374. }
  375. case BOOLEAN:
  376. return cell.getBooleanCellValue();
  377. case FORMULA:
  378. return cell.getCellFormula();
  379. default:
  380. return null;
  381. }
  382. }
  383. public static List<File> listFilesInDirectory(String path) {
  384. List<File> fileList = new ArrayList<>();
  385. File directory = new File(path);
  386. // 首先判断给定的路径是否是一个目录
  387. if (directory.isDirectory()) {
  388. File[] files = directory.listFiles();
  389. if (files != null) {
  390. for (File file : files) {
  391. if (file.isFile()) {
  392. fileList.add(file);
  393. } else if (file.isDirectory()) {
  394. // 如果是子目录,递归调用获取子目录下的文件
  395. fileList.addAll(listFilesInDirectory(file.getAbsolutePath()));
  396. }
  397. }
  398. }
  399. }
  400. return fileList;
  401. }
  402. public static void main(String[] args) {
  403. // 根据Map字段修改表头
  404. // ModifyExcelHeaderJExcelApi();
  405. // 根据文件路径得到下面的所有文件集合
  406. List<File> fileList = listFilesInDirectory("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\");
  407. // 根据文件集合得到
  408. // Set<String> addressSet = new HashSet<>();
  409. // TODO 所有文件加入
  410. int tagIndex = 0;
  411. for (File fileItem : fileList) {
  412. if (fileItem.getPath().contains(".xlsx") && fileItem.getPath().contains("marge_excel_uuid")) {
  413. System.out.println(fileItem.getPath());
  414. tagIndex++;
  415. // 分割文件
  416. XlsxFileSplitByRowRange(fileItem.getPath(), tagIndex);
  417. }
  418. }
  419. System.exit(0);
  420. /*
  421. // 测试请求结果
  422. ArrayList<String> resultList = new ArrayList<>();
  423. ArrayList<String> errorResultList = new ArrayList<>();
  424. for (String addrStr : addressSet) {
  425. String resultStr = RequestUtils.request(StringUtils.deleteWhitespace(addrStr));
  426. if (resultStr != null && resultStr.contains("result")) {
  427. JSONParser jsonParser = new JSONParser();
  428. try {
  429. // System.out.println(addrStr + "------解析成功数据:" + jsonParser.parse(resultStr));
  430. resultList.add(resultStr);
  431. } catch (Exception e) {
  432. // System.err.println(addrStr + "------解析失败数据:" + resultStr);
  433. errorResultList.add(addrStr);
  434. }
  435. }
  436. }
  437. System.out.println("总地址个数:" + addressSet.size());
  438. System.out.println("解析成功的地址个数:" + resultList.size());
  439. System.out.println("解析失败的地址个数:" + errorResultList.size());*/
  440. }
  441. }