ExcelReaderUtils.java 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. package com.skyversation.poiaddr.util;
  2. import com.skyversation.poiaddr.addquery.Constant;
  3. import com.skyversation.poiaddr.entity.SjArrDzbzhSjWcbryDzxx;
  4. import com.skyversation.poiaddr.entity.YyskAddressStandardization;
  5. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  6. import org.apache.poi.ss.formula.functions.T;
  7. import org.apache.poi.ss.usermodel.*;
  8. import java.io.File;
  9. import java.io.FileInputStream;
  10. import java.io.IOException;
  11. import java.lang.reflect.Field;
  12. import java.util.*;
  13. import org.apache.poi.util.IOUtils;
  14. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  15. import java.io.FileOutputStream;
  16. /**
  17. * xlsx文档解析并输出内容为List<Map<String,String>>
  18. * 大文件分割
  19. */
  20. public class ExcelReaderUtils {
  21. public static void writeToExcel(List<Map<String, Object>> dataList, String filePath) throws IOException {
  22. // 创建一个新的工作簿(对应一个Excel文件)
  23. Workbook workbook = new XSSFWorkbook();
  24. // 创建一个工作表
  25. Sheet sheet = workbook.createSheet("Sheet1");
  26. // 写入表头(从Map的键中获取列名)
  27. if (!dataList.isEmpty()) {
  28. Map<String, Object> firstMap = dataList.get(0);
  29. int colIndex = 0;
  30. Row headerRow = sheet.createRow(0);
  31. for (String key : firstMap.keySet()) {
  32. Cell cell = headerRow.createCell(colIndex++);
  33. cell.setCellValue(key);
  34. }
  35. }
  36. // 逐行写入数据
  37. int rowIndex = 1;
  38. for (Map<String, Object> dataMap : dataList) {
  39. Row dataRow = sheet.createRow(rowIndex++);
  40. int colIndex = 0;
  41. for (Object value : dataMap.values()) {
  42. Cell cell = dataRow.createCell(colIndex++);
  43. if (value instanceof String) {
  44. cell.setCellValue((String) value);
  45. } else if (value instanceof Integer) {
  46. cell.setCellValue((Integer) value);
  47. } else if (value instanceof Double) {
  48. cell.setCellValue((Double) value);
  49. } else if (value instanceof Boolean) {
  50. cell.setCellValue((Boolean) value);
  51. } else {
  52. cell.setCellValue(value != null ? value.toString() : "");
  53. }
  54. }
  55. }
  56. // 将工作簿写入到文件
  57. try (FileOutputStream outputStream = new FileOutputStream(filePath)) {
  58. workbook.write(outputStream);
  59. } finally {
  60. workbook.close();
  61. }
  62. }
  63. public static void writeClassToExcel(List<SjArrDzbzhSjWcbryDzxx> dataList, String filePath) throws IOException {
  64. writeToExcel(convertListToMapList(dataList), filePath);
  65. }
  66. public static List<Map<String, Object>> convertListToMapList(List<SjArrDzbzhSjWcbryDzxx> list) {
  67. List<Map<String, Object>> result = new ArrayList<>();
  68. for (SjArrDzbzhSjWcbryDzxx item : list) {
  69. Map<String, Object> map = new HashMap<>();
  70. Class<?> clazz = item.getClass();
  71. Field[] fields = clazz.getDeclaredFields();
  72. for (Field field : fields) {
  73. field.setAccessible(true);
  74. try {
  75. map.put(field.getName(), field.get(item));
  76. } catch (Exception e) {
  77. e.printStackTrace();
  78. }
  79. }
  80. result.add(map);
  81. }
  82. return result;
  83. }
  84. public static void convertYYListToMapList(List<YyskAddressStandardization> list, String filePath) {
  85. List<Map<String, Object>> result = new ArrayList<>();
  86. for (YyskAddressStandardization item : list) {
  87. Map<String, Object> map = new HashMap<>();
  88. Class<?> clazz = item.getClass();
  89. Field[] fields = clazz.getDeclaredFields();
  90. for (Field field : fields) {
  91. field.setAccessible(true);
  92. try {
  93. map.put(field.getName(), field.get(item));
  94. } catch (Exception e) {
  95. e.printStackTrace();
  96. }
  97. }
  98. result.add(map);
  99. }
  100. list.clear();
  101. try {
  102. if (result.size() > 0) {
  103. writeToExcel(result, filePath);
  104. result.clear();
  105. }
  106. } catch (Exception e) {
  107. e.printStackTrace();
  108. }
  109. }
  110. public static List<Map<String, Object>> readExcel(String filePath) throws IOException {
  111. List<Map<String, Object>> resultList = new ArrayList<>();
  112. FileInputStream fis = new FileInputStream(filePath);
  113. IOUtils.setByteArrayMaxOverride(400000000);
  114. // 创建工作簿对象,用于代表整个Excel文件
  115. Workbook workbook = WorkbookFactory.create(fis);
  116. // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
  117. Sheet sheet = workbook.getSheetAt(0);
  118. // 获取表头行
  119. Row headerRow = sheet.getRow(0);
  120. int headerSize = headerRow.getLastCellNum();
  121. // 遍历数据行(从第二行开始,第一行是表头)
  122. for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
  123. Row currentRow = sheet.getRow(rowIndex);
  124. Map<String, Object> rowMap = new HashMap<>();
  125. for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
  126. Cell headerCell = headerRow.getCell(cellIndex);
  127. Cell currentCell = currentRow.getCell(cellIndex);
  128. String headerValue = getCellValue(headerCell).toString();
  129. Object currentValue = getCellValue(currentCell);
  130. rowMap.put(headerValue, currentValue);
  131. }
  132. resultList.add(rowMap);
  133. }
  134. workbook.close();
  135. fis.close();
  136. return resultList;
  137. }
  138. public static List<Map<String, Object>> readExcel(File file) throws IOException {
  139. List<Map<String, Object>> resultList = new ArrayList<>();
  140. FileInputStream fis = new FileInputStream(file);
  141. IOUtils.setByteArrayMaxOverride(400000000);
  142. // 创建工作簿对象,用于代表整个Excel文件
  143. Workbook workbook = WorkbookFactory.create(fis);
  144. // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
  145. Sheet sheet = workbook.getSheetAt(0);
  146. // 获取表头行
  147. Row headerRow = sheet.getRow(0);
  148. int headerSize = headerRow.getLastCellNum();
  149. // 遍历数据行(从第二行开始,第一行是表头)
  150. for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
  151. Row currentRow = sheet.getRow(rowIndex);
  152. Map<String, Object> rowMap = new HashMap<>();
  153. for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
  154. Cell headerCell = headerRow.getCell(cellIndex);
  155. Cell currentCell = currentRow.getCell(cellIndex);
  156. String headerValue = getCellValue(headerCell).toString();
  157. Object currentValue = getCellValue(currentCell);
  158. rowMap.put(headerValue, currentValue);
  159. }
  160. resultList.add(rowMap);
  161. }
  162. workbook.close();
  163. fis.close();
  164. return resultList;
  165. }
  166. public static void splitExcelByRows(String inputFilePath, int rowsPerFile) throws IOException {
  167. FileInputStream inputStream = new FileInputStream(new File(inputFilePath));
  168. IOUtils.setByteArrayMaxOverride(400000000);
  169. Workbook workbook = new XSSFWorkbook(inputStream);
  170. Sheet sheet = workbook.getSheetAt(0);
  171. int totalRows = sheet.getLastRowNum() + 1;
  172. int fileCount = (totalRows / rowsPerFile) + (totalRows % rowsPerFile == 0 ? 0 : 1);
  173. for (int i = 0; i < fileCount; i++) {
  174. Workbook newWorkbook = new XSSFWorkbook();
  175. Sheet newSheet = newWorkbook.createSheet("Sheet1");
  176. int startRow = i * rowsPerFile;
  177. int endRow = Math.min((i + 1) * rowsPerFile, totalRows);
  178. for (int rowIndex = startRow; rowIndex < endRow; rowIndex++) {
  179. Row sourceRow = sheet.getRow(rowIndex);
  180. Row newRow = newSheet.createRow(rowIndex - startRow);
  181. if (sourceRow != null) {
  182. for (int cellIndex = 0; cellIndex < sourceRow.getLastCellNum(); cellIndex++) {
  183. Cell sourceCell = sourceRow.getCell(cellIndex);
  184. Cell newCell = newRow.createCell(cellIndex);
  185. if (sourceCell != null) {
  186. switch (sourceCell.getCellType()) {
  187. case STRING:
  188. newCell.setCellValue(sourceCell.getStringCellValue());
  189. break;
  190. case NUMERIC:
  191. newCell.setCellValue(sourceCell.getNumericCellValue());
  192. break;
  193. case BOOLEAN:
  194. newCell.setCellValue(sourceCell.getBooleanCellValue());
  195. break;
  196. // 可以根据实际情况添加更多的类型处理,比如日期等
  197. default:
  198. newCell.setCellValue("");
  199. }
  200. }
  201. }
  202. }
  203. }
  204. String outputFilePath = getOutputFilePath(inputFilePath, i);
  205. FileOutputStream outputStream = new FileOutputStream(outputFilePath);
  206. newWorkbook.write(outputStream);
  207. outputStream.close();
  208. newWorkbook.close();
  209. }
  210. workbook.close();
  211. inputStream.close();
  212. }
  213. private static String getOutputFilePath(String inputFilePath, int index) {
  214. String baseName = inputFilePath.substring(0, inputFilePath.lastIndexOf('.'));
  215. String extension = inputFilePath.substring(inputFilePath.lastIndexOf('.'));
  216. return baseName + "_part_" + (index + 1) + extension;
  217. }
  218. private static String getCellValueAsString(Cell cell) {
  219. switch (cell.getCellType()) {
  220. case STRING:
  221. return cell.getStringCellValue();
  222. case NUMERIC:
  223. if (DateUtil.isCellDateFormatted(cell)) {
  224. return cell.getDateCellValue().toString();
  225. } else {
  226. return String.valueOf(cell.getNumericCellValue());
  227. }
  228. case BOOLEAN:
  229. return String.valueOf(cell.getBooleanCellValue());
  230. case FORMULA:
  231. try {
  232. return cell.getCellFormula();
  233. } catch (Exception e) {
  234. return "";
  235. }
  236. default:
  237. return "";
  238. }
  239. }
  240. private static void XlsxFileSplitByRowRange(String filePath, int tag) {
  241. try {
  242. // 读取原始xlsx文件
  243. Workbook workbook = new XSSFWorkbook(new File(filePath));
  244. Sheet sheet = workbook.getSheetAt(0);
  245. int totalRows = sheet.getLastRowNum();
  246. int rowsPerFile = 10000; // 每个分割文件包含的行数
  247. for (int i = 0; i <= totalRows; i += rowsPerFile) {
  248. // 创建新的工作簿用于保存分割后的行数据
  249. Workbook newWorkbook = new XSSFWorkbook();
  250. Sheet newSheet = newWorkbook.createSheet();
  251. int endRow = Math.min(i + rowsPerFile, totalRows + 1);
  252. for (int j = i; j < endRow; j++) {
  253. Row row = sheet.getRow(j);
  254. if (row != null) {
  255. Row newRow = newSheet.createRow(j - i);
  256. for (Cell cell : row) {
  257. if (cell != null) {
  258. Cell newCell = newRow.createCell(cell.getColumnIndex());
  259. // 复制单元格类型和值
  260. newCell.setCellType(cell.getCellType());
  261. if (cell.getCellType() == CellType.STRING) {
  262. newCell.setCellValue(cell.getStringCellValue());
  263. } else if (cell.getCellType() == CellType.NUMERIC) {
  264. newCell.setCellValue(cell.getNumericCellValue());
  265. } else if (cell.getCellType() == CellType.BOOLEAN) {
  266. newCell.setCellValue(cell.getBooleanCellValue());
  267. }
  268. }
  269. }
  270. }
  271. }
  272. // 将分割后的行数据保存为新的xlsx文件
  273. FileOutputStream outputStream = new FileOutputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\output_" + tag + "_" + (i / rowsPerFile) + ".xlsx");
  274. newWorkbook.write(outputStream);
  275. outputStream.close();
  276. newWorkbook.close();
  277. }
  278. workbook.close();
  279. } catch (IOException | InvalidFormatException e) {
  280. e.printStackTrace();
  281. }
  282. }
  283. private static Object getCellValue(Cell cell) {
  284. if (cell == null) {
  285. return null;
  286. }
  287. CellType cellType = cell.getCellType();
  288. switch (cellType) {
  289. case STRING:
  290. return cell.getStringCellValue();
  291. case NUMERIC:
  292. if (DateUtil.isCellDateFormatted(cell)) {
  293. return cell.getDateCellValue();
  294. } else {
  295. return cell.getNumericCellValue();
  296. }
  297. case BOOLEAN:
  298. return cell.getBooleanCellValue();
  299. case FORMULA:
  300. return cell.getCellFormula();
  301. default:
  302. return null;
  303. }
  304. }
  305. public static List<File> listFilesInDirectory(String path) {
  306. List<File> fileList = new ArrayList<>();
  307. File directory = new File(path);
  308. // 首先判断给定的路径是否是一个目录
  309. if (directory.isDirectory()) {
  310. File[] files = directory.listFiles();
  311. if (files != null) {
  312. for (File file : files) {
  313. if (file.isFile()) {
  314. fileList.add(file);
  315. } else if (file.isDirectory()) {
  316. // 如果是子目录,递归调用获取子目录下的文件
  317. fileList.addAll(listFilesInDirectory(file.getAbsolutePath()));
  318. }
  319. }
  320. }
  321. }
  322. return fileList;
  323. }
  324. /*public static void main(String[] args) {
  325. // 根据Map字段修改表头
  326. // ModifyExcelHeaderJExcelApi();
  327. // 根据文件路径得到下面的所有文件集合
  328. List<File> fileList = listFilesInDirectory("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\");
  329. // TODO 所有文件加入
  330. int tagIndex = 0;
  331. for (File fileItem : fileList) {
  332. if (fileItem.getPath().contains(".xlsx") && fileItem.getPath().contains("marge_excel_uuid")) {
  333. System.out.println(fileItem.getPath());
  334. tagIndex++;
  335. // 分割文件
  336. XlsxFileSplitByRowRange(fileItem.getPath(), tagIndex);
  337. }
  338. }
  339. System.exit(0);
  340. }*/
  341. }