package com.skyversation.poiaddr.util; import com.skyversation.poiaddr.addquery.Constant; import com.skyversation.poiaddr.entity.SjArrDzbzhSjWcbryDzxx; import com.skyversation.poiaddr.entity.YyskAddressStandardization; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.formula.functions.T; import org.apache.poi.ss.usermodel.*; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.lang.reflect.Field; import java.util.*; import org.apache.poi.util.IOUtils; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import java.io.FileOutputStream; /** * xlsx文档解析并输出内容为List> * 大文件分割 */ public class ExcelReaderUtils { public static void writeToExcel(List> dataList, String filePath) throws IOException { // 创建一个新的工作簿(对应一个Excel文件) Workbook workbook = new XSSFWorkbook(); // 创建一个工作表 Sheet sheet = workbook.createSheet("Sheet1"); // 写入表头(从Map的键中获取列名) if (!dataList.isEmpty()) { Map firstMap = dataList.get(0); int colIndex = 0; Row headerRow = sheet.createRow(0); for (String key : firstMap.keySet()) { Cell cell = headerRow.createCell(colIndex++); cell.setCellValue(key); } } // 逐行写入数据 int rowIndex = 1; for (Map dataMap : dataList) { Row dataRow = sheet.createRow(rowIndex++); int colIndex = 0; for (Object value : dataMap.values()) { Cell cell = dataRow.createCell(colIndex++); if (value instanceof String) { cell.setCellValue((String) value); } else if (value instanceof Integer) { cell.setCellValue((Integer) value); } else if (value instanceof Double) { cell.setCellValue((Double) value); } else if (value instanceof Boolean) { cell.setCellValue((Boolean) value); } else { cell.setCellValue(value != null ? value.toString() : ""); } } } // 将工作簿写入到文件 try (FileOutputStream outputStream = new FileOutputStream(filePath)) { workbook.write(outputStream); } finally { workbook.close(); } } public static void writeClassToExcel(List dataList, String filePath) throws IOException { writeToExcel(convertListToMapList(dataList), filePath); } public static List> convertListToMapList(List list) { List> result = new ArrayList<>(); for (SjArrDzbzhSjWcbryDzxx item : list) { Map map = new HashMap<>(); Class clazz = item.getClass(); Field[] fields = clazz.getDeclaredFields(); for (Field field : fields) { field.setAccessible(true); try { map.put(field.getName(), field.get(item)); } catch (Exception e) { e.printStackTrace(); } } result.add(map); } return result; } public static void convertYYListToMapList(List list, String filePath) { List> result = new ArrayList<>(); for (YyskAddressStandardization item : list) { Map map = new HashMap<>(); Class clazz = item.getClass(); Field[] fields = clazz.getDeclaredFields(); for (Field field : fields) { field.setAccessible(true); try { map.put(field.getName(), field.get(item)); } catch (Exception e) { e.printStackTrace(); } } result.add(map); } list.clear(); try { if (result.size() > 0) { writeToExcel(result, filePath); result.clear(); } } catch (Exception e) { e.printStackTrace(); } } public static List> readExcel(String filePath) throws IOException { List> resultList = new ArrayList<>(); FileInputStream fis = new FileInputStream(filePath); IOUtils.setByteArrayMaxOverride(400000000); // 创建工作簿对象,用于代表整个Excel文件 Workbook workbook = WorkbookFactory.create(fis); // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改 Sheet sheet = workbook.getSheetAt(0); // 获取表头行 Row headerRow = sheet.getRow(0); int headerSize = headerRow.getLastCellNum(); // 遍历数据行(从第二行开始,第一行是表头) for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) { Row currentRow = sheet.getRow(rowIndex); Map rowMap = new HashMap<>(); for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) { Cell headerCell = headerRow.getCell(cellIndex); Cell currentCell = currentRow.getCell(cellIndex); String headerValue = getCellValue(headerCell).toString(); Object currentValue = getCellValue(currentCell); rowMap.put(headerValue, currentValue); } resultList.add(rowMap); } workbook.close(); fis.close(); return resultList; } public static List> readExcel(File file) throws IOException { List> resultList = new ArrayList<>(); FileInputStream fis = new FileInputStream(file); IOUtils.setByteArrayMaxOverride(400000000); // 创建工作簿对象,用于代表整个Excel文件 Workbook workbook = WorkbookFactory.create(fis); // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改 Sheet sheet = workbook.getSheetAt(0); // 获取表头行 Row headerRow = sheet.getRow(0); int headerSize = headerRow.getLastCellNum(); // 遍历数据行(从第二行开始,第一行是表头) for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) { Row currentRow = sheet.getRow(rowIndex); Map rowMap = new HashMap<>(); for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) { Cell headerCell = headerRow.getCell(cellIndex); Cell currentCell = currentRow.getCell(cellIndex); String headerValue = getCellValue(headerCell).toString(); Object currentValue = getCellValue(currentCell); rowMap.put(headerValue, currentValue); } resultList.add(rowMap); } workbook.close(); fis.close(); return resultList; } public static void splitExcelByRows(String inputFilePath, int rowsPerFile) throws IOException { FileInputStream inputStream = new FileInputStream(new File(inputFilePath)); IOUtils.setByteArrayMaxOverride(400000000); Workbook workbook = new XSSFWorkbook(inputStream); Sheet sheet = workbook.getSheetAt(0); int totalRows = sheet.getLastRowNum() + 1; int fileCount = (totalRows / rowsPerFile) + (totalRows % rowsPerFile == 0 ? 0 : 1); for (int i = 0; i < fileCount; i++) { Workbook newWorkbook = new XSSFWorkbook(); Sheet newSheet = newWorkbook.createSheet("Sheet1"); int startRow = i * rowsPerFile; int endRow = Math.min((i + 1) * rowsPerFile, totalRows); for (int rowIndex = startRow; rowIndex < endRow; rowIndex++) { Row sourceRow = sheet.getRow(rowIndex); Row newRow = newSheet.createRow(rowIndex - startRow); if (sourceRow != null) { for (int cellIndex = 0; cellIndex < sourceRow.getLastCellNum(); cellIndex++) { Cell sourceCell = sourceRow.getCell(cellIndex); Cell newCell = newRow.createCell(cellIndex); if (sourceCell != null) { switch (sourceCell.getCellType()) { case STRING: newCell.setCellValue(sourceCell.getStringCellValue()); break; case NUMERIC: newCell.setCellValue(sourceCell.getNumericCellValue()); break; case BOOLEAN: newCell.setCellValue(sourceCell.getBooleanCellValue()); break; // 可以根据实际情况添加更多的类型处理,比如日期等 default: newCell.setCellValue(""); } } } } } String outputFilePath = getOutputFilePath(inputFilePath, i); FileOutputStream outputStream = new FileOutputStream(outputFilePath); newWorkbook.write(outputStream); outputStream.close(); newWorkbook.close(); } workbook.close(); inputStream.close(); } private static String getOutputFilePath(String inputFilePath, int index) { String baseName = inputFilePath.substring(0, inputFilePath.lastIndexOf('.')); String extension = inputFilePath.substring(inputFilePath.lastIndexOf('.')); return baseName + "_part_" + (index + 1) + extension; } private static String getCellValueAsString(Cell cell) { switch (cell.getCellType()) { case STRING: return cell.getStringCellValue(); case NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { return cell.getDateCellValue().toString(); } else { return String.valueOf(cell.getNumericCellValue()); } case BOOLEAN: return String.valueOf(cell.getBooleanCellValue()); case FORMULA: try { return cell.getCellFormula(); } catch (Exception e) { return ""; } default: return ""; } } private static void XlsxFileSplitByRowRange(String filePath, int tag) { try { // 读取原始xlsx文件 Workbook workbook = new XSSFWorkbook(new File(filePath)); Sheet sheet = workbook.getSheetAt(0); int totalRows = sheet.getLastRowNum(); int rowsPerFile = 10000; // 每个分割文件包含的行数 for (int i = 0; i <= totalRows; i += rowsPerFile) { // 创建新的工作簿用于保存分割后的行数据 Workbook newWorkbook = new XSSFWorkbook(); Sheet newSheet = newWorkbook.createSheet(); int endRow = Math.min(i + rowsPerFile, totalRows + 1); for (int j = i; j < endRow; j++) { Row row = sheet.getRow(j); if (row != null) { Row newRow = newSheet.createRow(j - i); for (Cell cell : row) { if (cell != null) { Cell newCell = newRow.createCell(cell.getColumnIndex()); // 复制单元格类型和值 newCell.setCellType(cell.getCellType()); if (cell.getCellType() == CellType.STRING) { newCell.setCellValue(cell.getStringCellValue()); } else if (cell.getCellType() == CellType.NUMERIC) { newCell.setCellValue(cell.getNumericCellValue()); } else if (cell.getCellType() == CellType.BOOLEAN) { newCell.setCellValue(cell.getBooleanCellValue()); } } } } } // 将分割后的行数据保存为新的xlsx文件 FileOutputStream outputStream = new FileOutputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\output_" + tag + "_" + (i / rowsPerFile) + ".xlsx"); newWorkbook.write(outputStream); outputStream.close(); newWorkbook.close(); } workbook.close(); } catch (IOException | InvalidFormatException e) { e.printStackTrace(); } } private static Object getCellValue(Cell cell) { if (cell == null) { return null; } CellType cellType = cell.getCellType(); switch (cellType) { case STRING: return cell.getStringCellValue(); case NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { return cell.getDateCellValue(); } else { return cell.getNumericCellValue(); } case BOOLEAN: return cell.getBooleanCellValue(); case FORMULA: return cell.getCellFormula(); default: return null; } } public static List listFilesInDirectory(String path) { List fileList = new ArrayList<>(); File directory = new File(path); // 首先判断给定的路径是否是一个目录 if (directory.isDirectory()) { File[] files = directory.listFiles(); if (files != null) { for (File file : files) { if (file.isFile()) { fileList.add(file); } else if (file.isDirectory()) { // 如果是子目录,递归调用获取子目录下的文件 fileList.addAll(listFilesInDirectory(file.getAbsolutePath())); } } } } return fileList; } /*public static void main(String[] args) { // 根据Map字段修改表头 // ModifyExcelHeaderJExcelApi(); // 根据文件路径得到下面的所有文件集合 List fileList = listFilesInDirectory("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\"); // TODO 所有文件加入 int tagIndex = 0; for (File fileItem : fileList) { if (fileItem.getPath().contains(".xlsx") && fileItem.getPath().contains("marge_excel_uuid")) { System.out.println(fileItem.getPath()); tagIndex++; // 分割文件 XlsxFileSplitByRowRange(fileItem.getPath(), tagIndex); } } System.exit(0); }*/ }