|
@@ -0,0 +1,347 @@
|
|
|
+package com.skyversation.poiaddr.util;
|
|
|
+
|
|
|
+import com.skyversation.poiaddr.addquery.Constant;
|
|
|
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
|
|
+import org.apache.poi.ss.usermodel.*;
|
|
|
+
|
|
|
+import java.io.File;
|
|
|
+import java.io.FileInputStream;
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
+import org.apache.poi.util.IOUtils;
|
|
|
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
+
|
|
|
+import java.io.FileOutputStream;
|
|
|
+
|
|
|
+/**
|
|
|
+ * xlsx文档解析并输出内容为List<Map<String,String>>
|
|
|
+ * 大文件分割
|
|
|
+ */
|
|
|
+public class ExcelReaderUtils {
|
|
|
+
|
|
|
+ public static void writeToExcel(List<Map<String, Object>> dataList, String filePath) throws IOException {
|
|
|
+ // 创建一个新的工作簿(对应一个Excel文件)
|
|
|
+ Workbook workbook = new XSSFWorkbook();
|
|
|
+ // 创建一个工作表
|
|
|
+ Sheet sheet = workbook.createSheet("Sheet1");
|
|
|
+
|
|
|
+ // 写入表头(从Map的键中获取列名)
|
|
|
+ if (!dataList.isEmpty()) {
|
|
|
+ Map<String, Object> firstMap = dataList.get(0);
|
|
|
+ int colIndex = 0;
|
|
|
+ Row headerRow = sheet.createRow(0);
|
|
|
+ for (String key : firstMap.keySet()) {
|
|
|
+ Cell cell = headerRow.createCell(colIndex++);
|
|
|
+ cell.setCellValue(key);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 逐行写入数据
|
|
|
+ int rowIndex = 1;
|
|
|
+ for (Map<String, Object> dataMap : dataList) {
|
|
|
+ Row dataRow = sheet.createRow(rowIndex++);
|
|
|
+ int colIndex = 0;
|
|
|
+ for (Object value : dataMap.values()) {
|
|
|
+ Cell cell = dataRow.createCell(colIndex++);
|
|
|
+ if (value instanceof String) {
|
|
|
+ cell.setCellValue((String) value);
|
|
|
+ } else if (value instanceof Integer) {
|
|
|
+ cell.setCellValue((Integer) value);
|
|
|
+ } else if (value instanceof Double) {
|
|
|
+ cell.setCellValue((Double) value);
|
|
|
+ } else if (value instanceof Boolean) {
|
|
|
+ cell.setCellValue((Boolean) value);
|
|
|
+ } else {
|
|
|
+ cell.setCellValue(value != null ? value.toString() : "");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 将工作簿写入到文件
|
|
|
+ try (FileOutputStream outputStream = new FileOutputStream(filePath)) {
|
|
|
+ workbook.write(outputStream);
|
|
|
+ } finally {
|
|
|
+ workbook.close();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static List<Map<String, Object>> readExcel(String filePath) throws IOException {
|
|
|
+ List<Map<String, Object>> resultList = new ArrayList<>();
|
|
|
+ FileInputStream fis = new FileInputStream(filePath);
|
|
|
+ IOUtils.setByteArrayMaxOverride(400000000);
|
|
|
+ // 创建工作簿对象,用于代表整个Excel文件
|
|
|
+ Workbook workbook = WorkbookFactory.create(fis);
|
|
|
+ // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
|
|
|
+ Sheet sheet = workbook.getSheetAt(0);
|
|
|
+ // 获取表头行
|
|
|
+ Row headerRow = sheet.getRow(0);
|
|
|
+ int headerSize = headerRow.getLastCellNum();
|
|
|
+
|
|
|
+ // 遍历数据行(从第二行开始,第一行是表头)
|
|
|
+ for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
|
|
|
+ Row currentRow = sheet.getRow(rowIndex);
|
|
|
+ Map<String, Object> rowMap = new HashMap<>();
|
|
|
+ for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
|
|
|
+ Cell headerCell = headerRow.getCell(cellIndex);
|
|
|
+ Cell currentCell = currentRow.getCell(cellIndex);
|
|
|
+ String headerValue = getCellValue(headerCell).toString();
|
|
|
+ Object currentValue = getCellValue(currentCell);
|
|
|
+ rowMap.put(headerValue, currentValue);
|
|
|
+ }
|
|
|
+ resultList.add(rowMap);
|
|
|
+ }
|
|
|
+
|
|
|
+ workbook.close();
|
|
|
+ fis.close();
|
|
|
+ return resultList;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public static List<Map<String, Object>> readExcel(File file) throws IOException {
|
|
|
+ List<Map<String, Object>> resultList = new ArrayList<>();
|
|
|
+ FileInputStream fis = new FileInputStream(file);
|
|
|
+ IOUtils.setByteArrayMaxOverride(400000000);
|
|
|
+ // 创建工作簿对象,用于代表整个Excel文件
|
|
|
+ Workbook workbook = WorkbookFactory.create(fis);
|
|
|
+ // 这里我们默认读取第一个工作表,如果需要读取指定名称或者索引的工作表可以进行相应修改
|
|
|
+ Sheet sheet = workbook.getSheetAt(0);
|
|
|
+ // 获取表头行
|
|
|
+ Row headerRow = sheet.getRow(0);
|
|
|
+ int headerSize = headerRow.getLastCellNum();
|
|
|
+
|
|
|
+ // 遍历数据行(从第二行开始,第一行是表头)
|
|
|
+ for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
|
|
|
+ Row currentRow = sheet.getRow(rowIndex);
|
|
|
+ Map<String, Object> rowMap = new HashMap<>();
|
|
|
+ for (int cellIndex = 0; cellIndex < headerSize; cellIndex++) {
|
|
|
+ Cell headerCell = headerRow.getCell(cellIndex);
|
|
|
+ Cell currentCell = currentRow.getCell(cellIndex);
|
|
|
+ String headerValue = getCellValue(headerCell).toString();
|
|
|
+ Object currentValue = getCellValue(currentCell);
|
|
|
+ rowMap.put(headerValue, currentValue);
|
|
|
+ }
|
|
|
+ resultList.add(rowMap);
|
|
|
+ }
|
|
|
+
|
|
|
+ workbook.close();
|
|
|
+ fis.close();
|
|
|
+ return resultList;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void splitExcelByRows(String inputFilePath, int rowsPerFile) throws IOException {
|
|
|
+ FileInputStream inputStream = new FileInputStream(new File(inputFilePath));
|
|
|
+ IOUtils.setByteArrayMaxOverride(400000000);
|
|
|
+ Workbook workbook = new XSSFWorkbook(inputStream);
|
|
|
+ Sheet sheet = workbook.getSheetAt(0);
|
|
|
+
|
|
|
+ int totalRows = sheet.getLastRowNum() + 1;
|
|
|
+ int fileCount = (totalRows / rowsPerFile) + (totalRows % rowsPerFile == 0 ? 0 : 1);
|
|
|
+
|
|
|
+ for (int i = 0; i < fileCount; i++) {
|
|
|
+ Workbook newWorkbook = new XSSFWorkbook();
|
|
|
+ Sheet newSheet = newWorkbook.createSheet("Sheet1");
|
|
|
+ int startRow = i * rowsPerFile;
|
|
|
+ int endRow = Math.min((i + 1) * rowsPerFile, totalRows);
|
|
|
+
|
|
|
+ for (int rowIndex = startRow; rowIndex < endRow; rowIndex++) {
|
|
|
+ Row sourceRow = sheet.getRow(rowIndex);
|
|
|
+ Row newRow = newSheet.createRow(rowIndex - startRow);
|
|
|
+ if (sourceRow != null) {
|
|
|
+ for (int cellIndex = 0; cellIndex < sourceRow.getLastCellNum(); cellIndex++) {
|
|
|
+ Cell sourceCell = sourceRow.getCell(cellIndex);
|
|
|
+ Cell newCell = newRow.createCell(cellIndex);
|
|
|
+ if (sourceCell != null) {
|
|
|
+ switch (sourceCell.getCellType()) {
|
|
|
+ case STRING:
|
|
|
+ newCell.setCellValue(sourceCell.getStringCellValue());
|
|
|
+ break;
|
|
|
+ case NUMERIC:
|
|
|
+ newCell.setCellValue(sourceCell.getNumericCellValue());
|
|
|
+ break;
|
|
|
+ case BOOLEAN:
|
|
|
+ newCell.setCellValue(sourceCell.getBooleanCellValue());
|
|
|
+ break;
|
|
|
+ // 可以根据实际情况添加更多的类型处理,比如日期等
|
|
|
+ default:
|
|
|
+ newCell.setCellValue("");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ String outputFilePath = getOutputFilePath(inputFilePath, i);
|
|
|
+ FileOutputStream outputStream = new FileOutputStream(outputFilePath);
|
|
|
+ newWorkbook.write(outputStream);
|
|
|
+ outputStream.close();
|
|
|
+ newWorkbook.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ workbook.close();
|
|
|
+ inputStream.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String getOutputFilePath(String inputFilePath, int index) {
|
|
|
+ String baseName = inputFilePath.substring(0, inputFilePath.lastIndexOf('.'));
|
|
|
+ String extension = inputFilePath.substring(inputFilePath.lastIndexOf('.'));
|
|
|
+ return baseName + "_part_" + (index + 1) + extension;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String getCellValueAsString(Cell cell) {
|
|
|
+ switch (cell.getCellType()) {
|
|
|
+ case STRING:
|
|
|
+ return cell.getStringCellValue();
|
|
|
+ case NUMERIC:
|
|
|
+ if (DateUtil.isCellDateFormatted(cell)) {
|
|
|
+ return cell.getDateCellValue().toString();
|
|
|
+ } else {
|
|
|
+ return String.valueOf(cell.getNumericCellValue());
|
|
|
+ }
|
|
|
+ case BOOLEAN:
|
|
|
+ return String.valueOf(cell.getBooleanCellValue());
|
|
|
+ case FORMULA:
|
|
|
+ try {
|
|
|
+ return cell.getCellFormula();
|
|
|
+ } catch (Exception e) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ default:
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void XlsxFileSplitByRowRange(String filePath, int tag) {
|
|
|
+ try {
|
|
|
+ // 读取原始xlsx文件
|
|
|
+ Workbook workbook = new XSSFWorkbook(new File(filePath));
|
|
|
+ Sheet sheet = workbook.getSheetAt(0);
|
|
|
+ int totalRows = sheet.getLastRowNum();
|
|
|
+ int rowsPerFile = 10000; // 每个分割文件包含的行数
|
|
|
+ for (int i = 0; i <= totalRows; i += rowsPerFile) {
|
|
|
+ // 创建新的工作簿用于保存分割后的行数据
|
|
|
+ Workbook newWorkbook = new XSSFWorkbook();
|
|
|
+ Sheet newSheet = newWorkbook.createSheet();
|
|
|
+ int endRow = Math.min(i + rowsPerFile, totalRows + 1);
|
|
|
+ for (int j = i; j < endRow; j++) {
|
|
|
+ Row row = sheet.getRow(j);
|
|
|
+ if (row != null) {
|
|
|
+ Row newRow = newSheet.createRow(j - i);
|
|
|
+ for (Cell cell : row) {
|
|
|
+ if (cell != null) {
|
|
|
+ Cell newCell = newRow.createCell(cell.getColumnIndex());
|
|
|
+ // 复制单元格类型和值
|
|
|
+ newCell.setCellType(cell.getCellType());
|
|
|
+ if (cell.getCellType() == CellType.STRING) {
|
|
|
+ newCell.setCellValue(cell.getStringCellValue());
|
|
|
+ } else if (cell.getCellType() == CellType.NUMERIC) {
|
|
|
+ newCell.setCellValue(cell.getNumericCellValue());
|
|
|
+ } else if (cell.getCellType() == CellType.BOOLEAN) {
|
|
|
+ newCell.setCellValue(cell.getBooleanCellValue());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 将分割后的行数据保存为新的xlsx文件
|
|
|
+ FileOutputStream outputStream = new FileOutputStream("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\output_" + tag + "_" + (i / rowsPerFile) + ".xlsx");
|
|
|
+ newWorkbook.write(outputStream);
|
|
|
+ outputStream.close();
|
|
|
+ newWorkbook.close();
|
|
|
+ }
|
|
|
+ workbook.close();
|
|
|
+ } catch (IOException | InvalidFormatException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static Object getCellValue(Cell cell) {
|
|
|
+ if (cell == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ CellType cellType = cell.getCellType();
|
|
|
+ switch (cellType) {
|
|
|
+ case STRING:
|
|
|
+ return cell.getStringCellValue();
|
|
|
+ case NUMERIC:
|
|
|
+ if (DateUtil.isCellDateFormatted(cell)) {
|
|
|
+ return cell.getDateCellValue();
|
|
|
+ } else {
|
|
|
+ return cell.getNumericCellValue();
|
|
|
+ }
|
|
|
+ case BOOLEAN:
|
|
|
+ return cell.getBooleanCellValue();
|
|
|
+ case FORMULA:
|
|
|
+ return cell.getCellFormula();
|
|
|
+ default:
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static List<File> listFilesInDirectory(String path) {
|
|
|
+ List<File> fileList = new ArrayList<>();
|
|
|
+ File directory = new File(path);
|
|
|
+ // 首先判断给定的路径是否是一个目录
|
|
|
+ if (directory.isDirectory()) {
|
|
|
+ File[] files = directory.listFiles();
|
|
|
+ if (files != null) {
|
|
|
+ for (File file : files) {
|
|
|
+ if (file.isFile()) {
|
|
|
+ fileList.add(file);
|
|
|
+ } else if (file.isDirectory()) {
|
|
|
+ // 如果是子目录,递归调用获取子目录下的文件
|
|
|
+ fileList.addAll(listFilesInDirectory(file.getAbsolutePath()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return fileList;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static boolean isOtherDistrictThanQingpu(String address) {
|
|
|
+ // 将地址字符串转换为小写,以便进行不区分大小写的比较
|
|
|
+ String lowerCaseAddress = address.toLowerCase();
|
|
|
+ // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
|
|
|
+ // 列出上海市的其他区(已补充完整)
|
|
|
+ String[] otherDistricts = {"黄浦区", "徐汇区", "长宁区", "静安区", "普陀区", "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区", "金山区", ("青浦".equals(Constant.getArea()) ? "松江区" : "青浦区"), "奉贤区", "崇明区", "浦东新区"};
|
|
|
+ for (String district : otherDistricts) {
|
|
|
+ if (lowerCaseAddress.contains(district.toLowerCase())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public static boolean isOtherDistrictThanSongJiang(String address) {
|
|
|
+ // 将地址字符串转换为小写,以便进行不区分大小写的比较
|
|
|
+ String lowerCaseAddress = address.toLowerCase();
|
|
|
+ // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
|
|
|
+ // 列出上海市的其他区(已补充完整)
|
|
|
+ String[] otherDistricts = {"黄浦区", "徐汇区", "长宁区", "静安区", "普陀区", "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区", "金山区", "青浦区", "奉贤区", "崇明区", "浦东新区"};
|
|
|
+ for (String district : otherDistricts) {
|
|
|
+ if (lowerCaseAddress.contains(district.toLowerCase())) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*public static void main(String[] args) {
|
|
|
+// 根据Map字段修改表头
|
|
|
+// ModifyExcelHeaderJExcelApi();
|
|
|
+ // 根据文件路径得到下面的所有文件集合
|
|
|
+ List<File> fileList = listFilesInDirectory("C:\\Users\\Liumouren\\Desktop\\临时文件\\元以科技\\青浦\\青浦城建所\\poiAddr\\doc\\");
|
|
|
+// TODO 所有文件加入
|
|
|
+ int tagIndex = 0;
|
|
|
+ for (File fileItem : fileList) {
|
|
|
+ if (fileItem.getPath().contains(".xlsx") && fileItem.getPath().contains("marge_excel_uuid")) {
|
|
|
+ System.out.println(fileItem.getPath());
|
|
|
+ tagIndex++;
|
|
|
+// 分割文件
|
|
|
+ XlsxFileSplitByRowRange(fileItem.getPath(), tagIndex);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ System.exit(0);
|
|
|
+ }*/
|
|
|
+}
|