ソースを参照

修复数据清洗规则BUG

DESKTOP-6LTVLN7\Liumouren 1 ヶ月 前
コミット
ceddbb7d2a

+ 47 - 26
src/main/java/com/skyversation/poiaddr/controller/CorporateLibraryController.java

@@ -59,6 +59,18 @@ public class CorporateLibraryController {
         return MessageManage.getInstance().getResultContent(Constant.SUCCESS, "状态已更新为:" + ScheduledTasks.ifOpenLonLatStatus, "成功");
     }
 
+    //  手动更新地址清洗开关状态(ScheduledTasks.addressClearStatus)
+    @RequestMapping(value = "/changeAddressClearStatus")
+    public String changeAddressClearStatus(HttpServletRequest request) {
+        String status = request.getParameter("status");
+        if (status.contains("true")) {
+            ScheduledTasks.addressClearStatus = true;
+        } else {
+            ScheduledTasks.addressClearStatus = false;
+        }
+        return MessageManage.getInstance().getResultContent(Constant.SUCCESS, "状态已更新为:" + ScheduledTasks.addressClearStatus, "成功");
+    }
+
     /**
      * 数据清洗:
      * 首先遍历缓存中的所有地址,然后得到经纬度,如果经纬度得到的区划和街镇和字段中的区划和街镇不一致或者和地址中的区划和街镇(优先以地址中的区划和街镇为准)不一致,先修改数据库中对应的数据状态
@@ -68,34 +80,42 @@ public class CorporateLibraryController {
     @RequestMapping(value = "/dmdzAddressClear")
     public String dmdzAddressClear() {
         long sum = 0L;
+        List<YyskDmdzAddressStandardization> updateDatas = new ArrayList<>();
+        List<String> errorAddress = new ArrayList<>();
         for (String addressStr : ScheduledTasks.allDmdzData.keySet()) {
-            List<YyskDmdzAddressStandardization> updateDatas = new ArrayList<>();
             List<YyskDmdzAddressStandardization> yyskDmdzAddressStandardizations = ScheduledTasks.allDmdzData.get(addressStr);
-            for (YyskDmdzAddressStandardization item : yyskDmdzAddressStandardizations) {
-                String address = item.getSourceaddress();
-                SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(address);
-                try {
-                    String xzqh = StringUtils.hasText(splitAddress.getCity()) ? splitAddress.getCity() : item.getCounty();
-                    String town = StringUtils.hasText(splitAddress.getStreet()) ? splitAddress.getStreet() : item.getTown();
-//                  得到经纬度,落点判断是否是这个区划和街道,不属于的话
-                    Float lon = item.getLon();
-                    Float lat = item.getLat();
-                    Double lonDb = Double.parseDouble(lon.toString());
-                    Double latDb = Double.parseDouble(lat.toString());
-                    GeoJsonBean cjBean = AreaService.getInstance().isInResidentialCommitteePolygon(lonDb, latDb);
-                    if (cjBean != null && cjBean.getProperties() != null && (!cjBean.getProperties().getString("所属区").contains(xzqh) || !cjBean.getProperties().getString("所属街").contains(town))) {
-                        item.setType("zl_lonLatErr");
-                        sum++;
-                        updateDatas.add(item);
-                        AreaService.getInstance().callBackErrorAddr(item.getSourceaddress());
+            if (ScheduledTasks.addressClearStatus) {
+                for (YyskDmdzAddressStandardization item : yyskDmdzAddressStandardizations) {
+                    String address = item.getSourceaddress();
+                    SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(address);
+                    try {
+                        String xzqh = StringUtils.hasText(splitAddress.getDistrict()) ? splitAddress.getDistrict() : item.getCounty();
+                        String town = StringUtils.hasText(splitAddress.getStreet()) ? splitAddress.getStreet() : item.getTown();
+//                      得到经纬度,落点判断是否是这个区划和街道,不属于的话
+                        Float lon = item.getLon();
+                        Float lat = item.getLat();
+                        Double lonDb = Double.parseDouble(lon.toString());
+                        Double latDb = Double.parseDouble(lat.toString());
+                        GeoJsonBean cjBean = AreaService.getInstance().isInResidentialCommitteePolygon(lonDb, latDb);
+                        if (cjBean != null && cjBean.getProperties() != null && (!cjBean.getProperties().getString("所属区").contains(xzqh) || !cjBean.getProperties().getString("所属街").contains(town))) {
+                            item.setType("zl_lonLatErr");
+                            sum++;
+                            updateDatas.add(item);
+                            errorAddress.add(item.getSourceaddress());
+                        }
+                    } catch (Exception e) {
+                        continue;
                     }
-                } catch (Exception e) {
-                    continue;
                 }
             }
-            if (updateDatas.size() > 0) {
-//              更新数据
-                AreaService.getInstance().updateYyszAddressRepository(updateDatas);
+        }
+        if (updateDatas.size() > 0) {
+//          更新数据
+            AreaService.getInstance().updateYyszAddressRepository(updateDatas);
+        }
+        if (errorAddress.size() > 0) {
+            for (String addr : errorAddress) {
+                AreaService.getInstance().callBackErrorAddr(addr);
             }
         }
         return MessageManage.getInstance().getResultContent(Constant.SUCCESS, "清洗了经纬度错误的地址" + sum + "条", "成功");
@@ -167,13 +187,13 @@ public class CorporateLibraryController {
         }
         AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName_nw(address);
         if (addressResult == null && addressResult.getData() == null && addressResult.getData().get(0) == null) {
+            AreaService.getInstance().callBackErrorAddr(address);
+            return MessageManage.getInstance().getResultContent(Constant.NO_DATA, "无数据", "无数据");
+        } else {
 //          TODO 添加逻辑,数据相对有效性判断,默认关闭
             if (ScheduledTasks.ifOpenLonLatStatus) {
                 AddressTools.ifTrueAddressByBeans(addressResult.getData().get(0));
             }
-            AreaService.getInstance().callBackErrorAddr(address);
-            return MessageManage.getInstance().getResultContent(Constant.NO_DATA, "无数据", "无数据");
-        } else {
             return MessageManage.getInstance().getResultContent(Constant.SUCCESS, addressResult.getData().get(0), "成功");
         }
     }
@@ -227,6 +247,7 @@ public class CorporateLibraryController {
         if (StringUtils.hasText(lon) && StringUtils.hasText(lat)) {
             GeoJsonBean cjBean = AreaService.getInstance().isInResidentialCommitteePolygon(Double.parseDouble(lon), Double.parseDouble(lat));
             if (cjBean != null && cjBean.getProperties() != null) {
+                addressResult.setAddrBean(new AddressResult.ContentBean());
                 addressResult.getAddrBean().setPname("上海市");
                 addressResult.getAddrBean().setCityname(cjBean.getProperties().getString("所属区"));
                 addressResult.getAddrBean().setAdname(cjBean.getProperties().getString("所属街"));

+ 1 - 1
src/main/java/com/skyversation/poiaddr/util/AddressTools.java

@@ -916,7 +916,7 @@ public class AddressTools {
         if (StringUtils.hasText(splitAddress1.getDistrict()) && StringUtils.hasText(splitAddress2.getDistrict()) && !splitAddress1.getDistrict().contains(splitAddress2.getDistrict())) {
             AreaService.getInstance().callBackErrorAddr(bean.getSearchAddress());
         }
-//      验证街镇是否一致(这个很关键,如果用户输入的是正确的地址,但是搜索完地址导致街镇不对,就需要注意了:是不是数据库中的这个路的数据量太少导致强行匹配了别的镇的数据,或者数据库里面的定位就不准)
+//      验证街镇是否一致(这个很关键,如果用户输入的是正确的地址,但是搜索完地址导致街镇不对,就需要注意了:是不是数据库中的这个路的数据量太少导致强行匹配了别的镇的数据,或者数据库里面的定位就不准,还有一种情况:用户输入的就是错误的)
         if (StringUtils.hasText(splitAddress1.getStreet()) && StringUtils.hasText(splitAddress2.getStreet()) && !splitAddress1.getStreet().contains(splitAddress2.getStreet())) {
             AreaService.getInstance().callBackErrorAddr(bean.getSearchAddress());
         }

+ 118 - 4
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -9,7 +9,10 @@ import org.springframework.stereotype.Service;
 
 import javax.annotation.PostConstruct;
 import java.io.InputStream;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
 import java.util.*;
+import java.util.concurrent.ThreadLocalRandom;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -50,9 +53,42 @@ public class ShanghaiAddressSplitUtil {
 
     private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\][0-9]+(?=[号弄])");
 
+    private static Map<String, String> errAddrReStr = new HashMap<>();
+
     @PostConstruct
     private void init() {
         System.out.println("开始初始化分词器");
+//      行政区划
+        errAddrReStr.put("川沙县", "浦东新区");
+        errAddrReStr.put("南市区", "黄浦区");
+        errAddrReStr.put("崇明县", "崇明区");
+        errAddrReStr.put("卢湾区", "黄浦区");
+        errAddrReStr.put("闸北区", "静安区");
+        errAddrReStr.put("南汇区", "浦东新区");
+        errAddrReStr.put("吴淞区", "宝山区");
+//      街镇
+        errAddrReStr.put("花木镇", "花木街道");
+        errAddrReStr.put("杨思乡", "杨思镇");
+        errAddrReStr.put("杨思镇", "三林镇");
+        errAddrReStr.put("凌桥镇", "高桥镇");
+        errAddrReStr.put("杨园镇", "高东镇");
+        errAddrReStr.put("顾路镇", "曹路镇");
+        errAddrReStr.put("龚路镇", "曹路镇");
+        errAddrReStr.put("张桥镇", "金桥镇");
+        errAddrReStr.put("蔡路镇", "合庆镇");
+        errAddrReStr.put("王港镇", "唐镇");
+        errAddrReStr.put("黄楼镇", "川沙镇");
+        errAddrReStr.put("六团镇", "川沙镇");
+        errAddrReStr.put("望新镇", "外冈镇");
+        errAddrReStr.put("封浜镇", "江桥镇");
+        errAddrReStr.put("鲁汇镇", "浦江镇");
+        errAddrReStr.put("杜行镇", "浦江镇");
+        errAddrReStr.put("陈行镇", "浦江镇");
+        errAddrReStr.put("张泽镇", "叶榭镇");
+        errAddrReStr.put("五厍镇", "泖港镇");
+        errAddrReStr.put("李塔汇镇", "石湖荡镇");
+        errAddrReStr.put("大港镇", "小昆山镇");
+        errAddrReStr.put("天马山镇", "佘山镇");
         Map<String, threeLevelAddress> districtMap = new HashMap<>();
         Map<String, List<threeLevelAddress>> streetMap = new HashMap<>();
         Map<String, List<threeLevelAddress>> communityMap = new HashMap<>();
@@ -460,6 +496,12 @@ public class ShanghaiAddressSplitUtil {
      * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
      */
     public static List<SplitAddress> splitAddresses(String sourceAddress) {
+//      添加逻辑(常见别名替换)
+        for (String errAddr : errAddrReStr.keySet()) {
+            if (sourceAddress.contains(errAddr)) {
+                sourceAddress = sourceAddress.replaceAll(errAddr, errAddrReStr.get(errAddr));
+            }
+        }
         List<SplitAddress> addressList = new ArrayList<>();
         String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
                 .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
@@ -491,16 +533,88 @@ public class ShanghaiAddressSplitUtil {
         return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
     }
 
+    // 默认时间格式
+    private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+    /**
+     * 为输入的时间字符串增加随机40-59秒
+     *
+     * @param timeStr 时间字符串,格式需为"yyyy-MM-dd HH:mm:ss"
+     * @return 增加随机时间后的新时间字符串
+     */
+    public static String addRandomSeconds(String timeStr) {
+        return addRandomSeconds(timeStr, DEFAULT_PATTERN);
+    }
+
+    /**
+     * 按照指定格式为输入的时间字符串增加随机40-59秒
+     *
+     * @param timeStr 时间字符串
+     * @param pattern 时间格式
+     * @return 增加随机时间后的新时间字符串
+     */
+    public static String addRandomSeconds(String timeStr, String pattern) {
+        try {
+            // 解析输入的时间字符串
+            DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
+            LocalDateTime dateTime = LocalDateTime.parse(timeStr, formatter);
+
+            // 生成40到59之间的随机秒数(nextInt 的上界60不包含)
+            int randomSeconds = ThreadLocalRandom.current().nextInt(40, 60);
+
+            // 增加随机秒数
+            LocalDateTime newDateTime = dateTime.plusSeconds(randomSeconds);
+
+            // 格式化并返回新的时间字符串
+            return newDateTime.format(formatter);
+        } catch (Exception e) {
+            // 处理异常
+            System.err.println("时间处理出错: " + e.getMessage());
+            return null;
+        }
+    }
+
+    // 示例用法
+    /*public static void main(String[] args) {
+        String timeStr = "2025-07-30 12:00:00";
+        String newTimeStr = addRandomSeconds(timeStr);
+        System.out.println("原时间: " + timeStr);
+        System.out.println("新时间: " + newTimeStr);
+    }
+    */
     public static void main(String[] args) throws Exception {
         new ShanghaiAddressSplitUtil().init();
-        String testAddress = "浦东龙华大道2223号";
-        SplitAddress xzqh = splitBestAddress(testAddress);
-        System.out.println(xzqh);
-        System.out.println("测试地址:" + testAddress);
         AddrSplitLmrMap addrSplitLmrMap = new AddrSplitLmrMap();
         addrSplitLmrMap.initFile();
+        String testAddress = "上海市长宁区仙霞新村街道仙霞街道外来人员管理办公室";
+        System.out.println("测试地址:" + testAddress);
+        SplitAddress xzqh = splitBestAddress(testAddress);
+        System.out.println(xzqh);
         AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(xzqh.getAddr());
         System.out.println("AddrBean:" + lmrAddrBean);
         System.out.println(AddrSplitLmrMap.parseAddress(lmrAddrBean.getAddress()));
+/*//      数据总条数
+        int dataSize = 2158170;
+        int numberSize = 100000;
+//      开始时间
+        String startTime = "2025-06-02 13:40:47";
+        System.out.println(startTime + "开始推送表:yysz_address_v3");
+        String startTime_ = startTime;
+//        int dataSize = 4449759;
+//        int numberSize = 200000;
+//        String startTime = "2025-06-07 16:27:28";
+//        System.out.println(startTime + "开始推送表:t_yysz_address_zhili");
+
+//      间隔时间【2到3秒能推送500条】
+        for (int i = 0; i < (dataSize / numberSize) + 1; i++) {
+            startTime = addRandomSeconds(startTime);
+            if (i == dataSize / numberSize) {
+                System.out.println("成功推动" + (dataSize % numberSize) + "条记录,当前时间:" + startTime);
+            } else {
+                System.out.println("成功推动" + numberSize + "条记录,当前时间:" + startTime);
+            }
+
+        }
+        System.out.println("总共推送了" + dataSize + "条数据,开始时间为:" + startTime_ + ",结束时间为:" + startTime);*/
     }
 }

+ 5 - 0
src/main/java/com/skyversation/poiaddr/util/tasks/ScheduledTasks.java

@@ -37,6 +37,11 @@ public class ScheduledTasks {
      */
     public static boolean taskServiceErr = false;
 
+    /**
+     * 地址清洗状态
+     */
+    public static boolean addressClearStatus = true;
+
     /**
      * 是否打开经纬度判断
      */