瀏覽代碼

初始化地址库数据时过滤一些脏数据

DESKTOP-6LTVLN7\Liumouren 1 天之前
父節點
當前提交
40f450bfd7

+ 0 - 7
src/main/java/com/skyversation/poiaddr/controller/CorporateLibraryController.java

@@ -144,13 +144,6 @@ public class CorporateLibraryController {
             } catch (Exception e2) {
                 e2.printStackTrace();
             }
-            /**
-             * -- 第二部分:将匹配数据写入治理回匹表
-             * INSERT INTO TABLE 治理回匹表
-             * SELECT *
-             * FROM 治理结果汇总表
-             * WHERE address IN (SELECT address FROM 待治理表);
-             */
         }
         return "处理完成";
     }

+ 10 - 1
src/main/java/com/skyversation/poiaddr/service/AreaService.java

@@ -206,6 +206,13 @@ public class AreaService {
         }
     }
 
+    /**
+     * -- 第二部分:将匹配数据写入治理回匹表
+     * INSERT INTO TABLE 治理回匹表
+     * SELECT *
+     * FROM 治理结果汇总表
+     * WHERE address IN (SELECT address FROM 待治理表);
+     */
     public void selectLimitData(int page, int pageSize, String batchNumber) {
         Map<String, Object> loginfoMap = new HashMap<>();
         loginfoMap.put("page", page);
@@ -325,7 +332,9 @@ public class AreaService {
             returnDatas = ScheduledTasks.allDmdzData.get(addr2);
         } else {
             for (String key : ScheduledTasks.allDmdzData.keySet()) {
-                if (key != null && addr != null && (key.contains(addr2) || addr.contains(key))) {
+//                if (key != null && addr != null && (key.contains(addr2) || addr.contains(key))) {
+//                TODO 地址库有离谱的骚东西
+                if (key != null && addr != null && key.contains(addr2)) {
                     returnDatas.addAll(ScheduledTasks.allDmdzData.get(key));
                     break;
                 }

+ 1 - 1
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -139,7 +139,7 @@ public class AddrSplitLmrMap {
                     }
                     //              判断外地县名
                     for (String x : All_NO_SH_tree.get(s).get(m)) {
-                        if (addr.contains(x) && !addr.contains(x + "场")) {
+                        if (x.length() > 2 && addr.contains(x) && !addr.contains(x + "场")) {
                             addrMap.setDistinguish(x);
                             addrMap.setProvinces(s);
                             addrMap.setMarket(m);

+ 0 - 7
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -459,7 +459,6 @@ public class ShanghaiAddressSplitUtil {
      * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
      */
     public static List<SplitAddress> splitAddresses(String sourceAddress) {
-        Matcher matcher = Pattern.compile("(\\(*\\))|(（*）)|(\\{*})|([*])").matcher(sourceAddress);
         List<SplitAddress> addressList = new ArrayList<>();
         String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("（", "").replaceAll("）", "")
                 .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
@@ -478,12 +477,6 @@ public class ShanghaiAddressSplitUtil {
         }
         beautyString = sb.toString();
         addressList.add(beautyResult(split(beautyString)));
-        if (matcher.find()) {
-            String address = matcher.group();
-            if (address.length() > 2) {
-                addressList.addAll(splitAddresses(address.substring(1, address.length() - 1)));
-            };
-        }
         for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
         return addressList;
     }

+ 4 - 4
src/main/java/com/skyversation/poiaddr/util/tasks/ScheduledTasks.java

@@ -159,8 +159,8 @@ public class ScheduledTasks {
                                 AddressResult.ContentBean contentBean = new AddressResult.ContentBean();
                                 AddressResult addressResult = AddressQueryEngine.getInstance().sj_szxSearchByName(addr, 3);
                                 if (addressResult != null && addressResult.getData() != null && addressResult.getData().size() > 0) {
-                                    for(AddressResult.ContentBean itemc: addressResult.getData()){
-                                        if(itemc.getScore().contains("rule_") || Float.parseFloat(itemc.getScore()) > 2.0){
+                                    for (AddressResult.ContentBean itemc : addressResult.getData()) {
+                                        if (itemc.getScore().contains("rule_") || Float.parseFloat(itemc.getScore()) > 2.0) {
                                             contentBean = addressResult.getData().get(0);
                                             break;
                                         }
@@ -276,7 +276,7 @@ public class ScheduledTasks {
         return yyskDmdzAddressStandardization;
     }
 
-    public static void yyskDmdzAddressStandardizationPutDmdz(YyskDmdzAddressStandardization yyskDmdzAddressStandardization){
+    public static void yyskDmdzAddressStandardizationPutDmdz(YyskDmdzAddressStandardization yyskDmdzAddressStandardization) {
         if (yyskDmdzAddressStandardization.getSourceaddress() != null && StringUtils.hasText(yyskDmdzAddressStandardization.getSourceaddress())) {
             SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(yyskDmdzAddressStandardization.getSourceaddress());
             if (splitAddress.getAddr().length() > 2) {
@@ -284,7 +284,7 @@ public class ScheduledTasks {
             } else {
                 putAllDmdzData(yyskDmdzAddressStandardization.getSourceaddress(), yyskDmdzAddressStandardization);
             }
-            if (yyskDmdzAddressStandardization.getAddress() != null && StringUtils.hasText(yyskDmdzAddressStandardization.getAddress()) && !yyskDmdzAddressStandardization.getSourceaddress().contains(yyskDmdzAddressStandardization.getAddress())) {
+            if (yyskDmdzAddressStandardization.getAddress() != null && yyskDmdzAddressStandardization.getAddress().length() > 2 && StringUtils.hasText(yyskDmdzAddressStandardization.getAddress()) && !yyskDmdzAddressStandardization.getSourceaddress().contains(yyskDmdzAddressStandardization.getAddress())) {
                 SplitAddress splitAddress2 = ShanghaiAddressSplitUtil.splitBestAddress(yyskDmdzAddressStandardization.getAddress());
                 if (splitAddress2.getAddr().length() > 2) {
                     putAllDmdzData(splitAddress2.getAddr(), yyskDmdzAddressStandardization);