Prechádzať zdrojové kódy

配置文件添加老镇名,方便过滤和后期处理

DESKTOP-6LTVLN7\Liumouren 3 mesiacov pred
rodič
commit
4304a708e4

+ 0 - 2
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -411,13 +411,11 @@ public class AddressQueryEngine {
      * @return
      */
     public static List<Set<String>> tokenizeString(String input) {
-        input = townReplaceAll(input);
         // 初始化两个集合,一个用于存储非数字字符串,一个用于存储数字字符串
         Set<String> nonNumberSet = new HashSet<>();
         Set<String> numberSet = new HashSet<>();
 
         StringBuilder currentToken = new StringBuilder();
-
         for (int i = 0; i < input.length(); i++) {
             char c = input.charAt(i);
             if (Character.isDigit(c)) {

+ 0 - 1
src/main/java/com/skyversation/poiaddr/addquery/Constant.java

@@ -4,7 +4,6 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 
 import javax.annotation.PostConstruct;
-import java.sql.DriverManager;
 import java.sql.SQLException;
 
 @Service

+ 2 - 0
src/main/java/com/skyversation/poiaddr/controller/CorporateLibraryController.java

@@ -87,4 +87,6 @@ public class CorporateLibraryController {
         long endTime = System.currentTimeMillis();
         return "处理完成!用时" + (endTime - startTime) / 1000 + "秒!";
     }
+
+
 }

+ 3 - 3
src/main/java/com/skyversation/poiaddr/service/impl/YyskAddressStandardizationServiceImpl.java

@@ -1,6 +1,7 @@
 package com.skyversation.poiaddr.service.impl;
 
 import com.skyversation.poiaddr.addquery.AddressQueryEngine;
+import com.skyversation.poiaddr.addquery.Constant;
 import com.skyversation.poiaddr.bean.AddressResult;
 import com.skyversation.poiaddr.config.DbConnection;
 import com.skyversation.poiaddr.entity.YyskAddressStandardization;
@@ -235,9 +236,8 @@ public class YyskAddressStandardizationServiceImpl {
                         try {
                             for (AddressResult.ContentBean contentBean : addressResult.getData()) {
                                 String resultAddrKey = contentBean.getAddress();
-//                                && AddressTools.isOtherDistrictThanShangHai(resultAddrKey)
-                                if (resultAddrKey != null && contentBean.getLon() != null && contentBean.getLat() != null) {
-                                    resultAddrKey = AddressTools.getInstance().deleteStr(contentBean.getAddress()) ;
+                                if (resultAddrKey != null && contentBean.getLon() != null && contentBean.getLat() != null && AddressTools.isOtherDistrictThanSongJiang2(resultAddrKey)) {
+                                    resultAddrKey = AddressTools.getInstance().deleteStr(contentBean.getAddress());
                                     if (contentBean.getAdname() != null && !contentBean.getAdname().isEmpty()) {
                                         item.setStreetTown(contentBean.getAdname());
                                     } else if (contentBean.getTownJson() != null && contentBean.getTownJson().getString("name") != null) {

+ 38 - 13
src/main/java/com/skyversation/poiaddr/util/AddressTools.java

@@ -462,8 +462,9 @@ public class AddressTools {
         }
 
         // 去除多余的空白字符
-        address = address.trim();
-
+        address = address.replaceAll(" ", "");
+        // 去除街镇
+        address = AddressQueryEngine.townReplaceAll(address);
         result[1] = foundDistrict;
         result[2] = foundStreet;
         result[3] = foundVillageOrCommunity;
@@ -502,17 +503,19 @@ public class AddressTools {
                     for (String addr2str : address2String) {
                         if (addressString.contains(addr2str)) {
                             addressStrSize--;
-                            if (addressNumber.size() == 0) {
-                                obj.put("总分", "rule_4");
-                                return obj;
-                            } else {
-                                int addressNumSize = addressNumber.size();
-                                for (String addr2Num : address2Number) {
-                                    if (addressNumber.contains(addr2Num)) {
-                                        addressNumSize--;
-                                        if (addressNumSize == 0) {
-                                            obj.put("总分", "rule_4");
-                                            return obj;
+                            if (addressStrSize == 0) {
+                                if (addressNumber.size() == 0) {
+                                    obj.put("总分", "rule_4");
+                                    return obj;
+                                } else {
+                                    int addressNumSize = addressNumber.size();
+                                    for (String addr2Num : address2Number) {
+                                        if (addressNumber.contains(addr2Num)) {
+                                            addressNumSize--;
+                                            if (addressNumSize == 0) {
+                                                obj.put("总分", "rule_4");
+                                                return obj;
+                                            }
                                         }
                                     }
                                 }
@@ -577,6 +580,28 @@ public class AddressTools {
         return true;
     }
 
+    public static boolean isOtherDistrictThanSongJiang2(String address) { // NOTE(review): throws NullPointerException when address is null; the caller visible in this diff null-checks first
+        // Lower-case the address so that any Latin letters are compared case-insensitively.
+        String lowerCaseAddress = address.toLowerCase();
+        // Despite the method name, nothing Songjiang- or Shanghai-specific is checked here: the result is false when the address mentions one of the regions listed below, and true otherwise.
+        // Municipalities, provinces, autonomous regions and special administrative regions to reject (Shanghai itself is not in the list).
+        String[] otherDistricts = {"北京市", "天津市", "重庆市",
+                "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
+                "江苏省", "浙江省", "安徽省", "福建省", "江西省",
+                "山东省", "河南省", "湖北省", "湖南省", "广东省",
+                "海南省", "四川省", "贵州省", "云南省", "陕西省",
+                "甘肃省", "青海省", "台湾省",
+                "内蒙古自治区", "广西壮族自治区", "西藏自治区",
+                "宁夏回族自治区", "新疆维吾尔自治区",
+                "香港特别行政区", "澳门特别行政区"};
+        for (String district : otherDistricts) {
+            if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 2)) == 0) { // match: full region name anywhere, or its first two characters at the very start of the address
+                return false;
+            }
+        }
+        return true;
+    }
+
 
     public static String isOtherDistrictThanShangHai(String address) {
         // 将地址字符串转换为小写,以便进行不区分大小写的比较

+ 0 - 36
src/main/java/com/skyversation/poiaddr/util/SerializationUtils.java

@@ -47,46 +47,10 @@ public class SerializationUtils {
 
     public static void main(String[] args) throws IOException {
         test();
-//        System.out.println(roleUpdate("上海市松江区中山街道东果子弄2号小区", "上海市松江区上海市松江区东果子弄2号"));
         /*List<SjArrDzbzhSjWcbryDzxx> listData = deserialize("output/testDataBase_all.ser");
 //      将序列化的文件转储为xlsx文件
         ExcelReaderUtils.writeClassToExcel(listData, "output/allData.xlsx");*/
     }
-
-    public static boolean roleUpdate(String returnAddress, String address) { // true when every token of returnAddress is also found among the tokens of address (conditions below); false otherwise
-        boolean roleUpdateTag = false;
-        Set<String> addressString = AddressQueryEngine.tokenizeString(AddressTools.parseAddressCJ(returnAddress)[4]).get(0); // NOTE(review): index 0 looks like the non-numeric token set, index 1 the numeric one — confirm against tokenizeString
-        Set<String> addressNumber = AddressQueryEngine.tokenizeString(AddressTools.parseAddressCJ(returnAddress)[4]).get(1); // element [4] of parseAddressCJ is parsed and tokenized again for each set
-        Set<String> address2String = AddressQueryEngine.tokenizeString(AddressTools.parseAddressCJ(address)[4]).get(0);
-        Set<String> address2Number = AddressQueryEngine.tokenizeString(AddressTools.parseAddressCJ(address)[4]).get(1);
-        if (addressString != null && addressString.size() > 1) { // only evaluated when returnAddress yields more than one token in the first set
-            int addressStrSize = addressString.size(); // counts down once per token of returnAddress that is also present in address
-            for (String addr2str : address2String) {
-                if (addressString.contains(addr2str)) {
-                    addressStrSize--;
-                    if (addressStrSize == 0) { // every token of addressString was found in address2String
-                        if (addressNumber.size() == 0) { // nothing in the second set to verify
-                            roleUpdateTag = true;
-                            return roleUpdateTag;
-                        } else {
-                            int addressNumSize = addressNumber.size(); // same countdown, now for the second (number) set
-                            for (String addr2Num : address2Number) {
-                                if (addressNumber.contains(addr2Num)) {
-                                    addressNumSize--;
-                                    if (addressNumSize == 0) { // every token of addressNumber was found in address2Number
-                                        roleUpdateTag = true;
-                                        return roleUpdateTag;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        return roleUpdateTag; // still false here: some token of returnAddress had no counterpart in address
-    }
-
     /**
      * 读取本地xlsx文件得到List<Map<String,Object>>结果,然后得到所有规范化结果地址字段,然后匹配街镇关键字,并将匹配到的街镇关键字放到新的一列
      * 2、根据主键address和标准化地址standardAddress直接跑规则,属于rule_4的要替换掉同时添加updateTag

+ 1 - 3
src/main/resources/application.properties

@@ -23,17 +23,15 @@ spring.servlet.multipart.max-request-size=300MB
 #spring.datasource.username=prd_sjbdc_kjyy
 #spring.datasource.password=B9344157Yi#5
 #spring.datasource.driver-class-name=com.argoDb.jdbc.Driver
-
 # JPA \u914D\u7F6E
 #spring.jpa.database-platform=org.hibernate.dialect.MySQL5Dialect
 #spring.jpa.hibernate.ddl-auto=update
 #spring.jpa.show-sql=false
 # \u677E\u6C5F\u8857\u9547
 app.area=\u4E0A\u6D77
-app.town=\u5CB3\u9633\u8857\u9053,\u6C38\u4E30\u8857\u9053,\u65B9\u677E\u8857\u9053,\u4E2D\u5C71\u8857\u9053,\u5E7F\u5BCC\u6797\u8857\u9053,\u4E5D\u91CC\u4EAD\u8857\u9053,\u6CD7\u6CFE\u9547,\u4F58\u5C71\u9547,\u8F66\u58A9\u9547,\u65B0\u6865\u9547,\u6D1E\u6CFE\u9547,\u4E5D\u4EAD\u9547,\u6CD6\u6E2F\u9547,\u77F3\u6E56\u8361\u9547,\u65B0\u6D5C\u9547,\u53F6\u69AD\u9547,\u5C0F\u6606\u5C71\u9547
+app.town=\u5CB3\u9633\u8857\u9053,\u6C38\u4E30\u8857\u9053,\u65B9\u677E\u8857\u9053,\u4E2D\u5C71\u8857\u9053,\u5E7F\u5BCC\u6797\u8857\u9053,\u4E5D\u91CC\u4EAD\u8857\u9053,\u6CD7\u6CFE\u9547,\u4F58\u5C71\u9547,\u8F66\u58A9\u9547,\u65B0\u6865\u9547,\u6D1E\u6CFE\u9547,\u4E5D\u4EAD\u9547,\u6CD6\u6E2F\u9547,\u77F3\u6E56\u8361\u9547,\u65B0\u6D5C\u9547,\u53F6\u69AD\u9547,\u5C0F\u6606\u5C71\u9547,\u5929\u9A6C\u5C71\u9547
 # \u878D\u5408\u7248\uFF1A1736930075105|\u5173\u952E\u5B57\uFF1A1742459783686
 app.search_server_id=1742459783686
-
 app.db.username=dev_sjbdc_kjyy
 app.db.password=30DFBEABYi#5
 app.db.driver=io.transwarp.jdbc.QuarkDriver