Ver Fonte

再次优化匹配逻辑和初始化逻辑,增强分词逻辑

DESKTOP-6LTVLN7\Liumouren há 5 dias atrás
pai
commit
617fe96654

+ 2 - 2
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -86,7 +86,7 @@ public class AddressQueryEngine {
             return addressResult;
         }
         AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(addr);
-        if (lmrAddrBean.getRule() != null && Integer.parseInt(lmrAddrBean.getRule()) <= 0) {
+        if (lmrAddrBean.getRule() != null && Integer.parseInt(lmrAddrBean.getRule()) <= 0 && !addr.contains("新城区")) {
             addressResult.setMessage("非上海数据");
             AddressResult.ContentBean content = new AddressResult.ContentBean();
             content.setSearchAddress(addr);
@@ -100,7 +100,7 @@ public class AddressQueryEngine {
             return addressResult;
         }
         SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(addr);
-        if (splitAddress.getStatus() == 3) {// 不是地址
+        if (splitAddress.getStatus() == 3 && !addr.contains("新城区")) {// 不是地址
             addressResult.setMessage("非地址数据");
             return addressResult;
         } else {

+ 4 - 0
src/main/java/com/skyversation/poiaddr/controller/CorporateLibraryController.java

@@ -45,6 +45,10 @@ public class CorporateLibraryController {
         }
 //      数据库查询
         SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(address);
+        String district = request.getParameter("district");
+        if(StringUtils.hasText(district)){
+            splitAddress.setDistrict(district);
+        }
         List<YyskDmdzAddressStandardization> list = AreaService.getInstance().getAddressPoiOnlyDB(splitAddress);
         AddressResult addressResult = null;
         if (list == null || list.size() < 1) {

+ 3 - 7
src/main/java/com/skyversation/poiaddr/service/AreaService.java

@@ -206,7 +206,6 @@ public class AreaService {
                     } else {
                         ScheduledTasks.putAllDmdzData(item.getAddress(), item);
                     }
-
                 }
             }
             if (item.getOid() > maxOid) {
@@ -331,13 +330,10 @@ public class AreaService {
 //      TODO 数据库查询逻辑调整为内存处理的方式
         String addr = splitAddress.getAddr();
         String addr2 = addr + "";
-        if (addr2.contains("号")) {
-            addr2 = addr2.substring(0, addr2.indexOf("号"));
-        }
-        if (addr2.contains("弄")) {
-            addr2 = addr2.substring(0, addr2.indexOf("弄"));
+        Map<String, String> roadInfos = AddrSplitLmrMap.parseAddress(addr2);
+        if (roadInfos != null && StringUtils.hasText(roadInfos.get("路名"))) {
+            addr2 = roadInfos.get("路名");
         }
-        addr2 = addr2.replaceAll("(?<=[^\\d])\\d+$", "");
         List<YyskDmdzAddressStandardization> returnDatas = new ArrayList<>();
         if (ScheduledTasks.allDmdzData.containsKey(addr2)) {
             returnDatas = ScheduledTasks.allDmdzData.get(addr2);

+ 19 - 14
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -266,6 +266,15 @@ public class AddrSplitLmrMap {
         return false;
     }
 
+    /**
+     * Strips the administrative prefix from an address: everything up to and including the
+     * last occurrence of one of the markers 新城区, 工业区, 委员会, 开发区, 科技园区 or 村委会.
+     *
+     * <p>The earlier recursive form discarded the result of its recursive call, so only the
+     * first marker was ever removed; all markers are now consumed in a single scan.
+     *
+     * @param addressStr raw address text; may be {@code null}
+     * @return the text after the last marker, the input unchanged when no marker is present,
+     *         or {@code null} when the input is {@code null}
+     */
+    public static String deleteDisStr(String addressStr){
+        if (addressStr == null) {
+            return null;
+        }
+        Matcher disMatcher = Pattern.compile("(新城区|工业区|委员会|开发区|科技园区|村委会)").matcher(addressStr);
+        int cutIndex = 0;
+        // Successive find() calls resume after the previous match, which is equivalent to
+        // repeatedly re-matching on the remaining substring.
+        while (disMatcher.find()) {
+            cutIndex = disMatcher.end();
+        }
+        return addressStr.substring(cutIndex);
+    }
+
     public static Map<String, String> parseAddress(String addressStr) {
         if (addressStr == null || addressStr.trim().isEmpty()) {
             return null;
@@ -273,6 +282,8 @@ public class AddrSplitLmrMap {
         addressStr = addressStr.trim();
         Map<String, String> returnMap = new HashMap<>();
         String remaining = addressStr;
+//        Strip any leading text through a district-type marker: 新城区|工业区|委员会|开发区|科技园区|村委会
+        remaining = deleteDisStr(remaining);
 //        先替换xx号-xx号
         Matcher diyMatcher = Pattern.compile("(\\d号+-\\d号)").matcher(remaining);
         if (diyMatcher.find()) {
@@ -288,9 +299,14 @@ public class AddrSplitLmrMap {
             Matcher nongTagMatcher = Pattern.compile("(\\d+(?:-+\\d)?号)").matcher(remaining);
             if (nongTagMatcher.find()) {
 //                存在\d+(?:-+\d)?号
-                returnMap.put("楼栋号", nongTagMatcher.group(1));
-                remaining = remaining.substring(nongTagMatcher.end());
+                Matcher nongTagMatcher0 = Pattern.compile("(\\d+-\\d+号)").matcher(remaining);
+                if (nongTagMatcher0.find()) {
+                    returnMap.put("楼栋号", nongTagMatcher0.group(1));
+                } else {
+                    returnMap.put("楼栋号", nongTagMatcher.group(1));
+                }
             } else {
+//                不存在\d+(?:-+\d)?号
                 returnMap.put("楼栋号", null);
             }
         } else {
@@ -343,6 +359,7 @@ public class AddrSplitLmrMap {
         /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
         AddrSplitLmrMap.initFile();
         System.out.println(outAddrMapInAddr("村165号"));
+        System.out.println(outAddrMapInAddr("仓桥镇玉秀路136-16号101室"));
         System.out.println(outAddrMapInAddr("上海市松江区乐都路339号松江电信大楼1303室"));
         System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
         // 测试示例(包含所有典型场景)
@@ -355,17 +372,5 @@ public class AddrSplitLmrMap {
 //        private String buildingNumber;
 ////      房间号
 //        private String roomNumber;
-//        String testAddress = "北松公路6961弄6-29号205";
-//        String testAddress = "香泾路377-1号306室";
-//        String testAddress = "新松江路887弄1号611室";
-//        String testAddress = "方塔东三村6号506室";
-//        String testAddress = "中山西路 36弄 5号 202室";
-        String testAddress = "叶榭镇八字桥村镇南31-29号001室";
-        Map<String, String> res = parseAddress(testAddress);
-        System.out.println("测试地址:" + testAddress);
-        System.out.println("路名:" + res.get("路名"));
-        System.out.println("弄号:" + res.get("弄号"));
-        System.out.println("楼栋号:" + res.get("楼栋号"));
-        System.out.println("室号:" + res.get("室号"));
     }
 }

+ 2 - 3
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -47,7 +47,7 @@ public class ShanghaiAddressSplitUtil {
 
     private static final Pattern OVER_SPLIT = Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
 
-    private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\-][0-9]+(?=[号弄])");
+    private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\][0-9]+(?=[号弄])");
 
     @PostConstruct
     private void init() {
@@ -461,7 +461,6 @@ public class ShanghaiAddressSplitUtil {
     public static List<SplitAddress> splitAddresses(String sourceAddress) {
         Matcher matcher = Pattern.compile("(\\(*\\))|((*))|(\\{*})|([*])").matcher(sourceAddress);
         List<SplitAddress> addressList = new ArrayList<>();
-//        String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)", "");
         String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
                 .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
         StringBuilder sb = new StringBuilder();
@@ -500,7 +499,7 @@ public class ShanghaiAddressSplitUtil {
 
     public static void main(String[] args) throws Exception {
         new ShanghaiAddressSplitUtil().init();
-        String testAddress = "上海市松江区陈春公路198弄70号-2号1201室(我是一个奇怪的地址";
+        String testAddress = "永丰街道松江工业区仓桥镇玉秀路39号";
         SplitAddress xzqh = splitBestAddress(testAddress);
         System.out.println(xzqh);
         System.out.println("测试地址:" + testAddress);

+ 4 - 7
src/main/java/com/skyversation/poiaddr/util/tasks/ScheduledTasks.java

@@ -289,19 +289,16 @@ public class ScheduledTasks {
     }
 
     public static void putAllDmdzData(String addr, YyskDmdzAddressStandardization item) {
-        if (addr.contains("号")) {
-            addr = addr.substring(0, addr.indexOf("号"));
+        Map<String, String> roadInfos = AddrSplitLmrMap.parseAddress(addr);
+        if (roadInfos != null && StringUtils.hasText(roadInfos.get("路名"))) {
+            addr = roadInfos.get("路名");
         }
-        if (addr.contains("弄")) {
-            addr = addr.substring(0, addr.indexOf("弄"));
-        }
-        addr = addr.replaceAll("(?<=[^\\d])\\d+$", "");
         if (StringUtils.hasText(addr)) {
             if (!ScheduledTasks.allDmdzData.containsKey(addr)) {
                 List<YyskDmdzAddressStandardization> datas = new ArrayList<>();
                 datas.add(item);
                 ScheduledTasks.allDmdzData.put(addr, datas);
-            } else if (ScheduledTasks.allDmdzData.get(addr).size() < 100) {
+            } else if (ScheduledTasks.allDmdzData.get(addr).size() < 1000) {
                 ScheduledTasks.allDmdzData.get(addr).add(item);
             }
         }