浏览代码

分词逻辑增强

DESKTOP-6LTVLN7\Liumouren 2 天之前
父节点
当前提交
ef6330dd3e

+ 4 - 0
src/main/java/com/skyversation/poiaddr/service/AreaService.java

@@ -323,6 +323,10 @@ public class AreaService {
 //      TODO 数据库查询逻辑调整为内存处理的方式
         String addr = splitAddress.getAddr();
         String addr2 = addr + "";
+        AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(addr2);
+        if(lmrAddrBean.getAddress() != null){
+            addr2 = lmrAddrBean.getAddress();
+        }
         Map<String, String> roadInfos = AddrSplitLmrMap.parseAddress(addr2);
         if (roadInfos != null && StringUtils.hasText(roadInfos.get("路名"))) {
             addr2 = roadInfos.get("路名");

+ 50 - 45
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -33,7 +33,7 @@ public class AddrSplitLmrMap {
     private static HashMap<String, String> districtCodeMap = new HashMap<>();
 
     @PostConstruct
-    private void initFile() {
+    void initFile() {
         System.out.println("开始初始化分词器");
         InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(outPutFilePath);
         if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + outPutFilePath);
@@ -109,8 +109,8 @@ public class AddrSplitLmrMap {
      * @param addr
      * @return
      */
-    static Pattern pattern = Pattern.compile("市|区|镇|街道|县");
-    static Pattern spattern = Pattern.compile("路|街|道|村");
+    static Pattern pattern = Pattern.compile("市|区|镇|街道|大道|县");
+    static Pattern spattern = Pattern.compile("路|街|道|村");
 
     public static AddrBean outAddrMapInAddr(String addr) {
         AddrBean addrMap = new AddrBean();
@@ -154,55 +154,55 @@ public class AddrSplitLmrMap {
 //          如果不是外地数据和连接数据的话
             if (!errorAddr) {
 //              上海地址匹配
-                if (pattern.matcher(addr).find()) {
-                    if (addr.startsWith("上海")) {
+//                if (pattern.matcher(addr).find()) {
+                if (addr.startsWith("上海")) {
+                    addrMap.setProvinces("上海市");
+                    addrMap.setMarket("上海市");
+                    addrMap.setRule("2");
+                }
+//                  匹配区
+                boolean ifContains = false;
+//                  区匹配标识
+                String sh_distinguish = "";
+                for (String d : D_S_C_tree.keySet()) {
+                    if (addr.contains(d) || addr.contains(d.substring(0, 2) + "县")) {
+                        ifContains = true;
                         addrMap.setProvinces("上海市");
                         addrMap.setMarket("上海市");
-                        addrMap.setRule("2");
+                        addrMap.setDistinguish(d);
+                        sh_distinguish = d;
+                        addrMap.setRule("4");
+                        break;
                     }
-//                  匹配区
-                    boolean ifContains = false;
-//                  区匹配标识
-                    String sh_distinguish = "";
-                    for (String d : D_S_C_tree.keySet()) {
-                        if (addr.contains(d) || addr.contains(d.substring(0, 2) + "县")) {
-                            ifContains = true;
+                    if (addr.contains(d.substring(0, 2)) && !ifTrueAddr(addr, d.substring(0, 2))) {
+                        addrMap.setProvinces("上海市");
+                        addrMap.setMarket("上海市");
+                        addrMap.setDistinguish(d);
+                        sh_distinguish = d;
+                        addrMap.setRule("4");
+                    }
+                }
+//                  镇匹配
+                for (String d : D_S_C_tree.keySet()) {
+                    for (String s : D_S_C_tree.get(d).keySet()) {
+                        if (addr.contains(s)) {
                             addrMap.setProvinces("上海市");
                             addrMap.setMarket("上海市");
                             addrMap.setDistinguish(d);
-                            sh_distinguish = d;
-                            addrMap.setRule("4");
+                            addrMap.setStreetTown(s);
+                            addrMap.setRule("8");
                             break;
                         }
-                        if (addr.contains(d.substring(0, 2)) && ifTrueAddr(addr, d.substring(0, 2))) {
+                        if (addr.contains(s.substring(0, 2)) && ifContains && !sh_distinguish.isEmpty() && sh_distinguish.contains(d)) {
                             addrMap.setProvinces("上海市");
                             addrMap.setMarket("上海市");
                             addrMap.setDistinguish(d);
-                            sh_distinguish = d;
-                            addrMap.setRule("4");
-                        }
-                    }
-//                  镇匹配
-                    for (String d : D_S_C_tree.keySet()) {
-                        for (String s : D_S_C_tree.get(d).keySet()) {
-                            if (addr.contains(s)) {
-                                addrMap.setProvinces("上海市");
-                                addrMap.setMarket("上海市");
-                                addrMap.setDistinguish(d);
-                                addrMap.setStreetTown(s);
-                                addrMap.setRule("8");
-                                break;
-                            }
-                            if (addr.contains(s.substring(0, 2)) && ifContains && !sh_distinguish.isEmpty() && sh_distinguish.contains(d)) {
-                                addrMap.setProvinces("上海市");
-                                addrMap.setMarket("上海市");
-                                addrMap.setDistinguish(d);
-                                addrMap.setStreetTown(s);
-                                addrMap.setRule("8");
-                            }
+                            addrMap.setStreetTown(s);
+                            addrMap.setRule("8");
                         }
                     }
                 }
+//                }
             }
 //          特殊处理逻辑
             if (addrMap.getDistinguish() != null && addrMap.getAddress() != null && addrMap.getDistinguish().contains("松江区") && addrMap.getAddress().contains("工业区")) {
@@ -227,9 +227,11 @@ public class AddrSplitLmrMap {
                     if (addrMap.getAddress().split(addrMap.getDistinguish()).length > 1) {
                         addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish())[1]);
                     }
-                } else if (addrMap.getAddress().contains(addrMap.getDistinguish().substring(0, 2) + "县")) {
+                } else if (addrMap.getAddress().contains(addrMap.getDistinguish().substring(0, 2))) {
                     if (addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县").length > 1) {
                         addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县")[1]);
+                    } else if (addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2)).length > 1) {
+                        addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2))[1]);
                     }
                 }
             }
@@ -328,7 +330,13 @@ public class AddrSplitLmrMap {
                 }
             } else {
 //                不存在\d+(?:-+\d)?号
-                returnMap.put("路名", null);
+                Matcher nongTagMatcher0 = Pattern.compile("(\\d+)").matcher(remaining);
+                if (nongTagMatcher0.find()) {
+                    returnMap.put("路名", remaining.substring(0, nongTagMatcher0.start()));
+                } else {
+                    returnMap.put("路名", null);
+                }
+
                 returnMap.put("弄号", null);
             }
         }
@@ -364,12 +372,9 @@ public class AddrSplitLmrMap {
     }
 
     public static void main(String[] args) {
-        /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
+        AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
         AddrSplitLmrMap.initFile();
-        System.out.println(outAddrMapInAddr("村165号"));
-        System.out.println(outAddrMapInAddr("仓桥镇玉秀路136-16号101室"));
-        System.out.println(outAddrMapInAddr("上海市松江区乐都路339号松江电信大楼1303室"));
-        System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
+        System.out.println(outAddrMapInAddr("浦东龙华大道2223号"));
         // 测试示例(包含所有典型场景)
 //        行政区划、街镇、居委、路名、弄号、室号
 ////      路名

+ 7 - 2
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -2,6 +2,7 @@ package com.skyversation.poiaddr.util;
 
 import com.skyversation.poiaddr.addquery.AddressQueryEngine;
 import com.skyversation.poiaddr.bean.AddressResult;
+import com.skyversation.poiaddr.entity.AddrBean;
 import com.skyversation.poiaddr.service.AreaService;
 import lombok.AllArgsConstructor;
 import org.springframework.stereotype.Service;
@@ -492,10 +493,14 @@ public class ShanghaiAddressSplitUtil {
 
     public static void main(String[] args) throws Exception {
         new ShanghaiAddressSplitUtil().init();
-        String testAddress = "永丰街道松江工业区仓桥镇玉秀路39号";
+        String testAddress = "浦东龙华大道2223号";
         SplitAddress xzqh = splitBestAddress(testAddress);
         System.out.println(xzqh);
         System.out.println("测试地址:" + testAddress);
-        System.out.println(AddrSplitLmrMap.parseAddress(xzqh.getAddr()));
+        AddrSplitLmrMap addrSplitLmrMap = new AddrSplitLmrMap();
+        addrSplitLmrMap.initFile();
+        AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(xzqh.getAddr());
+        System.out.println("AddrBean:" + lmrAddrBean);
+        System.out.println(AddrSplitLmrMap.parseAddress(lmrAddrBean.getAddress()));
     }
 }

+ 1 - 1
src/main/java/com/skyversation/poiaddr/util/tasks/ScheduledTasks.java

@@ -124,7 +124,7 @@ public class ScheduledTasks {
      * 如果返回了正确的数据,入到地址库表中,并更新state为2(同时添加到地址库中)
      * 否则更新状态为1
      */
-    @Scheduled(cron = "0 30 2 * * ?")
+    @Scheduled(cron = "0 30 14 * * ?")
     public void dbdataCallBackTask() {
         try {
 //          根据当前日期生成开始id