Эх сурвалжийг харах

分词逻辑BUG修复,优化路名等信息获取逻辑,添加路名等信息到统一返回实体接口逻辑

DESKTOP-6LTVLN7\Liumouren 6 өдөр өмнө
parent
commit
6fe4fd7151

+ 17 - 4
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -49,20 +49,20 @@ public class AddressQueryEngine {
         for (String addr : addrs) {
             //  创建请求
             String addr_ = addr + "";
-            if(StringUtils.hasText(addr_)){
+            if (StringUtils.hasText(addr_)) {
                 addressResult = sj_szxSearchByName(addr, 3);
                 if (addressResult != null) {
                     addressResult.setCode(AddressResultEnum.SZX_SUCCESS);
                     addressResult.setMessage("成功");
-                    try{
+                    try {
                         SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(addr_);
                         return getCjWgWgwByLoc(addressResult, splitAddress);
-                    }catch (Exception e){
+                    } catch (Exception e) {
                         e.printStackTrace();
                     }
                     addressResult = null;
                 }
-            }else{
+            } else {
                 addressResult = null;
             }
         }
@@ -309,6 +309,19 @@ public class AddressQueryEngine {
                 } else {
                     System.err.println("没有经纬度参数,不能根据经纬度落点补充街镇等信息!");
                 }
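+//              TODO add logic (return road name and related fields)
+//              NOTE(review): below, "弄号" is passed to setBuildingNumber and "楼栋号" to setLaneNumber, while AddressResult comments laneNumber as 弄号 and buildingNumber as 楼栋号 - the two setters look swapped; confirm.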
+                try {
+                    Map<String, String> roomInfo = AddrSplitLmrMap.parseAddress(splitAddress.getAddr());
+                    if (roomInfo != null) {
+                        content.setRoadName(roomInfo.get("路名"));
+                        content.setBuildingNumber(roomInfo.get("弄号"));
+                        content.setLaneNumber(roomInfo.get("楼栋号"));
+                        content.setRoomNumber(roomInfo.get("室号"));
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.err.println("getCjWgWgwByLoc方法获取路名室号等信息处理逻辑异常" + e);
+                }
             }
             return result;
         }

+ 8 - 0
src/main/java/com/skyversation/poiaddr/bean/AddressResult.java

@@ -73,6 +73,14 @@ public class AddressResult {
         private String score;
         // 标准化地名地址
         private String standAddr;
+        // 路名
+        private String roadName;
+        // 弄号
+        private String laneNumber;
+        // 楼栋号
+        private String buildingNumber;
+        // 室号
+        private String roomNumber;
     }
 
 }

+ 80 - 88
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -1,6 +1,9 @@
 package com.skyversation.poiaddr.util;
 
 import com.skyversation.poiaddr.entity.AddrBean;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
 import org.springframework.stereotype.Service;
 
 import javax.annotation.PostConstruct;
@@ -263,110 +266,99 @@ public class AddrSplitLmrMap {
         return false;
     }
 
-    /**
-     * 解析中文地址为结构化列表
-     *
-     * @param address 原始地址字符串
-     * @return 分词后的列表,按路、弄、号楼、层、室的顺序
-     */
-    public static List<String> parseAddress(String address) {
-        List<String> result = new ArrayList<>(Arrays.asList(null, null, null, null, null));
-        if (address == null || address.trim().isEmpty()) {
-            return result;
+    public static Map<String, String> parseAddress(String addressStr) { // Splits an address into a map keyed by 路名 / 弄号 / 楼栋号 / 室号 (values may be null); returns null for null or blank input
+        if (addressStr == null || addressStr.trim().isEmpty()) {
+            return null;
         }
-        address = address.trim();
-
-        // 1. 提取路(简化版:直接匹配"弄"之前的所有字符)
-        String roadPattern = "(.+?)[弄]";
-        Matcher roadMatcher = Pattern.compile(roadPattern).matcher(address);
-        if (roadMatcher.find() && roadMatcher.start() == 0) {
-            result.set(0, roadMatcher.group(1));
-            address = address.substring(roadMatcher.end() - 1); // 从"弄"之后开始截取
+        addressStr = addressStr.trim();
+        Map<String, String> returnMap = new HashMap<>();
+        String remaining = addressStr;
+//        First rewrite "xx号-xx号" ranges: when one is detected, every "号-" in the string becomes "-"
+        Matcher diyMatcher = Pattern.compile("(\\d号+-\\d号)").matcher(remaining); // NOTE(review): "号+" repeats 号 and each \\d is a single digit; "\\d+号-\\d+号" may have been intended - confirm
+        if (diyMatcher.find()) {
+            remaining = remaining.replaceAll("号-", "-");
+        }
+//        Check first whether the address contains a "<digits>弄" lane token
+        Matcher nongNumberMatcher = Pattern.compile("(\\d+弄)").matcher(remaining);
+        if (nongNumberMatcher.find()) {
+//          Lane token found: the text before it is the road name, the token itself is the lane number
+            returnMap.put("路名", remaining.substring(0, nongNumberMatcher.start()));
+            returnMap.put("弄号", nongNumberMatcher.group(1));
+            remaining = remaining.substring(nongNumberMatcher.end()); // continue with the text after the lane token
+            Matcher nongTagMatcher = Pattern.compile("(\\d+(?:-+\\d)?号)").matcher(remaining); // NOTE(review): "(?:-+\\d)?" allows only one digit after '-', so "6-29号" is matched as "29号" - verify
+            if (nongTagMatcher.find()) {
+//                A "\d+(?:-+\d)?号" token follows the lane: store it as the building number
+                returnMap.put("楼栋号", nongTagMatcher.group(1));
+                remaining = remaining.substring(nongTagMatcher.end());
+            } else {
+                returnMap.put("楼栋号", null);
+            }
         } else {
-            // 如果没有找到"弄",则尝试使用原来的路名匹配逻辑
-            roadPattern = "(.+?[路街道路巷弄])";
-            roadMatcher = Pattern.compile(roadPattern).matcher(address);
-            if (roadMatcher.find() && roadMatcher.start() == 0) {
-                result.set(0, roadMatcher.group(1));
-                address = address.substring(roadMatcher.end());
+//          No lane token in the address
+            Matcher nongTagMatcher = Pattern.compile("(\\d+(?:-+\\d)?号)").matcher(remaining);
+            if (nongTagMatcher.find()) {
+//                A "\d+(?:-+\d)?号" token exists: the text before it is the road name
+                returnMap.put("路名", remaining.substring(0, nongTagMatcher.start())); // NOTE(review): remaining is not advanced past this token in this branch, so the room lookup below scans the whole string (e.g. "乐都路339号" also yields 室号 "339号") - confirm intended
+                String nonghao = nongTagMatcher.group(1);
+                if (nonghao.contains("-")) { // "a-b号" form: the part before '-' is stored as 弄号, the next part as 楼栋号
+                    String[] nonghaoList = nonghao.split("-");
+                    returnMap.put("弄号", nonghaoList[0]);
+                    returnMap.put("楼栋号", nonghaoList[1]);
+                } else {
+                    returnMap.put("弄号", nonghao);
+                    returnMap.put("楼栋号", null);
+                }
+            } else {
+//                No such token: road name, lane number and building number are all stored as null
+                returnMap.put("路名", null);
+                returnMap.put("弄号", null);
+                returnMap.put("楼栋号", null);
             }
         }
 
-        // 2. 提取弄(支持连续的弄和支弄)
-        String lanePattern = "([0-9一二三四五六七八九十百千]+[弄支弄]+)";
-        Matcher laneMatcher = Pattern.compile(lanePattern).matcher(address);
-        StringBuilder laneBuilder = new StringBuilder();
-        while (laneMatcher.find() && laneMatcher.start() == 0) {
-            laneBuilder.append(laneMatcher.group(1));
-            address = address.substring(laneMatcher.end());
-            laneMatcher = Pattern.compile(lanePattern).matcher(address);
-        }
-        if (laneBuilder.length() > 0) {
-            result.set(1, laneBuilder.toString());
-        }
-
-        // 3. 提取号楼(支持XX号格式)
-        String buildingPattern = "([0-9一二三四五六七八九十百千]+[号楼栋号])";
-        Matcher buildingMatcher = Pattern.compile(buildingPattern).matcher(address);
-        if (buildingMatcher.find() && buildingMatcher.start() == 0) {
-            result.set(2, buildingMatcher.group(1));
-            address = address.substring(buildingMatcher.end());
-        }
-
-        // 智能楼层室号解析
-        String roomPattern = "([0-9]{1,2})([0-9]{2,})[室房]"; // 修改正则表达式,确保室号部分至少两位数
-        Matcher roomMatcher = Pattern.compile(roomPattern).matcher(address);
-
+        // 8. Parse the room number
+        Matcher roomMatcher = Pattern.compile("(\\d{3,4}(?:[室号房])?$)").matcher(remaining); // 3-4 digits, optionally followed by 室/号/房, anchored at the end of the remaining text
         if (roomMatcher.find()) {
-            String floorPart = roomMatcher.group(1);
-            String roomPart = roomMatcher.group(2);
-            // 设置楼层
-            result.set(3, floorPart + "层");
-            // 设置室号(直接使用匹配到的部分,不去除前导零)
-            result.set(4, floorPart + roomPart + "室"); // 修改此处,直接使用roomPart
-
-            // 移除已匹配的部分
-            address = address.substring(0, roomMatcher.start()) +
-                    address.substring(roomMatcher.end());
+            String roomNumber = roomMatcher.group(1);
+            returnMap.put("室号", roomNumber);
         } else {
-            // 4. 提取层
-            String floorPattern = "([0-9一二三四五六七八九十百千]+[层楼])";
-            Matcher floorMatcher = Pattern.compile(floorPattern).matcher(address);
-            if (floorMatcher.find()) {
-                result.set(3, floorMatcher.group(1));
-                address = address.substring(0, floorMatcher.start()) + address.substring(floorMatcher.end());
-            }
-
-            // 5. 提取室
-            String roomPatternSimple = "([0-9]+[室房])";
-            Matcher roomMatcherSimple = Pattern.compile(roomPatternSimple).matcher(address);
-            if (roomMatcherSimple.find()) {
-                result.set(4, roomMatcherSimple.group(1));
+            Matcher roomMatcher2 = Pattern.compile("(\\d{3,4}[室号房])").matcher(remaining); // fallback: 3-4 digits with a mandatory 室/号/房 suffix, anywhere in the remaining text
+            if (roomMatcher2.find()) {
+                returnMap.put("室号", roomMatcher2.group(1));
+            } else {
+                returnMap.put("室号", null);
             }
         }
-
-        return result;
+        return returnMap;
     }
 
     public static void main(String[] args) { // Manual check: parses one hard-coded sample address and prints the four extracted fields
         /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
         AddrSplitLmrMap.initFile();
         System.out.println(outAddrMapInAddr("村165号"));
-        System.out.println(outAddrMapInAddr("上海市松江区乐都路"));
+        System.out.println(outAddrMapInAddr("上海市松江区乐都路339号松江电信大楼1303室"));
         System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
         // Test examples (covering all typical scenarios)
-        // 测试一位数楼层地址
-        String address4 = "广富林1188弄167号313室";
-        System.out.println("\n测试地址: " + address4);
-        printParsedResult(parseAddress(address4));
-    }
-
-    private static void printParsedResult(List<String> parsed) {
-        System.out.println("解析结果:");
-        System.out.println("路: " + parsed.get(0));
-        System.out.println("弄: " + parsed.get(1));
-        System.out.println("号楼: " + parsed.get(2));
-        System.out.println("层: " + parsed.get(3));
-        System.out.println("室: " + parsed.get(4));
+//        Administrative division, sub-district/town, neighbourhood committee, road name, lane number, room number
+////      Road name
+//        private String roadName;
+////      Lane number
+//        private String houseNumber;
+////      Building
+//        private String buildingNumber;
+////      Room number
+//        private String roomNumber;
+//        String testAddress = "北松公路6961弄6-29号205";
+//        String testAddress = "香泾路377-1号306室";
+//        String testAddress = "新松江路887弄1号611室";
+//        String testAddress = "方塔东三村6号506室";
+//        String testAddress = "中山西路 36弄 5号 202室";
+        String testAddress = "陈春公路198弄70号-2号1201室";
+        Map<String, String> res = parseAddress(testAddress); // parseAddress returns null only for null/blank input, which this literal is not
+        System.out.println("测试地址:" + testAddress);
+        System.out.println("路名:" + res.get("路名"));
+        System.out.println("弄号:" + res.get("弄号"));
+        System.out.println("楼栋号:" + res.get("楼栋号"));
+        System.out.println("室号:" + res.get("室号"));
     }
 }

+ 13 - 13
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -459,9 +459,11 @@ public class ShanghaiAddressSplitUtil {
      * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
      */
     public static List<SplitAddress> splitAddresses(String sourceAddress) {
-        Matcher matcher = Pattern.compile("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)").matcher(sourceAddress);
+        Matcher matcher = Pattern.compile("(\\(*\\))|((*))|(\\{*})|([*])").matcher(sourceAddress);
         List<SplitAddress> addressList = new ArrayList<>();
-        String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)", "");
+//        String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)", "");
+        String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
+                .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
         StringBuilder sb = new StringBuilder();
         for (char c : beautyString.toCharArray()) {
             // 检查是否为全角数字
@@ -477,10 +479,11 @@ public class ShanghaiAddressSplitUtil {
         }
         beautyString = sb.toString();
         addressList.add(beautyResult(split(beautyString)));
-        while (matcher.find()) {
+        if (matcher.find()) {
             String address = matcher.group();
-            if (address.length() <= 2) continue;
-            addressList.addAll(splitAddresses(address.substring(1, address.length() - 1)));
+            if (address.length() > 2) {
+                addressList.addAll(splitAddresses(address.substring(1, address.length() - 1)));
+            };
         }
         for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
         return addressList;
@@ -497,13 +500,10 @@ public class ShanghaiAddressSplitUtil {
 
     public static void main(String[] args) throws Exception { // Manual check: initialises the splitter, splits one sample address and prints the parsed road/lane/room map
         new ShanghaiAddressSplitUtil().init();
-        System.out.println(splitBestAddress("上海市松江区乐都路339"));
-//        System.out.println(splitBestAddress("新胜路88、98号3号厂房"));
-//        System.out.println(splitBestAddress("新胜路88-98号3号厂房"));
-//        System.out.println(splitBestAddress("新胜路、98号3号厂房"));
-//        System.out.println(splitBestAddress("新胜路88\\98号3号厂房"));
-//        System.out.println(splitBestAddress("新胜路18、28号3号厂房"));
-//        System.out.println(splitBestAddress("新胜路28号3号厂房"));
-//        System.out.println(splitBestAddress("88、98号3号厂房"));
+        String testAddress = "上海市松江区陈春公路198弄70号-2号1201室(我是一个奇怪的地址"; // sample contains an opening bracket with no closing one
+        SplitAddress xzqh = splitBestAddress(testAddress);
+        System.out.println(xzqh);
+        System.out.println("测试地址:" + testAddress);
+        System.out.println(AddrSplitLmrMap.parseAddress(xzqh.getAddr())); // feed the split result's addr into the road/lane/room parser
     }
 }