|
@@ -1,6 +1,9 @@
|
|
|
package com.skyversation.poiaddr.util;
|
|
|
|
|
|
import com.skyversation.poiaddr.entity.AddrBean;
|
|
|
+import lombok.AllArgsConstructor;
|
|
|
+import lombok.Data;
|
|
|
+import lombok.NoArgsConstructor;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import javax.annotation.PostConstruct;
|
|
@@ -263,110 +266,99 @@ public class AddrSplitLmrMap {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * 解析中文地址为结构化列表
|
|
|
- *
|
|
|
- * @param address 原始地址字符串
|
|
|
- * @return 分词后的列表,按路、弄、号楼、层、室的顺序
|
|
|
- */
|
|
|
- public static List<String> parseAddress(String address) {
|
|
|
- List<String> result = new ArrayList<>(Arrays.asList(null, null, null, null, null));
|
|
|
- if (address == null || address.trim().isEmpty()) {
|
|
|
- return result;
|
|
|
+ public static Map<String, String> parseAddress(String addressStr) {
|
|
|
+ if (addressStr == null || addressStr.trim().isEmpty()) {
|
|
|
+ return null;
|
|
|
}
|
|
|
- address = address.trim();
|
|
|
-
|
|
|
- // 1. 提取路(简化版:直接匹配"弄"之前的所有字符)
|
|
|
- String roadPattern = "(.+?)[弄]";
|
|
|
- Matcher roadMatcher = Pattern.compile(roadPattern).matcher(address);
|
|
|
- if (roadMatcher.find() && roadMatcher.start() == 0) {
|
|
|
- result.set(0, roadMatcher.group(1));
|
|
|
- address = address.substring(roadMatcher.end() - 1); // 从"弄"之后开始截取
|
|
|
+ addressStr = addressStr.trim();
|
|
|
+ Map<String, String> returnMap = new HashMap<>();
|
|
|
+ String remaining = addressStr;
|
|
|
+// 先替换xx号-xx号
|
|
|
+ Matcher diyMatcher = Pattern.compile("(\\d号+-\\d号)").matcher(remaining);
|
|
|
+ if (diyMatcher.find()) {
|
|
|
+ remaining = remaining.replaceAll("号-", "-");
|
|
|
+ }
|
|
|
+// 首先判断是否存在关键字:弄
|
|
|
+ Matcher nongNumberMatcher = Pattern.compile("(\\d+弄)").matcher(remaining);
|
|
|
+ if (nongNumberMatcher.find()) {
|
|
|
+// 存在弄
|
|
|
+ returnMap.put("路名", remaining.substring(0, nongNumberMatcher.start()));
|
|
|
+ returnMap.put("弄号", nongNumberMatcher.group(1));
|
|
|
+ remaining = remaining.substring(nongNumberMatcher.end());
|
|
|
+ Matcher nongTagMatcher = Pattern.compile("(\\d+(?:-+\\d)?号)").matcher(remaining);
|
|
|
+ if (nongTagMatcher.find()) {
|
|
|
+// 存在\d+(?:-+\d)?号
|
|
|
+ returnMap.put("楼栋号", nongTagMatcher.group(1));
|
|
|
+ remaining = remaining.substring(nongTagMatcher.end());
|
|
|
+ } else {
|
|
|
+ returnMap.put("楼栋号", null);
|
|
|
+ }
|
|
|
} else {
|
|
|
- // 如果没有找到"弄",则尝试使用原来的路名匹配逻辑
|
|
|
- roadPattern = "(.+?[路街道路巷弄])";
|
|
|
- roadMatcher = Pattern.compile(roadPattern).matcher(address);
|
|
|
- if (roadMatcher.find() && roadMatcher.start() == 0) {
|
|
|
- result.set(0, roadMatcher.group(1));
|
|
|
- address = address.substring(roadMatcher.end());
|
|
|
+// 不存在弄
|
|
|
+ Matcher nongTagMatcher = Pattern.compile("(\\d+(?:-+\\d)?号)").matcher(remaining);
|
|
|
+ if (nongTagMatcher.find()) {
|
|
|
+// 存在\d+(?:-+\d)?号
|
|
|
+ returnMap.put("路名", remaining.substring(0, nongTagMatcher.start()));
|
|
|
+ String nonghao = nongTagMatcher.group(1);
|
|
|
+ if (nonghao.contains("-")) {
|
|
|
+ String[] nonghaoList = nonghao.split("-");
|
|
|
+ returnMap.put("弄号", nonghaoList[0]);
|
|
|
+ returnMap.put("楼栋号", nonghaoList[1]);
|
|
|
+ } else {
|
|
|
+ returnMap.put("弄号", nonghao);
|
|
|
+ returnMap.put("楼栋号", null);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+// 不存在\d+(?:-+\d)?号
|
|
|
+ returnMap.put("路名", null);
|
|
|
+ returnMap.put("弄号", null);
|
|
|
+ returnMap.put("楼栋号", null);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // 2. 提取弄(支持连续的弄和支弄)
|
|
|
- String lanePattern = "([0-9一二三四五六七八九十百千]+[弄支弄]+)";
|
|
|
- Matcher laneMatcher = Pattern.compile(lanePattern).matcher(address);
|
|
|
- StringBuilder laneBuilder = new StringBuilder();
|
|
|
- while (laneMatcher.find() && laneMatcher.start() == 0) {
|
|
|
- laneBuilder.append(laneMatcher.group(1));
|
|
|
- address = address.substring(laneMatcher.end());
|
|
|
- laneMatcher = Pattern.compile(lanePattern).matcher(address);
|
|
|
- }
|
|
|
- if (laneBuilder.length() > 0) {
|
|
|
- result.set(1, laneBuilder.toString());
|
|
|
- }
|
|
|
-
|
|
|
- // 3. 提取号楼(支持XX号格式)
|
|
|
- String buildingPattern = "([0-9一二三四五六七八九十百千]+[号楼栋号])";
|
|
|
- Matcher buildingMatcher = Pattern.compile(buildingPattern).matcher(address);
|
|
|
- if (buildingMatcher.find() && buildingMatcher.start() == 0) {
|
|
|
- result.set(2, buildingMatcher.group(1));
|
|
|
- address = address.substring(buildingMatcher.end());
|
|
|
- }
|
|
|
-
|
|
|
- // 智能楼层室号解析
|
|
|
- String roomPattern = "([0-9]{1,2})([0-9]{2,})[室房]"; // 修改正则表达式,确保室号部分至少两位数
|
|
|
- Matcher roomMatcher = Pattern.compile(roomPattern).matcher(address);
|
|
|
-
|
|
|
+ // 8. 解析房间号
|
|
|
+ Matcher roomMatcher = Pattern.compile("(\\d{3,4}(?:[室号房])?$)").matcher(remaining);
|
|
|
if (roomMatcher.find()) {
|
|
|
- String floorPart = roomMatcher.group(1);
|
|
|
- String roomPart = roomMatcher.group(2);
|
|
|
- // 设置楼层
|
|
|
- result.set(3, floorPart + "层");
|
|
|
- // 设置室号(直接使用匹配到的部分,不去除前导零)
|
|
|
- result.set(4, floorPart + roomPart + "室"); // 修改此处,直接使用roomPart
|
|
|
-
|
|
|
- // 移除已匹配的部分
|
|
|
- address = address.substring(0, roomMatcher.start()) +
|
|
|
- address.substring(roomMatcher.end());
|
|
|
+ String roomNumber = roomMatcher.group(1);
|
|
|
+ returnMap.put("室号", roomNumber);
|
|
|
} else {
|
|
|
- // 4. 提取层
|
|
|
- String floorPattern = "([0-9一二三四五六七八九十百千]+[层楼])";
|
|
|
- Matcher floorMatcher = Pattern.compile(floorPattern).matcher(address);
|
|
|
- if (floorMatcher.find()) {
|
|
|
- result.set(3, floorMatcher.group(1));
|
|
|
- address = address.substring(0, floorMatcher.start()) + address.substring(floorMatcher.end());
|
|
|
- }
|
|
|
-
|
|
|
- // 5. 提取室
|
|
|
- String roomPatternSimple = "([0-9]+[室房])";
|
|
|
- Matcher roomMatcherSimple = Pattern.compile(roomPatternSimple).matcher(address);
|
|
|
- if (roomMatcherSimple.find()) {
|
|
|
- result.set(4, roomMatcherSimple.group(1));
|
|
|
+ Matcher roomMatcher2 = Pattern.compile("(\\d{3,4}[室号房])").matcher(remaining);
|
|
|
+ if (roomMatcher2.find()) {
|
|
|
+ returnMap.put("室号", roomMatcher2.group(1));
|
|
|
+ } else {
|
|
|
+ returnMap.put("室号", null);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- return result;
|
|
|
+ return returnMap;
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
/*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
AddrSplitLmrMap.initFile();
|
|
|
System.out.println(outAddrMapInAddr("村165号"));
|
|
|
- System.out.println(outAddrMapInAddr("上海市松江区乐都路"));
|
|
|
+ System.out.println(outAddrMapInAddr("上海市松江区乐都路339号松江电信大楼1303室"));
|
|
|
System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
|
|
|
// 测试示例(包含所有典型场景)
|
|
|
- // 测试一位数楼层地址
|
|
|
- String address4 = "广富林1188弄167号313室";
|
|
|
- System.out.println("\n测试地址: " + address4);
|
|
|
- printParsedResult(parseAddress(address4));
|
|
|
- }
|
|
|
-
|
|
|
- private static void printParsedResult(List<String> parsed) {
|
|
|
- System.out.println("解析结果:");
|
|
|
- System.out.println("路: " + parsed.get(0));
|
|
|
- System.out.println("弄: " + parsed.get(1));
|
|
|
- System.out.println("号楼: " + parsed.get(2));
|
|
|
- System.out.println("层: " + parsed.get(3));
|
|
|
- System.out.println("室: " + parsed.get(4));
|
|
|
+// 行政区划、街镇、居委、路名、弄号、室号
|
|
|
+//// 路名
|
|
|
+// private String roadName;
|
|
|
+//// 弄号
|
|
|
+// private String houseNumber;
|
|
|
+//// 楼栋
|
|
|
+// private String buildingNumber;
|
|
|
+//// 房间号
|
|
|
+// private String roomNumber;
|
|
|
+// String testAddress = "北松公路6961弄6-29号205";
|
|
|
+// String testAddress = "香泾路377-1号306室";
|
|
|
+// String testAddress = "新松江路887弄1号611室";
|
|
|
+// String testAddress = "方塔东三村6号506室";
|
|
|
+// String testAddress = "中山西路 36弄 5号 202室";
|
|
|
+ String testAddress = "陈春公路198弄70号-2号1201室";
|
|
|
+ Map<String, String> res = parseAddress(testAddress);
|
|
|
+ System.out.println("测试地址:" + testAddress);
|
|
|
+ System.out.println("路名:" + res.get("路名"));
|
|
|
+ System.out.println("弄号:" + res.get("弄号"));
|
|
|
+ System.out.println("楼栋号:" + res.get("楼栋号"));
|
|
|
+ System.out.println("室号:" + res.get("室号"));
|
|
|
}
|
|
|
}
|