|
@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
|
|
|
import javax.annotation.PostConstruct;
|
|
|
import java.io.InputStream;
|
|
|
import java.util.*;
|
|
|
+import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
/**
|
|
@@ -262,11 +263,110 @@ public class AddrSplitLmrMap {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 解析中文地址为结构化列表
|
|
|
+ *
|
|
|
+ * @param address 原始地址字符串
|
|
|
+ * @return 分词后的列表,按路、弄、号楼、层、室的顺序
|
|
|
+ */
|
|
|
+ public static List<String> parseAddress(String address) {
|
|
|
+ List<String> result = new ArrayList<>(Arrays.asList(null, null, null, null, null));
|
|
|
+ if (address == null || address.trim().isEmpty()) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ address = address.trim();
|
|
|
+
|
|
|
+ // 1. 提取路(简化版:直接匹配"弄"之前的所有字符)
|
|
|
+ String roadPattern = "(.+?)[弄]";
|
|
|
+ Matcher roadMatcher = Pattern.compile(roadPattern).matcher(address);
|
|
|
+ if (roadMatcher.find() && roadMatcher.start() == 0) {
|
|
|
+ result.set(0, roadMatcher.group(1));
|
|
|
+ address = address.substring(roadMatcher.end() - 1); // 从"弄"之后开始截取
|
|
|
+ } else {
|
|
|
+ // 如果没有找到"弄",则尝试使用原来的路名匹配逻辑
|
|
|
+ roadPattern = "(.+?[路街道路巷弄])";
|
|
|
+ roadMatcher = Pattern.compile(roadPattern).matcher(address);
|
|
|
+ if (roadMatcher.find() && roadMatcher.start() == 0) {
|
|
|
+ result.set(0, roadMatcher.group(1));
|
|
|
+ address = address.substring(roadMatcher.end());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 提取弄(支持连续的弄和支弄)
|
|
|
+ String lanePattern = "([0-9一二三四五六七八九十百千]+[弄支弄]+)";
|
|
|
+ Matcher laneMatcher = Pattern.compile(lanePattern).matcher(address);
|
|
|
+ StringBuilder laneBuilder = new StringBuilder();
|
|
|
+ while (laneMatcher.find() && laneMatcher.start() == 0) {
|
|
|
+ laneBuilder.append(laneMatcher.group(1));
|
|
|
+ address = address.substring(laneMatcher.end());
|
|
|
+ laneMatcher = Pattern.compile(lanePattern).matcher(address);
|
|
|
+ }
|
|
|
+ if (laneBuilder.length() > 0) {
|
|
|
+ result.set(1, laneBuilder.toString());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 提取号楼(支持XX号格式)
|
|
|
+ String buildingPattern = "([0-9一二三四五六七八九十百千]+[号楼栋号])";
|
|
|
+ Matcher buildingMatcher = Pattern.compile(buildingPattern).matcher(address);
|
|
|
+ if (buildingMatcher.find() && buildingMatcher.start() == 0) {
|
|
|
+ result.set(2, buildingMatcher.group(1));
|
|
|
+ address = address.substring(buildingMatcher.end());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 智能楼层室号解析
|
|
|
+ String roomPattern = "([0-9]{1,2})([0-9]{2,})[室房]"; // 修改正则表达式,确保室号部分至少两位数
|
|
|
+ Matcher roomMatcher = Pattern.compile(roomPattern).matcher(address);
|
|
|
+
|
|
|
+ if (roomMatcher.find()) {
|
|
|
+ String floorPart = roomMatcher.group(1);
|
|
|
+ String roomPart = roomMatcher.group(2);
|
|
|
+ // 设置楼层
|
|
|
+ result.set(3, floorPart + "层");
|
|
|
+ // 设置室号(直接使用匹配到的部分,不去除前导零)
|
|
|
+ result.set(4, floorPart + roomPart + "室"); // 修改此处,直接使用roomPart
|
|
|
+
|
|
|
+ // 移除已匹配的部分
|
|
|
+ address = address.substring(0, roomMatcher.start()) +
|
|
|
+ address.substring(roomMatcher.end());
|
|
|
+ } else {
|
|
|
+ // 4. 提取层
|
|
|
+ String floorPattern = "([0-9一二三四五六七八九十百千]+[层楼])";
|
|
|
+ Matcher floorMatcher = Pattern.compile(floorPattern).matcher(address);
|
|
|
+ if (floorMatcher.find()) {
|
|
|
+ result.set(3, floorMatcher.group(1));
|
|
|
+ address = address.substring(0, floorMatcher.start()) + address.substring(floorMatcher.end());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 5. 提取室
|
|
|
+ String roomPatternSimple = "([0-9]+[室房])";
|
|
|
+ Matcher roomMatcherSimple = Pattern.compile(roomPatternSimple).matcher(address);
|
|
|
+ if (roomMatcherSimple.find()) {
|
|
|
+ result.set(4, roomMatcherSimple.group(1));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
public static void main(String[] args) {
|
|
|
- AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
+ /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
AddrSplitLmrMap.initFile();
|
|
|
System.out.println(outAddrMapInAddr("村165号"));
|
|
|
System.out.println(outAddrMapInAddr("上海市松江区乐都路"));
|
|
|
- System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));
|
|
|
+ System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
|
|
|
+ // 测试示例(包含所有典型场景)
|
|
|
+ // 测试一位数楼层地址
|
|
|
+ String address4 = "广富林1188弄167号313室";
|
|
|
+ System.out.println("\n测试地址: " + address4);
|
|
|
+ printParsedResult(parseAddress(address4));
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void printParsedResult(List<String> parsed) {
|
|
|
+ System.out.println("解析结果:");
|
|
|
+ System.out.println("路: " + parsed.get(0));
|
|
|
+ System.out.println("弄: " + parsed.get(1));
|
|
|
+ System.out.println("号楼: " + parsed.get(2));
|
|
|
+ System.out.println("层: " + parsed.get(3));
|
|
|
+ System.out.println("室: " + parsed.get(4));
|
|
|
}
|
|
|
}
|