|
@@ -33,7 +33,7 @@ public class AddrSplitLmrMap {
|
|
|
private static HashMap<String, String> districtCodeMap = new HashMap<>();
|
|
|
|
|
|
@PostConstruct
|
|
|
- private void initFile() {
|
|
|
+ void initFile() {
|
|
|
System.out.println("开始初始化分词器");
|
|
|
InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(outPutFilePath);
|
|
|
if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + outPutFilePath);
|
|
@@ -109,8 +109,8 @@ public class AddrSplitLmrMap {
|
|
|
* @param addr
|
|
|
* @return
|
|
|
*/
|
|
|
- static Pattern pattern = Pattern.compile("市|区|镇|街道|县");
|
|
|
- static Pattern spattern = Pattern.compile("路|街|道|村");
|
|
|
+ static Pattern pattern = Pattern.compile("市|区|镇|街道|大道|县");
|
|
|
+ static Pattern spattern = Pattern.compile("路|街|大道|村");
|
|
|
|
|
|
public static AddrBean outAddrMapInAddr(String addr) {
|
|
|
AddrBean addrMap = new AddrBean();
|
|
@@ -154,55 +154,55 @@ public class AddrSplitLmrMap {
|
|
|
// 如果不是外地数据和连接数据的话
|
|
|
if (!errorAddr) {
|
|
|
// 上海地址匹配
|
|
|
- if (pattern.matcher(addr).find()) {
|
|
|
- if (addr.startsWith("上海")) {
|
|
|
+// if (pattern.matcher(addr).find()) {
|
|
|
+ if (addr.startsWith("上海")) {
|
|
|
+ addrMap.setProvinces("上海市");
|
|
|
+ addrMap.setMarket("上海市");
|
|
|
+ addrMap.setRule("2");
|
|
|
+ }
|
|
|
+// 匹配区
|
|
|
+ boolean ifContains = false;
|
|
|
+// 区匹配标识
|
|
|
+ String sh_distinguish = "";
|
|
|
+ for (String d : D_S_C_tree.keySet()) {
|
|
|
+ if (addr.contains(d) || addr.contains(d.substring(0, 2) + "县")) {
|
|
|
+ ifContains = true;
|
|
|
addrMap.setProvinces("上海市");
|
|
|
addrMap.setMarket("上海市");
|
|
|
- addrMap.setRule("2");
|
|
|
+ addrMap.setDistinguish(d);
|
|
|
+ sh_distinguish = d;
|
|
|
+ addrMap.setRule("4");
|
|
|
+ break;
|
|
|
}
|
|
|
-// 匹配区
|
|
|
- boolean ifContains = false;
|
|
|
-// 区匹配标识
|
|
|
- String sh_distinguish = "";
|
|
|
- for (String d : D_S_C_tree.keySet()) {
|
|
|
- if (addr.contains(d) || addr.contains(d.substring(0, 2) + "县")) {
|
|
|
- ifContains = true;
|
|
|
+ if (addr.contains(d.substring(0, 2)) && !ifTrueAddr(addr, d.substring(0, 2))) {
|
|
|
+ addrMap.setProvinces("上海市");
|
|
|
+ addrMap.setMarket("上海市");
|
|
|
+ addrMap.setDistinguish(d);
|
|
|
+ sh_distinguish = d;
|
|
|
+ addrMap.setRule("4");
|
|
|
+ }
|
|
|
+ }
|
|
|
+// 镇匹配
|
|
|
+ for (String d : D_S_C_tree.keySet()) {
|
|
|
+ for (String s : D_S_C_tree.get(d).keySet()) {
|
|
|
+ if (addr.contains(s)) {
|
|
|
addrMap.setProvinces("上海市");
|
|
|
addrMap.setMarket("上海市");
|
|
|
addrMap.setDistinguish(d);
|
|
|
- sh_distinguish = d;
|
|
|
- addrMap.setRule("4");
|
|
|
+ addrMap.setStreetTown(s);
|
|
|
+ addrMap.setRule("8");
|
|
|
break;
|
|
|
}
|
|
|
- if (addr.contains(d.substring(0, 2)) && ifTrueAddr(addr, d.substring(0, 2))) {
|
|
|
+ if (addr.contains(s.substring(0, 2)) && ifContains && !sh_distinguish.isEmpty() && sh_distinguish.contains(d)) {
|
|
|
addrMap.setProvinces("上海市");
|
|
|
addrMap.setMarket("上海市");
|
|
|
addrMap.setDistinguish(d);
|
|
|
- sh_distinguish = d;
|
|
|
- addrMap.setRule("4");
|
|
|
- }
|
|
|
- }
|
|
|
-// 镇匹配
|
|
|
- for (String d : D_S_C_tree.keySet()) {
|
|
|
- for (String s : D_S_C_tree.get(d).keySet()) {
|
|
|
- if (addr.contains(s)) {
|
|
|
- addrMap.setProvinces("上海市");
|
|
|
- addrMap.setMarket("上海市");
|
|
|
- addrMap.setDistinguish(d);
|
|
|
- addrMap.setStreetTown(s);
|
|
|
- addrMap.setRule("8");
|
|
|
- break;
|
|
|
- }
|
|
|
- if (addr.contains(s.substring(0, 2)) && ifContains && !sh_distinguish.isEmpty() && sh_distinguish.contains(d)) {
|
|
|
- addrMap.setProvinces("上海市");
|
|
|
- addrMap.setMarket("上海市");
|
|
|
- addrMap.setDistinguish(d);
|
|
|
- addrMap.setStreetTown(s);
|
|
|
- addrMap.setRule("8");
|
|
|
- }
|
|
|
+ addrMap.setStreetTown(s);
|
|
|
+ addrMap.setRule("8");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+// }
|
|
|
}
|
|
|
// 特殊处理逻辑
|
|
|
if (addrMap.getDistinguish() != null && addrMap.getAddress() != null && addrMap.getDistinguish().contains("松江区") && addrMap.getAddress().contains("工业区")) {
|
|
@@ -227,9 +227,11 @@ public class AddrSplitLmrMap {
|
|
|
if (addrMap.getAddress().split(addrMap.getDistinguish()).length > 1) {
|
|
|
addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish())[1]);
|
|
|
}
|
|
|
- } else if (addrMap.getAddress().contains(addrMap.getDistinguish().substring(0, 2) + "县")) {
|
|
|
+ } else if (addrMap.getAddress().contains(addrMap.getDistinguish().substring(0, 2))) {
|
|
|
if (addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县").length > 1) {
|
|
|
addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县")[1]);
|
|
|
+ } else if (addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2)).length > 1) {
|
|
|
+ addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2))[1]);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -328,7 +330,13 @@ public class AddrSplitLmrMap {
|
|
|
}
|
|
|
} else {
|
|
|
// 不存在\d+(?:-+\d)?号
|
|
|
- returnMap.put("路名", null);
|
|
|
+ Matcher nongTagMatcher0 = Pattern.compile("(\\d+)").matcher(remaining);
|
|
|
+ if (nongTagMatcher0.find()) {
|
|
|
+ returnMap.put("路名", remaining.substring(0, nongTagMatcher0.start()));
|
|
|
+ } else {
|
|
|
+ returnMap.put("路名", null);
|
|
|
+ }
|
|
|
+
|
|
|
returnMap.put("弄号", null);
|
|
|
}
|
|
|
}
|
|
@@ -364,12 +372,9 @@ public class AddrSplitLmrMap {
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
- /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
+ AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
AddrSplitLmrMap.initFile();
|
|
|
- System.out.println(outAddrMapInAddr("村165号"));
|
|
|
- System.out.println(outAddrMapInAddr("仓桥镇玉秀路136-16号101室"));
|
|
|
- System.out.println(outAddrMapInAddr("上海市松江区乐都路339号松江电信大楼1303室"));
|
|
|
- System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
|
|
|
+ System.out.println(outAddrMapInAddr("浦东龙华大道2223号"));
|
|
|
// 测试示例(包含所有典型场景)
|
|
|
// 行政区划、街镇、居委、路名、弄号、室号
|
|
|
//// 路名
|