|
@@ -1,5 +1,7 @@
|
|
|
package com.skyversation.poiaddr.util;
|
|
|
|
|
|
+import com.skyversation.poiaddr.addquery.AddressQueryEngine;
|
|
|
+import com.skyversation.poiaddr.bean.AddressResult;
|
|
|
import com.skyversation.poiaddr.service.AreaService;
|
|
|
import lombok.AllArgsConstructor;
|
|
|
import org.springframework.stereotype.Service;
|
|
@@ -14,7 +16,7 @@ import java.util.stream.Collectors;
|
|
|
@Service
|
|
|
public class ShanghaiAddressSplitUtil {
|
|
|
@AllArgsConstructor
|
|
|
- static class threeLevelAddress{
|
|
|
+ static class threeLevelAddress {
|
|
|
String district;
|
|
|
String street;
|
|
|
String community;
|
|
@@ -26,11 +28,12 @@ public class ShanghaiAddressSplitUtil {
|
|
|
String communityCode;
|
|
|
|
|
|
}
|
|
|
- private static Map<String,List<threeLevelAddress>> All_STREET_IN_SHANGHAI;
|
|
|
- private static Map<String,List<threeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
|
|
|
- private static Map<String,List<String>> DISTRICT_TO_STREET_MAP;
|
|
|
- private static Map<String,List<String>> STREET_TO_COMMUNITY_MAP;
|
|
|
- private static Map<String,List<String>> DISTRICT_TO_COMMUNITY_MAP;
|
|
|
+
|
|
|
+ private static Map<String, List<threeLevelAddress>> All_STREET_IN_SHANGHAI;
|
|
|
+ private static Map<String, List<threeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
|
|
|
+ private static Map<String, List<String>> DISTRICT_TO_STREET_MAP;
|
|
|
+ private static Map<String, List<String>> STREET_TO_COMMUNITY_MAP;
|
|
|
+ private static Map<String, List<String>> DISTRICT_TO_COMMUNITY_MAP;
|
|
|
|
|
|
private static final Pattern LEVEL_1_SUFFIX_PATTERN = Pattern.compile("^(?:区|新区)");
|
|
|
|
|
@@ -42,22 +45,23 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
private static final Pattern UN_ADDRESS_PATTERN = Pattern.compile("http");
|
|
|
|
|
|
- private static final Pattern OVER_SPLIT=Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
|
|
|
+ private static final Pattern OVER_SPLIT = Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
|
|
|
|
|
|
private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、—/\\\\-][0-9]+(?=[号弄])");
|
|
|
+
|
|
|
@PostConstruct
|
|
|
- private void init(){
|
|
|
+ private void init() {
|
|
|
System.out.println("开始初始化分词器");
|
|
|
- Map<String,threeLevelAddress> districtMap= new HashMap<>();
|
|
|
- Map<String,List<threeLevelAddress>> streetMap= new HashMap<>();
|
|
|
- Map<String,List<threeLevelAddress>> communityMap= new HashMap<>();
|
|
|
- Map<String,List<String>> districtToStreetMap=new HashMap<>();
|
|
|
- Map<String,List<String>> streetToCommunityMap=new HashMap<>();
|
|
|
+ Map<String, threeLevelAddress> districtMap = new HashMap<>();
|
|
|
+ Map<String, List<threeLevelAddress>> streetMap = new HashMap<>();
|
|
|
+ Map<String, List<threeLevelAddress>> communityMap = new HashMap<>();
|
|
|
+ Map<String, List<String>> districtToStreetMap = new HashMap<>();
|
|
|
+ Map<String, List<String>> streetToCommunityMap = new HashMap<>();
|
|
|
|
|
|
String file = "上海市县乡记录.xlsx";
|
|
|
InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(file);
|
|
|
- if (is==null) is= ShanghaiAddressSplitUtil.class.getResourceAsStream("/"+file);
|
|
|
- if (is==null) throw new RuntimeException("无法找到"+file);
|
|
|
+ if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + file);
|
|
|
+ if (is == null) throw new RuntimeException("无法找到" + file);
|
|
|
try {
|
|
|
for (Map<String, Object> row : ExcelReaderUtils.readExcel(is)) {
|
|
|
String district = Optional.ofNullable(row.get("县级市简称")).map(Object::toString).orElse("");
|
|
@@ -69,23 +73,23 @@ public class ShanghaiAddressSplitUtil {
|
|
|
String districtCode = Optional.ofNullable(row.get("县级市编码")).map(Object::toString).orElse("");
|
|
|
String streetCode = Optional.ofNullable(row.get("街道编码")).map(Object::toString).orElse("");
|
|
|
String communityCode = Optional.ofNullable(row.get("居委编码")).map(Object::toString).orElse("");
|
|
|
- initData(district, street, community, districtFullName, streetFullName, communityFullName,districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
}
|
|
|
//自贸区
|
|
|
- initData("浦东", "试验区","", "浦东新区", "自由贸易试验区","","310115","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("浦东", "试验区", "", "浦东新区", "自由贸易试验区", "", "310115", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
//松江镇特别处理
|
|
|
- initData("松江", "松江","", "松江区", "","","310117","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("松江", "松江", "", "松江区", "", "", "310117", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
//金山工业区
|
|
|
- initData("金山", "金山工业区","", "金山区", "金山工业区","","310116","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("金山", "金山工业区", "", "金山区", "金山工业区", "", "310116", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
|
|
|
} catch (Exception e) {
|
|
|
throw new RuntimeException(e);
|
|
|
}
|
|
|
All_STREET_IN_SHANGHAI = Collections.unmodifiableMap(streetMap);
|
|
|
All_COMMUNITY_IN_SHANGHAI = Collections.unmodifiableMap(communityMap);
|
|
|
- DISTRICT_TO_STREET_MAP=Collections.unmodifiableMap(districtToStreetMap);
|
|
|
- STREET_TO_COMMUNITY_MAP=Collections.unmodifiableMap(streetToCommunityMap);
|
|
|
- DISTRICT_TO_COMMUNITY_MAP=Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
|
|
|
+ DISTRICT_TO_STREET_MAP = Collections.unmodifiableMap(districtToStreetMap);
|
|
|
+ STREET_TO_COMMUNITY_MAP = Collections.unmodifiableMap(streetToCommunityMap);
|
|
|
+ DISTRICT_TO_COMMUNITY_MAP = Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
|
|
|
.collect(Collectors.toMap(
|
|
|
Map.Entry::getKey,
|
|
|
entry -> entry.getValue().stream()
|
|
@@ -93,83 +97,100 @@ public class ShanghaiAddressSplitUtil {
|
|
|
.collect(Collectors.toList())
|
|
|
)));
|
|
|
System.out.println("分词器初始化完成");
|
|
|
+// TODO 网络连通性测试,可以请求一下测试的市中心地址
|
|
|
+ try {
|
|
|
+ System.out.println("电脑最大线程数:" + Runtime.getRuntime().availableProcessors());
|
|
|
+ System.out.println("<<<<<<<<------开始网络连通性测试");
|
|
|
+ long startTime = System.currentTimeMillis();
|
|
|
+ List<String> addrs = new ArrayList<>();
|
|
|
+ addrs.add("上海市松江区乐都路339号");
|
|
|
+ AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName(addrs);
|
|
|
+ long endTime = System.currentTimeMillis();
|
|
|
+ System.out.println(">>>>>>>>------网络连通性测试完成!用时" + (endTime - startTime) / 1000 + "秒!结果:" + addressResult);
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ System.err.println(">>>>>>>>------网络连通性测试结果:" + e);
|
|
|
+ }
|
|
|
System.out.println("<<<<<<<<------开始缓存ScheduledTasks.allDmdzData对象");
|
|
|
AreaService.getInstance().getAllDmdzAddressDatas();
|
|
|
}
|
|
|
|
|
|
- private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName,String districtCode, String streetCode, String communityCode, Map<String, threeLevelAddress> districtMap, Map<String, List<threeLevelAddress>> streetMap, Map<String, List<threeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
|
|
|
- threeLevelAddress add = new threeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode ,streetCode, communityCode);
|
|
|
- districtMap.put(district,add);
|
|
|
- if (!streetMap.containsKey(street)) streetMap.put(street,new ArrayList<>());
|
|
|
+ private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName, String districtCode, String streetCode, String communityCode, Map<String, threeLevelAddress> districtMap, Map<String, List<threeLevelAddress>> streetMap, Map<String, List<threeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
|
|
|
+ threeLevelAddress add = new threeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode);
|
|
|
+ districtMap.put(district, add);
|
|
|
+ if (!streetMap.containsKey(street)) streetMap.put(street, new ArrayList<>());
|
|
|
streetMap.get(street).add(add);
|
|
|
- if (!communityMap.containsKey(community)) communityMap.put(community,new ArrayList<>());
|
|
|
+ if (!communityMap.containsKey(community)) communityMap.put(community, new ArrayList<>());
|
|
|
communityMap.get(community).add(add);
|
|
|
- if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district,new ArrayList<>());
|
|
|
+ if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district, new ArrayList<>());
|
|
|
districtToStreetMap.get(district).add(street);
|
|
|
- if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street,new ArrayList<>());
|
|
|
+ if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street, new ArrayList<>());
|
|
|
streetToCommunityMap.get(street).add(community);
|
|
|
}
|
|
|
|
|
|
- private static class splittingAddress{
|
|
|
+ private static class splittingAddress {
|
|
|
SplitAddress splitAddress;
|
|
|
|
|
|
- int street=-1;
|
|
|
- int community=-1;
|
|
|
+ int street = -1;
|
|
|
+ int community = -1;
|
|
|
|
|
|
- Map<Integer,String> streetMap =new HashMap<>();
|
|
|
- Map<Integer,String> communityMap=new HashMap<>();
|
|
|
+ Map<Integer, String> streetMap = new HashMap<>();
|
|
|
+ Map<Integer, String> communityMap = new HashMap<>();
|
|
|
threeLevelAddress threeLevelAddress;
|
|
|
|
|
|
String targetString;
|
|
|
|
|
|
- void findStreet(){
|
|
|
- Map<Integer,String> results =null;
|
|
|
- int completeMatchIndex=-1;
|
|
|
+ void findStreet() {
|
|
|
+ Map<Integer, String> results = null;
|
|
|
+ int completeMatchIndex = -1;
|
|
|
//首先尝试在一选下匹配
|
|
|
- if (splitAddress.getDistrict()!=null){
|
|
|
- results = contain(this.targetString,DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict()),0);
|
|
|
- completeMatchIndex = washResult(this.targetString,results,LEVEL_2_SUFFIX_PATTERN,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN);
|
|
|
+ if (splitAddress.getDistrict() != null) {
|
|
|
+ results = contain(this.targetString, DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict()), 0);
|
|
|
+ completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
|
|
|
}
|
|
|
//一选不存在或匹配无结果,直接搜全国
|
|
|
- if (results==null||results.isEmpty()){
|
|
|
- results = contain(this.targetString,All_STREET_IN_SHANGHAI.keySet(),0);
|
|
|
- if (completeMatchIndex==-1)completeMatchIndex = washResult(this.targetString,results,LEVEL_2_SUFFIX_PATTERN,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN);
|
|
|
+ if (results == null || results.isEmpty()) {
|
|
|
+ results = contain(this.targetString, All_STREET_IN_SHANGHAI.keySet(), 0);
|
|
|
+ if (completeMatchIndex == -1)
|
|
|
+ completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
|
|
|
|
|
|
}
|
|
|
|
|
|
streetMap.putAll(results);
|
|
|
- street=completeMatchIndex;
|
|
|
+ street = completeMatchIndex;
|
|
|
//仅有一个选择时当成一选
|
|
|
- if (streetMap.size()==1){
|
|
|
- street = (int)streetMap.keySet().toArray()[0];
|
|
|
+ if (streetMap.size() == 1) {
|
|
|
+ street = (int) streetMap.keySet().toArray()[0];
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
- void findCommunity(){
|
|
|
- Map<Integer,String> results = null;
|
|
|
- int completeMatchCommunity=-1;
|
|
|
- String sub=targetString;
|
|
|
+ void findCommunity() {
|
|
|
+ Map<Integer, String> results = null;
|
|
|
+ int completeMatchCommunity = -1;
|
|
|
+ String sub = targetString;
|
|
|
//尝试一选
|
|
|
- if (street!=-1){
|
|
|
- sub = targetString.substring(street+streetMap.get(street).length());
|
|
|
+ if (street != -1) {
|
|
|
+ sub = targetString.substring(street + streetMap.get(street).length());
|
|
|
Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
- sub=sub.substring(m.end());
|
|
|
+ if (m.find()) {
|
|
|
+ sub = sub.substring(m.end());
|
|
|
}
|
|
|
- results= contain(sub,STREET_TO_COMMUNITY_MAP.get(streetMap.get(street)),targetString.length()-sub.length());
|
|
|
- completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
+ results = contain(sub, STREET_TO_COMMUNITY_MAP.get(streetMap.get(street)), targetString.length() - sub.length());
|
|
|
+ completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
|
|
|
}
|
|
|
//一选不存在或匹配无结果,先搜全区
|
|
|
- if ((results == null || results.isEmpty()) && splitAddress.getDistrict()!=null) {
|
|
|
- results = contain(sub, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict()),targetString.length()-sub.length());
|
|
|
- if (completeMatchCommunity==-1)completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
+ if ((results == null || results.isEmpty()) && splitAddress.getDistrict() != null) {
|
|
|
+ results = contain(sub, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict()), targetString.length() - sub.length());
|
|
|
+ if (completeMatchCommunity == -1)
|
|
|
+ completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
}
|
|
|
//最后全市
|
|
|
if (results == null || results.isEmpty()) {
|
|
|
- results = contain(sub, All_COMMUNITY_IN_SHANGHAI.keySet(),targetString.length()-sub.length());
|
|
|
- if (completeMatchCommunity==-1)completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
+ results = contain(sub, All_COMMUNITY_IN_SHANGHAI.keySet(), targetString.length() - sub.length());
|
|
|
+ if (completeMatchCommunity == -1)
|
|
|
+ completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
}
|
|
|
Iterator<Integer> iterator = results.keySet().iterator();
|
|
|
while (iterator.hasNext()) {
|
|
@@ -179,7 +200,7 @@ public class ShanghaiAddressSplitUtil {
|
|
|
iterator.remove();
|
|
|
}
|
|
|
if (key > 0 && name.equals("镇江")) {
|
|
|
- String sub1 = targetString.substring(key+2);
|
|
|
+ String sub1 = targetString.substring(key + 2);
|
|
|
if (LEVEL_3_SUFFIX_PATTERN.matcher(sub1).matches()) {
|
|
|
iterator.remove();
|
|
|
}
|
|
@@ -187,62 +208,64 @@ public class ShanghaiAddressSplitUtil {
|
|
|
}
|
|
|
communityMap.putAll(results);
|
|
|
//仅有一个选择时当成一选
|
|
|
- if (communityMap.size()==1){
|
|
|
- int index = (int)communityMap.keySet().toArray()[0];
|
|
|
- if (street!=index)community=index;
|
|
|
+ if (communityMap.size() == 1) {
|
|
|
+ int index = (int) communityMap.keySet().toArray()[0];
|
|
|
+ if (street != index) community = index;
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
- void matchThreeLevelAdd(){
|
|
|
- int handingPoint=0;
|
|
|
- threeLevelAddress handingTLA=new threeLevelAddress("","","","","","","","","");
|
|
|
- for (String communityName: new HashSet<>(communityMap.values())){
|
|
|
- if (communityName.isEmpty())continue;
|
|
|
- for(threeLevelAddress t:All_COMMUNITY_IN_SHANGHAI.get(communityName)){
|
|
|
+ void matchThreeLevelAdd() {
|
|
|
+ int handingPoint = 0;
|
|
|
+ threeLevelAddress handingTLA = new threeLevelAddress("", "", "", "", "", "", "", "", "");
|
|
|
+ for (String communityName : new HashSet<>(communityMap.values())) {
|
|
|
+ if (communityName.isEmpty()) continue;
|
|
|
+ for (threeLevelAddress t : All_COMMUNITY_IN_SHANGHAI.get(communityName)) {
|
|
|
int point = checkTLA(t);
|
|
|
- if (point>handingPoint){
|
|
|
- handingPoint=point;
|
|
|
- handingTLA=t;
|
|
|
+ if (point > handingPoint) {
|
|
|
+ handingPoint = point;
|
|
|
+ handingTLA = t;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- for (String streetName:new HashSet<>(streetMap.values())){
|
|
|
- if (streetName.isEmpty())continue;
|
|
|
- for(threeLevelAddress t:All_STREET_IN_SHANGHAI.get(streetName)){
|
|
|
+ for (String streetName : new HashSet<>(streetMap.values())) {
|
|
|
+ if (streetName.isEmpty()) continue;
|
|
|
+ for (threeLevelAddress t : All_STREET_IN_SHANGHAI.get(streetName)) {
|
|
|
int point = checkTLA(t);
|
|
|
- if (point>handingPoint){
|
|
|
- handingPoint=point;
|
|
|
- handingTLA=t;
|
|
|
+ if (point > handingPoint) {
|
|
|
+ handingPoint = point;
|
|
|
+ handingTLA = t;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
threeLevelAddress = handingTLA;
|
|
|
}
|
|
|
- int checkTLA(threeLevelAddress t){
|
|
|
- int output=0;
|
|
|
- if (t.district.equals(splitAddress.getDistrict()))output+=1;
|
|
|
- if (streetMap.containsValue(t.street))output+=10;
|
|
|
- if (street!=-1&&streetMap.get(street).equals(t.street))output+=1000;
|
|
|
- if (communityMap.containsValue(t.community))output+=100;
|
|
|
- if (community!=-1&&communityMap.get(community).equals(t.community))output+=1000;
|
|
|
- if (community!=-1&&Pattern.matches(".*\\d$",communityMap.get(community)))output-=1000;
|
|
|
+
|
|
|
+ int checkTLA(threeLevelAddress t) {
|
|
|
+ int output = 0;
|
|
|
+ if (t.district.equals(splitAddress.getDistrict())) output += 1;
|
|
|
+ if (streetMap.containsValue(t.street)) output += 10;
|
|
|
+ if (street != -1 && streetMap.get(street).equals(t.street)) output += 1000;
|
|
|
+ if (communityMap.containsValue(t.community)) output += 100;
|
|
|
+ if (community != -1 && communityMap.get(community).equals(t.community)) output += 1000;
|
|
|
+ if (community != -1 && Pattern.matches(".*\\d$", communityMap.get(community))) output -= 1000;
|
|
|
return output;
|
|
|
}
|
|
|
- void guessFirstMatch(){
|
|
|
+
|
|
|
+ void guessFirstMatch() {
|
|
|
//先街道
|
|
|
- if (!streetMap.isEmpty()&&street==-1) {
|
|
|
- for (int i :streetMap.keySet()){
|
|
|
- if (streetMap.get(i).equals(threeLevelAddress.street)&&(i<street||street==-1)) {
|
|
|
- street=i;
|
|
|
+ if (!streetMap.isEmpty() && street == -1) {
|
|
|
+ for (int i : streetMap.keySet()) {
|
|
|
+ if (streetMap.get(i).equals(threeLevelAddress.street) && (i < street || street == -1)) {
|
|
|
+ street = i;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
//再居委
|
|
|
- if (community==-1&& !communityMap.isEmpty()){
|
|
|
- for (int i :communityMap.keySet()){
|
|
|
- if (communityMap.get(i).equals(threeLevelAddress.community)&&street!=i&&(i<community||community==-1)){
|
|
|
- community=i;
|
|
|
+ if (community == -1 && !communityMap.isEmpty()) {
|
|
|
+ for (int i : communityMap.keySet()) {
|
|
|
+ if (communityMap.get(i).equals(threeLevelAddress.community) && street != i && (i < community || community == -1)) {
|
|
|
+ community = i;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -250,27 +273,28 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
}
|
|
|
}
|
|
|
- static int washResult(String sourceAddress, Map<Integer, String> result, Pattern should, Pattern... never){
|
|
|
- Map<Integer,String> output=new HashMap<>();
|
|
|
+
|
|
|
+ static int washResult(String sourceAddress, Map<Integer, String> result, Pattern should, Pattern... never) {
|
|
|
+ Map<Integer, String> output = new HashMap<>();
|
|
|
int outputInt = -1;
|
|
|
for (int index : result.keySet()) {
|
|
|
String name = result.get(index);
|
|
|
- String sub =sourceAddress.substring(index + name.length());
|
|
|
+ String sub = sourceAddress.substring(index + name.length());
|
|
|
//匹配到后缀时直接保留
|
|
|
if (should.matcher(sub).find()) {
|
|
|
- outputInt=index;
|
|
|
+ outputInt = index;
|
|
|
} else {
|
|
|
//去除南京路,北京大道型选手
|
|
|
if (ROAD_SUFFIX_PATTERN.matcher(sub).find()) {
|
|
|
continue;
|
|
|
}
|
|
|
- boolean skip =false;
|
|
|
- for (Pattern p :never){
|
|
|
- if (p.matcher(sub).find())skip=true;
|
|
|
+ boolean skip = false;
|
|
|
+ for (Pattern p : never) {
|
|
|
+ if (p.matcher(sub).find()) skip = true;
|
|
|
}
|
|
|
if (skip) continue;
|
|
|
}
|
|
|
- output.put(index,name);
|
|
|
+ output.put(index, name);
|
|
|
}
|
|
|
result.clear();
|
|
|
result.putAll(output);
|
|
@@ -279,33 +303,35 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
/**
|
|
|
* 检查字符串含有哪些字符,输出这些匹配字符的位置和字符的map
|
|
|
- * @param s 被检查字符串
|
|
|
+ *
|
|
|
+ * @param s 被检查字符串
|
|
|
* @param nameList 检查范围
|
|
|
*/
|
|
|
- private static Map<Integer,String> contain(String s,Iterable<String> nameList,int offset){
|
|
|
- Map<Integer,String> output = new HashMap<>();
|
|
|
- if (nameList==null){
|
|
|
+ private static Map<Integer, String> contain(String s, Iterable<String> nameList, int offset) {
|
|
|
+ Map<Integer, String> output = new HashMap<>();
|
|
|
+ if (nameList == null) {
|
|
|
return output;
|
|
|
}
|
|
|
- for (String name:nameList){
|
|
|
- if (name.isEmpty())continue;
|
|
|
+ for (String name : nameList) {
|
|
|
+ if (name.isEmpty()) continue;
|
|
|
int index = -1;
|
|
|
- while ((index = s.indexOf(name, index + 1)) != -1){
|
|
|
- output.put(index+offset,name);
|
|
|
+ while ((index = s.indexOf(name, index + 1)) != -1) {
|
|
|
+ output.put(index + offset, name);
|
|
|
}
|
|
|
}
|
|
|
return output;
|
|
|
}
|
|
|
- private static SplitAddress split(String sourceAddress){
|
|
|
+
|
|
|
+ private static SplitAddress split(String sourceAddress) {
|
|
|
//事前准备
|
|
|
- String beautyAddress = sourceAddress.replaceAll("[\\s]+","");
|
|
|
+ String beautyAddress = sourceAddress.replaceAll("[\\s]+", "");
|
|
|
|
|
|
SplitAddress splitAddress = new SplitAddress();
|
|
|
splitAddress.setFullAddress(sourceAddress);
|
|
|
|
|
|
|
|
|
splittingAddress splittingAddress = new splittingAddress();
|
|
|
- splittingAddress.splitAddress=splitAddress;
|
|
|
+ splittingAddress.splitAddress = splitAddress;
|
|
|
|
|
|
|
|
|
String[] result = AddressSplitUtil.splitAddress(beautyAddress);
|
|
@@ -316,18 +342,18 @@ public class ShanghaiAddressSplitUtil {
|
|
|
splitAddress.setDistrict(result[2]);
|
|
|
//检查是否在外省,未找到省市或者在省市中找到上海,或者找到上海的区都算作省内
|
|
|
Map<Integer, String> districtContainResult = contain(beautyAddress, DISTRICT_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
- int disIndex= washResult(beautyAddress, districtContainResult,LEVEL_1_SUFFIX_PATTERN);
|
|
|
+ int disIndex = washResult(beautyAddress, districtContainResult, LEVEL_1_SUFFIX_PATTERN);
|
|
|
Map<Integer, String> streetContainResult = contain(beautyAddress, STREET_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
- int streetIndex= washResult(beautyAddress, districtContainResult,LEVEL_2_SUFFIX_PATTERN);
|
|
|
+ int streetIndex = washResult(beautyAddress, districtContainResult, LEVEL_2_SUFFIX_PATTERN);
|
|
|
splitAddress.setAddr(result[3]);
|
|
|
- if (!((result[0].isEmpty()|| result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市"))||
|
|
|
- !districtContainResult.isEmpty()||!streetContainResult.isEmpty())) {
|
|
|
+ if (!((result[0].isEmpty() || result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市")) ||
|
|
|
+ !districtContainResult.isEmpty() || !streetContainResult.isEmpty())) {
|
|
|
splitAddress.setStatus(2);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
- if (!districtContainResult.isEmpty()){
|
|
|
- if (disIndex!=-1){
|
|
|
- String district=districtContainResult.get(disIndex);
|
|
|
+ if (!districtContainResult.isEmpty()) {
|
|
|
+ if (disIndex != -1) {
|
|
|
+ String district = districtContainResult.get(disIndex);
|
|
|
threeLevelAddress disTLA = All_COMMUNITY_IN_SHANGHAI.get(DISTRICT_TO_COMMUNITY_MAP.get(district).get(0)).get(0);
|
|
|
splitAddress.setDistrict(disTLA.districtFullName);
|
|
|
splitAddress.setDistrictCode(disTLA.districtCode);
|
|
@@ -344,14 +370,14 @@ public class ShanghaiAddressSplitUtil {
|
|
|
splittingAddress.guessFirstMatch();
|
|
|
|
|
|
|
|
|
- if (splittingAddress.street!=-1||splittingAddress.community!=-1){
|
|
|
+ if (splittingAddress.street != -1 || splittingAddress.community != -1) {
|
|
|
splitAddress.setStreet(splittingAddress.threeLevelAddress.streetFullName);
|
|
|
splitAddress.setStreetCode(splittingAddress.threeLevelAddress.streetCode);
|
|
|
splitAddress.setDistrict(splittingAddress.threeLevelAddress.districtFullName);
|
|
|
splitAddress.setDistrictCode(splittingAddress.threeLevelAddress.districtCode);
|
|
|
|
|
|
}
|
|
|
- if (splittingAddress.community!=-1){
|
|
|
+ if (splittingAddress.community != -1) {
|
|
|
splitAddress.setCommunity(splittingAddress.threeLevelAddress.communityFullName);
|
|
|
splitAddress.setCommunityCode(splittingAddress.threeLevelAddress.communityCode);
|
|
|
|
|
@@ -359,111 +385,116 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
|
|
|
//检查是否能够分离
|
|
|
- if(splittingAddress.community==-1&&splittingAddress.street==-1){
|
|
|
+ if (splittingAddress.community == -1 && splittingAddress.street == -1) {
|
|
|
//检查是否是非地址
|
|
|
- if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()){
|
|
|
+ if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()) {
|
|
|
splitAddress.setStatus(3);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
|
|
|
- if (disIndex!=-1){
|
|
|
- String sub = beautyAddress.substring(disIndex+districtContainResult.get(disIndex).length());
|
|
|
+ if (disIndex != -1) {
|
|
|
+ String sub = beautyAddress.substring(disIndex + districtContainResult.get(disIndex).length());
|
|
|
Matcher m = LEVEL_1_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
+ if (m.find()) {
|
|
|
sub = sub.substring(m.end());
|
|
|
}
|
|
|
splitAddress.setAddr(sub);
|
|
|
}
|
|
|
|
|
|
splitAddress.setStatus(1);
|
|
|
- if (result[0].isEmpty()&&result[1].isEmpty()&&districtContainResult.isEmpty())splitAddress.setStatus(4);
|
|
|
+ if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty())
|
|
|
+ splitAddress.setStatus(4);
|
|
|
return splitAddress;
|
|
|
- }else if (splittingAddress.street> splittingAddress.community){
|
|
|
- String sub = beautyAddress.substring(splittingAddress.street+splittingAddress.streetMap.get(splittingAddress.street).length());
|
|
|
+ } else if (splittingAddress.street > splittingAddress.community) {
|
|
|
+ String sub = beautyAddress.substring(splittingAddress.street + splittingAddress.streetMap.get(splittingAddress.street).length());
|
|
|
Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
+ if (m.find()) {
|
|
|
sub = sub.substring(m.end());
|
|
|
}
|
|
|
splitAddress.setAddr(sub);
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
String sub = beautyAddress.substring(
|
|
|
- splittingAddress.community+
|
|
|
+ splittingAddress.community +
|
|
|
splittingAddress.communityMap.
|
|
|
get(splittingAddress.community).length());
|
|
|
Matcher m = LEVEL_3_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
+ if (m.find()) {
|
|
|
sub = sub.substring(m.end());
|
|
|
}
|
|
|
splitAddress.setAddr(sub);
|
|
|
}
|
|
|
|
|
|
splitAddress.setStatus(0);
|
|
|
- if (result[0].isEmpty()&&result[1].isEmpty()&&districtContainResult.isEmpty())splitAddress.setStatus(4);
|
|
|
- if (splitAddress.getStreet().equals("自由贸易试验区"))splitAddress.setStatus(0);
|
|
|
+ if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty()) splitAddress.setStatus(4);
|
|
|
+ if (splitAddress.getStreet().equals("自由贸易试验区")) splitAddress.setStatus(0);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
|
|
|
- private static SplitAddress beautyResult(SplitAddress splitAddress){
|
|
|
+ private static SplitAddress beautyResult(SplitAddress splitAddress) {
|
|
|
//检查过度分割
|
|
|
- if (splitAddress.getAddr().isEmpty() ||OVER_SPLIT.matcher(splitAddress.getAddr()).find()){
|
|
|
- if (splitAddress.getCommunity().isEmpty()){
|
|
|
- if (splitAddress.getStreet().isEmpty()){
|
|
|
- if (splitAddress.getDistrict().isEmpty()){
|
|
|
- splitAddress.setAddr("上海市"+splitAddress.getAddr());
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getDistrict()+splitAddress.getAddr());
|
|
|
+ if (splitAddress.getAddr().isEmpty() || OVER_SPLIT.matcher(splitAddress.getAddr()).find()) {
|
|
|
+ if (splitAddress.getCommunity().isEmpty()) {
|
|
|
+ if (splitAddress.getStreet().isEmpty()) {
|
|
|
+ if (splitAddress.getDistrict().isEmpty()) {
|
|
|
+ splitAddress.setAddr("上海市" + splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getDistrict() + splitAddress.getAddr());
|
|
|
}
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getStreet()+splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getStreet() + splitAddress.getAddr());
|
|
|
}
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getCommunity()+splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getCommunity() + splitAddress.getAddr());
|
|
|
}
|
|
|
}
|
|
|
//检查多号,多弄
|
|
|
- splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS),""));
|
|
|
+ splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS), ""));
|
|
|
|
|
|
return splitAddress;
|
|
|
}
|
|
|
+
|
|
|
/**
|
|
|
* 工具入口,返回所有数据
|
|
|
+ *
|
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
|
*/
|
|
|
- public static List<SplitAddress> splitAddresses(String sourceAddress){
|
|
|
+ public static List<SplitAddress> splitAddresses(String sourceAddress) {
|
|
|
Matcher matcher = Pattern.compile("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)").matcher(sourceAddress);
|
|
|
- List<SplitAddress> addressList =new ArrayList<>();
|
|
|
- String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)","");
|
|
|
+ List<SplitAddress> addressList = new ArrayList<>();
|
|
|
+ String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)", "");
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
for (char c : beautyString.toCharArray()) {
|
|
|
// 检查是否为全角数字
|
|
|
if (c >= '0' && c <= '9') {
|
|
|
// 转换为半角数字
|
|
|
sb.append((char) (c - '0' + '0'));
|
|
|
- } else if (c=='\uE5CE'){
|
|
|
+ } else if (c == '\uE5CE') {
|
|
|
// 奇妙的乱码,跳过
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
// 保持原字符
|
|
|
sb.append(c);
|
|
|
}
|
|
|
}
|
|
|
beautyString = sb.toString();
|
|
|
addressList.add(beautyResult(split(beautyString)));
|
|
|
- while (matcher.find()){
|
|
|
- String address=matcher.group();
|
|
|
- if (address.length()<=2)continue;
|
|
|
- addressList.addAll(splitAddresses(address.substring(1,address.length()-1)));
|
|
|
+ while (matcher.find()) {
|
|
|
+ String address = matcher.group();
|
|
|
+ if (address.length() <= 2) continue;
|
|
|
+ addressList.addAll(splitAddresses(address.substring(1, address.length() - 1)));
|
|
|
}
|
|
|
- for (SplitAddress s :addressList)s.setSourceAddress(sourceAddress);
|
|
|
+ for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
|
|
|
return addressList;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 工具入口,仅返回最优
|
|
|
+ *
|
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
|
*/
|
|
|
- public static SplitAddress splitBestAddress(String sourceAddress){
|
|
|
+ public static SplitAddress splitBestAddress(String sourceAddress) {
|
|
|
return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
|
|
|
}
|
|
|
+
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
new ShanghaiAddressSplitUtil().init();
|
|
|
System.out.println(splitBestAddress("上海市松江区乐都路339"));
|