|
@@ -2,6 +2,7 @@ package com.skyversation.poiaddr.util.address_spliter;
|
|
|
|
|
|
import com.skyversation.poiaddr.util.ExcelReaderUtils;
|
|
|
import lombok.AllArgsConstructor;
|
|
|
+import lombok.Getter;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import javax.annotation.PostConstruct;
|
|
@@ -10,11 +11,12 @@ import java.util.*;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
import java.util.stream.Collectors;
|
|
|
+import java.util.stream.Stream;
|
|
|
|
|
|
@Service
|
|
|
public class ShanghaiAddressSplitUtil {
|
|
|
@AllArgsConstructor
|
|
|
- static class threeLevelAddress{
|
|
|
+ static class ThreeLevelAddress {
|
|
|
String district;
|
|
|
String street;
|
|
|
String community;
|
|
@@ -24,13 +26,39 @@ public class ShanghaiAddressSplitUtil {
|
|
|
String districtCode;
|
|
|
String streetCode;
|
|
|
String communityCode;
|
|
|
+ }
|
|
|
+
|
|
|
+ static class ThreeLevelAddressTree {
|
|
|
+ private static class node {
|
|
|
+ node parent;
|
|
|
+ List<node> children;
|
|
|
+ ThreeLevelAddress address;
|
|
|
+ }
|
|
|
+
|
|
|
+ List<node> rootNodes;
|
|
|
+ }
|
|
|
|
|
|
+ @Getter
|
|
|
+ static class AddressPart {
|
|
|
+ String address;
|
|
|
+ int location;
|
|
|
+ boolean completeMatch;
|
|
|
+
|
|
|
+ AddressPart(String address, int location) {
|
|
|
+ this.address = address;
|
|
|
+ this.location = location;
|
|
|
+ }
|
|
|
+
|
|
|
+ void matchCompete() {
|
|
|
+ completeMatch = true;
|
|
|
+ }
|
|
|
}
|
|
|
- private static Map<String,List<threeLevelAddress>> All_STREET_IN_SHANGHAI;
|
|
|
- private static Map<String,List<threeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
|
|
|
- private static Map<String,List<String>> DISTRICT_TO_STREET_MAP;
|
|
|
- private static Map<String,List<String>> STREET_TO_COMMUNITY_MAP;
|
|
|
- private static Map<String,List<String>> DISTRICT_TO_COMMUNITY_MAP;
|
|
|
+
|
|
|
+ private static Map<String, List<ThreeLevelAddress>> All_STREET_IN_SHANGHAI;
|
|
|
+ private static Map<String, List<ThreeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
|
|
|
+ private static Map<String, List<String>> DISTRICT_TO_STREET_MAP;
|
|
|
+ private static Map<String, List<String>> STREET_TO_COMMUNITY_MAP;
|
|
|
+ private static Map<String, List<String>> DISTRICT_TO_COMMUNITY_MAP;
|
|
|
|
|
|
private static final Pattern LEVEL_1_SUFFIX_PATTERN = Pattern.compile("^(?:区|新区)");
|
|
|
|
|
@@ -42,22 +70,23 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
private static final Pattern UN_ADDRESS_PATTERN = Pattern.compile("http");
|
|
|
|
|
|
- private static final Pattern OVER_SPLIT=Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
|
|
|
+ private static final Pattern OVER_SPLIT = Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
|
|
|
|
|
|
private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、—/\\\\-][0-9]+(?=[号弄])");
|
|
|
+
|
|
|
@PostConstruct
|
|
|
- private void init(){
|
|
|
+ private void init() {
|
|
|
System.out.println("开始初始化分词器");
|
|
|
- Map<String,threeLevelAddress> districtMap= new HashMap<>();
|
|
|
- Map<String,List<threeLevelAddress>> streetMap= new HashMap<>();
|
|
|
- Map<String,List<threeLevelAddress>> communityMap= new HashMap<>();
|
|
|
- Map<String,List<String>> districtToStreetMap=new HashMap<>();
|
|
|
- Map<String,List<String>> streetToCommunityMap=new HashMap<>();
|
|
|
+ Map<String, ThreeLevelAddress> districtMap = new HashMap<>();
|
|
|
+ Map<String, List<ThreeLevelAddress>> streetMap = new HashMap<>();
|
|
|
+ Map<String, List<ThreeLevelAddress>> communityMap = new HashMap<>();
|
|
|
+ Map<String, List<String>> districtToStreetMap = new HashMap<>();
|
|
|
+ Map<String, List<String>> streetToCommunityMap = new HashMap<>();
|
|
|
|
|
|
- String file = "上海市县乡记录.xlsx";
|
|
|
+ String file = "excel/上海市县乡记录.xlsx";
|
|
|
InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(file);
|
|
|
- if (is==null) is= ShanghaiAddressSplitUtil.class.getResourceAsStream("/"+file);
|
|
|
- if (is==null) throw new RuntimeException("无法找到"+file);
|
|
|
+ if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + file);
|
|
|
+ if (is == null) throw new RuntimeException("无法找到" + file);
|
|
|
try {
|
|
|
for (Map<String, Object> row : ExcelReaderUtils.readExcel(is)) {
|
|
|
String district = Optional.ofNullable(row.get("县级市简称")).map(Object::toString).orElse("");
|
|
@@ -69,23 +98,25 @@ public class ShanghaiAddressSplitUtil {
|
|
|
String districtCode = Optional.ofNullable(row.get("县级市编码")).map(Object::toString).orElse("");
|
|
|
String streetCode = Optional.ofNullable(row.get("街道编码")).map(Object::toString).orElse("");
|
|
|
String communityCode = Optional.ofNullable(row.get("居委编码")).map(Object::toString).orElse("");
|
|
|
- initData(district, street, community, districtFullName, streetFullName, communityFullName,districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
}
|
|
|
//自贸区
|
|
|
- initData("浦东", "试验区","", "浦东新区", "自由贸易试验区","","310115","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("浦东", "试验区", "", "浦东新区", "自由贸易试验区", "", "310115", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
//松江镇特别处理
|
|
|
- initData("松江", "松江","", "松江区", "","","310117","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("松江", "松江", "", "松江区", "", "", "310117", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
//金山工业区
|
|
|
- initData("金山", "金山工业区","", "金山区", "金山工业区","","310116","","", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
+ initData("金山", "金山工业区", "", "金山区", "金山工业区", "", "310116", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
|
|
|
|
|
|
} catch (Exception e) {
|
|
|
throw new RuntimeException(e);
|
|
|
}
|
|
|
All_STREET_IN_SHANGHAI = Collections.unmodifiableMap(streetMap);
|
|
|
All_COMMUNITY_IN_SHANGHAI = Collections.unmodifiableMap(communityMap);
|
|
|
- DISTRICT_TO_STREET_MAP=Collections.unmodifiableMap(districtToStreetMap);
|
|
|
- STREET_TO_COMMUNITY_MAP=Collections.unmodifiableMap(streetToCommunityMap);
|
|
|
- DISTRICT_TO_COMMUNITY_MAP=Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
|
|
|
+ DISTRICT_TO_STREET_MAP = Collections.unmodifiableMap(districtToStreetMap.entrySet()
|
|
|
+ .stream().collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue()
|
|
|
+ .stream().distinct().collect(Collectors.toList()))));
|
|
|
+ STREET_TO_COMMUNITY_MAP = Collections.unmodifiableMap(streetToCommunityMap);
|
|
|
+ DISTRICT_TO_COMMUNITY_MAP = Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
|
|
|
.collect(Collectors.toMap(
|
|
|
Map.Entry::getKey,
|
|
|
entry -> entry.getValue().stream()
|
|
@@ -95,215 +126,244 @@ public class ShanghaiAddressSplitUtil {
|
|
|
System.out.println("分词器初始化完成");
|
|
|
}
|
|
|
|
|
|
- private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName,String districtCode, String streetCode, String communityCode, Map<String, threeLevelAddress> districtMap, Map<String, List<threeLevelAddress>> streetMap, Map<String, List<threeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
|
|
|
- threeLevelAddress add = new threeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode ,streetCode, communityCode);
|
|
|
- districtMap.put(district,add);
|
|
|
- if (!streetMap.containsKey(street)) streetMap.put(street,new ArrayList<>());
|
|
|
+ private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName, String districtCode, String streetCode, String communityCode, Map<String, ThreeLevelAddress> districtMap, Map<String, List<ThreeLevelAddress>> streetMap, Map<String, List<ThreeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
|
|
|
+ ThreeLevelAddress add = new ThreeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode);
|
|
|
+ districtMap.put(district, add);
|
|
|
+ if (!streetMap.containsKey(street)) streetMap.put(street, new ArrayList<>());
|
|
|
streetMap.get(street).add(add);
|
|
|
- if (!communityMap.containsKey(community)) communityMap.put(community,new ArrayList<>());
|
|
|
+ if (!communityMap.containsKey(community)) communityMap.put(community, new ArrayList<>());
|
|
|
communityMap.get(community).add(add);
|
|
|
- if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district,new ArrayList<>());
|
|
|
+ if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district, new ArrayList<>());
|
|
|
districtToStreetMap.get(district).add(street);
|
|
|
- if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street,new ArrayList<>());
|
|
|
+ if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street, new ArrayList<>());
|
|
|
streetToCommunityMap.get(street).add(community);
|
|
|
}
|
|
|
|
|
|
- private static class splittingAddress{
|
|
|
+ private static class splittingAddress {
|
|
|
SplitAddress splitAddress;
|
|
|
|
|
|
- int street=-1;
|
|
|
- int community=-1;
|
|
|
+ List<AddressPart> streetParts = new ArrayList<>();
|
|
|
+ List<AddressPart> communityParts = new ArrayList<>();
|
|
|
|
|
|
- Map<Integer,String> streetMap =new HashMap<>();
|
|
|
- Map<Integer,String> communityMap=new HashMap<>();
|
|
|
- threeLevelAddress threeLevelAddress;
|
|
|
|
|
|
- String targetString;
|
|
|
+ ThreeLevelAddress threeLevelAddress;
|
|
|
|
|
|
- void findStreet(){
|
|
|
- Map<Integer,String> results =null;
|
|
|
- int completeMatchIndex=-1;
|
|
|
- //首先尝试在一选下匹配
|
|
|
- if (splitAddress.getDistrict()!=null){
|
|
|
- results = contain(this.targetString,DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict()),0);
|
|
|
- completeMatchIndex = washResult(this.targetString,results,LEVEL_2_SUFFIX_PATTERN,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN);
|
|
|
- }
|
|
|
- //一选不存在或匹配无结果,直接搜全国
|
|
|
- if (results==null||results.isEmpty()){
|
|
|
- results = contain(this.targetString,All_STREET_IN_SHANGHAI.keySet(),0);
|
|
|
- if (completeMatchIndex==-1)completeMatchIndex = washResult(this.targetString,results,LEVEL_2_SUFFIX_PATTERN,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN);
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- streetMap.putAll(results);
|
|
|
- street=completeMatchIndex;
|
|
|
+ String targetString;
|
|
|
+ private AddressPart bestStreet;
|
|
|
+ private AddressPart bestCommunity;
|
|
|
+
|
|
|
+
|
|
|
+ void findStreet() {
|
|
|
+ List<AddressPart> results = null;
|
|
|
+// int completeMatchIndex = -1;
|
|
|
+// //首先尝试在一选下匹配
|
|
|
+// if (splitAddress.getDistrict() != null) {
|
|
|
+// results = contain(this.targetString, DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict().replaceAll("新?区", "")), 0);
|
|
|
+// washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
|
|
|
+// }
|
|
|
+// //一选不存在或匹配无结果,直接搜全国
|
|
|
+// if (results == null || results.isEmpty()) {
|
|
|
+ results = contain(this.targetString, All_STREET_IN_SHANGHAI.keySet(), 0);
|
|
|
+ washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
|
|
|
+
|
|
|
+// }
|
|
|
+ results = results.stream().filter(addressPart -> {
|
|
|
+ int key = addressPart.location;
|
|
|
+ String name = addressPart.address;
|
|
|
+ switch (name) {
|
|
|
+ case "高桥": {
|
|
|
+ if (key > 0 && targetString.charAt(key - 1) == '外') {
|
|
|
+ return false; // 避免 “外高桥”→“高桥”
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case "莘庄": {
|
|
|
+ String after = targetString.substring(key + name.length());
|
|
|
+ if (after.startsWith("工业区")) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case "外滩":{
|
|
|
+ if (key > 0 && targetString.charAt(key - 1) == '北') {
|
|
|
+ return false; // 避免 “北外滩”→“外滩”
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return true;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ streetParts.addAll(results);
|
|
|
//仅有一个选择时当成一选
|
|
|
- if (streetMap.size()==1){
|
|
|
- street = (int)streetMap.keySet().toArray()[0];
|
|
|
+ if (streetParts.size() == 1) {
|
|
|
+ streetParts.get(0).matchCompete();
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
- void findCommunity(){
|
|
|
- Map<Integer,String> results = null;
|
|
|
- int completeMatchCommunity=-1;
|
|
|
- String sub=targetString;
|
|
|
+ void findCommunity() {
|
|
|
+ List<AddressPart> results = null;
|
|
|
//尝试一选
|
|
|
- if (street!=-1){
|
|
|
- sub = targetString.substring(street+streetMap.get(street).length());
|
|
|
- Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
- sub=sub.substring(m.end());
|
|
|
- }
|
|
|
- results= contain(sub,STREET_TO_COMMUNITY_MAP.get(streetMap.get(street)),targetString.length()-sub.length());
|
|
|
- completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
-
|
|
|
- }
|
|
|
- //一选不存在或匹配无结果,先搜全区
|
|
|
- if ((results == null || results.isEmpty()) && splitAddress.getDistrict()!=null) {
|
|
|
- results = contain(sub, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict()),targetString.length()-sub.length());
|
|
|
- if (completeMatchCommunity==-1)completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
- }
|
|
|
+// List<AddressPart> completeStreet = streetParts.stream().filter(AddressPart::isCompleteMatch).collect(Collectors.toList());
|
|
|
+// if (completeStreet.size() == 1) {
|
|
|
+// AddressPart completeStreetPart = completeStreet.get(0);
|
|
|
+// results = contain(targetString, STREET_TO_COMMUNITY_MAP.get(completeStreetPart.address), 0);
|
|
|
+// washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
+//
|
|
|
+// }
|
|
|
+// //一选不存在或匹配无结果,先搜全区
|
|
|
+// if ((results == null || results.isEmpty()) && splitAddress.getDistrict() != null) {
|
|
|
+// results = contain(targetString, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict().replaceAll("新?区", "")), 0);
|
|
|
+// washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
+// }
|
|
|
//最后全市
|
|
|
- if (results == null || results.isEmpty()) {
|
|
|
- results = contain(sub, All_COMMUNITY_IN_SHANGHAI.keySet(),targetString.length()-sub.length());
|
|
|
- if (completeMatchCommunity==-1)completeMatchCommunity=washResult(targetString,results,LEVEL_3_SUFFIX_PATTERN,LEVEL_1_SUFFIX_PATTERN,LEVEL_2_SUFFIX_PATTERN);
|
|
|
- }
|
|
|
- Iterator<Integer> iterator = results.keySet().iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- int key = iterator.next();
|
|
|
- String name = results.get(key);
|
|
|
- if (key > 0 && name.equals("江镇") && targetString.charAt(key - 1) == '松') {
|
|
|
- iterator.remove();
|
|
|
- }
|
|
|
- if (key > 0 && name.equals("镇江")) {
|
|
|
- String sub1 = targetString.substring(key+2);
|
|
|
- if (LEVEL_3_SUFFIX_PATTERN.matcher(sub1).matches()) {
|
|
|
- iterator.remove();
|
|
|
+// if (results == null || results.isEmpty()) {
|
|
|
+ results = contain(targetString, All_COMMUNITY_IN_SHANGHAI.keySet(), 0);
|
|
|
+ washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
|
|
|
+// }
|
|
|
+ results = results.stream().filter(addressPart -> {
|
|
|
+ int key = addressPart.location;
|
|
|
+ String name = addressPart.address;
|
|
|
+ switch (name) {
|
|
|
+
|
|
|
+ case "江镇": {
|
|
|
+ if (key > 0 && targetString.charAt(key - 1) == '松') {
|
|
|
+ return false; // 避免 “松江镇”→“江镇”
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ case "镇江": {
|
|
|
+ String after = targetString.substring(key + name.length());
|
|
|
+ return LEVEL_3_SUFFIX_PATTERN.matcher(after).find();
|
|
|
+ // 避免 “xx镇江x村” 被拆出 “镇江”
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
- communityMap.putAll(results);
|
|
|
+ return true;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ communityParts.addAll(results);
|
|
|
//仅有一个选择时当成一选
|
|
|
- if (communityMap.size()==1){
|
|
|
- int index = (int)communityMap.keySet().toArray()[0];
|
|
|
- if (street!=index)community=index;
|
|
|
+ if (communityParts.size() == 1) {
|
|
|
+ communityParts.get(0).matchCompete();
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
- void matchThreeLevelAdd(){
|
|
|
- int handingPoint=0;
|
|
|
- threeLevelAddress handingTLA=new threeLevelAddress("","","","","","","","","");
|
|
|
- for (String communityName: new HashSet<>(communityMap.values())){
|
|
|
- if (communityName.isEmpty())continue;
|
|
|
- for(threeLevelAddress t:All_COMMUNITY_IN_SHANGHAI.get(communityName)){
|
|
|
+ void matchThreeLevelAdd() {
|
|
|
+ int handingPoint = 0;
|
|
|
+ ThreeLevelAddress handingTLA = new ThreeLevelAddress("", "", "", "", "", "", "", "", "");
|
|
|
+ for (AddressPart addressPart : communityParts) {
|
|
|
+ for (ThreeLevelAddress t : All_COMMUNITY_IN_SHANGHAI.get(addressPart.address)) {
|
|
|
int point = checkTLA(t);
|
|
|
- if (point>handingPoint){
|
|
|
- handingPoint=point;
|
|
|
- handingTLA=t;
|
|
|
+ if (point > handingPoint) {
|
|
|
+ handingPoint = point;
|
|
|
+ handingTLA = t;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- for (String streetName:new HashSet<>(streetMap.values())){
|
|
|
- if (streetName.isEmpty())continue;
|
|
|
- for(threeLevelAddress t:All_STREET_IN_SHANGHAI.get(streetName)){
|
|
|
+ for (AddressPart addressPart : streetParts) {
|
|
|
+ for (ThreeLevelAddress t : All_STREET_IN_SHANGHAI.get(addressPart.address)) {
|
|
|
int point = checkTLA(t);
|
|
|
- if (point>handingPoint){
|
|
|
- handingPoint=point;
|
|
|
- handingTLA=t;
|
|
|
+ if (point > handingPoint) {
|
|
|
+ handingPoint = point;
|
|
|
+ handingTLA = t;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
threeLevelAddress = handingTLA;
|
|
|
}
|
|
|
- int checkTLA(threeLevelAddress t){
|
|
|
- int output=0;
|
|
|
- if (t.district.equals(splitAddress.getDistrict()))output+=1;
|
|
|
- if (streetMap.containsValue(t.street))output+=10;
|
|
|
- if (street!=-1&&streetMap.get(street).equals(t.street))output+=1000;
|
|
|
- if (communityMap.containsValue(t.community))output+=100;
|
|
|
- if (community!=-1&&communityMap.get(community).equals(t.community))output+=1000;
|
|
|
- if (community!=-1&&Pattern.matches(".*\\d$",communityMap.get(community)))output-=1000;
|
|
|
+
|
|
|
+ int checkTLA(ThreeLevelAddress t) {
|
|
|
+ int output = 0;
|
|
|
+ if (t.district.equals(splitAddress.getDistrict().replaceAll("新?区",""))) output += 1;
|
|
|
+
|
|
|
+ if (streetParts.stream().anyMatch(e -> e.address.equals(t.street))) output += 10;
|
|
|
+ if (streetParts.stream().filter(AddressPart::isCompleteMatch).anyMatch(e -> e.address.equals(t.street)))
|
|
|
+ output += 1000;
|
|
|
+
|
|
|
+ if (communityParts.stream().anyMatch(e -> e.address.equals(t.community))) output += 100;
|
|
|
+ if (communityParts.stream().filter(AddressPart::isCompleteMatch).filter(e -> !e.address.matches(".*\\d$")).anyMatch(e -> e.address.equals(t.community)))
|
|
|
+ output += 1000;
|
|
|
return output;
|
|
|
}
|
|
|
- void guessFirstMatch(){
|
|
|
- //先街道
|
|
|
- if (!streetMap.isEmpty()&&street==-1) {
|
|
|
- for (int i :streetMap.keySet()){
|
|
|
- if (streetMap.get(i).equals(threeLevelAddress.street)&&(i<street||street==-1)) {
|
|
|
- street=i;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- //再居委
|
|
|
- if (community==-1&& !communityMap.isEmpty()){
|
|
|
- for (int i :communityMap.keySet()){
|
|
|
- if (communityMap.get(i).equals(threeLevelAddress.community)&&street!=i&&(i<community||community==-1)){
|
|
|
- community=i;
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- }
|
|
|
+ void guessFirstMatch() {
|
|
|
+ // 街道
|
|
|
+ bestStreet = streetParts.stream()
|
|
|
+ .filter(p -> p.completeMatch)
|
|
|
+ .filter(p -> p.address.equals(threeLevelAddress.street))
|
|
|
+ .max(Comparator.comparingInt(p -> p.location)) // 取 location 最大的
|
|
|
+ .orElseGet(()->streetParts.stream().filter(p -> p.address.equals(threeLevelAddress.street))
|
|
|
+ .max(Comparator.comparingInt(p -> p.location)).orElse(null));
|
|
|
+ // 居委
|
|
|
+ bestCommunity = communityParts.stream()
|
|
|
+ .filter(p -> p.completeMatch)
|
|
|
+ .filter(p -> p.address.equals(threeLevelAddress.community))
|
|
|
+ .max(Comparator.comparingInt(p -> p.location))
|
|
|
+ .orElseGet(()->communityParts.stream().filter(p -> p.address.equals(threeLevelAddress.community))
|
|
|
+ .max(Comparator.comparingInt(p -> p.location)).orElse(null));
|
|
|
|
|
|
}
|
|
|
}
|
|
|
- static int washResult(String sourceAddress, Map<Integer, String> result, Pattern should, Pattern... never){
|
|
|
- Map<Integer,String> output=new HashMap<>();
|
|
|
- int outputInt = -1;
|
|
|
- for (int index : result.keySet()) {
|
|
|
- String name = result.get(index);
|
|
|
- String sub =sourceAddress.substring(index + name.length());
|
|
|
+
|
|
|
+ static List<AddressPart> washResult(String sourceAddress, List<AddressPart> result, Pattern should, Pattern... never) {
|
|
|
+ List<AddressPart> output = new ArrayList<>();
|
|
|
+ for (AddressPart part : result) {
|
|
|
+ int index = part.location;
|
|
|
+ String name = part.address;
|
|
|
+ String sub = sourceAddress.substring(index + name.length());
|
|
|
//匹配到后缀时直接保留
|
|
|
if (should.matcher(sub).find()) {
|
|
|
- outputInt=index;
|
|
|
+ part.matchCompete();
|
|
|
} else {
|
|
|
//去除南京路,北京大道型选手
|
|
|
if (ROAD_SUFFIX_PATTERN.matcher(sub).find()) {
|
|
|
continue;
|
|
|
}
|
|
|
- boolean skip =false;
|
|
|
- for (Pattern p :never){
|
|
|
- if (p.matcher(sub).find())skip=true;
|
|
|
+ boolean skip = false;
|
|
|
+ for (Pattern p : never) {
|
|
|
+ if (p.matcher(sub).find()) skip = true;
|
|
|
}
|
|
|
if (skip) continue;
|
|
|
}
|
|
|
- output.put(index,name);
|
|
|
+ output.add(part);
|
|
|
}
|
|
|
result.clear();
|
|
|
- result.putAll(output);
|
|
|
- return outputInt;
|
|
|
+ result.addAll(output);
|
|
|
+ return output;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 检查字符串含有哪些字符,输出这些匹配字符的位置和字符的map
|
|
|
- * @param s 被检查字符串
|
|
|
+ *
|
|
|
+ * @param s 被检查字符串
|
|
|
* @param nameList 检查范围
|
|
|
*/
|
|
|
- private static Map<Integer,String> contain(String s,Iterable<String> nameList,int offset){
|
|
|
- Map<Integer,String> output = new HashMap<>();
|
|
|
- if (nameList==null){
|
|
|
+ private static List<AddressPart> contain(String s, Iterable<String> nameList, int offset) {
|
|
|
+ List<AddressPart> output = new ArrayList<>();
|
|
|
+ if (nameList == null) {
|
|
|
return output;
|
|
|
}
|
|
|
- for (String name:nameList){
|
|
|
- if (name.isEmpty())continue;
|
|
|
+ for (String name : nameList) {
|
|
|
+ if (name.isEmpty()) continue;
|
|
|
int index = -1;
|
|
|
- while ((index = s.indexOf(name, index + 1)) != -1){
|
|
|
- output.put(index+offset,name);
|
|
|
+ while ((index = s.indexOf(name, index + 1)) != -1) {
|
|
|
+ output.add(new AddressPart(name, index));
|
|
|
}
|
|
|
}
|
|
|
return output;
|
|
|
}
|
|
|
- private static SplitAddress split(String sourceAddress){
|
|
|
+
|
|
|
+ private static SplitAddress split(String sourceAddress) {
|
|
|
//事前准备
|
|
|
- String beautyAddress = sourceAddress.replaceAll("[\\s]+","");
|
|
|
+ String beautyAddress = sourceAddress.replaceAll("[\\s]+", "");
|
|
|
|
|
|
SplitAddress splitAddress = new SplitAddress();
|
|
|
splitAddress.setFullAddress(sourceAddress);
|
|
|
|
|
|
|
|
|
splittingAddress splittingAddress = new splittingAddress();
|
|
|
- splittingAddress.splitAddress=splitAddress;
|
|
|
+ splittingAddress.splitAddress = splitAddress;
|
|
|
|
|
|
|
|
|
String[] result = AddressSplitUtil.splitAddress(beautyAddress);
|
|
@@ -313,23 +373,27 @@ public class ShanghaiAddressSplitUtil {
|
|
|
splitAddress.setCity(result[1]);
|
|
|
splitAddress.setDistrict(result[2]);
|
|
|
//检查是否在外省,未找到省市或者在省市中找到上海,或者找到上海的区都算作省内
|
|
|
- Map<Integer, String> districtContainResult = contain(beautyAddress, DISTRICT_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
- int disIndex= washResult(beautyAddress, districtContainResult,LEVEL_1_SUFFIX_PATTERN);
|
|
|
- Map<Integer, String> streetContainResult = contain(beautyAddress, STREET_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
- int streetIndex= washResult(beautyAddress, districtContainResult,LEVEL_2_SUFFIX_PATTERN);
|
|
|
+ List<AddressPart> districtContainResult = contain(beautyAddress, DISTRICT_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
+ washResult(beautyAddress, districtContainResult, LEVEL_1_SUFFIX_PATTERN);
|
|
|
+ List<AddressPart> streetContainResult = contain(beautyAddress, STREET_TO_COMMUNITY_MAP.keySet(), 0);
|
|
|
+ washResult(beautyAddress, districtContainResult, LEVEL_2_SUFFIX_PATTERN);
|
|
|
splitAddress.setAddr(result[3]);
|
|
|
- if (!((result[0].isEmpty()|| result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市"))||
|
|
|
- !districtContainResult.isEmpty()||!streetContainResult.isEmpty())) {
|
|
|
+ if (!((result[0].isEmpty() || result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市")) ||
|
|
|
+ !districtContainResult.isEmpty() || !streetContainResult.isEmpty())) {
|
|
|
splitAddress.setStatus(2);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
- if (!districtContainResult.isEmpty()){
|
|
|
- if (disIndex!=-1){
|
|
|
- String district=districtContainResult.get(disIndex);
|
|
|
- threeLevelAddress disTLA = All_COMMUNITY_IN_SHANGHAI.get(DISTRICT_TO_COMMUNITY_MAP.get(district).get(0)).get(0);
|
|
|
- splitAddress.setDistrict(disTLA.districtFullName);
|
|
|
- splitAddress.setDistrictCode(disTLA.districtCode);
|
|
|
- }
|
|
|
+ AddressPart bestDistrict = null;
|
|
|
+ if (!districtContainResult.isEmpty()) {
|
|
|
+ bestDistrict = districtContainResult.stream()
|
|
|
+ .filter(p -> p.completeMatch)
|
|
|
+ .min(Comparator.comparingInt(p -> p.location))
|
|
|
+ .orElseGet(() -> districtContainResult.stream()
|
|
|
+ .min(Comparator.comparingInt(p -> p.location)).get());
|
|
|
+ String district = bestDistrict.address;
|
|
|
+ ThreeLevelAddress disTLA = All_COMMUNITY_IN_SHANGHAI.get(DISTRICT_TO_COMMUNITY_MAP.get(district).get(0)).get(0);
|
|
|
+ splitAddress.setDistrict(disTLA.districtFullName);
|
|
|
+ splitAddress.setDistrictCode(disTLA.districtCode);
|
|
|
}
|
|
|
splitAddress.setProvince("上海市");
|
|
|
splitAddress.setCity("上海市");
|
|
@@ -342,14 +406,15 @@ public class ShanghaiAddressSplitUtil {
|
|
|
splittingAddress.guessFirstMatch();
|
|
|
|
|
|
|
|
|
- if (splittingAddress.street!=-1||splittingAddress.community!=-1){
|
|
|
- splitAddress.setStreet(splittingAddress.threeLevelAddress.streetFullName);
|
|
|
- splitAddress.setStreetCode(splittingAddress.threeLevelAddress.streetCode);
|
|
|
+ if (splittingAddress.bestStreet != null || splittingAddress.bestCommunity != null) {
|
|
|
splitAddress.setDistrict(splittingAddress.threeLevelAddress.districtFullName);
|
|
|
splitAddress.setDistrictCode(splittingAddress.threeLevelAddress.districtCode);
|
|
|
-
|
|
|
}
|
|
|
- if (splittingAddress.community!=-1){
|
|
|
+ if (splittingAddress.bestStreet != null) {
|
|
|
+ splitAddress.setStreet(splittingAddress.threeLevelAddress.streetFullName);
|
|
|
+ splitAddress.setStreetCode(splittingAddress.threeLevelAddress.streetCode);
|
|
|
+ }
|
|
|
+ if (splittingAddress.bestCommunity != null) {
|
|
|
splitAddress.setCommunity(splittingAddress.threeLevelAddress.communityFullName);
|
|
|
splitAddress.setCommunityCode(splittingAddress.threeLevelAddress.communityCode);
|
|
|
|
|
@@ -357,111 +422,197 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
|
|
|
//检查是否能够分离
|
|
|
- if(splittingAddress.community==-1&&splittingAddress.street==-1){
|
|
|
+ if (splittingAddress.bestCommunity == null && splittingAddress.bestStreet == null) {
|
|
|
//检查是否是非地址
|
|
|
- if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()){
|
|
|
+ if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()) {
|
|
|
splitAddress.setStatus(3);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
|
|
|
- if (disIndex!=-1){
|
|
|
- String sub = beautyAddress.substring(disIndex+districtContainResult.get(disIndex).length());
|
|
|
+ if (bestDistrict != null) {
|
|
|
+ String sub = beautyAddress.substring(bestDistrict.location + bestDistrict.address.length());
|
|
|
Matcher m = LEVEL_1_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
+ if (m.find()) {
|
|
|
sub = sub.substring(m.end());
|
|
|
}
|
|
|
splitAddress.setAddr(sub);
|
|
|
}
|
|
|
|
|
|
splitAddress.setStatus(1);
|
|
|
- if (result[0].isEmpty()&&result[1].isEmpty()&&districtContainResult.isEmpty())splitAddress.setStatus(4);
|
|
|
+ if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty())
|
|
|
+ splitAddress.setStatus(4);
|
|
|
return splitAddress;
|
|
|
- }else if (splittingAddress.street> splittingAddress.community){
|
|
|
- String sub = beautyAddress.substring(splittingAddress.street+splittingAddress.streetMap.get(splittingAddress.street).length());
|
|
|
- Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
- sub = sub.substring(m.end());
|
|
|
- }
|
|
|
- splitAddress.setAddr(sub);
|
|
|
- }else {
|
|
|
- String sub = beautyAddress.substring(
|
|
|
- splittingAddress.community+
|
|
|
- splittingAddress.communityMap.
|
|
|
- get(splittingAddress.community).length());
|
|
|
- Matcher m = LEVEL_3_SUFFIX_PATTERN.matcher(sub);
|
|
|
- if (m.find()){
|
|
|
- sub = sub.substring(m.end());
|
|
|
+ } else {
|
|
|
+ AddressPart bestStreet = splittingAddress.bestStreet;
|
|
|
+ AddressPart bestCommunity = splittingAddress.bestCommunity;
|
|
|
+ AddressPart lastPart = Stream.of(bestDistrict, bestStreet, bestCommunity)
|
|
|
+ .filter(Objects::nonNull)
|
|
|
+ .max(Comparator.comparingInt(p -> p.location))
|
|
|
+ .orElse(null);
|
|
|
+ String tail = "";
|
|
|
+ if (lastPart != null) {
|
|
|
+ int end = lastPart.location + lastPart.address.length();
|
|
|
+ tail = beautyAddress.substring(end);
|
|
|
+
|
|
|
+ Pattern suffixPat =
|
|
|
+ lastPart == bestDistrict ? LEVEL_1_SUFFIX_PATTERN :
|
|
|
+ lastPart == bestStreet ? LEVEL_2_SUFFIX_PATTERN :
|
|
|
+ LEVEL_3_SUFFIX_PATTERN;
|
|
|
+
|
|
|
+ Matcher m = suffixPat.matcher(tail);
|
|
|
+ if (m.find()) {
|
|
|
+ tail = tail.substring(m.end());
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ tail = beautyAddress;
|
|
|
}
|
|
|
- splitAddress.setAddr(sub);
|
|
|
+ splitAddress.setAddr(tail.trim());
|
|
|
}
|
|
|
|
|
|
splitAddress.setStatus(0);
|
|
|
- if (result[0].isEmpty()&&result[1].isEmpty()&&districtContainResult.isEmpty())splitAddress.setStatus(4);
|
|
|
- if (splitAddress.getStreet().equals("自由贸易试验区"))splitAddress.setStatus(0);
|
|
|
+ if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty()) splitAddress.setStatus(4);
|
|
|
+ if (splitAddress.getStreet().equals("自由贸易试验区")) splitAddress.setStatus(0);
|
|
|
return splitAddress;
|
|
|
}
|
|
|
|
|
|
- private static SplitAddress beautyResult(SplitAddress splitAddress){
|
|
|
+ private static SplitAddress beautyResult(SplitAddress splitAddress) {
|
|
|
//检查过度分割
|
|
|
- if (splitAddress.getAddr().isEmpty() ||OVER_SPLIT.matcher(splitAddress.getAddr()).find()){
|
|
|
- if (splitAddress.getCommunity().isEmpty()){
|
|
|
- if (splitAddress.getStreet().isEmpty()){
|
|
|
- if (splitAddress.getDistrict().isEmpty()){
|
|
|
- splitAddress.setAddr("上海市"+splitAddress.getAddr());
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getDistrict()+splitAddress.getAddr());
|
|
|
+ if (splitAddress.getAddr().isEmpty() || OVER_SPLIT.matcher(splitAddress.getAddr()).find()) {
|
|
|
+ if (splitAddress.getCommunity().isEmpty()) {
|
|
|
+ if (splitAddress.getStreet().isEmpty()) {
|
|
|
+ if (splitAddress.getDistrict().isEmpty()) {
|
|
|
+ splitAddress.setAddr("上海市" + splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getDistrict() + splitAddress.getAddr());
|
|
|
}
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getStreet()+splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getStreet() + splitAddress.getAddr());
|
|
|
}
|
|
|
- }else {
|
|
|
- splitAddress.setAddr(splitAddress.getCommunity()+splitAddress.getAddr());
|
|
|
+ } else {
|
|
|
+ splitAddress.setAddr(splitAddress.getCommunity() + splitAddress.getAddr());
|
|
|
}
|
|
|
}
|
|
|
//检查多号,多弄
|
|
|
- splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS),""));
|
|
|
+ splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS), ""));
|
|
|
|
|
|
return splitAddress;
|
|
|
}
|
|
|
+
|
|
|
/**
|
|
|
* 工具入口,返回所有数据
|
|
|
+ *
|
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
|
*/
|
|
|
- public static List<SplitAddress> splitAddresses(String sourceAddress){
|
|
|
- Matcher matcher = Pattern.compile("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|（([^（）]*|（[^（）]*）)*）").matcher(sourceAddress);
|
|
|
- List<SplitAddress> addressList =new ArrayList<>();
|
|
|
- String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|（([^（）]*|（[^（）]*）)*）","");
|
|
|
- StringBuilder sb = new StringBuilder();
|
|
|
- for (char c : beautyString.toCharArray()) {
|
|
|
+ public static List<SplitAddress> splitAddresses(String sourceAddress) {
|
|
|
+ List<SplitAddress> addressList = new ArrayList<>();
|
|
|
+ char[] leftParen = {'(', '{', '[', '（'};
|
|
|
+ char[] rightParen = {')', '}', ']', '）'};
|
|
|
+ SplitParenString sp = splitByTopLevelParen(sourceAddress, leftParen, rightParen);
|
|
|
+ StringBuilder stringOutOfParen = new StringBuilder();
|
|
|
+ String outParen = String.join("", sp.outOfParen);
|
|
|
+ for (char c : outParen.toCharArray()) {
|
|
|
// 检查是否为全角数字
|
|
|
if (c >= '０' && c <= '９') {
|
|
|
// 转换为半角数字
|
|
|
- sb.append((char) (c - '０' + '0'));
|
|
|
- } else if (c=='\uE5CE'){
|
|
|
+ stringOutOfParen.append((char) (c - '０' + '0'));
|
|
|
+ } else if (c == '\uE5CE') {
|
|
|
// 奇妙的乱码,跳过
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
// 保持原字符
|
|
|
- sb.append(c);
|
|
|
+ stringOutOfParen.append(c);
|
|
|
}
|
|
|
}
|
|
|
- beautyString = sb.toString();
|
|
|
- addressList.add(beautyResult(split(beautyString)));
|
|
|
- while (matcher.find()){
|
|
|
- String address=matcher.group();
|
|
|
- if (address.length()<=2)continue;
|
|
|
- addressList.addAll(splitAddresses(address.substring(1,address.length()-1)));
|
|
|
+ outParen = stringOutOfParen.toString();
|
|
|
+ addressList.add(beautyResult(split(outParen)));
|
|
|
+ for (String s : sp.inParen) {
|
|
|
+ addressList.addAll(splitAddresses(s));
|
|
|
}
|
|
|
- for (SplitAddress s :addressList)s.setSourceAddress(sourceAddress);
|
|
|
+ for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
|
|
|
return addressList;
|
|
|
}
|
|
|
|
|
|
+ public static class SplitParenString {
|
|
|
+ List<String> outOfParen;
|
|
|
+ List<String> inParen;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static SplitParenString splitByTopLevelParen(String s, char[] left, char[] right) {
|
|
|
+ Set<Character> leftSet = new HashSet<>();
|
|
|
+ for (char c : left) {
|
|
|
+ leftSet.add(c);
|
|
|
+ }
|
|
|
+ Set<Character> rightSet = new HashSet<>();
|
|
|
+ for (char c : right) {
|
|
|
+ rightSet.add(c);
|
|
|
+ }
|
|
|
+ SplitParenString sp = new SplitParenString();
|
|
|
+ sp.outOfParen = new ArrayList<>();
|
|
|
+ sp.inParen = new ArrayList<>();
|
|
|
+ StringBuilder depth0 = new StringBuilder();
|
|
|
+ StringBuilder depth1 = new StringBuilder();
|
|
|
+ int depth = 0;
|
|
|
+ for (int i = 0; i < s.length(); i++) {
|
|
|
+ char c = s.charAt(i);
|
|
|
+ boolean isParen = false;
|
|
|
+ if (leftSet.contains(c)) {
|
|
|
+ isParen = true;
|
|
|
+ if (depth > 0) {
|
|
|
+ depth1.append(c);
|
|
|
+ }
|
|
|
+ depth++;
|
|
|
+ if (depth == 1) {
|
|
|
+ String depth0Str = depth0.toString();
|
|
|
+ if (!depth0Str.isEmpty()) {
|
|
|
+ sp.outOfParen.add(depth0Str);
|
|
|
+ depth0 = new StringBuilder();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if (rightSet.contains(c)) {
|
|
|
+ isParen = true;
|
|
|
+ if (depth > 1) {
|
|
|
+ depth1.append(c);
|
|
|
+ }
|
|
|
+ depth--;
|
|
|
+ if (depth == 0) {
|
|
|
+ String depth1Str = depth1.toString();
|
|
|
+ if (!depth1Str.isEmpty()) {
|
|
|
+ sp.inParen.add(depth1Str);
|
|
|
+ depth1 = new StringBuilder();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (depth < 0) {
|
|
|
+ depth = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!isParen) {
|
|
|
+ if (depth == 0) {
|
|
|
+ depth0.append(c);
|
|
|
+ } else if (depth >= 1) {
|
|
|
+ depth1.append(c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ String depth0Str = depth0.toString();
|
|
|
+ if (!depth0Str.isEmpty()) {
|
|
|
+ sp.outOfParen.add(depth0Str);
|
|
|
+ }
|
|
|
+ String depth1Str = depth1.toString();
|
|
|
+ if (!depth1Str.isEmpty()) {
|
|
|
+ sp.inParen.add(depth1Str);
|
|
|
+ }
|
|
|
+ return sp;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
/**
|
|
|
* 工具入口,仅返回最优
|
|
|
+ *
|
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
|
*/
|
|
|
- public static SplitAddress splitBestAddress(String sourceAddress){
|
|
|
+ public static SplitAddress splitBestAddress(String sourceAddress) {
|
|
|
return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
|
|
|
}
|
|
|
+
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
// List<SplitAddress> result = new ArrayList<>();
|
|
|
// for (Map<String,Object> row:ExcelReaderUtils.readExcel("C:\\Users\\dxh\\IdeaProjects\\address_poi_yysz_server\\src\\main\\resources\\yysk_dmdz_address_standardization_200000_36.xlsx")){
|
|
@@ -471,12 +622,11 @@ public class ShanghaiAddressSplitUtil {
|
|
|
// ExcelReaderUtils.writeSplitAddressExcel(result,"C:\\\\Users\\\\dxh\\\\IdeaProjects\\\\address_poi_yysz_server\\\\src\\\\main\\\\resources\\\\result.xlsx");
|
|
|
// System.out.println("完成");
|
|
|
new ShanghaiAddressSplitUtil().init();
|
|
|
- System.out.println(splitBestAddress("新胜路88、98号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("新胜路88-98号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("新胜路、98号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("新胜路88\\98号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("新胜路18、28号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("新胜路28号3号厂房"));
|
|
|
- System.out.println(splitBestAddress("88、98号3号厂房"));
|
|
|
+ splitAddresses("上海市浦东新区保税区外高桥国际金融中心").forEach(System.out::println);
|
|
|
+ splitAddresses("上海市闵行区莘庄工业区上海汉洁科学仪器有限公司").forEach(System.out::println);
|
|
|
+ splitAddresses("上海市虹口区北外滩街道DK").forEach(System.out::println);
|
|
|
+ splitAddresses("上海市虹口区凉城新村街道好邻居(凉城路店)").forEach(System.out::println);
|
|
|
+ splitAddresses("上海市闵行区虹桥镇上海感图网络科技有限公司").forEach(System.out::println);
|
|
|
+ splitAddresses("上海市长宁区仙霞新村街道仙霞街道外来人员管理办公室").forEach(System.out::println);
|
|
|
}
|
|
|
}
|