|
@@ -427,34 +427,106 @@ public class ShanghaiAddressSplitUtil {
|
|
* 工具入口,返回所有数据
|
|
* 工具入口,返回所有数据
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
*/
|
|
*/
|
|
- public static List<SplitAddress> splitAddresses(String sourceAddress){
|
|
|
|
- Matcher matcher = Pattern.compile("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)").matcher(sourceAddress);
|
|
|
|
- List<SplitAddress> addressList =new ArrayList<>();
|
|
|
|
- String beautyString = sourceAddress.replaceAll("\\(([^()]*|\\([^()]*\\))*\\)|\\[([^\\[\\]]*|\\[[^\\[\\]]*])*]|(([^()]*|([^()]*))*)","");
|
|
|
|
- StringBuilder sb = new StringBuilder();
|
|
|
|
- for (char c : beautyString.toCharArray()) {
|
|
|
|
|
|
+ public static List<SplitAddress> splitAddresses(String sourceAddress) {
|
|
|
|
+ List<SplitAddress> addressList = new ArrayList<>();
|
|
|
|
+ char[] leftParen = {'(', '{', '[', '('};
|
|
|
|
+ char[] rightParen = {')', '}', ']', ')'};
|
|
|
|
+ SplitParenString sp = splitByTopLevelParen(sourceAddress, leftParen, rightParen);
|
|
|
|
+ StringBuilder stringOutOfParen = new StringBuilder();
|
|
|
|
+ String outParen = String.join("", sp.outOfParen);
|
|
|
|
+ for (char c : outParen.toCharArray()) {
|
|
// 检查是否为全角数字
|
|
// 检查是否为全角数字
|
|
if (c >= '0' && c <= '9') {
|
|
if (c >= '0' && c <= '9') {
|
|
// 转换为半角数字
|
|
// 转换为半角数字
|
|
- sb.append((char) (c - '0' + '0'));
|
|
|
|
- } else if (c=='\uE5CE'){
|
|
|
|
|
|
+ stringOutOfParen.append((char) (c - '0' + '0'));
|
|
|
|
+ } else if (c == '\uE5CE') {
|
|
// 奇妙的乱码,跳过
|
|
// 奇妙的乱码,跳过
|
|
- }else {
|
|
|
|
|
|
+ } else {
|
|
// 保持原字符
|
|
// 保持原字符
|
|
- sb.append(c);
|
|
|
|
|
|
+ stringOutOfParen.append(c);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- beautyString = sb.toString();
|
|
|
|
- addressList.add(beautyResult(split(beautyString)));
|
|
|
|
- while (matcher.find()){
|
|
|
|
- String address=matcher.group();
|
|
|
|
- if (address.length()<=2)continue;
|
|
|
|
- addressList.addAll(splitAddresses(address.substring(1,address.length()-1)));
|
|
|
|
|
|
+ outParen = stringOutOfParen.toString();
|
|
|
|
+ addressList.add(beautyResult(split(outParen)));
|
|
|
|
+ for (String s:sp.inParen){
|
|
|
|
+ addressList.addAll(splitAddresses(s));
|
|
}
|
|
}
|
|
- for (SplitAddress s :addressList)s.setSourceAddress(sourceAddress);
|
|
|
|
|
|
+ for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
|
|
return addressList;
|
|
return addressList;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ public static class SplitParenString{
|
|
|
|
+ List<String> outOfParen;
|
|
|
|
+ List<String> inParen;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static SplitParenString splitByTopLevelParen(String s, char[] left, char[] right) {
|
|
|
|
+ Set<Character> leftSet = new HashSet<>();
|
|
|
|
+ for (char c : left) {
|
|
|
|
+ leftSet.add(c);
|
|
|
|
+ }
|
|
|
|
+ Set<Character> rightSet = new HashSet<>();
|
|
|
|
+ for (char c : right) {
|
|
|
|
+ rightSet.add(c);
|
|
|
|
+ }
|
|
|
|
+ SplitParenString sp = new SplitParenString();
|
|
|
|
+ sp.outOfParen = new ArrayList<>();
|
|
|
|
+ sp.inParen = new ArrayList<>();
|
|
|
|
+ StringBuilder depth0 = new StringBuilder();
|
|
|
|
+ StringBuilder depth1 = new StringBuilder();
|
|
|
|
+ int depth = 0;
|
|
|
|
+ for (int i = 0; i < s.length(); i++) {
|
|
|
|
+ char c = s.charAt(i);
|
|
|
|
+ boolean isParen=false;
|
|
|
|
+ if (leftSet.contains(c)) {
|
|
|
|
+ isParen=true;
|
|
|
|
+ if (depth > 0) {
|
|
|
|
+ depth1.append(c);
|
|
|
|
+ }
|
|
|
|
+ depth++;
|
|
|
|
+ if (depth==1){
|
|
|
|
+ String depth0Str = depth0.toString();
|
|
|
|
+ if (!depth0Str.isEmpty()) {
|
|
|
|
+ sp.outOfParen.add(depth0Str);
|
|
|
|
+ depth0 = new StringBuilder();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else if (rightSet.contains(c)) {
|
|
|
|
+ isParen=true;
|
|
|
|
+ if (depth>1){
|
|
|
|
+ depth1.append(c);
|
|
|
|
+ }
|
|
|
|
+ depth--;
|
|
|
|
+ if (depth == 0) {
|
|
|
|
+ String depth1Str = depth1.toString();
|
|
|
|
+ if (!depth1Str.isEmpty()){
|
|
|
|
+ sp.inParen.add(depth1Str);
|
|
|
|
+ depth1 = new StringBuilder();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (depth < 0) {
|
|
|
|
+ depth = 0;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (!isParen) {
|
|
|
|
+ if (depth == 0) {
|
|
|
|
+ depth0.append(c);
|
|
|
|
+ }else if (depth >= 1) {
|
|
|
|
+ depth1.append(c);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ String depth0Str = depth0.toString();
|
|
|
|
+ if (!depth0Str.isEmpty()) {
|
|
|
|
+ sp.outOfParen.add(depth0Str);
|
|
|
|
+ }
|
|
|
|
+ String depth1Str = depth1.toString();
|
|
|
|
+ if (!depth1Str.isEmpty()){
|
|
|
|
+ sp.inParen.add(depth1Str);
|
|
|
|
+ }
|
|
|
|
+ return sp;
|
|
|
|
+ }
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* 工具入口,仅返回最优
|
|
* 工具入口,仅返回最优
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|