|
@@ -13,7 +13,9 @@ import org.springframework.http.ResponseEntity;
|
|
|
import org.springframework.util.StringUtils;
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.HashSet;
|
|
|
import java.util.List;
|
|
|
+import java.util.Set;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
@@ -316,10 +318,18 @@ public class AddressQueryEngine {
|
|
|
if (result != null && result.getData() != null && result.getData().size() > 0) {
|
|
|
List<AddressResult.ContentBean> contentBean = result.getData();
|
|
|
for (AddressResult.ContentBean contentBean1 : contentBean) {
|
|
|
- String address = extractNumbers(contentBean1.getAddress(), false);
|
|
|
- String address2 = extractNumbers(addr, false);
|
|
|
- if (isNotEmptyOrBlank(address) && address.contains(address2) || address2.contains(address)) {
|
|
|
- return contentBean1;
|
|
|
+ Set<String> address = tokenizeString(contentBean1.getAddress()).get(1);
|
|
|
+ if (address != null && address.size() > 1) {
|
|
|
+ Set<String> address2 = tokenizeString(addr).get(1);
|
|
|
+ int addressSize = address.size();
|
|
|
+ for (String addr2 : address2) {
|
|
|
+ if (address.contains(addr2)) {
|
|
|
+ addressSize--;
|
|
|
+ if (addressSize == 0) {
|
|
|
+ return contentBean1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
@@ -331,12 +341,28 @@ public class AddressQueryEngine {
|
|
|
if (result != null && result.getData() != null && result.getData().size() > 0) {
|
|
|
List<AddressResult.ContentBean> contentBean = result.getData();
|
|
|
for (AddressResult.ContentBean contentBean1 : contentBean) {
|
|
|
- String address = extractNumbers(addressReplaceAll(contentBean1.getAddress()), true);
|
|
|
- String address2 = extractNumbers(addressReplaceAll(addr), true);
|
|
|
- String addressNumber = extractNumbers(contentBean1.getAddress(), false);
|
|
|
- String addressNumber2 = extractNumbers(addr, false);
|
|
|
- if (isNotEmptyOrBlank(address) && (address.contains(address2) || address2.contains(address)) && (addressNumber.contains(addressNumber2) || addressNumber2.contains(addressNumber))) {
|
|
|
- return contentBean1;
|
|
|
+ Set<String> addressString = tokenizeString(contentBean1.getAddress()).get(0);
|
|
|
+ Set<String> addressNumber = tokenizeString(contentBean1.getAddress()).get(1);
|
|
|
+ Set<String> address2String = tokenizeString(addr).get(0);
|
|
|
+ Set<String> address2Number = tokenizeString(addr).get(1);
|
|
|
+ if (addressString != null && addressString.size() > 1) {
|
|
|
+ int addressStrSize = addressString.size();
|
|
|
+ for (String addr2str : address2String) {
|
|
|
+ if (addressString.contains(addr2str)) {
|
|
|
+ addressStrSize--;
|
|
|
+ if (addressStrSize == 0) {
|
|
|
+ int addressNumSize = addressNumber.size();
|
|
|
+ for (String addr2Num : address2Number) {
|
|
|
+ if (addressNumber.contains(addr2Num)) {
|
|
|
+ addressNumSize--;
|
|
|
+ if (addressNumSize == 0) {
|
|
|
+ return contentBean1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
@@ -368,22 +394,65 @@ public class AddressQueryEngine {
|
|
|
return address.replaceAll("上海市", "").replaceAll(Constant.getArea(), "").replaceAll("区", "").replaceAll("-", "").replaceAll("_", "").replaceAll("/", "").replaceAll(" ", "").replaceAll(",", "").replaceAll("\\.", "").replaceAll(",", "").replaceAll("。", "").replaceAll("\\+", "").replaceAll("\\*", "");
|
|
|
}
|
|
|
|
|
|
+    /**
+     * Normalizes an address string before it is tokenized.
+     *
+     * <p>Every entry returned by {@code Constant.getTowns()} is removed from the
+     * address (each entry is interpreted as a regular expression by
+     * {@link String#replaceAll(String, String)}), and then the unit markers
+     * "号", "弄", "室", "户", "单元" and "幢" are each replaced by the digit "0".
+     *
+     * @param address the raw address text; must not be {@code null}
+     * @return the address with town entries removed and unit markers replaced by "0"
+     */
+    public static String townReplaceAll(String address) {
+        String[] towns = Constant.getTowns();
+        for (String town : towns) {
+            // Strings are immutable: replaceAll returns a new string, so the result
+            // must be assigned back. The previous code discarded it, which meant
+            // town names were never actually stripped.
+            address = address.replaceAll(town, "");
+        }
+        return address.replaceAll("号", "0").replaceAll("弄", "0").replaceAll("室", "0").replaceAll("户", "0").replaceAll("单元", "0").replaceAll("幢", "0");
+    }
|
|
|
+
|
|
|
/**
|
|
|
- * 得到字符串中所有的数字
|
|
|
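+ * Splits the input into runs of digits and runs of non-digit characters (whitespace is dropped),
+ * after first normalizing it with townReplaceAll.
+ * Returns a two-element List<Set<String>>: index 0 holds the non-digit tokens, index 1 holds the digit tokens.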
|
|
|
*
|
|
|
* @param input
|
|
|
* @return
|
|
|
*/
|
|
|
- public static String extractNumbers(String input, boolean ifString) {
|
|
|
- // 定义正则表达式,用于匹配一个或多个数字
|
|
|
- Pattern pattern = Pattern.compile(ifString ? "\\D+" : "\\d+");
|
|
|
- Matcher matcher = pattern.matcher(input);
|
|
|
- String out = "";
|
|
|
- // 查找所有匹配的数字
|
|
|
- while (matcher.find()) {
|
|
|
- out += matcher.group();
|
|
|
+ public static List<Set<String>> tokenizeString(String input) { // Tokenizes input into non-digit runs (result index 0) and digit runs (result index 1)
|
|
|
+ input = townReplaceAll(input); // normalize the address text before splitting it
|
|
|
+ // Initialize two sets: one for non-digit tokens, one for digit tokens
|
|
|
+ Set<String> nonNumberSet = new HashSet<>();
|
|
|
+ Set<String> numberSet = new HashSet<>();
|
|
|
+
|
|
|
+ StringBuilder currentToken = new StringBuilder();
|
|
|
+
|
|
|
+ for (int i = 0; i < input.length(); i++) {
|
|
|
+ char c = input.charAt(i);
|
|
|
+ if (Character.isDigit(c)) {
|
|
|
+ // Current character is a digit
|
|
|
+ if (currentToken.length() > 0 && !Character.isDigit(currentToken.charAt(0))) {
|
|
|
+ // The pending token is non-digit: flush it into the non-digit set
|
|
|
+ nonNumberSet.add(currentToken.toString());
|
|
|
+ currentToken.setLength(0);
|
|
|
+ }
|
|
|
+ currentToken.append(c);
|
|
|
+ } else {
|
|
|
+ // Current character is not a digit
|
|
|
+ if (currentToken.length() > 0 && Character.isDigit(currentToken.charAt(0))) {
|
|
|
+ // The pending token is numeric: flush it into the digit set
|
|
|
+ numberSet.add(currentToken.toString());
|
|
|
+ currentToken.setLength(0);
|
|
|
+ }
|
|
|
+ if (!Character.isWhitespace(c)) { // whitespace is dropped, so it never becomes part of a token
|
|
|
+ currentToken.append(c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Flush the final pending token; its first character decides which set it belongs to
|
|
|
+ if (currentToken.length() > 0) {
|
|
|
+ if (Character.isDigit(currentToken.charAt(0))) {
|
|
|
+ numberSet.add(currentToken.toString());
|
|
|
+ } else {
|
|
|
+ nonNumberSet.add(currentToken.toString());
|
|
|
+ }
|
|
|
}
|
|
|
- return out;
|
|
|
- }
|
|
|
|
|
|
|
+ // Return both sets in a list: [non-digit tokens, digit tokens]
|
|
|
+ List<Set<String>> result = new ArrayList<>();
|
|
|
+ result.add(nonNumberSet);
|
|
|
+ result.add(numberSet);
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
}
|