AddressQueryEngine.java 28 KB


  1. package com.skyversation.poiaddr.addquery;
  2. import com.alibaba.fastjson.JSON;
  3. import com.alibaba.fastjson.JSONArray;
  4. import com.alibaba.fastjson.JSONObject;
  5. import com.skyversation.poiaddr.bean.AddressResult;
  6. import com.skyversation.poiaddr.bean.GeoJsonBean;
  7. import com.skyversation.poiaddr.bean.WDToken;
  8. import com.skyversation.poiaddr.entity.AmapAddressV3;
  9. import com.skyversation.poiaddr.entity.YyszAddressQp;
  10. import com.skyversation.poiaddr.service.AreaService;
  11. import com.skyversation.poiaddr.util.net.AddressNetTools;
  12. import com.skyversation.poiaddr.util.status.AddressLevel;
  13. import com.skyversation.poiaddr.util.status.AddressResultEnum;
  14. import com.skyversation.poiaddr.util.tasks.ScheduledTasks;
  15. import org.springframework.http.HttpMethod;
  16. import org.springframework.http.ResponseEntity;
  17. import org.springframework.stereotype.Service;
  18. import org.springframework.util.StringUtils;
  19. import org.apache.commons.codec.binary.Base64;
  20. import javax.crypto.Mac;
  21. import javax.crypto.spec.SecretKeySpec;
  22. import java.util.*;
  23. import java.util.concurrent.ThreadLocalRandom;
  24. public class AddressQueryEngine {
  /** Process-wide singleton, created eagerly when the class is initialised. */
  private static final AddressQueryEngine instance = new AddressQueryEngine();

  /** Not instantiable from outside; obtain the engine through {@link #getInstance()}. */
  private AddressQueryEngine() {
  }

  /**
   * Returns the shared engine instance.
   *
   * <p>The instance is assigned in a static initialiser, so it is never {@code null} here and
   * no locking or lazy creation is required.
   *
   * @return the singleton {@code AddressQueryEngine}
   */
  public static AddressQueryEngine getInstance() {
    return instance;
  }

  /**
   * Token used by {@link #wdjaSearchByName(String)}; fetched on first use and re-fetched once it
   * is older than the refresh interval checked in that method.
   */
  private WDToken wdToken = null;
  35. /***
  36. * 单条地名搜索通用方法,搜索顺序 1. 武大吉奥; 2. 市四中心; 3. 高德搜索; 4. 高德高级搜索;
  37. * @param addrs
  38. * @return
  39. */
  40. public AddressResult commonSearchByName(List<String> addrs) {
  41. AddressResult addressResult = new AddressResult();
  42. // 数据库查询
  43. for (String addr : addrs) {
  44. // 优先进行数据库查询
  45. List<AmapAddressV3> dbPois = AreaService.getInstance().getAddressPoisByAddr(addr);
  46. if (dbPois != null && dbPois.size() > 0) {
  47. JSONArray pois = new JSONArray();
  48. pois.addAll(dbPois);
  49. addressResult = new TransfromDataTool().gdV3ResultToResult(addr, pois, false);
  50. if (addressResult != null && addressResult.getData() != null && addressResult.getData().size() > 0) {
  51. addressResult.setCode(AddressResultEnum.GD_SUCCESS);
  52. addressResult.setMessage("成功");
  53. return getCjWgWgwByLoc(addressResult);
  54. }
  55. }
  56. }
  57. /*for (String addr : addrs) {
  58. // 创建请求
  59. addressResult = sj_szxSearchByName(addr, 3);
  60. if (addressResult != null) {
  61. addressResult.setCode(AddressResultEnum.SZX_SUCCESS);
  62. addressResult.setMessage("成功");
  63. return getCjWgWgwByLoc(addressResult);
  64. }
  65. }*/
  66. // 市中心转发的测绘院地名地址服务
  67. for (String addr : addrs) {
  68. // 创建请求
  69. addressResult = szxSearchByName(addr);
  70. // TODO 高德接口无法使用,市中心的接口请求成为了最后一道防线 && ifRuleData(addressResult.getData().get(0).getScore())
  71. if (addressResult != null && addressResult.getData() != null && addressResult.getData().size() > 0 && ifRuleData(addressResult.getData().get(0).getScore())) {
  72. addressResult.setCode(AddressResultEnum.SZX_SUCCESS);
  73. addressResult.setMessage("成功");
  74. return getCjWgWgwByLoc(addressResult);
  75. }
  76. }
  77. // TODO 如果多次尝试请求市中心地址后,还是请求失败就使用高德地名地址搜索,现在是到了上限,暂时不用
  78. for (String addr : addrs) {
  79. // 创建请求(要返回对应的搜索地址,返回地址,总分,市:cityname,街道:adname,村居:community)
  80. addressResult = gdV3SearchByName(addr);
  81. if (addressResult != null && addressResult.getData() != null && addressResult.getData().size() > 0) {
  82. addressResult.setCode(AddressResultEnum.GD_SUCCESS);
  83. addressResult.setMessage("成功");
  84. return getCjWgWgwByLoc(addressResult);
  85. }
  86. }
  87. if (addressResult != null) {
  88. addressResult.setMessage("失败");
  89. addressResult.setCode(AddressResultEnum.RESULT_NULL);
  90. }
  91. return addressResult;
  92. }
  93. /**
  94. * 使用市中心的地名地址搜索后,如果结果不理想,则使用高德地名地址接口请求
  95. *
  96. * @param score 市中心地名地址接口返回的最高打分
  97. * @return
  98. */
  99. public boolean ifRuleData(String score) {
  100. if (score.contains("rule_")) {
  101. return true;
  102. } else {
  103. try {
  104. return Float.parseFloat(score) > 1.8;
  105. } catch (Exception e) {
  106. // 防患于未然,也许会有什么空值,异常值……
  107. return false;
  108. }
  109. }
  110. }
  111. /***
  112. * 市四中心地名搜索
  113. * @param address
  114. * @return
  115. */
  116. public AddressResult szxSearchByName(String address) {
  117. ResponseEntity response = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, Constant.SZX_HLW_URL + com.skyversation.poiaddr.util.AddressTools.reOutDistinguish(address), null, null, 3);
  118. if (response == null) {
  119. return null;
  120. }
  121. String body = response.getBody() + "";
  122. if (!StringUtils.hasText(body))
  123. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
  124. try {
  125. JSONObject json = JSONObject.parseObject(body);
  126. return TransfromDataTool.szxResultToResult(json, address);
  127. } catch (Exception e) {
  128. return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
  129. }
  130. }
  131. /***
  132. * 武大吉奥单条地名搜索
  133. * @param address
  134. */
  135. public AddressResult wdjaSearchByName(String address) {
  136. System.out.println("<<<<<<<<<<----------开始武大吉奥地名地址搜索------------>>>>>>>>>>>>>");
  137. if (wdToken == null || System.currentTimeMillis() - wdToken.getTime() > 36000000) {
  138. wdToken = AddressTools.getInstance().getWDToken(Constant.WD_USER_NAME, Constant.WD_USER_PWD);
  139. if (wdToken == null) {
  140. return null;
  141. }
  142. wdToken.setTime(System.currentTimeMillis());
  143. }
  144. if (wdToken == null)
  145. return AddressTools.getInstance().faildQuery(AddressResultEnum.NO_TOKEN, "token获取失败");
  146. String token = wdToken.getToken();
  147. String url = Constant.GET_ADDRESS_MEG_URL + "?token=" + token + "&addr=" + address;
  148. String body = null;
  149. try {
  150. body = AddressNetTools.getInstance().wdSendGetRequest(url);
  151. } catch (Exception e) {
  152. e.toString();
  153. }
  154. if (body == null || body.equals("null") || !StringUtils.hasText(body))
  155. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
  156. try {
  157. JSONObject json = JSONObject.parseObject(body);
  158. return TransfromDataTool.wdResultToResult(json);
  159. } catch (Exception e) {
  160. return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
  161. }
  162. }
  163. public static Map<String, String> initSigns() {
  164. Map<String, String> headerMap = new HashMap<>();
  165. String timestamp = System.currentTimeMillis() + "";
  166. String appCode = "7unv4vbwqxnq4a7m9h";
  167. String UUid = UUID.randomUUID().toString();
  168. String appSecret = "w6tawvf4k3ck4ikij9";
  169. headerMap.put("x-timestamp", timestamp);
  170. headerMap.put("x-appcode", appCode);
  171. headerMap.put("x-uuid", UUid);
  172. // 签名
  173. headerMap.put("x-sign", AddressQueryEngine.getSigns(timestamp, appCode, UUid, appSecret));
  174. return headerMap;
  175. }
  176. public static String getSigns(String timestamp, String appCode, String UUid, String appSecret) {
  177. // 先拼接
  178. String signData = timestamp + appCode + UUid;
  179. // 生成sign
  180. String sign = "";
  181. try {
  182. Mac sha256 = Mac.getInstance("HmacSHA256");
  183. sha256.init(new SecretKeySpec(appSecret.getBytes(), "HmacSHA256"));
  184. byte[] result = sha256.doFinal(signData.getBytes());
  185. sign = Base64.encodeBase64String(result);
  186. } catch (Exception e) {
  187. System.err.println("sign 生成异常:" + e);
  188. }
  189. return sign;
  190. }
  191. /**
  192. * 关键字地名地址搜索,专用搜索处理逻辑
  193. *
  194. * @param address
  195. * @param level
  196. * @return
  197. */
  198. public AddressResult sj_szxSearchByName(String address, Integer level) {
  199. // 鉴权
  200. JSONObject params = new JSONObject();
  201. JSONArray requestJson = new JSONArray();
  202. JSONObject paramObject = new JSONObject();
  203. paramObject.put("name", "query");
  204. paramObject.put("position", "QUERY");
  205. switch (level) {
  206. case 3:
  207. paramObject.put("value", address);
  208. break;
  209. case 2:
  210. if (address.contains("号")) {
  211. paramObject.put("value", address.substring(0, address.lastIndexOf("号") + 1));
  212. } else {
  213. level--;
  214. return sj_szxSearchByName(address, level);
  215. }
  216. break;
  217. case 1:
  218. if (address.contains("弄")) {
  219. paramObject.put("value", address.substring(0, address.lastIndexOf("弄") + 1));
  220. } else {
  221. paramObject.put("value", address);
  222. }
  223. break;
  224. }
  225. if (!address.startsWith("上海")) {
  226. paramObject.put("value", "上海市" + paramObject.get("value"));
  227. }
  228. requestJson.add(paramObject);
  229. // 判断是否是关键字版搜索服务
  230. if (Constant.SJ_SZX_SEARCH_BY_NAME.contains("1742459783686")) {
  231. JSONObject paramObject2 = new JSONObject();
  232. paramObject2.put("name", "region");
  233. paramObject2.put("position", "QUERY");
  234. paramObject2.put("value", Constant.getAMAP_CITY_CODE());
  235. requestJson.add(paramObject2);
  236. JSONObject paramObject3 = new JSONObject();
  237. paramObject3.put("name", "page_size");
  238. paramObject3.put("position", "QUERY");
  239. paramObject3.put("value", "10");
  240. requestJson.add(paramObject3);
  241. }
  242. params.put("requestJson", requestJson);
  243. // 发起请求
  244. ResponseEntity response = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.POST, Constant.SJ_SZX_SEARCH_BY_NAME, params, initSigns(), 5);
  245. if (response != null && response.hasBody()) {
  246. String body = response.getBody() + "";
  247. if (!StringUtils.hasText(body)) {
  248. if (level > 0) {
  249. level--;
  250. return sj_szxSearchByName(address, level);
  251. }
  252. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
  253. }
  254. try {
  255. if (body.indexOf("[") != 0) {
  256. AddressResult addressResult = TransfromDataTool.szxResultToResult(JSONObject.parseObject(body), address);
  257. if (addressResult.getCode().equals(AddressResultEnum.RESULT_NULL) && level > 0) {
  258. level--;
  259. return sj_szxSearchByName(address, level);
  260. }
  261. return addressResult;
  262. } else {
  263. // 将得分最高的结果映射到实体类
  264. return TransfromDataTool.szxResultToResult2(com.skyversation.poiaddr.util.AddressTools.getInstance().findBestMatch(address, JSONArray.parseArray(body), "address"));
  265. }
  266. } catch (Exception e) {
  267. System.err.println("请求地址:" + address + ";返回结果:" + body + ";处理异常:" + e);
  268. return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
  269. }
  270. } else {
  271. if (level > 0) {
  272. level--;
  273. return sj_szxSearchByName(address, level);
  274. }
  275. System.out.println("地址[" + address + "]未查询到数据!");
  276. return null;
  277. }
  278. }
  279. /***
  280. * 高德普通地名搜索
  281. * @param address
  282. * @return
  283. */
  284. public AddressResult gdSearchByName(String address) {
  285. String geoUrl = Constant.AMAP_GEO_URL + "?key=" + Constant.AMAP_KEY[ThreadLocalRandom.current().nextInt(0, Constant.AMAP_KEY.length)] + "&address=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&output=JSON";
  286. ResponseEntity response = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, geoUrl, null, null, 0);
  287. if (response != null && response.getBody() != null) {
  288. String body = response.getBody() + "";
  289. if (!StringUtils.hasText(body))
  290. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
  291. try {
  292. JSONObject json = JSONObject.parseObject(body);
  293. return TransfromDataTool.gdResultToResult(json);
  294. } catch (Exception e) {
  295. System.err.println(e);
  296. return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
  297. }
  298. } else {
  299. return null;
  300. }
  301. }
  302. /***
  303. * 高德高级地名搜索
  304. * @param address
  305. * @return
  306. */
  307. public AddressResult gdV3SearchByName(String address) {
  308. JSONArray pois = new JSONArray();
  309. ScheduledTasks.gdRequestSize++;
  310. String geoUrl =
  311. Constant.AMAP_SEARCH_NAME_V3 + "?key=" + Constant.AMAP_KEY[ThreadLocalRandom.current().nextInt(0, Constant.AMAP_KEY.length)] + "&types=" + Constant.AMAP_SEARCH_TYPES +
  312. "&keywords=" + com.skyversation.poiaddr.util.AddressTools.reOutDistinguish(address) + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20";
  313. ResponseEntity responseEntity = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, geoUrl, null, null, 0);
  314. if (responseEntity != null && responseEntity.hasBody()) {
  315. String body = responseEntity.getBody() + "";
  316. if (!StringUtils.hasText(body)) {
  317. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
  318. }
  319. JSONObject json = JSONObject.parseObject(body);
  320. if (json.containsKey("pois")) {
  321. pois.addAll(json.getJSONArray("pois"));
  322. }
  323. // if (json.containsKey("count")) {
  324. // int count = Integer.parseInt(json.get("count") + "");
  325. // if (count > 20) {
  326. // for (int i = 2; i < (count % 20 + 1); i++) {
  327. // geoUrl =
  328. // Constant.AMAP_SEARCH_NAME_V3 + "?key=" + Constant.AMAP_KEY + "&types=" + Constant.AMAP_SEARCH_TYPES +
  329. // "&keywords=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20&page=" + i;
  330. // ResponseEntity responseEntity2 = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, geoUrl, null, null, 0);
  331. // if (responseEntity2.hasBody()) {
  332. // body = responseEntity2.getBody() + "";
  333. // }
  334. // if (!StringUtils.hasText(body)) {
  335. // return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_AGAIN_NULL, "多次搜索无结果");
  336. // }
  337. // JSONObject json2 = JSONObject.parseObject(body);
  338. // if (json2.containsKey(pois)) {
  339. // pois.addAll(JSONObject.parseObject(body).getJSONArray("pois"));
  340. // }
  341. // }
  342. // }
  343. // }
  344. try {
  345. return new TransfromDataTool().gdV3ResultToResult(address, pois, true);
  346. } catch (Exception e) {
  347. System.err.println(e);
  348. e.printStackTrace();
  349. return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
  350. }
  351. } else {
  352. return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_AGAIN_NULL, "高德V3分页查询无结果");
  353. }
  354. }
  355. /***
  356. * 根据搜索地址,赋值村居、网格、微格网信息
  357. * @param result
  358. * @return
  359. */
  360. public AddressResult getCjWgWgwByLoc(AddressResult result) {
  361. if (result == null || result.getData() == null || result.getData().size() < 1) {
  362. return result;
  363. } else {
  364. AreaService areaService = AreaService.getInstance();
  365. for (AddressResult.ContentBean content : result.getData()) {
  366. if ((content.getAdname() == null || content.getAdname().isEmpty() || content.getCityname() == null || content.getCityname().isEmpty() || content.getCommunity() == null || content.getCommunity().isEmpty()) && content.getLat() != null && content.getLon() != null) {
  367. // 判断是否存在行政区划数据
  368. if (content.getCityname() == null || content.getCityname().isEmpty()) {
  369. GeoJsonBean adBean = areaService.isInadPolygon(content.getLon(), content.getLat());
  370. content.setAdJson(adBean == null ? new JSONObject() : adBean.getProperties());
  371. }
  372. // 判断是否存在街镇数据
  373. if (content.getAdname() == null || content.getAdname().isEmpty()) {
  374. GeoJsonBean townBean = areaService.isInTownPolygon(content.getLon(), content.getLat());
  375. content.setTownJson(townBean == null ? new JSONObject() : townBean.getProperties());
  376. }
  377. // 判断是否存在村居数据
  378. if (content.getCommunity() == null || content.getCommunity().isEmpty()) {
  379. GeoJsonBean cjBean = areaService.isInResidentialCommitteePolygon(content.getLon(), content.getLat());
  380. content.setCjJson(cjBean == null ? new JSONObject() : cjBean.getProperties());
  381. }
  382. }
  383. }
  384. return result;
  385. }
  386. }
  387. //
  388. public static boolean isNotEmptyOrBlank(String str) {
  389. return str != null && !str.trim().isEmpty();
  390. }
  391. /**
  392. * 1. 搜索到结果就返回第一个;
  393. * 2. 搜索到结果去除行政区划后,存在包含关系,则使用;
  394. * 3. 搜索到结果,数字进行分词,数字匹配则使用;
  395. * 4. 搜索到结果,数字与文本均匹配,则使用;
  396. * 5. 结果与基准数据完全一致,则使用。
  397. *
  398. * @param result
  399. * @param level
  400. * @return
  401. */
  402. public AddressResult.ContentBean verificaData(AddressResult result, AddressLevel level, String addr) {
  403. switch (level) {
  404. case LEVEL_1: {
  405. if (result != null && result.getData() != null && result.getData().size() > 0) {
  406. return result.getData().get(0);
  407. } else {
  408. return null;
  409. }
  410. }
  411. // 2. 搜索到结果去除行政区划后,存在包含关系,则使用;
  412. case LEVEL_CONTAINS_2: {
  413. if (result != null && result.getData() != null && result.getData().size() > 0) {
  414. List<AddressResult.ContentBean> contentBean = result.getData();
  415. for (AddressResult.ContentBean contentBean1 : contentBean) {
  416. if (contentBean1.getAddress() != null) {
  417. String address = townReplaceAll(addressReplaceAll(contentBean1.getAddress()));
  418. String address2 = townReplaceAll(addressReplaceAll(addr));
  419. if (isNotEmptyOrBlank(address) && address.contains(address2)) {
  420. return contentBean1;
  421. }
  422. }
  423. }
  424. } else {
  425. return null;
  426. }
  427. }
  428. // 3. 搜索到结果,数字进行分词,数字匹配则使用;
  429. case LEVLE_NUMBER_3: {
  430. if (result != null && result.getData() != null && result.getData().size() > 0) {
  431. List<AddressResult.ContentBean> contentBean = result.getData();
  432. for (AddressResult.ContentBean contentBean1 : contentBean) {
  433. if (contentBean1.getAddress() != null) {
  434. Set<String> address = tokenizeString(contentBean1.getAddress()).get(1);
  435. if (address != null && address.size() > 1) {
  436. Set<String> address2 = tokenizeString(addr).get(1);
  437. int addressSize = address.size();
  438. for (String addr2 : address2) {
  439. if (address.contains(addr2)) {
  440. addressSize--;
  441. if (addressSize == 0) {
  442. return contentBean1;
  443. }
  444. }
  445. }
  446. }
  447. }
  448. }
  449. } else {
  450. return null;
  451. }
  452. }
  453. // 4. 搜索到结果,数字与文本均匹配,则使用;
  454. case LEVEL_NUMBER_TEXT_4: {
  455. if (result != null && result.getData() != null && result.getData().size() > 0) {
  456. List<AddressResult.ContentBean> contentBean = result.getData();
  457. for (AddressResult.ContentBean contentBean1 : contentBean) {
  458. if (contentBean1.getAddress() != null) {
  459. Set<String> addressString = tokenizeString(contentBean1.getAddress()).get(0);
  460. Set<String> addressNumber = tokenizeString(contentBean1.getAddress()).get(1);
  461. Set<String> address2String = tokenizeString(addr).get(0);
  462. Set<String> address2Number = tokenizeString(addr).get(1);
  463. if (addressString != null && addressString.size() > 1) {
  464. int addressStrSize = addressString.size();
  465. for (String addr2str : address2String) {
  466. if (addressString.contains(addr2str)) {
  467. addressStrSize--;
  468. if (addressStrSize == 0) {
  469. int addressNumSize = addressNumber.size();
  470. for (String addr2Num : address2Number) {
  471. if (addressNumber.contains(addr2Num)) {
  472. addressNumSize--;
  473. if (addressNumSize == 0) {
  474. return contentBean1;
  475. }
  476. }
  477. }
  478. }
  479. }
  480. }
  481. }
  482. }
  483. }
  484. } else {
  485. return null;
  486. }
  487. }
  488. // 结果与基准数据完全一致,则使用。
  489. case LEVEL_TOTAL_CONTAINS_5: {
  490. if (result != null && result.getData() != null && result.getData().size() > 0) {
  491. List<AddressResult.ContentBean> contentBean = result.getData();
  492. for (AddressResult.ContentBean contentBean1 : contentBean) {
  493. if (contentBean1.getAddress() != null) {
  494. String address = addressReplaceAll(contentBean1.getAddress());
  495. String address2 = addressReplaceAll(addr);
  496. if (isNotEmptyOrBlank(address) && address.equals(address2)) {
  497. return contentBean1;
  498. }
  499. }
  500. }
  501. } else {
  502. return null;
  503. }
  504. }
  505. default: {
  506. return null;
  507. }
  508. }
  509. }
  510. public static String addressReplaceAll(String address) {
  511. return address.replaceAll("上海市", "").replaceAll(Constant.getArea() + "区", "").replaceAll("-", "").replaceAll("_", "").replaceAll("/", "").replaceAll(" ", "").replaceAll(",", "").replaceAll("\\.", "").replaceAll(",", "").replaceAll("。", "").replaceAll("\\+", "").replaceAll("\\*", "");
  512. }
  513. public static String townReplace_ct(String address) {
  514. address = address.replaceAll(" ", "");
  515. // 替换市
  516. address = address.replaceAll("上海市", "");
  517. String[] towns = Constant.getTowns();
  518. for (String town : towns) {
  519. if (address.contains(town)) {
  520. address = address.substring(address.indexOf(town) + town.length());
  521. }
  522. }
  523. // TODO 强行去除居委
  524. if (address.contains("居委会")) {
  525. address = address.substring(address.indexOf("居委会") + 3);
  526. }
  527. if (address.contains("委员会")) {
  528. address = address.substring(address.indexOf("委员会") + 3);
  529. }
  530. return address;
  531. }
  532. public static String townReplaceAll(String address) {
  533. address = townReplace_ct(address);
  534. return address.replaceAll("小区", "0").replaceAll("号", "0").replaceAll("弄", "0").replaceAll("室", "0").replaceAll("户", "0").replaceAll("单元", "0").replaceAll("幢", "0");
  535. }
  536. /**
  537. * 数字和文字分词返回List<Set<String>>
  538. *
  539. * @param input
  540. * @return
  541. */
  542. public static List<Set<String>> tokenizeString(String input) {
  543. // 初始化两个集合,一个用于存储非数字字符串,一个用于存储数字字符串
  544. Set<String> nonNumberSet = new HashSet<>();
  545. Set<String> numberSet = new HashSet<>();
  546. StringBuilder currentToken = new StringBuilder();
  547. for (int i = 0; i < input.length(); i++) {
  548. char c = input.charAt(i);
  549. if (Character.isDigit(c)) {
  550. // 如果当前字符是数字
  551. if (currentToken.length() > 0 && !Character.isDigit(currentToken.charAt(0))) {
  552. // 如果之前的 token 是非数字,将其添加到非数字集合中
  553. nonNumberSet.add(currentToken.toString());
  554. currentToken.setLength(0);
  555. }
  556. currentToken.append(c);
  557. } else {
  558. // 如果当前字符不是数字
  559. if (currentToken.length() > 0 && Character.isDigit(currentToken.charAt(0))) {
  560. // 如果之前的 token 是数字,将其添加到数字集合中
  561. numberSet.add(currentToken.toString());
  562. currentToken.setLength(0);
  563. }
  564. if (!Character.isWhitespace(c)) {
  565. currentToken.append(c);
  566. }
  567. }
  568. }
  569. // 处理最后一个 token
  570. if (currentToken.length() > 0) {
  571. if (Character.isDigit(currentToken.charAt(0))) {
  572. numberSet.add(currentToken.toString());
  573. } else {
  574. nonNumberSet.add(currentToken.toString());
  575. }
  576. }
  577. // 将两个集合添加到列表中
  578. List<Set<String>> result = new ArrayList<>();
  579. result.add(nonNumberSet);
  580. result.add(numberSet);
  581. return result;
  582. }
  583. }