ShanghaiAddressSplitUtil.java 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. package com.skyversation.poiaddr.util;
  2. import com.alibaba.fastjson.JSONArray;
  3. import com.alibaba.fastjson.JSONObject;
  4. import com.skyversation.poiaddr.addquery.AddressQueryEngine;
  5. import com.skyversation.poiaddr.bean.AddressResult;
  6. import com.skyversation.poiaddr.entity.AddrBean;
  7. import com.skyversation.poiaddr.service.AreaService;
  8. import lombok.AllArgsConstructor;
  9. import org.springframework.stereotype.Service;
  10. import javax.annotation.PostConstruct;
  11. import java.io.InputStream;
  12. import java.time.LocalDateTime;
  13. import java.time.format.DateTimeFormatter;
  14. import java.util.*;
  15. import java.util.concurrent.ThreadLocalRandom;
  16. import java.util.regex.Matcher;
  17. import java.util.regex.Pattern;
  18. import java.util.stream.Collectors;
  19. @Service
  20. public class ShanghaiAddressSplitUtil {
  /**
   * One row of the district / street / community hierarchy: short name, full name and code for
   * each of the three levels. The all-arguments constructor is generated by Lombok and takes the
   * fields in declaration order.
   */
  @AllArgsConstructor
  static class threeLevelAddress {
    // District short name (loaded from the "县级市简称" column).
    String district;
    // Street / town short name (loaded from the "街道简称" column).
    String street;
    // Community name (loaded from the "居委" column).
    String community;
    // District full name (loaded from the "县级市" column).
    String districtFullName;
    // Street / town full name (loaded from the "街道" column).
    String streetFullName;
    // Community full name; the loader reads it from the same "居委" column as the short name.
    String communityFullName;
    // District code (loaded from the "县级市编码" column).
    String districtCode;
    // Street code (loaded from the "街道编码" column).
    String streetCode;
    // Community code (loaded from the "居委编码" column).
    String communityCode;
  }
  // Street short name -> every hierarchy row carrying that street name. Assigned in init().
  private static Map<String, List<threeLevelAddress>> All_STREET_IN_SHANGHAI;
  // Community name -> every hierarchy row carrying that community name. Assigned in init().
  private static Map<String, List<threeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
  // District short name -> street short names (one entry per loaded row, so names repeat).
  private static Map<String, List<String>> DISTRICT_TO_STREET_MAP;
  // Street short name -> community names (one entry per loaded row).
  private static Map<String, List<String>> STREET_TO_COMMUNITY_MAP;
  // District short name -> community names of all its streets; derived from the two maps above.
  private static Map<String, List<String>> DISTRICT_TO_COMMUNITY_MAP;
  38. private static final Pattern LEVEL_1_SUFFIX_PATTERN = Pattern.compile("^(?:区|新区)");
  39. private static final Pattern LEVEL_2_SUFFIX_PATTERN = Pattern.compile("^(?:街道|路街道|镇|乡|新镇)");
  40. private static final Pattern LEVEL_3_SUFFIX_PATTERN = Pattern.compile("^(?:居委会|管委会居委会|管委会|社区|社区居委会|居民委员会|居民区|居委|村|村委会|园区|苑|安居办|居|工作站|会)");
  41. private static final Pattern ROAD_SUFFIX_PATTERN = AddressSplitUtil.ROAD_SUFFIX_PATTERN;
  42. private static final Pattern UN_ADDRESS_PATTERN = Pattern.compile("http");
  43. private static final Pattern OVER_SPLIT = Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
  44. private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\][0-9]+(?=[号弄])");
  45. public static Map<String, String> errAddrReStr = new HashMap<>();
  46. @PostConstruct
  47. private void init() {
  48. System.out.println("开始初始化分词器");
  49. // 行政区划
  50. errAddrReStr.put("川沙县", "浦东新区");
  51. errAddrReStr.put("南市区", "黄浦区");
  52. errAddrReStr.put("崇明县", "崇明区");
  53. errAddrReStr.put("卢湾区", "黄浦区");
  54. errAddrReStr.put("闸北区", "静安区");
  55. errAddrReStr.put("南汇区", "浦东新区");
  56. errAddrReStr.put("吴淞区", "宝山区");
  57. // 街镇
  58. errAddrReStr.put("花木镇", "花木街道");
  59. errAddrReStr.put("杨思乡", "杨思镇");
  60. errAddrReStr.put("杨思镇", "三林镇");
  61. errAddrReStr.put("凌桥镇", "高桥镇");
  62. errAddrReStr.put("杨园镇", "高东镇");
  63. errAddrReStr.put("顾路镇", "曹路镇");
  64. errAddrReStr.put("龚路镇", "曹路镇");
  65. errAddrReStr.put("张桥镇", "金桥镇");
  66. errAddrReStr.put("蔡路镇", "合庆镇");
  67. errAddrReStr.put("王港镇", "唐镇");
  68. errAddrReStr.put("黄楼镇", "川沙镇");
  69. errAddrReStr.put("六团镇", "川沙镇");
  70. errAddrReStr.put("望新镇", "外冈镇");
  71. errAddrReStr.put("封浜镇", "江桥镇");
  72. errAddrReStr.put("鲁汇镇", "浦江镇");
  73. errAddrReStr.put("杜行镇", "浦江镇");
  74. errAddrReStr.put("陈行镇", "浦江镇");
  75. errAddrReStr.put("张泽镇", "叶榭镇");
  76. errAddrReStr.put("五厍镇", "泖港镇");
  77. errAddrReStr.put("李塔汇镇", "石湖荡镇");
  78. errAddrReStr.put("大港镇", "小昆山镇");
  79. errAddrReStr.put("天马山镇", "佘山镇");
  80. errAddrReStr.put("江海镇", "南桥镇");
  81. errAddrReStr.put("工业区", "");
  82. Map<String, threeLevelAddress> districtMap = new HashMap<>();
  83. Map<String, List<threeLevelAddress>> streetMap = new HashMap<>();
  84. Map<String, List<threeLevelAddress>> communityMap = new HashMap<>();
  85. Map<String, List<String>> districtToStreetMap = new HashMap<>();
  86. Map<String, List<String>> streetToCommunityMap = new HashMap<>();
  87. String file = "上海市县乡记录.xlsx";
  88. InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(file);
  89. if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + file);
  90. if (is == null) throw new RuntimeException("无法找到" + file);
  91. try {
  92. for (Map<String, Object> row : ExcelReaderUtils.readExcel(is)) {
  93. String district = Optional.ofNullable(row.get("县级市简称")).map(Object::toString).orElse("");
  94. String street = Optional.ofNullable(row.get("街道简称")).map(Object::toString).orElse("");
  95. String community = Optional.ofNullable(row.get("居委")).map(Object::toString).orElse("");
  96. String districtFullName = Optional.ofNullable(row.get("县级市")).map(Object::toString).orElse("");
  97. String streetFullName = Optional.ofNullable(row.get("街道")).map(Object::toString).orElse("");
  98. String communityFullName = Optional.ofNullable(row.get("居委")).map(Object::toString).orElse("");
  99. String districtCode = Optional.ofNullable(row.get("县级市编码")).map(Object::toString).orElse("");
  100. String streetCode = Optional.ofNullable(row.get("街道编码")).map(Object::toString).orElse("");
  101. String communityCode = Optional.ofNullable(row.get("居委编码")).map(Object::toString).orElse("");
  102. initData(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
  103. }
  104. //自贸区
  105. initData("浦东", "试验区", "", "浦东新区", "自由贸易试验区", "", "310115", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
  106. //松江镇特别处理
  107. initData("松江", "松江", "", "松江区", "", "", "310117", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
  108. //金山工业区
  109. initData("金山", "金山工业区", "", "金山区", "金山工业区", "", "310116", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
  110. } catch (Exception e) {
  111. throw new RuntimeException(e);
  112. }
  113. All_STREET_IN_SHANGHAI = Collections.unmodifiableMap(streetMap);
  114. All_COMMUNITY_IN_SHANGHAI = Collections.unmodifiableMap(communityMap);
  115. DISTRICT_TO_STREET_MAP = Collections.unmodifiableMap(districtToStreetMap);
  116. STREET_TO_COMMUNITY_MAP = Collections.unmodifiableMap(streetToCommunityMap);
  117. DISTRICT_TO_COMMUNITY_MAP = Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
  118. .collect(Collectors.toMap(
  119. Map.Entry::getKey,
  120. entry -> entry.getValue().stream()
  121. .flatMap(street -> STREET_TO_COMMUNITY_MAP.getOrDefault(street, Collections.emptyList()).stream())
  122. .collect(Collectors.toList())
  123. )));
  124. System.out.println("分词器初始化完成");
  125. // TODO 网络连通性测试,可以请求一下测试的市中心地址
  126. try {
  127. System.out.println("电脑最大线程数:" + Runtime.getRuntime().availableProcessors());
  128. System.out.println("<<<<<<<<------开始网络连通性测试");
  129. long startTime = System.currentTimeMillis();
  130. List<String> addrs = new ArrayList<>();
  131. addrs.add("上海市浦东新区芳心路251弄");
  132. AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName(addrs);
  133. long endTime = System.currentTimeMillis();
  134. System.out.println(">>>>>>>>------网络连通性测试完成!用时" + (endTime - startTime) / 1000 + "秒!结果:" + addressResult);
  135. } catch (Exception e) {
  136. e.printStackTrace();
  137. System.err.println(">>>>>>>>------网络连通性测试结果:" + e);
  138. }
  139. System.out.println("<<<<<<<<------开始缓存ScheduledTasks.allDmdzData对象");
  140. AreaService.getInstance().getAllDmdzAddressDatas();
  141. }
  142. private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName, String districtCode, String streetCode, String communityCode, Map<String, threeLevelAddress> districtMap, Map<String, List<threeLevelAddress>> streetMap, Map<String, List<threeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
  143. threeLevelAddress add = new threeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode);
  144. districtMap.put(district, add);
  145. if (!streetMap.containsKey(street)) streetMap.put(street, new ArrayList<>());
  146. streetMap.get(street).add(add);
  147. if (!communityMap.containsKey(community)) communityMap.put(community, new ArrayList<>());
  148. communityMap.get(community).add(add);
  149. if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district, new ArrayList<>());
  150. districtToStreetMap.get(district).add(street);
  151. if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street, new ArrayList<>());
  152. streetToCommunityMap.get(street).add(community);
  153. }
  154. private static class splittingAddress {
  155. SplitAddress splitAddress;
  156. int street = -1;
  157. int community = -1;
  158. Map<Integer, String> streetMap = new HashMap<>();
  159. Map<Integer, String> communityMap = new HashMap<>();
  160. threeLevelAddress threeLevelAddress;
  161. String targetString;
  162. void findStreet() {
  163. Map<Integer, String> results = null;
  164. int completeMatchIndex = -1;
  165. //首先尝试在一选下匹配
  166. if (splitAddress.getDistrict() != null) {
  167. results = contain(this.targetString, DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict()), 0);
  168. completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
  169. }
  170. //一选不存在或匹配无结果,直接搜全国
  171. if (results == null || results.isEmpty()) {
  172. results = contain(this.targetString, All_STREET_IN_SHANGHAI.keySet(), 0);
  173. if (completeMatchIndex == -1)
  174. completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
  175. }
  176. streetMap.putAll(results);
  177. street = completeMatchIndex;
  178. //仅有一个选择时当成一选
  179. if (streetMap.size() == 1) {
  180. street = (int) streetMap.keySet().toArray()[0];
  181. }
  182. }
  183. void findCommunity() {
  184. Map<Integer, String> results = null;
  185. int completeMatchCommunity = -1;
  186. String sub = targetString;
  187. //尝试一选
  188. if (street != -1) {
  189. sub = targetString.substring(street + streetMap.get(street).length());
  190. Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
  191. if (m.find()) {
  192. sub = sub.substring(m.end());
  193. }
  194. results = contain(sub, STREET_TO_COMMUNITY_MAP.get(streetMap.get(street)), targetString.length() - sub.length());
  195. completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
  196. }
  197. //一选不存在或匹配无结果,先搜全区
  198. if ((results == null || results.isEmpty()) && splitAddress.getDistrict() != null) {
  199. results = contain(sub, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict()), targetString.length() - sub.length());
  200. if (completeMatchCommunity == -1)
  201. completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
  202. }
  203. //最后全市
  204. if (results == null || results.isEmpty()) {
  205. results = contain(sub, All_COMMUNITY_IN_SHANGHAI.keySet(), targetString.length() - sub.length());
  206. if (completeMatchCommunity == -1)
  207. completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
  208. }
  209. Iterator<Integer> iterator = results.keySet().iterator();
  210. while (iterator.hasNext()) {
  211. int key = iterator.next();
  212. String name = results.get(key);
  213. if (key > 0 && name.equals("江镇") && targetString.charAt(key - 1) == '松') {
  214. iterator.remove();
  215. }
  216. if (key > 0 && name.equals("镇江")) {
  217. String sub1 = targetString.substring(key + 2);
  218. if (LEVEL_3_SUFFIX_PATTERN.matcher(sub1).matches()) {
  219. iterator.remove();
  220. }
  221. }
  222. }
  223. communityMap.putAll(results);
  224. //仅有一个选择时当成一选
  225. if (communityMap.size() == 1) {
  226. int index = (int) communityMap.keySet().toArray()[0];
  227. if (street != index) community = index;
  228. }
  229. }
  230. void matchThreeLevelAdd() {
  231. int handingPoint = 0;
  232. threeLevelAddress handingTLA = new threeLevelAddress("", "", "", "", "", "", "", "", "");
  233. for (String communityName : new HashSet<>(communityMap.values())) {
  234. if (communityName.isEmpty()) continue;
  235. for (threeLevelAddress t : All_COMMUNITY_IN_SHANGHAI.get(communityName)) {
  236. int point = checkTLA(t);
  237. if (point > handingPoint) {
  238. handingPoint = point;
  239. handingTLA = t;
  240. }
  241. }
  242. }
  243. for (String streetName : new HashSet<>(streetMap.values())) {
  244. if (streetName.isEmpty()) continue;
  245. for (threeLevelAddress t : All_STREET_IN_SHANGHAI.get(streetName)) {
  246. int point = checkTLA(t);
  247. if (point > handingPoint) {
  248. handingPoint = point;
  249. handingTLA = t;
  250. }
  251. }
  252. }
  253. threeLevelAddress = handingTLA;
  254. }
  255. int checkTLA(threeLevelAddress t) {
  256. int output = 0;
  257. if (t.district.equals(splitAddress.getDistrict())) output += 1;
  258. if (streetMap.containsValue(t.street)) output += 10;
  259. if (street != -1 && streetMap.get(street).equals(t.street)) output += 1000;
  260. if (communityMap.containsValue(t.community)) output += 100;
  261. if (community != -1 && communityMap.get(community).equals(t.community)) output += 1000;
  262. if (community != -1 && Pattern.matches(".*\\d$", communityMap.get(community))) output -= 1000;
  263. return output;
  264. }
  265. void guessFirstMatch() {
  266. //先街道
  267. if (!streetMap.isEmpty() && street == -1) {
  268. for (int i : streetMap.keySet()) {
  269. if (streetMap.get(i).equals(threeLevelAddress.street) && (i < street || street == -1)) {
  270. street = i;
  271. }
  272. }
  273. }
  274. //再居委
  275. if (community == -1 && !communityMap.isEmpty()) {
  276. for (int i : communityMap.keySet()) {
  277. if (communityMap.get(i).equals(threeLevelAddress.community) && street != i && (i < community || community == -1)) {
  278. community = i;
  279. }
  280. }
  281. }
  282. }
  283. }
  284. static int washResult(String sourceAddress, Map<Integer, String> result, Pattern should, Pattern... never) {
  285. Map<Integer, String> output = new HashMap<>();
  286. int outputInt = -1;
  287. for (int index : result.keySet()) {
  288. String name = result.get(index);
  289. String sub = sourceAddress.substring(index + name.length());
  290. //匹配到后缀时直接保留
  291. if (should.matcher(sub).find()) {
  292. outputInt = index;
  293. } else {
  294. //去除南京路,北京大道型选手
  295. if (ROAD_SUFFIX_PATTERN.matcher(sub).find()) {
  296. continue;
  297. }
  298. boolean skip = false;
  299. for (Pattern p : never) {
  300. if (p.matcher(sub).find()) skip = true;
  301. }
  302. if (skip) continue;
  303. }
  304. output.put(index, name);
  305. }
  306. result.clear();
  307. result.putAll(output);
  308. return outputInt;
  309. }
  310. /**
  311. * 检查字符串含有哪些字符,输出这些匹配字符的位置和字符的map
  312. *
  313. * @param s 被检查字符串
  314. * @param nameList 检查范围
  315. */
  316. private static Map<Integer, String> contain(String s, Iterable<String> nameList, int offset) {
  317. Map<Integer, String> output = new HashMap<>();
  318. if (nameList == null) {
  319. return output;
  320. }
  321. for (String name : nameList) {
  322. if (name.isEmpty()) continue;
  323. int index = -1;
  324. while ((index = s.indexOf(name, index + 1)) != -1) {
  325. output.put(index + offset, name);
  326. }
  327. }
  328. return output;
  329. }
  330. private static SplitAddress split(String sourceAddress) {
  331. //事前准备
  332. String beautyAddress = sourceAddress.replaceAll("[\\s]+", "");
  333. SplitAddress splitAddress = new SplitAddress();
  334. splitAddress.setFullAddress(sourceAddress);
  335. splittingAddress splittingAddress = new splittingAddress();
  336. splittingAddress.splitAddress = splitAddress;
  337. String[] result = AddressSplitUtil.splitAddress(beautyAddress);
  338. splitAddress.setProvince(result[0]);
  339. splitAddress.setCity(result[1]);
  340. splitAddress.setDistrict(result[2]);
  341. //检查是否在外省,未找到省市或者在省市中找到上海,或者找到上海的区都算作省内
  342. Map<Integer, String> districtContainResult = contain(beautyAddress, DISTRICT_TO_COMMUNITY_MAP.keySet(), 0);
  343. int disIndex = washResult(beautyAddress, districtContainResult, LEVEL_1_SUFFIX_PATTERN);
  344. Map<Integer, String> streetContainResult = contain(beautyAddress, STREET_TO_COMMUNITY_MAP.keySet(), 0);
  345. int streetIndex = washResult(beautyAddress, districtContainResult, LEVEL_2_SUFFIX_PATTERN);
  346. splitAddress.setAddr(result[3]);
  347. if (!((result[0].isEmpty() || result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市")) ||
  348. !districtContainResult.isEmpty() || !streetContainResult.isEmpty())) {
  349. splitAddress.setStatus(2);
  350. return splitAddress;
  351. }
  352. if (!districtContainResult.isEmpty()) {
  353. if (disIndex != -1) {
  354. String district = districtContainResult.get(disIndex);
  355. threeLevelAddress disTLA = All_COMMUNITY_IN_SHANGHAI.get(DISTRICT_TO_COMMUNITY_MAP.get(district).get(0)).get(0);
  356. splitAddress.setDistrict(disTLA.districtFullName);
  357. splitAddress.setDistrictCode(disTLA.districtCode);
  358. }
  359. }
  360. splitAddress.setProvince("上海市");
  361. splitAddress.setCity("上海市");
  362. splitAddress.setCityCode("3101");
  363. splittingAddress.targetString = beautyAddress;
  364. //开始省内分词
  365. splittingAddress.findStreet();
  366. splittingAddress.findCommunity();
  367. splittingAddress.matchThreeLevelAdd();
  368. splittingAddress.guessFirstMatch();
  369. if (splittingAddress.street != -1 || splittingAddress.community != -1) {
  370. splitAddress.setStreet(splittingAddress.threeLevelAddress.streetFullName);
  371. splitAddress.setStreetCode(splittingAddress.threeLevelAddress.streetCode);
  372. splitAddress.setDistrict(splittingAddress.threeLevelAddress.districtFullName);
  373. splitAddress.setDistrictCode(splittingAddress.threeLevelAddress.districtCode);
  374. }
  375. if (splittingAddress.community != -1) {
  376. splitAddress.setCommunity(splittingAddress.threeLevelAddress.communityFullName);
  377. splitAddress.setCommunityCode(splittingAddress.threeLevelAddress.communityCode);
  378. }
  379. //检查是否能够分离
  380. if (splittingAddress.community == -1 && splittingAddress.street == -1) {
  381. //检查是否是非地址
  382. if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()) {
  383. splitAddress.setStatus(3);
  384. return splitAddress;
  385. }
  386. if (disIndex != -1) {
  387. String sub = beautyAddress.substring(disIndex + districtContainResult.get(disIndex).length());
  388. Matcher m = LEVEL_1_SUFFIX_PATTERN.matcher(sub);
  389. if (m.find()) {
  390. sub = sub.substring(m.end());
  391. }
  392. splitAddress.setAddr(sub);
  393. }
  394. splitAddress.setStatus(1);
  395. if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty())
  396. splitAddress.setStatus(4);
  397. return splitAddress;
  398. } else if (splittingAddress.street > splittingAddress.community) {
  399. String sub = beautyAddress.substring(splittingAddress.street + splittingAddress.streetMap.get(splittingAddress.street).length());
  400. Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
  401. if (m.find()) {
  402. sub = sub.substring(m.end());
  403. }
  404. splitAddress.setAddr(sub);
  405. } else {
  406. String sub = beautyAddress.substring(
  407. splittingAddress.community +
  408. splittingAddress.communityMap.
  409. get(splittingAddress.community).length());
  410. Matcher m = LEVEL_3_SUFFIX_PATTERN.matcher(sub);
  411. if (m.find()) {
  412. sub = sub.substring(m.end());
  413. }
  414. splitAddress.setAddr(sub);
  415. }
  416. splitAddress.setStatus(0);
  417. if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty()) splitAddress.setStatus(4);
  418. if (splitAddress.getStreet().equals("自由贸易试验区")) splitAddress.setStatus(0);
  419. return splitAddress;
  420. }
  421. private static SplitAddress beautyResult(SplitAddress splitAddress) {
  422. //检查过度分割
  423. if (splitAddress.getAddr().isEmpty() || OVER_SPLIT.matcher(splitAddress.getAddr()).find()) {
  424. if (splitAddress.getCommunity().isEmpty()) {
  425. if (splitAddress.getStreet().isEmpty()) {
  426. if (splitAddress.getDistrict().isEmpty()) {
  427. splitAddress.setAddr("上海市" + splitAddress.getAddr());
  428. } else {
  429. splitAddress.setAddr(splitAddress.getDistrict() + splitAddress.getAddr());
  430. }
  431. } else {
  432. splitAddress.setAddr(splitAddress.getStreet() + splitAddress.getAddr());
  433. }
  434. } else {
  435. splitAddress.setAddr(splitAddress.getCommunity() + splitAddress.getAddr());
  436. }
  437. }
  438. //检查多号,多弄
  439. splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS), ""));
  440. return splitAddress;
  441. }
  442. /**
  443. * 工具入口,返回所有数据
  444. *
  445. * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
  446. */
  447. public static List<SplitAddress> splitAddresses(String sourceAddress) {
  448. // 添加逻辑(常见别名替换)
  449. for (String errAddr : errAddrReStr.keySet()) {
  450. if (sourceAddress.contains(errAddr)) {
  451. sourceAddress = sourceAddress.replaceAll(errAddr, errAddrReStr.get(errAddr));
  452. }
  453. }
  454. List<SplitAddress> addressList = new ArrayList<>();
  455. String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
  456. .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
  457. StringBuilder sb = new StringBuilder();
  458. for (char c : beautyString.toCharArray()) {
  459. // 检查是否为全角数字
  460. if (c >= '0' && c <= '9') {
  461. // 转换为半角数字
  462. sb.append((char) (c - '0' + '0'));
  463. } else if (c == '\uE5CE') {
  464. // 奇妙的乱码,跳过
  465. } else {
  466. // 保持原字符
  467. sb.append(c);
  468. }
  469. }
  470. beautyString = sb.toString();
  471. addressList.add(beautyResult(split(beautyString)));
  472. for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
  473. return addressList;
  474. }
  475. /**
  476. * 工具入口,仅返回最优
  477. *
  478. * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
  479. */
  480. public static SplitAddress splitBestAddress(String sourceAddress) {
  481. return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
  482. }
  483. // 默认时间格式
  484. private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";
  485. /**
  486. * 为输入的时间字符串增加随机2-3秒
  487. *
  488. * @param timeStr 时间字符串,格式需为"yyyy - MM - dd HH:mm:ss"
  489. * @return 增加随机时间后的新时间字符串
  490. */
  491. public static String addRandomSeconds(String timeStr) {
  492. return addRandomSeconds(timeStr, DEFAULT_PATTERN);
  493. }
  494. /**
  495. * 按照指定格式为输入的时间字符串增加随机2-3秒
  496. *
  497. * @param timeStr 时间字符串
  498. * @param pattern 时间格式
  499. * @return 增加随机时间后的新时间字符串
  500. */
  501. public static String addRandomSeconds(String timeStr, String pattern) {
  502. try {
  503. // 解析输入的时间字符串
  504. DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
  505. LocalDateTime dateTime = LocalDateTime.parse(timeStr, formatter);
  506. // 生成20到30分钟之间的随机数
  507. int randomSeconds = ThreadLocalRandom.current().nextInt(40, 60);
  508. // 增加随机秒数
  509. LocalDateTime newDateTime = dateTime.plusSeconds(randomSeconds);
  510. // 格式化并返回新的时间字符串
  511. return newDateTime.format(formatter);
  512. } catch (Exception e) {
  513. // 处理异常
  514. System.err.println("时间处理出错: " + e.getMessage());
  515. return null;
  516. }
  517. }
  518. // 示例用法
  519. /*public static void main(String[] args) {
  520. String timeStr = "2025-07-30 12:00:00";
  521. String newTimeStr = addRandomSeconds(timeStr);
  522. System.out.println("原时间: " + timeStr);
  523. System.out.println("新时间: " + newTimeStr);
  524. }
  525. */
  526. public static void main(String[] args) throws Exception {
  527. new ShanghaiAddressSplitUtil().init();
  528. // 搜索地址
  529. String searchAddress = "王家浜路130号101室";
  530. System.out.println("》》搜索地址:" + searchAddress);
  531. System.out.println();
  532. // 返回的参考地址列表
  533. Set<String> arrayAddress = new HashSet<>();
  534. arrayAddress.add("茸梅路与王家浜路交叉口西北140米");
  535. arrayAddress.add("茸梅路与王家浜路交叉口东南100米");
  536. arrayAddress.add("上海市松江区中山街道五龙居民委员会王家浜路130号101室");
  537. arrayAddress.add("王家浜路80号101室");
  538. System.out.println("》》返回参考地址列表:" + arrayAddress);
  539. System.out.println();
  540. // 新建JSONArray对象
  541. JSONArray array = new JSONArray();
  542. // 把Set对象转换为接口接收的JSONArray对象
  543. for (String key : arrayAddress) {
  544. JSONObject item_ = new JSONObject();
  545. item_.put("sourceaddress", key);
  546. array.add(item_);
  547. }
  548. JSONArray reData = AddressTools.getInstance().findBestMatch2(searchAddress, array, "sourceaddress");
  549. System.out.println("------------------打分后从高到低显示》》》》》》》》》》》》");
  550. System.out.println();
  551. for (int i = 0; i < reData.size(); i++) {
  552. JSONObject jo = reData.getJSONObject(i);
  553. System.out.println("返回参考地址:" + jo.getString("sourceaddress") + "\t打分:" + jo.getString("总分"));
  554. }
  555. /*AddrSplitLmrMap addrSplitLmrMap = new AddrSplitLmrMap();
  556. addrSplitLmrMap.initFile();
  557. String testAddress = "上海市嘉定区兴庆路1551号";
  558. System.out.println("测试地址:" + testAddress);
  559. SplitAddress xzqh = splitBestAddress(testAddress);
  560. System.out.println(xzqh);
  561. AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(xzqh.getAddr());
  562. System.out.println("AddrBean:" + lmrAddrBean);
  563. System.out.println(AddrSplitLmrMap.parseAddress(lmrAddrBean.getAddress()));*/
  564. /*AddressResult.ContentBean test = new AddressResult.ContentBean();
  565. test.setRoomNumber("1551");
  566. test.setDistance("");
  567. test.setLon(121.18766784667969);
  568. test.setRoadName("兴庆路");
  569. test.setScore("rule_3");
  570. test.setAdname("嘉定工业区");
  571. test.setLat(31.439653396606445);
  572. test.setAddress("兴庆路930号内");
  573. test.setSearchAddress("兴庆路1551");
  574. test.setPname("上海市");
  575. test.setCityname("嘉定区");
  576. test.setCommunity("园区5");
  577. test.setStandAddr("上海市嘉定区嘉定工业区兴庆路1551");
  578. test.setName("兴庆路930号内");
  579. test.setX(-26568.0603190182);
  580. test.setY(22674.637411829084);
  581. test.setLocation("121.18767,31.439653");
  582. test.setCommunityCode("141348");
  583. AddressTools.ifTrueAddressByBeans(test);*/
  584. /*// 数据总条数
  585. int dataSize = 2158170;
  586. int numberSize = 100000;
  587. // 开始时间
  588. String startTime = "2025-06-02 13:40:47";
  589. System.out.println(startTime + "开始推送表:yysz_address_v3");
  590. String startTime_ = startTime;
  591. // int dataSize = 4449759;
  592. // int numberSize = 200000;
  593. // String startTime = "2025-06-07 16:27:28";
  594. // System.out.println(startTime + "开始推送表:t_yysz_address_zhili");
  595. // 间隔时间【2到3秒能推送500条】
  596. for (int i = 0; i < (dataSize / numberSize) + 1; i++) {
  597. startTime = addRandomSeconds(startTime);
  598. if (i == dataSize / numberSize) {
  599. System.out.println("成功推动" + (dataSize % numberSize) + "条记录,当前时间:" + startTime);
  600. } else {
  601. System.out.println("成功推动" + numberSize + "条记录,当前时间:" + startTime);
  602. }
  603. }
  604. System.out.println("总共推送了" + dataSize + "条数据,开始时间为:" + startTime_ + ",结束时间为:" + startTime);*/
  605. }
  606. }