AddressTools.java 41 KB


  1. package com.skyversation.poiaddr.util;
  2. import com.alibaba.fastjson.JSONArray;
  3. import com.alibaba.fastjson.JSONObject;
  4. import com.skyversation.poiaddr.addquery.AddressQueryEngine;
  5. import com.skyversation.poiaddr.addquery.Constant;
  6. import com.skyversation.poiaddr.bean.AddressResult;
  7. import java.util.*;
  8. import java.util.regex.Matcher;
  9. import java.util.regex.Pattern;
  10. public class AddressTools {
  /** The single shared instance, created eagerly when the class is initialized. */
  private static final AddressTools instance = new AddressTools();

  /** Not instantiable from outside this class; use {@link #getInstance()}. */
  private AddressTools() {
  }

  /**
   * Returns the shared {@code AddressTools} instance.
   *
   * @return the singleton instance, never {@code null}
   */
  public static AddressTools getInstance() {
    // The field is final and initialized eagerly, so no lazy null check is required.
    return instance;
  }
  19. // 上海市所有的区
  20. private static final Set<String> SHANGHAI_DISTRICTS = new HashSet<>(Arrays.asList(
  21. "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区",
  22. "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区",
  23. "浦东新区", "金山区", "松江区", "青浦区", "奉贤区",
  24. "崇明区"
  25. ));
  26. // 各区下辖的镇与街道
  27. private static final Map<String, Set<String>> DISTRICT_STREETS = new HashMap<>();
  28. // 街镇下辖的村和居委
  29. private static final Map<String, Set<String>> STREET_VILLAGES_COMMUNITIES = new HashMap<>();
  30. static {
  31. // // 松江区
  32. // Set<String> songjiangStreets = new HashSet<>(Arrays.asList(
  33. // "中山街道", "方松街道", "永丰街道", "岳阳街道",
  34. // "泗泾镇", "佘山镇", "车墩镇", "新桥镇", "洞泾镇",
  35. // "九亭镇", "泖港镇", "石湖荡镇", "新浜镇", "叶榭镇",
  36. // "小昆山镇"
  37. // ));
  38. // DISTRICT_STREETS.put("松江区", songjiangStreets);
  39. // 中山街道下辖的村居
  40. Set<String> zhongshanCommunities = new HashSet<>(Arrays.asList(
  41. "茸梅社区居委会", "方东社区居委会", "夏家浜社区居委会",
  42. "五龙村", "永隆村", "白洋村",
  43. "白云社区居委会"
  44. ));
  45. STREET_VILLAGES_COMMUNITIES.put("中山街道", zhongshanCommunities);
  46. // 方松街道下辖的村居
  47. Set<String> fangsongCommunities = new HashSet<>(Arrays.asList(
  48. "泰晤士小镇社区居委会", "绿地社区居委会", "公捷苑社区居委会",
  49. "新陈家村", "江秋村"
  50. ));
  51. STREET_VILLAGES_COMMUNITIES.put("方松街道", fangsongCommunities);
  52. // 永丰街道下辖的村居
  53. Set<String> yongfengCommunities = new HashSet<>(Arrays.asList(
  54. "秀南社区居委会", "仓桥社区居委会", "玉乐社区居委会",
  55. "周星村", "秀塘浜村", "薛家埭村"
  56. ));
  57. STREET_VILLAGES_COMMUNITIES.put("永丰街道", yongfengCommunities);
  58. // 岳阳街道下辖的村居
  59. Set<String> yueyangCommunities = new HashSet<>(Arrays.asList(
  60. "太平社区居委会", "菜花泾社区居委会", "佛字桥社区居委会",
  61. "长桥村"
  62. ));
  63. STREET_VILLAGES_COMMUNITIES.put("岳阳街道", yueyangCommunities);
  64. // 泗泾镇下辖的村居
  65. Set<String> sijingCommunities = new HashSet<>(Arrays.asList(
  66. "江川社区居委会", "横港社区居委会", "青松社区居委会",
  67. "泗泾村", "张施村", "打铁桥村"
  68. ));
  69. STREET_VILLAGES_COMMUNITIES.put("泗泾镇", sijingCommunities);
  70. // 佘山镇下辖的村居
  71. Set<String> sheshanCommunities = new HashSet<>(Arrays.asList(
  72. "陈坊桥社区居委会", "北干山社区居委会", "佘山家园社区居委会",
  73. "江秋村", "陈坊村", "高家厍村"
  74. ));
  75. STREET_VILLAGES_COMMUNITIES.put("佘山镇", sheshanCommunities);
  76. // 车墩镇下辖的村居
  77. Set<String> chendunCommunities = new HashSet<>(Arrays.asList(
  78. "虬长路社区居委会", "车墩社区居委会", "高桥村",
  79. "联建村", "联庄村", "洋泾村", "新余村"
  80. ));
  81. STREET_VILLAGES_COMMUNITIES.put("车墩镇", chendunCommunities);
  82. // 新桥镇下辖的村居
  83. Set<String> xinqiaoCommunities = new HashSet<>(Arrays.asList(
  84. "新乐社区居委会", "晨星社区居委会", "春申社区居委会",
  85. "民益村", "马汤村", "潘家浜村"
  86. ));
  87. STREET_VILLAGES_COMMUNITIES.put("新桥镇", xinqiaoCommunities);
  88. // 洞泾镇下辖的村居
  89. Set<String> dongjingCommunities = new HashSet<>(Arrays.asList(
  90. "海欣社区居委会", "洞泾社区居委会", "同欣社区居委会",
  91. "渔洋浜村", "砖桥村", "张泾村"
  92. ));
  93. STREET_VILLAGES_COMMUNITIES.put("洞泾镇", dongjingCommunities);
  94. // 九亭镇下辖的村居
  95. Set<String> jiutingCommunities = new HashSet<>(Arrays.asList(
  96. "亭中社区居委会", "亭东社区居委会", "亭南社区居委会",
  97. "九亭村", "朱泾浜村", "小寅村"
  98. ));
  99. STREET_VILLAGES_COMMUNITIES.put("九亭镇", jiutingCommunities);
  100. // 泖港镇下辖的村居
  101. Set<String> maogangCommunities = new HashSet<>(Arrays.asList(
  102. "泖港社区居委会", "五厍社区居委会", "腰泾村",
  103. "胡光村", "新龚村", "泖港村"
  104. ));
  105. STREET_VILLAGES_COMMUNITIES.put("泖港镇", maogangCommunities);
  106. // 石湖荡镇下辖的村居
  107. Set<String> shihudangCommunities = new HashSet<>(Arrays.asList(
  108. "古松社区居委会", "李塔汇社区居委会", "新源村",
  109. "东港村", "洙桥村", "金胜村"
  110. ));
  111. STREET_VILLAGES_COMMUNITIES.put("石湖荡镇", shihudangCommunities);
  112. // 新浜镇下辖的村居
  113. Set<String> xinbangCommunities = new HashSet<>(Arrays.asList(
  114. "新浜社区居委会", "赵王村", "胡家埭村",
  115. "南杨村", "黄家埭村", "许家草村"
  116. ));
  117. STREET_VILLAGES_COMMUNITIES.put("新浜镇", xinbangCommunities);
  118. // 叶榭镇下辖的村居
  119. Set<String> yexieCommunities = new HashSet<>(Arrays.asList(
  120. "叶榭社区居委会", "张泽社区居委会", "堰泾村",
  121. "井凌桥村", "兴达村", "同建村"
  122. ));
  123. STREET_VILLAGES_COMMUNITIES.put("叶榭镇", yexieCommunities);
  124. // 小昆山镇下辖的村居
  125. Set<String> xiaokunshanCommunities = new HashSet<>(Arrays.asList(
  126. "大港社区居委会", "平原社区居委会", "玉昆二村社区居委会",
  127. "汤村庙村", "陆家埭村", "昆西村"
  128. ));
  129. STREET_VILLAGES_COMMUNITIES.put("小昆山镇", xiaokunshanCommunities);
  130. }
  131. static {
  132. // 黄浦区
  133. Set<String> huangpuStreets = new HashSet<>(Arrays.asList(
  134. "南京东路街道", "外滩街道", "半淞园路街道", "小东门街道",
  135. "豫园街道", "老西门街道", "五里桥街道", "打浦桥街道"
  136. ));
  137. DISTRICT_STREETS.put("黄浦区", huangpuStreets);
  138. // 徐汇区
  139. Set<String> xuhuiStreets = new HashSet<>(Arrays.asList(
  140. "天平路街道", "湖南路街道", "斜土路街道", "枫林路街道",
  141. "长桥街道", "漕河泾街道", "康健新村街道", "徐家汇街道",
  142. "凌云路街道", "龙华街道", "漕河泾镇", "华泾镇"
  143. ));
  144. DISTRICT_STREETS.put("徐汇区", xuhuiStreets);
  145. // 长宁区
  146. Set<String> changningStreets = new HashSet<>(Arrays.asList(
  147. "华阳路街道", "江苏路街道", "新华路街道", "周家桥街道",
  148. "天山路街道", "仙霞新村街道", "虹桥街道", "程家桥街道",
  149. "北新泾街道", "新泾镇"
  150. ));
  151. DISTRICT_STREETS.put("长宁区", changningStreets);
  152. // 静安区
  153. Set<String> jinganStreets = new HashSet<>(Arrays.asList(
  154. "江宁路街道", "石门二路街道", "南京西路街道", "静安寺街道",
  155. "曹家渡街道", "天目西路街道", "北站街道", "宝山路街道",
  156. "芷江西路街道", "共和新路街道", "大宁路街道", "彭浦新村街道",
  157. "临汾路街道", "彭浦镇"
  158. ));
  159. DISTRICT_STREETS.put("静安区", jinganStreets);
  160. // 普陀区
  161. Set<String> putuoStreets = new HashSet<>(Arrays.asList(
  162. "曹杨新村街道", "长风新村街道", "长寿路街道", "甘泉路街道",
  163. "石泉路街道", "宜川路街道", "万里街道", "真如镇街道",
  164. "长征镇", "桃浦镇"
  165. ));
  166. DISTRICT_STREETS.put("普陀区", putuoStreets);
  167. // 虹口区
  168. Set<String> hongkouStreets = new HashSet<>(Arrays.asList(
  169. "欧阳路街道", "曲阳路街道", "广中路街道", "嘉兴路街道",
  170. "凉城新村街道", "四川北路街道", "提篮桥街道", "江湾镇街道"
  171. ));
  172. DISTRICT_STREETS.put("虹口区", hongkouStreets);
  173. // 杨浦区
  174. Set<String> yangpuStreets = new HashSet<>(Arrays.asList(
  175. "定海路街道", "平凉路街道", "江浦路街道", "四平路街道",
  176. "控江路街道", "长白新村街道", "延吉新村街道", "殷行街道",
  177. "大桥街道", "五角场街道", "新江湾城街道", "五角场镇"
  178. ));
  179. DISTRICT_STREETS.put("杨浦区", yangpuStreets);
  180. // 闵行区
  181. Set<String> minhangStreets = new HashSet<>(Arrays.asList(
  182. "江川路街道", "新虹街道", "古美路街道", "浦锦街道",
  183. "莘庄镇", "七宝镇", "颛桥镇", "华漕镇", "虹桥镇",
  184. "梅陇镇", "吴泾镇", "马桥镇", "浦江镇"
  185. ));
  186. DISTRICT_STREETS.put("闵行区", minhangStreets);
  187. // 宝山区
  188. Set<String> baoshanStreets = new HashSet<>(Arrays.asList(
  189. "吴淞街道", "友谊路街道", "张庙街道", "罗店镇",
  190. "大场镇", "杨行镇", "月浦镇", "罗泾镇", "顾村镇",
  191. "高境镇", "庙行镇", "淞南镇"
  192. ));
  193. DISTRICT_STREETS.put("宝山区", baoshanStreets);
  194. // 嘉定区
  195. Set<String> jiadingStreets = new HashSet<>(Arrays.asList(
  196. "新成路街道", "真新街道", "嘉定镇街道", "南翔镇",
  197. "安亭镇", "马陆镇", "徐行镇", "华亭镇", "外冈镇",
  198. "江桥镇"
  199. ));
  200. DISTRICT_STREETS.put("嘉定区", jiadingStreets);
  201. // 浦东新区
  202. Set<String> pudongStreets = new HashSet<>(Arrays.asList(
  203. "潍坊新村街道", "陆家嘴街道", "周家渡街道", "塘桥街道",
  204. "上钢新村街道", "南码头路街道", "沪东新村街道", "金杨新村街道",
  205. "洋泾街道", "浦兴路街道", "东明路街道", "花木街道",
  206. "川沙新镇", "高桥镇", "北蔡镇", "合庆镇", "唐镇",
  207. "曹路镇", "金桥镇", "高东镇", "张江镇", "三林镇",
  208. "惠南镇", "周浦镇", "新场镇", "大团镇", "康桥镇",
  209. "航头镇", "祝桥镇", "泥城镇", "宣桥镇", "书院镇",
  210. "万祥镇", "老港镇", "南汇新城镇"
  211. ));
  212. DISTRICT_STREETS.put("浦东新区", pudongStreets);
  213. // 金山区
  214. Set<String> jinshanStreets = new HashSet<>(Arrays.asList(
  215. "石化街道", "朱泾镇", "枫泾镇", "张堰镇", "亭林镇",
  216. "吕巷镇", "廊下镇", "金山卫镇", "漕泾镇", "山阳镇"
  217. ));
  218. DISTRICT_STREETS.put("金山区", jinshanStreets);
  219. // 松江区
  220. Set<String> songjiangStreets = new HashSet<>(Arrays.asList(
  221. "岳阳街道", "永丰街道", "方松街道", "中山街道", "广富林街道", "九里亭街道", "泗泾镇", "佘山镇", "车墩镇", "新桥镇", "洞泾镇", "九亭镇", "泖港镇", "石湖荡镇", "新浜镇", "叶榭镇", "小昆山镇"
  222. ));
  223. DISTRICT_STREETS.put("松江区", songjiangStreets);
  224. // 青浦区
  225. Set<String> qingpuStreets = new HashSet<>(Arrays.asList(
  226. "夏阳街道", "盈浦街道", "香花桥街道", "朱家角镇",
  227. "练塘镇", "金泽镇", "赵巷镇", "徐泾镇", "华新镇",
  228. "重固镇", "白鹤镇"
  229. ));
  230. DISTRICT_STREETS.put("青浦区", qingpuStreets);
  231. // 奉贤区
  232. Set<String> fengxianStreets = new HashSet<>(Arrays.asList(
  233. "西渡街道", "奉浦街道", "南桥镇", "庄行镇", "金汇镇",
  234. "柘林镇", "青村镇", "奉城镇", "四团镇", "海湾镇"
  235. ));
  236. DISTRICT_STREETS.put("奉贤区", fengxianStreets);
  237. // 崇明区
  238. Set<String> chongmingStreets = new HashSet<>(Arrays.asList(
  239. "城桥镇", "堡镇", "新河镇", "庙镇", "竖新镇",
  240. "向化镇", "三星镇", "港沿镇", "中兴镇", "陈家镇",
  241. "绿华镇", "港西镇", "建设镇", "新海镇", "东平镇"
  242. ));
  243. DISTRICT_STREETS.put("崇明区", chongmingStreets);
  244. }
  245. /***
  246. * 根据地名地址返回到街镇一级分词,
  247. * @param address 上海市松江区车墩镇乐都路590号
  248. * @return 上海市,松江区,车墩镇,乐都路590号
  249. */
  250. public static String[] parseAddressJZ(String address) {
  251. String[] result = new String[4];
  252. result[0] = "上海市";
  253. // 去除地址中的“上海”和“上海市”
  254. address = address.replaceAll("上海(市)?", "");
  255. // 查找区
  256. String foundDistrict = null;
  257. for (String district : SHANGHAI_DISTRICTS) {
  258. if (address.contains(district)) {
  259. foundDistrict = district;
  260. address = address.replace(district, "");
  261. break;
  262. }
  263. }
  264. result[1] = foundDistrict;
  265. // 查找街镇
  266. String foundStreet = null;
  267. if (foundDistrict != null) {
  268. Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
  269. if (streets != null) {
  270. for (String street : streets) {
  271. if (address.contains(street)) {
  272. foundStreet = street;
  273. address = address.replace(street, "");
  274. break;
  275. } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
  276. foundStreet = street;
  277. address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
  278. break;
  279. }
  280. }
  281. }
  282. }
  283. result[2] = foundStreet;
  284. // 剩余部分为其他信息
  285. address = address.trim();
  286. result[3] = address;
  287. return result;
  288. }
  289. /***
  290. * 根据地名地址返回到村居一级分词,村居不是很准确
  291. * @param address 上海市松江区车墩镇乐都村乐都路590号
  292. * @return 上海市,松江区,车墩镇,乐都村,乐都路590号
  293. */
  294. public static String[] parseAddressCJ(String address) {
  295. String[] result = new String[5];
  296. result[0] = "上海市";
  297. // 去除地址中的“上海”和“上海市”
  298. address = address.replaceAll("上海(市)?", "");
  299. // 先尝试通过村居信息定位
  300. String foundVillageOrCommunity = null;
  301. String foundStreet = null;
  302. String foundDistrict = null;
  303. for (Map.Entry<String, Set<String>> streetEntry : STREET_VILLAGES_COMMUNITIES.entrySet()) {
  304. String street = streetEntry.getKey();
  305. Set<String> villagesCommunities = streetEntry.getValue();
  306. for (String villageCommunity : villagesCommunities) {
  307. if (address.contains(villageCommunity)) {
  308. foundVillageOrCommunity = villageCommunity;
  309. foundStreet = street;
  310. address = address.replace(villageCommunity, "");
  311. break;
  312. } else {
  313. String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
  314. // 避免与街镇简称冲突
  315. String streetShortName = street.replace("镇", "").replace("街道", "");
  316. if (!shortName.equals(streetShortName) && address.contains(shortName)) {
  317. foundVillageOrCommunity = villageCommunity;
  318. foundStreet = street;
  319. address = address.replace(shortName, "");
  320. break;
  321. }
  322. }
  323. }
  324. if (foundVillageOrCommunity != null) {
  325. break;
  326. }
  327. }
  328. // 如果通过村居定位到了街镇,再定位区
  329. if (foundStreet != null) {
  330. for (Map.Entry<String, Set<String>> districtEntry : DISTRICT_STREETS.entrySet()) {
  331. String district = districtEntry.getKey();
  332. Set<String> streets = districtEntry.getValue();
  333. if (streets.contains(foundStreet)) {
  334. foundDistrict = district;
  335. break;
  336. }
  337. }
  338. }
  339. // 如果没有通过村居定位到信息,再按常规流程查找区、街镇、村居
  340. if (foundDistrict == null) {
  341. // 查找区
  342. for (String district : SHANGHAI_DISTRICTS) {
  343. if (address.contains(district)) {
  344. foundDistrict = district;
  345. address = address.replace(district, "");
  346. break;
  347. }
  348. }
  349. }
  350. if (foundStreet == null && foundDistrict != null) {
  351. // 查找街镇
  352. Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
  353. if (streets != null) {
  354. for (String street : streets) {
  355. if (address.contains(street)) {
  356. foundStreet = street;
  357. address = address.replace(street, "");
  358. break;
  359. } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
  360. foundStreet = street;
  361. address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
  362. break;
  363. }
  364. }
  365. }
  366. }
  367. if (foundVillageOrCommunity == null && foundStreet != null) {
  368. // 查找村或居委
  369. Set<String> villagesCommunities = STREET_VILLAGES_COMMUNITIES.get(foundStreet);
  370. if (villagesCommunities != null) {
  371. for (String villageCommunity : villagesCommunities) {
  372. if (address.contains(villageCommunity)) {
  373. foundVillageOrCommunity = villageCommunity;
  374. address = address.replace(villageCommunity, "");
  375. break;
  376. } else {
  377. String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
  378. // 避免与街镇简称冲突
  379. String streetShortName = foundStreet.replace("镇", "").replace("街道", "");
  380. if (!shortName.equals(streetShortName) && address.contains(shortName)) {
  381. foundVillageOrCommunity = villageCommunity;
  382. address = address.replace(shortName, "");
  383. break;
  384. }
  385. }
  386. }
  387. }
  388. }
  389. // 处理别名残留问题
  390. if (foundVillageOrCommunity != null) {
  391. String[] aliases = {"居委", "居委会", "村", "新村"};
  392. for (String alias : aliases) {
  393. if (address.startsWith(alias)) {
  394. address = address.substring(alias.length());
  395. break;
  396. }
  397. }
  398. }
  399. // 移除剩余地址中可能残留的区和街镇信息
  400. if (foundDistrict != null) {
  401. address = address.replace(foundDistrict, "");
  402. }
  403. if (foundStreet != null) {
  404. address = address.replace(foundStreet, "");
  405. }
  406. // 去除多余的空白字符
  407. address = address.replaceAll(" ", "");
  408. // 去除街镇
  409. address = AddressQueryEngine.townReplaceAll(address);
  410. result[1] = foundDistrict;
  411. result[2] = foundStreet;
  412. result[3] = foundVillageOrCommunity;
  413. result[4] = address;
  414. return result;
  415. }
  416. /***
  417. * 获取array中,指定字段与address匹配值最高的数据,特定方法,指定的jsonarray使用
  418. * @param address 上海市松江区乐都路590号
  419. * @param array 地名地址接口返回的jsonarray数据
  420. * @param param jaonarray中地名地址字段的key
  421. * @return
  422. */
  423. public JSONObject findBestMatch(String address, JSONArray array, String param) {
  424. JSONObject bestMatch = null;
  425. double maxTotalScore = 0;
  426. // 处理输入地址的分词和数字前文本
  427. AddressInfo addressInfo = processAddress(address);
  428. for (int i = 0; i < array.size(); i++) {
  429. JSONObject obj = array.getJSONObject(i);
  430. obj.put("searchAddress", address);
  431. // && obj.getString(param).contains(Constant.getArea())
  432. if (obj.containsKey(param) && obj.get(param) != null && !obj.getString(param).trim().isEmpty()) {
  433. // 得到返回的地址
  434. String addr = obj.getString(param);
  435. // 规则4判断
  436. // TODO 添加校验逻辑(首先使用第4校验规则匹配,匹配不到使用第二规则,还匹配不到的话就使用打分规则)
  437. Set<String> addressString = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(0);
  438. Set<String> addressNumber = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(1);
  439. Set<String> address2String = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(0);
  440. Set<String> address2Number = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(1);
  441. if (addressString != null && addressString.size() > 0) {
  442. int addressStrSize = addressString.size();
  443. for (String addr2str : address2String) {
  444. if (addressString.contains(addr2str)) {
  445. addressStrSize--;
  446. if (addressStrSize == 0) {
  447. if (addressNumber.size() == 0) {
  448. obj.put("总分", "rule_4");
  449. return obj;
  450. } else {
  451. int addressNumSize = addressNumber.size();
  452. for (String addr2Num : address2Number) {
  453. if (addressNumber.contains(addr2Num)) {
  454. addressNumSize--;
  455. if (addressNumSize == 0) {
  456. obj.put("总分", "rule_4");
  457. return obj;
  458. }
  459. }
  460. }
  461. }
  462. }
  463. }
  464. }
  465. }
  466. // 规则2判断
  467. String role2address = AddressQueryEngine.townReplaceAll(AddressQueryEngine.addressReplaceAll(addr));
  468. String role2address2 = AddressQueryEngine.townReplaceAll(AddressQueryEngine.addressReplaceAll(address));
  469. if (AddressQueryEngine.isNotEmptyOrBlank(role2address) && role2address.contains(role2address2)) {
  470. obj.put("总分", "rule_2");
  471. return obj;
  472. }
  473. AddressInfo addrInfo = processAddress(addr);
  474. // 第一步:全词匹配比例
  475. double score1 = calculateFullWordMatchScore(address, addr);
  476. // 第二步:数字匹配得分
  477. double score2 = calculateNumberMatchScore(addressInfo.firstNumber, addrInfo.firstNumber);
  478. // 第三步:数字前文本匹配得分
  479. double score3 = calculatePrefixTextMatchScore(addressInfo.prefixText, addrInfo.prefixText);
  480. double totalScore = score1 + score2 + score3;
  481. if (totalScore > maxTotalScore) {
  482. maxTotalScore = totalScore;
  483. bestMatch = obj;
  484. bestMatch.put("计分1", score1);
  485. bestMatch.put("计分2", score2);
  486. bestMatch.put("计分3", score3);
  487. bestMatch.put("总分", totalScore);
  488. }
  489. }
  490. }
  491. return bestMatch;
  492. }
  493. public static boolean isOtherDistrictThanSongJiang(String address) {
  494. // 将地址字符串转换为小写,以便进行不区分大小写的比较
  495. String lowerCaseAddress = address.toLowerCase();
  496. // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
  497. // 列出上海市的其他区(已补充完整)
  498. String[] otherDistricts = {"北京市", "天津市", "重庆市",
  499. "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
  500. "江苏省", "浙江省", "安徽省", "福建省", "江西省",
  501. "山东省", "河南省", "湖北省", "湖南省", "广东省",
  502. "海南省", "四川省", "贵州省", "云南省", "陕西省",
  503. "甘肃省", "青海省", "台湾省",
  504. "内蒙古自治区", "广西壮族自治区", "西藏自治区",
  505. "宁夏回族自治区", "新疆维吾尔自治区",
  506. "香港特别行政区", "澳门特别行政区", "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区", "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区", "金山区", "青浦区", "奉贤区", "崇明区", "浦东新区"};
  507. for (String district : otherDistricts) {
  508. if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 2)) == 0) {
  509. return false;
  510. }
  511. }
  512. return true;
  513. }
  514. public static boolean isOtherDistrictThanSongJiang2(String address) {
  515. // 将地址字符串转换为小写,以便进行不区分大小写的比较
  516. String lowerCaseAddress = address.toLowerCase();
  517. // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
  518. // 列出上海市的其他区(已补充完整)
  519. String[] otherDistricts = {"北京市", "天津市", "重庆市",
  520. "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
  521. "江苏省", "浙江省", "安徽省", "福建省", "江西省",
  522. "山东省", "河南省", "湖北省", "湖南省", "广东省",
  523. "海南省", "四川省", "贵州省", "云南省", "陕西省",
  524. "甘肃省", "青海省", "台湾省",
  525. "内蒙古自治区", "广西壮族自治区", "西藏自治区",
  526. "宁夏回族自治区", "新疆维吾尔自治区",
  527. "香港特别行政区", "澳门特别行政区"};
  528. for (String district : otherDistricts) {
  529. if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 3)) == 0) {
  530. return false;
  531. }
  532. }
  533. return true;
  534. }
  535. public static String isOtherDistrictThanShangHai(String address) {
  536. // 将地址字符串转换为小写,以便进行不区分大小写的比较
  537. String lowerCaseAddress = address.toLowerCase();
  538. // 列出上海市的其他区(已补充完整)
  539. String[] otherDistricts = {"上海市", "北京市", "天津市", "重庆市",
  540. "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
  541. "江苏省", "浙江省", "安徽省", "福建省", "江西省",
  542. "山东省", "河南省", "湖北省", "湖南省", "广东省",
  543. "海南省", "四川省", "贵州省", "云南省", "陕西省",
  544. "甘肃省", "青海省", "台湾省",
  545. "内蒙古自治区", "广西壮族自治区", "西藏自治区",
  546. "宁夏回族自治区", "新疆维吾尔自治区",
  547. "香港特别行政区", "澳门特别行政区"};
  548. for (String district : otherDistricts) {
  549. if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 3)) == 0) {
  550. return district;
  551. }
  552. }
  553. return "上海市";
  554. }
  555. /***
  556. * 其他方法的引用方法,不用管
  557. * @param input
  558. * @return
  559. */
  560. private AddressInfo processAddress(String input) {
  561. String prefixText = "";
  562. String firstNumber = null;
  563. List<String> tokens = new ArrayList<>();
  564. Pattern pattern = Pattern.compile("\\d+|[^\\d]+");
  565. Matcher matcher = pattern.matcher(input);
  566. boolean foundNumber = false;
  567. while (matcher.find()) {
  568. String token = matcher.group();
  569. tokens.add(token);
  570. if (!foundNumber && token.matches("\\d+")) {
  571. firstNumber = token;
  572. foundNumber = true;
  573. }
  574. }
  575. if (tokens.size() > 0 && !foundNumber) {
  576. prefixText = input;
  577. } else if (tokens.size() > 0 && firstNumber != null) {
  578. int index = tokens.indexOf(firstNumber);
  579. for (int i = 0; i < index; i++) {
  580. prefixText += tokens.get(i);
  581. }
  582. }
  583. return new AddressInfo(prefixText, firstNumber);
  584. }
  585. /***
  586. * 其他方法的引用方法,不用管
  587. * @param inputAddr 搜索地址
  588. * @param shortAddr 返回地址
  589. * @return
  590. */
  591. private double calculateFullWordMatchScore(String inputAddr, String shortAddr) {
  592. int totalLength = Math.max(inputAddr.length(), shortAddr.length());
  593. int commonCount = 0;
  594. for (char c : inputAddr.toCharArray()) {
  595. if (shortAddr.indexOf(c) != -1) {
  596. commonCount++;
  597. }
  598. }
  599. return (double) commonCount / totalLength;
  600. }
  601. /***
  602. * 其他方法的引用方法,不用管
  603. * @param addressNumber
  604. * @param addrNumber
  605. * @return
  606. */
  607. private double calculateNumberMatchScore(String addressNumber, String addrNumber) {
  608. if (addressNumber == null || addrNumber == null) {
  609. return 1; // 都没有数字或其中一个没有,认为这一步匹配满分
  610. }
  611. if (addressNumber.equals(addrNumber)) {
  612. return 1;
  613. }
  614. try {
  615. int num1 = Integer.parseInt(addressNumber);
  616. int num2 = Integer.parseInt(addrNumber);
  617. int diff = Math.abs(num1 - num2);
  618. if (diff <= 200) {
  619. return 1 - (double) diff / 200;
  620. }
  621. } catch (NumberFormatException e) {
  622. return 0;
  623. }
  624. return 0;
  625. }
  626. /***
  627. * 其他方法的引用方法,不用管
  628. * @param addressPrefix
  629. * @param addrPrefix
  630. * @return
  631. */
  632. private double calculatePrefixTextMatchScore(String addressPrefix, String addrPrefix) {
  633. if (addressPrefix.isEmpty() || addrPrefix.isEmpty()) {
  634. return 0;
  635. }
  636. double matchScore = calculateFullWordMatchScore(addressPrefix, addrPrefix);
  637. return matchScore >= 0.65 ? matchScore : 0;
  638. }
  639. private static class AddressInfo {
  640. String prefixText;
  641. String firstNumber;
  642. AddressInfo(String prefixText, String firstNumber) {
  643. this.prefixText = prefixText;
  644. this.firstNumber = firstNumber;
  645. }
  646. }
  647. /***
  648. * 去除特殊字符,仅保留中文、数字、字母
  649. * @param address
  650. * @return
  651. */
  652. public String deleteStr(String address) {
  653. if (address == null) {
  654. return null;
  655. }
  656. // 使用正则表达式替换所有非中文和非数字的字符为空字符串
  657. return address.replaceAll("[^\\u4e00-\\u9fa5\\da-zA-Z]", "");
  658. }
  659. // 静态资源:中国所有省份、地级市和县级市的名称
  660. private static final Set<String> PROVINCES = new HashSet<>(Arrays.asList(
  661. "北京市", "天津市", "上海市", "重庆市",
  662. "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
  663. "江苏省", "浙江省", "安徽省", "福建省", "江西省",
  664. "山东省", "河南省", "湖北省", "湖南省", "广东省",
  665. "海南省", "四川省", "贵州省", "云南省", "陕西省",
  666. "甘肃省", "青海省", "台湾省",
  667. "内蒙古自治区", "广西壮族自治区", "西藏自治区",
  668. "宁夏回族自治区", "新疆维吾尔自治区",
  669. "香港特别行政区", "澳门特别行政区"
  670. ));
  671. private static final Set<String> CITIES = new HashSet<>(Arrays.asList(
  672. // 这里只列举部分示例,实际需要完整的地级市和县级市列表
  673. "石家庄市", "唐山市", "秦皇岛市", "邯郸市", "邢台市",
  674. "保定市", "张家口市", "承德市", "沧州市", "廊坊市",
  675. "衡水市", "太原市", "大同市", "阳泉市", "长治市",
  676. "晋城市", "朔州市", "晋中市", "运城市", "忻州市",
  677. "临汾市", "吕梁市", "沈阳市", "大连市", "鞍山市",
  678. "抚顺市", "本溪市", "丹东市", "锦州市", "营口市",
  679. "阜新市", "辽阳市", "盘锦市", "铁岭市", "朝阳市",
  680. "葫芦岛市", "长春市", "吉林市", "四平市", "辽源市",
  681. "通化市", "白山市", "松原市", "白城市", "延边朝鲜族自治州",
  682. "哈尔滨市", "齐齐哈尔市", "鸡西市", "鹤岗市", "双鸭山市",
  683. "大庆市", "伊春市", "佳木斯市", "七台河市", "牡丹江市",
  684. "黑河市", "绥化市", "大兴安岭地区", "南京市", "无锡市",
  685. "徐州市", "常州市", "苏州市", "南通市", "连云港市",
  686. "淮安市", "盐城市", "扬州市", "镇江市", "泰州市",
  687. "宿迁市", "杭州市", "宁波市", "温州市", "嘉兴市",
  688. "湖州市", "绍兴市", "金华市", "衢州市", "舟山市",
  689. "台州市", "丽水市", "合肥市", "芜湖市", "蚌埠市",
  690. "淮南市", "马鞍山市", "淮北市", "铜陵市", "安庆市",
  691. "黄山市", "滁州市", "阜阳市", "宿州市", "六安市",
  692. "亳州市", "池州市", "宣城市", "福州市", "厦门市",
  693. "莆田市", "三明市", "泉州市", "漳州市", "南平市",
  694. "龙岩市", "宁德市", "南昌市", "景德镇市", "萍乡市",
  695. "九江市", "新余市", "鹰潭市", "赣州市", "吉安市",
  696. "宜春市", "抚州市", "上饶市", "济南市", "青岛市",
  697. "淄博市", "枣庄市", "东营市", "烟台市", "潍坊市",
  698. "济宁市", "泰安市", "威海市", "日照市", "临沂市",
  699. "德州市", "聊城市", "滨州市", "菏泽市", "郑州市",
  700. "开封市", "洛阳市", "平顶山市", "安阳市", "鹤壁市",
  701. "新乡市", "焦作市", "濮阳市", "许昌市", "漯河市",
  702. "三门峡市", "南阳市", "商丘市", "信阳市", "周口市",
  703. "驻马店市", "武汉市", "黄石市", "十堰市", "宜昌市",
  704. "襄阳市", "鄂州市", "荆门市", "孝感市", "荆州市",
  705. "黄冈市", "咸宁市", "随州市", "恩施土家族苗族自治州",
  706. "长沙市", "株洲市", "湘潭市", "衡阳市", "邵阳市",
  707. "岳阳市", "常德市", "张家界市", "益阳市", "郴州市",
  708. "永州市", "怀化市", "娄底市", "湘西土家族苗族自治州",
  709. "广州市", "韶关市", "深圳市", "珠海市", "汕头市",
  710. "佛山市", "江门市", "湛江市", "茂名市", "肇庆市",
  711. "惠州市", "梅州市", "汕尾市", "河源市", "阳江市",
  712. "清远市", "东莞市", "中山市", "潮州市", "揭阳市",
  713. "云浮市", "海口市", "三亚市", "三沙市", "儋州市",
  714. "成都市", "自贡市", "攀枝花市", "泸州市", "德阳市",
  715. "绵阳市", "广元市", "遂宁市", "内江市", "乐山市",
  716. "南充市", "眉山市", "宜宾市", "广安市", "达州市",
  717. "雅安市", "巴中市", "资阳市", "阿坝藏族羌族自治州",
  718. "甘孜藏族自治州", "凉山彝族自治州", "贵阳市", "六盘水市",
  719. "遵义市", "安顺市", "毕节市", "铜仁市", "黔西南布依族苗族自治州",
  720. "黔东南苗族侗族自治州", "黔南布依族苗族自治州", "昆明市", "曲靖市",
  721. "玉溪市", "保山市", "昭通市", "丽江市", "普洱市", "临沧市",
  722. "楚雄彝族自治州", "红河哈尼族彝族自治州", "文山壮族苗族自治州",
  723. "西双版纳傣族自治州", "大理白族自治州", "德宏傣族景颇族自治州",
  724. "怒江傈僳族自治州", "迪庆藏族自治州", "西安市", "铜川市", "宝鸡市",
  725. "咸阳市", "渭南市", "延安市", "汉中市", "榆林市", "安康市", "商洛市",
  726. "兰州市", "嘉峪关市", "金昌市", "白银市", "天水市", "武威市",
  727. "张掖市", "平凉市", "酒泉市", "庆阳市", "定西市", "陇南市",
  728. "临夏回族自治州", "甘南藏族自治州", "西宁市", "海东市",
  729. "海北藏族自治州", "黄南藏族自治州", "海南藏族自治州",
  730. "果洛藏族自治州", "玉树藏族自治州", "海西蒙古族藏族自治州",
  731. "台北市", "新北市", "桃园市", "台中市", "台南市", "高雄市",
  732. "基隆市", "新竹市", "嘉义市", "澳门市", "香港市"
  733. ));
  734. public static String ifCITIES(String address) {
  735. for (String item : CITIES) {
  736. if (address.contains(item)) {
  737. return item;
  738. }
  739. }
  740. return null;
  741. }
  742. /***
  743. * 去除地名地址的前缀,一直到镇/街道
  744. * @param address
  745. * @return
  746. */
  747. public static String quchuqianzhui(String address) {
  748. // 先去除 "中国"
  749. address = address.replace("中国", "");
  750. // 去除省份
  751. for (String province : PROVINCES) {
  752. if (address.startsWith(province)) {
  753. address = address.substring(province.length());
  754. }
  755. }
  756. // 去除市
  757. for (String city : CITIES) {
  758. if (address.startsWith(city)) {
  759. address = address.substring(city.length());
  760. } else {
  761. // 处理别名情况,例如 "安庆" 代表 "安庆市"
  762. String alias = city.replace("市", "");
  763. if (address.startsWith(alias)) {
  764. address = address.substring(alias.length());
  765. }
  766. }
  767. }
  768. // 去除区、镇、街道
  769. address = address.replaceAll("^.*?(区|镇|街道)", "");
  770. // 处理错误数据情况
  771. if (address.trim().isEmpty()) {
  772. // 尝试从原始地址中提取最后一个有效的市名称
  773. for (int i = CITIES.size() - 1; i >= 0; i--) {
  774. String city = CITIES.toArray(new String[0])[i];
  775. if (address.contains(city)) {
  776. return city;
  777. }
  778. String alias = city.replace("市", "");
  779. if (address.contains(alias)) {
  780. return city;
  781. }
  782. }
  783. // 如果没有找到市名称,检查是否有省份名称
  784. for (String province : PROVINCES) {
  785. if (address.contains(province)) {
  786. return province;
  787. }
  788. }
  789. }
  790. return address.trim();
  791. }
  792. private static final String ADDRESS_REGEX = "^[^市]+市[^区]+区(?:[^镇]+镇|[^街道]+街道).+$";
  793. private static final Pattern ADDRESS_PATTERN = Pattern.compile(ADDRESS_REGEX);
  794. /**
  795. * 验证地址格式的正则表达式
  796. */
  797. public static boolean validateAddress(String address) {
  798. if (address == null || address.trim().isEmpty()) {
  799. return false;
  800. }
  801. return ADDRESS_PATTERN.matcher(address).matches();
  802. }
  803. }