|
@@ -0,0 +1,771 @@
|
|
|
+package com.skyversation.poiaddr.util;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.skyversation.poiaddr.bean.AddressResult;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+public class AddressTools {
|
|
|
+
|
|
|
/** Sole instance; created eagerly during class initialization, so it is never {@code null}. */
private static final AddressTools instance = new AddressTools();

/** Not instantiable from outside; obtain the shared instance through {@link #getInstance()}. */
private AddressTools() {
}

/**
 * Returns the shared {@code AddressTools} instance.
 *
 * <p>The instance is built by the static field initializer, so no lazy creation
 * (and no null check) is needed here.
 *
 * @return the singleton instance
 */
public static AddressTools getInstance() {
    return instance;
}
|
|
|
+
|
|
|
+ // 上海市所有的区
|
|
|
+ private static final Set<String> SHANGHAI_DISTRICTS = new HashSet<>(Arrays.asList(
|
|
|
+ "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区",
|
|
|
+ "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区",
|
|
|
+ "浦东新区", "金山区", "松江区", "青浦区", "奉贤区",
|
|
|
+ "崇明区"
|
|
|
+ ));
|
|
|
+
|
|
|
+ // 各区下辖的镇与街道
|
|
|
+ private static final Map<String, Set<String>> DISTRICT_STREETS = new HashMap<>();
|
|
|
+ // 街镇下辖的村和居委
|
|
|
+ private static final Map<String, Set<String>> STREET_VILLAGES_COMMUNITIES = new HashMap<>();
|
|
|
static {
    // Village / residents'-committee table. Only the towns and sub-districts listed
    // below have entries; any other street simply has no village data.
    registerVillages("中山街道",
            "茸梅社区居委会", "方东社区居委会", "夏家浜社区居委会",
            "五龙村", "永隆村", "白洋村",
            "白云社区居委会");
    registerVillages("方松街道",
            "泰晤士小镇社区居委会", "绿地社区居委会", "公捷苑社区居委会",
            "新陈家村", "江秋村");
    registerVillages("永丰街道",
            "秀南社区居委会", "仓桥社区居委会", "玉乐社区居委会",
            "周星村", "秀塘浜村", "薛家埭村");
    registerVillages("岳阳街道",
            "太平社区居委会", "菜花泾社区居委会", "佛字桥社区居委会",
            "长桥村");
    registerVillages("泗泾镇",
            "江川社区居委会", "横港社区居委会", "青松社区居委会",
            "泗泾村", "张施村", "打铁桥村");
    registerVillages("佘山镇",
            "陈坊桥社区居委会", "北干山社区居委会", "佘山家园社区居委会",
            "江秋村", "陈坊村", "高家厍村");
    registerVillages("车墩镇",
            "虬长路社区居委会", "车墩社区居委会", "高桥村",
            "联建村", "联庄村", "洋泾村", "新余村");
    registerVillages("新桥镇",
            "新乐社区居委会", "晨星社区居委会", "春申社区居委会",
            "民益村", "马汤村", "潘家浜村");
    registerVillages("洞泾镇",
            "海欣社区居委会", "洞泾社区居委会", "同欣社区居委会",
            "渔洋浜村", "砖桥村", "张泾村");
    registerVillages("九亭镇",
            "亭中社区居委会", "亭东社区居委会", "亭南社区居委会",
            "九亭村", "朱泾浜村", "小寅村");
    registerVillages("泖港镇",
            "泖港社区居委会", "五厍社区居委会", "腰泾村",
            "胡光村", "新龚村", "泖港村");
    registerVillages("石湖荡镇",
            "古松社区居委会", "李塔汇社区居委会", "新源村",
            "东港村", "洙桥村", "金胜村");
    registerVillages("新浜镇",
            "新浜社区居委会", "赵王村", "胡家埭村",
            "南杨村", "黄家埭村", "许家草村");
    registerVillages("叶榭镇",
            "叶榭社区居委会", "张泽社区居委会", "堰泾村",
            "井凌桥村", "兴达村", "同建村");
    registerVillages("小昆山镇",
            "大港社区居委会", "平原社区居委会", "玉昆二村社区居委会",
            "汤村庙村", "陆家埭村", "昆西村");
}

/**
 * Stores the villages and residents' committees of one town / sub-district.
 *
 * @param street   town or sub-district name used as the map key
 * @param villages names registered under that key, in insertion order
 */
private static void registerVillages(String street, String... villages) {
    STREET_VILLAGES_COMMUNITIES.put(street, new HashSet<>(Arrays.asList(villages)));
}
|
|
|
static {
    // District -> towns and sub-districts (街道) table used by the address parsers.
    registerStreets("黄浦区",
            "南京东路街道", "外滩街道", "半淞园路街道", "小东门街道",
            "豫园街道", "老西门街道", "五里桥街道", "打浦桥街道");
    registerStreets("徐汇区",
            "天平路街道", "湖南路街道", "斜土路街道", "枫林路街道",
            "长桥街道", "漕河泾街道", "康健新村街道", "徐家汇街道",
            "凌云路街道", "龙华街道", "漕河泾镇", "华泾镇");
    registerStreets("长宁区",
            "华阳路街道", "江苏路街道", "新华路街道", "周家桥街道",
            "天山路街道", "仙霞新村街道", "虹桥街道", "程家桥街道",
            "北新泾街道", "新泾镇");
    registerStreets("静安区",
            "江宁路街道", "石门二路街道", "南京西路街道", "静安寺街道",
            "曹家渡街道", "天目西路街道", "北站街道", "宝山路街道",
            "芷江西路街道", "共和新路街道", "大宁路街道", "彭浦新村街道",
            "临汾路街道", "彭浦镇");
    registerStreets("普陀区",
            "曹杨新村街道", "长风新村街道", "长寿路街道", "甘泉路街道",
            "石泉路街道", "宜川路街道", "万里街道", "真如镇街道",
            "长征镇", "桃浦镇");
    registerStreets("虹口区",
            "欧阳路街道", "曲阳路街道", "广中路街道", "嘉兴路街道",
            "凉城新村街道", "四川北路街道", "提篮桥街道", "江湾镇街道");
    registerStreets("杨浦区",
            "定海路街道", "平凉路街道", "江浦路街道", "四平路街道",
            "控江路街道", "长白新村街道", "延吉新村街道", "殷行街道",
            "大桥街道", "五角场街道", "新江湾城街道", "五角场镇");
    registerStreets("闵行区",
            "江川路街道", "新虹街道", "古美路街道", "浦锦街道",
            "莘庄镇", "七宝镇", "颛桥镇", "华漕镇", "虹桥镇",
            "梅陇镇", "吴泾镇", "马桥镇", "浦江镇");
    registerStreets("宝山区",
            "吴淞街道", "友谊路街道", "张庙街道", "罗店镇",
            "大场镇", "杨行镇", "月浦镇", "罗泾镇", "顾村镇",
            "高境镇", "庙行镇", "淞南镇");
    registerStreets("嘉定区",
            "新成路街道", "真新街道", "嘉定镇街道", "南翔镇",
            "安亭镇", "马陆镇", "徐行镇", "华亭镇", "外冈镇",
            "江桥镇");
    registerStreets("浦东新区",
            "潍坊新村街道", "陆家嘴街道", "周家渡街道", "塘桥街道",
            "上钢新村街道", "南码头路街道", "沪东新村街道", "金杨新村街道",
            "洋泾街道", "浦兴路街道", "东明路街道", "花木街道",
            "川沙新镇", "高桥镇", "北蔡镇", "合庆镇", "唐镇",
            "曹路镇", "金桥镇", "高东镇", "张江镇", "三林镇",
            "惠南镇", "周浦镇", "新场镇", "大团镇", "康桥镇",
            "航头镇", "祝桥镇", "泥城镇", "宣桥镇", "书院镇",
            "万祥镇", "老港镇", "南汇新城镇");
    registerStreets("金山区",
            "石化街道", "朱泾镇", "枫泾镇", "张堰镇", "亭林镇",
            "吕巷镇", "廊下镇", "金山卫镇", "漕泾镇", "山阳镇");
    registerStreets("松江区",
            "中山街道", "方松街道", "永丰街道", "岳阳街道",
            "泗泾镇", "佘山镇", "车墩镇", "新桥镇", "洞泾镇",
            "九亭镇", "泖港镇", "石湖荡镇", "新浜镇", "叶榭镇",
            "小昆山镇");
    registerStreets("青浦区",
            "夏阳街道", "盈浦街道", "香花桥街道", "朱家角镇",
            "练塘镇", "金泽镇", "赵巷镇", "徐泾镇", "华新镇",
            "重固镇", "白鹤镇");
    registerStreets("奉贤区",
            "西渡街道", "奉浦街道", "南桥镇", "庄行镇", "金汇镇",
            "柘林镇", "青村镇", "奉城镇", "四团镇", "海湾镇");
    registerStreets("崇明区",
            "城桥镇", "堡镇", "新河镇", "庙镇", "竖新镇",
            "向化镇", "三星镇", "港沿镇", "中兴镇", "陈家镇",
            "绿华镇", "港西镇", "建设镇", "新海镇", "东平镇");
}

/**
 * Stores the towns and sub-districts of one district.
 *
 * @param district district name used as the map key
 * @param streets  town / sub-district names registered under that key, in insertion order
 */
private static void registerStreets(String district, String... streets) {
    DISTRICT_STREETS.put(district, new HashSet<>(Arrays.asList(streets)));
}
|
|
|
+
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 根据地名地址返回到街镇一级分词,
|
|
|
+ * @param address 上海市松江区车墩镇乐都路590号
|
|
|
+ * @return 上海市,松江区,车墩镇,乐都路590号
|
|
|
+ */
|
|
|
+ public static String[] parseAddressJZ(String address) {
|
|
|
+ String[] result = new String[4];
|
|
|
+ result[0] = "上海市";
|
|
|
+
|
|
|
+ // 去除地址中的“上海”和“上海市”
|
|
|
+ address = address.replaceAll("上海(市)?", "");
|
|
|
+
|
|
|
+ // 查找区
|
|
|
+ String foundDistrict = null;
|
|
|
+ for (String district : SHANGHAI_DISTRICTS) {
|
|
|
+ if (address.contains(district)) {
|
|
|
+ foundDistrict = district;
|
|
|
+ address = address.replace(district, "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ result[1] = foundDistrict;
|
|
|
+
|
|
|
+ // 查找街镇
|
|
|
+ String foundStreet = null;
|
|
|
+ if (foundDistrict != null) {
|
|
|
+ Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
|
|
|
+ if (streets != null) {
|
|
|
+ for (String street : streets) {
|
|
|
+ if (address.contains(street)) {
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(street, "");
|
|
|
+ break;
|
|
|
+ } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ result[2] = foundStreet;
|
|
|
+
|
|
|
+ // 剩余部分为其他信息
|
|
|
+ address = address.trim();
|
|
|
+ result[3] = address;
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 根据地名地址返回到村居一级分词,村居不是很准确
|
|
|
+ * @param address 上海市松江区车墩镇乐都村乐都路590号
|
|
|
+ * @return 上海市,松江区,车墩镇,乐都村,乐都路590号
|
|
|
+ */
|
|
|
+ public static String[] parseAddressCJ(String address) {
|
|
|
+ String[] result = new String[5];
|
|
|
+ result[0] = "上海市";
|
|
|
+
|
|
|
+ // 去除地址中的“上海”和“上海市”
|
|
|
+ address = address.replaceAll("上海(市)?", "");
|
|
|
+
|
|
|
+ // 先尝试通过村居信息定位
|
|
|
+ String foundVillageOrCommunity = null;
|
|
|
+ String foundStreet = null;
|
|
|
+ String foundDistrict = null;
|
|
|
+ for (Map.Entry<String, Set<String>> streetEntry : STREET_VILLAGES_COMMUNITIES.entrySet()) {
|
|
|
+ String street = streetEntry.getKey();
|
|
|
+ Set<String> villagesCommunities = streetEntry.getValue();
|
|
|
+ for (String villageCommunity : villagesCommunities) {
|
|
|
+ if (address.contains(villageCommunity)) {
|
|
|
+ foundVillageOrCommunity = villageCommunity;
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(villageCommunity, "");
|
|
|
+ break;
|
|
|
+ } else {
|
|
|
+ String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
|
|
|
+ // 避免与街镇简称冲突
|
|
|
+ String streetShortName = street.replace("镇", "").replace("街道", "");
|
|
|
+ if (!shortName.equals(streetShortName) && address.contains(shortName)) {
|
|
|
+ foundVillageOrCommunity = villageCommunity;
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(shortName, "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (foundVillageOrCommunity != null) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果通过村居定位到了街镇,再定位区
|
|
|
+ if (foundStreet != null) {
|
|
|
+ for (Map.Entry<String, Set<String>> districtEntry : DISTRICT_STREETS.entrySet()) {
|
|
|
+ String district = districtEntry.getKey();
|
|
|
+ Set<String> streets = districtEntry.getValue();
|
|
|
+ if (streets.contains(foundStreet)) {
|
|
|
+ foundDistrict = district;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果没有通过村居定位到信息,再按常规流程查找区、街镇、村居
|
|
|
+ if (foundDistrict == null) {
|
|
|
+ // 查找区
|
|
|
+ for (String district : SHANGHAI_DISTRICTS) {
|
|
|
+ if (address.contains(district)) {
|
|
|
+ foundDistrict = district;
|
|
|
+ address = address.replace(district, "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (foundStreet == null && foundDistrict != null) {
|
|
|
+ // 查找街镇
|
|
|
+ Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
|
|
|
+ if (streets != null) {
|
|
|
+ for (String street : streets) {
|
|
|
+ if (address.contains(street)) {
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(street, "");
|
|
|
+ break;
|
|
|
+ } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
|
|
|
+ foundStreet = street;
|
|
|
+ address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (foundVillageOrCommunity == null && foundStreet != null) {
|
|
|
+ // 查找村或居委
|
|
|
+ Set<String> villagesCommunities = STREET_VILLAGES_COMMUNITIES.get(foundStreet);
|
|
|
+ if (villagesCommunities != null) {
|
|
|
+ for (String villageCommunity : villagesCommunities) {
|
|
|
+ if (address.contains(villageCommunity)) {
|
|
|
+ foundVillageOrCommunity = villageCommunity;
|
|
|
+ address = address.replace(villageCommunity, "");
|
|
|
+ break;
|
|
|
+ } else {
|
|
|
+ String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
|
|
|
+ // 避免与街镇简称冲突
|
|
|
+ String streetShortName = foundStreet.replace("镇", "").replace("街道", "");
|
|
|
+ if (!shortName.equals(streetShortName) && address.contains(shortName)) {
|
|
|
+ foundVillageOrCommunity = villageCommunity;
|
|
|
+ address = address.replace(shortName, "");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 处理别名残留问题
|
|
|
+ if (foundVillageOrCommunity != null) {
|
|
|
+ String[] aliases = {"居委", "居委会", "村", "新村"};
|
|
|
+ for (String alias : aliases) {
|
|
|
+ if (address.startsWith(alias)) {
|
|
|
+ address = address.substring(alias.length());
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 移除剩余地址中可能残留的区和街镇信息
|
|
|
+ if (foundDistrict != null) {
|
|
|
+ address = address.replace(foundDistrict, "");
|
|
|
+ }
|
|
|
+ if (foundStreet != null) {
|
|
|
+ address = address.replace(foundStreet, "");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 去除多余的空白字符
|
|
|
+ address = address.trim();
|
|
|
+
|
|
|
+ result[1] = foundDistrict;
|
|
|
+ result[2] = foundStreet;
|
|
|
+ result[3] = foundVillageOrCommunity;
|
|
|
+ result[4] = address;
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 获取array中,指定字段与address匹配值最高的数据,特定方法,指定的jsonarray使用
|
|
|
+ * @param address 上海市松江区乐都路590号
|
|
|
+ * @param array 地名地址接口返回的jsonarray数据
|
|
|
+ * @param param jaonarray中地名地址字段的key
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public JSONObject findBestMatch(String address, JSONArray array, String param) {
|
|
|
+ JSONObject bestMatch = null;
|
|
|
+ double maxTotalScore = 0;
|
|
|
+
|
|
|
+ // 处理输入地址的分词和数字前文本
|
|
|
+ AddressInfo addressInfo = processAddress(address);
|
|
|
+
|
|
|
+ for (int i = 0; i < array.size(); i++) {
|
|
|
+ JSONObject obj = array.getJSONObject(i);
|
|
|
+ String addr = obj.getString(param);
|
|
|
+ AddressInfo addrInfo = processAddress(addr);
|
|
|
+
|
|
|
+ // 第一步:全词匹配比例
|
|
|
+ double score1 = calculateFullWordMatchScore(address, addr);
|
|
|
+
|
|
|
+ // 第二步:数字匹配得分
|
|
|
+ double score2 = calculateNumberMatchScore(addressInfo.firstNumber, addrInfo.firstNumber);
|
|
|
+
|
|
|
+ // 第三步:数字前文本匹配得分
|
|
|
+ double score3 = calculatePrefixTextMatchScore(addressInfo.prefixText, addrInfo.prefixText);
|
|
|
+
|
|
|
+ double totalScore = score1 + score2 + score3;
|
|
|
+
|
|
|
+ if (totalScore > maxTotalScore) {
|
|
|
+ maxTotalScore = totalScore;
|
|
|
+ bestMatch = obj;
|
|
|
+ bestMatch.put("计分1", score1);
|
|
|
+ bestMatch.put("计分2", score2);
|
|
|
+ bestMatch.put("计分3", score3);
|
|
|
+ bestMatch.put("总分", totalScore);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return bestMatch;
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 其他方法的引用方法,不用管
|
|
|
+ * @param input
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private AddressInfo processAddress(String input) {
|
|
|
+ String prefixText = "";
|
|
|
+ String firstNumber = null;
|
|
|
+ List<String> tokens = new ArrayList<>();
|
|
|
+ Pattern pattern = Pattern.compile("\\d+|[^\\d]+");
|
|
|
+ Matcher matcher = pattern.matcher(input);
|
|
|
+ boolean foundNumber = false;
|
|
|
+ while (matcher.find()) {
|
|
|
+ String token = matcher.group();
|
|
|
+ tokens.add(token);
|
|
|
+ if (!foundNumber && token.matches("\\d+")) {
|
|
|
+ firstNumber = token;
|
|
|
+ foundNumber = true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (tokens.size() > 0 && !foundNumber) {
|
|
|
+ prefixText = input;
|
|
|
+ } else if (tokens.size() > 0 && firstNumber != null) {
|
|
|
+ int index = tokens.indexOf(firstNumber);
|
|
|
+ for (int i = 0; i < index; i++) {
|
|
|
+ prefixText += tokens.get(i);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return new AddressInfo(prefixText, firstNumber);
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 其他方法的引用方法,不用管
|
|
|
+ * @param inputAddr
|
|
|
+ * @param shortAddr
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private double calculateFullWordMatchScore(String inputAddr, String shortAddr) {
|
|
|
+ int totalLength = Math.max(inputAddr.length(), shortAddr.length());
|
|
|
+ int commonCount = 0;
|
|
|
+ for (char c : inputAddr.toCharArray()) {
|
|
|
+ if (shortAddr.indexOf(c) != -1) {
|
|
|
+ commonCount++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return (double) commonCount / totalLength;
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 其他方法的引用方法,不用管
|
|
|
+ * @param addressNumber
|
|
|
+ * @param addrNumber
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private double calculateNumberMatchScore(String addressNumber, String addrNumber) {
|
|
|
+ if (addressNumber == null || addrNumber == null) {
|
|
|
+ return 1; // 都没有数字或其中一个没有,认为这一步匹配满分
|
|
|
+ }
|
|
|
+ if (addressNumber.equals(addrNumber)) {
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ int num1 = Integer.parseInt(addressNumber);
|
|
|
+ int num2 = Integer.parseInt(addrNumber);
|
|
|
+ int diff = Math.abs(num1 - num2);
|
|
|
+ if (diff <= 200) {
|
|
|
+ return 1 - (double) diff / 200;
|
|
|
+ }
|
|
|
+ } catch (NumberFormatException e) {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 其他方法的引用方法,不用管
|
|
|
+ * @param addressPrefix
|
|
|
+ * @param addrPrefix
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private double calculatePrefixTextMatchScore(String addressPrefix, String addrPrefix) {
|
|
|
+ if (addressPrefix.isEmpty() || addrPrefix.isEmpty()) {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ double matchScore = calculateFullWordMatchScore(addressPrefix, addrPrefix);
|
|
|
+ return matchScore >= 0.65 ? matchScore : 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static class AddressInfo {
|
|
|
+ String prefixText;
|
|
|
+ String firstNumber;
|
|
|
+
|
|
|
+ AddressInfo(String prefixText, String firstNumber) {
|
|
|
+ this.prefixText = prefixText;
|
|
|
+ this.firstNumber = firstNumber;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 去除特殊字符,仅保留中文、数字、字母
|
|
|
+ * @param address
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public String deleteStr(String address){
|
|
|
+ if (address == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ // 使用正则表达式替换所有非中文和非数字的字符为空字符串
|
|
|
+ return address.replaceAll("[^\\u4e00-\\u9fa5\\da-zA-Z]", "");
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ // 静态资源:中国所有省份、地级市和县级市的名称
|
|
|
+ private static final Set<String> PROVINCES = new HashSet<>(Arrays.asList(
|
|
|
+ "北京市", "天津市", "上海市", "重庆市",
|
|
|
+ "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
|
|
|
+ "江苏省", "浙江省", "安徽省", "福建省", "江西省",
|
|
|
+ "山东省", "河南省", "湖北省", "湖南省", "广东省",
|
|
|
+ "海南省", "四川省", "贵州省", "云南省", "陕西省",
|
|
|
+ "甘肃省", "青海省", "台湾省",
|
|
|
+ "内蒙古自治区", "广西壮族自治区", "西藏自治区",
|
|
|
+ "宁夏回族自治区", "新疆维吾尔自治区",
|
|
|
+ "香港特别行政区", "澳门特别行政区"
|
|
|
+ ));
|
|
|
+
|
|
|
+ private static final Set<String> CITIES = new HashSet<>(Arrays.asList(
|
|
|
+ // 这里只列举部分示例,实际需要完整的地级市和县级市列表
|
|
|
+ "石家庄市", "唐山市", "秦皇岛市", "邯郸市", "邢台市",
|
|
|
+ "保定市", "张家口市", "承德市", "沧州市", "廊坊市",
|
|
|
+ "衡水市", "太原市", "大同市", "阳泉市", "长治市",
|
|
|
+ "晋城市", "朔州市", "晋中市", "运城市", "忻州市",
|
|
|
+ "临汾市", "吕梁市", "沈阳市", "大连市", "鞍山市",
|
|
|
+ "抚顺市", "本溪市", "丹东市", "锦州市", "营口市",
|
|
|
+ "阜新市", "辽阳市", "盘锦市", "铁岭市", "朝阳市",
|
|
|
+ "葫芦岛市", "长春市", "吉林市", "四平市", "辽源市",
|
|
|
+ "通化市", "白山市", "松原市", "白城市", "延边朝鲜族自治州",
|
|
|
+ "哈尔滨市", "齐齐哈尔市", "鸡西市", "鹤岗市", "双鸭山市",
|
|
|
+ "大庆市", "伊春市", "佳木斯市", "七台河市", "牡丹江市",
|
|
|
+ "黑河市", "绥化市", "大兴安岭地区", "南京市", "无锡市",
|
|
|
+ "徐州市", "常州市", "苏州市", "南通市", "连云港市",
|
|
|
+ "淮安市", "盐城市", "扬州市", "镇江市", "泰州市",
|
|
|
+ "宿迁市", "杭州市", "宁波市", "温州市", "嘉兴市",
|
|
|
+ "湖州市", "绍兴市", "金华市", "衢州市", "舟山市",
|
|
|
+ "台州市", "丽水市", "合肥市", "芜湖市", "蚌埠市",
|
|
|
+ "淮南市", "马鞍山市", "淮北市", "铜陵市", "安庆市",
|
|
|
+ "黄山市", "滁州市", "阜阳市", "宿州市", "六安市",
|
|
|
+ "亳州市", "池州市", "宣城市", "福州市", "厦门市",
|
|
|
+ "莆田市", "三明市", "泉州市", "漳州市", "南平市",
|
|
|
+ "龙岩市", "宁德市", "南昌市", "景德镇市", "萍乡市",
|
|
|
+ "九江市", "新余市", "鹰潭市", "赣州市", "吉安市",
|
|
|
+ "宜春市", "抚州市", "上饶市", "济南市", "青岛市",
|
|
|
+ "淄博市", "枣庄市", "东营市", "烟台市", "潍坊市",
|
|
|
+ "济宁市", "泰安市", "威海市", "日照市", "临沂市",
|
|
|
+ "德州市", "聊城市", "滨州市", "菏泽市", "郑州市",
|
|
|
+ "开封市", "洛阳市", "平顶山市", "安阳市", "鹤壁市",
|
|
|
+ "新乡市", "焦作市", "濮阳市", "许昌市", "漯河市",
|
|
|
+ "三门峡市", "南阳市", "商丘市", "信阳市", "周口市",
|
|
|
+ "驻马店市", "武汉市", "黄石市", "十堰市", "宜昌市",
|
|
|
+ "襄阳市", "鄂州市", "荆门市", "孝感市", "荆州市",
|
|
|
+ "黄冈市", "咸宁市", "随州市", "恩施土家族苗族自治州",
|
|
|
+ "长沙市", "株洲市", "湘潭市", "衡阳市", "邵阳市",
|
|
|
+ "岳阳市", "常德市", "张家界市", "益阳市", "郴州市",
|
|
|
+ "永州市", "怀化市", "娄底市", "湘西土家族苗族自治州",
|
|
|
+ "广州市", "韶关市", "深圳市", "珠海市", "汕头市",
|
|
|
+ "佛山市", "江门市", "湛江市", "茂名市", "肇庆市",
|
|
|
+ "惠州市", "梅州市", "汕尾市", "河源市", "阳江市",
|
|
|
+ "清远市", "东莞市", "中山市", "潮州市", "揭阳市",
|
|
|
+ "云浮市", "海口市", "三亚市", "三沙市", "儋州市",
|
|
|
+ "成都市", "自贡市", "攀枝花市", "泸州市", "德阳市",
|
|
|
+ "绵阳市", "广元市", "遂宁市", "内江市", "乐山市",
|
|
|
+ "南充市", "眉山市", "宜宾市", "广安市", "达州市",
|
|
|
+ "雅安市", "巴中市", "资阳市", "阿坝藏族羌族自治州",
|
|
|
+ "甘孜藏族自治州", "凉山彝族自治州", "贵阳市", "六盘水市",
|
|
|
+ "遵义市", "安顺市", "毕节市", "铜仁市", "黔西南布依族苗族自治州",
|
|
|
+ "黔东南苗族侗族自治州", "黔南布依族苗族自治州", "昆明市", "曲靖市",
|
|
|
+ "玉溪市", "保山市", "昭通市", "丽江市", "普洱市", "临沧市",
|
|
|
+ "楚雄彝族自治州", "红河哈尼族彝族自治州", "文山壮族苗族自治州",
|
|
|
+ "西双版纳傣族自治州", "大理白族自治州", "德宏傣族景颇族自治州",
|
|
|
+ "怒江傈僳族自治州", "迪庆藏族自治州", "西安市", "铜川市", "宝鸡市",
|
|
|
+ "咸阳市", "渭南市", "延安市", "汉中市", "榆林市", "安康市", "商洛市",
|
|
|
+ "兰州市", "嘉峪关市", "金昌市", "白银市", "天水市", "武威市",
|
|
|
+ "张掖市", "平凉市", "酒泉市", "庆阳市", "定西市", "陇南市",
|
|
|
+ "临夏回族自治州", "甘南藏族自治州", "西宁市", "海东市",
|
|
|
+ "海北藏族自治州", "黄南藏族自治州", "海南藏族自治州",
|
|
|
+ "果洛藏族自治州", "玉树藏族自治州", "海西蒙古族藏族自治州",
|
|
|
+ "台北市", "新北市", "桃园市", "台中市", "台南市", "高雄市",
|
|
|
+ "基隆市", "新竹市", "嘉义市", "澳门市", "香港市"
|
|
|
+ ));
|
|
|
+
|
|
|
+ /***
|
|
|
+ * 去除地名地址的前缀,一直到镇/街道
|
|
|
+ * @param address
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static String quchuqianzhui(String address) {
|
|
|
+ // 先去除 "中国"
|
|
|
+ address = address.replace("中国", "");
|
|
|
+
|
|
|
+ // 去除省份
|
|
|
+ for (String province : PROVINCES) {
|
|
|
+ if (address.startsWith(province)) {
|
|
|
+ address = address.substring(province.length());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 去除市
|
|
|
+ for (String city : CITIES) {
|
|
|
+ if (address.startsWith(city)) {
|
|
|
+ address = address.substring(city.length());
|
|
|
+ } else {
|
|
|
+ // 处理别名情况,例如 "安庆" 代表 "安庆市"
|
|
|
+ String alias = city.replace("市", "");
|
|
|
+ if (address.startsWith(alias)) {
|
|
|
+ address = address.substring(alias.length());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 去除区、镇、街道
|
|
|
+ address = address.replaceAll("^.*?(区|镇|街道)", "");
|
|
|
+
|
|
|
+ // 处理错误数据情况
|
|
|
+ if (address.trim().isEmpty()) {
|
|
|
+ // 尝试从原始地址中提取最后一个有效的市名称
|
|
|
+ for (int i = CITIES.size() - 1; i >= 0; i--) {
|
|
|
+ String city = CITIES.toArray(new String[0])[i];
|
|
|
+ if (address.contains(city)) {
|
|
|
+ return city;
|
|
|
+ }
|
|
|
+ String alias = city.replace("市", "");
|
|
|
+ if (address.contains(alias)) {
|
|
|
+ return city;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 如果没有找到市名称,检查是否有省份名称
|
|
|
+ for (String province : PROVINCES) {
|
|
|
+ if (address.contains(province)) {
|
|
|
+ return province;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return address.trim();
|
|
|
+ }
|
|
|
+
|
|
|
+ private static final String ADDRESS_REGEX = "^[^市]+市[^区]+区(?:[^镇]+镇|[^街道]+街道).+$";
|
|
|
+ private static final Pattern ADDRESS_PATTERN = Pattern.compile(ADDRESS_REGEX);
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 验证地址格式的正则表达式
|
|
|
+ */
|
|
|
+ public static boolean validateAddress(String address) {
|
|
|
+ if (address == null || address.trim().isEmpty()) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return ADDRESS_PATTERN.matcher(address).matches();
|
|
|
+ }
|
|
|
+
|
|
|
+}
|