12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895 |
- package com.skyversation.poiaddr.util;
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import com.skyversation.poiaddr.addquery.AddressQueryEngine;
- import com.skyversation.poiaddr.addquery.Constant;
- import com.skyversation.poiaddr.bean.AddressResult;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- public class AddressTools {
/** The single shared instance, created eagerly when the class is loaded. */
private static final AddressTools instance = new AddressTools();

/** Not instantiable from outside; obtain the shared instance via {@link #getInstance()}. */
private AddressTools() {
}

/**
 * Returns the shared {@code AddressTools} instance.
 *
 * <p>The instance is created during class initialisation, so no lazy
 * null check is needed here.
 *
 * @return the singleton instance, never {@code null}
 */
public static AddressTools getInstance() {
    return instance;
}
- // 上海市所有的区
- private static final Set<String> SHANGHAI_DISTRICTS = new HashSet<>(Arrays.asList(
- "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区",
- "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区",
- "浦东新区", "金山区", "松江区", "青浦区", "奉贤区",
- "崇明区"
- ));
- // 各区下辖的镇与街道
- private static final Map<String, Set<String>> DISTRICT_STREETS = new HashMap<>();
- // 街镇下辖的村和居委
- private static final Map<String, Set<String>> STREET_VILLAGES_COMMUNITIES = new HashMap<>();
static {
    // Villages and residents' committees, registered per town/subdistrict.
    // Every key below also appears in the 松江区 entry of DISTRICT_STREETS.
    // Registration order is kept unchanged so map iteration order stays the same.
    STREET_VILLAGES_COMMUNITIES.put("中山街道", new HashSet<>(Arrays.asList(
            "茸梅社区居委会", "方东社区居委会", "夏家浜社区居委会",
            "五龙村", "永隆村", "白洋村",
            "白云社区居委会")));
    STREET_VILLAGES_COMMUNITIES.put("方松街道", new HashSet<>(Arrays.asList(
            "泰晤士小镇社区居委会", "绿地社区居委会", "公捷苑社区居委会",
            "新陈家村", "江秋村")));
    STREET_VILLAGES_COMMUNITIES.put("永丰街道", new HashSet<>(Arrays.asList(
            "秀南社区居委会", "仓桥社区居委会", "玉乐社区居委会",
            "周星村", "秀塘浜村", "薛家埭村")));
    STREET_VILLAGES_COMMUNITIES.put("岳阳街道", new HashSet<>(Arrays.asList(
            "太平社区居委会", "菜花泾社区居委会", "佛字桥社区居委会",
            "长桥村")));
    STREET_VILLAGES_COMMUNITIES.put("泗泾镇", new HashSet<>(Arrays.asList(
            "江川社区居委会", "横港社区居委会", "青松社区居委会",
            "泗泾村", "张施村", "打铁桥村")));
    STREET_VILLAGES_COMMUNITIES.put("佘山镇", new HashSet<>(Arrays.asList(
            "陈坊桥社区居委会", "北干山社区居委会", "佘山家园社区居委会",
            "江秋村", "陈坊村", "高家厍村")));
    STREET_VILLAGES_COMMUNITIES.put("车墩镇", new HashSet<>(Arrays.asList(
            "虬长路社区居委会", "车墩社区居委会", "高桥村",
            "联建村", "联庄村", "洋泾村", "新余村")));
    STREET_VILLAGES_COMMUNITIES.put("新桥镇", new HashSet<>(Arrays.asList(
            "新乐社区居委会", "晨星社区居委会", "春申社区居委会",
            "民益村", "马汤村", "潘家浜村")));
    STREET_VILLAGES_COMMUNITIES.put("洞泾镇", new HashSet<>(Arrays.asList(
            "海欣社区居委会", "洞泾社区居委会", "同欣社区居委会",
            "渔洋浜村", "砖桥村", "张泾村")));
    STREET_VILLAGES_COMMUNITIES.put("九亭镇", new HashSet<>(Arrays.asList(
            "亭中社区居委会", "亭东社区居委会", "亭南社区居委会",
            "九亭村", "朱泾浜村", "小寅村")));
    STREET_VILLAGES_COMMUNITIES.put("泖港镇", new HashSet<>(Arrays.asList(
            "泖港社区居委会", "五厍社区居委会", "腰泾村",
            "胡光村", "新龚村", "泖港村")));
    STREET_VILLAGES_COMMUNITIES.put("石湖荡镇", new HashSet<>(Arrays.asList(
            "古松社区居委会", "李塔汇社区居委会", "新源村",
            "东港村", "洙桥村", "金胜村")));
    STREET_VILLAGES_COMMUNITIES.put("新浜镇", new HashSet<>(Arrays.asList(
            "新浜社区居委会", "赵王村", "胡家埭村",
            "南杨村", "黄家埭村", "许家草村")));
    STREET_VILLAGES_COMMUNITIES.put("叶榭镇", new HashSet<>(Arrays.asList(
            "叶榭社区居委会", "张泽社区居委会", "堰泾村",
            "井凌桥村", "兴达村", "同建村")));
    STREET_VILLAGES_COMMUNITIES.put("小昆山镇", new HashSet<>(Arrays.asList(
            "大港社区居委会", "平原社区居委会", "玉昆二村社区居委会",
            "汤村庙村", "陆家埭村", "昆西村")));
}
static {
    // Towns and subdistricts of each Shanghai district.
    // Registration order is kept unchanged so map iteration order stays the same.
    DISTRICT_STREETS.put("黄浦区", new HashSet<>(Arrays.asList(
            "南京东路街道", "外滩街道", "半淞园路街道", "小东门街道",
            "豫园街道", "老西门街道", "五里桥街道", "打浦桥街道")));
    DISTRICT_STREETS.put("徐汇区", new HashSet<>(Arrays.asList(
            "天平路街道", "湖南路街道", "斜土路街道", "枫林路街道",
            "长桥街道", "漕河泾街道", "康健新村街道", "徐家汇街道",
            "凌云路街道", "龙华街道", "漕河泾镇", "华泾镇")));
    DISTRICT_STREETS.put("长宁区", new HashSet<>(Arrays.asList(
            "华阳路街道", "江苏路街道", "新华路街道", "周家桥街道",
            "天山路街道", "仙霞新村街道", "虹桥街道", "程家桥街道",
            "北新泾街道", "新泾镇")));
    DISTRICT_STREETS.put("静安区", new HashSet<>(Arrays.asList(
            "江宁路街道", "石门二路街道", "南京西路街道", "静安寺街道",
            "曹家渡街道", "天目西路街道", "北站街道", "宝山路街道",
            "芷江西路街道", "共和新路街道", "大宁路街道", "彭浦新村街道",
            "临汾路街道", "彭浦镇")));
    DISTRICT_STREETS.put("普陀区", new HashSet<>(Arrays.asList(
            "曹杨新村街道", "长风新村街道", "长寿路街道", "甘泉路街道",
            "石泉路街道", "宜川路街道", "万里街道", "真如镇街道",
            "长征镇", "桃浦镇")));
    DISTRICT_STREETS.put("虹口区", new HashSet<>(Arrays.asList(
            "欧阳路街道", "曲阳路街道", "广中路街道", "嘉兴路街道",
            "凉城新村街道", "四川北路街道", "提篮桥街道", "江湾镇街道")));
    DISTRICT_STREETS.put("杨浦区", new HashSet<>(Arrays.asList(
            "定海路街道", "平凉路街道", "江浦路街道", "四平路街道",
            "控江路街道", "长白新村街道", "延吉新村街道", "殷行街道",
            "大桥街道", "五角场街道", "新江湾城街道", "五角场镇")));
    DISTRICT_STREETS.put("闵行区", new HashSet<>(Arrays.asList(
            "江川路街道", "新虹街道", "古美路街道", "浦锦街道",
            "莘庄镇", "七宝镇", "颛桥镇", "华漕镇", "虹桥镇",
            "梅陇镇", "吴泾镇", "马桥镇", "浦江镇")));
    DISTRICT_STREETS.put("宝山区", new HashSet<>(Arrays.asList(
            "吴淞街道", "友谊路街道", "张庙街道", "罗店镇",
            "大场镇", "杨行镇", "月浦镇", "罗泾镇", "顾村镇",
            "高境镇", "庙行镇", "淞南镇")));
    DISTRICT_STREETS.put("嘉定区", new HashSet<>(Arrays.asList(
            "新成路街道", "真新街道", "嘉定镇街道", "南翔镇",
            "安亭镇", "马陆镇", "徐行镇", "华亭镇", "外冈镇",
            "江桥镇")));
    DISTRICT_STREETS.put("浦东新区", new HashSet<>(Arrays.asList(
            "潍坊新村街道", "陆家嘴街道", "周家渡街道", "塘桥街道",
            "上钢新村街道", "南码头路街道", "沪东新村街道", "金杨新村街道",
            "洋泾街道", "浦兴路街道", "东明路街道", "花木街道",
            "川沙新镇", "高桥镇", "北蔡镇", "合庆镇", "唐镇",
            "曹路镇", "金桥镇", "高东镇", "张江镇", "三林镇",
            "惠南镇", "周浦镇", "新场镇", "大团镇", "康桥镇",
            "航头镇", "祝桥镇", "泥城镇", "宣桥镇", "书院镇",
            "万祥镇", "老港镇", "南汇新城镇")));
    DISTRICT_STREETS.put("金山区", new HashSet<>(Arrays.asList(
            "石化街道", "朱泾镇", "枫泾镇", "张堰镇", "亭林镇",
            "吕巷镇", "廊下镇", "金山卫镇", "漕泾镇", "山阳镇")));
    DISTRICT_STREETS.put("松江区", new HashSet<>(Arrays.asList(
            "岳阳街道", "永丰街道", "方松街道", "中山街道", "广富林街道", "九里亭街道",
            "泗泾镇", "佘山镇", "车墩镇", "新桥镇", "洞泾镇", "九亭镇",
            "泖港镇", "石湖荡镇", "新浜镇", "叶榭镇", "小昆山镇")));
    DISTRICT_STREETS.put("青浦区", new HashSet<>(Arrays.asList(
            "夏阳街道", "盈浦街道", "香花桥街道", "朱家角镇",
            "练塘镇", "金泽镇", "赵巷镇", "徐泾镇", "华新镇",
            "重固镇", "白鹤镇")));
    DISTRICT_STREETS.put("奉贤区", new HashSet<>(Arrays.asList(
            "西渡街道", "奉浦街道", "南桥镇", "庄行镇", "金汇镇",
            "柘林镇", "青村镇", "奉城镇", "四团镇", "海湾镇")));
    DISTRICT_STREETS.put("崇明区", new HashSet<>(Arrays.asList(
            "城桥镇", "堡镇", "新河镇", "庙镇", "竖新镇",
            "向化镇", "三星镇", "港沿镇", "中兴镇", "陈家镇",
            "绿华镇", "港西镇", "建设镇", "新海镇", "东平镇")));
}
- /***
- * 根据地名地址返回到街镇一级分词,
- * @param address 上海市松江区车墩镇乐都路590号
- * @return 上海市,松江区,车墩镇,乐都路590号
- */
- public static String[] parseAddressJZ(String address) {
- String[] result = new String[4];
- result[0] = "上海市";
- // 去除地址中的“上海”和“上海市”
- address = address.replaceAll("上海(市)?", "");
- // 查找区
- String foundDistrict = null;
- for (String district : SHANGHAI_DISTRICTS) {
- if (address.contains(district)) {
- foundDistrict = district;
- address = address.replace(district, "");
- break;
- }
- }
- result[1] = foundDistrict;
- // 查找街镇
- String foundStreet = null;
- if (foundDistrict != null) {
- Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
- if (streets != null) {
- for (String street : streets) {
- if (address.contains(street)) {
- foundStreet = street;
- address = address.replace(street, "");
- break;
- } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
- foundStreet = street;
- address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
- break;
- }
- }
- }
- }
- result[2] = foundStreet;
- // 剩余部分为其他信息
- address = address.trim();
- result[3] = address;
- return result;
- }
- /***
- * 根据地名地址返回到村居一级分词,村居不是很准确
- * @param address 上海市松江区车墩镇乐都村乐都路590号
- * @return 上海市,松江区,车墩镇,乐都村,乐都路590号
- */
- public static String[] parseAddressCJ(String address) {
- String[] result = new String[5];
- result[0] = "上海市";
- // 去除地址中的“上海”和“上海市”
- address = address.replaceAll("上海(市)?", "");
- // 先尝试通过村居信息定位
- String foundVillageOrCommunity = null;
- String foundStreet = null;
- String foundDistrict = null;
- for (Map.Entry<String, Set<String>> streetEntry : STREET_VILLAGES_COMMUNITIES.entrySet()) {
- String street = streetEntry.getKey();
- Set<String> villagesCommunities = streetEntry.getValue();
- for (String villageCommunity : villagesCommunities) {
- if (address.contains(villageCommunity)) {
- foundVillageOrCommunity = villageCommunity;
- foundStreet = street;
- address = address.replace(villageCommunity, "");
- break;
- } else {
- String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
- // 避免与街镇简称冲突
- String streetShortName = street.replace("镇", "").replace("街道", "");
- if (!shortName.equals(streetShortName) && address.contains(shortName)) {
- foundVillageOrCommunity = villageCommunity;
- foundStreet = street;
- address = address.replace(shortName, "");
- break;
- }
- }
- }
- if (foundVillageOrCommunity != null) {
- break;
- }
- }
- // 如果通过村居定位到了街镇,再定位区
- if (foundStreet != null) {
- for (Map.Entry<String, Set<String>> districtEntry : DISTRICT_STREETS.entrySet()) {
- String district = districtEntry.getKey();
- Set<String> streets = districtEntry.getValue();
- if (streets.contains(foundStreet)) {
- foundDistrict = district;
- break;
- }
- }
- }
- // 如果没有通过村居定位到信息,再按常规流程查找区、街镇、村居
- if (foundDistrict == null) {
- // 查找区
- for (String district : SHANGHAI_DISTRICTS) {
- if (address.contains(district)) {
- foundDistrict = district;
- address = address.replace(district, "");
- break;
- }
- }
- }
- if (foundStreet == null && foundDistrict != null) {
- // 查找街镇
- Set<String> streets = DISTRICT_STREETS.get(foundDistrict);
- if (streets != null) {
- for (String street : streets) {
- if (address.contains(street)) {
- foundStreet = street;
- address = address.replace(street, "");
- break;
- } else if (address.contains(street.replace("镇", "")) || address.contains(street.replace("街道", ""))) {
- foundStreet = street;
- address = address.replace(street.replace("镇", ""), "").replace(street.replace("街道", ""), "");
- break;
- }
- }
- }
- }
- if (foundVillageOrCommunity == null && foundStreet != null) {
- // 查找村或居委
- Set<String> villagesCommunities = STREET_VILLAGES_COMMUNITIES.get(foundStreet);
- if (villagesCommunities != null) {
- for (String villageCommunity : villagesCommunities) {
- if (address.contains(villageCommunity)) {
- foundVillageOrCommunity = villageCommunity;
- address = address.replace(villageCommunity, "");
- break;
- } else {
- String shortName = villageCommunity.replace("村", "").replace("社区居委会", "").replace("居委会", "");
- // 避免与街镇简称冲突
- String streetShortName = foundStreet.replace("镇", "").replace("街道", "");
- if (!shortName.equals(streetShortName) && address.contains(shortName)) {
- foundVillageOrCommunity = villageCommunity;
- address = address.replace(shortName, "");
- break;
- }
- }
- }
- }
- }
- // 处理别名残留问题
- if (foundVillageOrCommunity != null) {
- String[] aliases = {"居委", "居委会", "村", "新村"};
- for (String alias : aliases) {
- if (address.startsWith(alias)) {
- address = address.substring(alias.length());
- break;
- }
- }
- }
- // 移除剩余地址中可能残留的区和街镇信息
- if (foundDistrict != null) {
- address = address.replace(foundDistrict, "");
- }
- if (foundStreet != null) {
- address = address.replace(foundStreet, "");
- }
- // 去除多余的空白字符
- address = address.replaceAll(" ", "");
- // 去除街镇
- address = AddressQueryEngine.townReplaceAll(address);
- result[1] = foundDistrict;
- result[2] = foundStreet;
- result[3] = foundVillageOrCommunity;
- result[4] = address;
- return result;
- }
- /***
- * 获取array中,指定字段与address匹配值最高的数据,特定方法,指定的jsonarray使用
- * @param address 上海市松江区乐都路590号
- * @param array 地名地址接口返回的jsonarray数据
- * @param param jaonarray中地名地址字段的key
- * @return
- */
- public JSONObject findBestMatch(String address, JSONArray array, String param) {
- JSONObject bestMatch = null;
- double maxTotalScore = 0;
- // 处理输入地址的分词和数字前文本
- AddressInfo addressInfo = processAddress(address);
- for (int i = 0; i < array.size(); i++) {
- JSONObject obj = array.getJSONObject(i);
- obj.put("searchAddress", address);
- // && obj.getString(param).contains(Constant.getArea())
- if (obj.containsKey(param) && obj.get(param) != null && !obj.getString(param).trim().isEmpty()) {
- // 得到返回的地址
- String addr = obj.getString(param);
- // 规则4判断
- // TODO 添加校验逻辑(首先使用第4校验规则匹配,匹配不到使用第二规则,还匹配不到的话就使用打分规则)
- Set<String> addressString = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(0);
- Set<String> addressNumber = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(1);
- Set<String> address2String = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(0);
- Set<String> address2Number = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(1);
- if (addressString != null && addressString.size() > 0) {
- int addressStrSize = addressString.size();
- for (String addr2str : address2String) {
- if (addressString.contains(addr2str)) {
- addressStrSize--;
- if (addressStrSize == 0) {
- if (addressNumber.size() == 0) {
- obj.put("总分", "rule_4");
- return obj;
- } else {
- int addressNumSize = addressNumber.size();
- for (String addr2Num : address2Number) {
- if (addressNumber.contains(addr2Num)) {
- addressNumSize--;
- if (addressNumSize == 0) {
- obj.put("总分", "rule_4");
- return obj;
- }
- }
- }
- }
- }
- }
- }
- }
- // 规则2判断
- String role2address = AddressQueryEngine.townReplaceAll(AddressQueryEngine.addressReplaceAll(addr));
- String role2address2 = AddressQueryEngine.townReplaceAll(AddressQueryEngine.addressReplaceAll(address));
- if (AddressQueryEngine.isNotEmptyOrBlank(role2address) && role2address.contains(role2address2)) {
- obj.put("总分", "rule_2");
- return obj;
- }
- AddressInfo addrInfo = processAddress(addr);
- // 第一步:全词匹配比例
- double score1 = calculateFullWordMatchScore(address, addr);
- // 第二步:数字匹配得分
- double score2 = calculateNumberMatchScore(addressInfo.firstNumber, addrInfo.firstNumber);
- // 第三步:数字前文本匹配得分
- double score3 = calculatePrefixTextMatchScore(addressInfo.prefixText, addrInfo.prefixText);
- double totalScore = score1 + score2 + score3;
- if (totalScore > maxTotalScore) {
- maxTotalScore = totalScore;
- bestMatch = obj;
- bestMatch.put("计分1", score1);
- bestMatch.put("计分2", score2);
- bestMatch.put("计分3", score3);
- bestMatch.put("总分", totalScore);
- }
- }
- }
- return bestMatch;
- }
- public static boolean isOtherDistrictThanSongJiang(String address) {
- // 将地址字符串转换为小写,以便进行不区分大小写的比较
- String lowerCaseAddress = address.toLowerCase();
- // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
- // 列出上海市的其他区(已补充完整)
- String[] otherDistricts = {"北京市", "天津市", "重庆市",
- "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
- "江苏省", "浙江省", "安徽省", "福建省", "江西省",
- "山东省", "河南省", "湖北省", "湖南省", "广东省",
- "海南省", "四川省", "贵州省", "云南省", "陕西省",
- "甘肃省", "青海省", "台湾省",
- "内蒙古自治区", "广西壮族自治区", "西藏自治区",
- "宁夏回族自治区", "新疆维吾尔自治区",
- "香港特别行政区", "澳门特别行政区", "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区", "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区", "金山区", "青浦区", "奉贤区", "崇明区", "浦东新区"};
- for (String district : otherDistricts) {
- if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 2)) == 0) {
- return false;
- }
- }
- return true;
- }
- public static boolean isOtherDistrictThanSongJiang2(String address) {
- // 将地址字符串转换为小写,以便进行不区分大小写的比较
- String lowerCaseAddress = address.toLowerCase();
- // 检查地址是否包含 "青浦区" 字样,如果不包含,则可能是其他区
- // 列出上海市的其他区(已补充完整)
- String[] otherDistricts = {"北京市", "天津市", "重庆市",
- "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
- "江苏省", "浙江省", "安徽省", "福建省", "江西省",
- "山东省", "河南省", "湖北省", "湖南省", "广东省",
- "海南省", "四川省", "贵州省", "云南省", "陕西省",
- "甘肃省", "青海省", "台湾省",
- "内蒙古自治区", "广西壮族自治区", "西藏自治区",
- "宁夏回族自治区", "新疆维吾尔自治区",
- "香港特别行政区", "澳门特别行政区"};
- for (String district : otherDistricts) {
- if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 3)) == 0) {
- return false;
- }
- }
- return true;
- }
- public static String isOtherDistrictThanShangHai(String address) {
- // 将地址字符串转换为小写,以便进行不区分大小写的比较
- String lowerCaseAddress = address.toLowerCase();
- // 列出上海市的其他区(已补充完整)
- String[] otherDistricts = {"上海市", "北京市", "天津市", "重庆市",
- "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
- "江苏省", "浙江省", "安徽省", "福建省", "江西省",
- "山东省", "河南省", "湖北省", "湖南省", "广东省",
- "海南省", "四川省", "贵州省", "云南省", "陕西省",
- "甘肃省", "青海省", "台湾省",
- "内蒙古自治区", "广西壮族自治区", "西藏自治区",
- "宁夏回族自治区", "新疆维吾尔自治区",
- "香港特别行政区", "澳门特别行政区"};
- for (String district : otherDistricts) {
- if (lowerCaseAddress.contains(district.toLowerCase()) || lowerCaseAddress.indexOf(district.substring(0, 3)) == 0) {
- return district;
- }
- }
- return "上海市";
- }
- /***
- * 其他方法的引用方法,不用管
- * @param input
- * @return
- */
- private AddressInfo processAddress(String input) {
- String prefixText = "";
- String firstNumber = null;
- List<String> tokens = new ArrayList<>();
- Pattern pattern = Pattern.compile("\\d+|[^\\d]+");
- Matcher matcher = pattern.matcher(input);
- boolean foundNumber = false;
- while (matcher.find()) {
- String token = matcher.group();
- tokens.add(token);
- if (!foundNumber && token.matches("\\d+")) {
- firstNumber = token;
- foundNumber = true;
- }
- }
- if (tokens.size() > 0 && !foundNumber) {
- prefixText = input;
- } else if (tokens.size() > 0 && firstNumber != null) {
- int index = tokens.indexOf(firstNumber);
- for (int i = 0; i < index; i++) {
- prefixText += tokens.get(i);
- }
- }
- return new AddressInfo(prefixText, firstNumber);
- }
- /***
- * 其他方法的引用方法,不用管
- * @param inputAddr 搜索地址
- * @param shortAddr 返回地址
- * @return
- */
- private double calculateFullWordMatchScore(String inputAddr, String shortAddr) {
- int totalLength = Math.max(inputAddr.length(), shortAddr.length());
- int commonCount = 0;
- for (char c : inputAddr.toCharArray()) {
- if (shortAddr.indexOf(c) != -1) {
- commonCount++;
- }
- }
- return (double) commonCount / totalLength;
- }
- /***
- * 其他方法的引用方法,不用管
- * @param addressNumber
- * @param addrNumber
- * @return
- */
- private double calculateNumberMatchScore(String addressNumber, String addrNumber) {
- if (addressNumber == null || addrNumber == null) {
- return 1; // 都没有数字或其中一个没有,认为这一步匹配满分
- }
- if (addressNumber.equals(addrNumber)) {
- return 1;
- }
- try {
- int num1 = Integer.parseInt(addressNumber);
- int num2 = Integer.parseInt(addrNumber);
- int diff = Math.abs(num1 - num2);
- if (diff <= 200) {
- return 1 - (double) diff / 200;
- }
- } catch (NumberFormatException e) {
- return 0;
- }
- return 0;
- }
- /***
- * 其他方法的引用方法,不用管
- * @param addressPrefix
- * @param addrPrefix
- * @return
- */
- private double calculatePrefixTextMatchScore(String addressPrefix, String addrPrefix) {
- if (addressPrefix.isEmpty() || addrPrefix.isEmpty()) {
- return 0;
- }
- double matchScore = calculateFullWordMatchScore(addressPrefix, addrPrefix);
- return matchScore >= 0.65 ? matchScore : 0;
- }
- private static class AddressInfo {
- String prefixText;
- String firstNumber;
- AddressInfo(String prefixText, String firstNumber) {
- this.prefixText = prefixText;
- this.firstNumber = firstNumber;
- }
- }
- /***
- * 去除特殊字符,仅保留中文、数字、字母
- * @param address
- * @return
- */
- public String deleteStr(String address) {
- if (address == null) {
- return null;
- }
- // 使用正则表达式替换所有非中文和非数字的字符为空字符串
- return address.replaceAll("[^\\u4e00-\\u9fa5\\da-zA-Z]", "");
- }
- // 静态资源:中国所有省份、地级市和县级市的名称
- private static final Set<String> PROVINCES = new HashSet<>(Arrays.asList(
- "北京市", "天津市", "上海市", "重庆市",
- "河北省", "山西省", "辽宁省", "吉林省", "黑龙江省",
- "江苏省", "浙江省", "安徽省", "福建省", "江西省",
- "山东省", "河南省", "湖北省", "湖南省", "广东省",
- "海南省", "四川省", "贵州省", "云南省", "陕西省",
- "甘肃省", "青海省", "台湾省",
- "内蒙古自治区", "广西壮族自治区", "西藏自治区",
- "宁夏回族自治区", "新疆维吾尔自治区",
- "香港特别行政区", "澳门特别行政区"
- ));
- private static final Set<String> CITIES = new HashSet<>(Arrays.asList(
- // 这里只列举部分示例,实际需要完整的地级市和县级市列表
- "石家庄市", "唐山市", "秦皇岛市", "邯郸市", "邢台市",
- "保定市", "张家口市", "承德市", "沧州市", "廊坊市",
- "衡水市", "太原市", "大同市", "阳泉市", "长治市",
- "晋城市", "朔州市", "晋中市", "运城市", "忻州市",
- "临汾市", "吕梁市", "沈阳市", "大连市", "鞍山市",
- "抚顺市", "本溪市", "丹东市", "锦州市", "营口市",
- "阜新市", "辽阳市", "盘锦市", "铁岭市", "朝阳市",
- "葫芦岛市", "长春市", "吉林市", "四平市", "辽源市",
- "通化市", "白山市", "松原市", "白城市", "延边朝鲜族自治州",
- "哈尔滨市", "齐齐哈尔市", "鸡西市", "鹤岗市", "双鸭山市",
- "大庆市", "伊春市", "佳木斯市", "七台河市", "牡丹江市",
- "黑河市", "绥化市", "大兴安岭地区", "南京市", "无锡市",
- "徐州市", "常州市", "苏州市", "南通市", "连云港市",
- "淮安市", "盐城市", "扬州市", "镇江市", "泰州市",
- "宿迁市", "杭州市", "宁波市", "温州市", "嘉兴市",
- "湖州市", "绍兴市", "金华市", "衢州市", "舟山市",
- "台州市", "丽水市", "合肥市", "芜湖市", "蚌埠市",
- "淮南市", "马鞍山市", "淮北市", "铜陵市", "安庆市",
- "黄山市", "滁州市", "阜阳市", "宿州市", "六安市",
- "亳州市", "池州市", "宣城市", "福州市", "厦门市",
- "莆田市", "三明市", "泉州市", "漳州市", "南平市",
- "龙岩市", "宁德市", "南昌市", "景德镇市", "萍乡市",
- "九江市", "新余市", "鹰潭市", "赣州市", "吉安市",
- "宜春市", "抚州市", "上饶市", "济南市", "青岛市",
- "淄博市", "枣庄市", "东营市", "烟台市", "潍坊市",
- "济宁市", "泰安市", "威海市", "日照市", "临沂市",
- "德州市", "聊城市", "滨州市", "菏泽市", "郑州市",
- "开封市", "洛阳市", "平顶山市", "安阳市", "鹤壁市",
- "新乡市", "焦作市", "濮阳市", "许昌市", "漯河市",
- "三门峡市", "南阳市", "商丘市", "信阳市", "周口市",
- "驻马店市", "武汉市", "黄石市", "十堰市", "宜昌市",
- "襄阳市", "鄂州市", "荆门市", "孝感市", "荆州市",
- "黄冈市", "咸宁市", "随州市", "恩施土家族苗族自治州",
- "长沙市", "株洲市", "湘潭市", "衡阳市", "邵阳市",
- "岳阳市", "常德市", "张家界市", "益阳市", "郴州市",
- "永州市", "怀化市", "娄底市", "湘西土家族苗族自治州",
- "广州市", "韶关市", "深圳市", "珠海市", "汕头市",
- "佛山市", "江门市", "湛江市", "茂名市", "肇庆市",
- "惠州市", "梅州市", "汕尾市", "河源市", "阳江市",
- "清远市", "东莞市", "中山市", "潮州市", "揭阳市",
- "云浮市", "海口市", "三亚市", "三沙市", "儋州市",
- "成都市", "自贡市", "攀枝花市", "泸州市", "德阳市",
- "绵阳市", "广元市", "遂宁市", "内江市", "乐山市",
- "南充市", "眉山市", "宜宾市", "广安市", "达州市",
- "雅安市", "巴中市", "资阳市", "阿坝藏族羌族自治州",
- "甘孜藏族自治州", "凉山彝族自治州", "贵阳市", "六盘水市",
- "遵义市", "安顺市", "毕节市", "铜仁市", "黔西南布依族苗族自治州",
- "黔东南苗族侗族自治州", "黔南布依族苗族自治州", "昆明市", "曲靖市",
- "玉溪市", "保山市", "昭通市", "丽江市", "普洱市", "临沧市",
- "楚雄彝族自治州", "红河哈尼族彝族自治州", "文山壮族苗族自治州",
- "西双版纳傣族自治州", "大理白族自治州", "德宏傣族景颇族自治州",
- "怒江傈僳族自治州", "迪庆藏族自治州", "西安市", "铜川市", "宝鸡市",
- "咸阳市", "渭南市", "延安市", "汉中市", "榆林市", "安康市", "商洛市",
- "兰州市", "嘉峪关市", "金昌市", "白银市", "天水市", "武威市",
- "张掖市", "平凉市", "酒泉市", "庆阳市", "定西市", "陇南市",
- "临夏回族自治州", "甘南藏族自治州", "西宁市", "海东市",
- "海北藏族自治州", "黄南藏族自治州", "海南藏族自治州",
- "果洛藏族自治州", "玉树藏族自治州", "海西蒙古族藏族自治州",
- "台北市", "新北市", "桃园市", "台中市", "台南市", "高雄市",
- "基隆市", "新竹市", "嘉义市", "澳门市", "香港市"
- ));
- public static String ifCITIES(String address) {
- for (String item : CITIES) {
- if (address.contains(item)) {
- return item;
- }
- }
- return null;
- }
- /***
- * 去除地名地址的前缀,一直到镇/街道
- * @param address
- * @return
- */
- public static String quchuqianzhui(String address) {
- // 先去除 "中国"
- address = address.replace("中国", "");
- // 去除省份
- for (String province : PROVINCES) {
- if (address.startsWith(province)) {
- address = address.substring(province.length());
- }
- }
- // 去除市
- for (String city : CITIES) {
- if (address.startsWith(city)) {
- address = address.substring(city.length());
- } else {
- // 处理别名情况,例如 "安庆" 代表 "安庆市"
- String alias = city.replace("市", "");
- if (address.startsWith(alias)) {
- address = address.substring(alias.length());
- }
- }
- }
- // 去除区、镇、街道
- address = address.replaceAll("^.*?(区|镇|街道)", "");
- // 处理错误数据情况
- if (address.trim().isEmpty()) {
- // 尝试从原始地址中提取最后一个有效的市名称
- for (int i = CITIES.size() - 1; i >= 0; i--) {
- String city = CITIES.toArray(new String[0])[i];
- if (address.contains(city)) {
- return city;
- }
- String alias = city.replace("市", "");
- if (address.contains(alias)) {
- return city;
- }
- }
- // 如果没有找到市名称,检查是否有省份名称
- for (String province : PROVINCES) {
- if (address.contains(province)) {
- return province;
- }
- }
- }
- return address.trim();
- }
- private static final String ADDRESS_REGEX = "^[^市]+市[^区]+区(?:[^镇]+镇|[^街道]+街道).+$";
- private static final Pattern ADDRESS_PATTERN = Pattern.compile(ADDRESS_REGEX);
- /**
- * 验证地址格式的正则表达式
- */
- public static boolean validateAddress(String address) {
- if (address == null || address.trim().isEmpty()) {
- return false;
- }
- return ADDRESS_PATTERN.matcher(address).matches();
- }
- }
|