123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- package com.skyversation.poiaddr.util;
- import java.io.InputStream;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.util.stream.Collectors;
- public class AddressSplitUtil {
- /**省级行政单位简称->全称映射表
- * 只读,请勿修改
- * */
- public static final Map<String,String> LEVEL_1_NAME_MAP;
- /**市级行政单位简称->全称映射表
- * 只读,请勿修改
- * */
- public static final Map<String,String> LEVEL_2_NAME_MAP;
- /**县级行政单位简称->全称映射表
- * 只读,请勿修改
- * */
- public static final Map<String,String> LEVEL_3_NAME_MAP;
- /**省市县三级行政单位简称树
- * 只读,请勿修改
- * */
- public static final Map<String,Map<String,Set<String>>> SIMPLE_NAME_TREE;
- private static final Map<String,Set<String>> All_CITY_IN_TREE;
- private static final Pattern LEVEL_1_SUFFIX_PATTERN = Pattern.compile("^(?:维吾尔|((?:(?!省|市|自治区).)*?族))?(?:省|市|自治区)");
- private static final Pattern LEVEL_2_SUFFIX_PATTERN = Pattern.compile("^(?:市|自治州|地区|盟)");
- private static final Pattern LEVEL_3_SUFFIX_PATTERN = Pattern.compile("^(?:县|自治县|市|区|旗|自治旗|林区|特区)");
- public static final Pattern ROAD_SUFFIX_PATTERN = Pattern.compile("^(?:旅游区|[东南西北中一二三四五六七八九十公大小支新老]{0,2}(?:大街|路|大道|街|菜市场|马路|村))");
- static {
- Map<String,String> level1NameMap = new HashMap<>();
- Map<String,String> level2NameMap = new HashMap<>();
- Map<String,String> level3NameMap = new HashMap<>();
- Map<String,Map<String,Set<String>>> simpleNameTree = new HashMap<>();
- String file = "全国省市县记录.xlsx";
- InputStream is = AddressSplitUtil.class.getResourceAsStream(file);
- if (is==null) is= AddressSplitUtil.class.getResourceAsStream("/"+file);
- if (is==null) throw new RuntimeException("无法找到"+file);
- try {
- List<Map<String, Object>> list = ExcelReaderUtils.readExcel(is);
- for (Map<String,Object> row : list) {
- Object level1Name = row.get("省份");
- Object level1SimpleName = row.get("省份简称");
- Object level2Name = row.get("地级市");
- Object level2SimpleName = row.get("地级市简称");
- Object level3Name = row.get("县级市");
- Object level3SimpleName = row.get("县级市简称");
- if (level1SimpleName != null && level1Name!=null) {
- level1NameMap.put(level1SimpleName.toString(), level1Name.toString());
- }
- if (level2SimpleName != null && level2Name!=null) {
- level2NameMap.put(level2SimpleName.toString(), level2Name.toString());
- }
- if (level3SimpleName != null && level3Name!=null) {
- level3NameMap.put(level3SimpleName.toString(), level3Name.toString());
- }
- if (level1SimpleName!=null){
- if (!simpleNameTree.containsKey(level1SimpleName.toString())){
- simpleNameTree.put(level1SimpleName.toString(),new HashMap<>());
- }
- Map<String, Set<String>> level2Map = simpleNameTree.get(level1SimpleName.toString());
- if (level2SimpleName!=null){
- if(!level2Map.containsKey(level2SimpleName.toString())){
- level2Map.put(level2SimpleName.toString(),new HashSet<>());
- }
- if (level3SimpleName!=null){
- level2Map.get(level2SimpleName.toString()).add(level3SimpleName.toString());
- }
- }
- }
- }
- LEVEL_1_NAME_MAP = Collections.unmodifiableMap(level1NameMap);
- LEVEL_2_NAME_MAP = Collections.unmodifiableMap(level2NameMap);
- LEVEL_3_NAME_MAP = Collections.unmodifiableMap(level3NameMap);
- Map<String,Map<String,Set<String>>> simpleNameTree_= new HashMap<>();
- for (String key : simpleNameTree.keySet()){
- simpleNameTree_.put(key , Collections.unmodifiableMap(simpleNameTree.get(key)));
- }
- SIMPLE_NAME_TREE= Collections.unmodifiableMap(simpleNameTree_);
- All_CITY_IN_TREE= Collections.unmodifiableMap(SIMPLE_NAME_TREE.values().stream()
- .flatMap(map -> map.entrySet().stream())
- .collect(Collectors.toMap(
- Map.Entry::getKey,
- Map.Entry::getValue,
- (oldValue, newValue) -> newValue
- ))
- );
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
- private static class SplittingAddress {
- int province = -1;
- int city = -1;
- int county = -1;
- Map<Integer,String> provinceInChoose = new HashMap<>();
- Map<Integer,String> cityInChoose = new HashMap<>();
- Map<Integer,String> countyInChoose = new HashMap<>();
- String sourceAddress ;
- SplittingAddress(String sourceAddress){
- this.sourceAddress=sourceAddress;
- }
- String[] toStringList(){
- String[] output = new String[4];
- output[0]=((province!=-1)?LEVEL_1_NAME_MAP.get(provinceInChoose.get(province)):"");
- output[1]=((city!=-1)?LEVEL_2_NAME_MAP.get(cityInChoose.get(city)):"");
- output[2]=((county!=-1)?LEVEL_3_NAME_MAP.get(countyInChoose.get(county)):"");
- return output;
- }
- String getOtherAddress(){
- int max = Math.max(province,Math.max(county,city));
- String maxName = "";
- if(max==-1){
- return sourceAddress;
- }
- if (province==max){
- maxName=provinceInChoose.get(province);
- }
- if (city==max){
- maxName=cityInChoose.get(city);
- }
- if (county==max){
- maxName=countyInChoose.get(county);
- }
- String sub = sourceAddress.substring(max+maxName.length());
- Matcher m = LEVEL_1_SUFFIX_PATTERN.matcher(sub);
- if (m.find()){
- sub = sub.substring(m.end());
- }
- m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
- if (m.find()){
- sub = sub.substring(m.end());
- }
- m = LEVEL_3_SUFFIX_PATTERN.matcher(sub);
- if (m.find()){
- sub = sub.substring(m.end());
- }
- return sub;
- }
- void findProvince(){
- Map<Integer,String> results = contain(this.sourceAddress,SIMPLE_NAME_TREE.keySet());
- for (int index : results.keySet()){
- String name = results.get(index);
- String sub = this.sourceAddress.substring(index+name.length());
- //去除南京路,北京大道型选手
- if (ROAD_SUFFIX_PATTERN.matcher(sub).find()){
- continue;
- }
- provinceInChoose.put(index,name);
- //匹配到后缀时直接当做第一选择
- if (LEVEL_1_SUFFIX_PATTERN.matcher(sub).find()){
- province = index;
- }
- }
- //仅有一个选择时当成一选
- if (provinceInChoose.size()==1){
- province = (int)provinceInChoose.keySet().toArray()[0];
- }
- }
- void findCity(){
- Map<Integer,String> results =null;
- //首先尝试在一选下匹配
- if (province!=-1){
- results = contain(this.sourceAddress,SIMPLE_NAME_TREE.get(provinceInChoose.
- get(province)).keySet());
- }
- //一选不存在或匹配无结果,直接搜全国
- if (results==null||results.isEmpty()){
- results = contain(this.sourceAddress,LEVEL_2_NAME_MAP.keySet());
- }
- Iterator<Integer> iterator = results.keySet().iterator();
- while (iterator.hasNext()) {
- int key = iterator.next();
- String name = results.get(key);
- if (key > 0 && name.equals("南县") &&"滦辉甘桦灌苍阜屏定全沂莒汝衡南郁平宁思广洛商南".indexOf(sourceAddress.charAt(key - 1)) != -1) {
- iterator.remove();
- }
- }
- for (int index : results.keySet()){
- String name = results.get(index);
- String sub = this.sourceAddress.substring(index+name.length());
- //去除南京路,北京大道型选手
- if (ROAD_SUFFIX_PATTERN.matcher(sub).find()){
- continue;
- }
- cityInChoose.put(index,name);
- //匹配到后缀时直接当做第一选择
- if (LEVEL_2_SUFFIX_PATTERN.matcher(sub).find()){
- city = index;
- }
- }
- //仅有一个选择时当成一选
- if (cityInChoose.size()==1){
- city = (int)cityInChoose.keySet().toArray()[0];
- }
- }
- void findCounty(){
- Map<Integer,String> results = null;
- //尝试一选
- if (city!=-1){
- results=contain(sourceAddress,All_CITY_IN_TREE.get(cityInChoose.get(city)));
- }
- //一选不存在或匹配无结果,先搜全省
- if ((results == null || results.isEmpty()) && province != -1) {
- results = contain(sourceAddress, SIMPLE_NAME_TREE.get(provinceInChoose.get(province)).values().stream().
- flatMap(Set::stream).collect(Collectors.toSet()));
- }
- //最后全国
- if (results == null || results.isEmpty()) {
- results = contain(sourceAddress, LEVEL_3_NAME_MAP.keySet());
- }
- for (int index : results.keySet()) {
- String name = results.get(index);
- String sub = this.sourceAddress.substring(index + name.length());
- //去除南京路,北京大道型选手
- if (ROAD_SUFFIX_PATTERN.matcher(sub).find()) {
- continue;
- }
- countyInChoose.put(index, name);
- //匹配到后缀时直接当做第一选择
- if (LEVEL_3_SUFFIX_PATTERN.matcher(sub).find()) {
- county = index;
- }
- }
- //仅有一个选择时当成一选
- if (countyInChoose.size()==1){
- county = (int)countyInChoose.keySet().toArray()[0];
- }
- }
- }
- /**
- * 检查字符串含有哪些字符,输出这些匹配字符的位置和字符的map
- * @param s 被检查字符串
- * @param nameList 检查范围
- */
- private static Map<Integer,String> contain(String s,Iterable<String> nameList){
- Map<Integer,String> output = new HashMap<Integer,String>();
- for (String name:nameList){
- if (name.isEmpty())continue;
- int index = -1;
- while ((index = s.indexOf(name, index + 1)) != -1){
- output.put(index,name);
- }
- }
- return output;
- }
- /**
- * 分离地址字符串,请优先使用shanghaiAddressSplitUtil,此类只分词到县<br/>
- * 注意,当输入的地址错误时不会自动修正,未找到的级会被空置<br/>
- * 例如输入"北京青浦区盈港路515号1061室" ,输出[北京市,北京市,青浦区,盈港路515号1061室]<br/>
- * 输入"安徽怀宁县黄墩镇老埂村双闸组" ,输出[安徽省,怀宁县,黄墩镇,盈港路515号1061室]<br/>
- * @return 结果为[省级,城级,县级,余下的部分],分离失败则返回null<br/>
- * @see ShanghaiAddressSplitUtil
- */
- public static String[] splitAddress(String address){
- SplittingAddress a = new SplittingAddress(address.replaceAll("\\s+",""));
- a.findProvince();
- a.findCity();
- a.findCounty();
- String[] output = a.toStringList();
- output[3]=(a.getOtherAddress());
- return output;
- }
- //测试用
- public static void main(String[] args) {
- System.out.println(Arrays.toString(splitAddress("安徽省安徽省颍上县垂岗乡陶嘴村东道场31号")));
- System.out.println(Arrays.toString(splitAddress("荣乐西路1058弄32号501室")));
- System.out.println(Arrays.toString(splitAddress("泗泾镇新家园路30弄21号402室")));
- System.out.println(Arrays.toString(splitAddress("山东省山东省单县莱河镇宋楼行政村霍井村041号")));
- System.out.println(Arrays.toString(splitAddress("安徽省五河县安徽省五河县朱顶乡胡庄村447号")));
- System.out.println(Arrays.toString(splitAddress("九亭镇九亭大街506弄22号101室")));
- System.out.println(Arrays.toString(splitAddress("陕西省宝鸡市凤翔区陕西省凤翔区尹家务乡槐中村5组024号")));
- System.out.println(Arrays.toString(splitAddress("江苏省海门市江苏省海门市正余镇王灶河村十三组36号")));
- System.out.println(Arrays.toString(splitAddress("泗泾镇古楼公路519弄1号1102室")));
- System.out.println(Arrays.toString(splitAddress("奉贤县奉城镇奉粮路115号")));
- System.out.println(Arrays.toString(splitAddress("上海市奉贤区南桥镇沪杭支路24号14幢165室")));
- System.out.println(Arrays.toString(splitAddress("浦东新区周浦镇年家浜路10、12号1层")));
- }
- }
|