Parcourir la source

添加外省判断逻辑

DESKTOP-6LTVLN7\Liumouren il y a 1 semaine
Parent
commit
aa0405a196

+ 13 - 9
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -5,7 +5,9 @@ import com.alibaba.fastjson.JSONObject;
 import com.skyversation.poiaddr.bean.AddressResult;
 import com.skyversation.poiaddr.bean.GeoJsonBean;
 import com.skyversation.poiaddr.bean.YyskDmdzAddressStandardization;
+import com.skyversation.poiaddr.entity.AddrBean;
 import com.skyversation.poiaddr.service.AreaService;
+import com.skyversation.poiaddr.util.AddrSplitLmrMap;
 import com.skyversation.poiaddr.util.ShanghaiAddressSplitUtil;
 import com.skyversation.poiaddr.util.SplitAddress;
 import com.skyversation.poiaddr.util.net.AddressNetTools;
@@ -65,24 +67,26 @@ public class AddressQueryEngine {
      * @return
      */
     public AddressResult commonSearchByName_nw(String addr) {
-        if (!StringUtils.hasText(addr) || addr.length() < 3) {
+        if (!StringUtils.hasText(addr) || addr.length() <= 3) {
             return null;
         }
         AddressResult addressResult = new AddressResult();
-        SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(addr);
-        if (splitAddress.getStatus() == 2) { // 外省数据
+        AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(addr);
+        if (lmrAddrBean.getRule() == null || Integer.parseInt(lmrAddrBean.getRule()) <= 0) {
             addressResult.setMessage("非上海数据");
             AddressResult.ContentBean content = new AddressResult.ContentBean();
             content.setSearchAddress(addr);
-            content.setPname(splitAddress.getProvince());
-            content.setCityname(splitAddress.getCity());
-            content.setAdname(splitAddress.getStreet());
-            content.setCommunity(splitAddress.getCommunity());
+            content.setPname(lmrAddrBean.getProvinces());
+            content.setCityname(lmrAddrBean.getMarket());
+            content.setAdname(lmrAddrBean.getStreetTown());
+            content.setCommunity(lmrAddrBean.getResidentialCommittee());
             content.setScore("rule_0");
             addressResult.setData(new ArrayList<>());
             addressResult.getData().add(content);
             return addressResult;
-        } else if (splitAddress.getStatus() == 3) {// 不是地址
+        }
+        SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(addr);
+        if (splitAddress.getStatus() == 3) {// 不是地址
             addressResult.setMessage("非地址数据");
             return addressResult;
         } else {
@@ -106,7 +110,7 @@ public class AddressQueryEngine {
                 addressResult = new TransfromDataTool().dbResultToResult(splitAddress1, addr, pois);
                 addressResult.setAddrBean(addrBean);
                 if (addressResult.getData() != null && addressResult.getData().size() > 0) {
-                    getCjWgWgwByLoc(addressResult,splitAddress);
+                    getCjWgWgwByLoc(addressResult, splitAddress);
                     addressResult.setCode(AddressResultEnum.DB_SUCCESS);
                     addressResult.setMessage("成功");
                     AddressResult.ContentBean content = addressResult.getData().get(0);

+ 30 - 0
src/main/java/com/skyversation/poiaddr/entity/AddrBean.java

@@ -0,0 +1,30 @@
+package com.skyversation.poiaddr.entity;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/** Unified address-segmentation result: the administrative parts recognised in an address plus the rule code that produced them. */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+public class AddrBean {
+    // Address as received, before any processing.
+    private String oldAddress;
+    // Province (省).
+    private String provinces;
+    // City (市); the field name "market" is a literal rendering of 市.
+    private String market;
+    // District (区); AddrSplitLmrMap also stores a matched out-of-province county here.
+    private String distinguish;
+    // Town / sub-district (镇).
+    private String streetTown;
+    // Village or residential committee (村 / 居委).
+    private String residentialCommittee;
+    // Road-level part of the address (路); AddrSplitLmrMap fills it with the text left after stripping recognised parts.
+    private String address;
+    // Standardised address.
+    private String standardAddress;
+    // Rule score, kept as a String; AddressQueryEngine parses it with Integer.parseInt and treats null or <= 0 as non-Shanghai.
+    private String rule;
+}

+ 274 - 0
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -0,0 +1,274 @@
+package com.skyversation.poiaddr.util;
+
+import com.skyversation.poiaddr.entity.AddrBean;
+import org.springframework.stereotype.Service;
+
+import javax.annotation.PostConstruct;
+import java.io.InputStream;
+import java.util.*;
+import java.util.regex.Pattern;
+
+/**
+ * # 生成完整的上海市县乡记录.xlsx
+ * * 1、读取村居边界.geojson\得到对应关系【所属区:区代码:所属街:居委_1】,同时得到区和区代码的对应关系
+ * * 2、遍历xlsx文件列表,然后解析returnAddress,进行分词
+ * * 3、主要是获取街镇和居委的对应关系【判断到街镇后,得到下标,然后判断后面是否存在居委会或村委会关键字】
+ * 当前版本:V2.0.1
+ */
+@Service
+public class AddrSplitLmrMap {
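+    //  Classpath resource paths of the two xlsx lookup tables: the nationwide province/city/county table and the Shanghai district/town/committee table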
+    private static String All_no_SHFilePath = "全国省市县记录.xlsx";
+    private static String outPutFilePath = "geojson/上海市_村居边界.xlsx";
+
+    //  《区—街镇-居委》的对应关系
+    private static HashMap<String, HashMap<String, Set<String>>> D_S_C_tree = new HashMap<>();
+    //    非上海的《省-市-区》的对应关系
+    private static HashMap<String, HashMap<String, Set<String>>> All_NO_SH_tree = new HashMap<>();
+    //  区和区代码的对应关系
+    private static HashMap<String, String> districtCodeMap = new HashMap<>();
+
+    /**
+     * Loads both lookup tables from the classpath into the static trees.
+     * <ul>
+     *   <li>{@code outPutFilePath} fills {@code D_S_C_tree} (district -> town -> committees)
+     *       and {@code districtCodeMap} (district -> district code);</li>
+     *   <li>{@code All_no_SHFilePath} fills {@code All_NO_SH_tree} (province -> city -> counties).</li>
+     * </ul>
+     * Rows with a missing or empty mandatory cell are skipped instead of aborting the whole
+     * load, and empty strings are never used as keys: an empty key would match every address
+     * through {@code contains("")} and break the {@code substring(0, 2)} calls made on keys.
+     * Each stream is closed once its table has been read.
+     *
+     * @throws RuntimeException if either resource cannot be found on the classpath
+     */
+    @PostConstruct
+    private void initFile() {
+        System.out.println("开始初始化分词器");
+        // Opened outside the try block so that a missing resource still fails start-up,
+        // exactly as before, instead of being swallowed by the catch below.
+        InputStream shanghaiStream = openTable(outPutFilePath);
+        try (InputStream in = shanghaiStream) {
+            for (Map<String, Object> row : ExcelReaderUtils.readExcel(in)) {
+                String district = cellText(row, "区");
+                String town = cellText(row, "镇");
+                String committee = cellText(row, "居委");
+                if (district == null || town == null || committee == null) {
+                    continue;
+                }
+                String districtCode = cellText(row, "区代码");
+                if (districtCode != null) {
+                    districtCodeMap.put(district, districtCode);
+                }
+                addBranch(D_S_C_tree, district, town, committee);
+            }
+        } catch (Exception e) {
+            // Best effort, as before: a read failure leaves the tree partially filled.
+            System.err.println("读取失败: " + outPutFilePath);
+            e.printStackTrace();
+        }
+        InputStream nationalStream = openTable(All_no_SHFilePath);
+        try (InputStream in = nationalStream) {
+            for (Map<String, Object> row : ExcelReaderUtils.readExcel(in)) {
+                String province = cellText(row, "省份");
+                if (province == null) {
+                    continue;
+                }
+                // The province is always registered; the city/county pair only when both exist.
+                addBranch(All_NO_SH_tree, province, cellText(row, "地级市"), cellText(row, "县级市"));
+            }
+        } catch (Exception e) {
+            System.err.println("读取失败: " + All_no_SHFilePath);
+            e.printStackTrace();
+        }
+    }
+
+    /** Opens a classpath resource, trying the relative path first and then the absolute one. */
+    private static InputStream openTable(String path) {
+        InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(path);
+        if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + path);
+        if (is == null) throw new RuntimeException("无法找到" + path);
+        return is;
+    }
+
+    /** Returns the cell's text, or null when the column is absent or the text is empty. */
+    private static String cellText(Map<String, Object> row, String column) {
+        Object value = row.get(column);
+        if (value == null) {
+            return null;
+        }
+        String text = value.toString();
+        return text.isEmpty() ? null : text;
+    }
+
+    /** Registers top in the tree and, when mid and leaf are both non-null, adds leaf under top -> mid. */
+    private static void addBranch(HashMap<String, HashMap<String, Set<String>>> tree, String top, String mid, String leaf) {
+        HashMap<String, Set<String>> children = tree.get(top);
+        if (children == null) {
+            children = new HashMap<>();
+            tree.put(top, children);
+        }
+        if (mid == null || leaf == null) {
+            return;
+        }
+        Set<String> leaves = children.get(mid);
+        if (leaves == null) {
+            leaves = new HashSet<>();
+            children.put(mid, leaves);
+        }
+        leaves.add(leaf);
+    }
+
+    // pattern: administrative-level markers that must occur in the address before Shanghai district/town matching runs; spattern: road/street/village suffixes tested by ifTrueAddr.
+    static Pattern pattern = Pattern.compile("市|区|镇|街道|县");
+    static Pattern spattern = Pattern.compile("路|街|道|村");
+    /**
+     * Splits a raw address into administrative parts and a residual street-level address.
+     * <p>
+     * The returned bean always carries {@code oldAddress} (the input) and {@code address}
+     * (the input with spaces removed, then progressively trimmed). The {@code rule} field
+     * records the last rule that fired; later assignments overwrite earlier ones:
+     * <ul>
+     *   <li>{@code "-1"}: the input contains "http" (treated as a link, nothing else is done);</li>
+     *   <li>{@code "0"}: the input starts with a province key of {@code All_NO_SH_tree};</li>
+     *   <li>{@code "-2"}: a city key of {@code All_NO_SH_tree} occurs in the input;</li>
+     *   <li>{@code "-4"}: a county entry of {@code All_NO_SH_tree} occurs in the input;</li>
+     *   <li>{@code "2"}: the input starts with "上海" and contains an administrative marker;</li>
+     *   <li>{@code "4"}: a Shanghai district of {@code D_S_C_tree} was matched;</li>
+     *   <li>{@code "8"}: a Shanghai town of {@code D_S_C_tree} was matched;</li>
+     *   <li>{@code null}: no rule fired.</li>
+     * </ul>
+     * The original design note lists the levels as province 1, city 2, district 4, town 8,
+     * committee 16; this method never assigns "1" or "16".
+     * <p>
+     * NOTE(review): {@code residentialCommittee} is never set here, so the committee-stripping
+     * step near the end cannot fire for beans created by this method.
+     * NOTE(review): the stripping steps pass place names to {@code String.split}, which
+     * interprets them as regular expressions; confirm no name contains regex metacharacters.
+     * NOTE(review): the lookup trees are filled by {@code initFile}; if they are still empty
+     * the loops simply do nothing.
+     *
+     * @param addr raw address text; must not be null (it is dereferenced immediately)
+     * @return a new, never-null {@link AddrBean} describing the recognised parts
+     */
+    public static AddrBean outAddrMapInAddr(String addr) {
+        AddrBean addrMap = new AddrBean();
+        addrMap.setOldAddress(addr + "");
+        addrMap.setAddress(addr.replaceAll(" ", ""));
+        if (addr.contains("http")) {
+            // Check for links first: anything containing "http" gets rule -1 and no further parsing.
+            addrMap.setRule("-1");
+        } else {
+            // Province check. A province matches when the address starts with the full key, or with its first two characters unless ifTrueAddr finds a road-like suffix right after them.
+            boolean errorAddr = false;
+            for (String s : All_NO_SH_tree.keySet()) {
+                if (addr.startsWith(s) || (addr.startsWith(s.substring(0, 2)) && !ifTrueAddr(addr, s.substring(0, 2)))) {
+                    addrMap.setProvinces(s);
+                    addrMap.setRule("0");
+                    if (!s.contains("上海")) {
+                        errorAddr = true;
+                    }
+                }
+                // City check: runs for every province key, whether or not the province matched. A city followed by "场" is ignored.
+                for (String m : All_NO_SH_tree.get(s).keySet()) {
+                    if (addr.contains(m) && !addr.contains(m + "场")) {
+                        addrMap.setProvinces(s);
+                        addrMap.setMarket(m);
+                        addrMap.setRule("-2");
+                        errorAddr = true;
+                    }
+                    // County check, with the same "场" exclusion; a hit also records the enclosing province and city.
+                    for (String x : All_NO_SH_tree.get(s).get(m)) {
+                        if (addr.contains(x) && !addr.contains(x + "场")) {
+                            addrMap.setDistinguish(x);
+                            addrMap.setProvinces(s);
+                            addrMap.setMarket(m);
+                            addrMap.setRule("-4");
+                            errorAddr = true;
+                            break; // leaves only the county loop; the city and province loops keep scanning
+                        }
+                    }
+                }
+            }
+
+
+            // Shanghai matching is attempted only when no out-of-province evidence was found above.
+            if (!errorAddr) {
+                // Requires at least one administrative marker (see pattern) somewhere in the address.
+                if (pattern.matcher(addr).find()) {
+                    if (addr.startsWith("上海")) {
+                        addrMap.setProvinces("上海市");
+                        addrMap.setMarket("上海市");
+                        addrMap.setRule("2");
+                    }
+                    // District matching; ifContains records whether a full-name (or two-character + "县") match happened.
+                    boolean ifContains = false;
+                    // Name of the matched district, used below to scope the fuzzy town match.
+                    String sh_distinguish = "";
+                    for (String d : D_S_C_tree.keySet()) {
+                        if (addr.contains(d) || addr.contains(d.substring(0, 2) + "县")) {
+                            ifContains = true;
+                            addrMap.setProvinces("上海市");
+                            addrMap.setMarket("上海市");
+                            addrMap.setDistinguish(d);
+                            sh_distinguish = d;
+                            addrMap.setRule("4");
+                            break;
+                        }
+                        if (addr.contains(d.substring(0, 2)) && ifTrueAddr(addr, d.substring(0, 2))) { // NOTE(review): accepts the prefix when a road-like suffix follows, the opposite of the province check above; confirm intent
+                            addrMap.setProvinces("上海市");
+                            addrMap.setMarket("上海市");
+                            addrMap.setDistinguish(d);
+                            sh_distinguish = d;
+                            addrMap.setRule("4");
+                        }
+                    }
+                    // Town matching: scans every district's towns, so a town hit may replace the district chosen above.
+                    for (String d : D_S_C_tree.keySet()) {
+                        for (String s : D_S_C_tree.get(d).keySet()) {
+                            if (addr.contains(s)) {
+                                addrMap.setProvinces("上海市");
+                                addrMap.setMarket("上海市");
+                                addrMap.setDistinguish(d);
+                                addrMap.setStreetTown(s);
+                                addrMap.setRule("8");
+                                break; // leaves only this district's town loop; later districts can still overwrite the result
+                            }
+                            if (addr.contains(s.substring(0, 2)) && ifContains && !sh_distinguish.isEmpty() && sh_distinguish.contains(d)) {
+                                addrMap.setProvinces("上海市");
+                                addrMap.setMarket("上海市");
+                                addrMap.setDistinguish(d);
+                                addrMap.setStreetTown(s);
+                                addrMap.setRule("8");
+                            }
+                        }
+                    }
+                }
+            }
+            // Special case: a 松江区 address containing "工业区" is assigned the town "松江技术开发区" and keeps only the text after "工业区".
+            if (addrMap.getDistinguish() != null && addrMap.getAddress() != null && addrMap.getDistinguish().contains("松江区") && addrMap.getAddress().contains("工业区")) {
+                addrMap.setStreetTown("松江技术开发区");
+                if (addrMap.getAddress().split("工业区").length > 1) {
+                    addrMap.setAddress(addrMap.getAddress().split("工业区")[1]);
+                }
+            }
+            // Produce the street-level address: for each recognised part, keep split(part)[1], i.e. the text after its first occurrence (up to the next one). When nothing follows the part the address is left unchanged.
+            if (addrMap.getProvinces() != null && !addrMap.getProvinces().isEmpty() && addrMap.getAddress().contains(addrMap.getProvinces())) {
+                if (addrMap.getAddress().split(addrMap.getProvinces()).length > 1) {
+                    addrMap.setAddress(addrMap.getAddress().split(addrMap.getProvinces())[1]);
+                }
+            }
+            if (addrMap.getMarket() != null && !addrMap.getMarket().isEmpty() && addrMap.getAddress().contains(addrMap.getMarket())) {
+                if (addrMap.getAddress().split(addrMap.getMarket()).length > 1) {
+                    addrMap.setAddress(addrMap.getAddress().split(addrMap.getMarket())[1]);
+                }
+            }
+            if (addrMap.getDistinguish() != null && !addrMap.getDistinguish().isEmpty()) {
+                if (addrMap.getAddress().contains(addrMap.getDistinguish())) {
+                    if (addrMap.getAddress().split(addrMap.getDistinguish()).length > 1) {
+                        addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish())[1]);
+                    }
+                } else if (addrMap.getAddress().contains(addrMap.getDistinguish().substring(0, 2) + "县")) {
+                    if (addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县").length > 1) {
+                        addrMap.setAddress(addrMap.getAddress().split(addrMap.getDistinguish().substring(0, 2) + "县")[1]);
+                    }
+                }
+            }
+            if (addrMap.getStreetTown() != null && !addrMap.getStreetTown().isEmpty() && addrMap.getAddress().contains(addrMap.getStreetTown())) {
+                if (addrMap.getAddress().split(addrMap.getStreetTown()).length > 1) {
+                    addrMap.setAddress(addrMap.getAddress().split(addrMap.getStreetTown())[1]);
+                }
+            }
+            if (addrMap.getResidentialCommittee() != null && !addrMap.getResidentialCommittee().isEmpty() && addrMap.getAddress().contains(addrMap.getResidentialCommittee())) {
+                if (addrMap.getAddress().split(addrMap.getResidentialCommittee()).length > 1) {
+                    addrMap.setAddress(addrMap.getAddress().split(addrMap.getResidentialCommittee())[1]);
+                }
+            }
+            if (addrMap.getAddress().contains("委会") || addrMap.getAddress().contains("员会")) { // drop everything up to and including whichever marker's first occurrence lies further right
+                addrMap.setAddress(addrMap.getAddress().substring(Math.max(addrMap.getAddress().indexOf("委会"), addrMap.getAddress().indexOf("员会")) + 2));
+            }
+        }
+
+        return addrMap;
+    }
+
+    /**
+     * 判断是否是名称+路名|街名的格式
+     *
+     * @param addr
+     * @param tagStr
+     * @return
+     */
+    public static boolean ifTrueAddr(String addr, String tagStr) {
+        String endStr = addr.substring(addr.indexOf(tagStr) + tagStr.length(), Math.min(addr.length(), addr.indexOf(tagStr) + tagStr.length() + 2));
+        if (spattern.matcher(endStr).find()) {
+            return true;
+        }
+        return false;
+    }
+
+    public static void main(String[] args) {
+        AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
+        AddrSplitLmrMap.initFile();
+        System.out.println(outAddrMapInAddr("村165号"));
+        System.out.println(outAddrMapInAddr("河南驻马店汝南县东官庄镇"));
+        System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));
+    }
+}

+ 3 - 2
src/main/java/com/skyversation/poiaddr/util/ShanghaiAddressSplitUtil.java

@@ -466,8 +466,9 @@ public class ShanghaiAddressSplitUtil {
     }
     public static void main(String[] args) throws Exception {
         new ShanghaiAddressSplitUtil().init();
-        System.out.println(splitBestAddress("小昆山镇周家浜村骨介353号"));
-        System.out.println(splitBestAddress("小昆山镇周家浜村骨介353号"));
+        System.out.println(splitBestAddress("村165号"));
+        System.out.println(splitBestAddress("河南驻马店汝南县东官庄镇"));
+        System.out.println(splitBestAddress("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));
 //        System.out.println(splitBestAddress("新胜路88、98号3号厂房"));
 //        System.out.println(splitBestAddress("新胜路88-98号3号厂房"));
 //        System.out.println(splitBestAddress("新胜路、98号3号厂房"));

BIN
src/main/resources/geojson/上海市_村居边界.xlsx