Răsfoiți Sursa

调整地址分割逻辑

DESKTOP-6LTVLN7\Liumouren 3 luni în urmă
părinte
comite
1d1a6b90b3

+ 16 - 5
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -132,14 +132,14 @@ public class AddressQueryEngine {
         paramObject.put("value", address);
         requestJson.add(paramObject);
 //      判断是否是关键字版搜索服务
-        if(Constant.SJ_SZX_SEARCH_BY_NAME.contains("1742459783686")){
+        if (Constant.SJ_SZX_SEARCH_BY_NAME.contains("1742459783686")) {
             JSONObject paramObject2 = new JSONObject();
-            paramObject2.put("name","region");
+            paramObject2.put("name", "region");
             paramObject2.put("position", "QUERY");
             paramObject2.put("value", Constant.getAMAP_CITY_CODE());
             requestJson.add(paramObject2);
             JSONObject paramObject3 = new JSONObject();
-            paramObject3.put("name","page_size");
+            paramObject3.put("name", "page_size");
             paramObject3.put("position", "QUERY");
             paramObject3.put("value", "10");
             requestJson.add(paramObject3);
@@ -153,7 +153,7 @@ public class AddressQueryEngine {
                 return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
             try {
                 if (body.indexOf("[") != 0) {
-                    return TransfromDataTool.szxResultToResult(JSONObject.parseObject(body),address);
+                    return TransfromDataTool.szxResultToResult(JSONObject.parseObject(body), address);
                 } else {
 //                  将得分最高的结果映射到实体类
                     return TransfromDataTool.szxResultToResult2(com.skyversation.poiaddr.util.AddressTools.getInstance().findBestMatch(address, JSONArray.parseArray(body), "address"));
@@ -397,9 +397,20 @@ public class AddressQueryEngine {
     }
 
     public static String townReplaceAll(String address) {
+        // Reduces an address to the part that follows the city, town/sub-district and
+        // residential-committee names, then replaces the unit words in the return statement with "0".
+        // A null address yields "" so callers that pass an absent field do not hit an NPE.
+        if (address == null) {
+            return "";
+        }
+        // Drop the city name (literal replacement; no regex needed).
+        address = address.replace("上海市", "");
         String[] towns = Constant.getTowns();
         for (String town : towns) {
-            address = address.replaceAll(town, "");
+            // Keep only the text after the first occurrence of the town name; anything before it is discarded.
+            int townIndex = address.indexOf(town);
+            if (townIndex >= 0) {
+                address = address.substring(townIndex + town.length());
+            }
+        }
+        // TODO: crude removal of the residential-committee name; everything up to and including the suffix is cut.
+        for (String committeeSuffix : new String[]{"居委会", "委员会"}) {
+            int suffixIndex = address.indexOf(committeeSuffix);
+            if (suffixIndex >= 0) {
+                address = address.substring(suffixIndex + committeeSuffix.length());
+            }
         }
         return address.replaceAll("小区", "0").replaceAll("号", "0").replaceAll("弄", "0").replaceAll("室", "0").replaceAll("户", "0").replaceAll("单元", "0").replaceAll("幢", "0");
     }

+ 1 - 1
src/main/java/com/skyversation/poiaddr/addquery/Constant.java

@@ -22,7 +22,7 @@ public class Constant {
     //    TODO 青浦区行政区划编码:310118 松江区行政区划编码:310117
     private String area;
 
-    private String[] towns;
+    private String[] towns = new String[]{"岳阳街道", "永丰街道", "方松街道", "中山街道", "广富林街道", "九里亭街道", "泗泾镇", "佘山镇", "车墩镇", "新桥镇", "洞泾镇", "九亭镇", "泖港镇", "石湖荡镇", "新浜镇", "叶榭镇", "小昆山镇", "天马山镇"};
 
     @Value("${app.search_server_id}")
     private String searchServerId;

+ 19 - 12
src/main/java/com/skyversation/poiaddr/controller/PoiAddressController.java

@@ -1,5 +1,6 @@
 package com.skyversation.poiaddr.controller;
 
+import com.alibaba.fastjson.JSONObject;
 import com.skyversation.poiaddr.addquery.AddressQueryEngine;
 import com.skyversation.poiaddr.addquery.Constant;
 import com.skyversation.poiaddr.bean.AddressResult;
@@ -15,10 +16,7 @@ import org.springframework.web.bind.annotation.*;
 
 import javax.annotation.Resource;
 import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 /**
  * 地名地址测试类
@@ -73,9 +71,9 @@ public class PoiAddressController {
     }
 
     @PostMapping(value = "/uploadDataBase", produces = MediaType.APPLICATION_JSON_VALUE)
-    public Object uploadDataBase(@RequestParam(name = "columnName") String columnName,@RequestParam(name = "tableName") String tableName) {
+    public Object uploadDataBase(@RequestParam(name = "columnName") String columnName, @RequestParam(name = "tableName") String tableName) {
         long startTime = System.currentTimeMillis();
-        testDataService.uploadDataBase(columnName,tableName);
+        testDataService.uploadDataBase(columnName, tableName);
         long endTime = System.currentTimeMillis();
         return "处理完成!用时" + (endTime - startTime) / 1000 + "秒!";
     }
@@ -106,13 +104,22 @@ public class PoiAddressController {
     @GetMapping(value = "/searchAddr")
     public Object searchByAddrStr(@RequestParam(name = "address") String address) {
         String[] addressList = address.split(",");
-        List<Map<String,AddressResult>> requestDatas = new ArrayList<>();
-        for(String addr: addressList){
-            Map<String,AddressResult> requestItem = new HashMap<>();
+        // Response object: one entry per queried address whose first hit passes the filter below,
+        // keyed by the address text as it appeared in the comma-separated request parameter.
+        JSONObject returnDatas = new JSONObject();
+        for (String addr : addressList) {
             AddressResult addressResult = AddressQueryEngine.getInstance().sj_szxSearchByName(addr);
-            requestItem.put(addr,addressResult);
-            requestDatas.add(requestItem);
+            List<AddressResult.ContentBean> data = addressResult == null ? null : addressResult.getData();
+            if (data == null || data.isEmpty()) {
+                continue;
+            }
+            // Only the first hit of each query is considered.
+            AddressResult.ContentBean contentBean = data.get(0);
+            String level = contentBean.getScore();
+            String matchedAddress = contentBean.getAddress();
+            // A missing score or address cannot satisfy the filter; skip the hit instead of throwing an NPE.
+            if (level == null || matchedAddress == null
+                    || !matchedAddress.contains("青浦区") || "异常".equals(level)) {
+                continue;
+            }
+            // Accept rule-tagged hits outright; otherwise the score must parse as a number above 2.0.
+            boolean accepted = level.contains("rule_");
+            if (!accepted) {
+                try {
+                    accepted = Float.parseFloat(level) > 2.0;
+                } catch (NumberFormatException ignored) {
+                    // Score is neither a rule tag nor a number: treat the hit as not accepted.
+                }
+            }
+            if (accepted) {
+                JSONObject jsonObjectItem = new JSONObject();
+                jsonObjectItem.put("name", contentBean.getSearchAddress());
+                jsonObjectItem.put("address", contentBean.getLocation());
+                jsonObjectItem.put("type", "diy");
+                jsonObjectItem.put("location", contentBean.getLat() + "," + contentBean.getLon());
+                // Store the item; without this the response object stays empty for every request.
+                returnDatas.put(addr, jsonObjectItem);
+            }
         }
-        return requestDatas;
+        return returnDatas;
     }
 }

+ 1 - 1
src/main/java/com/skyversation/poiaddr/service/impl/YyskAddressStandardizationServiceImpl.java

@@ -266,7 +266,7 @@ public class YyskAddressStandardizationServiceImpl {
                                     } else {
                                         item.setResidentialCommittee("");
                                     }
-                                    item.setStandardAddress(item.getMarket() + item.getDistinguish() + item.getStreetTown() + AddressTools.parseAddressCJ(oldAddress)[4]);
+                                    item.setStandardAddress(item.getMarket() + item.getDistinguish() + item.getStreetTown() + AddressQueryEngine.townReplaceAll(oldAddress));
                                     break;
                                 } else {
                                     item.setMatchLevel("异常");

+ 5 - 5
src/main/java/com/skyversation/poiaddr/util/AddressTools.java

@@ -494,11 +494,11 @@ public class AddressTools {
                 String addr = obj.getString(param);
 //              规则4判断
 //              TODO 添加校验逻辑(首先使用第4校验规则匹配,匹配不到使用第二规则,还匹配不到的话就使用打分规则)
-                Set<String> addressString = AddressQueryEngine.tokenizeString(parseAddressCJ(addr)[4]).get(0);
-                Set<String> addressNumber = AddressQueryEngine.tokenizeString(parseAddressCJ(addr)[4]).get(1);
-                Set<String> address2String = AddressQueryEngine.tokenizeString(parseAddressCJ(address)[4]).get(0);
-                Set<String> address2Number = AddressQueryEngine.tokenizeString(parseAddressCJ(address)[4]).get(1);
-                if (addressString != null && addressString.size() > 1) {
+                Set<String> addressString = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(0);
+                Set<String> addressNumber = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(addr)).get(1);
+                Set<String> address2String = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(0);
+                Set<String> address2Number = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(1);
+                if (addressString != null && addressString.size() > 0) {
                     int addressStrSize = addressString.size();
                     for (String addr2str : address2String) {
                         if (addressString.contains(addr2str)) {

+ 70 - 3
src/main/java/com/skyversation/poiaddr/util/SerializationUtils.java

@@ -47,10 +47,47 @@ public class SerializationUtils {
 
     public static void main(String[] args) throws IOException {
         test();
+//        ruleTest("上海市松江区泗泾镇江川南路25弄19号302室", "上海市松江区洞泾镇砖桥居民委员会江川南路25弄19号302室");
+//        replaceResidentialCommitteeToNull();
         /*List<SjArrDzbzhSjWcbryDzxx> listData = deserialize("output/testDataBase_all.ser");
 //      将序列化的文件转储为xlsx文件
         ExcelReaderUtils.writeClassToExcel(listData, "output/allData.xlsx");*/
     }
+
+    /**
+     * Checks whether the tokens of {@code returnAddress} are all present in {@code address}
+     * after both have been passed through {@code AddressQueryEngine.townReplaceAll}.
+     * <p>
+     * The first token set of {@code returnAddress} must be non-empty and fully contained in the
+     * first token set of {@code address}; the second token set must be empty or fully contained
+     * in the second token set of {@code address}. Judging by the variable names, the first set
+     * holds text tokens and the second holds number tokens (to be confirmed against
+     * {@code AddressQueryEngine.tokenizeString}).
+     *
+     * @param address       the queried address; {@code null} yields {@code false}
+     * @param returnAddress the address returned by the search; {@code null} or empty yields {@code false}
+     * @return {@code true} when both containment checks pass, {@code false} otherwise
+     */
+    public static boolean ruleTest(String address, String returnAddress) {
+        if (address == null || returnAddress == null || returnAddress.isEmpty()) {
+            return false;
+        }
+        // Strip the administrative-division prefixes once per address instead of once per token set.
+        String strippedReturn = AddressQueryEngine.townReplaceAll(returnAddress);
+        String strippedQuery = AddressQueryEngine.townReplaceAll(address);
+        Set<String> addressString = AddressQueryEngine.tokenizeString(strippedReturn).get(0);
+        Set<String> addressNumber = AddressQueryEngine.tokenizeString(strippedReturn).get(1);
+        Set<String> address2String = AddressQueryEngine.tokenizeString(strippedQuery).get(0);
+        Set<String> address2Number = AddressQueryEngine.tokenizeString(strippedQuery).get(1);
+        if (addressString == null || addressString.isEmpty()) {
+            return false;
+        }
+        // Subset test on the first token sets; set elements are unique, so this equals the former countdown loop.
+        if (!address2String.containsAll(addressString)) {
+            return false;
+        }
+        // With no tokens in the second set the first-set match alone decides; otherwise it must be a subset too.
+        return addressNumber.isEmpty() || address2Number.containsAll(addressNumber);
+    }
+
     /**
      * 读取本地xlsx文件得到List<Map<String,Object>>结果,然后得到所有规范化结果地址字段,然后匹配街镇关键字,并将匹配到的街镇关键字放到新的一列
      * 2、根据主键address和标准化地址standardAddress直接跑规则,属于rule_4的要替换掉同时添加updateTag
@@ -59,15 +96,21 @@ public class SerializationUtils {
     public static void test() {
 //       根据地址读取xlsx数据
         try {
-            List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_wcb_address_standardization.xlsx");
+            List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_new_wcb_address_standardization3.xlsx");
             float lecel = 1.4f;
             int sum = 0;
             int erSum = 0;
             int nullSum = 0;
+            int rule2Sum = 0;
+            int rule4Sum = 0;
             for (Map<String, Object> item : fileDatas) {
-                if (!"异常".equals(item.get("match_level").toString()) && !"否".equals(item.get("match_level").toString())) {
+                if (!"异常".equals(item.get("match_level").toString())) {
                     sum++;
-                    if (!"rule_2".equals(item.get("match_level").toString()) && !"rule_4".equals(item.get("match_level").toString())) {
+                    if("rule_2".equals(item.get("match_level").toString())){
+                        rule2Sum++;
+                    }else if("rule_4".equals(item.get("match_level").toString())){
+                        rule4Sum++;
+                    }else if (!"rule_2".equals(item.get("match_level").toString()) && !"rule_4".equals(item.get("match_level").toString())) {
                         //                  查询地址、主键
                         float address = Float.parseFloat(item.get("match_level").toString());
                         if (address < lecel) {
@@ -81,10 +124,34 @@ public class SerializationUtils {
             System.out.println("总数据条数:" + fileDatas.size());
             System.out.println("打分小于" + lecel + "数据条数:" + erSum);
             System.out.println("异常数据条数:" + nullSum);
+            System.out.println("rule2数据条数:" + rule2Sum);
+            System.out.println("rule4数据条数:" + rule4Sum);
             System.out.println("有效数据条数:" + (sum - erSum));
             System.out.println("有效数据数据占比:" + (float) (sum - erSum) / fileDatas.size());
         } catch (Exception e) {
             e.printStackTrace();
         }
     }
+
+    /**
+     * Re-tags rows of the standardization workbook: reads
+     * {@code output/yysk_new_wcb_address_standardization2.xlsx}, runs {@link #ruleTest} on each
+     * row's {@code address} / {@code return_address} pair, sets {@code match_level} to
+     * {@code rule_4} for rows that pass, and writes all rows to
+     * {@code output/yysk_new_wcb_address_standardization3.xlsx}.
+     * <p>
+     * Despite the method name, no residential-committee text is changed by this method itself.
+     * Original author's note: "山原居民委员会(筹)" keeps failing.
+     * <p>
+     * Rows with a missing {@code address} or a missing/empty {@code return_address} are left
+     * untouched. Any exception is printed and the output file is then not written.
+     */
+    public static void replaceResidentialCommitteeToNull() {
+        try {
+            List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_new_wcb_address_standardization2.xlsx");
+            for (Map<String, Object> item : fileDatas) {
+                Object addressCell = item.get("address");
+                Object returnAddressCell = item.get("return_address");
+                // Skip incomplete rows so that one blank cell does not abort the whole export.
+                if (addressCell == null || returnAddressCell == null) {
+                    continue;
+                }
+                String returnAddress = returnAddressCell.toString();
+                if (returnAddress.isEmpty()) {
+                    continue;
+                }
+                if (ruleTest(addressCell.toString(), returnAddress)) {
+                    item.put("match_level", "rule_4");
+                }
+            }
+            ExcelReaderUtils.writeToExcel(fileDatas, "output/yysk_new_wcb_address_standardization3.xlsx");
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
 }