Przeglądaj źródła

完善分词逻辑

DESKTOP-6LTVLN7\Liumouren 1 miesiąc temu
rodzic
commit
c6e4f04fb5

+ 6 - 19
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -45,19 +45,15 @@ public class AddressQueryEngine {
      * @return
      */
     public AddressResult commonSearchByName(List<String> addrs) {
-        if (addrs == null || addrs.size() < 1) {
-            return null;
-        }
         AddressResult addressResult = new AddressResult();
 //      数据库查询
-        for (String addr : addrs) {
+        /*for (String addr : addrs) {
             if (addr.startsWith("中山") && !addr.startsWith("中山街道")) {
                 addr = addr.replace("中山", "中山街道");
             }
             // 优先进行数据库查询
             List<AmapAddressV3> dbPois = AreaService.getInstance().getAddressPoisByAddr(addr);
             if (dbPois != null && dbPois.size() > 0) {
-//                System.out.println("数据库查询有结果!!!!");
                 JSONArray pois = new JSONArray();
                 pois.addAll(dbPois);
                 addressResult = new TransfromDataTool().gdV3ResultToResult(addr, pois, false);
@@ -67,7 +63,7 @@ public class AddressQueryEngine {
                     return getCjWgWgwByLoc(addressResult);
                 }
             }
-        }
+        }*/
         /*for (String addr : addrs) {
             //  创建请求
             addressResult = sj_szxSearchByName(addr, 3);
@@ -98,9 +94,9 @@ public class AddressQueryEngine {
                 return getCjWgWgwByLoc(addressResult);
             }
         }
-//        addressResult.setCode(AddressResultEnum.RESULT_NULL);
-        if(addressResult != null){
+        if (addressResult != null) {
             addressResult.setMessage("失败");
+            addressResult.setCode(AddressResultEnum.RESULT_NULL);
         }
         return addressResult;
     }
@@ -130,13 +126,7 @@ public class AddressQueryEngine {
      * @return
      */
     public AddressResult szxSearchByName(String address) {
-        if (!address.startsWith("上海")) {
-            address = "上海市" + address;
-        }
-        if (address.startsWith("中山") && !address.startsWith("中山街道")) {
-            address = address.replace("中山", "中山街道");
-        }
-        ResponseEntity response = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, Constant.SZX_HLW_URL + address, null, null, 3);
+        ResponseEntity response = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, Constant.SZX_HLW_URL + com.skyversation.poiaddr.util.AddressTools.reOutDistinguish(address), null, null, 3);
         if (response == null) {
             return null;
         }
@@ -335,14 +325,11 @@ public class AddressQueryEngine {
      * @return
      */
     public AddressResult gdV3SearchByName(String address) {
-        if (address.startsWith("中山") && !address.startsWith("中山街道")) {
-            address = address.replace("中山", "中山街道");
-        }
         JSONArray pois = new JSONArray();
         ScheduledTasks.gdRequestSize++;
         String geoUrl =
                 Constant.AMAP_SEARCH_NAME_V3 + "?key=" + Constant.AMAP_KEY[ThreadLocalRandom.current().nextInt(0, Constant.AMAP_KEY.length)] + "&types=" + Constant.AMAP_SEARCH_TYPES +
-                        "&keywords=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20";
+                        "&keywords=" + com.skyversation.poiaddr.util.AddressTools.reOutDistinguish(address) + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20";
         ResponseEntity responseEntity = AddressNetTools.getInstance().requestGetOrPost(HttpMethod.GET, geoUrl, null, null, 0);
         if (responseEntity != null && responseEntity.hasBody()) {
             String body = responseEntity.getBody() + "";

+ 39 - 17
src/main/java/com/skyversation/poiaddr/controller/PoiAddressController.java

@@ -104,26 +104,48 @@ public class PoiAddressController {
      */
     @GetMapping(value = "/searchAddr")
     public Object searchByAddrStr(@RequestParam(name = "address") String address) {
-        String[] addressList = address.split(",");
-        JSONObject returnDatas = new JSONObject();
-        for (String addr : addressList) {
-            AddressResult addressResult = AddressQueryEngine.getInstance().sj_szxSearchByName(addr, 3);
-            List<AddressResult.ContentBean> data = addressResult.getData();
-            if (data != null && data.size() > 0) {
-                AddressResult.ContentBean contentBean = data.get(0);
-                String level = contentBean.getScore();
-                if (contentBean.getAddress().contains("青浦区") && !Objects.equals(level, "异常") && (level.contains("rule_") || Float.parseFloat(level) > 2.0)) {
-                    JSONObject jsonObjectItem = new JSONObject();
-                    jsonObjectItem.put("name", contentBean.getSearchAddress());
-                    jsonObjectItem.put("address", contentBean.getAddress());
-                    jsonObjectItem.put("type", "diy");
-                    jsonObjectItem.put("level", level);
-                    jsonObjectItem.put("location", contentBean.getLat() + "," + contentBean.getLon());
-                    returnDatas.put(contentBean.getSearchAddress(), jsonObjectItem);
+        try {
+            List<Map<String, Object>> fileData = ExcelReaderUtils.readExcel("C:\\Users\\Liumouren\\Desktop\\addrs.xlsx");
+            if (fileData.size() == 0) {
+                return "文件解析失败!";
+            }
+            List<Map<String,Object>> errorDatas = new ArrayList<>();
+            JSONObject returnDatas = new JSONObject();
+            for (Map<String, Object> item : fileData) {
+                List<String> addrs = new ArrayList<>();
+                addrs.add("上海市青浦区" + item.get("对口街镇").toString() + item.get("对口路牌").toString());
+                AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName(addrs);
+                if (addressResult != null) {
+                    List<AddressResult.ContentBean> data = addressResult.getData();
+                    if (data != null && data.size() > 0) {
+                        AddressResult.ContentBean contentBean = data.get(0);
+                        String level = contentBean.getScore();
+                        if (contentBean.getAddress().contains("青浦区") && contentBean.getAddress().contains(item.get("对口路牌").toString())) {
+                            JSONObject jsonObjectItem = new JSONObject();
+                            jsonObjectItem.put("name", item.get("对口路牌").toString());
+                            jsonObjectItem.put("address", contentBean.getAddress());
+                            jsonObjectItem.put("type", "diy");
+                            jsonObjectItem.put("level", level);
+                            jsonObjectItem.put("location", contentBean.getLon() + "," + contentBean.getLat());
+                            returnDatas.put(contentBean.getSearchAddress(), jsonObjectItem);
+                        }else{
+                            errorDatas.add(item);
+                        }
+                    }else{
+                        errorDatas.add(item);
+                    }
+                }else{
+                    errorDatas.add(item);
                 }
             }
+            if(errorDatas.size() > 0){
+                ExcelReaderUtils.writeToExcel(errorDatas,"C:\\Users\\Liumouren\\Desktop\\errorDatas.xlsx");
+            }
+            return returnDatas;
+        } catch (Exception e) {
+            e.printStackTrace();
+            return e;
         }
-        return returnDatas;
     }
 
     /**

+ 220 - 0
src/main/java/com/skyversation/poiaddr/entity/YyszAddressQp.java

@@ -0,0 +1,220 @@
+package com.skyversation.poiaddr.entity;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Table;
+import javax.persistence.Id;
+import java.io.Serializable;
+import java.util.Date;
+import java.math.BigDecimal;
+
+/**
+ * JPA entity mapped to the {@code yysz_address_qp} table; each field is bound to the
+ * column named in its {@code @Column} annotation.
+ * <p>
+ * Lombok's {@code @Data}, {@code @NoArgsConstructor} and {@code @AllArgsConstructor}
+ * generate the accessors and constructors, so the field declaration order determines
+ * the parameter order of the all-args constructor.
+ * <p>
+ * NOTE(review): {@code javax.persistence.Id} is imported but no field carries
+ * {@code @Id}; JPA requires an identifier on every entity - confirm which column is the
+ * primary key and annotate it.
+ *
+ * @Author  LiuMengxiang
+ * @Date 2025-05-14 17:14:49
+ */
+
+@AllArgsConstructor
+@NoArgsConstructor
+@Data
+@Entity
+@Table ( name ="yysz_address_qp" , schema = "")
+public class YyszAddressQp  implements Serializable {
+
+	private static final long serialVersionUID =  1012376382976936447L;
+
+   	@Column(name = "oid" )
+	private Long oid;
+
+   	@Column(name = "code" )
+	private String code;
+
+   	@Column(name = "sourceaddress" )
+	private String sourceaddress;
+
+   	@Column(name = "city" )
+	private String city;
+
+   	@Column(name = "county" )
+	private String county;
+
+   	@Column(name = "town" )
+	private String town;
+
+   	@Column(name = "community" )
+	private String community;
+
+   	@Column(name = "village" )
+	private String village;
+
+   	@Column(name = "squad" )
+	private String squad;
+
+   	@Column(name = "szone" )
+	private String szone;
+
+   	@Column(name = "street" )
+	private String street;
+
+   	@Column(name = "door" )
+	private String door;
+
+   	@Column(name = "resregion" )
+	private String resregion;
+
+   	@Column(name = "building" )
+	private String building;
+
+   	@Column(name = "building_num" )
+	private String buildingNum;
+
+   	@Column(name = "unit" )
+	private String unit;
+
+   	@Column(name = "floor" )
+	private String floor;
+
+   	@Column(name = "room" )
+	private String room;
+
+   	@Column(name = "unique_code" )
+	private String uniqueCode;
+
+   	@Column(name = "room_of_floor" )
+	private Long roomOfFloor;
+
+   	@Column(name = "source" )
+	private String source;
+
+   	@Column(name = "lon" )
+	private BigDecimal lon;
+
+   	@Column(name = "lat" )
+	private BigDecimal lat;
+
+   	@Column(name = "shape" )
+	private String shape;
+
+   	@Column(name = "belong_building" )
+	private String belongBuilding;
+
+   	@Column(name = "address_type" )
+	private Long addressType;
+
+   	@Column(name = "lv" )
+	private Long lv;
+
+   	@Column(name = "is_delete" )
+	private Long isDelete;
+
+   	@Column(name = "is_multi" )
+	private Long isMulti;
+
+   	@Column(name = "createtime" )
+	private Date createtime;
+
+   	@Column(name = "updatetime" )
+	private Date updatetime;
+
+   	@Column(name = "alt" )
+	private BigDecimal alt;
+
+   	@Column(name = "model_type" )
+	private Long modelType;
+
+   	@Column(name = "full_place" )
+	private String fullPlace;
+
+   	@Column(name = "security_grade" )
+	private Long securityGrade;
+
+   	@Column(name = "city_code" )
+	private String cityCode;
+
+   	@Column(name = "county_code" )
+	private String countyCode;
+
+   	@Column(name = "town_code" )
+	private String townCode;
+
+   	@Column(name = "community_code" )
+	private String communityCode;
+
+   	@Column(name = "grid_name" )
+	private String gridName;
+
+   	@Column(name = "grid_code" )
+	private String gridCode;
+
+   	@Column(name = "data_type" )
+	private String dataType;
+
+   	@Column(name = "mphid" )
+	private String mphid;
+
+   	@Column(name = "address_code" )
+	private String addressCode;
+
+   	@Column(name = "systemid" )
+	private String systemid;
+
+   	@Column(name = "type" )
+	private String type;
+
+   	@Column(name = "x" )
+	private String X;
+
+   	@Column(name = "y" )
+	private String Y;
+
+   	@Column(name = "label" )
+	private String label;
+
+   	@Column(name = "addr_tag" )
+	private String addrTag;
+
+   	@Column(name = "multiple_unique" )
+	private String multipleUnique;
+
+   	@Column(name = "standardize_task_id" )
+	private String standardizeTaskId;
+
+   	@Column(name = "dlhh" )
+	private String dlhh;
+
+   	@Column(name = "signature_data" )
+	private String signatureData;
+
+   	@Column(name = "signature_check_state" )
+	private Long signatureCheckState;
+
+   	@Column(name = "signature_time" )
+	private Date signatureTime;
+
+   	@Column(name = "signature_check_time" )
+	private Date signatureCheckTime;
+
+   	@Column(name = "is_history" )
+	private Long isHistory;
+
+   	@Column(name = "ylmc" )
+	private String ylmc;
+
+   	@Column(name = "ylbm" )
+	private String ylbm;
+
+   	@Column(name = "address" )
+	private String address;
+
+   	@Column(name = "ybdd" )
+	private String ybdd;
+
+   	@Column(name = "location" )
+	private String location;
+
+   	@Column(name = "tydz_jc" )
+	private String tydzJc;
+}

+ 17 - 0
src/main/java/com/skyversation/poiaddr/service/YyszAddressQpRepository.java

@@ -0,0 +1,17 @@
+package com.skyversation.poiaddr.service;
+
+import com.skyversation.poiaddr.entity.YyszAddressQp;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
+
+import javax.annotation.Resource;
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link YyszAddressQp} rows.
+ * <p>
+ * NOTE(review): the interface is annotated with {@code javax.annotation.Resource}; a
+ * repository stereotype was presumably intended - confirm.
+ */
+@Resource
+public interface YyszAddressQpRepository extends JpaRepository<YyszAddressQp, String> {
+
+    /**
+     * Runs a native SQL query that returns at most 20 rows of {@code yysz_address_qp}
+     * whose {@code sourceaddress} matches {@code %addr%} under {@code ILIKE}.
+     * NOTE(review): {@code ILIKE} is not standard SQL; presumably the target database is
+     * PostgreSQL - confirm.
+     *
+     * @param addr text fragment to look for inside {@code sourceaddress}
+     * @return the matching rows
+     */
+    @Query(value = "SELECT * FROM yysz_address_qp WHERE sourceaddress ILIKE %:addr% LIMIT 20", nativeQuery = true)
+    List<YyszAddressQp> getAddressPoisByAddr(@Param("addr") String addr);
+}

+ 32 - 63
src/main/java/com/skyversation/poiaddr/service/impl/YyskAddressStandardizationServiceImpl.java

@@ -1,9 +1,7 @@
 package com.skyversation.poiaddr.service.impl;
 
-import com.alibaba.fastjson.JSONObject;
 import com.skyversation.poiaddr.addquery.AddressQueryEngine;
 import com.skyversation.poiaddr.bean.AddressResult;
-import com.skyversation.poiaddr.bean.GeoJsonBean;
 import com.skyversation.poiaddr.config.DbConnection;
 import com.skyversation.poiaddr.entity.AmapAddressV3;
 import com.skyversation.poiaddr.entity.FusionKjdlTydzWf;
@@ -330,7 +328,6 @@ public class YyskAddressStandardizationServiceImpl {
         System.out.println("<<<<<<<<------searchLoadFileData{fileIndex:" + startFileIndex + "}");
         long startTime = System.currentTimeMillis();
         List<YyskDmdzAddressStandardization> listData = new ArrayList<>();
-//        List<YyskDmdzAddressStandardization> errorDatas = new ArrayList<>();
         try {
             List<Map<String, Object>> fileData = ExcelReaderUtils.readExcel(filePath);
             for (Map<String, Object> item : fileData) {
@@ -356,18 +353,17 @@ public class YyskAddressStandardizationServiceImpl {
         }
         if (listData != null && listData.size() > 0) {
 //              批量更新处理后的数据
-            /*List<YyskAddressStandardization> resultDataList = runExecutorService(listData);
+            List<YyskDmdzAddressStandardization> resultDataList = runExecutorService(listData);
             if (resultDataList.size() > 0) {
                 ExcelReaderUtils.convertYYListToMapList(resultDataList, outPath);
-            }*/
-//          TODO 数据转换
-            List<FusionKjdlTydzWf> returnDatas = YyskDmdzAddressStandardizationToFusionKjdlTydzWf(listData);
-            try {
-                AreaService.getInstance().saveFktws(returnDatas);
-            } catch (Exception e) {
-                System.err.println("第" + startFileIndex + "个文件入库异常:" + e);
             }
-
+//          TODO 数据转换
+//            List<FusionKjdlTydzWf> returnDatas = YyskDmdzAddressStandardizationToFusionKjdlTydzWf(listData);
+//            try {
+//                AreaService.getInstance().saveFktws(returnDatas);
+//            } catch (Exception e) {
+//                System.err.println("第" + startFileIndex + "个文件入库异常:" + e);
+//            }
             long endTime = System.currentTimeMillis();
             System.out.println("处理单批次用时" + (endTime - startTime) / 1000 + "秒!已调用市中心接口个数" + ScheduledTasks.szxRequestSize + ";已調用高德接口个数:" + ScheduledTasks.gdRequestSize);
         } else {
@@ -418,60 +414,42 @@ public class YyskAddressStandardizationServiceImpl {
         return returnDatas;
     }
 
+    /**
+     * 搜索主逻辑
+     * @param listData 传入List<YyskDmdzAddressStandardization> 其中address不能为空
+     * @return 处理后的List<YyskDmdzAddressStandardization>
+     */
     public List<YyskDmdzAddressStandardization> runExecutorService(List<YyskDmdzAddressStandardization> listData) {
         // 创建线程池
         int executorSize = Runtime.getRuntime().availableProcessors() / 2;
         System.out.println("创建线程池个数:" + executorSize);
         ExecutorService executorService = Executors.newFixedThreadPool(executorSize);
-        AreaService areaService = AreaService.getInstance();
         List<Future<?>> futures = new ArrayList<>();
         for (int i = 0; i < listData.size(); i++) {
             YyskDmdzAddressStandardization item = listData.get(i);
+            //              首先判断地址不能为空
+            if (item.getAddress() == null || item.getAddress().isEmpty()) {
+                continue;
+            }
             int finalI = i;
             futures.add(executorService.submit(() -> {
                 System.out.print(finalI + ">");
                 List<String> addrList = new ArrayList<>();
-                if (item.getAddress() != null && !item.getAddress().isEmpty()) {
-                    addrList.add(item.getAddress());
-                }
-//              默认不查询
-                boolean ifSearch = false;
-//              首先过滤rule_0
-                if (!item.getMatchLevel().contains("rule_0") && !item.getMatchLevel().contains("异常")) {
-                    String[] addr = new String[]{};
-                    if (item.getDistinguish() == null || item.getDistinguish().isEmpty() || item.getDistinguish().trim().length() == 0) {
-                        addr = AddressTools.parseAddressJZ(item.getStandardAddress());
-                    } else if (item.getDistinguish().equals("[]")) {
-                        addr = AddressTools.parseAddressJZ(item.getAddress());
-                    }
-                    if (addr != null && addr[1] != null && !addr[1].equals("[]")) {
-                        item.setDistinguish(addr[1]);
-                    } else {
-//                        根据经纬度判断落点在哪儿个区
-                        String lon = item.getLon();
-                        String lat = item.getLat();
-                        double[] points = CoordTransform2.getInstance().shcj_to_wgs84(
-                                Double.parseDouble(lon), Double.parseDouble(lat));
-                        GeoJsonBean adBean = areaService.isInadPolygon(points[0], points[1]);
-                        JSONObject properties = adBean.getProperties();
-                        if (properties != null && properties.containsKey("name")) {
-                            item.setDistinguish(properties.getString("name"));
-                        } else {
-                            item.setStandardAddress(null);
+                addrList.add(item.getAddress());
+                boolean ifSearchTag = true;
+//              首先判断是否为非上海市地址
+                String[] ams = AddressMatcher.matchProvinceAndCity(item.getAddress());
+                if(ams != null && ams[0] != null){
+                    if(!ams[0].equals("上海市")){
+                        item.setMatchLevel("rule_0");
+                        item.setProvinces(ams[0]);
+                        if(ams[1] != null){
+                            item.setMarket(ams[1]);
                         }
-                    }
-                    try {
-                        String oldAddress = Objects.requireNonNull(AddressTools.parseAddressJZ(item.getStandardAddress()))[3].
-                                replaceAll(item.getMarket().equals("[]") ? "" : item.getMarket(), "").
-                                replaceAll(item.getDistinguish().equals("[]") ? "" : item.getDistinguish(), "").
-                                replaceAll(item.getStreetTown().equals("[]") ? "" : item.getStreetTown(), "").
-                                replaceAll(item.getResidentialCommittee().equals("[]") ? "" : item.getResidentialCommittee(), "");
-                        item.setStandardAddress(item.getMarket() + item.getDistinguish() + item.getStreetTown() + AddressQueryEngine.townReplace_ct(oldAddress));
-                    } catch (Exception e) {
-                        e.printStackTrace();
+                        ifSearchTag = false;
                     }
                 }
-                if (ifSearch && addrList.size() > 0) {
+                if (ifSearchTag) {
                     //  TODO 开始查询
                     AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName(addrList);
                     if (addressResult == null || addressResult.getData() == null || addressResult.getData().size() < 1) {
@@ -483,7 +461,8 @@ public class YyskAddressStandardizationServiceImpl {
                                 String resultAddrKey = contentBean.getAddress();
                                 if (resultAddrKey != null && contentBean.getLon() != null && contentBean.getLat() != null && AddressTools.isOtherDistrictThanSongJiang2(resultAddrKey)) {
 //                                  去除特殊字符
-                                    resultAddrKey = AddressTools.getInstance().deleteStr(contentBean.getAddress());
+                                    resultAddrKey = AddressTools.reOutDistinguish(contentBean.getAddress());
+//                                  根据地名地址返回到街镇一级分词,
                                     String[] strs = AddressTools.parseAddressJZ(resultAddrKey);
 //                                  遍历全国省份名称得到省名
                                     item.setProvinces(AddressTools.isOtherDistrictThanShangHai(resultAddrKey));
@@ -525,17 +504,7 @@ public class YyskAddressStandardizationServiceImpl {
                                     } else {
                                         item.setResidentialCommittee("");
                                     }
-                                    String oldAddress = "";
-                                    try {
-                                        oldAddress = Objects.requireNonNull(AddressTools.parseAddressJZ(contentBean.getSearchAddress()))[3].
-                                                replaceAll(item.getMarket().equals("[]") ? "" : item.getMarket(), "").
-                                                replaceAll(item.getDistinguish().equals("[]") ? "" : item.getDistinguish(), "").
-                                                replaceAll(item.getStreetTown().equals("[]") ? "" : item.getStreetTown(), "").
-                                                replaceAll(item.getResidentialCommittee().equals("[]") ? "" : item.getResidentialCommittee(), "");
-                                    } catch (Exception e) {
-                                        e.printStackTrace();
-                                    }
-                                    item.setStandardAddress(item.getMarket() + item.getDistinguish() + item.getStreetTown() + AddressQueryEngine.townReplace_ct(oldAddress));
+                                    item.setStandardAddress(item.getMarket() + item.getDistinguish() + item.getStreetTown() + AddressTools.returnAddress(contentBean.getSearchAddress()));
                                     break;
                                 } else {
                                     item.setMatchLevel("异常");

+ 28 - 0
src/main/java/com/skyversation/poiaddr/service/impl/YyszAddressQpService.java

@@ -0,0 +1,28 @@
+package com.skyversation.poiaddr.service.impl;
+
+import com.skyversation.poiaddr.entity.YyszAddressQp;
+import com.skyversation.poiaddr.service.YyszAddressQpRepository;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+
+/**
+ * Spring service that forwards every call to {@link YyszAddressQpRepository}.
+ */
+@Service
+public class YyszAddressQpService {
+
+    @Autowired
+    private YyszAddressQpRepository yyszAddressQpRepository;
+
+    /**
+     * Persists the given rows through the repository's {@code saveAll}.
+     * NOTE(review): despite the "AmapAddressV3" wording in the method and parameter
+     * names, the elements are {@link YyszAddressQp} entities.
+     *
+     * @param amapAddressV3List entities to save
+     * @return whatever {@code saveAll} returns for those entities
+     */
+    public List<YyszAddressQp> saveAmapAddressV3(List<YyszAddressQp> amapAddressV3List) {
+        return yyszAddressQpRepository.saveAll(amapAddressV3List);
+    }
+
+    /**
+     * Delegates to the repository query of the same name.
+     *
+     * @param addr address fragment handed to the repository query
+     * @return the rows returned by the repository
+     */
+    public List<YyszAddressQp> getAddressPoisByAddr(String addr){
+        return yyszAddressQpRepository.getAddressPoisByAddr(addr);
+    }
+
+    /**
+     * Loads every row through the repository's {@code findAll}.
+     *
+     * @return all stored entities
+     */
+    public List<YyszAddressQp> getAllData(){
+        return yyszAddressQpRepository.findAll();
+    }
+
+}

+ 18 - 17
src/main/java/com/skyversation/poiaddr/util/AddressMatcher.java

@@ -28,13 +28,11 @@ public class AddressMatcher {
         addProvinceAndCities("浙江省", "杭州市", "宁波市", "温州市", "嘉兴市", "湖州市", "绍兴市", "金华市", "衢州市", "舟山市", "台州市", "丽水市");
         addProvinceAndCities("安徽省", "合肥市", "芜湖市", "蚌埠市", "淮南市", "马鞍山市", "淮北市", "铜陵市", "安庆市", "黄山市", "阜阳市", "宿州市", "滁州市", "六安市", "宣城市", "池州市", "亳州市");
         addProvinceAndCities("福建省", "福州市", "厦门市", "莆田市", "三明市", "泉州市", "漳州市", "南平市", "龙岩市", "宁德市");
-//        addProvinceAndCities("江西省", "南昌市", "景德镇市", "萍乡市", "九江市", "新余市", "鹰潭市", "赣州市", "吉安市", "宜春市", "抚州市", "上饶市");
         addProvinceAndCities("江西省", "南昌市", "景德镇市", "萍乡市", "九江市", "鹰潭市", "赣州市", "吉安市", "宜春市", "抚州市", "上饶市");
         addProvinceAndCities("山东省", "济南市", "青岛市", "淄博市", "枣庄市", "东营市", "烟台市", "潍坊市", "济宁市", "泰安市", "威海市", "日照市", "临沂市", "德州市", "聊城市", "滨州市", "菏泽市");
         addProvinceAndCities("河南省", "郑州市", "开封市", "洛阳市", "平顶山市", "安阳市", "鹤壁市", "新乡市", "焦作市", "濮阳市", "许昌市", "漯河市", "三门峡市", "南阳市", "商丘市", "信阳市", "周口市", "驻马店市");
         addProvinceAndCities("湖北省", "武汉市", "黄石市", "十堰市", "宜昌市", "襄阳市", "鄂州市", "荆门市", "孝感市", "荆州市", "黄冈市", "咸宁市", "随州市");
         addProvinceAndCities("湖南省", "长沙市", "株洲市", "湘潭市", "衡阳市", "邵阳市", "岳阳市", "常德市", "张家界市", "益阳市", "郴州市", "永州市", "怀化市", "娄底市");
-//        addProvinceAndCities("广东省", "广州市", "深圳市", "珠海市", "汕头市", "佛山市", "韶关市", "湛江市", "肇庆市", "江门市", "茂名市", "惠州市", "梅州市", "汕尾市", "河源市", "阳江市", "清远市", "东莞市", "中山市", "潮州市", "揭阳市", "云浮市");
         addProvinceAndCities("广东省", "广州市", "深圳市", "珠海市", "汕头市", "佛山市", "韶关市", "湛江市", "肇庆市", "江门市", "茂名市", "惠州市", "梅州市", "汕尾市", "河源市", "阳江市", "清远市", "东莞市", "潮州市", "揭阳市", "云浮市");
         addProvinceAndCities("广西自治区", "南宁市", "柳州市", "桂林市", "梧州市", "北海市", "防城港市", "钦州市", "贵港市", "玉林市", "百色市", "贺州市", "河池市", "来宾市", "崇左市");
         addProvinceAndCities("海南省", "海口市", "三亚市", "三沙市", "儋州市");
@@ -83,6 +81,7 @@ public class AddressMatcher {
 
     /**
      * 根据输入的地址匹配省和市
+     *
      * @param address 输入的地址
      * @return 包含省和市的数组,若未匹配到则返回 null
      */
@@ -91,7 +90,7 @@ public class AddressMatcher {
         for (String province : PROVINCE_CITY_MAP.keySet()) {
             if (address.contains(province)) {
                 for (String city : PROVINCE_CITY_MAP.get(province)) {
-                    if (address.contains(city) &&!isPartOfRoadName(address, city)) {
+                    if (address.contains(city) && !isPartOfRoadName(address, city)) {
                         return new String[]{province, city};
                     }
                 }
@@ -106,10 +105,10 @@ public class AddressMatcher {
         for (Map.Entry<String, String> entry : ABBREVIATION_PROVINCE_MAP.entrySet()) {
             String abbreviation = entry.getKey();
             String province = entry.getValue();
-            if (address.contains(abbreviation) &&!isPartOfRoadName(address, abbreviation)) {
+            if (address.contains(abbreviation) && !isPartOfRoadName(address, abbreviation)) {
                 for (String city : PROVINCE_CITY_MAP.get(province)) {
                     String cityAbbreviation = city.replaceAll("市", "");
-                    if (address.contains(cityAbbreviation) &&!isPartOfRoadName(address, cityAbbreviation)) {
+                    if (address.contains(cityAbbreviation) && !isPartOfRoadName(address, cityAbbreviation)) {
                         return new String[]{province, city};
                     }
                 }
@@ -124,7 +123,7 @@ public class AddressMatcher {
         for (Map.Entry<String, String> entry : ABBREVIATION_CITY_MAP.entrySet()) {
             String cityAbbreviation = entry.getKey();
             String city = entry.getValue();
-            if (address.contains(cityAbbreviation) &&!isPartOfRoadName(address, cityAbbreviation)) {
+            if (address.contains(cityAbbreviation) && !isPartOfRoadName(address, cityAbbreviation)) {
                 String province = CITY_PROVINCE_MAP.get(city);
                 if (province != null) {
                     return new String[]{province, city};
@@ -141,28 +140,30 @@ public class AddressMatcher {
         return province.equals("北京市") || province.equals("天津市") || province.equals("上海市") || province.equals("重庆市");
     }
 
+    /**
+     * 只有返回为false时,才会检验成功
+     *
+     * @param address
+     * @param name
+     * @return
+     */
     private static boolean isPartOfRoadName(String address, String name) {
         boolean is = address.contains(name + "路") || address.contains(name + "大道") || address.contains(name + "街");
-        if(!is){
+        if (!is) {
             int index = address.indexOf(name) + name.length();
-            if("路道街".contains(address.substring((index + 1) >= address.length() ? address.length() - 1 : (index + 1),
-                    (index + 2) > address.length() ? address.length() : (index + 2)))){
+            String endStr = address.substring(index, Math.min((index + 2), address.length()));
+            if(endStr.contains("路") || endStr.contains("道") || endStr.contains("街")){
                 return true;
+            }else{
+                is = false;
             }
         }
-        return false;
+        return is;
     }
 
 
-
     public static void main(String[] args) {
         String address = "石家庄开封南路正定县新疆路123号";
-//        String address = "上海静安南京南路123弄56号B座1204室";
-//        String address = "河北石家庄正定县新疆路123号";
-//        String address = "河北石家庄正定县新疆路123号";
-//        String address = "河北石家庄正定县新疆路123号";
-//        String address = "河北石家庄正定县新疆路123号";
-//        String address = "河北石家庄正定县新疆路123号";
         String[] result = matchProvinceAndCity(address);
         if (result != null) {
             System.out.println("省: " + result[0]);

+ 129 - 1
src/main/java/com/skyversation/poiaddr/util/AddressTools.java

@@ -38,7 +38,7 @@ public class AddressTools {
 
     // 上海市所有的区
     private static final Set<String> SHANGHAI_DISTRICTS = new HashSet<>(Arrays.asList(
-            "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区","虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区","浦东新区", "金山区", "松江区", "青浦区", "奉贤区","崇明区"
+            "黄浦区", "徐汇区", "长宁区", "静安区", "普陀区", "虹口区", "杨浦区", "闵行区", "宝山区", "嘉定区", "浦东新区", "金山区", "松江区", "青浦区", "奉贤区", "崇明区"
     ));
 
     // 各区下辖的镇与街道
@@ -347,6 +347,53 @@ public class AddressTools {
         return result;
     }
 
+    /**
+     * Reduces a Shanghai address to the part left over once the city, district,
+     * street/town and committee portions have been removed.
+     * <p>
+     * The input is first passed through {@code reOutDistinguish}. Leading "上海市" and
+     * "上海" prefixes are then stripped, every name in {@code SHANGHAI_DISTRICTS} and every
+     * street/town name registered in {@code DISTRICT_STREETS} is deleted wherever it
+     * occurs, and finally only the text after the first "居委会" and then after the first
+     * "委员会" is kept.
+     *
+     * @param allAddress full address text
+     * @return the remaining address detail
+     */
+    public static String returnAddress(String allAddress) {
+        String rest = reOutDistinguish(allAddress);
+        // Strip the city prefix, longest spelling first.
+        rest = replaceStartStr(rest, "上海市");
+        rest = replaceStartStr(rest, "上海");
+        // Delete district names; replace() leaves the text untouched when nothing matches.
+        for (String districtName : SHANGHAI_DISTRICTS) {
+            rest = rest.replace(districtName, "");
+        }
+        // Delete the street/town names listed for each district.
+        for (String districtName : SHANGHAI_DISTRICTS) {
+            Set<String> townNames = DISTRICT_STREETS.get(districtName);
+            for (String townName : townNames) {
+                rest = rest.replace(townName, "");
+            }
+        }
+        // Keep only what follows each committee marker; both markers are 3 characters long.
+        for (String marker : new String[]{"居委会", "委员会"}) {
+            int markerAt = rest.indexOf(marker);
+            if (markerAt >= 0) {
+                rest = rest.substring(markerAt + 3);
+            }
+        }
+        return rest;
+    }
+
+    /**
+     * Strips every leading repetition of {@code replaceStr} from {@code addressStr}.
+     * <p>
+     * A {@code null} address, or a {@code null} or empty prefix, is returned unchanged.
+     * An empty prefix matches every string without consuming any characters, so it must
+     * be rejected up front to guarantee termination.
+     *
+     * @param addressStr text to trim; may be {@code null}
+     * @param replaceStr prefix to remove from the start, as often as it repeats
+     * @return {@code addressStr} without the leading prefix repetitions
+     */
+    public static String replaceStartStr(String addressStr, String replaceStr) {
+        if (addressStr == null || replaceStr == null || replaceStr.isEmpty()) {
+            return addressStr;
+        }
+        // Iterate rather than recurse so a long run of prefixes cannot exhaust the stack.
+        while (addressStr.startsWith(replaceStr)) {
+            addressStr = addressStr.substring(replaceStr.length());
+        }
+        return addressStr;
+    }
+
+
     /***
      * 根据地名地址返回到村居一级分词,村居不是很准确
      * @param address 上海市松江区车墩镇乐都村乐都路590号
@@ -487,6 +534,7 @@ public class AddressTools {
 
     /**
      * 只匹配数字
+     *
      * @param address
      * @param array
      * @param param
@@ -958,6 +1006,86 @@ public class AddressTools {
         return address.replaceAll("[^\\u4e00-\\u9fa5\\da-zA-Z]", "");
     }
 
+    /**
+     * Normalizes a Shanghai address so that it starts with "上海市" and spells out its
+     * district and town names.
+     * <p>
+     * Steps, in order:
+     * <ol>
+     *   <li>remove every character that is not a CJK ideograph, a digit or an ASCII letter;</li>
+     *   <li>make the address start with "上海市" (prepending it, or expanding a leading "上海");</li>
+     *   <li>rewrite "&lt;first two characters of a district&gt;县" to the district name;</li>
+     *   <li>expand a district that is abbreviated right after "上海市"
+     *       (e.g. "上海市嘉定…" becomes "上海市嘉定区…");</li>
+     *   <li>if "县" is still present, rewrite "&lt;town stem&gt;县" to the full town name, prefixed
+     *       with its district when no district was recognised earlier.</li>
+     * </ol>
+     *
+     * @param addressStr the raw address; may be {@code null}
+     * @return the normalized address, or {@code null} when {@code addressStr} is {@code null}
+     */
+    public static String reOutDistinguish(String addressStr) {
+        if (addressStr == null) {
+            return null;
+        }
+        // Keep only CJK ideographs, digits and ASCII letters (this also removes all whitespace).
+        addressStr = addressStr.replaceAll("[^\\u4e00-\\u9fa5\\da-zA-Z]", "");
+        if (!addressStr.startsWith("上海")) {
+            addressStr = "上海市" + addressStr;
+        } else if (!addressStr.startsWith("上海市")) {
+            // Expand only the leading "上海"; a later "上海" (e.g. inside a road name) stays as is.
+            addressStr = "上海市" + addressStr.substring("上海".length());
+        }
+        String distinguishName = "";
+        // Recognise the district anywhere in the address, accepting the old "…县" spelling.
+        for (String distinguish : SHANGHAI_DISTRICTS) {
+            String countyAlias = distinguish.substring(0, 2) + "县";
+            if (addressStr.contains(distinguish)) {
+                distinguishName = distinguish;
+            } else if (addressStr.contains(countyAlias)) {
+                distinguishName = distinguish;
+                addressStr = addressStr.replace(countyAlias, distinguish);
+            }
+        }
+        // Expand a district that is abbreviated directly after the city name.
+        for (String distinguish : SHANGHAI_DISTRICTS) {
+            if (addressStr.startsWith("上海市" + distinguish)) {
+                // Already written in full; expanding again would duplicate its last character(s).
+                distinguishName = distinguish;
+                continue;
+            }
+            // Try the name minus one character ("嘉定区" -> "嘉定"), then minus two
+            // ("浦东新区" -> "浦东"). Stems shorter than two characters are too ambiguous to match.
+            for (int dropped = 1; dropped <= 2; dropped++) {
+                int stemLength = distinguish.length() - dropped;
+                if (stemLength < 2) {
+                    break;
+                }
+                String abbreviated = "上海市" + distinguish.substring(0, stemLength);
+                if (addressStr.startsWith(abbreviated)) {
+                    distinguishName = distinguish;
+                    addressStr = "上海市" + distinguish + addressStr.substring(abbreviated.length());
+                    break;
+                }
+            }
+        }
+        // Towns may be written as "<stem>县" instead of "<stem>街道/镇/新镇/新城镇".
+        if (addressStr.contains("县")) {
+            if (!distinguishName.isEmpty()) {
+                addressStr = expandCountyTownAliases(addressStr, distinguishName, "");
+            } else {
+                // District unknown: search every district and insert the owning district name.
+                for (String distinguish : SHANGHAI_DISTRICTS) {
+                    addressStr = expandCountyTownAliases(addressStr, distinguish, distinguish);
+                }
+            }
+        }
+        return addressStr;
+    }
+
+    /**
+     * Replaces each "&lt;town stem&gt;县" in the address with the full town name of the given district.
+     *
+     * @param addressStr        the address being normalized
+     * @param district          the key used to look up towns in {@code DISTRICT_STREETS}
+     * @param replacementPrefix text placed in front of the town name in the replacement
+     * @return the address with the matching aliases expanded
+     */
+    private static String expandCountyTownAliases(String addressStr, String district, String replacementPrefix) {
+        Set<String> towns = DISTRICT_STREETS.get(district);
+        if (towns == null) {
+            return addressStr;
+        }
+        for (String town : towns) {
+            String stem = townStemForCountyAlias(town);
+            if (stem.isEmpty()) {
+                // Without a stem the alias would be a bare "县" and match every occurrence of it.
+                continue;
+            }
+            addressStr = addressStr.replace(stem + "县", replacementPrefix + town);
+        }
+        return addressStr;
+    }
+
+    /**
+     * Returns the town name without its "新城镇", "街道", "新镇" or "镇" suffix, or an empty
+     * string when the name carries none of these suffixes.
+     */
+    private static String townStemForCountyAlias(String town) {
+        if (town.endsWith("新城镇")) {
+            return town.substring(0, town.length() - 3);
+        }
+        if (town.endsWith("街道") || town.endsWith("新镇")) {
+            return town.substring(0, town.length() - 2);
+        }
+        if (town.endsWith("镇")) {
+            return town.substring(0, town.length() - 1);
+        }
+        return "";
+    }
+
+    /**
+     * Manual smoke check: prints the normalized form of a sample address, then the same
+     * address with city, district and town removed.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        final String sample = "上海嘉定曹新公路1388弄25号-A549";
+        final String normalized = reOutDistinguish(sample);
+        System.out.println("地址标准化:" + normalized);
+        final String stripped = returnAddress(sample);
+        System.out.println("过滤市区镇:" + stripped);
+    }
 
     // 静态资源:中国所有省份、地级市和县级市的名称
     private static final Set<String> PROVINCES = new HashSet<>(Arrays.asList(