浏览代码

优化分词判断逻辑

DESKTOP-6LTVLN7\Liumouren 1 月之前
父节点
当前提交
caa453b306

+ 17 - 9
src/main/java/com/skyversation/poiaddr/addquery/AddressQueryEngine.java

@@ -295,13 +295,14 @@ public class AddressQueryEngine {
                     return null;
                 }
             }
+//          2. 搜索到结果去除行政区划后,存在包含关系,则使用;
             case LEVEL_CONTAINS_2 -> {
                 if (result != null && result.getData() != null && result.getData().size() > 0) {
                     List<AddressResult.ContentBean> contentBean = result.getData();
                     for (AddressResult.ContentBean contentBean1 : contentBean) {
-                        String address = contentBean1.getAddress().replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", "");
-                        String address2 = addr.replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", "");
-                        if (address.contains(address2)) {
+                        String address = addressReplaceAll(contentBean1.getAddress());
+                        String address2 = addressReplaceAll(addr);
+                        if (address.contains(address2) || address2.contains(address)) {
                             return contentBean1;
                         }
                     }
@@ -309,13 +310,14 @@ public class AddressQueryEngine {
                     return null;
                 }
             }
+//          3. 搜索到结果,数字进行分词,数字匹配则使用;
             case LEVLE_NUMBER_3 -> {
                 if (result != null && result.getData() != null && result.getData().size() > 0) {
                     List<AddressResult.ContentBean> contentBean = result.getData();
                     for (AddressResult.ContentBean contentBean1 : contentBean) {
                         String address = extractNumbers(contentBean1.getAddress(), false);
                         String address2 = extractNumbers(addr, false);
-                        if (address.contains(address2)) {
+                        if (address.contains(address2) || address2.contains(address)) {
                             return contentBean1;
                         }
                     }
@@ -323,15 +325,16 @@ public class AddressQueryEngine {
                     return null;
                 }
             }
+//          4. 搜索到结果,数字与文本均匹配,则使用;
             case LEVEL_NUMBER_TEXT_4 -> {
                 if (result != null && result.getData() != null && result.getData().size() > 0) {
                     List<AddressResult.ContentBean> contentBean = result.getData();
                     for (AddressResult.ContentBean contentBean1 : contentBean) {
-                        String address = extractNumbers(contentBean1.getAddress().replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", ""), true);
-                        String address2 = extractNumbers(addr.replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", ""), true);
+                        String address = extractNumbers(addressReplaceAll(contentBean1.getAddress()), true);
+                        String address2 = extractNumbers(addressReplaceAll(addr), true);
                         String addressNumber = extractNumbers(contentBean1.getAddress(), false);
                         String addressNumber2 = extractNumbers(addr, false);
-                        if (address.contains(address2) && addressNumber.contains(addressNumber2)) {
+                        if ((address.contains(address2) || address2.contains(address)) && (addressNumber.contains(addressNumber2) || addressNumber2.contains(addressNumber))) {
                             return contentBean1;
                         }
                     }
@@ -339,12 +342,13 @@ public class AddressQueryEngine {
                     return null;
                 }
             }
+//          5. 结果与基准数据完全一致,则使用。
             case LEVEL_TOTAL_CONTAINS_5 -> {
                 if (result != null && result.getData() != null && result.getData().size() > 0) {
                     List<AddressResult.ContentBean> contentBean = result.getData();
                     for (AddressResult.ContentBean contentBean1 : contentBean) {
-                        String address = contentBean1.getAddress().replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", "");
-                        String address2 = addr.replaceAll("上海市", "").replaceAll("青浦", "").replaceAll("区", "");
+                        String address = addressReplaceAll(contentBean1.getAddress());
+                        String address2 = addressReplaceAll(addr);
                         if (address.equals(address2)) {
                             return contentBean1;
                         }
@@ -360,6 +364,10 @@ public class AddressQueryEngine {
         return null;
     }
 
+    /**
+     * Tokens stripped from an address before comparison, applied in this order:
+     * the city name, the district name, the district suffix, then separators,
+     * punctuation and building/lane unit markers.
+     * NOTE(review): the original chain removed "," twice; one of the two may have
+     * been meant as the full-width comma (U+FF0C) - confirm against the data.
+     */
+    private static final String[] ADDRESS_NOISE_TOKENS = {
+            "上海市", "青浦", "区", "-", "_", "号", "/", " ", ",", ".", "。", "+", "*", "弄", "栋", "幢"
+    };
+
+    /**
+     * Removes every occurrence of each token in {@link #ADDRESS_NOISE_TOKENS} from
+     * the given address so that two addresses can be compared on their remaining text.
+     *
+     * <p>Tokens are matched literally (no regular expressions), so characters such
+     * as {@code .}, {@code +} and {@code *} need no escaping.
+     *
+     * @param address the raw address text; must not be {@code null}
+     * @return the address with all listed tokens removed (possibly empty)
+     * @throws NullPointerException if {@code address} is {@code null}
+     */
+    public static String addressReplaceAll(String address) {
+        String normalized = address;
+        // Sequential literal replacement keeps the same removal order as the
+        // original replaceAll chain, so the result is identical.
+        for (String token : ADDRESS_NOISE_TOKENS) {
+            normalized = normalized.replace(token, "");
+        }
+        return normalized;
+    }
+
     /**
      * 得到字符串中所有的数字
      *

+ 24 - 11
src/main/java/com/skyversation/poiaddr/controller/PoiAddressController.java

@@ -11,11 +11,13 @@ import com.skyversation.poiaddr.util.RequestUtils;
 import com.skyversation.poiaddr.util.SerializationUtils;
 import com.skyversation.poiaddr.util.fileTools.ReadFileData;
 import com.skyversation.poiaddr.util.geotools.GeoJsonIntersector;
+import com.skyversation.poiaddr.util.geotools.GeoJsonPointInRegion;
 import com.skyversation.poiaddr.util.status.AddressLevel;
 import lombok.extern.slf4j.Slf4j;
 import org.locationtech.jts.geom.Coordinate;
 import org.locationtech.jts.geom.GeometryFactory;
 import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.io.ParseException;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.http.MediaType;
 import org.springframework.validation.annotation.Validated;
@@ -112,7 +114,7 @@ public class PoiAddressController {
                                 @RequestParam(name = "matchingLevel", required = false) Integer matchingLevel,
                                 @RequestParam(name = "regionalJudgment", required = false) MultipartFile regionalJudgment,
                                 @RequestParam(name = "outFileType", required = false) String outFileType,
-                                @RequestParam(name = "outputFileName", required = false) String outputFileName) throws IOException {
+                                @RequestParam(name = "outputFileName", required = false) String outputFileName) throws IOException, ParseException {
 //            参数合法性判断
         if ((file != null && !file.isEmpty()) && (addrColNames != null && !addrColNames.isEmpty())) {
             outputFileName += UUID.randomUUID();
@@ -163,6 +165,11 @@ public class PoiAddressController {
 //          封装解析文件的参数
 //          TODO 文件数据解析
             List<FileDataDto> fileDataDtoList = ReadFileData.ReadMultipartFile(file);
+            List<FileDataDto> regionalDataList = new ArrayList<>();
+            if (regionalJudgment != null && !regionalJudgment.isEmpty()) {
+//                  解析geojson文件得到区域
+                regionalDataList = ReadFileData.ReadMultipartFile(regionalJudgment);
+            }
 //          TODO 补充FileDataDto中的搜索条件参数
             for (FileDataDto fileDataDto : fileDataDtoList) {
 //              搜索等级
@@ -172,27 +179,25 @@ public class PoiAddressController {
                 Map<String, Object> properties = fileDataDto.getProperties();
                 try {
                     if (properties != null && properties.get(addr1Key) != null && addr1Key != null && properties.get(addr1Key).toString().length() > 2 && ExcelReaderUtils.isOtherDistrictThanQingpu(properties.get(addr1Key).toString())) {
-                        String address = "上海市青浦区" + properties.getOrDefault(addr1Key, "").toString().replace("青浦区", "").replace(
-                                "青浦", "").replace("上海市", "").replace("上海", "").replaceAll("-", "");
+                        String address = "上海市青浦区" + AddressQueryEngine.addressReplaceAll(properties.getOrDefault(addr1Key, "").toString());
                         fileDataDto.setAddr1(address);
                     }
-                    if (addr2Key != null && !addr2Key.isEmpty() && ExcelReaderUtils.isOtherDistrictThanQingpu(addr2Key)) {
-                        String address = "上海市青浦区" + properties.getOrDefault(addr2Key.trim(), "").toString().replace("青浦区", "").replace(
-                                "青浦", "").replace("上海市", "").replace("上海", "").replace("/弄", "").replace("/号", "").replace("弄", "").replace("/幢", "").replace("/室", " ").replace("(号楼) ", " ");
+                    if (properties != null && properties.get(addr1Key) != null && addr2Key != null && properties.get(addr2Key).toString().length() > 2 && ExcelReaderUtils.isOtherDistrictThanQingpu(properties.get(addr2Key).toString())) {
+                        String address = "上海市青浦区" + AddressQueryEngine.addressReplaceAll(properties.getOrDefault(addr2Key, "").toString());
                         fileDataDto.setAddr2(address);
                     }
                 } catch (Exception e) {
                     System.err.println(e);
                 }
 //              判断是否有参考经纬度字段
-                if (latKey != null && !latKey.isEmpty() && lonKey != null && !lonKey.isEmpty() && properties.get(latKey) != null && properties.get(lonKey) != null) {
+                if (properties != null && latKey != null && !latKey.isEmpty() && lonKey != null && !lonKey.isEmpty() && properties.get(latKey) != null && properties.get(lonKey) != null) {
                     String latStr = properties.get(latKey).toString();
                     String lonStr = properties.get(lonKey).toString();
                     fileDataDto.setLat(Double.valueOf(latStr));
                     fileDataDto.setLon(Double.valueOf(lonStr));
-                } else if (!latLonKey.isEmpty() && !SplitStr.isEmpty()) {
+                } else if (!latLonKey.isEmpty()) {
                     String[] latLonKeys = latLonKey.split(SplitStr);
-                    if (properties.get(latLonKeys[0]) != null && properties.get(latLonKeys[1]) != null) {
+                    if (properties != null && properties.get(latLonKeys[0]) != null && properties.get(latLonKeys[1]) != null) {
                         String latStr = properties.get(latLonKeys[0]).toString();
                         String lonStr = properties.get(latLonKeys[1]).toString();
                         fileDataDto.setLat(Double.valueOf(latStr));
@@ -303,9 +308,17 @@ public class PoiAddressController {
                         }
                     }
                 }
-                //          TODO 区域判断
-                if (regionalJudgment != null && !regionalJudgment.isEmpty()) {
+                //          TODO 区域判断(得到经纬度后,要判断得到的点是否在传入的geojson面内)
+                if (regionalDataList != null && regionalDataList.size() > 0) {
 //                  解析geojson文件得到区域
+                    boolean isArea= false;
+                    for(FileDataDto regionlItem: regionalDataList){
+                        if(GeoJsonPointInRegion.isPointInGeoJsonRegion(regionlItem.getGeometry().toString(),fileDataDto.getLat(),fileDataDto.getLon())){
+                            isArea = true;
+                            break;
+                        };
+                    }
+                    fileDataDto.getProperties().put("是否在区域内",isArea ? "是" : "否");
                 }
                 dataList.add(fileDataDto.getProperties());
             }