Browse Source

弄,号分词

DESKTOP-6LTVLN7\Liumouren 2 ngày trước cách đây
mục cha
commit
ef546febe0

+ 3 - 1
src/main/java/com/skyversation/poiaddr/controller/CorporateLibraryController.java

@@ -35,6 +35,7 @@ public class CorporateLibraryController {
 
     @Resource
     private YyskAddressStandardizationServiceImpl yyskAddressStandardizationService;
+
     //    地址查询
     @RequestMapping(value = "/searchByName")
     public String searchByName(HttpServletRequest request) {
@@ -128,7 +129,8 @@ public class CorporateLibraryController {
 //            TODO 不包含的要走V3接口逻辑,匹配到的数据分别放到结果总表和回匹表(走缓存)也就是全部走Only逻辑,然后把匹配到的数据放到结果表,没匹配到的数据放到回流表中
             String page = request.getParameter("page");
             String pageSize = request.getParameter("pageSize");
-            AreaService.getInstance().selectLimitData(Integer.parseInt(page), Integer.parseInt(pageSize));
+            String batchNumber = request.getParameter("batchNumber");
+            AreaService.getInstance().selectLimitData(Integer.parseInt(page), Integer.parseInt(pageSize), batchNumber);
         } catch (Exception e) {
             e.printStackTrace();
         } finally {

+ 7 - 7
src/main/java/com/skyversation/poiaddr/service/AreaService.java

@@ -194,16 +194,16 @@ public class AreaService {
         for (YyskDmdzAddressStandardization item : dataList) {
             if (item.getSourceaddress() != null && StringUtils.hasText(item.getSourceaddress())) {
                 SplitAddress splitAddress = ShanghaiAddressSplitUtil.splitBestAddress(item.getSourceaddress());
-                if(splitAddress.getAddr().length() > 2){
+                if (splitAddress.getAddr().length() > 2) {
                     ScheduledTasks.putAllDmdzData(splitAddress.getAddr(), item);
-                }else{
+                } else {
                     ScheduledTasks.putAllDmdzData(item.getSourceaddress(), item);
                 }
                 if (item.getAddress() != null && StringUtils.hasText(item.getAddress()) && !item.getSourceaddress().contains(item.getAddress())) {
                     SplitAddress splitAddress2 = ShanghaiAddressSplitUtil.splitBestAddress(item.getAddress());
-                    if(splitAddress2.getAddr().length() > 2){
+                    if (splitAddress2.getAddr().length() > 2) {
                         ScheduledTasks.putAllDmdzData(splitAddress2.getAddr(), item);
-                    }else{
+                    } else {
                         ScheduledTasks.putAllDmdzData(item.getAddress(), item);
                     }
 
@@ -221,7 +221,7 @@ public class AreaService {
         }
     }
 
-    public void selectLimitData(int page, int pageSize) {
+    public void selectLimitData(int page, int pageSize, String batchNumber) {
         Map<String, Object> loginfoMap = new HashMap<>();
         loginfoMap.put("page", page);
         loginfoMap.put("pageSize", pageSize);
@@ -229,7 +229,7 @@ public class AreaService {
             long startTime = System.currentTimeMillis();
             loginfoMap.put("startTime", startTime);
 //          分页查询中间表数据
-            List<PendingGovernanceIntermediateTable> dataList = pgTableRepository.getAllgovernance_statusPage(pageSize);
+            List<PendingGovernanceIntermediateTable> dataList = pgTableRepository.getAllgovernance_statusPage(pageSize, batchNumber);
             List<GovernanceResultsTable> governanceResultsTables = new ArrayList<>();
             System.out.println("page:" + page + ",pageSize:" + pageSize + ",共查询待处理表数据:" + dataList.size() + "条数据,用时:" + (System.currentTimeMillis() - startTime) + "毫秒");
             int i = 0;
@@ -281,7 +281,7 @@ public class AreaService {
 //              TODO 要使用JDBC的方式去查询待处理表的数据,然后有个sql需要执行一下,筛选出结果表中不存在的数据,然后进行下一步处理。
                 if (dataList.size() >= pageSize) {
                     page++;
-                    selectLimitData(page, pageSize);
+                    selectLimitData(page, pageSize, batchNumber);
                 }
             }
         } catch (Exception e) {

+ 2 - 2
src/main/java/com/skyversation/poiaddr/service/impl/PgTableRepository.java

@@ -11,6 +11,6 @@ import java.util.List;
 // Repository for the pending-governance intermediate table (the original note lists query, update and delete as its uses).
 @Resource
 public interface PgTableRepository extends JpaRepository<PendingGovernanceIntermediateTable, String> {
-    @Query(value = "SELECT * FROM pending_governance_table where governance_status = 0 limit :pageSize", nativeQuery = true)
-    List<PendingGovernanceIntermediateTable> getAllgovernance_statusPage(@Param("pageSize") int pageSize);
+    @Query(value = "SELECT * FROM pending_governance_table where governance_status = 0 and batch_number = :batchNumber limit :pageSize", nativeQuery = true) // NOTE(review): no ORDER BY, so which rows the LIMIT returns is left to the database - confirm this is acceptable
+    List<PendingGovernanceIntermediateTable> getAllgovernance_statusPage(@Param("pageSize") int pageSize, @Param("batchNumber") String batchNumber); // returns at most pageSize rows with governance_status = 0 for the given batch_number
 }

+ 102 - 2
src/main/java/com/skyversation/poiaddr/util/AddrSplitLmrMap.java

@@ -6,6 +6,7 @@ import org.springframework.stereotype.Service;
 import javax.annotation.PostConstruct;
 import java.io.InputStream;
 import java.util.*;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
@@ -262,11 +263,110 @@ public class AddrSplitLmrMap {
         return false;
     }
 
+    /**
+     * 解析中文地址为结构化列表
+     *
+     * @param address 原始地址字符串
+     * @return 分词后的列表,按路、弄、号楼、层、室的顺序
+     */
+    public static List<String> parseAddress(String address) {
+        List<String> result = new ArrayList<>(Arrays.asList(null, null, null, null, null));
+        if (address == null || address.trim().isEmpty()) {
+            return result;
+        }
+        address = address.trim();
+
+        // 1. 提取路(简化版:直接匹配"弄"之前的所有字符)
+        String roadPattern = "(.+?)[弄]";
+        Matcher roadMatcher = Pattern.compile(roadPattern).matcher(address);
+        if (roadMatcher.find() && roadMatcher.start() == 0) {
+            result.set(0, roadMatcher.group(1));
+            address = address.substring(roadMatcher.end() - 1); // 从"弄"之后开始截取
+        } else {
+            // 如果没有找到"弄",则尝试使用原来的路名匹配逻辑
+            roadPattern = "(.+?[路街道路巷弄])";
+            roadMatcher = Pattern.compile(roadPattern).matcher(address);
+            if (roadMatcher.find() && roadMatcher.start() == 0) {
+                result.set(0, roadMatcher.group(1));
+                address = address.substring(roadMatcher.end());
+            }
+        }
+
+        // 2. 提取弄(支持连续的弄和支弄)
+        String lanePattern = "([0-9一二三四五六七八九十百千]+[弄支弄]+)";
+        Matcher laneMatcher = Pattern.compile(lanePattern).matcher(address);
+        StringBuilder laneBuilder = new StringBuilder();
+        while (laneMatcher.find() && laneMatcher.start() == 0) {
+            laneBuilder.append(laneMatcher.group(1));
+            address = address.substring(laneMatcher.end());
+            laneMatcher = Pattern.compile(lanePattern).matcher(address);
+        }
+        if (laneBuilder.length() > 0) {
+            result.set(1, laneBuilder.toString());
+        }
+
+        // 3. 提取号楼(支持XX号格式)
+        String buildingPattern = "([0-9一二三四五六七八九十百千]+[号楼栋号])";
+        Matcher buildingMatcher = Pattern.compile(buildingPattern).matcher(address);
+        if (buildingMatcher.find() && buildingMatcher.start() == 0) {
+            result.set(2, buildingMatcher.group(1));
+            address = address.substring(buildingMatcher.end());
+        }
+
+        // 智能楼层室号解析
+        String roomPattern = "([0-9]{1,2})([0-9]{2,})[室房]"; // 修改正则表达式,确保室号部分至少两位数
+        Matcher roomMatcher = Pattern.compile(roomPattern).matcher(address);
+
+        if (roomMatcher.find()) {
+            String floorPart = roomMatcher.group(1);
+            String roomPart = roomMatcher.group(2);
+            // 设置楼层
+            result.set(3, floorPart + "层");
+            // 设置室号(直接使用匹配到的部分,不去除前导零)
+            result.set(4, floorPart + roomPart + "室"); // 修改此处,直接使用roomPart
+
+            // 移除已匹配的部分
+            address = address.substring(0, roomMatcher.start()) +
+                    address.substring(roomMatcher.end());
+        } else {
+            // 4. 提取层
+            String floorPattern = "([0-9一二三四五六七八九十百千]+[层楼])";
+            Matcher floorMatcher = Pattern.compile(floorPattern).matcher(address);
+            if (floorMatcher.find()) {
+                result.set(3, floorMatcher.group(1));
+                address = address.substring(0, floorMatcher.start()) + address.substring(floorMatcher.end());
+            }
+
+            // 5. 提取室
+            String roomPatternSimple = "([0-9]+[室房])";
+            Matcher roomMatcherSimple = Pattern.compile(roomPatternSimple).matcher(address);
+            if (roomMatcherSimple.find()) {
+                result.set(4, roomMatcherSimple.group(1));
+            }
+        }
+
+        return result;
+    }
+
     public static void main(String[] args) {
-        AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
+        /*AddrSplitLmrMap AddrSplitLmrMap = new AddrSplitLmrMap();
         AddrSplitLmrMap.initFile();
         System.out.println(outAddrMapInAddr("村165号"));
         System.out.println(outAddrMapInAddr("上海市松江区乐都路"));
-        System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));
+        System.out.println(outAddrMapInAddr("云南省昭通市昭阳区永丰镇绿荫社区居民委员会管湾村二十五组205号"));*/
+        // Sample run of parseAddress (the original note says it covers all typical scenarios; only one address is exercised here)
+        // Address whose floor number is a single digit
+        String address4 = "广富林1188弄167号313室";
+        System.out.println("\n测试地址: " + address4);
+        printParsedResult(parseAddress(address4));
+    }
+
+    private static void printParsedResult(List<String> parsed) {
+        System.out.println("解析结果:");
+        System.out.println("路: " + parsed.get(0));
+        System.out.println("弄: " + parsed.get(1));
+        System.out.println("号楼: " + parsed.get(2));
+        System.out.println("层: " + parsed.get(3));
+        System.out.println("室: " + parsed.get(4));
     }
 }

+ 6 - 6
src/main/java/com/skyversation/poiaddr/util/tasks/ScheduledTasks.java

@@ -124,7 +124,7 @@ public class ScheduledTasks {
      * 如果返回了正确的数据,入到地址库表中,并更新state为2(同时添加到地址库中)
      * 否则更新状态为1
      */
-//    @Scheduled(cron = "0 30 2 * * ?")
+    @Scheduled(cron = "0 30 18 * * ?")
     public void dbdataCallBackTask() {
         try {
 //          根据当前日期生成开始id
@@ -141,7 +141,7 @@ public class ScheduledTasks {
                 infoItem.put("结束时间", System.currentTimeMillis());
                 infoItem.put("备注", "数据条数" + tAddressCallbacks.size());
                 List<TAddressCallback> updateCallBackDatas = new ArrayList<>();
-                List<YyskDmdzAddressStandardization> addDmdzDataList = new ArrayList<>();
+//                List<YyskDmdzAddressStandardization> addDmdzDataList = new ArrayList<>();
 //              然后调用市中心接口
                 ScheduledTasks.logInfos.add(infoItem);
                 for (TAddressCallback item : tAddressCallbacks) {
@@ -188,10 +188,10 @@ public class ScheduledTasks {
                                             contentBean.getAdname() + contentBean.getCommunity() + splitAddress.getAddr());
                                     item.setStatusTag(2);
 //                                  添加数据到地址库和缓存
-                                    if (StringUtils.hasText(item.getLat()) && StringUtils.hasText(item.getLon())) {
+                                    /*if (StringUtils.hasText(item.getLat()) && StringUtils.hasText(item.getLon())) {
                                         startOid++;
                                         addDmdzDataList.add(TAddressCallbackToYyskDmdzAddressStandardization(item, Long.valueOf(getCurrentDateTimeByPatern("yyyyddMMhhmm") + startOid)));
-                                    }
+                                    }*/
                                 } else {
                                     item.setStatusTag(1);
                                 }
@@ -220,9 +220,9 @@ public class ScheduledTasks {
                         e.printStackTrace();
                     }
                 }
-                if (addDmdzDataList.size() > 0) {
+                /*if (addDmdzDataList.size() > 0) {
                     yyszAddressRepository.saveAll(addDmdzDataList);
-                }
+                }*/
                 if (updateCallBackDatas.size() > 0) {
                     infoItem.put("操作", "将治理的数据入库更新callBack表");
                     long inBaseTime = System.currentTimeMillis();