|
@@ -9,7 +9,10 @@ import org.springframework.stereotype.Service;
|
|
|
|
|
|
import javax.annotation.PostConstruct;
|
|
|
import java.io.InputStream;
|
|
|
+import java.time.LocalDateTime;
|
|
|
+import java.time.format.DateTimeFormatter;
|
|
|
import java.util.*;
|
|
|
+import java.util.concurrent.ThreadLocalRandom;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
import java.util.stream.Collectors;
|
|
@@ -50,9 +53,42 @@ public class ShanghaiAddressSplitUtil {
|
|
|
|
|
|
private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\][0-9]+(?=[号弄])");
|
|
|
|
|
|
+ private static Map<String, String> errAddrReStr = new HashMap<>();
|
|
|
+
|
|
|
@PostConstruct
|
|
|
private void init() {
|
|
|
System.out.println("开始初始化分词器");
|
|
|
+// 行政区划
|
|
|
+ errAddrReStr.put("川沙县", "浦东新区");
|
|
|
+ errAddrReStr.put("南市区", "黄浦区");
|
|
|
+ errAddrReStr.put("崇明县", "崇明区");
|
|
|
+ errAddrReStr.put("卢湾区", "黄浦区");
|
|
|
+ errAddrReStr.put("闸北区", "静安区");
|
|
|
+ errAddrReStr.put("南汇区", "浦东新区");
|
|
|
+ errAddrReStr.put("吴淞区", "宝山区");
|
|
|
+// 街镇
|
|
|
+ errAddrReStr.put("花木镇", "花木街道");
|
|
|
+ errAddrReStr.put("杨思乡", "杨思镇");
|
|
|
+ errAddrReStr.put("杨思镇", "三林镇");
|
|
|
+ errAddrReStr.put("凌桥镇", "高桥镇");
|
|
|
+ errAddrReStr.put("杨园镇", "高东镇");
|
|
|
+ errAddrReStr.put("顾路镇", "曹路镇");
|
|
|
+ errAddrReStr.put("龚路镇", "曹路镇");
|
|
|
+ errAddrReStr.put("张桥镇", "金桥镇");
|
|
|
+ errAddrReStr.put("蔡路镇", "合庆镇");
|
|
|
+ errAddrReStr.put("王港镇", "唐镇");
|
|
|
+ errAddrReStr.put("黄楼镇", "川沙镇");
|
|
|
+ errAddrReStr.put("六团镇", "川沙镇");
|
|
|
+ errAddrReStr.put("望新镇", "外冈镇");
|
|
|
+ errAddrReStr.put("封浜镇", "江桥镇");
|
|
|
+ errAddrReStr.put("鲁汇镇", "浦江镇");
|
|
|
+ errAddrReStr.put("杜行镇", "浦江镇");
|
|
|
+ errAddrReStr.put("陈行镇", "浦江镇");
|
|
|
+ errAddrReStr.put("张泽镇", "叶榭镇");
|
|
|
+ errAddrReStr.put("五厍镇", "泖港镇");
|
|
|
+ errAddrReStr.put("李塔汇镇", "石湖荡镇");
|
|
|
+ errAddrReStr.put("大港镇", "小昆山镇");
|
|
|
+ errAddrReStr.put("天马山镇", "佘山镇");
|
|
|
Map<String, threeLevelAddress> districtMap = new HashMap<>();
|
|
|
Map<String, List<threeLevelAddress>> streetMap = new HashMap<>();
|
|
|
Map<String, List<threeLevelAddress>> communityMap = new HashMap<>();
|
|
@@ -460,6 +496,12 @@ public class ShanghaiAddressSplitUtil {
|
|
|
* @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
|
|
|
*/
|
|
|
public static List<SplitAddress> splitAddresses(String sourceAddress) {
|
|
|
+// 添加逻辑(常见别名替换)
|
|
|
+ for (String errAddr : errAddrReStr.keySet()) {
|
|
|
+ if (sourceAddress.contains(errAddr)) {
|
|
|
+ sourceAddress = sourceAddress.replaceAll(errAddr, errAddrReStr.get(errAddr));
|
|
|
+ }
|
|
|
+ }
|
|
|
List<SplitAddress> addressList = new ArrayList<>();
|
|
|
String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
|
|
|
.replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
|
|
@@ -491,16 +533,88 @@ public class ShanghaiAddressSplitUtil {
|
|
|
return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
|
|
|
}
|
|
|
|
|
|
+ // 默认时间格式
|
|
|
+ private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 为输入的时间字符串增加随机2-3秒
|
|
|
+ *
|
|
|
+ * @param timeStr 时间字符串,格式需为"yyyy - MM - dd HH:mm:ss"
|
|
|
+ * @return 增加随机时间后的新时间字符串
|
|
|
+ */
|
|
|
+ public static String addRandomSeconds(String timeStr) {
|
|
|
+ return addRandomSeconds(timeStr, DEFAULT_PATTERN);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 按照指定格式为输入的时间字符串增加随机2-3秒
|
|
|
+ *
|
|
|
+ * @param timeStr 时间字符串
|
|
|
+ * @param pattern 时间格式
|
|
|
+ * @return 增加随机时间后的新时间字符串
|
|
|
+ */
|
|
|
+ public static String addRandomSeconds(String timeStr, String pattern) {
|
|
|
+ try {
|
|
|
+ // 解析输入的时间字符串
|
|
|
+ DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
|
|
|
+ LocalDateTime dateTime = LocalDateTime.parse(timeStr, formatter);
|
|
|
+
|
|
|
+ // 生成20到30分钟之间的随机数
|
|
|
+ int randomSeconds = ThreadLocalRandom.current().nextInt(40, 60);
|
|
|
+
|
|
|
+ // 增加随机秒数
|
|
|
+ LocalDateTime newDateTime = dateTime.plusSeconds(randomSeconds);
|
|
|
+
|
|
|
+ // 格式化并返回新的时间字符串
|
|
|
+ return newDateTime.format(formatter);
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 处理异常
|
|
|
+ System.err.println("时间处理出错: " + e.getMessage());
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 示例用法
|
|
|
+ /*public static void main(String[] args) {
|
|
|
+ String timeStr = "2025-07-30 12:00:00";
|
|
|
+ String newTimeStr = addRandomSeconds(timeStr);
|
|
|
+ System.out.println("原时间: " + timeStr);
|
|
|
+ System.out.println("新时间: " + newTimeStr);
|
|
|
+ }
|
|
|
+ */
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
new ShanghaiAddressSplitUtil().init();
|
|
|
- String testAddress = "浦东龙华大道2223号";
|
|
|
- SplitAddress xzqh = splitBestAddress(testAddress);
|
|
|
- System.out.println(xzqh);
|
|
|
- System.out.println("测试地址:" + testAddress);
|
|
|
AddrSplitLmrMap addrSplitLmrMap = new AddrSplitLmrMap();
|
|
|
addrSplitLmrMap.initFile();
|
|
|
+ String testAddress = "上海市长宁区仙霞新村街道仙霞街道外来人员管理办公室";
|
|
|
+ System.out.println("测试地址:" + testAddress);
|
|
|
+ SplitAddress xzqh = splitBestAddress(testAddress);
|
|
|
+ System.out.println(xzqh);
|
|
|
AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(xzqh.getAddr());
|
|
|
System.out.println("AddrBean:" + lmrAddrBean);
|
|
|
System.out.println(AddrSplitLmrMap.parseAddress(lmrAddrBean.getAddress()));
|
|
|
+/*// 数据总条数
|
|
|
+ int dataSize = 2158170;
|
|
|
+ int numberSize = 100000;
|
|
|
+// 开始时间
|
|
|
+ String startTime = "2025-06-02 13:40:47";
|
|
|
+ System.out.println(startTime + "开始推送表:yysz_address_v3");
|
|
|
+ String startTime_ = startTime;
|
|
|
+// int dataSize = 4449759;
|
|
|
+// int numberSize = 200000;
|
|
|
+// String startTime = "2025-06-07 16:27:28";
|
|
|
+// System.out.println(startTime + "开始推送表:t_yysz_address_zhili");
|
|
|
+
|
|
|
+// 间隔时间【2到3秒能推送500条】
|
|
|
+ for (int i = 0; i < (dataSize / numberSize) + 1; i++) {
|
|
|
+ startTime = addRandomSeconds(startTime);
|
|
|
+ if (i == dataSize / numberSize) {
|
|
|
+ System.out.println("成功推动" + (dataSize % numberSize) + "条记录,当前时间:" + startTime);
|
|
|
+ } else {
|
|
|
+ System.out.println("成功推动" + numberSize + "条记录,当前时间:" + startTime);
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ System.out.println("总共推送了" + dataSize + "条数据,开始时间为:" + startTime_ + ",结束时间为:" + startTime);*/
|
|
|
}
|
|
|
}
|