123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672 |
- package com.skyversation.poiaddr.util;
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import com.skyversation.poiaddr.addquery.AddressQueryEngine;
- import com.skyversation.poiaddr.bean.AddressResult;
- import com.skyversation.poiaddr.entity.AddrBean;
- import com.skyversation.poiaddr.service.AreaService;
- import lombok.AllArgsConstructor;
- import org.springframework.stereotype.Service;
- import javax.annotation.PostConstruct;
- import java.io.InputStream;
- import java.time.LocalDateTime;
- import java.time.format.DateTimeFormatter;
- import java.util.*;
- import java.util.concurrent.ThreadLocalRandom;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.util.stream.Collectors;
- @Service
- public class ShanghaiAddressSplitUtil {
- @AllArgsConstructor
- static class threeLevelAddress {
- String district;
- String street;
- String community;
- String districtFullName;
- String streetFullName;
- String communityFullName;
- String districtCode;
- String streetCode;
- String communityCode;
- }
- private static Map<String, List<threeLevelAddress>> All_STREET_IN_SHANGHAI;
- private static Map<String, List<threeLevelAddress>> All_COMMUNITY_IN_SHANGHAI;
- private static Map<String, List<String>> DISTRICT_TO_STREET_MAP;
- private static Map<String, List<String>> STREET_TO_COMMUNITY_MAP;
- private static Map<String, List<String>> DISTRICT_TO_COMMUNITY_MAP;
- private static final Pattern LEVEL_1_SUFFIX_PATTERN = Pattern.compile("^(?:区|新区)");
- private static final Pattern LEVEL_2_SUFFIX_PATTERN = Pattern.compile("^(?:街道|路街道|镇|乡|新镇)");
- private static final Pattern LEVEL_3_SUFFIX_PATTERN = Pattern.compile("^(?:居委会|管委会居委会|管委会|社区|社区居委会|居民委员会|居民区|居委|村|村委会|园区|苑|安居办|居|工作站|会)");
- private static final Pattern ROAD_SUFFIX_PATTERN = AddressSplitUtil.ROAD_SUFFIX_PATTERN;
- private static final Pattern UN_ADDRESS_PATTERN = Pattern.compile("http");
- private static final Pattern OVER_SPLIT = Pattern.compile("^(?:[0123456789-\\-一二三四五六七八九十大A-za-z]{0,4}[街队组栋号站弄]|(?:车站|工业区|市场|农贸市场)(?![东南西北中一二三四五六七八九十公大小支新老环]路)|[A-za-z]?[0123456789-\\-])");
- private static final Pattern MULTI_ADDRESS = Pattern.compile("(?<=[0-9])[号弄]?[、/\\\\][0-9]+(?=[号弄])");
- public static Map<String, String> errAddrReStr = new HashMap<>();
- @PostConstruct
- private void init() {
- System.out.println("开始初始化分词器");
- // 行政区划
- errAddrReStr.put("川沙县", "浦东新区");
- errAddrReStr.put("南市区", "黄浦区");
- errAddrReStr.put("崇明县", "崇明区");
- errAddrReStr.put("卢湾区", "黄浦区");
- errAddrReStr.put("闸北区", "静安区");
- errAddrReStr.put("南汇区", "浦东新区");
- errAddrReStr.put("吴淞区", "宝山区");
- // 街镇
- errAddrReStr.put("花木镇", "花木街道");
- errAddrReStr.put("杨思乡", "杨思镇");
- errAddrReStr.put("杨思镇", "三林镇");
- errAddrReStr.put("凌桥镇", "高桥镇");
- errAddrReStr.put("杨园镇", "高东镇");
- errAddrReStr.put("顾路镇", "曹路镇");
- errAddrReStr.put("龚路镇", "曹路镇");
- errAddrReStr.put("张桥镇", "金桥镇");
- errAddrReStr.put("蔡路镇", "合庆镇");
- errAddrReStr.put("王港镇", "唐镇");
- errAddrReStr.put("黄楼镇", "川沙镇");
- errAddrReStr.put("六团镇", "川沙镇");
- errAddrReStr.put("望新镇", "外冈镇");
- errAddrReStr.put("封浜镇", "江桥镇");
- errAddrReStr.put("鲁汇镇", "浦江镇");
- errAddrReStr.put("杜行镇", "浦江镇");
- errAddrReStr.put("陈行镇", "浦江镇");
- errAddrReStr.put("张泽镇", "叶榭镇");
- errAddrReStr.put("五厍镇", "泖港镇");
- errAddrReStr.put("李塔汇镇", "石湖荡镇");
- errAddrReStr.put("大港镇", "小昆山镇");
- errAddrReStr.put("天马山镇", "佘山镇");
- errAddrReStr.put("江海镇", "南桥镇");
- errAddrReStr.put("工业区", "");
- Map<String, threeLevelAddress> districtMap = new HashMap<>();
- Map<String, List<threeLevelAddress>> streetMap = new HashMap<>();
- Map<String, List<threeLevelAddress>> communityMap = new HashMap<>();
- Map<String, List<String>> districtToStreetMap = new HashMap<>();
- Map<String, List<String>> streetToCommunityMap = new HashMap<>();
- String file = "上海市县乡记录.xlsx";
- InputStream is = ShanghaiAddressSplitUtil.class.getResourceAsStream(file);
- if (is == null) is = ShanghaiAddressSplitUtil.class.getResourceAsStream("/" + file);
- if (is == null) throw new RuntimeException("无法找到" + file);
- try {
- for (Map<String, Object> row : ExcelReaderUtils.readExcel(is)) {
- String district = Optional.ofNullable(row.get("县级市简称")).map(Object::toString).orElse("");
- String street = Optional.ofNullable(row.get("街道简称")).map(Object::toString).orElse("");
- String community = Optional.ofNullable(row.get("居委")).map(Object::toString).orElse("");
- String districtFullName = Optional.ofNullable(row.get("县级市")).map(Object::toString).orElse("");
- String streetFullName = Optional.ofNullable(row.get("街道")).map(Object::toString).orElse("");
- String communityFullName = Optional.ofNullable(row.get("居委")).map(Object::toString).orElse("");
- String districtCode = Optional.ofNullable(row.get("县级市编码")).map(Object::toString).orElse("");
- String streetCode = Optional.ofNullable(row.get("街道编码")).map(Object::toString).orElse("");
- String communityCode = Optional.ofNullable(row.get("居委编码")).map(Object::toString).orElse("");
- initData(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode, districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
- }
- //自贸区
- initData("浦东", "试验区", "", "浦东新区", "自由贸易试验区", "", "310115", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
- //松江镇特别处理
- initData("松江", "松江", "", "松江区", "", "", "310117", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
- //金山工业区
- initData("金山", "金山工业区", "", "金山区", "金山工业区", "", "310116", "", "", districtMap, streetMap, communityMap, districtToStreetMap, streetToCommunityMap);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- All_STREET_IN_SHANGHAI = Collections.unmodifiableMap(streetMap);
- All_COMMUNITY_IN_SHANGHAI = Collections.unmodifiableMap(communityMap);
- DISTRICT_TO_STREET_MAP = Collections.unmodifiableMap(districtToStreetMap);
- STREET_TO_COMMUNITY_MAP = Collections.unmodifiableMap(streetToCommunityMap);
- DISTRICT_TO_COMMUNITY_MAP = Collections.unmodifiableMap(DISTRICT_TO_STREET_MAP.entrySet().stream()
- .collect(Collectors.toMap(
- Map.Entry::getKey,
- entry -> entry.getValue().stream()
- .flatMap(street -> STREET_TO_COMMUNITY_MAP.getOrDefault(street, Collections.emptyList()).stream())
- .collect(Collectors.toList())
- )));
- System.out.println("分词器初始化完成");
- // TODO 网络连通性测试,可以请求一下测试的市中心地址
- try {
- System.out.println("电脑最大线程数:" + Runtime.getRuntime().availableProcessors());
- System.out.println("<<<<<<<<------开始网络连通性测试");
- long startTime = System.currentTimeMillis();
- List<String> addrs = new ArrayList<>();
- addrs.add("上海市浦东新区芳心路251弄");
- AddressResult addressResult = AddressQueryEngine.getInstance().commonSearchByName(addrs);
- long endTime = System.currentTimeMillis();
- System.out.println(">>>>>>>>------网络连通性测试完成!用时" + (endTime - startTime) / 1000 + "秒!结果:" + addressResult);
- } catch (Exception e) {
- e.printStackTrace();
- System.err.println(">>>>>>>>------网络连通性测试结果:" + e);
- }
- System.out.println("<<<<<<<<------开始缓存ScheduledTasks.allDmdzData对象");
- AreaService.getInstance().getAllDmdzAddressDatas();
- }
- private static void initData(String district, String street, String community, String districtFullName, String streetFullName, String communityFullName, String districtCode, String streetCode, String communityCode, Map<String, threeLevelAddress> districtMap, Map<String, List<threeLevelAddress>> streetMap, Map<String, List<threeLevelAddress>> communityMap, Map<String, List<String>> districtToStreetMap, Map<String, List<String>> streetToCommunityMap) {
- threeLevelAddress add = new threeLevelAddress(district, street, community, districtFullName, streetFullName, communityFullName, districtCode, streetCode, communityCode);
- districtMap.put(district, add);
- if (!streetMap.containsKey(street)) streetMap.put(street, new ArrayList<>());
- streetMap.get(street).add(add);
- if (!communityMap.containsKey(community)) communityMap.put(community, new ArrayList<>());
- communityMap.get(community).add(add);
- if (!districtToStreetMap.containsKey(district)) districtToStreetMap.put(district, new ArrayList<>());
- districtToStreetMap.get(district).add(street);
- if (!streetToCommunityMap.containsKey(street)) streetToCommunityMap.put(street, new ArrayList<>());
- streetToCommunityMap.get(street).add(community);
- }
- private static class splittingAddress {
- SplitAddress splitAddress;
- int street = -1;
- int community = -1;
- Map<Integer, String> streetMap = new HashMap<>();
- Map<Integer, String> communityMap = new HashMap<>();
- threeLevelAddress threeLevelAddress;
- String targetString;
- void findStreet() {
- Map<Integer, String> results = null;
- int completeMatchIndex = -1;
- //首先尝试在一选下匹配
- if (splitAddress.getDistrict() != null) {
- results = contain(this.targetString, DISTRICT_TO_STREET_MAP.get(splitAddress.getDistrict()), 0);
- completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
- }
- //一选不存在或匹配无结果,直接搜全国
- if (results == null || results.isEmpty()) {
- results = contain(this.targetString, All_STREET_IN_SHANGHAI.keySet(), 0);
- if (completeMatchIndex == -1)
- completeMatchIndex = washResult(this.targetString, results, LEVEL_2_SUFFIX_PATTERN, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN);
- }
- streetMap.putAll(results);
- street = completeMatchIndex;
- //仅有一个选择时当成一选
- if (streetMap.size() == 1) {
- street = (int) streetMap.keySet().toArray()[0];
- }
- }
- void findCommunity() {
- Map<Integer, String> results = null;
- int completeMatchCommunity = -1;
- String sub = targetString;
- //尝试一选
- if (street != -1) {
- sub = targetString.substring(street + streetMap.get(street).length());
- Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
- if (m.find()) {
- sub = sub.substring(m.end());
- }
- results = contain(sub, STREET_TO_COMMUNITY_MAP.get(streetMap.get(street)), targetString.length() - sub.length());
- completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
- }
- //一选不存在或匹配无结果,先搜全区
- if ((results == null || results.isEmpty()) && splitAddress.getDistrict() != null) {
- results = contain(sub, DISTRICT_TO_COMMUNITY_MAP.get(splitAddress.getDistrict()), targetString.length() - sub.length());
- if (completeMatchCommunity == -1)
- completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
- }
- //最后全市
- if (results == null || results.isEmpty()) {
- results = contain(sub, All_COMMUNITY_IN_SHANGHAI.keySet(), targetString.length() - sub.length());
- if (completeMatchCommunity == -1)
- completeMatchCommunity = washResult(targetString, results, LEVEL_3_SUFFIX_PATTERN, LEVEL_1_SUFFIX_PATTERN, LEVEL_2_SUFFIX_PATTERN);
- }
- Iterator<Integer> iterator = results.keySet().iterator();
- while (iterator.hasNext()) {
- int key = iterator.next();
- String name = results.get(key);
- if (key > 0 && name.equals("江镇") && targetString.charAt(key - 1) == '松') {
- iterator.remove();
- }
- if (key > 0 && name.equals("镇江")) {
- String sub1 = targetString.substring(key + 2);
- if (LEVEL_3_SUFFIX_PATTERN.matcher(sub1).matches()) {
- iterator.remove();
- }
- }
- }
- communityMap.putAll(results);
- //仅有一个选择时当成一选
- if (communityMap.size() == 1) {
- int index = (int) communityMap.keySet().toArray()[0];
- if (street != index) community = index;
- }
- }
- void matchThreeLevelAdd() {
- int handingPoint = 0;
- threeLevelAddress handingTLA = new threeLevelAddress("", "", "", "", "", "", "", "", "");
- for (String communityName : new HashSet<>(communityMap.values())) {
- if (communityName.isEmpty()) continue;
- for (threeLevelAddress t : All_COMMUNITY_IN_SHANGHAI.get(communityName)) {
- int point = checkTLA(t);
- if (point > handingPoint) {
- handingPoint = point;
- handingTLA = t;
- }
- }
- }
- for (String streetName : new HashSet<>(streetMap.values())) {
- if (streetName.isEmpty()) continue;
- for (threeLevelAddress t : All_STREET_IN_SHANGHAI.get(streetName)) {
- int point = checkTLA(t);
- if (point > handingPoint) {
- handingPoint = point;
- handingTLA = t;
- }
- }
- }
- threeLevelAddress = handingTLA;
- }
- int checkTLA(threeLevelAddress t) {
- int output = 0;
- if (t.district.equals(splitAddress.getDistrict())) output += 1;
- if (streetMap.containsValue(t.street)) output += 10;
- if (street != -1 && streetMap.get(street).equals(t.street)) output += 1000;
- if (communityMap.containsValue(t.community)) output += 100;
- if (community != -1 && communityMap.get(community).equals(t.community)) output += 1000;
- if (community != -1 && Pattern.matches(".*\\d$", communityMap.get(community))) output -= 1000;
- return output;
- }
- void guessFirstMatch() {
- //先街道
- if (!streetMap.isEmpty() && street == -1) {
- for (int i : streetMap.keySet()) {
- if (streetMap.get(i).equals(threeLevelAddress.street) && (i < street || street == -1)) {
- street = i;
- }
- }
- }
- //再居委
- if (community == -1 && !communityMap.isEmpty()) {
- for (int i : communityMap.keySet()) {
- if (communityMap.get(i).equals(threeLevelAddress.community) && street != i && (i < community || community == -1)) {
- community = i;
- }
- }
- }
- }
- }
- static int washResult(String sourceAddress, Map<Integer, String> result, Pattern should, Pattern... never) {
- Map<Integer, String> output = new HashMap<>();
- int outputInt = -1;
- for (int index : result.keySet()) {
- String name = result.get(index);
- String sub = sourceAddress.substring(index + name.length());
- //匹配到后缀时直接保留
- if (should.matcher(sub).find()) {
- outputInt = index;
- } else {
- //去除南京路,北京大道型选手
- if (ROAD_SUFFIX_PATTERN.matcher(sub).find()) {
- continue;
- }
- boolean skip = false;
- for (Pattern p : never) {
- if (p.matcher(sub).find()) skip = true;
- }
- if (skip) continue;
- }
- output.put(index, name);
- }
- result.clear();
- result.putAll(output);
- return outputInt;
- }
- /**
- * 检查字符串含有哪些字符,输出这些匹配字符的位置和字符的map
- *
- * @param s 被检查字符串
- * @param nameList 检查范围
- */
- private static Map<Integer, String> contain(String s, Iterable<String> nameList, int offset) {
- Map<Integer, String> output = new HashMap<>();
- if (nameList == null) {
- return output;
- }
- for (String name : nameList) {
- if (name.isEmpty()) continue;
- int index = -1;
- while ((index = s.indexOf(name, index + 1)) != -1) {
- output.put(index + offset, name);
- }
- }
- return output;
- }
- private static SplitAddress split(String sourceAddress) {
- //事前准备
- String beautyAddress = sourceAddress.replaceAll("[\\s]+", "");
- SplitAddress splitAddress = new SplitAddress();
- splitAddress.setFullAddress(sourceAddress);
- splittingAddress splittingAddress = new splittingAddress();
- splittingAddress.splitAddress = splitAddress;
- String[] result = AddressSplitUtil.splitAddress(beautyAddress);
- splitAddress.setProvince(result[0]);
- splitAddress.setCity(result[1]);
- splitAddress.setDistrict(result[2]);
- //检查是否在外省,未找到省市或者在省市中找到上海,或者找到上海的区都算作省内
- Map<Integer, String> districtContainResult = contain(beautyAddress, DISTRICT_TO_COMMUNITY_MAP.keySet(), 0);
- int disIndex = washResult(beautyAddress, districtContainResult, LEVEL_1_SUFFIX_PATTERN);
- Map<Integer, String> streetContainResult = contain(beautyAddress, STREET_TO_COMMUNITY_MAP.keySet(), 0);
- int streetIndex = washResult(beautyAddress, districtContainResult, LEVEL_2_SUFFIX_PATTERN);
- splitAddress.setAddr(result[3]);
- if (!((result[0].isEmpty() || result[0].equals("上海市")) && (result[1].isEmpty() || result[1].equals("上海市")) ||
- !districtContainResult.isEmpty() || !streetContainResult.isEmpty())) {
- splitAddress.setStatus(2);
- return splitAddress;
- }
- if (!districtContainResult.isEmpty()) {
- if (disIndex != -1) {
- String district = districtContainResult.get(disIndex);
- threeLevelAddress disTLA = All_COMMUNITY_IN_SHANGHAI.get(DISTRICT_TO_COMMUNITY_MAP.get(district).get(0)).get(0);
- splitAddress.setDistrict(disTLA.districtFullName);
- splitAddress.setDistrictCode(disTLA.districtCode);
- }
- }
- splitAddress.setProvince("上海市");
- splitAddress.setCity("上海市");
- splitAddress.setCityCode("3101");
- splittingAddress.targetString = beautyAddress;
- //开始省内分词
- splittingAddress.findStreet();
- splittingAddress.findCommunity();
- splittingAddress.matchThreeLevelAdd();
- splittingAddress.guessFirstMatch();
- if (splittingAddress.street != -1 || splittingAddress.community != -1) {
- splitAddress.setStreet(splittingAddress.threeLevelAddress.streetFullName);
- splitAddress.setStreetCode(splittingAddress.threeLevelAddress.streetCode);
- splitAddress.setDistrict(splittingAddress.threeLevelAddress.districtFullName);
- splitAddress.setDistrictCode(splittingAddress.threeLevelAddress.districtCode);
- }
- if (splittingAddress.community != -1) {
- splitAddress.setCommunity(splittingAddress.threeLevelAddress.communityFullName);
- splitAddress.setCommunityCode(splittingAddress.threeLevelAddress.communityCode);
- }
- //检查是否能够分离
- if (splittingAddress.community == -1 && splittingAddress.street == -1) {
- //检查是否是非地址
- if (UN_ADDRESS_PATTERN.matcher(splitAddress.getFullAddress()).find()) {
- splitAddress.setStatus(3);
- return splitAddress;
- }
- if (disIndex != -1) {
- String sub = beautyAddress.substring(disIndex + districtContainResult.get(disIndex).length());
- Matcher m = LEVEL_1_SUFFIX_PATTERN.matcher(sub);
- if (m.find()) {
- sub = sub.substring(m.end());
- }
- splitAddress.setAddr(sub);
- }
- splitAddress.setStatus(1);
- if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty())
- splitAddress.setStatus(4);
- return splitAddress;
- } else if (splittingAddress.street > splittingAddress.community) {
- String sub = beautyAddress.substring(splittingAddress.street + splittingAddress.streetMap.get(splittingAddress.street).length());
- Matcher m = LEVEL_2_SUFFIX_PATTERN.matcher(sub);
- if (m.find()) {
- sub = sub.substring(m.end());
- }
- splitAddress.setAddr(sub);
- } else {
- String sub = beautyAddress.substring(
- splittingAddress.community +
- splittingAddress.communityMap.
- get(splittingAddress.community).length());
- Matcher m = LEVEL_3_SUFFIX_PATTERN.matcher(sub);
- if (m.find()) {
- sub = sub.substring(m.end());
- }
- splitAddress.setAddr(sub);
- }
- splitAddress.setStatus(0);
- if (result[0].isEmpty() && result[1].isEmpty() && districtContainResult.isEmpty()) splitAddress.setStatus(4);
- if (splitAddress.getStreet().equals("自由贸易试验区")) splitAddress.setStatus(0);
- return splitAddress;
- }
- private static SplitAddress beautyResult(SplitAddress splitAddress) {
- //检查过度分割
- if (splitAddress.getAddr().isEmpty() || OVER_SPLIT.matcher(splitAddress.getAddr()).find()) {
- if (splitAddress.getCommunity().isEmpty()) {
- if (splitAddress.getStreet().isEmpty()) {
- if (splitAddress.getDistrict().isEmpty()) {
- splitAddress.setAddr("上海市" + splitAddress.getAddr());
- } else {
- splitAddress.setAddr(splitAddress.getDistrict() + splitAddress.getAddr());
- }
- } else {
- splitAddress.setAddr(splitAddress.getStreet() + splitAddress.getAddr());
- }
- } else {
- splitAddress.setAddr(splitAddress.getCommunity() + splitAddress.getAddr());
- }
- }
- //检查多号,多弄
- splitAddress.setAddr(splitAddress.getAddr().replaceAll(String.valueOf(MULTI_ADDRESS), ""));
- return splitAddress;
- }
- /**
- * 工具入口,返回所有数据
- *
- * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
- */
- public static List<SplitAddress> splitAddresses(String sourceAddress) {
- // 添加逻辑(常见别名替换)
- for (String errAddr : errAddrReStr.keySet()) {
- if (sourceAddress.contains(errAddr)) {
- sourceAddress = sourceAddress.replaceAll(errAddr, errAddrReStr.get(errAddr));
- }
- }
- List<SplitAddress> addressList = new ArrayList<>();
- String beautyString = sourceAddress.replaceAll("\\(", "").replaceAll("\\)", "").replaceAll("(", "").replaceAll(")", "")
- .replaceAll("\\[", "").replaceAll("]", "").replaceAll("\\{", "").replaceAll("}", "");
- StringBuilder sb = new StringBuilder();
- for (char c : beautyString.toCharArray()) {
- // 检查是否为全角数字
- if (c >= '0' && c <= '9') {
- // 转换为半角数字
- sb.append((char) (c - '0' + '0'));
- } else if (c == '\uE5CE') {
- // 奇妙的乱码,跳过
- } else {
- // 保持原字符
- sb.append(c);
- }
- }
- beautyString = sb.toString();
- addressList.add(beautyResult(split(beautyString)));
- for (SplitAddress s : addressList) s.setSourceAddress(sourceAddress);
- return addressList;
- }
- /**
- * 工具入口,仅返回最优
- *
- * @param sourceAddress 任意形式的地址,请注意,上海市外的地址仅分词到县,上海市内分词到居委
- */
- public static SplitAddress splitBestAddress(String sourceAddress) {
- return splitAddresses(sourceAddress).stream().max(SplitAddress::compareTo).orElse(new SplitAddress());
- }
- // 默认时间格式
- private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";
- /**
- * 为输入的时间字符串增加随机2-3秒
- *
- * @param timeStr 时间字符串,格式需为"yyyy - MM - dd HH:mm:ss"
- * @return 增加随机时间后的新时间字符串
- */
- public static String addRandomSeconds(String timeStr) {
- return addRandomSeconds(timeStr, DEFAULT_PATTERN);
- }
- /**
- * 按照指定格式为输入的时间字符串增加随机2-3秒
- *
- * @param timeStr 时间字符串
- * @param pattern 时间格式
- * @return 增加随机时间后的新时间字符串
- */
- public static String addRandomSeconds(String timeStr, String pattern) {
- try {
- // 解析输入的时间字符串
- DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern);
- LocalDateTime dateTime = LocalDateTime.parse(timeStr, formatter);
- // 生成20到30分钟之间的随机数
- int randomSeconds = ThreadLocalRandom.current().nextInt(40, 60);
- // 增加随机秒数
- LocalDateTime newDateTime = dateTime.plusSeconds(randomSeconds);
- // 格式化并返回新的时间字符串
- return newDateTime.format(formatter);
- } catch (Exception e) {
- // 处理异常
- System.err.println("时间处理出错: " + e.getMessage());
- return null;
- }
- }
- // 示例用法
- /*public static void main(String[] args) {
- String timeStr = "2025-07-30 12:00:00";
- String newTimeStr = addRandomSeconds(timeStr);
- System.out.println("原时间: " + timeStr);
- System.out.println("新时间: " + newTimeStr);
- }
- */
- public static void main(String[] args) throws Exception {
- new ShanghaiAddressSplitUtil().init();
- // 搜索地址
- String searchAddress = "王家浜路130号101室";
- System.out.println("》》搜索地址:" + searchAddress);
- System.out.println();
- // 返回的参考地址列表
- Set<String> arrayAddress = new HashSet<>();
- arrayAddress.add("茸梅路与王家浜路交叉口西北140米");
- arrayAddress.add("茸梅路与王家浜路交叉口东南100米");
- arrayAddress.add("上海市松江区中山街道五龙居民委员会王家浜路130号101室");
- arrayAddress.add("王家浜路80号101室");
- System.out.println("》》返回参考地址列表:" + arrayAddress);
- System.out.println();
- // 新建JSONArray对象
- JSONArray array = new JSONArray();
- // 把Set对象转换为接口接收的JSONArray对象
- for (String key : arrayAddress) {
- JSONObject item_ = new JSONObject();
- item_.put("sourceaddress", key);
- array.add(item_);
- }
- JSONArray reData = AddressTools.getInstance().findBestMatch2(searchAddress, array, "sourceaddress");
- System.out.println("------------------打分后从高到低显示》》》》》》》》》》》》");
- System.out.println();
- for (int i = 0; i < reData.size(); i++) {
- JSONObject jo = reData.getJSONObject(i);
- System.out.println("返回参考地址:" + jo.getString("sourceaddress") + "\t打分:" + jo.getString("总分"));
- }
- /*AddrSplitLmrMap addrSplitLmrMap = new AddrSplitLmrMap();
- addrSplitLmrMap.initFile();
- String testAddress = "上海市嘉定区兴庆路1551号";
- System.out.println("测试地址:" + testAddress);
- SplitAddress xzqh = splitBestAddress(testAddress);
- System.out.println(xzqh);
- AddrBean lmrAddrBean = AddrSplitLmrMap.outAddrMapInAddr(xzqh.getAddr());
- System.out.println("AddrBean:" + lmrAddrBean);
- System.out.println(AddrSplitLmrMap.parseAddress(lmrAddrBean.getAddress()));*/
- /*AddressResult.ContentBean test = new AddressResult.ContentBean();
- test.setRoomNumber("1551");
- test.setDistance("");
- test.setLon(121.18766784667969);
- test.setRoadName("兴庆路");
- test.setScore("rule_3");
- test.setAdname("嘉定工业区");
- test.setLat(31.439653396606445);
- test.setAddress("兴庆路930号内");
- test.setSearchAddress("兴庆路1551");
- test.setPname("上海市");
- test.setCityname("嘉定区");
- test.setCommunity("园区5");
- test.setStandAddr("上海市嘉定区嘉定工业区兴庆路1551");
- test.setName("兴庆路930号内");
- test.setX(-26568.0603190182);
- test.setY(22674.637411829084);
- test.setLocation("121.18767,31.439653");
- test.setCommunityCode("141348");
- AddressTools.ifTrueAddressByBeans(test);*/
- /*// 数据总条数
- int dataSize = 2158170;
- int numberSize = 100000;
- // 开始时间
- String startTime = "2025-06-02 13:40:47";
- System.out.println(startTime + "开始推送表:yysz_address_v3");
- String startTime_ = startTime;
- // int dataSize = 4449759;
- // int numberSize = 200000;
- // String startTime = "2025-06-07 16:27:28";
- // System.out.println(startTime + "开始推送表:t_yysz_address_zhili");
- // 间隔时间【2到3秒能推送500条】
- for (int i = 0; i < (dataSize / numberSize) + 1; i++) {
- startTime = addRandomSeconds(startTime);
- if (i == dataSize / numberSize) {
- System.out.println("成功推动" + (dataSize % numberSize) + "条记录,当前时间:" + startTime);
- } else {
- System.out.println("成功推动" + numberSize + "条记录,当前时间:" + startTime);
- }
- }
- System.out.println("总共推送了" + dataSize + "条数据,开始时间为:" + startTime_ + ",结束时间为:" + startTime);*/
- }
- }
|