123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662 |
- package com.skyversation.poiaddr.addquery;
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import com.skyversation.poiaddr.bean.AddressResult;
- import com.skyversation.poiaddr.bean.GeoJsonBean;
- import com.skyversation.poiaddr.bean.WDToken;
- import com.skyversation.poiaddr.service.AreaService;
- import com.skyversation.poiaddr.util.ExcelReaderUtils;
- import com.skyversation.poiaddr.util.net.AddressNetTools;
- import com.skyversation.poiaddr.util.status.AddressLevel;
- import com.skyversation.poiaddr.util.status.AddressResultEnum;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.http.ResponseEntity;
- import org.springframework.stereotype.Service;
- import org.springframework.util.StringUtils;
- import javax.annotation.PostConstruct;
- import javax.annotation.Resource;
- import java.io.IOException;
- import java.util.*;
- import static com.skyversation.poiaddr.addquery.TransfromDataTool.yyszResultToResult;
- @Service
- public class AddressQueryEngine {
- private static AddressQueryEngine instance = new AddressQueryEngine();
- @Value("${app.net-type}")
- private String netType=null;
- @Value("${app.yysz-address-service}")
- private String yyszAddressService=null;
- @Resource
- private DataCallBack dataCallBack;
- @PostConstruct
- public void init(){
- if (netType == null) {
- netType = "internet";
- System.out.println("spring环境未初始化,使用默认网络环境"+netType);
- } else {
- // netType = netTypeInProp.getNetType();
- System.out.println("通过配置文件获取当前网络环境为:" + netType);
- }
- }
/**
 * Private so that code outside this class cannot call {@code new} directly; instances are
 * created by the static field initializer and by {@link #getInstance()}.
 */
private AddressQueryEngine() {
    // nothing to initialise here
}
- public synchronized static AddressQueryEngine getInstance() {
- if (instance == null) {
- instance = new AddressQueryEngine();
- }
- return instance;
- }
- private WDToken wdToken = null;
- /***
- * 单条地名搜索通用方法,搜索顺序 1. 武大吉奥; 2. 市四中心; 3. 高德搜索; 4. 高德高级搜索;
- * @param addrs
- * @param level 搜索等级:
- * 1. 搜索到结果就返回第一个;
- * 2. 搜索到结果去除行政区划后,存在包含关系,则使用;
- * 3. 搜索到结果,数字进行分词,数字匹配则使用;
- * 4. 搜索到结果,数字与文本均匹配,则使用;
- * 5. 结果与基准数据完全一致,则使用。
- * @return
- */
- public AddressResult commonSearchByName(List<String> addrs, AddressLevel level) {
- if (addrs == null || addrs.size() < 1) {
- return null;
- }
- //先走yysz_address
- AddressResult yyszAddressResult =null;
- yyszAddressResult = yyszMultiSearch(addrs, level );
- if (yyszAddressResult != null) return yyszAddressResult;
- if (!Objects.equals(netType, "internet")) {
- //武大
- if (Objects.equals(netType, "qingpu")) {
- AddressResult wdAddressResult = wdMultiSearch(addrs, level);
- if (wdAddressResult != null) {
- dataCallBack.successDataCallBack(wdAddressResult);
- return wdAddressResult;
- }
- }
- //四中心
- AddressResult szxAddressResult = szxMultiSearch(addrs, level);
- if (szxAddressResult != null) {
- dataCallBack.successDataCallBack(szxAddressResult);
- return szxAddressResult;
- }
- }else {
- //高德
- AddressResult gdAddressResult = gdMultiSearch(addrs, level);
- if (gdAddressResult != null) {
- dataCallBack.successDataCallBack(gdAddressResult);
- return gdAddressResult;
- }
- //高德高级
- AddressResult gdV3AddressResult = gdV3MultiSearch(addrs, level);
- if (gdV3AddressResult != null) {
- dataCallBack.successDataCallBack(gdV3AddressResult);
- return gdV3AddressResult;
- }
- }
- //失败
- AddressResult addressResult =new AddressResult();
- dataCallBack.failDataCallBack(addrs);
- addressResult.setCode(AddressResultEnum.RESULT_NULL);
- addressResult.setMessage("失败");
- return addressResult;
- }
- public AddressResult commonSearchByNameForTest(List<String> addrs, AddressLevel level) {
- if (addrs == null || addrs.size() < 1) {
- return null;
- }
- if (!Objects.equals(netType, "internet")) {
- //武大
- if (Objects.equals(netType, "qingpu")) {
- AddressResult wdAddressResult = wdMultiSearch(addrs, level);
- if (wdAddressResult != null) {
- return wdAddressResult;
- }
- }
- //四中心
- AddressResult szxAddressResult = szxMultiSearch(addrs, level);
- if (szxAddressResult != null) {
- return szxAddressResult;
- }
- }else {
- //高德
- AddressResult gdAddressResult = gdMultiSearch(addrs, level);
- if (gdAddressResult != null) {
- return gdAddressResult;
- }
- //高德高级
- AddressResult gdV3AddressResult = gdV3MultiSearch(addrs, level);
- if (gdV3AddressResult != null) {
- return gdV3AddressResult;
- }
- }
- //失败
- AddressResult addressResult =new AddressResult();
- addressResult.setCode(AddressResultEnum.RESULT_NULL);
- addressResult.setMessage("失败");
- return addressResult;
- }
- //多条搜索
- /**
- * yysz_address多条搜索
- */
- private AddressResult yyszMultiSearch(List<String> addrs, AddressLevel level) {
- try{
- for (String addr : addrs) {
- AddressResult.ContentBean contentBean = verificaData(yyszSearchByName(addr), level, addr);
- if (contentBean != null) {
- contentBean.setSearchAddress(addr);
- List<com.skyversation.poiaddr.bean.AddressResult.ContentBean> contentBeans = new ArrayList<>();
- contentBeans.add(contentBean);
- AddressResult addressResult = new AddressResult();
- addressResult.setData(contentBeans);
- addressResult.setCode(AddressResultEnum.YYSZ_SUCCESS);
- addressResult.setMessage("成功");
- return addressResult;
- }
- }
- }catch (Exception e){
- throw new RuntimeException("yysz_address服务异常");
- }
- return null;
- }
- /**
- * 武大多条搜索
- */
- private AddressResult wdMultiSearch(List<String> addrs, AddressLevel level) {
- try {
- for (String addr : addrs) {
- AddressResult.ContentBean contentBean = verificaData(wdjaSearchByName(addr), level, addr);
- if (contentBean != null) {
- contentBean.setSearchAddress(addr);
- List<com.skyversation.poiaddr.bean.AddressResult.ContentBean> contentBeans = new ArrayList<>();
- contentBeans.add(contentBean);
- AddressResult addressResult = new AddressResult();
- addressResult.setData(contentBeans);
- addressResult.setCode(AddressResultEnum.WDJA_SUCCESS);
- addressResult.setMessage("成功");
- return addressResult;
- }
- }
- } catch (Exception e) {
- System.err.println("武大吉奥:日常异常" + e);
- }
- return null;
- }
- /**
- * 四中心多条搜索
- */
- private AddressResult szxMultiSearch(List<String> addrs, AddressLevel level) {
- try {
- for (String addr : addrs) {
- AddressResult.ContentBean contentBean = verificaData(szxSearchByName(addr), level, addr);
- if (contentBean != null) {
- contentBean.setSearchAddress(addr);
- List<com.skyversation.poiaddr.bean.AddressResult.ContentBean> contentBeans = new ArrayList<>();
- contentBeans.add(contentBean);
- AddressResult addressResult = new AddressResult();
- addressResult.setData(contentBeans);
- addressResult.setCode(AddressResultEnum.SZX_SUCCESS);
- addressResult.setMessage("成功");
- return addressResult;
- }
- }
- } catch (Exception e) {
- System.err.println(" 四中心异常" + e);
- }
- return null;
- }
- /**
- * 高德多条搜索
- */
- private AddressResult gdMultiSearch(List<String> addrs, AddressLevel level) {
- try {
- for (String addr : addrs) {
- AddressResult.ContentBean contentBean = verificaData(gdSearchByName(addr), level, addr);
- if (contentBean != null) {
- contentBean.setSearchAddress(addr);
- List<com.skyversation.poiaddr.bean.AddressResult.ContentBean> contentBeans = new ArrayList<>();
- contentBeans.add(contentBean);
- AddressResult addressResult = new AddressResult();
- addressResult.setData(contentBeans);
- addressResult.setCode(AddressResultEnum.GD_SUCCESS);
- addressResult.setMessage("成功");
- return addressResult;
- }
- }
- } catch (Exception e) {
- System.err.println("高德异常" + e);
- }
- return null;
- }/**
- * 高德高级多条搜索
- */
- private AddressResult gdV3MultiSearch(List<String> addrs, AddressLevel level) {
- try {
- for (String addr : addrs) {
- AddressResult.ContentBean contentBean = verificaData(gdV3SearchByName(addr), level, addr);
- if (contentBean != null) {
- contentBean.setSearchAddress(addr);
- List<com.skyversation.poiaddr.bean.AddressResult.ContentBean> contentBeans = new ArrayList<>();
- contentBeans.add(contentBean);
- AddressResult addressResult = new AddressResult();
- addressResult.setData(contentBeans);
- addressResult.setCode(AddressResultEnum.GDV3_SUCCESS);
- addressResult.setMessage("成功");
- return addressResult;
- }
- }
- } catch (Exception e) {
- System.err.println("高德高级异常" + e);
- }
- return null;
- }
- //单搜索
- /**
- * yysz_address地名搜索
- */
- public AddressResult yyszSearchByName(String address){
- ResponseEntity response = AddressNetTools.getInstance().requestGet(yyszAddressService+"/search/searchByName?address="+address, null, null, 10);
- if (response != null) {
- String body = response.getBody() + "";
- if (!StringUtils.hasText(body))
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- try {
- AddressResult addressResult = yyszResultToResult(JSONObject.parseObject(body));
- if (addressResult==null)System.out.println("yysz_address地址[" + address + "]未查询到数据!");
- return addressResult;
- } catch (Exception e) {
- System.err.println(e);
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- } else {
- System.out.println("yysz_address地址[" + address + "]未查询到数据!");
- return null;
- }
- }
- /**
- * 武大吉奥单条地名搜索
- * @param address
- */
- public AddressResult wdjaSearchByName(String address) {
- System.out.println("<<<<<<<<<<----------开始武大吉奥地名地址搜索------------>>>>>>>>>>>>>");
- if (wdToken == null || System.currentTimeMillis() - wdToken.getTime() > 36000000) {
- wdToken = AddressTools.getInstance().getWDToken(Constant.WD_USER_NAME, Constant.WD_USER_PWD);
- if (wdToken == null) {
- return null;
- }
- wdToken.setTime(System.currentTimeMillis());
- }
- if (wdToken == null)
- return AddressTools.getInstance().faildQuery(AddressResultEnum.NO_TOKEN, "token获取失败");
- String token = wdToken.getToken();
- String url = Constant.GET_ADDRESS_MEG_URL + "?token=" + token + "&addr=" + address;
- String body = null;
- try {
- body = AddressNetTools.getInstance().wdSendGetRequest(url);
- } catch (Exception e) {
- e.toString();
- }
- if (body == null || body.equals("null") || !StringUtils.hasText(body))
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- try {
- JSONObject json = JSONObject.parseObject(body);
- return TransfromDataTool.wdResultToResult(json);
- } catch (Exception e) {
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- }
- /**
- * 市四中心地名搜索
- * @param address
- * @return
- */
- public AddressResult szxSearchByName(String address) {
- ResponseEntity response = AddressNetTools.getInstance().requestGet(Constant.SZX_URL + "®ion=" + Constant.getAMAP_CITY_CODE() + "&query=" + address, null, null, 10);
- if (response != null) {
- String body = response.getBody() + "";
- if (!StringUtils.hasText(body))
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- try {
- return TransfromDataTool.szxResultToResult(JSONObject.parseObject(body));
- } catch (Exception e) {
- System.err.println(e);
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- } else {
- System.out.println("地址[" + address + "]未查询到数据!");
- return null;
- }
- }
- /*public AddressResult v4SearchByName(String address) {
- ResponseEntity response = AddressNetTools.getInstance().requestGet(Constant.V4_URL + address, null, null);
- if (response != null) {
- String body = response.getBody() + "";
- if (!StringUtils.hasText(body))
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- try {
- JSONObject json = JSONObject.parseObject(body);
- return TransfromDataTool.szxResultToResult(json);
- } catch (Exception e) {
- System.err.println(e);
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- } else {
- System.out.println("地址" + address + "未查询到数据!");
- return null;
- }
- }*/
- /**
- * 高德普通地名搜索
- * @param address
- * @return
- */
- public AddressResult gdSearchByName(String address) {
- String geoUrl = Constant.AMAP_GEO_URL + "?key=" + Constant.AMAP_KEY + "&address=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&output=JSON";
- ResponseEntity response = AddressNetTools.getInstance().requestGet(geoUrl, null, null, 0);
- if (response != null && response.getBody() != null) {
- String body = response.getBody() + "";
- if (!StringUtils.hasText(body))
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- try {
- JSONObject json = JSONObject.parseObject(body);
- return TransfromDataTool.gdResultToResult(json);
- } catch (Exception e) {
- System.err.println(e);
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- } else {
- return null;
- }
- }
- /**
- * 高德高级地名搜索
- * @param address
- * @return
- */
- public AddressResult gdV3SearchByName(String address) {
- String geoUrl =
- Constant.AMAP_SEARCH_NAME_V3 + "?key=" + Constant.AMAP_KEY + "&types=" + Constant.AMAP_SEARCH_TYPES +
- "&keywords=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20";
- ResponseEntity responseEntity = AddressNetTools.getInstance().requestGet(geoUrl, null, null, 0);
- if (responseEntity.hasBody()) {
- String body = responseEntity.getBody() + "";
- if (!StringUtils.hasText(body)) {
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_NULL, "搜索无结果");
- }
- JSONArray pois = new JSONArray();
- JSONObject json = JSONObject.parseObject(body);
- if (json.containsKey("pois")) {
- pois.addAll(json.getJSONArray("pois"));
- }
- if (json.containsKey("count")) {
- int count = Integer.parseInt(json.get("count") + "");
- if (count > 20) {
- for (int i = 2; i < (count % 20 + 1); i++) {
- geoUrl =
- Constant.AMAP_SEARCH_NAME_V3 + "?key=" + Constant.AMAP_KEY + "&types=" + Constant.AMAP_SEARCH_TYPES +
- "&keywords=" + address + "&city=" + Constant.getAMAP_CITY_CODE() + "&offset=20&page=" + i;
- ResponseEntity responseEntity2 = AddressNetTools.getInstance().requestGet(geoUrl, null, null, 0);
- if (responseEntity2.hasBody()) {
- body = responseEntity2.getBody() + "";
- }
- if (!StringUtils.hasText(body)) {
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_AGAIN_NULL, "多次搜索无结果");
- }
- JSONObject json2 = JSONObject.parseObject(body);
- if (json2.containsKey(pois)) {
- pois.addAll(JSONObject.parseObject(body).getJSONArray("pois"));
- }
- }
- }
- }
- try {
- return TransfromDataTool.gdV3ResultToResult(pois);
- } catch (Exception e) {
- System.err.println(e);
- return AddressTools.getInstance().faildQuery(AddressResultEnum.DATA_FROMAT_FAILD, "格式化失败");
- }
- } else {
- return AddressTools.getInstance().faildQuery(AddressResultEnum.RESULT_AGAIN_NULL, "高德V3分页查询无结果");
- }
- }
- /**
- * 根据搜索地址,赋值村居、网格、微格网信息
- * @param result
- * @return
- */
- public AddressResult getCjWgWgwByLoc(AddressResult result) {
- if (result == null || result.getData() == null || result.getData().size() < 1) {
- return result;
- } else {
- AreaService areaService = AreaService.getInstance();
- for (AddressResult.ContentBean content : result.getData()) {
- if (content.getLat() != null && content.getLon() != null) {
- // TODO 松江的话只跑行政区划和街镇
- /*GeoJsonBean cjBean = areaService.isInCJPolygon(content.getLon(), content.getLat());
- content.setCjJson(cjBean == null ? new JSONObject() : cjBean.getProperties());
- GeoJsonBean gridBean = areaService.isInGridPolygon(content.getLon(), content.getLat());
- content.setWgwJson(gridBean == null ? new JSONObject() : gridBean.getProperties());
- GeoJsonBean wgridBean = areaService.isInWGridPolygon(content.getLon(), content.getLat());
- content.setWgwJson(wgridBean == null ? new JSONObject() : wgridBean.getProperties());*/
- GeoJsonBean adBean = areaService.isInadPolygon(content.getLon(), content.getLat());
- content.setAdJson(adBean == null ? new JSONObject() : adBean.getProperties());
- GeoJsonBean townBean = areaService.isInTownPolygon(content.getLon(), content.getLat());
- content.setTownJson(townBean == null ? new JSONObject() : townBean.getProperties());
- }
- }
- return result;
- }
- }
- //
- public static boolean isNotEmptyOrBlank(String str) {
- return str != null && !str.trim().isEmpty();
- }
- /**
- * 1. 搜索到结果就返回第一个;
- * 2. 搜索到结果去除行政区划后,存在包含关系,则使用;
- * 3. 搜索到结果,数字进行分词,数字匹配则使用;
- * 4. 搜索到结果,数字与文本均匹配,则使用;
- * 5. 结果与基准数据完全一致,则使用。
- *
- * @param result
- * @param level
- * @return
- */
- public AddressResult.ContentBean verificaData(AddressResult result, AddressLevel level, String addr) {
- switch (level) {
- case LEVEL_1: {
- if (result != null && result.getData() != null && result.getData().size() > 0) {
- return result.getData().get(0);
- } else {
- return null;
- }
- }
- // 2. 搜索到结果去除行政区划后,存在包含关系,则使用;
- case LEVEL_CONTAINS_2: {
- if (result != null && result.getData() != null && result.getData().size() > 0) {
- List<AddressResult.ContentBean> contentBean = result.getData();
- for (AddressResult.ContentBean contentBean1 : contentBean) {
- if (contentBean1.getAddress() != null) {
- String address = townReplaceAll(addressReplaceAll(contentBean1.getAddress()));
- String address2 = townReplaceAll(addressReplaceAll(addr));
- if (isNotEmptyOrBlank(address) && address.contains(address2)) {
- return contentBean1;
- }
- }
- }
- } else {
- return null;
- }
- }
- // 3. 搜索到结果,数字进行分词,数字匹配则使用;
- case LEVLE_NUMBER_3: {
- if (result != null && result.getData() != null && result.getData().size() > 0) {
- List<AddressResult.ContentBean> contentBean = result.getData();
- for (AddressResult.ContentBean contentBean1 : contentBean) {
- if (contentBean1.getAddress() != null) {
- Set<String> address = tokenizeString(contentBean1.getAddress()).get(1);
- if (address != null && address.size() > 1) {
- Set<String> address2 = tokenizeString(addr).get(1);
- int addressSize = address.size();
- for (String addr2 : address2) {
- if (address.contains(addr2)) {
- addressSize--;
- if (addressSize == 0) {
- return contentBean1;
- }
- }
- }
- }
- }
- }
- } else {
- return null;
- }
- }
- // 4. 搜索到结果,数字与文本均匹配,则使用;
- case LEVEL_NUMBER_TEXT_4: {
- if (result != null && result.getData() != null && result.getData().size() > 0) {
- List<AddressResult.ContentBean> contentBean = result.getData();
- for (AddressResult.ContentBean contentBean1 : contentBean) {
- if (contentBean1.getAddress() != null) {
- Set<String> addressString = tokenizeString(contentBean1.getAddress()).get(0);
- Set<String> addressNumber = tokenizeString(contentBean1.getAddress()).get(1);
- Set<String> address2String = tokenizeString(addr).get(0);
- Set<String> address2Number = tokenizeString(addr).get(1);
- if (addressString != null && addressString.size() > 1) {
- int addressStrSize = addressString.size();
- for (String addr2str : address2String) {
- if (addressString.contains(addr2str)) {
- addressStrSize--;
- if (addressStrSize == 0) {
- int addressNumSize = addressNumber.size();
- for (String addr2Num : address2Number) {
- if (addressNumber.contains(addr2Num)) {
- addressNumSize--;
- if (addressNumSize == 0) {
- return contentBean1;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- } else {
- return null;
- }
- }
- // 结果与基准数据完全一致,则使用。
- case LEVEL_TOTAL_CONTAINS_5: {
- if (result != null && result.getData() != null && result.getData().size() > 0) {
- List<AddressResult.ContentBean> contentBean = result.getData();
- for (AddressResult.ContentBean contentBean1 : contentBean) {
- if (contentBean1.getAddress() != null) {
- String address = addressReplaceAll(contentBean1.getAddress());
- String address2 = addressReplaceAll(addr);
- if (isNotEmptyOrBlank(address) && address.equals(address2)) {
- return contentBean1;
- }
- }
- }
- } else {
- return null;
- }
- }
- default: {
- return null;
- }
- }
- }
- public static String addressReplaceAll(String address) {
- return address.replaceAll("上海市", "").replaceAll(Constant.getArea() + "区", "").replaceAll("-", "").replaceAll("_", "").replaceAll("/", "").replaceAll(" ", "").replaceAll(",", "").replaceAll("\\.", "").replaceAll(",", "").replaceAll("。", "").replaceAll("\\+", "").replaceAll("\\*", "");
- }
- public static String townReplaceAll(String address) {
- String[] towns = Constant.getTowns();
- for (String town : towns) {
- address = address.replaceAll(town, "");
- }
- return address.replaceAll("号", "0").replaceAll("弄", "0").replaceAll("室", "0").replaceAll("户", "0").replaceAll("单元", "0").replaceAll("幢", "0");
- }
- /**
- * 数字和文字分词返回List<Set<String>>
- *
- * @param input
- * @return
- */
- public static List<Set<String>> tokenizeString(String input) {
- input = townReplaceAll(input);
- // 初始化两个集合,一个用于存储非数字字符串,一个用于存储数字字符串
- Set<String> nonNumberSet = new HashSet<>();
- Set<String> numberSet = new HashSet<>();
- StringBuilder currentToken = new StringBuilder();
- for (int i = 0; i < input.length(); i++) {
- char c = input.charAt(i);
- if (Character.isDigit(c)) {
- // 如果当前字符是数字
- if (currentToken.length() > 0 && !Character.isDigit(currentToken.charAt(0))) {
- // 如果之前的 token 是非数字,将其添加到非数字集合中
- nonNumberSet.add(currentToken.toString());
- currentToken.setLength(0);
- }
- currentToken.append(c);
- } else {
- // 如果当前字符不是数字
- if (currentToken.length() > 0 && Character.isDigit(currentToken.charAt(0))) {
- // 如果之前的 token 是数字,将其添加到数字集合中
- numberSet.add(currentToken.toString());
- currentToken.setLength(0);
- }
- if (!Character.isWhitespace(c)) {
- currentToken.append(c);
- }
- }
- }
- // 处理最后一个 token
- if (currentToken.length() > 0) {
- if (Character.isDigit(currentToken.charAt(0))) {
- numberSet.add(currentToken.toString());
- } else {
- nonNumberSet.add(currentToken.toString());
- }
- }
- // 将两个集合添加到列表中
- List<Set<String>> result = new ArrayList<>();
- result.add(nonNumberSet);
- result.add(numberSet);
- return result;
- }
- }
|