|
@@ -47,10 +47,47 @@ public class SerializationUtils {
|
|
|
|
|
|
public static void main(String[] args) throws IOException {
|
|
|
test();
|
|
|
+// ruleTest("上海市松江区泗泾镇江川南路25弄19号302室", "上海市松江区洞泾镇砖桥居民委员会江川南路25弄19号302室");
|
|
|
+// replaceResidentialCommitteeToNull();
|
|
|
/*List<SjArrDzbzhSjWcbryDzxx> listData = deserialize("output/testDataBase_all.ser");
|
|
|
// 将序列化的文件转储为xlsx文件
|
|
|
ExcelReaderUtils.writeClassToExcel(listData, "output/allData.xlsx");*/
|
|
|
}
|
|
|
+
|
|
|
+ public static boolean ruleTest(String address, String returnAddress) {
|
|
|
+ if (returnAddress != null && !returnAddress.isEmpty()) {
|
|
|
+// 去除所有行政区划
|
|
|
+ Set<String> addressString = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(returnAddress)).get(0);
|
|
|
+ Set<String> addressNumber = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(returnAddress)).get(1);
|
|
|
+ Set<String> address2String = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(0);
|
|
|
+ Set<String> address2Number = AddressQueryEngine.tokenizeString(AddressQueryEngine.townReplaceAll(address)).get(1);
|
|
|
+ if (addressString != null && addressString.size() > 0) {
|
|
|
+ int addressStrSize = addressString.size();
|
|
|
+ for (String addr2str : address2String) {
|
|
|
+ if (addressString.contains(addr2str)) {
|
|
|
+ addressStrSize--;
|
|
|
+ if (addressStrSize == 0) {
|
|
|
+ if (addressNumber.size() == 0) {
|
|
|
+ return true;
|
|
|
+ } else {
|
|
|
+ int addressNumSize = addressNumber.size();
|
|
|
+ for (String addr2Num : address2Number) {
|
|
|
+ if (addressNumber.contains(addr2Num)) {
|
|
|
+ addressNumSize--;
|
|
|
+ if (addressNumSize == 0) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* 读取本地xlsx文件得到List<Map<String,Object>>结果,然后得到所有规范化结果地址字段,然后匹配街镇关键字,并将匹配到的街镇关键字放到新的一列
|
|
|
* 2、根据主键address和标准化地址standardAddress直接跑规则,属于rule_4的要替换掉同时添加updateTag
|
|
@@ -59,15 +96,21 @@ public class SerializationUtils {
|
|
|
public static void test() {
|
|
|
// 根据地址读取xlsx数据
|
|
|
try {
|
|
|
- List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_wcb_address_standardization.xlsx");
|
|
|
+ List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_new_wcb_address_standardization3.xlsx");
|
|
|
float lecel = 1.4f;
|
|
|
int sum = 0;
|
|
|
int erSum = 0;
|
|
|
int nullSum = 0;
|
|
|
+ int rule2Sum = 0;
|
|
|
+ int rule4Sum = 0;
|
|
|
for (Map<String, Object> item : fileDatas) {
|
|
|
- if (!"异常".equals(item.get("match_level").toString()) && !"否".equals(item.get("match_level").toString())) {
|
|
|
+ if (!"异常".equals(item.get("match_level").toString())) {
|
|
|
sum++;
|
|
|
- if (!"rule_2".equals(item.get("match_level").toString()) && !"rule_4".equals(item.get("match_level").toString())) {
|
|
|
+ if("rule_2".equals(item.get("match_level").toString())){
|
|
|
+ rule2Sum++;
|
|
|
+ }else if("rule_4".equals(item.get("match_level").toString())){
|
|
|
+ rule4Sum++;
|
|
|
+ }else if (!"rule_2".equals(item.get("match_level").toString()) && !"rule_4".equals(item.get("match_level").toString())) {
|
|
|
// 查询地址、主键
|
|
|
float address = Float.parseFloat(item.get("match_level").toString());
|
|
|
if (address < lecel) {
|
|
@@ -81,10 +124,34 @@ public class SerializationUtils {
|
|
|
System.out.println("总数据条数:" + fileDatas.size());
|
|
|
System.out.println("打分小于" + lecel + "数据条数:" + erSum);
|
|
|
System.out.println("异常数据条数:" + nullSum);
|
|
|
+ System.out.println("rule2数据条数:" + rule2Sum);
|
|
|
+ System.out.println("rule4数据条数:" + rule4Sum);
|
|
|
System.out.println("有效数据条数:" + (sum - erSum));
|
|
|
System.out.println("有效数据数据占比:" + (float) (sum - erSum) / fileDatas.size());
|
|
|
} catch (Exception e) {
|
|
|
e.printStackTrace();
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 替换标准化地址中的村居为null
|
|
|
+ * 山原居民委员会(筹)老是失败
|
|
|
+ */
|
|
|
+ public static void replaceResidentialCommitteeToNull() {
|
|
|
+ try {
|
|
|
+ List<Map<String, Object>> fileDatas = ExcelReaderUtils.readExcel("output/yysk_new_wcb_address_standardization2.xlsx");
|
|
|
+ for (Map<String, Object> item : fileDatas) {
|
|
|
+ if (item.get("return_address") != null && !item.get("return_address").toString().isEmpty()) {
|
|
|
+ String address = item.get("address").toString();
|
|
|
+ String returnAddress = item.get("return_address").toString();
|
|
|
+ if(ruleTest(address,returnAddress)){
|
|
|
+ item.put("match_level","rule_4");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ExcelReaderUtils.writeToExcel(fileDatas, "output/yysk_new_wcb_address_standardization3.xlsx");
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|