Commit ae4978f6 authored by 赵啸非

修改重试机制

parent 5a240978
...@@ -22,6 +22,8 @@ import java.util.HashMap; ...@@ -22,6 +22,8 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import static com.mortals.xhx.common.key.Constant.MAX_RETRY_COUNT;
@Slf4j @Slf4j
public class MatterHtmlParseUtil { public class MatterHtmlParseUtil {
...@@ -29,61 +31,39 @@ public class MatterHtmlParseUtil { ...@@ -29,61 +31,39 @@ public class MatterHtmlParseUtil {
String matterTotalExp = "//input[@id=\"result_count\"]"; String matterTotalExp = "//input[@id=\"result_count\"]";
String matterPageExp = "//input[@id=\"pageNum\"]"; String matterPageExp = "//input[@id=\"pageNum\"]";
Map<String, Integer> resultMap = new HashMap<>(); Map<String, Integer> resultMap = new HashMap<>();
try { int reCount = 0;
Document dom = Jsoup.connect(url) while (reCount < MAX_RETRY_COUNT) {
.ignoreContentType(true) try {
.ignoreHttpErrors(true) Document dom = Jsoup.connect(url)
.data(params).get(); .ignoreContentType(true)
Elements elements = dom.selectXpath(matterTotalExp); .ignoreHttpErrors(true)
if (elements.size() > 0) { .timeout(60 * 1000)
Integer total = elements.get(0) == null ? 0 : DataUtil.converStr2Int(elements.get(0).attr("value"), 0); .data(params).get();
resultMap.put("total", total); Elements elements = dom.selectXpath(matterTotalExp);
} if (elements.size() > 0) {
Integer total = elements.get(0) == null ? 0 : DataUtil.converStr2Int(elements.get(0).attr("value"), 0);
resultMap.put("total", total);
}
elements = dom.selectXpath(matterPageExp); elements = dom.selectXpath(matterPageExp);
if (elements.size() > 0) { if (elements.size() > 0) {
Integer pageNum = elements.get(0) == null ? 0 : DataUtil.converStr2Int(elements.get(0).attr("value"), 0); Integer pageNum = elements.get(0) == null ? 0 : DataUtil.converStr2Int(elements.get(0).attr("value"), 0);
resultMap.put("pageNum", pageNum); resultMap.put("pageNum", pageNum);
}
} catch (Exception e) {
log.error("获取事项数量异常!params:{},重试:{}次", JSON.toJSONString(params), reCount, e);
reCount++;
try {
Thread.sleep(2000);
} catch (InterruptedException interruptedException) {
}
} }
} catch (Exception e) {
log.error("获取事项数量异常!params:" + JSON.toJSONString(params), e);
return Rest.fail(e.getMessage());
} }
return Rest.ok(resultMap); return Rest.ok(resultMap);
} }
/**
 * Fetches the page at {@code url} and extracts the department matter counters from it.
 *
 * <p>Sends {@code params} as request data via a GET call, then reads the {@code value}
 * attribute of the first {@code input} element with id {@code result_countDept} (stored as
 * {@code "total"}) and of the first one with id {@code pageNumDept} (stored as
 * {@code "pageNum"}).
 *
 * @param params request data passed to the connection
 * @param url    address of the page to fetch
 * @return a successful {@code Rest} wrapping the map of counters; a key is absent when the
 *         corresponding input is not found in the page. If fetching or parsing throws, a failed
 *         {@code Rest} built from the exception message is returned instead.
 */
public static Rest<Map<String, Integer>> statSiteMatterDeptCount(Map<String, String> params, String url) {
    Map<String, Integer> resultMap = new HashMap<>();
    try {
        Document dom = Jsoup.connect(url)
                .ignoreContentType(true)
                .ignoreHttpErrors(true)
                // Same explicit 60 * 1000 ms timeout the other page fetches in this class set.
                .timeout(60 * 1000)
                .data(params).get();
        putFirstValueAsInt(dom, "//input[@id=\"result_countDept\"]", "total", resultMap);
        putFirstValueAsInt(dom, "//input[@id=\"pageNumDept\"]", "pageNum", resultMap);
    } catch (Exception e) {
        // Trailing throwable argument keeps the stack trace in the log output.
        log.error("获取事项数量异常!params:{}", JSON.toJSONString(params), e);
        return Rest.fail(e.getMessage());
    }
    return Rest.ok(resultMap);
}

/**
 * Stores under {@code key} the {@code value} attribute of the first element matching
 * {@code xpath}, converted with {@code DataUtil.converStr2Int} (0 passed as its second
 * argument). Leaves {@code target} untouched when nothing matches.
 */
private static void putFirstValueAsInt(Document dom, String xpath, String key, Map<String, Integer> target) {
    Elements matches = dom.selectXpath(xpath);
    if (matches.isEmpty()) {
        return;
    }
    Element first = matches.get(0);
    Integer value = first == null ? 0 : DataUtil.converStr2Int(first.attr("value"), 0);
    target.put(key, value);
}
public static Rest<List<MatterEntity>> getMatterList(Map<String, String> params, String url) { public static Rest<List<MatterEntity>> getMatterList(Map<String, String> params, String url) {
String matterListExp = "//div[@class=\"sx_list\"]//span[1]"; String matterListExp = "//div[@class=\"sx_list\"]//span[1]";
String matterListLiExp = "//div[@class=\"sx_list\"]//li/a[1]"; String matterListLiExp = "//div[@class=\"sx_list\"]//li/a[1]";
...@@ -91,107 +71,117 @@ public class MatterHtmlParseUtil { ...@@ -91,107 +71,117 @@ public class MatterHtmlParseUtil {
String evaluationUrl = ""; String evaluationUrl = "";
String netApplyUrl = ""; String netApplyUrl = "";
String href = ""; String href = "";
try {
Document dom = Jsoup.connect(url).data(params).get();
//System.out.println(dom.html());
Elements elements = dom.selectXpath(matterListExp);
for (int i = 0; i < elements.size(); i++) { int reCount = 0;
Element element = elements.get(i); while (reCount < MAX_RETRY_COUNT) {
if (element == null) { try {
continue; Document dom = Jsoup.connect(url)
} .ignoreContentType(true)
String title = element.attr("title"); .ignoreHttpErrors(true)
href = element.firstElementChild().attr("href"); .timeout(60 * 1000)
.data(params).get();
Elements elements = dom.selectXpath(matterListExp);
for (int i = 0; i < elements.size(); i++) {
Element element = elements.get(i);
if (element == null) {
continue;
}
String title = element.attr("title");
href = element.firstElementChild().attr("href");
//element.child() //element.child()
if (href.equalsIgnoreCase("javascript:void(0)")) { if (href.equalsIgnoreCase("javascript:void(0)")) {
continue; continue;
} }
//抓取申请与评价页面地址 //抓取申请与评价页面地址
Element nextElementSibling = element.nextElementSibling(); Element nextElementSibling = element.nextElementSibling();
Elements elementsA = nextElementSibling.children(); Elements elementsA = nextElementSibling.children();
//Elements elementsA = nextElementSibling.selectXpath("//a"); //Elements elementsA = nextElementSibling.selectXpath("//a");
if (elementsA != null) { if (elementsA != null) {
for (Element tempElement : elementsA) { for (Element tempElement : elementsA) {
if ("办事指南".equals(tempElement.text().trim())) { if ("办事指南".equals(tempElement.text().trim())) {
String onclick = tempElement.attr("onclick"); String onclick = tempElement.attr("onclick");
List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick); List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick);
if (list.size() > 1) { if (list.size() > 1) {
href = StrUtil.subBetween(list.get(0), "'", "'"); href = StrUtil.subBetween(list.get(0), "'", "'");
}
} }
} if ("好差评".equals(tempElement.text().trim())) {
if ("好差评".equals(tempElement.text().trim())) { String onclick = tempElement.attr("onclick");
String onclick = tempElement.attr("onclick"); evaluationUrl = StrUtil.subBetween(onclick, "evaluation('", "')");
evaluationUrl = StrUtil.subBetween(onclick, "evaluation('", "')"); }
} if ("申请".equals(tempElement.text().trim())) {
if ("申请".equals(tempElement.text().trim())) { String onclick = tempElement.attr("onclick");
String onclick = tempElement.attr("onclick"); List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick);
List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick); if (list.size() > 4) {
if (list.size() > 4) { netApplyUrl = StrUtil.subBetween(list.get(3), "'", "'");
netApplyUrl = StrUtil.subBetween(list.get(3), "'", "'"); }
} }
} }
} }
}
if (ObjectUtils.isEmpty(href)) { if (ObjectUtils.isEmpty(href)) {
log.info("error href ,title:" + title); log.info("error href ,title:" + title);
} }
buildMatter(matterEntityList, title, href, evaluationUrl, netApplyUrl);
}
elements = dom.selectXpath(matterListLiExp); buildMatter(matterEntityList, title, href, evaluationUrl, netApplyUrl);
for (int i = 0; i < elements.size(); i++) {
Element element = elements.get(i);
if (element == null) {
continue;
} }
String title = element.attr("title");
href = element.attr("href");
//抓取申请与评价页面地址
Element nextElementSibling = element.nextElementSibling();
if (nextElementSibling != null) {
Elements elementsA = nextElementSibling.children(); elements = dom.selectXpath(matterListLiExp);
//Elements elementsA = nextElementSibling.selectXpath("//a"); for (int i = 0; i < elements.size(); i++) {
for (Element tempElement : elementsA) { Element element = elements.get(i);
if (element == null) {
if ("办事指南".equals(tempElement.text().trim())) { continue;
String onclick = tempElement.attr("onclick"); }
if(ObjectUtils.isEmpty(onclick)) continue; String title = element.attr("title");
List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick); href = element.attr("href");
if (list.size() > 1) { //抓取申请与评价页面地址
href = StrUtil.subBetween(list.get(0), "'", "'"); Element nextElementSibling = element.nextElementSibling();
if (nextElementSibling != null) {
Elements elementsA = nextElementSibling.children();
//Elements elementsA = nextElementSibling.selectXpath("//a");
for (Element tempElement : elementsA) {
if ("办事指南".equals(tempElement.text().trim())) {
String onclick = tempElement.attr("onclick");
if (ObjectUtils.isEmpty(onclick)) continue;
List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick);
if (list.size() > 1) {
href = StrUtil.subBetween(list.get(0), "'", "'");
}
} }
} if ("好差评".equals(tempElement.text().trim())) {
if ("好差评".equals(tempElement.text().trim())) { String onclick = tempElement.attr("onclick");
String onclick = tempElement.attr("onclick"); evaluationUrl = StrUtil.subBetween(onclick, "evaluation('", "')");
evaluationUrl = StrUtil.subBetween(onclick, "evaluation('", "')"); }
} if ("申请".equals(tempElement.text().trim())) {
if ("申请".equals(tempElement.text().trim())) { String onclick = tempElement.attr("onclick");
String onclick = tempElement.attr("onclick"); List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick);
List<String> list = ReUtil.findAllGroup0("'(.*?)'", onclick); if (list.size() > 4) {
if (list.size() > 4) { netApplyUrl = StrUtil.subBetween(list.get(3), "'", "'");
netApplyUrl = StrUtil.subBetween(list.get(3), "'", "'"); }
} }
}
if (ObjectUtils.isEmpty(href)) { if (ObjectUtils.isEmpty(href)) {
log.info("error href ,title:" + title); log.info("error href ,title:" + title);
} }
}
} }
buildMatter(matterEntityList, title, href, evaluationUrl, netApplyUrl);
} }
buildMatter(matterEntityList, title, href, evaluationUrl, netApplyUrl);
}
} catch (Exception e) { } catch (Exception e) {
log.error("获取列表异常!params:" + JSON.toJSONString(params), e); log.error("获取列表异常!params:{},重试:{}次", JSON.toJSONString(params), reCount, e);
return Rest.fail(e.getMessage()); reCount++;
try {
Thread.sleep(2000);
} catch (InterruptedException interruptedException) {
}
}
} }
return Rest.ok(matterEntityList); return Rest.ok(matterEntityList);
} }
...@@ -220,10 +210,13 @@ public class MatterHtmlParseUtil { ...@@ -220,10 +210,13 @@ public class MatterHtmlParseUtil {
public static Rest<Map<String, String>> syncDeptBySiteId(Map<String, String> params, String url) { public static Rest<Map<String, String>> syncDeptBySiteId(Map<String, String> params, String url) {
String deptListExp = "//ul[@class='bm-list']//li"; String deptListExp = "//ul[@class='bm-list']//li";
Map<String, String> map = new HashMap<>(); Map<String, String> map = new HashMap<>();
try { int reCount = 0;
while (reCount < MAX_RETRY_COUNT) {
try {
Document dom = Jsoup.connect(url) Document dom = Jsoup.connect(url)
.ignoreContentType(true) .ignoreContentType(true)
.ignoreHttpErrors(true) .ignoreHttpErrors(true)
.timeout(60 * 1000)
.data(params).get(); .data(params).get();
Elements elements = dom.selectXpath(deptListExp); Elements elements = dom.selectXpath(deptListExp);
for (int i = 0; i < elements.size(); i++) { for (int i = 0; i < elements.size(); i++) {
...@@ -237,8 +230,14 @@ public class MatterHtmlParseUtil { ...@@ -237,8 +230,14 @@ public class MatterHtmlParseUtil {
String deptCode = builder.getQuery().get("deptCode").toString(); String deptCode = builder.getQuery().get("deptCode").toString();
map.put(deptCode, deptName); map.put(deptCode, deptName);
} }
} catch (Exception e) { } catch (Exception e) {
return Rest.fail(e.getMessage()); log.error("当前站点同步添加部门异常!params:{},重试:{}次", JSON.toJSONString(params), reCount, e);
reCount++;
try {
Thread.sleep(2000);
} catch (InterruptedException interruptedException) {
}
}
} }
return Rest.ok("当前站点同步添加部门成功!", map); return Rest.ok("当前站点同步添加部门成功!", map);
...@@ -248,9 +247,9 @@ public class MatterHtmlParseUtil { ...@@ -248,9 +247,9 @@ public class MatterHtmlParseUtil {
public static void main(String[] args) { public static void main(String[] args) {
String url = "http://www.sczwfw.gov.cn/jiq/interface/item/tags"; String url = "http://www.sczwfw.gov.cn/jiq/interface/item/tags";
HashMap<String, String> params = new HashMap<>(); HashMap<String, String> params = new HashMap<>();
params.put("dxType", "54"); params.put("dxType", "6");
params.put("areaCode", "511503003999"); params.put("areaCode", "511523000000");
params.put("deptCode", ""); params.put("deptCode", "2632");
params.put("searchtext", ""); params.put("searchtext", "");
params.put("pageno", "1"); params.put("pageno", "1");
params.put("taskType", ""); params.put("taskType", "");
...@@ -258,6 +257,10 @@ public class MatterHtmlParseUtil { ...@@ -258,6 +257,10 @@ public class MatterHtmlParseUtil {
System.out.println(rest.getData().size()); System.out.println(rest.getData().size());
//{"searchtext":"","areaCode":"511523000000","taskType":"","dxType":"6","deptCode":"2632"}
/* HashMap<String, String> params = new HashMap<>(); /* HashMap<String, String> params = new HashMap<>();
params.put("areaCode", "510116000000"); params.put("areaCode", "510116000000");
......
...@@ -127,26 +127,13 @@ public class MatterExtServiceImpl extends AbstractCRUDServiceImpl<MatterExtDao, ...@@ -127,26 +127,13 @@ public class MatterExtServiceImpl extends AbstractCRUDServiceImpl<MatterExtDao,
matterList = matterService.getDao().getMatterListByAreaCode(matterQuery.areaCode(siteEntity.getAreaCode())); matterList = matterService.getDao().getMatterListByAreaCode(matterQuery.areaCode(siteEntity.getAreaCode()));
} }
log.info("开始更新事项详细!"); log.info("开始更新事项详细!");
// List<MatterEntity> updateList = new ArrayList<>();
for (MatterEntity matterEntity : matterList) { for (MatterEntity matterEntity : matterList) {
Rest<String> rest = matterService.buildMatterDetail(matterEntity, null); Rest<String> rest = matterService.buildMatterDetail(matterEntity, null);
if (rest.getCode() == YesNoEnum.YES.getValue()) { if (rest.getCode() == YesNoEnum.YES.getValue()) {
matterEntity.setUpdateTime(new Date()); matterEntity.setUpdateTime(new Date());
// updateList.add(matterEntity);
// log.info("id==>{} matterEditon==>{}",matterEntity.getId(),matterEntity.getMatterEdition());
matterService.update(matterEntity, null); matterService.update(matterEntity, null);
} }
} }
/* if (!ObjectUtils.isEmpty(updateList)) {
log.info("更新事项详细==》{}", updateList.size());
List<List<MatterEntity>> partition = Lists.partition(updateList, 50);
for (List<MatterEntity> matterEntityList : partition) {
matterService.update(matterEntityList, null);
}
}*/
return Rest.ok(); return Rest.ok();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment