Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
smart_gov_platform
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵啸非
smart_gov_platform
Commits
3bae4198
Commit
3bae4198
authored
Mar 21, 2024
by
赵啸非
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改重试机制
parent
7f5e2976
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
109 additions
and
124 deletions
+109
-124
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterDetailHtmlParseUtil.java
...m/mortals/xhx/common/utils/MatterDetailHtmlParseUtil.java
+0
-43
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterHtmlParseUtil.java
...ava/com/mortals/xhx/common/utils/MatterHtmlParseUtil.java
+20
-18
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterTypeHtmlParseUtil.java
...com/mortals/xhx/common/utils/MatterTypeHtmlParseUtil.java
+89
-63
No files found.
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterDetailHtmlParseUtil.java
View file @
3bae4198
...
...
@@ -26,50 +26,7 @@ public class MatterDetailHtmlParseUtil {
try
{
document
=
Jsoup
.
connect
(
url
).
get
();
return
document
;
/*Jsoup.connect(url).get();
html = Jsoup.connect(url).get().body().html();
// html = HttpUtil.get(url);
//System.out.println(html);
HtmlCleaner hc = new HtmlCleaner();
TagNode tn = hc.clean(html);
Object[] rs = tn.evaluateXPath("//div");
for (Integer i = 0; i < rs.length; i++) {
TagNode n = (TagNode) rs[i];
System.out.println(n.getText());
}*/
// System.out.println(tn.getText());
/* CleanerProperties cleanerProperties = new CleanerProperties();
cleanerProperties.setAdvancedXmlEscape(true);
cleanerProperties.setOmitXmlDeclaration(true);
cleanerProperties.setOmitDoctypeDeclaration(true);
cleanerProperties.setTranslateSpecialEntities(true);
cleanerProperties.setTransResCharsToNCR(true);
cleanerProperties.setRecognizeUnicodeChars(true);
cleanerProperties.setIgnoreQuestAndExclam(true);
cleanerProperties.setUseEmptyElementTags(false);
Document dom = new DomSerializer(cleanerProperties).createDOM(tn);*/
/*XPath xPath = XPathFactory.newInstance().newXPath();
String tklrExp = String.format("//div[@class=\"smltc9_bottom\"]//p");
Object result = xPath.evaluate(tklrExp, dom, XPathConstants.NODESET);
NodeList nodeList = (NodeList) result;
for (Integer i = 0; i < nodeList.getLength(); i++) {
System.out.println(nodeList.item(i).getTextContent());
System.out.println("===============");
}*/
//div[@class='section10']//table[1]//tr[%d]//td
//return dom;
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
...
...
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterHtmlParseUtil.java
View file @
3bae4198
...
...
@@ -31,7 +31,7 @@ public class MatterHtmlParseUtil {
String
matterTotalExp
=
"//input[@id=\"result_count\"]"
;
String
matterPageExp
=
"//input[@id=\"pageNum\"]"
;
Map
<
String
,
Integer
>
resultMap
=
new
HashMap
<>();
int
reCount
=
0
;
int
reCount
=
0
;
while
(
reCount
<
MAX_RETRY_COUNT
)
{
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
...
...
@@ -50,6 +50,7 @@ public class MatterHtmlParseUtil {
Integer
pageNum
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"pageNum"
,
pageNum
);
}
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"获取事项数量异常!params:{},重试:{}次"
,
JSON
.
toJSONString
(
params
),
reCount
,
e
);
reCount
++;
...
...
@@ -213,24 +214,25 @@ public class MatterHtmlParseUtil {
int
reCount
=
0
;
while
(
reCount
<
MAX_RETRY_COUNT
)
{
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
timeout
(
60
*
1000
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
deptListExp
);
for
(
int
i
=
0
;
i
<
elements
.
size
();
i
++)
{
Element
element
=
elements
.
get
(
i
);
if
(
element
==
null
)
{
continue
;
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
timeout
(
60
*
1000
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
deptListExp
);
for
(
int
i
=
0
;
i
<
elements
.
size
();
i
++)
{
Element
element
=
elements
.
get
(
i
);
if
(
element
==
null
)
{
continue
;
}
String
deptName
=
element
.
attr
(
"title"
);
String
href
=
element
.
firstElementChild
().
attr
(
"href"
);
UrlBuilder
builder
=
UrlBuilder
.
ofHttp
(
href
,
CharsetUtil
.
CHARSET_UTF_8
);
String
deptCode
=
builder
.
getQuery
().
get
(
"deptCode"
).
toString
();
map
.
put
(
deptCode
,
deptName
);
}
String
deptName
=
element
.
attr
(
"title"
);
String
href
=
element
.
firstElementChild
().
attr
(
"href"
);
UrlBuilder
builder
=
UrlBuilder
.
ofHttp
(
href
,
CharsetUtil
.
CHARSET_UTF_8
);
String
deptCode
=
builder
.
getQuery
().
get
(
"deptCode"
).
toString
();
map
.
put
(
deptCode
,
deptName
);
}
}
catch
(
Exception
e
)
{
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"当前站点同步添加部门异常!params:{},重试:{}次"
,
JSON
.
toJSONString
(
params
),
reCount
,
e
);
reCount
++;
try
{
...
...
base-manager/src/main/java/com/mortals/xhx/common/utils/MatterTypeHtmlParseUtil.java
View file @
3bae4198
...
...
@@ -22,6 +22,8 @@ import java.util.HashMap;
import
java.util.List
;
import
java.util.Map
;
import
static
com
.
mortals
.
xhx
.
common
.
key
.
Constant
.
MAX_RETRY_COUNT
;
/**
* 解析事项类型,主题,热度等
*/
...
...
@@ -40,30 +42,38 @@ public class MatterTypeHtmlParseUtil {
String
matterTotalExp
=
"//input[@id=\"result_countTheme\"]"
;
String
matterPageExp
=
"//input[@id=\"pageNumTheme\"]"
;
Map
<
String
,
Integer
>
resultMap
=
new
HashMap
<>();
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
matterTotalExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
total
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"total"
,
total
);
}
else
{
resultMap
.
put
(
"total"
,
0
);
}
int
reCount
=
0
;
while
(
reCount
<
MAX_RETRY_COUNT
)
{
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
timeout
(
60
*
1000
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
matterTotalExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
total
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"total"
,
total
);
}
else
{
resultMap
.
put
(
"total"
,
0
);
}
elements
=
dom
.
selectXpath
(
matterPageExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
pageNum
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"pageNum"
,
pageNum
);
}
else
{
resultMap
.
put
(
"pageNum"
,
0
);
elements
=
dom
.
selectXpath
(
matterPageExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
pageNum
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"pageNum"
,
pageNum
);
}
else
{
resultMap
.
put
(
"pageNum"
,
0
);
}
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"获取事项数量异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
reCount
++;
try
{
Thread
.
sleep
(
2000
);
}
catch
(
InterruptedException
interruptedException
)
{
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"获取事项数量异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
return
Rest
.
fail
(
e
.
getMessage
());
}
return
Rest
.
ok
(
resultMap
);
...
...
@@ -80,30 +90,38 @@ public class MatterTypeHtmlParseUtil {
String
matterTotalExp
=
"//input[@id=\"result_count\"]"
;
String
matterPageExp
=
"//input[@id=\"pageNum\"]"
;
Map
<
String
,
Integer
>
resultMap
=
new
HashMap
<>();
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
matterTotalExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
total
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"total"
,
total
);
}
else
{
resultMap
.
put
(
"total"
,
0
);
}
int
reCount
=
0
;
while
(
reCount
<
MAX_RETRY_COUNT
)
{
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
timeout
(
60
*
1000
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
matterTotalExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
total
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"total"
,
total
);
}
else
{
resultMap
.
put
(
"total"
,
0
);
}
elements
=
dom
.
selectXpath
(
matterPageExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
pageNum
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"pageNum"
,
pageNum
);
}
else
{
resultMap
.
put
(
"pageNum"
,
0
);
elements
=
dom
.
selectXpath
(
matterPageExp
);
if
(
elements
.
size
()
>
0
)
{
Integer
pageNum
=
elements
.
get
(
0
)
==
null
?
0
:
DataUtil
.
converStr2Int
(
elements
.
get
(
0
).
attr
(
"value"
),
0
);
resultMap
.
put
(
"pageNum"
,
pageNum
);
}
else
{
resultMap
.
put
(
"pageNum"
,
0
);
}
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"获取事项数量异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
reCount
++;
try
{
Thread
.
sleep
(
2000
);
}
catch
(
InterruptedException
interruptedException
)
{
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"获取事项数量异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
return
Rest
.
fail
(
e
.
getMessage
());
}
return
Rest
.
ok
(
resultMap
);
...
...
@@ -120,27 +138,35 @@ public class MatterTypeHtmlParseUtil {
public
static
Rest
<
Map
<
String
,
String
>>
getThemeList
(
Map
<
String
,
String
>
params
,
String
url
)
{
String
ThemeListExp
=
"//ul[@class=\"zhuti_list\"]//li//a"
;
Map
<
String
,
String
>
map
=
new
HashMap
<>();
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
ThemeListExp
);
for
(
int
i
=
0
;
i
<
elements
.
size
();
i
++)
{
Element
element
=
elements
.
get
(
i
);
if
(
element
==
null
)
{
continue
;
int
reCount
=
0
;
while
(
reCount
<
MAX_RETRY_COUNT
)
{
try
{
Document
dom
=
Jsoup
.
connect
(
url
)
.
ignoreContentType
(
true
)
.
ignoreHttpErrors
(
true
)
.
timeout
(
60
*
1000
)
.
data
(
params
).
get
();
Elements
elements
=
dom
.
selectXpath
(
ThemeListExp
);
for
(
int
i
=
0
;
i
<
elements
.
size
();
i
++)
{
Element
element
=
elements
.
get
(
i
);
if
(
element
==
null
)
{
continue
;
}
//主题编码
String
themeCode
=
element
.
attr
(
"id"
);
String
title
=
element
.
attr
(
"title"
);
map
.
put
(
themeCode
,
title
);
}
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"获取列表异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
reCount
++;
try
{
Thread
.
sleep
(
2000
);
}
catch
(
InterruptedException
interruptedException
)
{
}
//主题编码
String
themeCode
=
element
.
attr
(
"id"
);
String
title
=
element
.
attr
(
"title"
);
map
.
put
(
themeCode
,
title
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"获取列表异常!params:"
+
JSON
.
toJSONString
(
params
),
e
);
return
Rest
.
fail
(
e
.
getMessage
());
}
return
Rest
.
ok
(
map
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment