diff --git a/pom.xml b/pom.xml
index cf44a00..1cc2ede 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,6 +19,8 @@
1.7
1.7
+ true
+
1.11.2
2.24
2.53.1
diff --git a/src/main/java/com/xuxueli/crawler/XxlCrawler.java b/src/main/java/com/xuxueli/crawler/XxlCrawler.java
index 9c134ab..0add917 100644
--- a/src/main/java/com/xuxueli/crawler/XxlCrawler.java
+++ b/src/main/java/com/xuxueli/crawler/XxlCrawler.java
@@ -55,7 +55,7 @@ public static class Builder {
* 设置运行数据类型
*
* @param runData
- * @return
+ * @return Builder
*/
public Builder setRunData(RunData runData){
crawler.runData = runData;
@@ -66,7 +66,7 @@ public Builder setRunData(RunData runData){
* 待爬的URL列表
*
* @param urls
- * @return
+ * @return Builder
*/
public Builder setUrls(String... urls) {
if (urls!=null && urls.length>0) {
@@ -82,7 +82,7 @@ public Builder setUrls(String... urls) {
* 允许扩散爬取,将会以现有URL为起点扩散爬取整站
*
* @param allowSpread
- * @return
+ * @return Builder
*/
public Builder setAllowSpread(boolean allowSpread) {
crawler.runConf.setAllowSpread(allowSpread);
@@ -93,7 +93,7 @@ public Builder setAllowSpread(boolean allowSpread) {
* URL白名单正则,非空时进行URL白名单过滤页面
*
* @param whiteUrlRegexs
- * @return
+ * @return Builder
*/
public Builder setWhiteUrlRegexs(String... whiteUrlRegexs) {
if (whiteUrlRegexs!=null && whiteUrlRegexs.length>0) {
@@ -108,7 +108,7 @@ public Builder setWhiteUrlRegexs(String... whiteUrlRegexs) {
* 页面解析器
*
* @param pageParser
- * @return
+ * @return Builder
*/
public Builder setPageParser(PageParser pageParser){
crawler.runConf.setPageParser(pageParser);
@@ -119,7 +119,7 @@ public Builder setPageParser(PageParser pageParser){
* 页面下载器
*
* @param pageLoader
- * @return
+ * @return Builder
*/
public Builder setPageLoader(PageLoader pageLoader){
crawler.runConf.setPageLoader(pageLoader);
@@ -131,7 +131,7 @@ public Builder setPageLoader(PageLoader pageLoader){
* 请求参数
*
* @param paramMap
- * @return
+ * @return Builder
*/
public Builder setParamMap(Map paramMap){
crawler.runConf.setParamMap(paramMap);
@@ -142,7 +142,7 @@ public Builder setParamMap(Map paramMap){
* 请求Cookie
*
* @param cookieMap
- * @return
+ * @return Builder
*/
public Builder setCookieMap(Map cookieMap){
crawler.runConf.setCookieMap(cookieMap);
@@ -153,7 +153,7 @@ public Builder setCookieMap(Map cookieMap){
* 请求Header
*
* @param headerMap
- * @return
+ * @return Builder
*/
public Builder setHeaderMap(Map headerMap){
crawler.runConf.setHeaderMap(headerMap);
@@ -164,7 +164,7 @@ public Builder setHeaderMap(Map headerMap){
* 请求UserAgent
*
* @param userAgents
- * @return
+ * @return Builder
*/
public Builder setUserAgent(String... userAgents){
if (userAgents!=null && userAgents.length>0) {
@@ -181,7 +181,7 @@ public Builder setUserAgent(String... userAgents){
* 请求Referrer
*
* @param referrer
- * @return
+ * @return Builder
*/
public Builder setReferrer(String referrer){
crawler.runConf.setReferrer(referrer);
@@ -192,7 +192,7 @@ public Builder setReferrer(String referrer){
* 请求方式:true=POST请求、false=GET请求
*
* @param ifPost
- * @return
+ * @return Builder
*/
public Builder setIfPost(boolean ifPost){
crawler.runConf.setIfPost(ifPost);
@@ -203,7 +203,7 @@ public Builder setIfPost(boolean ifPost){
* 超时时间,毫秒
*
* @param timeoutMillis
- * @return
+ * @return Builder
*/
public Builder setTimeoutMillis(int timeoutMillis){
crawler.runConf.setTimeoutMillis(timeoutMillis);
@@ -214,7 +214,7 @@ public Builder setTimeoutMillis(int timeoutMillis){
* 停顿时间,爬虫线程处理完页面之后进行主动停顿,避免过于频繁被拦截;
*
* @param pauseMillis
- * @return
+ * @return Builder
*/
public Builder setPauseMillis(int pauseMillis){
crawler.runConf.setPauseMillis(pauseMillis);
@@ -225,7 +225,7 @@ public Builder setPauseMillis(int pauseMillis){
* 代理生成器
*
* @param proxyMaker
- * @return
+ * @return Builder
*/
public Builder setProxyMaker(ProxyMaker proxyMaker){
crawler.runConf.setProxyMaker(proxyMaker);
@@ -236,7 +236,7 @@ public Builder setProxyMaker(ProxyMaker proxyMaker){
* 失败重试次数,大于零时生效
*
* @param failRetryCount
- * @return
+ * @return Builder
*/
public Builder setFailRetryCount(int failRetryCount){
if (failRetryCount > 0) {
@@ -250,7 +250,7 @@ public Builder setFailRetryCount(int failRetryCount){
* 爬虫并发线程数
*
* @param threadCount
- * @return
+ * @return Builder
*/
public Builder setThreadCount(int threadCount) {
crawler.threadCount = threadCount;
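
For context on the Builder API whose Javadoc is touched above: a typical configuration chain exercising these setters is sketched below. This is illustrative only and not part of the patch; the seed URL and white-list regex are placeholders, and the PageParser.parse(Document, Element, T) signature plus the build()/start(true) calls follow the project's README and test classes as I recall them, so treat them as assumptions.

    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import com.xuxueli.crawler.XxlCrawler;
    import com.xuxueli.crawler.parser.PageParser;

    public class CrawlerBuilderDemo {
        public static void main(String[] args) {
            XxlCrawler crawler = new XxlCrawler.Builder()
                    .setUrls("https://my.oschina.net/xuxueli/blog")                    // seed URLs (placeholder)
                    .setAllowSpread(true)                                              // spread-crawl the whole site from the seeds
                    .setWhiteUrlRegexs("https://my\\.oschina\\.net/xuxueli/blog/\\d+") // white-list filter for pages to parse
                    .setThreadCount(3)                                                 // concurrent crawler threads
                    .setPauseMillis(1000)                                              // pause after each page to avoid being blocked
                    .setPageParser(new PageParser<Object>() {
                        @Override
                        public void parse(Document html, Element pageVoElement, Object pageVo) {
                            // handle each parsed page here
                        }
                    })
                    .build();
            crawler.start(true);    // true = run synchronously
        }
    }
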
diff --git a/src/main/java/com/xuxueli/crawler/annotation/PageFieldSelect.java b/src/main/java/com/xuxueli/crawler/annotation/PageFieldSelect.java
index b2db9d5..bf26702 100644
--- a/src/main/java/com/xuxueli/crawler/annotation/PageFieldSelect.java
+++ b/src/main/java/com/xuxueli/crawler/annotation/PageFieldSelect.java
@@ -21,7 +21,7 @@
*
* CSS选择器, 如 "#title"
*
- * @return
+ * @return String
*/
public String cssQuery() default "";
@@ -32,7 +32,7 @@
*
* @see com.xuxueli.crawler.conf.XxlCrawlerConf.SelectType
*
- * @return
+ * @return SelectType
*/
public XxlCrawlerConf.SelectType selectType() default XxlCrawlerConf.SelectType.TEXT;
@@ -41,7 +41,7 @@
*
* jquery 数据抽取参数,SelectType=ATTR/HAS_CLASS 时有效,如 ".attr("abs:src")"
*
- * @return
+ * @return String
*/
public String selectVal() default "";
@@ -50,7 +50,7 @@
*
* 时间格式化,日期类型数据有效
*
- * @return
+ * @return String
*/
String datePattern() default "yyyy-MM-dd HH:mm:ss";
diff --git a/src/main/java/com/xuxueli/crawler/annotation/PageSelect.java b/src/main/java/com/xuxueli/crawler/annotation/PageSelect.java
index bf1a135..4308989 100644
--- a/src/main/java/com/xuxueli/crawler/annotation/PageSelect.java
+++ b/src/main/java/com/xuxueli/crawler/annotation/PageSelect.java
@@ -19,7 +19,7 @@
*
* CSS选择器, 如 "#body"
*
- * @return
+ * @return String
*/
public String cssQuery() default "";
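
For context on how these two annotations are consumed: a data-object class is marked with @PageSelect to scope the page region, and its fields with @PageFieldSelect to pull out text, attributes, or dates. A minimal sketch follows; the class name, field names, and CSS selectors are purely illustrative, only the annotation attributes come from this patch.

    import java.util.Date;
    import com.xuxueli.crawler.annotation.PageFieldSelect;
    import com.xuxueli.crawler.annotation.PageSelect;
    import com.xuxueli.crawler.conf.XxlCrawlerConf;

    @PageSelect(cssQuery = "body")
    public class BlogPageVo {

        @PageFieldSelect(cssQuery = ".blog-title")               // element text (SelectType.TEXT is the default)
        private String title;

        @PageFieldSelect(cssQuery = ".blog-cover img",
                selectType = XxlCrawlerConf.SelectType.ATTR,
                selectVal = "abs:src")                            // read an attribute instead of the text
        private String coverImage;

        @PageFieldSelect(cssQuery = ".publish-time",
                datePattern = "yyyy-MM-dd HH:mm:ss")              // parsed into a Date with this pattern
        private Date publishTime;

        // getters and setters omitted for brevity
    }
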
diff --git a/src/main/java/com/xuxueli/crawler/loader/PageLoader.java b/src/main/java/com/xuxueli/crawler/loader/PageLoader.java
index c7fcebf..a55c497 100644
--- a/src/main/java/com/xuxueli/crawler/loader/PageLoader.java
+++ b/src/main/java/com/xuxueli/crawler/loader/PageLoader.java
@@ -14,7 +14,7 @@ public abstract class PageLoader {
* load page
*
* @param pageRequest
- * @return
+ * @return Document
*/
public abstract Document load(PageRequest pageRequest);
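
PageLoader is the pluggable download strategy behind Builder.setPageLoader(...). As a hedged illustration (the class name is hypothetical and the import paths assume the project's usual package layout), a minimal loader can simply delegate to JsoupUtil.load(PageRequest), which is also touched later in this patch:

    import org.jsoup.nodes.Document;
    import com.xuxueli.crawler.loader.PageLoader;
    import com.xuxueli.crawler.model.PageRequest;
    import com.xuxueli.crawler.util.JsoupUtil;

    public class SimpleJsoupPageLoader extends PageLoader {
        @Override
        public Document load(PageRequest pageRequest) {
            // delegate the HTTP fetch and HTML parse to the Jsoup helper
            return JsoupUtil.load(pageRequest);
        }
    }

It would be wired in through the Builder, e.g. setPageLoader(new SimpleJsoupPageLoader()).
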
diff --git a/src/main/java/com/xuxueli/crawler/model/RunConf.java b/src/main/java/com/xuxueli/crawler/model/RunConf.java
index 24250ff..887c8fc 100644
--- a/src/main/java/com/xuxueli/crawler/model/RunConf.java
+++ b/src/main/java/com/xuxueli/crawler/model/RunConf.java
@@ -39,7 +39,7 @@ public class RunConf {
* valid url, include white url
*
* @param link
- * @return
+ * @return boolean
*/
public boolean validWhiteUrl(String link){
if (!UrlUtil.isUrl(link)) {
diff --git a/src/main/java/com/xuxueli/crawler/proxy/ProxyMaker.java b/src/main/java/com/xuxueli/crawler/proxy/ProxyMaker.java
index 5a9ab5b..8270d2f 100644
--- a/src/main/java/com/xuxueli/crawler/proxy/ProxyMaker.java
+++ b/src/main/java/com/xuxueli/crawler/proxy/ProxyMaker.java
@@ -31,7 +31,7 @@ public ProxyMaker clear() {
/**
* make proxy
*
- * @return
+ * @return Proxy
*/
public abstract Proxy make();
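
ProxyMaker is the extension point behind Builder.setProxyMaker(...); make() supplies the java.net.Proxy used for page requests. A minimal, hypothetical implementation that always returns one fixed HTTP proxy (address and port are placeholders; a real maker would rotate a pool):

    import java.net.InetSocketAddress;
    import java.net.Proxy;
    import com.xuxueli.crawler.proxy.ProxyMaker;

    public class FixedProxyMaker extends ProxyMaker {
        @Override
        public Proxy make() {
            // always hand out the same HTTP proxy
            return new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 8888));
        }
    }
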
diff --git a/src/main/java/com/xuxueli/crawler/rundata/RunData.java b/src/main/java/com/xuxueli/crawler/rundata/RunData.java
index ea14681..11cb2d9 100644
--- a/src/main/java/com/xuxueli/crawler/rundata/RunData.java
+++ b/src/main/java/com/xuxueli/crawler/rundata/RunData.java
@@ -11,21 +11,21 @@ public abstract class RunData {
* add link
*
* @param link
- * @return
+ * @return boolean
*/
public abstract boolean addUrl(String link);
/**
* get link, remove from unVisitedUrlQueue and add to visitedUrlSet
*
- * @return
+ * @return String
*/
public abstract String getUrl();
/**
* get url num
*
- * @return
+ * @return int
*/
public abstract int getUrlNum();
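
RunData abstracts the crawler's URL pool, so the queue can live somewhere other than local memory; the LocalRunData changes just below are the in-memory reference, and the test class at the end of this patch overrides RunData anonymously. The rough sketch below is a hypothetical in-memory variant only (class and field names invented); UrlUtil.isUrl is the same helper RunConf uses in this patch.

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;
    import com.xuxueli.crawler.rundata.RunData;
    import com.xuxueli.crawler.util.UrlUtil;

    public class SimpleRunData extends RunData {

        private final BlockingQueue<String> unVisitedUrlQueue = new LinkedBlockingQueue<String>();
        private final Set<String> visitedUrlSet = Collections.synchronizedSet(new HashSet<String>());

        @Override
        public boolean addUrl(String link) {
            // reject non-URLs and de-duplicate against both pools
            if (!UrlUtil.isUrl(link) || visitedUrlSet.contains(link) || unVisitedUrlQueue.contains(link)) {
                return false;
            }
            return unVisitedUrlQueue.offer(link);
        }

        @Override
        public String getUrl() {
            try {
                String link = unVisitedUrlQueue.take();   // blocks until a URL is available
                visitedUrlSet.add(link);
                return link;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return null;
            }
        }

        @Override
        public int getUrlNum() {
            return unVisitedUrlQueue.size();
        }
    }
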
diff --git a/src/main/java/com/xuxueli/crawler/rundata/strategy/LocalRunData.java b/src/main/java/com/xuxueli/crawler/rundata/strategy/LocalRunData.java
index 16cb843..ecc8e9a 100644
--- a/src/main/java/com/xuxueli/crawler/rundata/strategy/LocalRunData.java
+++ b/src/main/java/com/xuxueli/crawler/rundata/strategy/LocalRunData.java
@@ -49,7 +49,7 @@ public boolean addUrl(String link) {
/**
* url take
- * @return
+ * @return String
* @throws InterruptedException
*/
@Override
diff --git a/src/main/java/com/xuxueli/crawler/thread/CrawlerThread.java b/src/main/java/com/xuxueli/crawler/thread/CrawlerThread.java
index c7b19e8..4a28667 100644
--- a/src/main/java/com/xuxueli/crawler/thread/CrawlerThread.java
+++ b/src/main/java/com/xuxueli/crawler/thread/CrawlerThread.java
@@ -116,7 +116,7 @@ public void run() {
* make page request
*
* @param link
- * @return
+ * @return PageRequest
*/
private PageRequest makePageRequest(String link){
String userAgent = crawler.getRunConf().getUserAgentList().size()>1
@@ -145,7 +145,7 @@ private PageRequest makePageRequest(String link){
/**
* process non page
* @param pageRequest
- * @return
+ * @return boolean
*/
private boolean processNonPage(PageRequest pageRequest){
NonPageParser nonPageParser = (NonPageParser) crawler.getRunConf().getPageParser();
@@ -161,7 +161,7 @@ private boolean processNonPage(PageRequest pageRequest){
/**
* process page
* @param pageRequest
- * @return
+ * @return boolean
*/
private boolean processPage(PageRequest pageRequest) throws IllegalAccessException, InstantiationException {
Document html = crawler.getRunConf().getPageLoader().load(pageRequest);
diff --git a/src/main/java/com/xuxueli/crawler/util/FieldReflectionUtil.java b/src/main/java/com/xuxueli/crawler/util/FieldReflectionUtil.java
index 468ddea..0c87cd8 100644
--- a/src/main/java/com/xuxueli/crawler/util/FieldReflectionUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/FieldReflectionUtil.java
@@ -101,7 +101,7 @@ public static Date parseDate(PageFieldSelect apiRequestParam, String value) {
*
* @param field
* @param value
- * @return
+ * @return Object
*/
public static Object parseValue(Field field, String value) {
diff --git a/src/main/java/com/xuxueli/crawler/util/FileUtil.java b/src/main/java/com/xuxueli/crawler/util/FileUtil.java
index 4dd3478..72be170 100644
--- a/src/main/java/com/xuxueli/crawler/util/FileUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/FileUtil.java
@@ -22,7 +22,7 @@ public class FileUtil {
*
* @param url
* @param contentType
- * @return
+ * @return String
*/
public static String getFileNameByUrl(String url, String contentType) {
url = url.replaceAll("[\\?/:*|<>\"]", "_");
diff --git a/src/main/java/com/xuxueli/crawler/util/IOUtil.java b/src/main/java/com/xuxueli/crawler/util/IOUtil.java
index a49b608..6de3796 100644
--- a/src/main/java/com/xuxueli/crawler/util/IOUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/IOUtil.java
@@ -17,7 +17,7 @@ public class IOUtil {
* String 2 InputStream
*
* @param str
- * @return
+ * @return InputStream
*/
public static InputStream toInputStream(String str, String encoding) {
try {
@@ -33,7 +33,7 @@ public static InputStream toInputStream(String str, String encoding) {
* InputStream 2 String
*
* @param inputStream
- * @return
+ * @return String
* @throws IOException
*/
public static String toString(InputStream inputStream, String encoding){
diff --git a/src/main/java/com/xuxueli/crawler/util/JsoupUtil.java b/src/main/java/com/xuxueli/crawler/util/JsoupUtil.java
index a7767ea..94a1dbe 100644
--- a/src/main/java/com/xuxueli/crawler/util/JsoupUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/JsoupUtil.java
@@ -27,7 +27,7 @@ public class JsoupUtil {
*
* @param pageRequest
*
- * @return
+ * @return Document
*/
public static Document load(PageRequest pageRequest) {
if (!UrlUtil.isUrl(pageRequest.getUrl())) {
@@ -125,7 +125,7 @@ public static String loadPageSource(PageRequest pageRequest) {
* @param fieldElement
* @param selectType
* @param selectVal
- * @return
+ * @return String
*/
public static String parseElement(Element fieldElement, XxlCrawlerConf.SelectType selectType, String selectVal) {
String fieldElementOrigin = null;
@@ -149,7 +149,7 @@ public static String parseElement(Element fieldElement, XxlCrawlerConf.SelectTyp
* 获取页面上所有超链接地址 (标签的href值)
*
* @param html 页面文档
- * @return
+ * @return Set
*/
public static Set findLinks(Document html) {
@@ -185,7 +185,7 @@ public static Set findLinks(Document html) {
* 获取页面上所有图片地址 (标签的href值)
*
* @param html
- * @return
+ * @return Set
*/
public static Set findImages(Document html) {
diff --git a/src/main/java/com/xuxueli/crawler/util/ProxyIpUtil.java b/src/main/java/com/xuxueli/crawler/util/ProxyIpUtil.java
index d925050..52331d8 100644
--- a/src/main/java/com/xuxueli/crawler/util/ProxyIpUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/ProxyIpUtil.java
@@ -22,7 +22,7 @@ public class ProxyIpUtil {
*
* @param proxy
* @param validSite
- * @return
+ * @return int
*/
public static int checkProxy(Proxy proxy, String validSite){
try {
@@ -55,7 +55,7 @@ public static int checkProxy(Proxy proxy, String validSite){
*
* @param proxy
* @param validSite
- * @return
+ * @return int
*/
public static int checkProxyRepeat(Proxy proxy, String validSite){
for (int i = 0; i < 3; i++) {
diff --git a/src/main/java/com/xuxueli/crawler/util/RegexUtil.java b/src/main/java/com/xuxueli/crawler/util/RegexUtil.java
index 3f550c4..755b404 100644
--- a/src/main/java/com/xuxueli/crawler/util/RegexUtil.java
+++ b/src/main/java/com/xuxueli/crawler/util/RegexUtil.java
@@ -13,7 +13,7 @@ public class RegexUtil {
* 正则匹配
* @param regex : 正则表达式
* @param str : 待匹配字符串
- * @return
+ * @return boolean
*/
public static boolean matches(String regex, String str) {
Pattern pattern = Pattern.compile(regex);
@@ -27,7 +27,7 @@ public static boolean matches(String regex, String str) {
* url格式校验
*
* @param str
- * @return
+ * @return boolean
*/
public static boolean isUrl(String str) {
if (str==null || str.trim().length()==0) {
diff --git a/src/test/java/com/xuxueli/crawler/test/XxlCrawlerTest06.java b/src/test/java/com/xuxueli/crawler/test/XxlCrawlerTest06.java
index 3cda1b7..7468883 100644
--- a/src/test/java/com/xuxueli/crawler/test/XxlCrawlerTest06.java
+++ b/src/test/java/com/xuxueli/crawler/test/XxlCrawlerTest06.java
@@ -30,7 +30,6 @@ public static void main(String[] args) {
* 新增一个待采集的URL,接口需要做URL去重,爬虫线程将会获取到并进行处理;
*
* @param link
- * @return
*/
@Override
public boolean addUrl(String link) {
@@ -46,8 +45,6 @@ public boolean addUrl(String link) {
/**
* 获取一个待采集的URL,并且将它从"待采集URL池"中移除,并且添加到"已采集URL池"中;
- *
- * @return
*/
@Override
public String getUrl() {
@@ -65,8 +62,6 @@ public String getUrl() {
/**
* 获取待采集URL数量;
- *
- * @return
*/
@Override
public int getUrlNum() {