Merge remote-tracking branch 'upstream/dev' into dev

This commit is contained in:
nekolr 2020-04-11 18:56:04 +08:00
commit 6543a74cf3
3 changed files with 75 additions and 8 deletions

View File

@ -1,16 +1,16 @@
package org.spiderflow.core.executor.function.extension;
import java.util.List;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.spiderflow.annotation.Comment;
import org.spiderflow.annotation.Example;
import org.spiderflow.annotation.Return;
import org.spiderflow.core.utils.ExtractUtils;
import org.spiderflow.executor.FunctionExtension;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
@Component
public class ElementsFunctionExtension implements FunctionExtension{
@ -21,14 +21,12 @@ public class ElementsFunctionExtension implements FunctionExtension{
/**
 * Evaluates an XPath expression against the given elements and returns a single value.
 *
 * <p>The work is delegated entirely to {@code ExtractUtils.getValueByXPath}.
 *
 * @param elements the elements to evaluate the expression against
 * @param xpath the XPath expression
 * @return whatever {@code ExtractUtils.getValueByXPath} produces for these arguments
 */
@Comment("根据xpath提取内容")
@Example("${elementsVar.xpath('//title/text()')}")
@Return({Element.class,String.class})
public static String xpath(Elements elements, String xpath) {
    final String extracted = ExtractUtils.getValueByXPath(elements, xpath);
    return extracted;
}
/**
 * Evaluates an XPath expression against the given elements and returns every value found.
 *
 * <p>The work is delegated entirely to {@code ExtractUtils.getValuesByXPath}.
 *
 * @param elements the elements to evaluate the expression against
 * @param xpath the XPath expression
 * @return whatever {@code ExtractUtils.getValuesByXPath} produces for these arguments
 */
@Comment("根据xpath提取内容")
@Example("${elementsVar.xpaths('//h2/text()')}")
@Return({Element.class,String.class})
public static List<String> xpaths(Elements elements, String xpath) {
    final List<String> extracted = ExtractUtils.getValuesByXPath(elements, xpath);
    return extracted;
}
@ -78,6 +76,76 @@ public class ElementsFunctionExtension implements FunctionExtension{
}
return null;
}
/**
 * Collects the value of one attribute from every element.
 *
 * @param elements the elements to read from
 * @param key the attribute name passed to {@code Element.attr}
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有attr")
@Example("${elementsVar.attrs('href')}")
public static List<String> attrs(Elements elements, String key) {
    final int count = elements.size();
    final List<String> values = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        values.add(elements.get(i).attr(key));
    }
    return values;
}
/**
 * Collects the result of {@code Element.val()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有value")
@Example("${elementsVar.vals()}")
public static List<String> vals(Elements elements) {
    final int count = elements.size();
    final List<String> values = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        values.add(elements.get(i).val());
    }
    return values;
}
/**
 * Collects the result of {@code Element.text()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有text")
@Example("${elementsVar.texts()}")
public static List<String> texts(Elements elements) {
    final int count = elements.size();
    final List<String> contents = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        contents.add(elements.get(i).text());
    }
    return contents;
}
/**
 * Collects the result of {@code Element.html()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有html")
@Example("${elementsVar.htmls()}")
public static List<String> htmls(Elements elements) {
    final int count = elements.size();
    final List<String> markup = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        markup.add(elements.get(i).html());
    }
    return markup;
}
/**
 * Collects the result of {@code Element.outerHtml()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有outerHtml")
@Example("${elementsVar.outerHtmls()}")
public static List<String> outerHtmls(Elements elements) {
    final int count = elements.size();
    final List<String> markup = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        markup.add(elements.get(i).outerHtml());
    }
    return markup;
}
/**
 * Collects the result of {@code Element.ownText()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有ownTexts")
@Example("${elementsVar.ownTexts()}")
public static List<String> ownTexts(Elements elements) {
    final int count = elements.size();
    final List<String> contents = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        contents.add(elements.get(i).ownText());
    }
    return contents;
}
/**
 * Collects the result of {@code Element.wholeText()} for every element.
 *
 * @param elements the elements to read from
 * @return a new list with one entry per element, in the same order as {@code elements}
 */
@Comment("返回所有wholeText")
@Example("${elementsVar.wholeTexts()}")
public static List<String> wholeTexts(Elements elements) {
    final int count = elements.size();
    final List<String> contents = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        contents.add(elements.get(i).wholeText());
    }
    return contents;
}
@Comment("根据css选择器提取内容")
@Example("${elementsVar.selectors('div > a')}")

View File

@ -27,7 +27,7 @@ public class ResponseFunctionExtension implements FunctionExtension {
/**
 * Parses the HTML of a response with jsoup and returns the parsed result.
 *
 * @param response the response whose HTML ({@code getHtml()}) and URL ({@code getUrl()}) are read
 * @return the result of {@code Jsoup.parse(html, baseUri)} for this response
 */
@Comment("将请求结果转为Element对象")
@Example("${resp.element()}")
public static Element element(SpiderResponse response) {
    // Hand jsoup the response URL as the base URI so that relative links in the
    // page can be resolved to absolute URLs; a parse without a base URI cannot do that.
    return Jsoup.parse(response.getHtml(), response.getUrl());
}
@Comment("根据xpath在请求结果中查找")

View File

@ -21,8 +21,7 @@ public class SqlRowSetExtension implements FunctionExtension {
@Example("${rs.nextToMap()}")
public static Map<String, Object> nextToMap(SqlRowSet sqlRowSet) {
try {
boolean next = sqlRowSet.next();
if (!next) {
if (!sqlRowSet.next()) {
return null;
}
String[] columnNames = sqlRowSet.getMetaData().getColumnNames();