elements增加htmls、outerHtmls、texts、attrs、ownTexts、wholeTexts等方法

This commit is contained in:
mxd 2020-04-11 15:51:03 +08:00
parent 2c78a1809b
commit ce4bdf7e19

View File

@ -1,16 +1,16 @@
package org.spiderflow.core.executor.function.extension; package org.spiderflow.core.executor.function.extension;
import java.util.List;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.spiderflow.annotation.Comment; import org.spiderflow.annotation.Comment;
import org.spiderflow.annotation.Example; import org.spiderflow.annotation.Example;
import org.spiderflow.annotation.Return;
import org.spiderflow.core.utils.ExtractUtils; import org.spiderflow.core.utils.ExtractUtils;
import org.spiderflow.executor.FunctionExtension; import org.spiderflow.executor.FunctionExtension;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
@Component @Component
public class ElementsFunctionExtension implements FunctionExtension{ public class ElementsFunctionExtension implements FunctionExtension{
@ -21,14 +21,12 @@ public class ElementsFunctionExtension implements FunctionExtension{
/**
 * Extracts content from the given elements by XPath.
 * Delegates to {@code ExtractUtils.getValueByXPath(elements, xpath)} and
 * returns that call's result unchanged.
 *
 * @param elements the elements the XPath expression is evaluated against
 * @param xpath    the XPath expression, e.g. {@code //title/text()}
 * @return the single string produced by {@code ExtractUtils.getValueByXPath}
 */
@Comment("根据xpath提取内容") @Comment("根据xpath提取内容")
@Example("${elementsVar.xpath('//title/text()')}") @Example("${elementsVar.xpath('//title/text()')}")
@Return({Element.class,String.class})
public static String xpath(Elements elements,String xpath){ public static String xpath(Elements elements,String xpath){
return ExtractUtils.getValueByXPath(elements, xpath); return ExtractUtils.getValueByXPath(elements, xpath);
} }
/**
 * Extracts content from the given elements by XPath, returning a list.
 * Delegates to {@code ExtractUtils.getValuesByXPath(elements, xpath)} and
 * returns that call's result unchanged.
 *
 * @param elements the elements the XPath expression is evaluated against
 * @param xpath    the XPath expression, e.g. {@code //h2/text()}
 * @return the list of strings produced by {@code ExtractUtils.getValuesByXPath}
 */
@Comment("根据xpath提取内容") @Comment("根据xpath提取内容")
@Example("${elementsVar.xpaths('//h2/text()')}") @Example("${elementsVar.xpaths('//h2/text()')}")
@Return({Element.class,String.class})
public static List<String> xpaths(Elements elements,String xpath){ public static List<String> xpaths(Elements elements,String xpath){
return ExtractUtils.getValuesByXPath(elements, xpath); return ExtractUtils.getValuesByXPath(elements, xpath);
} }
@ -78,6 +76,76 @@ public class ElementsFunctionExtension implements FunctionExtension{
} }
return null; return null;
} }
/**
 * Collects the value of one attribute from every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}; each entry is whatever {@code Element.attr(key)} returns.
 *
 * @param elements the elements to read the attribute from
 * @param key      the attribute name, e.g. {@code href}
 * @return a new list holding one attribute value per element
 */
@Comment("返回所有attr")
@Example("${elementsVar.attrs('href')}")
public static List<String> attrs(Elements elements,String key){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> values = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        values.add(current.attr(key));
    }
    return values;
}
/**
 * Collects the result of {@code Element.val()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read values from
 * @return a new list holding one value per element
 */
@Comment("返回所有value")
@Example("${elementsVar.vals()}")
public static List<String> vals(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> values = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        values.add(current.val());
    }
    return values;
}
/**
 * Collects the result of {@code Element.text()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read text from
 * @return a new list holding one text string per element
 */
@Comment("返回所有text")
@Example("${elementsVar.texts()}")
public static List<String> texts(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> collected = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        collected.add(current.text());
    }
    return collected;
}
/**
 * Collects the result of {@code Element.html()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read HTML from
 * @return a new list holding one HTML string per element
 */
@Comment("返回所有html")
@Example("${elementsVar.htmls()}")
public static List<String> htmls(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> collected = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        collected.add(current.html());
    }
    return collected;
}
/**
 * Collects the result of {@code Element.outerHtml()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read outer HTML from
 * @return a new list holding one outer-HTML string per element
 */
@Comment("返回所有outerHtml")
@Example("${elementsVar.outerHtmls()}")
public static List<String> outerHtmls(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> collected = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        collected.add(current.outerHtml());
    }
    return collected;
}
/**
 * Collects the result of {@code Element.ownText()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read own-text from
 * @return a new list holding one own-text string per element
 */
@Comment("返回所有ownTexts")
@Example("${elementsVar.ownTexts()}")
public static List<String> ownTexts(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> collected = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        collected.add(current.ownText());
    }
    return collected;
}
/**
 * Collects the result of {@code Element.wholeText()} for every element.
 * The result has exactly one entry per element, in the same order as
 * {@code elements}.
 *
 * @param elements the elements to read whole-text from
 * @return a new list holding one whole-text string per element
 */
@Comment("返回所有wholeText")
@Example("${elementsVar.wholeTexts()}")
public static List<String> wholeTexts(Elements elements){
    final int count = elements.size();
    // Pre-sized: exactly one value is appended per element.
    final List<String> collected = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        final Element current = elements.get(i);
        collected.add(current.wholeText());
    }
    return collected;
}
@Comment("根据css选择器提取内容") @Comment("根据css选择器提取内容")
@Example("${elementsVar.selectors('div > a')}") @Example("${elementsVar.selectors('div > a')}")