From ce4bdf7e19acc4d1bb45f47955415dce020a2835 Mon Sep 17 00:00:00 2001 From: mxd <838425805@qq.com> Date: Sat, 11 Apr 2020 15:51:03 +0800 Subject: [PATCH 1/2] =?UTF-8?q?elements=E5=A2=9E=E5=8A=A0htmls=E3=80=81out?= =?UTF-8?q?erHtmls=E3=80=81texts=E3=80=81attrs=E3=80=81ownTexts=E3=80=81wh?= =?UTF-8?q?oleTexts=E7=AD=89=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../extension/ElementsFunctionExtension.java | 78 +++++++++++++++++-- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ElementsFunctionExtension.java b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ElementsFunctionExtension.java index 2471480..058eb3c 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ElementsFunctionExtension.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ElementsFunctionExtension.java @@ -1,16 +1,16 @@ package org.spiderflow.core.executor.function.extension; -import java.util.List; - import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.spiderflow.annotation.Comment; import org.spiderflow.annotation.Example; -import org.spiderflow.annotation.Return; import org.spiderflow.core.utils.ExtractUtils; import org.spiderflow.executor.FunctionExtension; import org.springframework.stereotype.Component; +import java.util.ArrayList; +import java.util.List; + @Component public class ElementsFunctionExtension implements FunctionExtension{ @@ -21,14 +21,12 @@ public class ElementsFunctionExtension implements FunctionExtension{ @Comment("根据xpath提取内容") @Example("${elementsVar.xpath('//title/text()')}") - @Return({Element.class,String.class}) public static String xpath(Elements elements,String xpath){ return ExtractUtils.getValueByXPath(elements, xpath); } @Comment("根据xpath提取内容") @Example("${elementsVar.xpaths('//h2/text()')}") - @Return({Element.class,String.class}) public static List xpaths(Elements elements,String xpath){ return ExtractUtils.getValuesByXPath(elements, xpath); } @@ -78,6 +76,76 @@ public class ElementsFunctionExtension implements FunctionExtension{ } return null; } + + @Comment("返回所有attr") + @Example("${elementsVar.attrs('href')}") + public static List attrs(Elements elements,String key){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.attr(key)); + } + return list; + } + + @Comment("返回所有value") + @Example("${elementsVar.vals()}") + public static List vals(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.val()); + } + return list; + } + + @Comment("返回所有text") + @Example("${elementsVar.texts()}") + public static List texts(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.text()); + } + return list; + } + + @Comment("返回所有html") + @Example("${elementsVar.htmls()}") + public static List htmls(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.html()); + } + return list; + } + + @Comment("返回所有outerHtml") + @Example("${elementsVar.outerHtmls()}") + public static List outerHtmls(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.outerHtml()); + } + return list; + } + + @Comment("返回所有ownTexts") + @Example("${elementsVar.ownTexts()}") + public static List ownTexts(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.ownText()); + } + return list; + } + + @Comment("返回所有wholeText") + @Example("${elementsVar.wholeTexts()}") + public static List wholeTexts(Elements elements){ + List list = new ArrayList<>(elements.size()); + for (Element element : elements) { + list.add(element.wholeText()); + } + return list; + } @Comment("根据css选择器提取内容") @Example("${elementsVar.selectors('div > a')}") From ed4ec48dffe24a6bec958f3913358c30f1e96674 Mon Sep 17 00:00:00 2001 From: mxd <838425805@qq.com> Date: Sat, 11 Apr 2020 15:52:05 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../executor/function/extension/ResponseFunctionExtension.java | 2 +- .../core/executor/function/extension/SqlRowSetExtension.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ResponseFunctionExtension.java b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ResponseFunctionExtension.java index 777e3c6..9956f59 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ResponseFunctionExtension.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/ResponseFunctionExtension.java @@ -27,7 +27,7 @@ public class ResponseFunctionExtension implements FunctionExtension { @Comment("将请求结果转为Element对象") @Example("${resp.element()}") public static Element element(SpiderResponse response) { - return Jsoup.parse(response.getHtml()); + return Jsoup.parse(response.getHtml(),response.getUrl()); } @Comment("根据xpath在请求结果中查找") diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/SqlRowSetExtension.java b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/SqlRowSetExtension.java index 17ebe0d..6fd678b 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/SqlRowSetExtension.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/executor/function/extension/SqlRowSetExtension.java @@ -21,8 +21,7 @@ public class SqlRowSetExtension implements FunctionExtension { @Example("${rs.nextToMap()}") public static Map nextToMap(SqlRowSet sqlRowSet) { try { - boolean next = sqlRowSet.next(); - if (!next) { + if (!sqlRowSet.next()) { return null; } String[] columnNames = sqlRowSet.getMetaData().getColumnNames();