diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/FreeMarkerEngine.java b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/FreeMarkerEngine.java index 0ce7214..73712da 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/FreeMarkerEngine.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/FreeMarkerEngine.java @@ -62,6 +62,7 @@ public class FreeMarkerEngine implements ExpressionEngine{ configuration.setDefaultEncoding("UTF-8"); //设置兼容性 经典兼容性 configuration.setClassicCompatible(true); + configuration.setNumberFormat("0.###############"); //如果自定义方法不为空 就将自定义方法列表中的方法循环添加到模板模型 if(customMethods != null){ for (FreemarkerTemplateMethodModel method : customMethods) { diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/FreemarkerTemplateMethodModel.java b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/FreemarkerTemplateMethodModel.java index dba6f29..4e0f4f3 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/FreemarkerTemplateMethodModel.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/FreemarkerTemplateMethodModel.java @@ -5,7 +5,9 @@ import java.util.List; import org.spiderflow.core.freemarker.FreeMarkerEngine; import org.spiderflow.core.freemarker.FreemarkerObject; +import freemarker.ext.beans.StringModel; import freemarker.ext.util.WrapperTemplateModel; +import freemarker.template.SimpleScalar; import freemarker.template.TemplateMethodModelEx; import freemarker.template.TemplateModelException; import freemarker.template.TemplateScalarModel; @@ -66,6 +68,18 @@ public abstract class FreemarkerTemplateMethodModel implements TemplateMethodMod } return null; } + + protected boolean canGetStringValue(Object value){ + try { + if(value instanceof SimpleScalar){ + return true; + } + return false; + } catch (Exception e) { + return false; + } + } + /** * 流程 * @param args 泛型参数列表 diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorFunction.java b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorFunction.java index 42a363d..a670e27 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorFunction.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorFunction.java @@ -3,7 +3,7 @@ package org.spiderflow.core.freemarker.functions; import java.util.List; import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.spiderflow.core.utils.ExtractUtils; import org.springframework.stereotype.Component; @@ -20,21 +20,28 @@ public class SelectorFunction extends FreemarkerTemplateMethodModel{ @Override public Object process(List args) throws TemplateModelException { if(args != null && args.size() > 1){ - String content = getStringValue(args.get(0)); String selector = getStringValue(args.get(1)); - Document document = Jsoup.parse(content); + Element element = null; + if(canGetStringValue(args.get(0))){ + element = Jsoup.parse(getStringValue(args.get(0))); + }else{ + element = (Element) getObjectValue(args.get(0)); + } if(args.size() == 2){ - return ExtractUtils.getFirstHTMLBySelector(document, selector); + return ExtractUtils.getFirstHTMLBySelector(element, selector); } String type = getStringValue(args.get(2)); if("text".equals(type)){ - return ExtractUtils.getFirstTextBySelector(document, selector); + return ExtractUtils.getFirstTextBySelector(element, selector); } if("attr".equals(type) && args.size() == 4){ - return ExtractUtils.getFirstAttrBySelector(document, selector,getStringValue(args.get(3))); + return ExtractUtils.getFirstAttrBySelector(element, selector,getStringValue(args.get(3))); } if("outerhtml".equals(type)){ - return ExtractUtils.getFirstOuterHTMLBySelector(document, selector); + return ExtractUtils.getFirstOuterHTMLBySelector(element, selector); + } + if("element".equals(type)){ + return ExtractUtils.getFirstElement(element, selector); } } return null; diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorsFunction.java b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorsFunction.java index f9a7f3f..e2b5e82 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorsFunction.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/freemarker/functions/SelectorsFunction.java @@ -3,7 +3,7 @@ package org.spiderflow.core.freemarker.functions; import java.util.List; import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.spiderflow.core.utils.ExtractUtils; import org.springframework.stereotype.Component; @@ -20,21 +20,28 @@ public class SelectorsFunction extends FreemarkerTemplateMethodModel{ @Override public Object process(List args) throws TemplateModelException { if(args != null && args.size() > 1){ - String content = getStringValue(args.get(0)); String selector = getStringValue(args.get(1)); - Document document = Jsoup.parse(content); + Element element = null; + if(canGetStringValue(args.get(0))){ + element = Jsoup.parse(getStringValue(args.get(0))); + }else{ + element = (Element) getObjectValue(args.get(0)); + } if(args.size() == 2){ - return ExtractUtils.getHTMLBySelector(document, selector); + return ExtractUtils.getHTMLBySelector(element, selector); } String type = getStringValue(args.get(2)); if("text".equals(type)){ - return ExtractUtils.getTextBySelector(document, selector); + return ExtractUtils.getTextBySelector(element, selector); } if("attr".equals(type) && args.size() == 4){ - return ExtractUtils.getAttrBySelector(document, selector,getStringValue(args.get(3))); + return ExtractUtils.getAttrBySelector(element, selector,getStringValue(args.get(3))); } if("outerhtml".equals(type)){ - return ExtractUtils.getOuterHTMLBySelector(document, selector); + return ExtractUtils.getOuterHTMLBySelector(element, selector); + } + if("element".equals(type)){ + return ExtractUtils.getElements(element, selector); } } return null; diff --git a/spider-flow-core/src/main/java/org/spiderflow/core/utils/ExtractUtils.java b/spider-flow-core/src/main/java/org/spiderflow/core/utils/ExtractUtils.java index d20f43e..baa807f 100644 --- a/spider-flow-core/src/main/java/org/spiderflow/core/utils/ExtractUtils.java +++ b/spider-flow-core/src/main/java/org/spiderflow/core/utils/ExtractUtils.java @@ -8,7 +8,6 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.seimicrawler.xpath.JXDocument; @@ -59,54 +58,66 @@ public class ExtractUtils { return getFirstMatcher(url, "(?<=//|)((\\w)+\\.)+\\w+", false); } - public static String getFirstHTMLBySelector(Document document,String selector){ - return document.selectFirst(selector).html(); + public static String getFirstHTMLBySelector(Element element,String selector){ + element = getFirstElement(element,selector); + return element == null ? null : element.html(); } - public static String getFirstOuterHTMLBySelector(Document document,String selector){ - return document.selectFirst(selector).outerHtml(); + public static String getFirstOuterHTMLBySelector(Element element,String selector){ + element = getFirstElement(element,selector); + return element == null ? null : element.outerHtml(); } - public static String getFirstTextBySelector(Document document,String selector){ - return document.selectFirst(selector).text(); + public static String getFirstTextBySelector(Element element,String selector){ + element = getFirstElement(element,selector); + return element == null ? null : element.text(); } - public static String getFirstAttrBySelector(Document document,String selector,String attr){ - return document.selectFirst(selector).attr(attr); + public static String getFirstAttrBySelector(Element element,String selector,String attr){ + element = getFirstElement(element,selector); + return element == null ? null : element.attr(attr); } - public static List getHTMLBySelector(Document document,String selector){ - Elements elements = document.select(selector); + public static Element getFirstElement(Element element,String selector){ + return element.selectFirst(selector); + } + + public static List getElements(Element element,String selector){ + return element.select(selector); + } + + public static List getHTMLBySelector(Element element,String selector){ + Elements elements = element.select(selector); List result = new ArrayList<>(); - for (Element element : elements) { - result.add(element.html()); + for (Element elem : elements) { + result.add(elem.html()); } return result; } - public static List getOuterHTMLBySelector(Document document,String selector){ - Elements elements = document.select(selector); + public static List getOuterHTMLBySelector(Element element,String selector){ + Elements elements = element.select(selector); List result = new ArrayList<>(); - for (Element element : elements) { - result.add(element.outerHtml()); + for (Element elem : elements) { + result.add(elem.outerHtml()); } return result; } - public static List getTextBySelector(Document document,String selector){ - Elements elements = document.select(selector); + public static List getTextBySelector(Element element,String selector){ + Elements elements = element.select(selector); List result = new ArrayList<>(); - for (Element element : elements) { - result.add(element.text()); + for (Element elem : elements) { + result.add(elem.text()); } return result; } - public static List getAttrBySelector(Document document,String selector,String attr){ - Elements elements = document.select(selector); + public static List getAttrBySelector(Element element,String selector,String attr){ + Elements elements = element.select(selector); List result = new ArrayList<>(); - for (Element element : elements) { - result.add(element.attr(attr)); + for (Element elem : elements) { + result.add(elem.attr(attr)); } return result; } @@ -115,8 +126,8 @@ public class ExtractUtils { return JSONPath.eval(root, jsonPath); } - public static List getValuesByXPath(Document document,String xpath){ - JXDocument jXdocument = JXDocument.create(document); + public static List getValuesByXPath(Element element,String xpath){ + JXDocument jXdocument = JXDocument.create(new Elements(element)); List nodes = jXdocument.selN(xpath); if(nodes != null){ List result = new ArrayList<>(); @@ -128,8 +139,8 @@ public class ExtractUtils { return Collections.emptyList(); } - public static String getValueByXPath(Document document,String xpath){ - JXDocument jXdocument = JXDocument.create(document); + public static String getValueByXPath(Element element,String xpath){ + JXDocument jXdocument = JXDocument.create(new Elements(element)); JXNode node = jXdocument.selNOne(xpath); if(node != null){ return node.asString(); diff --git a/spider-flow-web/src/main/java/org/spiderflow/SpiderApplication.java b/spider-flow-web/src/main/java/org/spiderflow/SpiderApplication.java index dc5d96b..f00af5c 100644 --- a/spider-flow-web/src/main/java/org/spiderflow/SpiderApplication.java +++ b/spider-flow-web/src/main/java/org/spiderflow/SpiderApplication.java @@ -8,8 +8,10 @@ import javax.servlet.ServletException; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.web.servlet.ServletContextInitializer; +import org.springframework.scheduling.annotation.EnableScheduling; @SpringBootApplication +@EnableScheduling public class SpiderApplication implements ServletContextInitializer{ public static void main(String[] args) throws IOException { diff --git a/spider-flow-web/src/main/resources/static/resources/templates/variable.html b/spider-flow-web/src/main/resources/static/resources/templates/variable.html index 2e783eb..4a9303e 100644 --- a/spider-flow-web/src/main/resources/static/resources/templates/variable.html +++ b/spider-flow-web/src/main/resources/static/resources/templates/variable.html @@ -11,6 +11,18 @@ +
+ +
+ +
+
+
+ +
+ +
+
{{# layui.each(d.data.object['variable-name'],function(index,variable){ }}