优化核心组件及修复BUG

This commit is contained in:
mxd 2019-08-24 17:32:59 +08:00
parent c3cc6d6cee
commit 80bcd6b8f3
7 changed files with 97 additions and 43 deletions

View File

@ -62,6 +62,7 @@ public class FreeMarkerEngine implements ExpressionEngine{
configuration.setDefaultEncoding("UTF-8");
//设置兼容性 经典兼容性
configuration.setClassicCompatible(true);
configuration.setNumberFormat("0.###############");
//如果自定义方法不为空 就将自定义方法列表中的方法循环添加到模板模型
if(customMethods != null){
for (FreemarkerTemplateMethodModel method : customMethods) {

View File

@ -5,7 +5,9 @@ import java.util.List;
import org.spiderflow.core.freemarker.FreeMarkerEngine;
import org.spiderflow.core.freemarker.FreemarkerObject;
import freemarker.ext.beans.StringModel;
import freemarker.ext.util.WrapperTemplateModel;
import freemarker.template.SimpleScalar;
import freemarker.template.TemplateMethodModelEx;
import freemarker.template.TemplateModelException;
import freemarker.template.TemplateScalarModel;
@ -66,6 +68,18 @@ public abstract class FreemarkerTemplateMethodModel implements TemplateMethodMod
}
return null;
}
/**
 * Tells whether the given template argument is a plain FreeMarker string scalar.
 *
 * @param value the raw argument to inspect; may be null
 * @return true only when value is a SimpleScalar, false for anything else (including null)
 */
protected boolean canGetStringValue(Object value){
	// instanceof cannot throw and yields false for null, so neither a
	// try/catch nor an explicit if/else is needed here.
	return value instanceof SimpleScalar;
}
/**
* 流程
* @param args 泛型参数列表

View File

@ -3,7 +3,7 @@ package org.spiderflow.core.freemarker.functions;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.spiderflow.core.utils.ExtractUtils;
import org.springframework.stereotype.Component;
@ -20,21 +20,28 @@ public class SelectorFunction extends FreemarkerTemplateMethodModel{
@Override
public Object process(List<?> args) throws TemplateModelException {
if(args != null && args.size() > 1){
String content = getStringValue(args.get(0));
String selector = getStringValue(args.get(1));
Document document = Jsoup.parse(content);
Element element = null;
if(canGetStringValue(args.get(0))){
element = Jsoup.parse(getStringValue(args.get(0)));
}else{
element = (Element) getObjectValue(args.get(0));
}
if(args.size() == 2){
return ExtractUtils.getFirstHTMLBySelector(document, selector);
return ExtractUtils.getFirstHTMLBySelector(element, selector);
}
String type = getStringValue(args.get(2));
if("text".equals(type)){
return ExtractUtils.getFirstTextBySelector(document, selector);
return ExtractUtils.getFirstTextBySelector(element, selector);
}
if("attr".equals(type) && args.size() == 4){
return ExtractUtils.getFirstAttrBySelector(document, selector,getStringValue(args.get(3)));
return ExtractUtils.getFirstAttrBySelector(element, selector,getStringValue(args.get(3)));
}
if("outerhtml".equals(type)){
return ExtractUtils.getFirstOuterHTMLBySelector(document, selector);
return ExtractUtils.getFirstOuterHTMLBySelector(element, selector);
}
if("element".equals(type)){
return ExtractUtils.getFirstElement(element, selector);
}
}
return null;

View File

@ -3,7 +3,7 @@ package org.spiderflow.core.freemarker.functions;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.spiderflow.core.utils.ExtractUtils;
import org.springframework.stereotype.Component;
@ -20,21 +20,28 @@ public class SelectorsFunction extends FreemarkerTemplateMethodModel{
@Override
public Object process(List<?> args) throws TemplateModelException {
if(args != null && args.size() > 1){
String content = getStringValue(args.get(0));
String selector = getStringValue(args.get(1));
Document document = Jsoup.parse(content);
Element element = null;
if(canGetStringValue(args.get(0))){
element = Jsoup.parse(getStringValue(args.get(0)));
}else{
element = (Element) getObjectValue(args.get(0));
}
if(args.size() == 2){
return ExtractUtils.getHTMLBySelector(document, selector);
return ExtractUtils.getHTMLBySelector(element, selector);
}
String type = getStringValue(args.get(2));
if("text".equals(type)){
return ExtractUtils.getTextBySelector(document, selector);
return ExtractUtils.getTextBySelector(element, selector);
}
if("attr".equals(type) && args.size() == 4){
return ExtractUtils.getAttrBySelector(document, selector,getStringValue(args.get(3)));
return ExtractUtils.getAttrBySelector(element, selector,getStringValue(args.get(3)));
}
if("outerhtml".equals(type)){
return ExtractUtils.getOuterHTMLBySelector(document, selector);
return ExtractUtils.getOuterHTMLBySelector(element, selector);
}
if("element".equals(type)){
return ExtractUtils.getElements(element, selector);
}
}
return null;

View File

@ -8,7 +8,6 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.seimicrawler.xpath.JXDocument;
@ -59,54 +58,66 @@ public class ExtractUtils {
return getFirstMatcher(url, "(?<=//|)((\\w)+\\.)+\\w+", false);
}
public static String getFirstHTMLBySelector(Document document,String selector){
return document.selectFirst(selector).html();
public static String getFirstHTMLBySelector(Element element,String selector){
element = getFirstElement(element,selector);
return element == null ? null : element.html();
}
public static String getFirstOuterHTMLBySelector(Document document,String selector){
return document.selectFirst(selector).outerHtml();
public static String getFirstOuterHTMLBySelector(Element element,String selector){
element = getFirstElement(element,selector);
return element == null ? null : element.outerHtml();
}
public static String getFirstTextBySelector(Document document,String selector){
return document.selectFirst(selector).text();
public static String getFirstTextBySelector(Element element,String selector){
element = getFirstElement(element,selector);
return element == null ? null : element.text();
}
public static String getFirstAttrBySelector(Document document,String selector,String attr){
return document.selectFirst(selector).attr(attr);
public static String getFirstAttrBySelector(Element element,String selector,String attr){
element = getFirstElement(element,selector);
return element == null ? null : element.attr(attr);
}
public static List<String> getHTMLBySelector(Document document,String selector){
Elements elements = document.select(selector);
/**
 * Returns the first element below the given root that matches a selector.
 *
 * @param element root element to search from; may be null
 * @param selector selector expression handed to Element.selectFirst
 * @return the first match, or null when element is null or nothing matches
 */
public static Element getFirstElement(Element element,String selector){
	// The getFirst*BySelector helpers in this class already map a null result
	// to a null return value, so a null root is reported the same way instead
	// of failing with a NullPointerException.
	if(element == null){
		return null;
	}
	return element.selectFirst(selector);
}
/**
 * Returns every element below the given root that matches a selector.
 *
 * @param element root element to search from; may be null
 * @param selector selector expression handed to Element.select
 * @return the matching elements; an empty, unmodifiable list when element is null
 */
public static List<Element> getElements(Element element,String selector){
	// A null root means there is nothing to search; answer with an empty list
	// rather than throwing a NullPointerException.
	if(element == null){
		return Collections.emptyList();
	}
	return element.select(selector);
}
public static List<String> getHTMLBySelector(Element element,String selector){
Elements elements = element.select(selector);
List<String> result = new ArrayList<>();
for (Element element : elements) {
result.add(element.html());
for (Element elem : elements) {
result.add(elem.html());
}
return result;
}
public static List<String> getOuterHTMLBySelector(Document document,String selector){
Elements elements = document.select(selector);
public static List<String> getOuterHTMLBySelector(Element element,String selector){
Elements elements = element.select(selector);
List<String> result = new ArrayList<>();
for (Element element : elements) {
result.add(element.outerHtml());
for (Element elem : elements) {
result.add(elem.outerHtml());
}
return result;
}
public static List<String> getTextBySelector(Document document,String selector){
Elements elements = document.select(selector);
public static List<String> getTextBySelector(Element element,String selector){
Elements elements = element.select(selector);
List<String> result = new ArrayList<>();
for (Element element : elements) {
result.add(element.text());
for (Element elem : elements) {
result.add(elem.text());
}
return result;
}
public static List<String> getAttrBySelector(Document document,String selector,String attr){
Elements elements = document.select(selector);
public static List<String> getAttrBySelector(Element element,String selector,String attr){
Elements elements = element.select(selector);
List<String> result = new ArrayList<>();
for (Element element : elements) {
result.add(element.attr(attr));
for (Element elem : elements) {
result.add(elem.attr(attr));
}
return result;
}
@ -115,8 +126,8 @@ public class ExtractUtils {
return JSONPath.eval(root, jsonPath);
}
public static List<String> getValuesByXPath(Document document,String xpath){
JXDocument jXdocument = JXDocument.create(document);
public static List<String> getValuesByXPath(Element element,String xpath){
JXDocument jXdocument = JXDocument.create(new Elements(element));
List<JXNode> nodes = jXdocument.selN(xpath);
if(nodes != null){
List<String> result = new ArrayList<>();
@ -128,8 +139,8 @@ public class ExtractUtils {
return Collections.emptyList();
}
public static String getValueByXPath(Document document,String xpath){
JXDocument jXdocument = JXDocument.create(document);
public static String getValueByXPath(Element element,String xpath){
JXDocument jXdocument = JXDocument.create(new Elements(element));
JXNode node = jXdocument.selNOne(xpath);
if(node != null){
return node.asString();

View File

@ -8,8 +8,10 @@ import javax.servlet.ServletException;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.ServletContextInitializer;
import org.springframework.scheduling.annotation.EnableScheduling;
@SpringBootApplication
@EnableScheduling
public class SpiderApplication implements ServletContextInitializer{
public static void main(String[] args) throws IOException {

View File

@ -11,6 +11,18 @@
<input type="text" name="value" placeholder="请输入节点名称" value="{{=d.value}}" autocomplete="off" class="layui-input">
</div>
</div>
<div class="layui-form-item">
<label class="layui-form-label">循环变量</label>
<div class="layui-input-block">
<input type="text" name="loopVariableName" placeholder="请输入循环变量" autocomplete="off" class="layui-input input-default" value="{{=d.data.object.loopVariableName}}">
</div>
</div>
<div class="layui-form-item">
<label class="layui-form-label">循环次数</label>
<div class="layui-input-block">
<input type="text" name="loopCount" placeholder="请输入循环次数" autocomplete="off" class="layui-input input-default" value="{{=d.data.object.loopCount}}">
</div>
</div>
{{# layui.each(d.data.object['variable-name'],function(index,variable){ }}
<hr>
<div class="layui-form-item layui-form-relative">