From d7797875e4ce565fa18187d5fdfc022e2b010950 Mon Sep 17 00:00:00 2001 From: mxd <838425805@qq.com> Date: Wed, 24 Jul 2019 11:07:56 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BB=A3=E7=90=86=E3=80=81?= =?UTF-8?q?=E5=BB=B6=E8=BF=9F=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/java/com/mxd/spider/core/Spider.java | 109 +++++++++------- .../spider/core/executor/RequestExecutor.java | 31 ++++- .../com/mxd/spider/core/io/HttpRequest.java | 23 +++- .../spider/core/model/SpiderJsonProperty.java | 36 +++++- src/main/resources/application.properties | 2 +- src/main/resources/static/editor.html | 1 + src/main/resources/static/js/editor.js | 32 +++-- .../static/resources/templates/rectangle.html | 122 ++++++++++-------- .../static/resources/templates/root.html | 48 +++---- 9 files changed, 263 insertions(+), 141 deletions(-) diff --git a/src/main/java/com/mxd/spider/core/Spider.java b/src/main/java/com/mxd/spider/core/Spider.java index 7ffd99d..e73b75a 100644 --- a/src/main/java/com/mxd/spider/core/Spider.java +++ b/src/main/java/com/mxd/spider/core/Spider.java @@ -1,16 +1,14 @@ package com.mxd.spider.core; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -39,46 +37,61 @@ public class Spider { public void run(SpiderFlow spiderFlow){ SpiderNode root = SpiderFlowUtils.loadXMLFromString(spiderFlow.getXml()); SpiderContext context = new SpiderContext(); - int threadPoolSize = 8; - ThreadPoolExecutor pool = new ThreadPoolExecutor(threadPoolSize,threadPoolSize,5000,TimeUnit.MILLISECONDS,new LinkedBlockingQueue()); - execute(pool,root, context,new HashMap<>()); + execute(8,root, context,new HashMap<>()); } public List runWithTest(SpiderNode root,SpiderContext context){ //开始不允许设置任何东西 - int threadPoolSize = 8; - ThreadPoolExecutor pool = new ThreadPoolExecutor(threadPoolSize,threadPoolSize,5000,TimeUnit.MILLISECONDS,new LinkedBlockingQueue()); - execute(pool,root, context,new HashMap<>()); + execute(8,root, context,new HashMap<>()); context.log("测试完毕!"); return context.getOutputs(); } - private void execute(ExecutorService threadPool,SpiderNode node,SpiderContext context,Map variables){ + private void execute(int nThreads,SpiderNode node,SpiderContext context,Map variables){ + ThreadPoolExecutor pool = new ThreadPoolExecutor(0, Integer.MAX_VALUE,60L, TimeUnit.SECONDS,new LinkedBlockingQueue()); + LinkedBlockingQueue queue = new LinkedBlockingQueue(nThreads); + executeNode(queue,node,context,variables); + for (int i = 0; i < nThreads; i++) { + pool.execute(()->{ + try { + Runnable runnable = null; + while((runnable = queue.take()) != null){ + runnable.run(); + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + }); + } + pool.shutdown(); + //等待线程执行完毕 + try { + while(!pool.awaitTermination(60, TimeUnit.SECONDS)){ + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + private void executeaNextNodes(LinkedBlockingQueue queue,SpiderNode node,SpiderContext context,Map variables){ + List nextNodes = node.getNextNodes(); + if(nextNodes != null){ + for (SpiderNode nextNode : nextNodes) { + executeNode(queue,nextNode,context,variables); + } + } + } + + private void executeNode(LinkedBlockingQueue queue,SpiderNode node,SpiderContext context,Map variables){ + if(!executeCondition(node,context,variables)){ + return; + } if(logger.isDebugEnabled()){ logger.debug("执行节点[{}:{}]",node.getNodeName(),node.getNodeId()); } context.log(String.format("执行节点[%s:%s]", node.getNodeName(),node.getNodeId())); - if(executeCondition(node,context,variables)){ - executeNode(threadPool,node,context,variables); - if(logger.isDebugEnabled()){ - logger.debug("执行节点[{}:{}]完毕",node.getNodeName(),node.getNodeId()); - } - context.log(String.format("执行节点[%s:%s]完毕", node.getNodeName(),node.getNodeId())); - } - } - - private void executeaNextNodes(ExecutorService threadPool,SpiderNode node,SpiderContext context,Map variables){ - List nextNodes = node.getNextNodes(); - if(nextNodes != null){ - for (SpiderNode nextNode : nextNodes) { - execute(threadPool,nextNode,context,variables); - } - } - } - - private void executeNode(ExecutorService pool,SpiderNode node,SpiderContext context,Map variables){ int loopCount = 1; - if(node.getLoopCount() != null){ + if(StringUtils.isNotEmpty(node.getLoopCount())){ Object result = engine.execute(node.getLoopCount(), variables); if(result != null){ if(logger.isDebugEnabled()){ @@ -88,30 +101,36 @@ public class Spider { loopCount = ((Long)result).intValue(); } } - List>> futures = new ArrayList<>(); if(loopCount > 0){ for (Executor executor : executors) { if(executor.supportShape().equals(node.getShape())){ for (int i = 0; i < loopCount; i++) { //存入下标变量 Map nVariables = Maps.add(variables, node.getLoopVariableName(), i); - futures.add(pool.submit(()->{ - executor.execute(node, context,nVariables); - return nVariables; - })); + try { + queue.put(()->{ + try { + executor.execute(node, context,nVariables); + } catch (Exception e) { + logger.error("执行节点[{}:{}]出错",node.getNodeName(),node.getNodeId(),e); + } finally{ + if(logger.isDebugEnabled()){ + logger.debug("执行节点[{}:{}]完毕",node.getNodeName(),node.getNodeId()); + } + context.log(String.format("执行节点[%s:%s]完毕", node.getNodeName(),node.getNodeId())); + //递归执行下一级 + CompletableFuture.runAsync(()->{ + executeaNextNodes(queue, node, context, nVariables); + }); + } + }); + } catch (InterruptedException e) { + e.printStackTrace(); + } } } } } - //递归执行下一级 - for (Future> future : futures) { - try { - executeaNextNodes(pool, node, context, future.get()); - } catch (InterruptedException | ExecutionException e) { - - } - - } } private boolean executeCondition(SpiderNode node,SpiderContext context,Map variables){ diff --git a/src/main/java/com/mxd/spider/core/executor/RequestExecutor.java b/src/main/java/com/mxd/spider/core/executor/RequestExecutor.java index 2286139..276c652 100644 --- a/src/main/java/com/mxd/spider/core/executor/RequestExecutor.java +++ b/src/main/java/com/mxd/spider/core/executor/RequestExecutor.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,6 +35,16 @@ public class RequestExecutor implements Executor{ @Override public void execute(SpiderNode node, SpiderContext context, Map variables) { + String sleepCondition = node.getJsonProperty() == null ? null : node.getJsonProperty().getSleep(); + if(StringUtils.isNotEmpty(sleepCondition)){ + try { + Object value = engine.execute(sleepCondition, variables); + long sleepTime = ((Long)value).longValue(); + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + + } + } HttpRequest request = HttpRequest.create(); SpiderJsonProperty property = node.getJsonProperty(); List parameters = property.getParameters(); @@ -93,6 +104,24 @@ public class RequestExecutor implements Executor{ request.header(nameValue.getName(),value); } } + //设置代理 + String proxy = property.getProxy(); + if(proxy != null){ + try { + proxy = engine.execute(proxy, variables).toString(); + String[] proxyArr = proxy.split(":"); + if(proxyArr != null && proxyArr.length == 2){ + request.proxy(proxyArr[0], Integer.parseInt(proxyArr[1])); + context.log(String.format("设置代理:%s", proxy)); + if(logger.isDebugEnabled()){ + logger.debug("设置代理:{}",proxy); + } + } + } catch (Exception e) { + context.log("设置代理出错,异常信息:" + ExceptionUtils.getStackTrace(e)); + logger.error("设置代理出错",e); + } + } try { HttpResponse response = request.execute(); //结果存入变量 @@ -104,5 +133,5 @@ public class RequestExecutor implements Executor{ } } - + } diff --git a/src/main/java/com/mxd/spider/core/io/HttpRequest.java b/src/main/java/com/mxd/spider/core/io/HttpRequest.java index 901f736..47b8ffc 100644 --- a/src/main/java/com/mxd/spider/core/io/HttpRequest.java +++ b/src/main/java/com/mxd/spider/core/io/HttpRequest.java @@ -1,13 +1,15 @@ package com.mxd.spider.core.io; import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.Proxy; import java.util.HashMap; import java.util.Map; import org.jsoup.Connection; -import org.jsoup.Jsoup; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; +import org.jsoup.Jsoup; public class HttpRequest { @@ -19,6 +21,13 @@ public class HttpRequest { private String method = "GET"; + private Proxy proxy; + + /** + * 超时时间 + */ + private int timeout = 60000; + public static HttpRequest create(){ return new HttpRequest(); } @@ -46,7 +55,7 @@ public class HttpRequest { public HttpRequest header(String key,Object value){ if(value != null){ - return header(key,data.toString()); + return header(key,value.toString()); } return this; } @@ -78,11 +87,18 @@ public class HttpRequest { return this; } + public HttpRequest proxy(String host,int port){ + this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); + return this; + } + public HttpResponse execute() throws IOException{ Connection connection = Jsoup.connect(this.url); connection.ignoreContentType(true); connection.ignoreHttpErrors(true); connection.method(Method.GET); + connection.maxBodySize(0); + connection.timeout(this.timeout); if("POST".equals(this.method)){ connection.method(Method.POST); } @@ -92,6 +108,9 @@ public class HttpRequest { if(this.data != null){ connection.data(data); } + if(this.proxy != null){ + connection.proxy(proxy); + } Response response = connection.execute(); return new HttpResponse(response); } diff --git a/src/main/java/com/mxd/spider/core/model/SpiderJsonProperty.java b/src/main/java/com/mxd/spider/core/model/SpiderJsonProperty.java index d0f1066..8dde4ab 100644 --- a/src/main/java/com/mxd/spider/core/model/SpiderJsonProperty.java +++ b/src/main/java/com/mxd/spider/core/model/SpiderJsonProperty.java @@ -10,6 +10,8 @@ public class SpiderJsonProperty{ private List variables; private List outputs; + + private String sleep; /** * 条件判断表达式 @@ -36,6 +38,11 @@ public class SpiderJsonProperty{ private List parameters; + /** + * 代理 + */ + private String proxy; + /*爬取参数--end*/ /*数据源参数--start*/ @@ -196,14 +203,31 @@ public class SpiderJsonProperty{ public void setLoopVariableName(String loopVariableName) { this.loopVariableName = loopVariableName; } + + public String getProxy() { + return proxy; + } + + public void setProxy(String proxy) { + this.proxy = proxy; + } + + public String getSleep() { + return sleep; + } + + public void setSleep(String sleep) { + this.sleep = sleep; + } @Override public String toString() { - return "SpiderJsonProperty [shape=" + shape + ", variables=" + variables + ", outputs=" + outputs - + ", condition=" + condition + ", loopCount=" + loopCount + ", loopVariableName=" + loopVariableName - + ", url=" + url + ", method=" + method + ", headers=" + headers + ", parameters=" + parameters - + ", datasourceType=" + datasourceType + ", datasourceUrl=" + datasourceUrl + ", datasourceUsername=" - + datasourceUsername + ", datasourcePassword=" + datasourcePassword + ", datasourceId=" + datasourceId - + ", statementType=" + statementType + ", sql=" + sql + "]"; + return "SpiderJsonProperty [shape=" + shape + ", variables=" + variables + ", outputs=" + outputs + ", sleep=" + + sleep + ", condition=" + condition + ", loopCount=" + loopCount + ", loopVariableName=" + + loopVariableName + ", url=" + url + ", method=" + method + ", headers=" + headers + ", parameters=" + + parameters + ", proxy=" + proxy + ", datasourceType=" + datasourceType + ", datasourceUrl=" + + datasourceUrl + ", datasourceUsername=" + datasourceUsername + ", datasourcePassword=" + + datasourcePassword + ", datasourceId=" + datasourceId + ", statementType=" + statementType + ", sql=" + + sql + "]"; } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 19e02fa..f9a34f5 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -2,7 +2,7 @@ server.port=8088 logging.level.root=INFO -spider.job.enable=true +spider.job.enable=false spring.jackson.date-format=yyyy-MM-dd HH:mm:ss spring.jackson.time-zone=GMT+8 diff --git a/src/main/resources/static/editor.html b/src/main/resources/static/editor.html index 1c5557a..c88148d 100644 --- a/src/main/resources/static/editor.html +++ b/src/main/resources/static/editor.html @@ -17,6 +17,7 @@
  • 返回
  • +
  • 全选
  • 保存
  • 测试
  • 打印XML
  • diff --git a/src/main/resources/static/js/editor.js b/src/main/resources/static/js/editor.js index 06df212..de29def 100644 --- a/src/main/resources/static/js/editor.js +++ b/src/main/resources/static/js/editor.js @@ -99,7 +99,18 @@ $(function(){ processCellEvent(graph.getSelectionCell(),graph); }); //节点名称输入框事件 - $("body").on("keyup",".editor-form-node .layui-form-item input[name=value]",function(){ + $("body").on("mousewheel",".layui-tab .layui-tab-title",function(e,delta){ + var $dom = $(this); + var wheel = e.originalEvent.wheelDelta || -e.originalEvent.detail; + var delta = Math.max(-1, Math.min(1, wheel) ); + e.preventDefault = function(){} + if(delta > 0){ + $dom.scrollLeft($dom.scrollLeft()-60); + }else{ + $dom.scrollLeft($dom.scrollLeft()+60); + } + return false; + }).on("keyup",".editor-form-node .layui-form-item input[name=value]",function(){ var cell = graph.getSelectionCell(); if(cell != null){ var $input = $(this); @@ -117,17 +128,11 @@ $(function(){ } } } - }).on("keyup",".editor-form-node .layui-form-item input[name=spiderName]",function(){ + }).on("keyup",".editor-form-node .layui-form-item input.input-default",function(){ var cell = graph.getModel().getRoot(); if(cell != null){ cell.data = cell.data || new JsonProperty(); - cell.data.set('spiderName',$(this).val()) - } - }).on("keyup",".editor-form-node .layui-form-item input[name=loopCount]",function(){ - var cell = graph.getSelectionCell(); - if(cell != null){ - cell.data = cell.data || new JsonProperty(); - cell.data.set('loopCount',$(this).val()) + cell.data.set($(this).attr('name'),$(this).val()) } }).on("keyup",".editor-form-node .layui-form-item input[name^=variable-]",function(){ //变量操作 resetFormArray(graph,'variable','variables'); @@ -438,6 +443,8 @@ $(function(){ var graph = editor.graph; $(".toolbar-container").on('click','.btn-delete',function(){ deleteSelectCells(graph); + }).on("click",".btn-selectAll",function(){ + editor.execute('selectAll'); }).on('click',".btn-undo",function(){ editor.execute('undo'); }).on('click',".btn-redo",function(){ @@ -521,7 +528,7 @@ $(function(){ location.href="spiderList.html" }).on('click','.btn-save',function(){ Save(); - }); + }) } function getXML(editor){ @@ -556,7 +563,7 @@ $(function(){ editor.execute('copy'); }); keyHandler.bindControlKey(86,function(){ // Ctrl+V - editor.execute('paste') + editor.execute('paste'); }); keyHandler.bindControlKey(83,function(){ // Ctrl+S Save(); @@ -564,6 +571,9 @@ $(function(){ keyHandler.bindControlKey(81,function(){ // Ctrl+S $(".btn-test").click(); }); + keyHandler.bindControlKey(65,function(){ // Ctrl+A + editor.execute('selectAll'); + }); } function createWebSocket(options){ diff --git a/src/main/resources/static/resources/templates/rectangle.html b/src/main/resources/static/resources/templates/rectangle.html index e597450..489bd92 100644 --- a/src/main/resources/static/resources/templates/rectangle.html +++ b/src/main/resources/static/resources/templates/rectangle.html @@ -1,10 +1,13 @@
      -
    • 配置
    • +
    • 基本配置
    • +
    • 参数
    • +
    • Header
    • +
    • 代理
    -
    +
    -
    +
    @@ -14,19 +17,25 @@
    - +
    - +
    - +
    - + +
    +
    +
    + +
    +
    @@ -36,52 +45,61 @@
    - {{# layui.each(d.data.object.parameters,function(index,parameter){ }} -
    -
    - - -
    - -
    -
    -
    - -
    - -
    -
    - {{# }) }} -
    -
    -
    - -
    -
    - - {{# layui.each(d.data.object.headers,function(index,header){ }} -
    -
    - - -
    - -
    -
    -
    - -
    - -
    -
    - {{# }) }} -
    -
    -
    - -
    -
    +
    + {{# layui.each(d.data.object.parameters,function(index,parameter){ }} +
    + + +
    + +
    +
    +
    + +
    + +
    +
    +
    + {{# }) }} +
    +
    + +
    +
    +
    +
    + {{# layui.each(d.data.object.headers,function(index,header){ }} +
    + + +
    + +
    +
    +
    + +
    + +
    +
    +
    + {{# }) }} +
    +
    + +
    +
    +
    +
    +
    + +
    + +
    +
    +
    \ No newline at end of file diff --git a/src/main/resources/static/resources/templates/root.html b/src/main/resources/static/resources/templates/root.html index 5ff629b..f5b883d 100644 --- a/src/main/resources/static/resources/templates/root.html +++ b/src/main/resources/static/resources/templates/root.html @@ -1,38 +1,40 @@
    • 全局配置
    • +
    • 全局Header
    -
    +
    -
    +
    - {{# layui.each(d.data.object.headers,function(index,header){ }} -
    -
    - - -
    - -
    -
    -
    - -
    - -
    -
    - {{# }) }} -
    -
    +
    +
    +
    + {{# layui.each(d.data.object.headers,function(index,header){ }} +
    + +
    - -
    -
    + +
    +
    +
    + +
    + +
    +
    +
    + {{# }) }} +
    +
    + +