IP代理池插件
This commit is contained in:
parent
80bcd6b8f3
commit
3e8abd250a
@ -46,7 +46,7 @@ spider-flow
|
||||
- [ ] Redis插件
|
||||
- [ ] Mongodb插件
|
||||
- [ ] Hbase插件
|
||||
- [ ] IP代理池插件
|
||||
- [x] IP代理池插件
|
||||
- [ ] OCR识别插件
|
||||
|
||||
### 项目部分截图
|
||||
|
1
pom.xml
1
pom.xml
@ -34,5 +34,6 @@
|
||||
<module>spider-flow-core</module>
|
||||
<module>spider-flow-web</module>
|
||||
<module>spider-flow-selenium</module>
|
||||
<module>spider-flow-proxypool</module>
|
||||
</modules>
|
||||
</project>
|
18
spider-flow-proxypool/db/sp_proxy.sql
Normal file
18
spider-flow-proxypool/db/sp_proxy.sql
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
-- Schema for the proxy pool plugin: one row per proxy endpoint (ip + port).
-- Foreign key checks are disabled only while the table is recreated and are
-- switched back on at the end so the session is not left in a relaxed state.
SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for sp_proxy
-- ----------------------------
DROP TABLE IF EXISTS `sp_proxy`;
CREATE TABLE `sp_proxy` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `ip` varchar(32) NOT NULL,
  `port` int(6) NOT NULL,
  -- protocol of the proxy; the application filters on 'http' and 'https'
  `type` varchar(16) DEFAULT NULL,
  -- 1 = anonymous proxy, 0 = transparent proxy (as written by the application)
  `anonymous` int(11) DEFAULT NULL,
  -- NOTE(review): not mapped by the Proxy entity in this commit; confirm intended use
  `available` int(11) DEFAULT NULL,
  `valid_date` datetime DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  -- one row per endpoint; InnoDB builds B-tree indexes, so no index type is requested
  UNIQUE KEY `sp_proxy_unique` (`ip`,`port`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

SET FOREIGN_KEY_CHECKS=1;
|
34
spider-flow-proxypool/pom.xml
Normal file
34
spider-flow-proxypool/pom.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<?xml version="1.0"?>
<!-- Maven module for the IP proxy pool plugin of spider-flow. -->
<project
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
	<modelVersion>4.0.0</modelVersion>
	<parent>
		<groupId>org.spiderflow</groupId>
		<artifactId>spider-flow</artifactId>
		<version>0.0.1</version>
	</parent>
	<artifactId>spider-flow-proxypool</artifactId>
	<!-- Display name must match this module, not spider-flow-web -->
	<name>spider-flow-proxypool</name>
	<url>https://gitee.com/jmxd/spider-flow/tree/master/spider-flow-proxypool</url>
	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>
	<dependencies>
		<!-- Plugin API; scope "provided" means the host application supplies it at runtime -->
		<dependency>
			<groupId>org.spiderflow</groupId>
			<artifactId>spider-flow-api</artifactId>
			<version>0.0.1</version>
			<scope>provided</scope>
		</dependency>
		<!-- Persistence of the sp_proxy table through Spring Data JPA -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-data-jpa</artifactId>
		</dependency>
		<!-- Used to send the test request that checks a proxy -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.11.3</version>
		</dependency>
	</dependencies>
</project>
|
@ -0,0 +1,49 @@
|
||||
package org.spiderflow.proxypool;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.spiderflow.proxypool.model.Proxy;
|
||||
import org.spiderflow.proxypool.service.ProxyService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class ProxyPoolCleanTask {
|
||||
|
||||
@Autowired
|
||||
private ProxyService proxyService;
|
||||
|
||||
@Autowired
|
||||
private ProxyPoolManager proxyPoolManager;
|
||||
|
||||
private static Logger logger = LoggerFactory.getLogger(ProxyPoolCleanTask.class);
|
||||
|
||||
@Scheduled(initialDelay = 10000,fixedDelay = 10000)
|
||||
public void clean(){
|
||||
logger.info("开始检测代理IP有效性");
|
||||
List<Proxy> proxys = proxyService.findAll();
|
||||
ThreadPoolExecutor pool = new ThreadPoolExecutor(8, 8, 60, TimeUnit.SECONDS, new LinkedBlockingQueue<>());
|
||||
for (Proxy proxy : proxys) {
|
||||
pool.submit(()->{
|
||||
if(proxyPoolManager.check(proxy) == -1){
|
||||
proxyPoolManager.remove(proxy);
|
||||
}
|
||||
});
|
||||
}
|
||||
pool.shutdown();
|
||||
while(!pool.isTerminated()){
|
||||
try {
|
||||
pool.awaitTermination(50, TimeUnit.MILLISECONDS);
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
logger.info("检测代理IP有效性完毕");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,131 @@
|
||||
package org.spiderflow.proxypool;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.apache.commons.lang3.RandomUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.spiderflow.proxypool.model.Proxy;
|
||||
import org.spiderflow.proxypool.service.ProxyService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class ProxyPoolManager {
|
||||
|
||||
@Autowired
|
||||
private ProxyService proxyService;
|
||||
|
||||
private List<Proxy> proxys = Collections.emptyList();
|
||||
|
||||
private static Logger logger = LoggerFactory.getLogger(ProxyPoolManager.class);
|
||||
|
||||
@PostConstruct
|
||||
private void init(){
|
||||
//读取全部代理存到内存中
|
||||
this.proxys = new CopyOnWriteArrayList<>(proxyService.findAll());
|
||||
}
|
||||
|
||||
public void remove(Proxy proxy){
|
||||
this.proxys.remove(proxy);
|
||||
this.proxyService.remove(proxy);
|
||||
}
|
||||
|
||||
public boolean add(Proxy proxy){
|
||||
if(this.proxys.contains(proxy)){
|
||||
return true;
|
||||
}
|
||||
if(check(proxy) != -1){
|
||||
boolean flag = proxyService.insert(proxy);
|
||||
if(flag){
|
||||
this.proxys.add(proxy);
|
||||
}
|
||||
return flag;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* 随机获取一个http代理
|
||||
* @return
|
||||
*/
|
||||
public Proxy getHttpProxy(){
|
||||
return getHttpProxy(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* 随机获取一个https代理
|
||||
* @return
|
||||
*/
|
||||
public Proxy getHttpsProxy(){
|
||||
return getHttpsProxy(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* 随机获取一个HTTP代理
|
||||
* @return
|
||||
*/
|
||||
public Proxy getHttpProxy(boolean anonymous){
|
||||
return random(get("http", anonymous));
|
||||
}
|
||||
|
||||
/**
|
||||
* 随机获取一个HTTPS代理
|
||||
* @return
|
||||
*/
|
||||
public Proxy getHttpsProxy(boolean anonymous){
|
||||
return random(get("https", anonymous));
|
||||
}
|
||||
|
||||
private Proxy random(List<Proxy> proxys){
|
||||
int size = 0;
|
||||
if(proxys != null && (size = proxys.size()) > 0){
|
||||
return proxys.get(RandomUtils.nextInt(0, size));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<Proxy> get(String type,boolean anonymous){
|
||||
List<Proxy> nProxys = new ArrayList<>();
|
||||
if(this.proxys != null){
|
||||
for (Proxy proxy : proxys) {
|
||||
if(type.equals(proxy.getType())){
|
||||
if((anonymous && proxy.getAnonymous() == 1)||(proxy.getAnonymous() == 0 && !anonymous)){
|
||||
nProxys.add(proxy);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nProxys;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检测代理
|
||||
* @param proxy
|
||||
* @return
|
||||
*/
|
||||
public long check(Proxy proxy){
|
||||
try {
|
||||
long st = System.currentTimeMillis();
|
||||
Jsoup.connect("https://www.baidu.com")
|
||||
.ignoreContentType(true)
|
||||
.ignoreHttpErrors(true)
|
||||
.timeout(3000)
|
||||
.proxy(proxy.getIp(), proxy.getPort())
|
||||
.execute();
|
||||
st = System.currentTimeMillis() - st;
|
||||
logger.info("检测代理:{}:{},延迟:{}",proxy.getIp(),proxy.getPort(),st);
|
||||
return st;
|
||||
} catch (Exception e) {
|
||||
logger.info("检测代理:{}:{},超时",proxy.getIp(),proxy.getPort());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
package org.spiderflow.proxypool.executor.function;
|
||||
|
||||
import org.spiderflow.executor.FunctionExecutor;
|
||||
import org.spiderflow.proxypool.ProxyPoolManager;
|
||||
import org.spiderflow.proxypool.model.Proxy;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class ProxyPoolFunctionExecutor implements FunctionExecutor{
|
||||
|
||||
private static ProxyPoolManager proxyPoolManager;
|
||||
|
||||
@Override
|
||||
public String getFunctionPrefix() {
|
||||
return "proxypool";
|
||||
}
|
||||
|
||||
public static String http(boolean anonymous){
|
||||
return convertToString(proxyPoolManager.getHttpProxy(anonymous));
|
||||
}
|
||||
|
||||
public static String http(){
|
||||
return http(true);
|
||||
}
|
||||
|
||||
public static String https(boolean anonymous){
|
||||
return convertToString(proxyPoolManager.getHttpsProxy(anonymous));
|
||||
}
|
||||
|
||||
public static String https(){
|
||||
return https(true);
|
||||
}
|
||||
|
||||
private static String convertToString(Proxy proxy){
|
||||
return String.format("%s:%s", proxy.getIp(),proxy.getPort());
|
||||
}
|
||||
|
||||
public static void add(String ip,Integer port,String type,boolean anonymous){
|
||||
Proxy proxy = new Proxy();
|
||||
proxy.setIp(ip);
|
||||
proxy.setPort(Integer.valueOf(port));
|
||||
proxy.setType(type);
|
||||
proxy.setAnonymous(anonymous ? 1: 0);
|
||||
proxyPoolManager.add(proxy);
|
||||
}
|
||||
|
||||
@Autowired
|
||||
public void setProxyPoolManager(ProxyPoolManager proxyPoolManager) {
|
||||
ProxyPoolFunctionExecutor.proxyPoolManager = proxyPoolManager;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,107 @@
|
||||
package org.spiderflow.proxypool.model;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.GeneratedValue;
|
||||
import javax.persistence.GenerationType;
|
||||
import javax.persistence.Id;
|
||||
import javax.persistence.Table;
|
||||
|
||||
@Entity
|
||||
@Table(name = "sp_proxy")
|
||||
public class Proxy {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
private Integer id;
|
||||
|
||||
private String ip;
|
||||
|
||||
private Integer port;
|
||||
|
||||
private String type;
|
||||
|
||||
private Integer anonymous;
|
||||
|
||||
private Date validDate;
|
||||
|
||||
public String getIp() {
|
||||
return ip;
|
||||
}
|
||||
|
||||
public void setIp(String ip) {
|
||||
this.ip = ip;
|
||||
}
|
||||
|
||||
public Integer getPort() {
|
||||
return port;
|
||||
}
|
||||
|
||||
public void setPort(Integer port) {
|
||||
this.port = port;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public Integer getAnonymous() {
|
||||
return anonymous;
|
||||
}
|
||||
|
||||
public void setAnonymous(Integer anonymous) {
|
||||
this.anonymous = anonymous;
|
||||
}
|
||||
|
||||
public Date getValidDate() {
|
||||
return validDate;
|
||||
}
|
||||
|
||||
public void setValidDate(Date validDate) {
|
||||
this.validDate = validDate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((ip == null) ? 0 : ip.hashCode());
|
||||
result = prime * result + ((port == null) ? 0 : port.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
Proxy other = (Proxy) obj;
|
||||
if (ip == null) {
|
||||
if (other.ip != null)
|
||||
return false;
|
||||
} else if (!ip.equals(other.ip))
|
||||
return false;
|
||||
if (port == null) {
|
||||
if (other.port != null)
|
||||
return false;
|
||||
} else if (!port.equals(other.port))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public Integer getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(Integer id) {
|
||||
this.id = id;
|
||||
}
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
package org.spiderflow.proxypool.repository;
|
||||
|
||||
import org.spiderflow.proxypool.model.Proxy;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
public interface ProxyRepository extends JpaRepository<Proxy, Integer>{
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
package org.spiderflow.proxypool.service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.spiderflow.proxypool.model.Proxy;
|
||||
import org.spiderflow.proxypool.repository.ProxyRepository;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class ProxyService {
|
||||
|
||||
@Autowired
|
||||
private ProxyRepository proxyRepository;
|
||||
|
||||
public boolean insert(Proxy proxy){
|
||||
try {
|
||||
proxyRepository.save(proxy);
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public void remove(Proxy proxy){
|
||||
proxyRepository.deleteById(proxy.getId());
|
||||
}
|
||||
|
||||
public List<Proxy> findAll(){
|
||||
return proxyRepository.findAll();
|
||||
}
|
||||
}
|
@ -27,5 +27,10 @@
|
||||
<artifactId>spider-flow-selenium</artifactId>
|
||||
<version>0.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.spiderflow</groupId>
|
||||
<artifactId>spider-flow-proxypool</artifactId>
|
||||
<version>0.0.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
Loading…
Reference in New Issue
Block a user