mirror of
https://github.com/Geniusay/ChopperBot.git
synced 2026-05-19 17:04:01 +08:00
文档编写
ReadMe更改
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twj666.github.io/ChopperBot-Doc/">
|
||||
<a href="https://969025903.github.io/ChopperBot-Doc/">
|
||||
<img src="https://img.shields.io/badge/文档-简体中文-blue.svg" alt="简体中文文档" />
|
||||
</a>
|
||||
|
||||
@@ -74,8 +74,8 @@
|
||||

|
||||
|
||||
# 🔗 Links
|
||||
👉 [Document](https://twj666.github.io/ChopperBot-Doc/)
|
||||
👉 [Document](https://969025903.github.io/ChopperBot-Doc/)
|
||||
|
||||
👉 [Developer's Guide](https://twj666.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84)
|
||||
👉 [Developer's Guide](https://969025903.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84)
|
||||
|
||||
👉 [CHANGE LOG](https://github.com/969025903/ChopperBot/blob/master/CHANGELOG.md)
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twj666.github.io/ChopperBot-Doc/">
|
||||
<a href="https://969025903.github.io/ChopperBot-Doc/">
|
||||
<img src="https://img.shields.io/badge/文档-简体中文-blue.svg" alt="简体中文文档" />
|
||||
</a>
|
||||
|
||||
@@ -73,8 +73,8 @@
|
||||
# 📈 项目动态
|
||||

|
||||
# 🔗 相关链接
|
||||
👉 [项目文档](https://twj666.github.io/ChopperBot-Doc/)
|
||||
👉 [项目文档](https://969025903.github.io/ChopperBot-Doc/)
|
||||
|
||||
👉 [项目开发指南](https://twj666.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84)
|
||||
👉 [项目开发指南](https://969025903.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84)
|
||||
|
||||
👉 [更新日志](https://github.com/969025903/ChopperBot/blob/master/CHANGELOG.md)
|
||||
|
||||
@@ -15,4 +15,5 @@ public class ModuleName {
|
||||
public static final String FILE = ConstPool.FILE;
|
||||
|
||||
public static final String BARRAGE = ConstPool.BARRAGE;
|
||||
public static final String LIVE = ConstPool.LIVE_RECORD;
|
||||
}
|
||||
|
||||
@@ -27,4 +27,5 @@ public class PluginName {
|
||||
|
||||
//Barrage
|
||||
public static final String BARRAGE_FILE_PLUGIN= "BarrageFileListen";
|
||||
public static final String LIVE_MANAGER_PLUGIN= "LiveDownLoadManager";
|
||||
}
|
||||
|
||||
@@ -151,9 +151,11 @@ public abstract class ModuleInitMachine extends CommonInitMachine{
|
||||
@Override
|
||||
public void shutdown() {
|
||||
logger.info("👇 <{}> is shutting down , {} plugins need to shut down...",moduleName,initMachines.size());
|
||||
initMachines.forEach(
|
||||
InitMachine::shutdown
|
||||
);
|
||||
initMachines.forEach(initMachine-> {
|
||||
if(InitPluginRegister.isRegister(initMachine.getPluginName())){
|
||||
initMachine.shutdown();
|
||||
}
|
||||
});
|
||||
logger.info("👆 <{}> Completing the shutdown of all plugins!",moduleName);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ public abstract class CommonPlugin implements ChopperBotPlugin{
|
||||
|
||||
@Override
|
||||
public boolean init() {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -2,5 +2,5 @@ package org.example.plugin;
|
||||
|
||||
@FunctionalInterface
|
||||
public interface PluginAction {
|
||||
void action();
|
||||
void action(ChopperBotPlugin plugin);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package org.example.plugin;
|
||||
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.init.InitPluginRegister;
|
||||
|
||||
import java.util.function.Consumer;
|
||||
@@ -14,32 +15,25 @@ public class PluginCheckAndDo {
|
||||
/**
|
||||
* 且条件,检测所需的所有插件是否存在,存在则调用success,不存在则调用fail
|
||||
*/
|
||||
|
||||
public static void CheckAndDo(
|
||||
PluginAction success,
|
||||
PluginAction fail,
|
||||
String...needPlugins){
|
||||
for (String needPlugin : needPlugins) {
|
||||
if (!InitPluginRegister.isRegister(needPlugin)) {
|
||||
fail.action();
|
||||
return;
|
||||
}
|
||||
String needPlugin){
|
||||
if (!InitPluginRegister.isRegister(needPlugin)) {
|
||||
return;
|
||||
}
|
||||
success.action();
|
||||
|
||||
success.action((InitPluginRegister.getPlugin(needPlugin)));
|
||||
}
|
||||
|
||||
/**
|
||||
* 且条件,检测所需的所有插件是否存在,存在则调用success,不存在则返回
|
||||
*/
|
||||
|
||||
public static void CheckAndDo(
|
||||
PluginAction success,
|
||||
String...needPlugins){
|
||||
for (String needPlugin : needPlugins) {
|
||||
if (!InitPluginRegister.isRegister(needPlugin)) {
|
||||
return;
|
||||
}
|
||||
PluginFailAction fail,
|
||||
String needPlugin){
|
||||
if (!InitPluginRegister.isRegister(needPlugin)) {
|
||||
fail.action();
|
||||
return;
|
||||
}
|
||||
success.action();
|
||||
success.action((InitPluginRegister.getPlugin(needPlugin)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
package org.example.plugin;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/08/31 02:12
|
||||
**/
|
||||
@FunctionalInterface
|
||||
public interface PluginFailAction {
|
||||
|
||||
void action();
|
||||
}
|
||||
@@ -23,6 +23,11 @@
|
||||
<artifactId>chopperbot-creeper</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.example</groupId>
|
||||
<artifactId>chopperbot-live</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.example</groupId>
|
||||
<artifactId>chopperbot-file</artifactId>
|
||||
|
||||
@@ -16,16 +16,13 @@ import java.util.function.Supplier;
|
||||
**/
|
||||
public class WorldInitMachine extends ModuleInitMachine{
|
||||
|
||||
|
||||
|
||||
|
||||
private static final String githubUrl = "https://github.com/969025903/ChopperBot";
|
||||
|
||||
public WorldInitMachine() throws Exception {
|
||||
super("ChopperBot",ChopperLogFactory.getLogger(LoggerType.System));
|
||||
}
|
||||
|
||||
|
||||
public List<CommonInitMachine> alreadyInitModule = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public boolean init() {
|
||||
@@ -41,6 +38,7 @@ public class WorldInitMachine extends ModuleInitMachine{
|
||||
return fail();
|
||||
}
|
||||
(initMachine).registerPlugin();
|
||||
alreadyInitModule.add(initMachine);
|
||||
}else{
|
||||
return fail();
|
||||
}
|
||||
@@ -86,7 +84,7 @@ public class WorldInitMachine extends ModuleInitMachine{
|
||||
logger.info("🌏 <{}> is shutting down,{} modules need to be closed,please wait.....","ChopperBot",getInitMachines().size());
|
||||
|
||||
ChopperBotGuardPool.GuardPool().shutdown();
|
||||
this.getInitMachines().forEach(
|
||||
this.alreadyInitModule.forEach(
|
||||
InitMachine::shutdown
|
||||
);
|
||||
|
||||
|
||||
@@ -244,8 +244,8 @@ public class TaskCenter extends GuardPlugin {
|
||||
|
||||
public void request(ReptileRequest request){
|
||||
PluginCheckAndDo.CheckAndDo(
|
||||
()->{
|
||||
ReptileTask task = ((CreeperManager)InitPluginRegister.getPlugin(PluginName.CREEPER_MANAGER_PLUGIN)).getReptileTask(request);
|
||||
(plugin)->{
|
||||
ReptileTask task = ((CreeperManager)plugin).getReptileTask(request);
|
||||
if(task!=null){
|
||||
addTask(task);
|
||||
}
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
package org.example.init;
|
||||
|
||||
import org.example.bean.ConfigFile;
|
||||
import org.example.cache.FileCacheManager;
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.plugin.CommonPlugin;
|
||||
import org.example.plugin.PluginCheckAndDo;
|
||||
import org.example.plugin.annotation.Plugin;
|
||||
import org.example.util.ConfigFileUtil;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/07/29 00:08
|
||||
@@ -40,5 +45,18 @@ public abstract class ConfigInitMachine extends CommonInitMachine{
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
String path = Path.of(configFile.getFilePath(), configFile.getFileName()).toString();
|
||||
|
||||
PluginCheckAndDo.CheckAndDo(
|
||||
(plugin)->{
|
||||
((FileCacheManager)plugin).deleteFileCache(path);
|
||||
},
|
||||
PluginName.FILE_CACHE_PLUGIN
|
||||
);
|
||||
logger.info("[\uD83C\uDD96] {} close success",path);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -67,10 +67,10 @@ public class Guard<T extends HotModuleLoadTask> implements Runnable, ResultLogge
|
||||
|
||||
//查看热度推送插件是否装载,如果装载则进行热度推送
|
||||
PluginCheckAndDo.CheckAndDo(
|
||||
()->{
|
||||
HeatRecommendation plugin = (HeatRecommendation) InitPluginRegister.getPlugin(PluginName.HOT_RECOMMENDATION_PLUGIN);
|
||||
(plugin)->{
|
||||
|
||||
assert plugin != null;
|
||||
plugin.sendHotEvent(platform);
|
||||
((HeatRecommendation)plugin).sendHotEvent(platform);
|
||||
},
|
||||
PluginName.HOT_RECOMMENDATION_PLUGIN
|
||||
);
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
package org.example;
|
||||
|
||||
import org.apache.poi.ss.formula.functions.T;
|
||||
import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask;
|
||||
import org.example.core.manager.TaskManager;
|
||||
import org.example.pojo.live.DouyuLiveConfig;
|
||||
|
||||
|
||||
public class DouyuLiveTest {
|
||||
public static void main(String[] args) {
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package org.example;
|
||||
|
||||
import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig;
|
||||
import org.example.core.manager.TaskManager;
|
||||
import org.example.pojo.live.BilibiliLiveConfig;
|
||||
|
||||
public class LiveStreamTest {
|
||||
private static final int THREAD_NUM = 10;
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
// 创建直播配置
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -22,22 +22,20 @@ public class LiveStreamTask {
|
||||
private Map<String, String> headers;
|
||||
private FlvHandle f = new FlvHandle();
|
||||
|
||||
public void start(ExecutorService executor, StatusMonitor statusMonitor, OutputStream fileIO) {
|
||||
executor.execute(() -> {
|
||||
try {
|
||||
URLConnection conn = new URL(this.url).openConnection();
|
||||
if (this.headers != null) {
|
||||
for (Map.Entry<String, String> entry : this.headers.entrySet()) {
|
||||
conn.setRequestProperty(entry.getKey(), entry.getValue());
|
||||
}
|
||||
public void start(StatusMonitor statusMonitor, OutputStream fileIO) {
|
||||
try {
|
||||
URLConnection conn = new URL(this.url).openConnection();
|
||||
if (this.headers != null) {
|
||||
for (Map.Entry<String, String> entry : this.headers.entrySet()) {
|
||||
conn.setRequestProperty(entry.getKey(), entry.getValue());
|
||||
}
|
||||
try (InputStream in = conn.getInputStream()) {
|
||||
f.parseStream(in, statusMonitor, fileIO);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
try (InputStream in = conn.getInputStream()) {
|
||||
f.parseStream(in, statusMonitor, fileIO);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public void terminate() {
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
package org.example.core.creeper.loadconfig;
|
||||
|
||||
import lombok.Data;
|
||||
import org.example.core.creeper.loadtask.BilibiliLiveOnlineLoadTask;
|
||||
import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask;
|
||||
import org.example.core.creeper.loadtask.DouyuRecordLoadTask;
|
||||
import org.example.core.manager.annotation.Creeper;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/08/30 18:05
|
||||
**/
|
||||
@Data
|
||||
@Creeper(creeperName = "bilibili_live",loadTask = BilibiliLiveOnlineLoadTask.class,creeperDescription = "B站直播爬取")
|
||||
public class BilibiliLiveOnlineConfig extends LoadLiveConfig{
|
||||
|
||||
|
||||
|
||||
@@ -2,15 +2,18 @@ package org.example.core.creeper.loadconfig;
|
||||
|
||||
import lombok.Data;
|
||||
import org.example.bean.live.DouyuLive;
|
||||
import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask;
|
||||
import org.example.core.creeper.loadtask.DouyuRecordLoadTask;
|
||||
import org.example.core.manager.annotation.Creeper;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/07/28 23:17
|
||||
**/
|
||||
@Data
|
||||
@Creeper(creeperName = "douyu_live",loadTask = DouyuLiveOnlineLoadTask.class,creeperDescription = "斗鱼直播爬取")
|
||||
public class DouyuLiveOnlineConfig extends LoadLiveConfig {
|
||||
|
||||
private String flvUrl = "http://openflv-huos.douyucdn2.cn/dyliveflv1/";
|
||||
public DouyuLiveOnlineConfig(String roomId, String videoPath, String videoName,int clarity) {
|
||||
super(roomId, videoPath, videoName, false);
|
||||
this.clarity = clarity;
|
||||
|
||||
@@ -14,13 +14,18 @@ public abstract class LoadLiveConfig extends LoadVideoConfig {
|
||||
// 房间号
|
||||
protected String roomId;
|
||||
|
||||
protected String liverName;
|
||||
|
||||
|
||||
// 是否自动转换为mp4格式
|
||||
protected boolean convertToMp4;
|
||||
|
||||
protected boolean showDownloadTable;
|
||||
|
||||
public LoadLiveConfig(String roomId, String videoPath, String videoName, boolean convertToMp4) {
|
||||
super(videoPath,videoName);
|
||||
this.roomId = roomId;
|
||||
this.convertToMp4 = convertToMp4;
|
||||
this.showDownloadTable = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package org.example.core.creeper.loadtask;
|
||||
|
||||
import org.checkerframework.checker.units.qual.A;
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig;
|
||||
import org.example.core.loadtask.CommonLoadTask;
|
||||
import org.example.core.manager.LiveDownloadManager;
|
||||
import org.example.log.ChopperLogFactory;
|
||||
import org.example.log.LoggerType;
|
||||
import org.example.plugin.PluginCheckAndDo;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/08/31 15:44
|
||||
**/
|
||||
public class BilibiliLiveOnlineLoadTask extends LiveOnlineLoadTask {
|
||||
|
||||
public BilibiliLiveOnlineLoadTask(BilibiliLiveOnlineConfig loadConfig) {
|
||||
super(loadConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String start() {
|
||||
return this.start(ChopperLogFactory.getLogger(LoggerType.LiveRecord),(BilibiliLiveOnlineConfig)loadConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,32 +1,33 @@
|
||||
package org.example.core.creeper.loadtask;
|
||||
|
||||
import org.example.bean.live.DouyuLive;
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig;
|
||||
import org.example.core.loadtask.CommonLoadTask;
|
||||
import org.example.core.loadtask.WebMagicLoadTask;
|
||||
import org.example.core.manager.LiveDownloadManager;
|
||||
import org.example.log.ChopperLogFactory;
|
||||
import org.example.log.LoggerType;
|
||||
import org.example.plugin.PluginCheckAndDo;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/07/28 23:14
|
||||
**/
|
||||
public class DouyuLiveOnlineLoadTask extends CommonLoadTask<String> {
|
||||
public class DouyuLiveOnlineLoadTask extends LiveOnlineLoadTask {
|
||||
|
||||
|
||||
private DouyuLive douyuLive;
|
||||
public DouyuLiveOnlineLoadTask(DouyuLiveOnlineConfig douyuLiveOnlineConfig) {
|
||||
super(douyuLiveOnlineConfig);
|
||||
|
||||
}
|
||||
|
||||
//TODO 需要开发斗鱼在线直播爬取功能
|
||||
@Override
|
||||
public String start() {
|
||||
Logger logger = ChopperLogFactory.getLogger(LoggerType.Creeper);
|
||||
logger.info("正在爬取主播:{},直播间:{},直播间id:{}",douyuLive.getLiver(),douyuLive.getLiveName(),douyuLive.getLiveId());
|
||||
return null;
|
||||
return this.start(ChopperLogFactory.getLogger(LoggerType.LiveRecord),(DouyuLiveOnlineConfig)loadConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package org.example.core.creeper.loadtask;
|
||||
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.core.creeper.loadconfig.LoadLiveConfig;
|
||||
import org.example.core.loadconfig.LoadConfig;
|
||||
import org.example.core.loadtask.CommonLoadTask;
|
||||
import org.example.core.manager.LiveDownloadManager;
|
||||
import org.example.plugin.PluginCheckAndDo;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/08/31 16:56
|
||||
**/
|
||||
public abstract class LiveOnlineLoadTask extends CommonLoadTask<String> {
|
||||
|
||||
|
||||
public LiveOnlineLoadTask(LoadConfig loadConfig) {
|
||||
super(loadConfig);
|
||||
}
|
||||
|
||||
public String start(Logger logger, LoadLiveConfig loadLiveConfig){
|
||||
AtomicReference<String> res = new AtomicReference<>();
|
||||
PluginCheckAndDo.CheckAndDo(
|
||||
(plugin) -> {
|
||||
try {
|
||||
String taskId = ((LiveDownloadManager) plugin).addTask(loadLiveConfig);
|
||||
logger.info("正在爬取{}的直播内容....",loadLiveConfig.getLiverName());
|
||||
res.set((String) ((LiveDownloadManager) plugin).waitResult(taskId, loadLiveConfig));
|
||||
}catch (Exception e){
|
||||
res.set("");
|
||||
}
|
||||
},
|
||||
()->{
|
||||
logger.error("该爬虫需要的{}插件不存在,无法启用直播,请检查插件是否安装", PluginName.LIVE_MANAGER_PLUGIN);
|
||||
},
|
||||
PluginName.LIVE_MANAGER_PLUGIN
|
||||
);
|
||||
return res.get();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
|
||||
}
|
||||
}
|
||||
@@ -7,9 +7,6 @@ import org.example.core.parser.PlatformVideoUrlParser;
|
||||
import org.example.core.parser.impl.BilibiliFlvUrlParser;
|
||||
import org.example.core.component.LiveStreamTask;
|
||||
import org.example.core.parser.impl.DouyuFlvUrlParser;
|
||||
import org.example.pojo.live.BilibiliLiveConfig;
|
||||
import org.example.pojo.live.DouyuLiveConfig;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@@ -5,7 +5,7 @@ import org.example.core.component.StatusMonitor;
|
||||
import org.example.core.creeper.loadconfig.LoadLiveConfig;
|
||||
import org.example.core.factory.LiveTaskFactory;
|
||||
import org.example.plugin.CommonPlugin;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
import org.example.thread.NamedThreadFactory;
|
||||
import org.example.utils.VideoConverter;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
@@ -16,6 +16,7 @@ import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
@@ -25,7 +26,7 @@ import java.util.concurrent.Future;
|
||||
* @author 燧枫
|
||||
* @date 2023/5/19 17:12
|
||||
*/
|
||||
public class TaskManager extends CommonPlugin {
|
||||
public class LiveDownloadManager extends CommonPlugin {
|
||||
|
||||
private ExecutorService executor;
|
||||
private ExecutorService logExecutor;
|
||||
@@ -34,16 +35,11 @@ public class TaskManager extends CommonPlugin {
|
||||
private LiveTaskFactory taskFactory;
|
||||
private Map<String, StatusMonitor> statusMonitors;
|
||||
|
||||
public TaskManager(String module, String pluginName, List<String> needPlugins, boolean isAutoStart) {
|
||||
public LiveDownloadManager(String module, String pluginName, List<String> needPlugins, boolean isAutoStart) {
|
||||
super(module, pluginName, needPlugins, isAutoStart);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean init() {
|
||||
return super.init();
|
||||
}
|
||||
|
||||
public TaskManager(int maxTasks) {
|
||||
public LiveDownloadManager(int maxTasks) {
|
||||
super(null,null,null,true);
|
||||
this.executor = Executors.newFixedThreadPool(maxTasks);
|
||||
this.logExecutor = Executors.newFixedThreadPool(maxTasks);
|
||||
@@ -53,6 +49,20 @@ public class TaskManager extends CommonPlugin {
|
||||
this.statusMonitors = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean init() {
|
||||
NamedThreadFactory poolName = new NamedThreadFactory("LiveManager");
|
||||
this.executor = Executors.newCachedThreadPool(poolName);
|
||||
this.logExecutor = Executors.newCachedThreadPool(poolName);
|
||||
this.futures = new HashMap<>();
|
||||
this.tasks = new HashMap<>();
|
||||
this.taskFactory = new LiveTaskFactory();
|
||||
this.statusMonitors = new HashMap<>();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public String addTask(LoadLiveConfig liveConfig) throws FileNotFoundException {
|
||||
LiveStreamTask task = this.taskFactory.create(liveConfig);
|
||||
if (task == null) {
|
||||
@@ -67,7 +77,7 @@ public class TaskManager extends CommonPlugin {
|
||||
OutputStream fileIO = new FileOutputStream(Path.of(liveConfig.getVideoPath(),liveConfig.getVideoName() + ".flv").toString());
|
||||
|
||||
Future<?> future = executor.submit(() -> {
|
||||
task.start(executor, statusMonitor, fileIO);
|
||||
task.start(statusMonitor, fileIO);
|
||||
});
|
||||
futures.put(taskId, future);
|
||||
|
||||
@@ -83,6 +93,14 @@ public class TaskManager extends CommonPlugin {
|
||||
return new ArrayList<>(tasks.keySet());
|
||||
}
|
||||
|
||||
public Object waitResult(String taskId,LoadLiveConfig liveConfig) throws ExecutionException, InterruptedException {
|
||||
Future<?> future = futures.get(taskId);
|
||||
if(future!=null){
|
||||
future.get();
|
||||
return terminateThenSave(liveConfig,taskId);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public void removeTask(String taskId) {
|
||||
pauseTask(taskId);
|
||||
statusMonitors.remove(taskId);
|
||||
@@ -97,17 +115,18 @@ public class TaskManager extends CommonPlugin {
|
||||
}
|
||||
}
|
||||
|
||||
public void terminateThenSave(LoadLiveConfig liveConfig,String taskId){
|
||||
public String terminateThenSave(LoadLiveConfig liveConfig,String taskId){
|
||||
LiveStreamTask task = tasks.get(taskId);
|
||||
task.terminate();
|
||||
removeTask(taskId);
|
||||
String path = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".flv").toString();
|
||||
if (liveConfig.isConvertToMp4()) {
|
||||
String flvFilePath = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".flv").toString();
|
||||
String mp4FilePath = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".mp4").toString();
|
||||
VideoConverter.convertFlvToMp4(flvFilePath, mp4FilePath);
|
||||
VideoConverter.convertFlvToMp4(path, mp4FilePath);
|
||||
System.out.println("start: flv-->mp4");
|
||||
|
||||
path = mp4FilePath;
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
private StatusMonitor getStatusMonitor(String taskId) {
|
||||
@@ -140,4 +159,11 @@ public class TaskManager extends CommonPlugin {
|
||||
statusMonitor.downloadLogTable(taskId);
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
logExecutor.shutdown();
|
||||
executor.shutdown();
|
||||
super.shutdown();
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,7 @@
|
||||
package org.example.core.parser;
|
||||
|
||||
import org.example.core.creeper.loadconfig.LoadRecordConfig;
|
||||
import org.example.core.creeper.loadconfig.LoadVideoConfig;
|
||||
import org.example.core.loadconfig.LoadConfig;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
|
||||
|
||||
/**
|
||||
* 全直播平台flv链接解析接口
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
package org.example.core.parser.impl;
|
||||
|
||||
import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadconfig.LoadRecordConfig;
|
||||
import org.example.core.parser.PlatformVideoUrlParser;
|
||||
import org.example.pojo.live.BilibiliLiveConfig;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
import org.example.utils.HttpClientUtil;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
@@ -2,10 +2,7 @@ package org.example.core.parser.impl;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadconfig.DouyuRecordConfig;
|
||||
import org.example.core.parser.PlatformVideoUrlParser;
|
||||
import org.example.pojo.live.DouyuLiveConfig;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
import org.example.utils.HttpClientUtil;
|
||||
import org.example.utils.RegexUtil;
|
||||
import org.json.JSONObject;
|
||||
@@ -17,7 +14,6 @@ import java.time.LocalDate;
|
||||
|
||||
|
||||
public class DouyuFlvUrlParser implements PlatformVideoUrlParser<DouyuLiveOnlineConfig> {
|
||||
String flvBaseUrl = "http://openflv-huos.douyucdn2.cn/dyliveflv1/";
|
||||
String did = "818074ef9c05a3fe94acdfe500091601";
|
||||
|
||||
@Override
|
||||
@@ -64,12 +60,14 @@ public class DouyuFlvUrlParser implements PlatformVideoUrlParser<DouyuLiveOnline
|
||||
JSONObject dataObj = respObj.getJSONObject("data");
|
||||
if(dataObj!=null){
|
||||
String fileUrl = dataObj.getString("rtmp_live");
|
||||
String flvBaseUrl = dataObj.getString("rtmp_url");
|
||||
String token = fileUrl.substring(fileUrl.indexOf("."));
|
||||
if(fileUrl!=null){
|
||||
String name = fileUrl.substring(0,fileUrl.indexOf("."));
|
||||
if(name.contains("_")){
|
||||
return String.format(config.getFlvUrl()+"%s_%s.xs",name.substring(0,name.indexOf("_")),clarity);
|
||||
return String.format(flvBaseUrl+"/%s_%s%s",name.substring(0,name.indexOf("_")),clarity,token);
|
||||
}
|
||||
return String.format(config.getFlvUrl()+"%s_%s.xs",name,clarity);
|
||||
return String.format(flvBaseUrl+"/%s",fileUrl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package org.example.core.parser.impl;
|
||||
|
||||
import org.example.core.creeper.loadconfig.DouyuRecordConfig;
|
||||
import org.example.core.parser.PlatformVideoUrlParser;
|
||||
import org.example.pojo.live.LiveConfig;
|
||||
import org.example.pool.ConstPool;
|
||||
import org.openqa.selenium.devtools.DevTools;
|
||||
import org.openqa.selenium.devtools.v115.network.Network;
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
package org.example.init;
|
||||
|
||||
import org.example.constpool.ModuleName;
|
||||
import org.example.constpool.PluginName;
|
||||
import org.example.core.manager.LiveDownloadManager;
|
||||
import org.example.plugin.CommonPlugin;
|
||||
import org.example.plugin.annotation.Plugin;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/08/31 15:47
|
||||
**/
|
||||
|
||||
@Plugin(moduleName = ModuleName.LIVE,
|
||||
pluginName = PluginName.LIVE_MANAGER_PLUGIN,
|
||||
pluginName_CN = "直播下载监控插件",
|
||||
pluginDescription = "实时下载监控直播的插件",
|
||||
pluginClass= LiveDownloadManager.class )
|
||||
public class LiveDownloadManagerInitMachine extends CommonInitMachine{
|
||||
|
||||
public LiveDownloadManagerInitMachine(List<String> needPlugins, boolean isAutoStart, String moduleName, String name, Class<? extends CommonPlugin> clazz) {
|
||||
super(needPlugins, isAutoStart, moduleName, name, clazz);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package org.example.init.module;
|
||||
|
||||
import org.example.constpool.ConstPool;
|
||||
import org.example.constpool.ModuleName;
|
||||
import org.example.init.ModuleInitMachine;
|
||||
import org.example.log.ChopperLogFactory;
|
||||
import org.example.log.LoggerType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Genius
|
||||
* @date 2023/07/21 00:16
|
||||
**/
|
||||
|
||||
/**
|
||||
* 整个直播模块的模块初始化类
|
||||
*/
|
||||
public class LiveModuleInitMachine extends ModuleInitMachine {
|
||||
|
||||
public LiveModuleInitMachine() {
|
||||
super(
|
||||
List.of(ConstPool.FILE,ConstPool.CREEPER),
|
||||
ChopperLogFactory.getLogger(LoggerType.LiveRecord),
|
||||
ModuleName.LIVE
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
package org.example.pojo.live;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* b站直播下载配置
|
||||
* @author 燧枫
|
||||
* @date 2023/5/19 19:55
|
||||
*/
|
||||
@Data
|
||||
public class BilibiliLiveConfig extends LiveConfig {
|
||||
|
||||
// 清晰度,10000为原画画质
|
||||
private int clarity;
|
||||
|
||||
public BilibiliLiveConfig(String roomId, int clarity, String videoPath, String videoName, boolean convertToMp4) {
|
||||
super(roomId, videoPath, videoName, convertToMp4);
|
||||
this.clarity = clarity;
|
||||
}
|
||||
|
||||
public BilibiliLiveConfig(String roomId, String videoPath, String videoName) {
|
||||
super(roomId, videoPath, videoName, true);
|
||||
this.clarity = 4000;
|
||||
}
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
package org.example.pojo.live;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class DouyuLiveConfig extends LiveConfig{
|
||||
|
||||
// 清晰度,4000蓝光
|
||||
private int clarity;
|
||||
|
||||
public DouyuLiveConfig(String roomId, int clarity, String videoPath, String videoName, boolean convertToMp4) {
|
||||
super(roomId, videoPath, videoName, convertToMp4);
|
||||
this.clarity = clarity;
|
||||
}
|
||||
|
||||
public DouyuLiveConfig(String roomId, String videoPath, String videoName) {
|
||||
super(roomId, videoPath, videoName, true);
|
||||
this.clarity = 4000;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
package org.example.pojo.live;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author 燧枫
|
||||
* @date 2023/8/3 22:23
|
||||
*/
|
||||
@Data
|
||||
public class DouyuRecordConfig extends LiveConfig {
|
||||
|
||||
private String startTime;
|
||||
|
||||
private String endTime;
|
||||
|
||||
public DouyuRecordConfig(String roomId, String videoPath, String videoName, String startTime, String endTime) {
|
||||
super(roomId, videoPath, videoName, false);
|
||||
this.startTime = startTime;
|
||||
this.endTime = endTime;
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
package org.example.pojo.live;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 直播下载配置
|
||||
* @author 燧枫
|
||||
* @date 2023/5/19 19:54
|
||||
*/
|
||||
@Data
|
||||
public class LiveConfig {
|
||||
|
||||
// 房间号
|
||||
private String roomId;
|
||||
|
||||
// 视频保存路径
|
||||
private String videoPath;
|
||||
|
||||
// 视频保存名称
|
||||
private String videoName;
|
||||
|
||||
// 是否自动转换为mp4格式
|
||||
private boolean convertToMp4;
|
||||
|
||||
public LiveConfig(String roomId, String videoPath, String videoName, boolean convertToMp4) {
|
||||
this.roomId = roomId;
|
||||
this.videoPath = videoPath;
|
||||
this.videoName = videoName;
|
||||
this.convertToMp4 = convertToMp4;
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@ package org.example.live;
|
||||
import org.example.ConsoleApplication;
|
||||
import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig;
|
||||
import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig;
|
||||
import org.example.core.manager.TaskManager;
|
||||
import org.example.core.manager.LiveDownloadManager;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
@@ -19,21 +19,21 @@ import org.springframework.test.context.junit4.SpringRunner;
|
||||
public class LiveTest {
|
||||
@Test
|
||||
public void DouyuLive(){
|
||||
TaskManager taskManager = new TaskManager(5);
|
||||
LiveDownloadManager liveDownLoadManager = new LiveDownloadManager(5);
|
||||
DouyuLiveOnlineConfig douyuLiveConfig = new DouyuLiveOnlineConfig(
|
||||
"4333872", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "CF", true
|
||||
"36252", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "CF", true
|
||||
);
|
||||
try {
|
||||
// 向任务管理器中添加任务
|
||||
String taskId = taskManager.addTask(douyuLiveConfig); // 获取任务的标识符
|
||||
taskManager.showDownloadTable(taskId);
|
||||
String taskId = liveDownLoadManager.addTask(douyuLiveConfig); // 获取任务的标识符
|
||||
liveDownLoadManager.showDownloadTable(taskId);
|
||||
int cnt = 100;
|
||||
while (cnt > 0) {
|
||||
cnt--;
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
taskManager.terminateThenSave(douyuLiveConfig,taskId);
|
||||
liveDownLoadManager.terminateThenSave(douyuLiveConfig,taskId);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
@@ -44,19 +44,19 @@ public class LiveTest {
|
||||
BilibiliLiveOnlineConfig liveConfig = new BilibiliLiveOnlineConfig("732", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "猪猪公主",false);
|
||||
|
||||
// 创建下载任务管理器
|
||||
TaskManager taskManager = new TaskManager(5);
|
||||
LiveDownloadManager liveDownLoadManager = new LiveDownloadManager(5);
|
||||
|
||||
try {
|
||||
// 向任务管理器中添加任务
|
||||
String taskId = taskManager.addTask(liveConfig); // 获取任务的标识符
|
||||
taskManager.showDownloadTable(taskId);
|
||||
// int cnt = 20;
|
||||
// while (cnt > 0) {
|
||||
// cnt--;
|
||||
// Thread.sleep(1000);
|
||||
// }
|
||||
//
|
||||
// taskManager.terminateThenSave(liveConfig,taskId);
|
||||
String taskId = liveDownLoadManager.addTask(liveConfig); // 获取任务的标识符
|
||||
liveDownLoadManager.showDownloadTable(taskId);
|
||||
int cnt = 600;
|
||||
while (cnt > 0) {
|
||||
cnt--;
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
liveDownLoadManager.terminateThenSave(liveConfig,taskId);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
@@ -18,8 +18,9 @@
|
||||
"BarrageFileListen":false,
|
||||
"TaskCenter":true,
|
||||
"CreeperConfig":true,
|
||||
"LiveDownLoadManager":true,
|
||||
"HotRecommendation":true
|
||||
}
|
||||
},
|
||||
"updateTime":"2023-08-26 15:13:13"
|
||||
}
|
||||
"updateTime":"2023-08-31 17:18:49"
|
||||
}
|
||||
|
||||
@@ -16,6 +16,6 @@ git commit -m 'deploy'
|
||||
|
||||
# 推到你仓库的 gh-pages 分支
|
||||
# 将 <USERNAME>/<REPO> 替换为你的信息
|
||||
git push -f git@github.com:twj666/ChopperBot-Doc.git master:pages
|
||||
git push -f git@github.com:969025903/ChopperBot-Doc.git master:gh-pages
|
||||
|
||||
```
|
||||
|
||||
@@ -10,7 +10,7 @@ module.exports = {
|
||||
searchMaxSuggestions: 10,
|
||||
lastUpdated: "上次更新",
|
||||
|
||||
docsRepo: "/twj666/Chopper-Doc",
|
||||
docsRepo: "/969025903/Chopper-Doc",
|
||||
docsDir: "docs",
|
||||
docsBranch: "master",
|
||||
editLinks: true,
|
||||
|
||||
@@ -213,3 +213,21 @@ InitPluginRegister.getPlugin插件名);
|
||||
//注册插件
|
||||
InitPluginRegister.register(CommonInitMachine);
|
||||
```
|
||||
### 其他用法
|
||||
|
||||
#### CheckAndDo
|
||||
|
||||
::: tip PluginCheckAndDo
|
||||
有的插件可能在某些插件启动时有不同的功能,为了检测目标插件是否存在,并根据存在情况使用不同的方法,你可以使用PluginCheckAndDo方法
|
||||
:::
|
||||
```java
|
||||
PluginCheckAndDo.CheckAndDo(
|
||||
(plugin)->{
|
||||
//存在时执行的方法
|
||||
},
|
||||
()->{
|
||||
//不存在时执行的方法
|
||||
},
|
||||
PluginName.CREEPER_MANAGER_PLUGIN
|
||||
);
|
||||
```
|
||||
|
||||
213
doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md
Normal file
213
doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md
Normal file
@@ -0,0 +1,213 @@
|
||||
---
|
||||
title: 如何编写爬虫
|
||||
date: 2023-07-31 02:05:34
|
||||
permalink: /pages/63a89d/
|
||||
---
|
||||
|
||||
[[toc]]
|
||||
## ChopperBot与爬虫
|
||||
|
||||
::: tip 简介
|
||||
如果说插件构成了ChopperBot这颗独特的星球,那么爬虫得到的数据则是这颗星球的生命。没有生命的星球将是死气沉沉的,而没有爬虫的ChopperBot也将失去它的光泽。
|
||||
在此向各位开发者介绍如何在ChopperBot中编写一个规范的爬虫,并将其融入ChopperBot中。
|
||||
:::
|
||||
|
||||
如果你不知道什么是爬虫,或者如何编写,请参考下列文章:
|
||||
- [java爬虫详解及简单实例](https://zhuanlan.zhihu.com/p/634122028)
|
||||
- [爬虫框架WebMagic](https://webmagic.io/)
|
||||
|
||||
## 编写ChopperBot爬虫
|
||||
|
||||
### 创建文件夹
|
||||
```
|
||||
+-- ChopperBot
|
||||
| +-- 模块名
|
||||
| | +-- core
|
||||
| | | +-- creeper
|
||||
| | | | +-- loadconfig //爬虫参数文件
|
||||
| | | | +-- loadtask //爬虫主体任务
|
||||
| | | | +-- processor //后续任务处理
|
||||
| +-- 模块名
|
||||
```
|
||||
|
||||
在开始编写爬虫脚本前,请先确保你的模块下的core文件夹中有如上图的几个文件夹,如果已经拥有则可以开始进行爬虫脚本的编写了
|
||||
|
||||
### 创建LoadConfig
|
||||
::: tip 说明
|
||||
LoadConfig是当前你要进行爬虫时使用的爬虫参数文件,它可以包含url,header,cookie等等信息,**也是爬虫脚本启动的必备文件**
|
||||
:::
|
||||
|
||||
1. 创建一个LoadConfig文件,包含你爬虫需要的必备信息
|
||||
2. 添加@Creeper注解,将脚本文件交给CreeperCenter管理
|
||||
```java
|
||||
@Creeper(creeperName = "豆瓣书籍top250",loadTask = DouBanLoadTask.class,creeperDescription = "爬豆瓣爬的")
|
||||
public class DouBanLoadConfig extends LoadConfig {
|
||||
public DouBanLoadConfig() {
|
||||
this.url = "https://book.douban.com/top250"; //爬虫url
|
||||
//this.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36";
|
||||
//this.Origin = "https://live.bilibili.com";
|
||||
//this.Referer = "https://live.bilibili.com/";
|
||||
}
|
||||
}
|
||||
```
|
||||
LoadConfig的其他参数
|
||||
```java
|
||||
String url;
|
||||
String startTime;
|
||||
String UserAgent;
|
||||
String Origin;
|
||||
String Referer;
|
||||
Map<String,String> header; //头信息
|
||||
Map<String,String> cookie; //Cookie
|
||||
```
|
||||
如果你需要更多参数可以继承LoadConfig,自行添加,例如下面代码:
|
||||
```java
|
||||
public abstract class LoadVideoConfig extends LoadConfig {
|
||||
protected String videoPath;
|
||||
|
||||
// 视频保存名称
|
||||
protected String videoName;
|
||||
|
||||
protected int clarity;
|
||||
|
||||
public LoadVideoConfig(String videoPath, String videoName) {
|
||||
this.videoPath = videoPath;
|
||||
this.videoName = videoName;
|
||||
}
|
||||
}
|
||||
```
|
||||
### 创建LoadTask
|
||||
::: tip 说明
|
||||
LoadTask就是整个爬虫文件运行的核心,它负责读取LoadConfig中的数据并启动爬虫,**也是爬虫脚本启动的必备文件**。
|
||||
ChopperBot也提供了多种类型的LoadTask,目前结合了WebMagic框架,之后会结合更多爬虫框架。
|
||||
:::
|
||||
编写一个最简单的脚本
|
||||
```java
|
||||
/**
|
||||
* 网上随便找的个爬豆瓣的demo,需要导入jsoup,不清楚能不能运行
|
||||
* 链接:https://www.w3cschool.cn/article/69979497.html
|
||||
*/
|
||||
public class DouBanLoadTask extends CommonLoadTask<ArrayList<Book>> {
|
||||
|
||||
|
||||
public DouBanLoadTask(DouBanLoadConfig loadConfig) {
|
||||
super(loadConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ArrayList<Book> start() {
|
||||
try {
|
||||
// 连接到URL,并获取网页的文档对象
|
||||
Document doc = Jsoup.connect(loadConfig.getUrl()).get();
|
||||
// 选择所有包含书籍信息的元素
|
||||
Elements elements = doc.select("div.article > div.indent > table");
|
||||
// 遍历每个元素
|
||||
for (Element element : elements) {
|
||||
// 提取书籍的标题
|
||||
String title = element.select("div.pl2 > a").attr("title");
|
||||
// 提取书籍的作者
|
||||
String author = element.select("p.pl").text().split("/")[0];
|
||||
// 提取书籍的评分
|
||||
String rating = element.select("span.rating_nums").text();
|
||||
// 提取书籍的简介
|
||||
String summary = element.select("span.inq").text();
|
||||
// 创建一个Book对象
|
||||
Book book = new Book(title, author, rating, summary);
|
||||
// 将Book对象添加到列表中
|
||||
books.add(book);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return books;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
|
||||
}
|
||||
|
||||
class Book{
|
||||
private String title; // 标题
|
||||
private String author; // 作者
|
||||
private String rating; // 评分
|
||||
private String summary; // 简介
|
||||
|
||||
|
||||
// 书籍的构造方法
|
||||
public Book(String title, String author, String rating, String summary) {
|
||||
this.title = title;
|
||||
this.author = author;
|
||||
this.rating = rating;
|
||||
this.summary = summary;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
#### 其他的LoadTask
|
||||
|
||||
- **CommonLoadTask**:
|
||||
无特殊需求无框架的LoadTask,适用于编写所有类型的爬虫脚本
|
||||
- **WebMagicLoadTask**:
|
||||
基于WebMagic框架开发的LoadTask,适用于快速开发多线程,深度广度等多种爬虫策略的LoadTask
|
||||
> 1. 创建Spider
|
||||
> ```SpiderFactory.buildSpider(平台名称,Processor,url)```
|
||||
> 2. 获取最终结果
|
||||
> ```Object obj = getData(spider,url)```
|
||||
> ```java
|
||||
> @Override
|
||||
> public List<DouyuLive> start(){
|
||||
>
|
||||
> List<DouyuLive> lives;
|
||||
>
|
||||
> Spider spider = SpiderFactory.buildSpider(ConstPool.PLATFORM.DOUYU.getName(), //平台名称
|
||||
> douyuHotLiveProcessor, //processor
|
||||
> loadConfig.getUrl()); //url
|
||||
> try {
|
||||
> lives = getData(spider,loadConfig.getUrl());
|
||||
> }catch (Exception e){
|
||||
> fail(e);
|
||||
> return null;
|
||||
> }
|
||||
> success();
|
||||
> return lives;
|
||||
> }
|
||||
> ```
|
||||
- ~~AsyncLoadTask(不完善,不推荐使用)~~:
|
||||
异步的LoadTask,用于异步运行的爬虫脚本,得到的结果需要提供给result
|
||||
|
||||
**至此你的爬虫脚本就编写完毕了,无需额外的学习,你可以当作写一个算法OR一个面向过程的程序,总之想要编写一个爬虫脚本很简单。除了创建几个必要的文件和添加一行注解外,你只需要写下你的脚本代码即可**
|
||||
|
||||
## 爬虫运行中心
|
||||
::: tip 说明
|
||||
如果你已经完成一个可以单独运行的爬虫脚本,你可以将写好的爬虫脚本放入[TaskCenter](/pages/691628/)中,以实现爬虫脚本的运行管理、监控、调度,以及本地存储、失败恢复等功能。
|
||||
:::
|
||||
```java
|
||||
CommonPlugin plugin = InitPluginRegister.getPlugin(PluginName.TASK_CENTER_PLUGIN); //获取TaskCenter插件
|
||||
|
||||
ReptileRequest request = new ReptileRequest(new DouyuHotModuleConfig(), //需要调度爬虫方法的参数文件
|
||||
(t)->{System.out.println("return val:"+t);} //爬虫任务完成后的callback方法
|
||||
);
|
||||
|
||||
((TaskCenter)plugin).request(request);
|
||||
```
|
||||
以上代码分为以下几点:
|
||||
1. 确保`TaskCenter`插件已经启动,并获取该插件
|
||||
2. 构建`ReptileRequest`
|
||||
3. 放入需要爬虫方法的文件,文件要确保有对应的`LoadTask`
|
||||
4. 编写`CallBack`方法,这个是在`LoadTask`调用结束并返回值时执行的,t为返回的值,你可以自行处理返回的值
|
||||
5. 发送爬虫请求给`TaskCenter`运行
|
||||
|
||||
## 爬虫脚本库
|
||||
当你需要跨模块调用或者快捷使用爬虫脚本时,你可以在`CreeperManager`插件中查找你已经注解`@Creeper`的爬虫脚本
|
||||
```java
|
||||
CreeperManager manager = InitPluginRegister.getPlugin(PluginName.CREEPER_MANAGER_PLUGIN); //获取CreeperManager插件
|
||||
//根据名字获取爬虫脚本
|
||||
LoadTask task1 = manager.getLoadTask("豆瓣脚本");
|
||||
//根据爬虫请求中的LoadConfig获得爬虫脚本
|
||||
LoadTask task2 = manager.getLoadTask(ReptileRequest);
|
||||
|
||||
```
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
title: 爬虫脚本编写
|
||||
date: 2023-07-31 02:05:34
|
||||
permalink: /pages/63a89d/
|
||||
---
|
||||
@@ -43,6 +43,7 @@ postList: none
|
||||
[twj666🤡](https://github.com/tmlgenius)
|
||||
[welsir🤡](https://github.com/tmlgenius)
|
||||
[masteryf🤡](https://github.com/masteryf)
|
||||
[Klein422🤡](https://github.com/Klein422)
|
||||
|
||||
|
||||
### TML生态圈
|
||||
@@ -59,7 +60,7 @@ postList: none
|
||||
欢迎各路好汉一起来参与完善 ChopperBot,我们期待你的 PR!
|
||||
|
||||
- 贡献代码:代码地址 [ChopperBot](https://github.com/969025903/ChopperBot) ,欢迎提交 Issue 或者 Pull Requests
|
||||
- 维护文档:文档地址 [ChopperBot-Doc](https://github.com/twj666/Chopper-Doc) ,欢迎参与翻译和修订
|
||||
- 维护文档:文档地址 [ChopperBot-Doc](https://github.com/969025903/ChopperBot-Doc/) ,欢迎参与翻译和修订
|
||||
|
||||
|
||||
### 友情链接
|
||||
|
||||
Reference in New Issue
Block a user