From 976f0161447d202b48bc6d8eabe3dab4eec1bfbc Mon Sep 17 00:00:00 2001 From: userA Date: Thu, 31 Aug 2023 18:25:32 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E6=A1=A3=E7=BC=96=E5=86=99=20ReadMe?= =?UTF-8?q?=E6=9B=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +- README.zh-CN.md | 6 +- .../org/example/constpool/ModuleName.java | 1 + .../org/example/constpool/PluginName.java | 1 + .../org/example/init/ModuleInitMachine.java | 8 +- .../java/org/example/plugin/CommonPlugin.java | 2 +- .../java/org/example/plugin/PluginAction.java | 2 +- .../org/example/plugin/PluginCheckAndDo.java | 32 ++- .../org/example/plugin/PluginFailAction.java | 11 + chopperbot-console/pom.xml | 5 + .../org/example/init/WorldInitMachine.java | 8 +- .../example/core/taskcenter/TaskCenter.java | 4 +- .../org/example/init/ConfigInitMachine.java | 18 ++ .../java/org/example/core/guard/Guard.java | 6 +- .../main/java/org/example/DouyuLiveTest.java | 14 -- .../main/java/org/example/LiveStreamTest.java | 18 -- .../core/component/LiveStreamTask.java | 26 +-- .../loadconfig/BilibiliLiveOnlineConfig.java | 5 + .../loadconfig/DouyuLiveOnlineConfig.java | 5 +- .../creeper/loadconfig/LoadLiveConfig.java | 5 + .../loadtask/BilibiliLiveOnlineLoadTask.java | 34 +++ .../loadtask/DouyuLiveOnlineLoadTask.java | 15 +- .../creeper/loadtask/LiveOnlineLoadTask.java | 49 ++++ .../example/core/factory/LiveTaskFactory.java | 3 - ...kManager.java => LiveDownloadManager.java} | 54 +++-- .../core/parser/PlatformVideoUrlParser.java | 4 +- .../parser/impl/BilibiliFlvUrlParser.java | 4 - .../core/parser/impl/DouyuFlvUrlParser.java | 10 +- .../core/parser/impl/DouyuM3u8UrlParser.java | 1 - .../init/LiveDownloadManagerInitMachine.java | 28 +++ .../init/module/LiveModuleInitMachine.java | 28 +++ .../example/pojo/live/BilibiliLiveConfig.java | 25 -- .../example/pojo/live/DouyuLiveConfig.java | 20 -- .../example/pojo/live/DouyuRecordConfig.java | 22 -- 
.../org/example/pojo/live/LiveConfig.java | 31 --- .../test/java/org/example/live/LiveTest.java | 32 +-- config/chopperBotConfig.json | 5 +- doc/ReadMe.md | 2 +- doc/docs/.vuepress/config/themeConfig.js | 2 +- .../01.开发指南/01.快速开发/02.模块与插件.md | 18 ++ .../01.开发指南/01.快速开发/03.如何编写爬虫.md | 213 ++++++++++++++++++ .../01.开发指南/01.快速开发/03.爬虫模块.md | 5 - doc/docs/index.md | 3 +- 43 files changed, 542 insertions(+), 249 deletions(-) create mode 100644 chopperbot-common/src/main/java/org/example/plugin/PluginFailAction.java delete mode 100644 chopperbot-live/src/main/java/org/example/DouyuLiveTest.java delete mode 100644 chopperbot-live/src/main/java/org/example/LiveStreamTest.java create mode 100644 chopperbot-live/src/main/java/org/example/core/creeper/loadtask/BilibiliLiveOnlineLoadTask.java create mode 100644 chopperbot-live/src/main/java/org/example/core/creeper/loadtask/LiveOnlineLoadTask.java rename chopperbot-live/src/main/java/org/example/core/manager/{TaskManager.java => LiveDownloadManager.java} (74%) create mode 100644 chopperbot-live/src/main/java/org/example/init/LiveDownloadManagerInitMachine.java create mode 100644 chopperbot-live/src/main/java/org/example/init/module/LiveModuleInitMachine.java delete mode 100644 chopperbot-live/src/main/java/org/example/pojo/live/BilibiliLiveConfig.java delete mode 100644 chopperbot-live/src/main/java/org/example/pojo/live/DouyuLiveConfig.java delete mode 100644 chopperbot-live/src/main/java/org/example/pojo/live/DouyuRecordConfig.java delete mode 100644 chopperbot-live/src/main/java/org/example/pojo/live/LiveConfig.java create mode 100644 doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md delete mode 100644 doc/docs/01.指南/01.开发指南/01.快速开发/03.爬虫模块.md diff --git a/README.md b/README.md index 6e5302e..0e32569 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@

- + 简体中文文档 @@ -74,8 +74,8 @@ ![Alt](https://repobeats.axiom.co/api/embed/0ae23655bb105addf8d90a999df36f690d615af7.svg "Repobeats analytics image") # 🔗 Links -👉 [Document](https://twj666.github.io/ChopperBot-Doc/) +👉 [Document](https://969025903.github.io/ChopperBot-Doc/) -👉 [Developer's Guide](https://twj666.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84) +👉 [Developer's Guide](https://969025903.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84) 👉 [CHANGE LOG](https://github.com/969025903/ChopperBot/blob/master/CHANGELOG.md) diff --git a/README.zh-CN.md b/README.zh-CN.md index 7314000..1d8674f 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -10,7 +10,7 @@

- + 简体中文文档 @@ -73,8 +73,8 @@ # 📈 项目动态 ![Alt](https://repobeats.axiom.co/api/embed/0ae23655bb105addf8d90a999df36f690d615af7.svg "Repobeats analytics image") # 🔗 相关链接 -👉 [项目文档](https://twj666.github.io/ChopperBot-Doc/) +👉 [项目文档](https://969025903.github.io/ChopperBot-Doc/) -👉 [项目开发指南](https://twj666.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84) +👉 [项目开发指南](https://969025903.github.io/ChopperBot-Doc/pages/779a67/#chopperbot%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84) 👉 [更新日志](https://github.com/969025903/ChopperBot/blob/master/CHANGELOG.md) diff --git a/chopperbot-common/src/main/java/org/example/constpool/ModuleName.java b/chopperbot-common/src/main/java/org/example/constpool/ModuleName.java index fac9dd2..ec0e040 100644 --- a/chopperbot-common/src/main/java/org/example/constpool/ModuleName.java +++ b/chopperbot-common/src/main/java/org/example/constpool/ModuleName.java @@ -15,4 +15,5 @@ public class ModuleName { public static final String FILE = ConstPool.FILE; public static final String BARRAGE = ConstPool.BARRAGE; + public static final String LIVE = ConstPool.LIVE_RECORD; } diff --git a/chopperbot-common/src/main/java/org/example/constpool/PluginName.java b/chopperbot-common/src/main/java/org/example/constpool/PluginName.java index 9906dd2..fa1f44f 100644 --- a/chopperbot-common/src/main/java/org/example/constpool/PluginName.java +++ b/chopperbot-common/src/main/java/org/example/constpool/PluginName.java @@ -27,4 +27,5 @@ public class PluginName { //Barrage public static final String BARRAGE_FILE_PLUGIN= "BarrageFileListen"; + public static final String LIVE_MANAGER_PLUGIN= "LiveDownLoadManager"; } diff --git a/chopperbot-common/src/main/java/org/example/init/ModuleInitMachine.java b/chopperbot-common/src/main/java/org/example/init/ModuleInitMachine.java index 608a017..a16437f 100644 --- a/chopperbot-common/src/main/java/org/example/init/ModuleInitMachine.java +++ 
b/chopperbot-common/src/main/java/org/example/init/ModuleInitMachine.java @@ -151,9 +151,11 @@ public abstract class ModuleInitMachine extends CommonInitMachine{ @Override public void shutdown() { logger.info("👇 <{}> is shutting down , {} plugins need to shut down...",moduleName,initMachines.size()); - initMachines.forEach( - InitMachine::shutdown - ); + initMachines.forEach(initMachine-> { + if(InitPluginRegister.isRegister(initMachine.getPluginName())){ + initMachine.shutdown(); + } + }); logger.info("👆 <{}> Completing the shutdown of all plugins!",moduleName); } } diff --git a/chopperbot-common/src/main/java/org/example/plugin/CommonPlugin.java b/chopperbot-common/src/main/java/org/example/plugin/CommonPlugin.java index 7fbe28a..93a64ad 100644 --- a/chopperbot-common/src/main/java/org/example/plugin/CommonPlugin.java +++ b/chopperbot-common/src/main/java/org/example/plugin/CommonPlugin.java @@ -15,7 +15,7 @@ public abstract class CommonPlugin implements ChopperBotPlugin{ @Override public boolean init() { - return false; + return true; } @Override diff --git a/chopperbot-common/src/main/java/org/example/plugin/PluginAction.java b/chopperbot-common/src/main/java/org/example/plugin/PluginAction.java index 5bc1353..421f94b 100644 --- a/chopperbot-common/src/main/java/org/example/plugin/PluginAction.java +++ b/chopperbot-common/src/main/java/org/example/plugin/PluginAction.java @@ -2,5 +2,5 @@ package org.example.plugin; @FunctionalInterface public interface PluginAction { - void action(); + void action(ChopperBotPlugin plugin); } diff --git a/chopperbot-common/src/main/java/org/example/plugin/PluginCheckAndDo.java b/chopperbot-common/src/main/java/org/example/plugin/PluginCheckAndDo.java index c4379c4..dc8a7ef 100644 --- a/chopperbot-common/src/main/java/org/example/plugin/PluginCheckAndDo.java +++ b/chopperbot-common/src/main/java/org/example/plugin/PluginCheckAndDo.java @@ -1,5 +1,6 @@ package org.example.plugin; +import org.example.constpool.PluginName; import 
org.example.init.InitPluginRegister; import java.util.function.Consumer; @@ -14,32 +15,25 @@ public class PluginCheckAndDo { /** * 且条件,检测所需的所有插件是否存在,存在则调用success,不存在则调用fail */ + public static void CheckAndDo( PluginAction success, - PluginAction fail, - String...needPlugins){ - for (String needPlugin : needPlugins) { - if (!InitPluginRegister.isRegister(needPlugin)) { - fail.action(); - return; - } + String needPlugin){ + if (!InitPluginRegister.isRegister(needPlugin)) { + return; } - success.action(); - + success.action((InitPluginRegister.getPlugin(needPlugin))); } - /** - * 且条件,检测所需的所有插件是否存在,存在则调用success,不存在则返回 - */ + public static void CheckAndDo( PluginAction success, - String...needPlugins){ - for (String needPlugin : needPlugins) { - if (!InitPluginRegister.isRegister(needPlugin)) { - return; - } + PluginFailAction fail, + String needPlugin){ + if (!InitPluginRegister.isRegister(needPlugin)) { + fail.action(); + return; } - success.action(); + success.action((InitPluginRegister.getPlugin(needPlugin))); } - } diff --git a/chopperbot-common/src/main/java/org/example/plugin/PluginFailAction.java b/chopperbot-common/src/main/java/org/example/plugin/PluginFailAction.java new file mode 100644 index 0000000..dcf062d --- /dev/null +++ b/chopperbot-common/src/main/java/org/example/plugin/PluginFailAction.java @@ -0,0 +1,11 @@ +package org.example.plugin; + +/** + * @author Genius + * @date 2023/08/31 02:12 + **/ +@FunctionalInterface +public interface PluginFailAction { + + void action(); +} diff --git a/chopperbot-console/pom.xml b/chopperbot-console/pom.xml index 5aedd32..860c50f 100644 --- a/chopperbot-console/pom.xml +++ b/chopperbot-console/pom.xml @@ -23,6 +23,11 @@ chopperbot-creeper 1.0-SNAPSHOT + + org.example + chopperbot-live + 1.0-SNAPSHOT + org.example chopperbot-file diff --git a/chopperbot-console/src/main/java/org/example/init/WorldInitMachine.java b/chopperbot-console/src/main/java/org/example/init/WorldInitMachine.java index 3cbf288..1dd020c 100644 
--- a/chopperbot-console/src/main/java/org/example/init/WorldInitMachine.java +++ b/chopperbot-console/src/main/java/org/example/init/WorldInitMachine.java @@ -16,16 +16,13 @@ import java.util.function.Supplier; **/ public class WorldInitMachine extends ModuleInitMachine{ - - - private static final String githubUrl = "https://github.com/969025903/ChopperBot"; public WorldInitMachine() throws Exception { super("ChopperBot",ChopperLogFactory.getLogger(LoggerType.System)); } - + public List alreadyInitModule = new ArrayList<>(); @Override public boolean init() { @@ -41,6 +38,7 @@ public class WorldInitMachine extends ModuleInitMachine{ return fail(); } (initMachine).registerPlugin(); + alreadyInitModule.add(initMachine); }else{ return fail(); } @@ -86,7 +84,7 @@ public class WorldInitMachine extends ModuleInitMachine{ logger.info("🌏 <{}> is shutting down,{} modules need to be closed,please wait.....","ChopperBot",getInitMachines().size()); ChopperBotGuardPool.GuardPool().shutdown(); - this.getInitMachines().forEach( + this.alreadyInitModule.forEach( InitMachine::shutdown ); diff --git a/chopperbot-creeper/src/main/java/org/example/core/taskcenter/TaskCenter.java b/chopperbot-creeper/src/main/java/org/example/core/taskcenter/TaskCenter.java index 6932ab7..c01e664 100644 --- a/chopperbot-creeper/src/main/java/org/example/core/taskcenter/TaskCenter.java +++ b/chopperbot-creeper/src/main/java/org/example/core/taskcenter/TaskCenter.java @@ -244,8 +244,8 @@ public class TaskCenter extends GuardPlugin { public void request(ReptileRequest request){ PluginCheckAndDo.CheckAndDo( - ()->{ - ReptileTask task = ((CreeperManager)InitPluginRegister.getPlugin(PluginName.CREEPER_MANAGER_PLUGIN)).getReptileTask(request); + (plugin)->{ + ReptileTask task = ((CreeperManager)plugin).getReptileTask(request); if(task!=null){ addTask(task); } diff --git a/chopperbot-file/src/main/java/org/example/init/ConfigInitMachine.java 
b/chopperbot-file/src/main/java/org/example/init/ConfigInitMachine.java index 8bc1c34..4b97e4f 100644 --- a/chopperbot-file/src/main/java/org/example/init/ConfigInitMachine.java +++ b/chopperbot-file/src/main/java/org/example/init/ConfigInitMachine.java @@ -1,13 +1,18 @@ package org.example.init; import org.example.bean.ConfigFile; +import org.example.cache.FileCacheManager; +import org.example.constpool.PluginName; import org.example.plugin.CommonPlugin; +import org.example.plugin.PluginCheckAndDo; import org.example.plugin.annotation.Plugin; import org.example.util.ConfigFileUtil; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; + /** * @author Genius * @date 2023/07/29 00:08 @@ -40,5 +45,18 @@ public abstract class ConfigInitMachine extends CommonInitMachine{ } + @Override + public void shutdown() { + String path = Path.of(configFile.getFilePath(), configFile.getFileName()).toString(); + + PluginCheckAndDo.CheckAndDo( + (plugin)->{ + ((FileCacheManager)plugin).deleteFileCache(path); + }, + PluginName.FILE_CACHE_PLUGIN + ); + logger.info("[\uD83C\uDD96] {} close success",path); + } + } diff --git a/chopperbot-hot/src/main/java/org/example/core/guard/Guard.java b/chopperbot-hot/src/main/java/org/example/core/guard/Guard.java index 35f5e4c..ba3ee36 100644 --- a/chopperbot-hot/src/main/java/org/example/core/guard/Guard.java +++ b/chopperbot-hot/src/main/java/org/example/core/guard/Guard.java @@ -67,10 +67,10 @@ public class Guard implements Runnable, ResultLogge //查看热度推送插件是否装载,如果装载则进行热度推送 PluginCheckAndDo.CheckAndDo( - ()->{ - HeatRecommendation plugin = (HeatRecommendation) InitPluginRegister.getPlugin(PluginName.HOT_RECOMMENDATION_PLUGIN); + (plugin)->{ + assert plugin != null; - plugin.sendHotEvent(platform); + ((HeatRecommendation)plugin).sendHotEvent(platform); }, PluginName.HOT_RECOMMENDATION_PLUGIN ); diff --git a/chopperbot-live/src/main/java/org/example/DouyuLiveTest.java 
b/chopperbot-live/src/main/java/org/example/DouyuLiveTest.java deleted file mode 100644 index 01ad0ce..0000000 --- a/chopperbot-live/src/main/java/org/example/DouyuLiveTest.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.example; - -import org.apache.poi.ss.formula.functions.T; -import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig; -import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask; -import org.example.core.manager.TaskManager; -import org.example.pojo.live.DouyuLiveConfig; - - -public class DouyuLiveTest { - public static void main(String[] args) { - - } -} diff --git a/chopperbot-live/src/main/java/org/example/LiveStreamTest.java b/chopperbot-live/src/main/java/org/example/LiveStreamTest.java deleted file mode 100644 index 5479334..0000000 --- a/chopperbot-live/src/main/java/org/example/LiveStreamTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.example; - -import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig; -import org.example.core.manager.TaskManager; -import org.example.pojo.live.BilibiliLiveConfig; - -public class LiveStreamTest { - private static final int THREAD_NUM = 10; - - public static void main(String[] args) { - - // 创建直播配置 - - } -} - - - diff --git a/chopperbot-live/src/main/java/org/example/core/component/LiveStreamTask.java b/chopperbot-live/src/main/java/org/example/core/component/LiveStreamTask.java index 3fe678f..0b6ce5d 100644 --- a/chopperbot-live/src/main/java/org/example/core/component/LiveStreamTask.java +++ b/chopperbot-live/src/main/java/org/example/core/component/LiveStreamTask.java @@ -22,22 +22,20 @@ public class LiveStreamTask { private Map headers; private FlvHandle f = new FlvHandle(); - public void start(ExecutorService executor, StatusMonitor statusMonitor, OutputStream fileIO) { - executor.execute(() -> { - try { - URLConnection conn = new URL(this.url).openConnection(); - if (this.headers != null) { - for (Map.Entry entry : this.headers.entrySet()) { - 
conn.setRequestProperty(entry.getKey(), entry.getValue()); - } + public void start(StatusMonitor statusMonitor, OutputStream fileIO) { + try { + URLConnection conn = new URL(this.url).openConnection(); + if (this.headers != null) { + for (Map.Entry entry : this.headers.entrySet()) { + conn.setRequestProperty(entry.getKey(), entry.getValue()); } - try (InputStream in = conn.getInputStream()) { - f.parseStream(in, statusMonitor, fileIO); - } - } catch (Exception e) { - e.printStackTrace(); } - }); + try (InputStream in = conn.getInputStream()) { + f.parseStream(in, statusMonitor, fileIO); + } + } catch (Exception e) { + e.printStackTrace(); + } } public void terminate() { diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/BilibiliLiveOnlineConfig.java b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/BilibiliLiveOnlineConfig.java index 60805c2..21a50ae 100644 --- a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/BilibiliLiveOnlineConfig.java +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/BilibiliLiveOnlineConfig.java @@ -1,12 +1,17 @@ package org.example.core.creeper.loadconfig; import lombok.Data; +import org.example.core.creeper.loadtask.BilibiliLiveOnlineLoadTask; +import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask; +import org.example.core.creeper.loadtask.DouyuRecordLoadTask; +import org.example.core.manager.annotation.Creeper; /** * @author Genius * @date 2023/08/30 18:05 **/ @Data +@Creeper(creeperName = "bilibili_live",loadTask = BilibiliLiveOnlineLoadTask.class,creeperDescription = "B站直播爬取") public class BilibiliLiveOnlineConfig extends LoadLiveConfig{ diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/DouyuLiveOnlineConfig.java b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/DouyuLiveOnlineConfig.java index dda7bad..29ce91f 100644 --- 
a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/DouyuLiveOnlineConfig.java +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/DouyuLiveOnlineConfig.java @@ -2,15 +2,18 @@ package org.example.core.creeper.loadconfig; import lombok.Data; import org.example.bean.live.DouyuLive; +import org.example.core.creeper.loadtask.DouyuLiveOnlineLoadTask; +import org.example.core.creeper.loadtask.DouyuRecordLoadTask; +import org.example.core.manager.annotation.Creeper; /** * @author Genius * @date 2023/07/28 23:17 **/ @Data +@Creeper(creeperName = "douyu_live",loadTask = DouyuLiveOnlineLoadTask.class,creeperDescription = "斗鱼直播爬取") public class DouyuLiveOnlineConfig extends LoadLiveConfig { - private String flvUrl = "http://openflv-huos.douyucdn2.cn/dyliveflv1/"; public DouyuLiveOnlineConfig(String roomId, String videoPath, String videoName,int clarity) { super(roomId, videoPath, videoName, false); this.clarity = clarity; diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/LoadLiveConfig.java b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/LoadLiveConfig.java index 877a459..3cfcb08 100644 --- a/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/LoadLiveConfig.java +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadconfig/LoadLiveConfig.java @@ -14,13 +14,18 @@ public abstract class LoadLiveConfig extends LoadVideoConfig { // 房间号 protected String roomId; + protected String liverName; + // 是否自动转换为mp4格式 protected boolean convertToMp4; + protected boolean showDownloadTable; + public LoadLiveConfig(String roomId, String videoPath, String videoName, boolean convertToMp4) { super(videoPath,videoName); this.roomId = roomId; this.convertToMp4 = convertToMp4; + this.showDownloadTable = false; } } diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/BilibiliLiveOnlineLoadTask.java 
b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/BilibiliLiveOnlineLoadTask.java new file mode 100644 index 0000000..f578821 --- /dev/null +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/BilibiliLiveOnlineLoadTask.java @@ -0,0 +1,34 @@ +package org.example.core.creeper.loadtask; + +import org.checkerframework.checker.units.qual.A; +import org.example.constpool.PluginName; +import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig; +import org.example.core.loadtask.CommonLoadTask; +import org.example.core.manager.LiveDownloadManager; +import org.example.log.ChopperLogFactory; +import org.example.log.LoggerType; +import org.example.plugin.PluginCheckAndDo; +import org.slf4j.Logger; + +import java.util.concurrent.atomic.AtomicReference; + +/** + * @author Genius + * @date 2023/08/31 15:44 + **/ +public class BilibiliLiveOnlineLoadTask extends LiveOnlineLoadTask { + + public BilibiliLiveOnlineLoadTask(BilibiliLiveOnlineConfig loadConfig) { + super(loadConfig); + } + + @Override + public String start() { + return this.start(ChopperLogFactory.getLogger(LoggerType.LiveRecord),(BilibiliLiveOnlineConfig)loadConfig); + } + + @Override + public void end() { + + } +} diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/DouyuLiveOnlineLoadTask.java b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/DouyuLiveOnlineLoadTask.java index 78bc92a..569f26b 100644 --- a/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/DouyuLiveOnlineLoadTask.java +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/DouyuLiveOnlineLoadTask.java @@ -1,32 +1,33 @@ package org.example.core.creeper.loadtask; import org.example.bean.live.DouyuLive; +import org.example.constpool.PluginName; +import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig; import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig; import org.example.core.loadtask.CommonLoadTask; import 
org.example.core.loadtask.WebMagicLoadTask; +import org.example.core.manager.LiveDownloadManager; import org.example.log.ChopperLogFactory; import org.example.log.LoggerType; +import org.example.plugin.PluginCheckAndDo; import org.slf4j.Logger; +import java.util.concurrent.atomic.AtomicReference; + /** * @author Genius * @date 2023/07/28 23:14 **/ -public class DouyuLiveOnlineLoadTask extends CommonLoadTask { +public class DouyuLiveOnlineLoadTask extends LiveOnlineLoadTask { - - private DouyuLive douyuLive; public DouyuLiveOnlineLoadTask(DouyuLiveOnlineConfig douyuLiveOnlineConfig) { super(douyuLiveOnlineConfig); } - //TODO 需要开发斗鱼在线直播爬取功能 @Override public String start() { - Logger logger = ChopperLogFactory.getLogger(LoggerType.Creeper); - logger.info("正在爬取主播:{},直播间:{},直播间id:{}",douyuLive.getLiver(),douyuLive.getLiveName(),douyuLive.getLiveId()); - return null; + return this.start(ChopperLogFactory.getLogger(LoggerType.LiveRecord),(DouyuLiveOnlineConfig)loadConfig); } @Override diff --git a/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/LiveOnlineLoadTask.java b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/LiveOnlineLoadTask.java new file mode 100644 index 0000000..9178720 --- /dev/null +++ b/chopperbot-live/src/main/java/org/example/core/creeper/loadtask/LiveOnlineLoadTask.java @@ -0,0 +1,49 @@ +package org.example.core.creeper.loadtask; + +import org.example.constpool.PluginName; +import org.example.core.creeper.loadconfig.LoadLiveConfig; +import org.example.core.loadconfig.LoadConfig; +import org.example.core.loadtask.CommonLoadTask; +import org.example.core.manager.LiveDownloadManager; +import org.example.plugin.PluginCheckAndDo; +import org.slf4j.Logger; + +import java.util.concurrent.atomic.AtomicReference; + +/** + * @author Genius + * @date 2023/08/31 16:56 + **/ +public abstract class LiveOnlineLoadTask extends CommonLoadTask { + + + public LiveOnlineLoadTask(LoadConfig loadConfig) { + super(loadConfig); + } + + public 
String start(Logger logger, LoadLiveConfig loadLiveConfig){ + AtomicReference res = new AtomicReference<>(); + PluginCheckAndDo.CheckAndDo( + (plugin) -> { + try { + String taskId = ((LiveDownloadManager) plugin).addTask(loadLiveConfig); + logger.info("正在爬取{}的直播内容....",loadLiveConfig.getLiverName()); + res.set((String) ((LiveDownloadManager) plugin).waitResult(taskId, loadLiveConfig)); + }catch (Exception e){ + res.set(""); + } + }, + ()->{ + logger.error("该爬虫需要的{}插件不存在,无法启用直播,请检查插件是否安装", PluginName.LIVE_MANAGER_PLUGIN); + }, + PluginName.LIVE_MANAGER_PLUGIN + ); + return res.get(); + } + + + @Override + public void end() { + + } +} diff --git a/chopperbot-live/src/main/java/org/example/core/factory/LiveTaskFactory.java b/chopperbot-live/src/main/java/org/example/core/factory/LiveTaskFactory.java index cd5fa78..44c6cf6 100644 --- a/chopperbot-live/src/main/java/org/example/core/factory/LiveTaskFactory.java +++ b/chopperbot-live/src/main/java/org/example/core/factory/LiveTaskFactory.java @@ -7,9 +7,6 @@ import org.example.core.parser.PlatformVideoUrlParser; import org.example.core.parser.impl.BilibiliFlvUrlParser; import org.example.core.component.LiveStreamTask; import org.example.core.parser.impl.DouyuFlvUrlParser; -import org.example.pojo.live.BilibiliLiveConfig; -import org.example.pojo.live.DouyuLiveConfig; -import org.example.pojo.live.LiveConfig; import java.util.HashMap; import java.util.Map; diff --git a/chopperbot-live/src/main/java/org/example/core/manager/TaskManager.java b/chopperbot-live/src/main/java/org/example/core/manager/LiveDownloadManager.java similarity index 74% rename from chopperbot-live/src/main/java/org/example/core/manager/TaskManager.java rename to chopperbot-live/src/main/java/org/example/core/manager/LiveDownloadManager.java index faf3266..6d412c5 100644 --- a/chopperbot-live/src/main/java/org/example/core/manager/TaskManager.java +++ b/chopperbot-live/src/main/java/org/example/core/manager/LiveDownloadManager.java @@ -5,7 +5,7 @@ 
import org.example.core.component.StatusMonitor; import org.example.core.creeper.loadconfig.LoadLiveConfig; import org.example.core.factory.LiveTaskFactory; import org.example.plugin.CommonPlugin; -import org.example.pojo.live.LiveConfig; +import org.example.thread.NamedThreadFactory; import org.example.utils.VideoConverter; import java.io.FileNotFoundException; @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -25,7 +26,7 @@ import java.util.concurrent.Future; * @author 燧枫 * @date 2023/5/19 17:12 */ -public class TaskManager extends CommonPlugin { +public class LiveDownloadManager extends CommonPlugin { private ExecutorService executor; private ExecutorService logExecutor; @@ -34,16 +35,11 @@ public class TaskManager extends CommonPlugin { private LiveTaskFactory taskFactory; private Map statusMonitors; - public TaskManager(String module, String pluginName, List needPlugins, boolean isAutoStart) { + public LiveDownloadManager(String module, String pluginName, List needPlugins, boolean isAutoStart) { super(module, pluginName, needPlugins, isAutoStart); } - @Override - public boolean init() { - return super.init(); - } - - public TaskManager(int maxTasks) { + public LiveDownloadManager(int maxTasks) { super(null,null,null,true); this.executor = Executors.newFixedThreadPool(maxTasks); this.logExecutor = Executors.newFixedThreadPool(maxTasks); @@ -53,6 +49,20 @@ public class TaskManager extends CommonPlugin { this.statusMonitors = new HashMap<>(); } + @Override + public boolean init() { + NamedThreadFactory poolName = new NamedThreadFactory("LiveManager"); + this.executor = Executors.newCachedThreadPool(poolName); + this.logExecutor = Executors.newCachedThreadPool(poolName); + this.futures = new HashMap<>(); + this.tasks = new 
HashMap<>(); + this.taskFactory = new LiveTaskFactory(); + this.statusMonitors = new HashMap<>(); + return true; + } + + + public String addTask(LoadLiveConfig liveConfig) throws FileNotFoundException { LiveStreamTask task = this.taskFactory.create(liveConfig); if (task == null) { @@ -67,7 +77,7 @@ public class TaskManager extends CommonPlugin { OutputStream fileIO = new FileOutputStream(Path.of(liveConfig.getVideoPath(),liveConfig.getVideoName() + ".flv").toString()); Future future = executor.submit(() -> { - task.start(executor, statusMonitor, fileIO); + task.start(statusMonitor, fileIO); }); futures.put(taskId, future); @@ -83,6 +93,14 @@ public class TaskManager extends CommonPlugin { return new ArrayList<>(tasks.keySet()); } + public Object waitResult(String taskId,LoadLiveConfig liveConfig) throws ExecutionException, InterruptedException { + Future future = futures.get(taskId); + if(future!=null){ + future.get(); + return terminateThenSave(liveConfig,taskId); + } + return null; + } public void removeTask(String taskId) { pauseTask(taskId); statusMonitors.remove(taskId); @@ -97,17 +115,18 @@ public class TaskManager extends CommonPlugin { } } - public void terminateThenSave(LoadLiveConfig liveConfig,String taskId){ + public String terminateThenSave(LoadLiveConfig liveConfig,String taskId){ LiveStreamTask task = tasks.get(taskId); task.terminate(); removeTask(taskId); + String path = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".flv").toString(); if (liveConfig.isConvertToMp4()) { - String flvFilePath = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".flv").toString(); String mp4FilePath = Path.of(liveConfig.getVideoPath(),liveConfig.getRoomId() + ".mp4").toString(); - VideoConverter.convertFlvToMp4(flvFilePath, mp4FilePath); + VideoConverter.convertFlvToMp4(path, mp4FilePath); System.out.println("start: flv-->mp4"); - + path = mp4FilePath; } + return path; } private StatusMonitor getStatusMonitor(String taskId) { @@ -140,4 +159,11 @@ 
public class TaskManager extends CommonPlugin { statusMonitor.downloadLogTable(taskId); }); } + + @Override + public void shutdown() { + logExecutor.shutdown(); + executor.shutdown(); + super.shutdown(); + } } diff --git a/chopperbot-live/src/main/java/org/example/core/parser/PlatformVideoUrlParser.java b/chopperbot-live/src/main/java/org/example/core/parser/PlatformVideoUrlParser.java index 0c931eb..8c09f7a 100644 --- a/chopperbot-live/src/main/java/org/example/core/parser/PlatformVideoUrlParser.java +++ b/chopperbot-live/src/main/java/org/example/core/parser/PlatformVideoUrlParser.java @@ -1,9 +1,7 @@ package org.example.core.parser; -import org.example.core.creeper.loadconfig.LoadRecordConfig; import org.example.core.creeper.loadconfig.LoadVideoConfig; -import org.example.core.loadconfig.LoadConfig; -import org.example.pojo.live.LiveConfig; + /** * 全直播平台flv链接解析接口 diff --git a/chopperbot-live/src/main/java/org/example/core/parser/impl/BilibiliFlvUrlParser.java b/chopperbot-live/src/main/java/org/example/core/parser/impl/BilibiliFlvUrlParser.java index 3efe10b..0724a15 100644 --- a/chopperbot-live/src/main/java/org/example/core/parser/impl/BilibiliFlvUrlParser.java +++ b/chopperbot-live/src/main/java/org/example/core/parser/impl/BilibiliFlvUrlParser.java @@ -1,11 +1,7 @@ package org.example.core.parser.impl; import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig; -import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig; -import org.example.core.creeper.loadconfig.LoadRecordConfig; import org.example.core.parser.PlatformVideoUrlParser; -import org.example.pojo.live.BilibiliLiveConfig; -import org.example.pojo.live.LiveConfig; import org.example.utils.HttpClientUtil; import org.json.JSONArray; import org.json.JSONObject; diff --git a/chopperbot-live/src/main/java/org/example/core/parser/impl/DouyuFlvUrlParser.java b/chopperbot-live/src/main/java/org/example/core/parser/impl/DouyuFlvUrlParser.java index 4e0f1b3..a1d3349 100644 --- 
a/chopperbot-live/src/main/java/org/example/core/parser/impl/DouyuFlvUrlParser.java +++ b/chopperbot-live/src/main/java/org/example/core/parser/impl/DouyuFlvUrlParser.java @@ -2,10 +2,7 @@ package org.example.core.parser.impl; import org.apache.commons.codec.digest.DigestUtils; import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig; -import org.example.core.creeper.loadconfig.DouyuRecordConfig; import org.example.core.parser.PlatformVideoUrlParser; -import org.example.pojo.live.DouyuLiveConfig; -import org.example.pojo.live.LiveConfig; import org.example.utils.HttpClientUtil; import org.example.utils.RegexUtil; import org.json.JSONObject; @@ -17,7 +14,6 @@ import java.time.LocalDate; public class DouyuFlvUrlParser implements PlatformVideoUrlParser { - String flvBaseUrl = "http://openflv-huos.douyucdn2.cn/dyliveflv1/"; String did = "818074ef9c05a3fe94acdfe500091601"; @Override @@ -64,12 +60,14 @@ public class DouyuFlvUrlParser implements PlatformVideoUrlParser needPlugins, boolean isAutoStart, String moduleName, String name, Class clazz) { + super(needPlugins, isAutoStart, moduleName, name, clazz); + } + + +} diff --git a/chopperbot-live/src/main/java/org/example/init/module/LiveModuleInitMachine.java b/chopperbot-live/src/main/java/org/example/init/module/LiveModuleInitMachine.java new file mode 100644 index 0000000..344ffbe --- /dev/null +++ b/chopperbot-live/src/main/java/org/example/init/module/LiveModuleInitMachine.java @@ -0,0 +1,28 @@ +package org.example.init.module; + +import org.example.constpool.ConstPool; +import org.example.constpool.ModuleName; +import org.example.init.ModuleInitMachine; +import org.example.log.ChopperLogFactory; +import org.example.log.LoggerType; + +import java.util.List; + +/** + * @author Genius + * @date 2023/07/21 00:16 + **/ + +/** + * 整个热门模块的模块初始化类 + */ +public class LiveModuleInitMachine extends ModuleInitMachine { + + public LiveModuleInitMachine() { + super( + List.of(ConstPool.FILE,ConstPool.CREEPER), + 
ChopperLogFactory.getLogger(LoggerType.LiveRecord), + ModuleName.LIVE + ); + } +} diff --git a/chopperbot-live/src/main/java/org/example/pojo/live/BilibiliLiveConfig.java b/chopperbot-live/src/main/java/org/example/pojo/live/BilibiliLiveConfig.java deleted file mode 100644 index 8dac1a0..0000000 --- a/chopperbot-live/src/main/java/org/example/pojo/live/BilibiliLiveConfig.java +++ /dev/null @@ -1,25 +0,0 @@ -package org.example.pojo.live; - -import lombok.Data; - -/** - * b站直播下载配置 - * @author 燧枫 - * @date 2023/5/19 19:55 -*/ -@Data -public class BilibiliLiveConfig extends LiveConfig { - - // 清晰度,10000为原画画质 - private int clarity; - - public BilibiliLiveConfig(String roomId, int clarity, String videoPath, String videoName, boolean convertToMp4) { - super(roomId, videoPath, videoName, convertToMp4); - this.clarity = clarity; - } - - public BilibiliLiveConfig(String roomId, String videoPath, String videoName) { - super(roomId, videoPath, videoName, true); - this.clarity = 4000; - } -} diff --git a/chopperbot-live/src/main/java/org/example/pojo/live/DouyuLiveConfig.java b/chopperbot-live/src/main/java/org/example/pojo/live/DouyuLiveConfig.java deleted file mode 100644 index 1769177..0000000 --- a/chopperbot-live/src/main/java/org/example/pojo/live/DouyuLiveConfig.java +++ /dev/null @@ -1,20 +0,0 @@ -package org.example.pojo.live; - -import lombok.Data; - -@Data -public class DouyuLiveConfig extends LiveConfig{ - - // 清晰度,4000蓝光 - private int clarity; - - public DouyuLiveConfig(String roomId, int clarity, String videoPath, String videoName, boolean convertToMp4) { - super(roomId, videoPath, videoName, convertToMp4); - this.clarity = clarity; - } - - public DouyuLiveConfig(String roomId, String videoPath, String videoName) { - super(roomId, videoPath, videoName, true); - this.clarity = 4000; - } -} diff --git a/chopperbot-live/src/main/java/org/example/pojo/live/DouyuRecordConfig.java b/chopperbot-live/src/main/java/org/example/pojo/live/DouyuRecordConfig.java deleted file 
mode 100644 index 4dca4a1..0000000 --- a/chopperbot-live/src/main/java/org/example/pojo/live/DouyuRecordConfig.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.example.pojo.live; - -import lombok.Data; - -/** - * - * @author 燧枫 - * @date 2023/8/3 22:23 - */ -@Data -public class DouyuRecordConfig extends LiveConfig { - - private String startTime; - - private String endTime; - - public DouyuRecordConfig(String roomId, String videoPath, String videoName, String startTime, String endTime) { - super(roomId, videoPath, videoName, false); - this.startTime = startTime; - this.endTime = endTime; - } -} diff --git a/chopperbot-live/src/main/java/org/example/pojo/live/LiveConfig.java b/chopperbot-live/src/main/java/org/example/pojo/live/LiveConfig.java deleted file mode 100644 index bad3ede..0000000 --- a/chopperbot-live/src/main/java/org/example/pojo/live/LiveConfig.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.example.pojo.live; - -import lombok.Data; - -/** - * 直播下载配置 - * @author 燧枫 - * @date 2023/5/19 19:54 -*/ -@Data -public class LiveConfig { - - // 房间号 - private String roomId; - - // 视频保存路径 - private String videoPath; - - // 视频保存名称 - private String videoName; - - // 是否自动转换为mp4格式 - private boolean convertToMp4; - - public LiveConfig(String roomId, String videoPath, String videoName, boolean convertToMp4) { - this.roomId = roomId; - this.videoPath = videoPath; - this.videoName = videoName; - this.convertToMp4 = convertToMp4; - } -} diff --git a/chopperbot-test/src/test/java/org/example/live/LiveTest.java b/chopperbot-test/src/test/java/org/example/live/LiveTest.java index aa71fc1..a1a33f7 100644 --- a/chopperbot-test/src/test/java/org/example/live/LiveTest.java +++ b/chopperbot-test/src/test/java/org/example/live/LiveTest.java @@ -3,7 +3,7 @@ package org.example.live; import org.example.ConsoleApplication; import org.example.core.creeper.loadconfig.BilibiliLiveOnlineConfig; import org.example.core.creeper.loadconfig.DouyuLiveOnlineConfig; -import 
org.example.core.manager.TaskManager; +import org.example.core.manager.LiveDownloadManager; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.boot.test.context.SpringBootTest; @@ -19,21 +19,21 @@ import org.springframework.test.context.junit4.SpringRunner; public class LiveTest { @Test public void DouyuLive(){ - TaskManager taskManager = new TaskManager(5); + LiveDownloadManager liveDownLoadManager = new LiveDownloadManager(5); DouyuLiveOnlineConfig douyuLiveConfig = new DouyuLiveOnlineConfig( - "4333872", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "CF", true + "36252", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "CF", true ); try { // 向任务管理器中添加任务 - String taskId = taskManager.addTask(douyuLiveConfig); // 获取任务的标识符 - taskManager.showDownloadTable(taskId); + String taskId = liveDownLoadManager.addTask(douyuLiveConfig); // 获取任务的标识符 + liveDownLoadManager.showDownloadTable(taskId); int cnt = 100; while (cnt > 0) { cnt--; Thread.sleep(1000); } - taskManager.terminateThenSave(douyuLiveConfig,taskId); + liveDownLoadManager.terminateThenSave(douyuLiveConfig,taskId); } catch (Exception e) { e.printStackTrace(); } @@ -44,19 +44,19 @@ public class LiveTest { BilibiliLiveOnlineConfig liveConfig = new BilibiliLiveOnlineConfig("732", "E:\\Project\\ChopperBot\\config\\LiveRecord\\", "猪猪公主",false); // 创建下载任务管理器 - TaskManager taskManager = new TaskManager(5); + LiveDownloadManager liveDownLoadManager = new LiveDownloadManager(5); try { // 向任务管理器中添加任务 - String taskId = taskManager.addTask(liveConfig); // 获取任务的标识符 - taskManager.showDownloadTable(taskId); -// int cnt = 20; -// while (cnt > 0) { -// cnt--; -// Thread.sleep(1000); -// } -// -// taskManager.terminateThenSave(liveConfig,taskId); + String taskId = liveDownLoadManager.addTask(liveConfig); // 获取任务的标识符 + liveDownLoadManager.showDownloadTable(taskId); + int cnt = 600; + while (cnt > 0) { + cnt--; + Thread.sleep(1000); + } + + liveDownLoadManager.terminateThenSave(liveConfig,taskId); } 
catch (Exception e) { e.printStackTrace(); } diff --git a/config/chopperBotConfig.json b/config/chopperBotConfig.json index 73d441b..63edb72 100644 --- a/config/chopperBotConfig.json +++ b/config/chopperBotConfig.json @@ -18,8 +18,9 @@ "BarrageFileListen":false, "TaskCenter":true, "CreeperConfig":true, + "LiveDownLoadManager":true, "HotRecommendation":true } }, - "updateTime":"2023-08-26 15:13:13" -} \ No newline at end of file + "updateTime":"2023-08-31 17:18:49" +} diff --git a/doc/ReadMe.md b/doc/ReadMe.md index 87ec6bb..cbe8733 100644 --- a/doc/ReadMe.md +++ b/doc/ReadMe.md @@ -16,6 +16,6 @@ git commit -m 'deploy' # 推到你仓库的的 gh-page 分支 # 将 / 替换为你的信息 -git push -f git@github.com:twj666/ChopperBot-Doc.git master:pages +git push -f git@github.com:969025903/ChopperBot-Doc.git master:gh-pages ``` diff --git a/doc/docs/.vuepress/config/themeConfig.js b/doc/docs/.vuepress/config/themeConfig.js index 04b1f96..eef87c4 100644 --- a/doc/docs/.vuepress/config/themeConfig.js +++ b/doc/docs/.vuepress/config/themeConfig.js @@ -10,7 +10,7 @@ module.exports = { searchMaxSuggestions: 10, lastUpdated: "上次更新", - docsRepo: "/twj666/Chopper-Doc", + docsRepo: "/969025903/Chopper-Doc", docsDir: "docs", docsBranch: "master", editLinks: true, diff --git a/doc/docs/01.指南/01.开发指南/01.快速开发/02.模块与插件.md b/doc/docs/01.指南/01.开发指南/01.快速开发/02.模块与插件.md index 02bab69..fee460b 100644 --- a/doc/docs/01.指南/01.开发指南/01.快速开发/02.模块与插件.md +++ b/doc/docs/01.指南/01.开发指南/01.快速开发/02.模块与插件.md @@ -213,3 +213,21 @@ InitPluginRegister.getPlugin插件名); //注册插件 InitPluginRegister.register(CommonInitMachine); ``` +### 其他用法 + +#### CheckAndDo + +::: tip PluginCheckAndDo +有的插件可能在某些插件启动时有不同的功能,为了检测目标插件是否存在,并根据存在情况使用不同的方法,你可以使用PluginCheckAndDo方法 +::: +```java + PluginCheckAndDo.CheckAndDo( + (plugin)->{ + //存在时执行的方法 + }, + ()->{ + //不存在时执行的方法 + }, + PluginName.CREEPER_MANAGER_PLUGIN +); +``` diff --git a/doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md b/doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md new file mode 100644 index 
0000000..d983044 --- /dev/null +++ b/doc/docs/01.指南/01.开发指南/01.快速开发/03.如何编写爬虫.md @@ -0,0 +1,213 @@ +--- +title: 如何编写爬虫 +date: 2023-07-31 02:05:34 +permalink: /pages/63a89d/ +--- + +[[toc]] +## ChopperBot与爬虫 + +::: tip 简介 +如果说插件构成了ChopperBot这颗独特的星球,那么爬虫得到数据则是这颗星球的生命。没有生命的星球将是死气沉沉的,而没有爬虫的ChopperBot也将失去它的光泽。 +在此向各位开发者介绍如何在ChopperBot中编写一个规范的爬虫,并将其融合进ChopperBot中。 +::: + +如果你不知道什么是爬虫,或者如何编写,请参考下列文章: +- [java爬虫详解及简单实例](https://zhuanlan.zhihu.com/p/634122028) +- [爬虫框架WebMagic](https://webmagic.io/) + +## 编写ChopperBot爬虫 + +### 创建文件夹 +``` ++-- ChopperBot +| +-- 模块名 +| | +-- core +| | | +-- creeper +| | | | +-- loadconfig //爬虫参数文件 +| | | | +-- loadtask //爬虫主体任务 +| | | | +-- processor //后续任务处理 +| +-- 模块名 +``` + +在开始编写爬虫脚本前,请先确保你的模块下的core文件夹中有如上图的几个文件夹,如果已经拥有则可以开始进行爬虫脚本的编写了 + +### 创建LoadConfig +::: tip 说明 +LoadConfig是当前你要进行爬虫时使用的爬虫参数文件,它可以包含url,header,cookie等等信息,**也是爬虫脚本启动的必备文件** +::: + +1. 创建一个LoadConfig文件,包含你爬虫需要的必备信息 +2. 添加@Creeper注解,将脚本文件交给CreeperCenter管理 +```java +@Creeper(creeperName = "豆瓣书籍top250",loadTask = DouBanLoadTask.class,creeperDescription = "爬豆瓣爬的") +public class DouBanLoadConfig extends LoadConfig { + public DouBanLoadConfig() { + this.url = "https://book.douban.com/top250"; //爬虫url + //this.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"; + //this.Origin = "https://live.bilibili.com"; + //this.Referer = "https://live.bilibili.com/"; + } +} +``` +LoadConfig的其他参数 +```java +String url; +String startTime; +String UserAgent; +String Origin; +String Referer; +Map header; //头信息 +Map cookie; //Cookie +``` +如果你需要更多参数可以继承LoadConfig,自行添加,例如下面代码: +```java +public abstract class LoadVideoConfig extends LoadConfig { + protected String videoPath; + + // 视频保存名称 + protected String videoName; + + protected int clarity; + + public LoadVideoConfig(String videoPath, String videoName) { + this.videoPath = videoPath; + this.videoName = videoName; + } +} +``` +### 创建LoadTask +::: tip 说明
+LoadTask就是整个爬虫文件运行的核心,它负责读取LoadConfig中的数据,并进行爬虫!启动!**是爬虫脚本启动的必备文件**, +ChopperBot也提供了多种LoadTask类型的编写,目前结合了WebMagic框架,之后会结合更多爬虫框架。 +::: +编写一个最简单的脚本 +```java +/** + * 网上随便找的个爬豆瓣的demo,需要导入jsoup,不清楚能不能运行 + * 链接:https://www.w3cschool.cn/article/69979497.html + */ +public class DouBanLoadTask extends CommonLoadTask> { + + + public DouBanLoadTask(DouBanLoadConfig loadConfig) { + super(loadConfig); + } + + @Override + public ArrayList start() { + try { + // 连接到URL,并获取网页的文档对象 + Document doc = Jsoup.connect(loadConfig.getUrl()).get(); + // 选择所有包含书籍信息的元素 + Elements elements = doc.select("div.article > div.indent > table"); + // 遍历每个元素 + for (Element element : elements) { + // 提取书籍的标题 + String title = element.select("div.pl2 > a").attr("title"); + // 提取书籍的作者 + String author = element.select("p.pl").text().split("/")[0]; + // 提取书籍的评分 + String rating = element.select("span.rating_nums").text(); + // 提取书籍的简介 + String summary = element.select("span.inq").text(); + // 创建一个Book对象 + Book book = new Book(title, author, rating, summary); + // 将Book对象添加到列表中 + books.add(book); + } + } catch (IOException e) { + e.printStackTrace(); + } + return books; + } + + @Override + public void end() { + + } + + class Book{ + private String title; // 标题 + private String author; // 作者 + private String rating; // 评分 + private String summary; // 简介 + + + // 书籍的构造方法 + public Book(String title, String author, String rating, String summary) { + this.title = title; + this.author = author; + this.rating = rating; + this.summary = summary; + } + + } +} + +``` + +#### 其他的LoadTask + +- **CommonLoadTask**: +无特殊需求无框架的LoadTask,适用于编写所有类型的爬虫脚本 +- **WebMagicLoadTask**: +基于WebMagic框架开发的LoadTask,适用于快速开发多线程,深度广度等多种爬虫策略的LoadTask +> 1. 创建Spider +> ```SpiderFactory.buildSpider(平台名称,Processor,url)``` +> 2. 
获取最终结果 +> ```Object obj = getData(spider,url)``` +> ```java +> @Override +> public List start(){ +> +> List lives; +> +> Spider spider = SpiderFactory.buildSpider(ConstPool.PLATFORM.DOUYU.getName(), //平台名称 +> douyuHotLiveProcessor, //processor +> loadConfig.getUrl()); //url +> try { +> lives = getData(spider,loadConfig.getUrl()); +> }catch (Exception e){ +> fail(e); +> return null; +> } +> success(); +> return lives; +> } +> ``` +- ~~AsyncLoadTask(不完善,不推荐使用)~~: +异步的LoadTask,用于异步运行的爬虫脚本,得到的结果需要提供给result + +**至此你的爬虫脚本就编写完毕了,无需额外的学习,你可以当作写一个算法OR一个面向过程的程序,总之想要编写一个爬虫脚本很简单。除了创建几个必要的文件外和一行注释外,你只需要写下你的脚本代码即可** + +## 爬虫运行中心 +::: tip 说明 +如果你已经完成一个可以单独运行的爬虫脚本。你可以将你写好的爬虫脚本放入[TaskCenter](/pages/691628/)中来进行爬虫脚本的运行管理,监控,调度,以及本地存储,失败恢复等功能。 +::: +```java +CommonPlugin plugin = InitPluginRegister.getPlugin(PluginName.TASK_CENTER_PLUGIN); //获取TaskCenter插件 + +ReptileRequest request = new ReptileRequest(new DouyuHotModuleConfig(), //需要调度爬虫方法的参数文件 +(t)->{System.out.println("return val:"+t)} //爬虫任务完成后的callback方法 +) + +((TaskCenter)plugin).request(request); +``` +以上代码分为以下几点: +1. `TaskCenter`插件已经启动,并获取 +2. 构建`ReptileRequest` +3. 放入需要爬虫方法的文件,文件要确保有对应的`LoadTask` +4. 编写`CallBack`方法,这个是在`LoadTask`调用结束并返回值时执行的,t为返回的值,你可以自行处理返回的值 +5. 
发送爬虫请求给`TaskCenter`运行 + +## 爬虫脚本库 +当你需要跨模块调用或者快捷使用爬虫脚本时,你可以在`CreeperManager`插件中查找已添加`@Creeper`注解的爬虫脚本 +```java +CreeperManager manager = InitPluginRegister.getPlugin(PluginName.CREEPER_MANAGER_PLUGIN); //获取CreeperManager插件 +//根据名字获取爬虫脚本 +LoadTask task1 = manager.getLoadTask("豆瓣脚本") +//根据爬虫请求中的LoadConfig获得爬虫脚本 +LoadTask task2 = manager.getLoadTask(ReptileRequest) + +``` diff --git a/doc/docs/01.指南/01.开发指南/01.快速开发/03.爬虫模块.md b/doc/docs/01.指南/01.开发指南/01.快速开发/03.爬虫模块.md deleted file mode 100644 index 14b2628..0000000 --- a/doc/docs/01.指南/01.开发指南/01.快速开发/03.爬虫模块.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -title: 爬虫脚本编写 -date: 2023-07-31 02:05:34 -permalink: /pages/63a89d/ ---- diff --git a/doc/docs/index.md b/doc/docs/index.md index 86427bd..4656345 100644 --- a/doc/docs/index.md +++ b/doc/docs/index.md @@ -43,6 +43,7 @@ postList: none [twj666🤡](https://github.com/tmlgenius) [welsir🤡](https://github.com/tmlgenius) [masteryf🤡](https://github.com/masteryf) +[Klein422🤡](https://github.com/Klein422) ### TML生态圈 @@ -59,7 +60,7 @@ postList: none 欢迎各路好汉一起来参与完善 ChopperBot,我们期待你的 PR! - 贡献代码:代码地址 [ChopperBot](https://github.com/969025903/ChopperBot) ,欢迎提交 Issue 或者 Pull Requests -- 维护文档:文档地址 [ChopperBot-Doc](https://github.com/twj666/Chopper-Doc) ,欢迎参与翻译和修订 +- 维护文档:文档地址 [ChopperBot-Doc](https://github.com/969025903/ChopperBot-Doc/) ,欢迎参与翻译和修订 ### 友情链接