爬虫服务

This commit is contained in:
2025-11-11 11:57:58 +08:00
parent 3d742bf322
commit 7be02fe396
6 changed files with 265 additions and 11 deletions

View File

@@ -19,6 +19,18 @@
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>auth</artifactId>
@@ -34,11 +46,6 @@
<artifactId>api-all</artifactId>
<version>${school-news.version}</version>
</dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>ai</artifactId>
<version>${school-news.version}</version>
</dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>system</artifactId>

View File

@@ -113,6 +113,37 @@ school-news:
- "/file/download/**"
# Crawler runtime locations and the catalogue of schedulable crawl tasks.
# NOTE(review): both paths below are absolute Windows paths on drive F:,
# so they are specific to one machine -- confirm how other environments override them.
crawler:
python:
# NOTE(review): this value has no file name component, whereas the sibling
# config in this commit previously used C:/Python312/python.exe --
# confirm whether the consumer expects a directory or an executable.
path: F:\Environment\Conda\envs\shoolNewsCrewer
base:
# presumably the root against which the relative script paths below are resolved -- TODO confirm
path: F:/Project/schoolNews/schoolNewsCrawler
crontab:
items: # List of scheduled tasks that the frontend can choose from
- name: 人民日报新闻爬取
methods: # Crawl methods
# Every method below names the same task class and differs only in script path and params.
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbSearch.py
# NOTE(review): param values look like type names (String/Integer/Boolean)
# rather than actual values -- confirm against the consumer.
params:
query: String # Search keyword
total: Integer # Total number of news items
# This method declares no params.
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbHotPoint.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbTrending.py
params:
startDate: String # Start date
endDate: String # End date
# NOTE(review): key is spelled "isYestoday"; renaming it would require
# changing whatever reads this key, so it is left as-is.
isYestoday: Boolean # Whether it is yesterday
# 文件存储配置
file:
storage:

View File

@@ -14,7 +14,7 @@ public class CrontabItem {
private List<CrontabMethod> methods;
@Data
public class CrontabMethod {
public static class CrontabMethod {
private String name;
@JSONField(name = "class")
private String clazz;

View File

@@ -1,6 +1,6 @@
crawler:
python:
path: C:/Python312/python.exe
path: F:\Environment\Conda\envs\shoolNewsCrewer
base:
path: F:/Project/schoolNews/schoolNewsCrawler
@@ -10,16 +10,16 @@ crontab:
methods: #爬取方式
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
path: crawler/RmrbSearch.py
params:
query: String #搜索关键字
total: Integer #总新闻数量
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
path: crawler/RmrbHotPoint.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
path: crawler/RmrbTrending.py
params:
startDate: String #开始日期
endDate: String #结束日期