temp定时任务修改

This commit is contained in:
2025-11-10 16:03:50 +08:00
parent e8b76278e9
commit 9adbd6d365
38 changed files with 2710 additions and 2032 deletions

View File

@@ -0,0 +1,652 @@
# Java调用Python并获取返回结果详解
## 一、核心原理
Java通过 `ProcessBuilder` 或 `Runtime.exec()` 创建操作系统进程来执行Python脚本,然后通过进程的标准输入/输出流进行通信。
## 二、当前实现详解
### 1. 构建命令
```java
// 步骤1: 构建命令列表
List<String> command = new ArrayList<>();
// 步骤2: 处理Windows/Linux系统差异
String os = System.getProperty("os.name").toLowerCase();
if (os.contains("win")) {
// Windows系统需要通过cmd执行
command.add("cmd"); // 命令解释器
command.add("/c"); // /c表示执行后关闭
command.add(pythonPath); // python或python3
} else {
// Linux/Mac系统直接执行
command.add(pythonPath);
}
// 步骤3: 添加Python脚本和参数
command.add("main.py"); // Python脚本
command.add(category); // 参数1: 分类
command.add(limit); // 参数2: 数量
command.add(outputFile); // 参数3: 输出文件
```
**命令示例:**
- Windows: `cmd /c python main.py politics 20 output/news.json`
- Linux: `python3 main.py politics 20 output/news.json`
### 2. 创建进程
```java
// 创建进程构建器
ProcessBuilder processBuilder = new ProcessBuilder(command);
// 设置工作目录Python脚本所在目录
processBuilder.directory(scriptDir.toFile());
// 合并标准输出和错误输出(便于统一读取)
processBuilder.redirectErrorStream(true);
// 启动进程
Process process = processBuilder.start();
```
**关键点:**
- `directory()`: 设置工作目录,确保Python脚本能找到相对路径的资源
- `redirectErrorStream(true)`: 将stderr合并到stdout方便统一读取
- `start()`: 异步启动进程,不会阻塞
### 3. 读取输出流
```java
// 读取标准输出Python的print输出
StringBuilder output = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append("\n");
logger.debug("Python输出: {}", line);
}
}
```
**重要说明:**
- `process.getInputStream()`: 获取Python进程的标准输出
- 必须读取输出流,否则缓冲区满会导致进程阻塞
- 使用UTF-8编码避免中文乱码
### 4. 等待进程结束
```java
// 方式1: 带超时的等待(推荐)
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
if (!finished) {
// 超时后强制终止进程
process.destroy(); // 或 process.destroyForcibly() 强制终止
throw new RuntimeException("任务超时");
}
// 方式2: 无限等待(不推荐,可能导致死锁)
int exitCode = process.waitFor();
```
**退出码说明:**
- `0`: 执行成功
- `非0`: 执行失败(通常是错误码)
### 5. 获取返回结果
当前实现通过**文件传递**方式获取结果:
```java
// Python脚本将结果写入JSON文件
Path outputPath = scriptDir.resolve(outputFile);
// Java读取文件内容
String jsonContent = Files.readString(outputPath);
// 解析JSON
ObjectMapper mapper = new ObjectMapper();
List<Map<String, Object>> newsList = mapper.readValue(
jsonContent,
List.class
);
```
## 三、三种数据传递方式对比
### 方式1: 文件传递(当前实现)
**优点:**
- ✅ 适合大数据量
- ✅ 数据持久化,便于调试
- ✅ 实现简单
**缺点:**
- ⚠️ 需要文件I/O操作
- ⚠️ 需要管理临时文件
- ⚠️ 可能有并发问题(文件名冲突)
**实现示例:**
```java
// Java端
String outputFile = "output/result_" + System.currentTimeMillis() + ".json";
command.add(outputFile);
// Python端
import json
import sys
result = {"status": "success", "data": [...]}
with open(sys.argv[1], 'w', encoding='utf-8') as f:
json.dump(result, f, ensure_ascii=False)
```
### 方式2: 标准输出传递(适合小数据)
**优点:**
- ✅ 实时传输,无需文件
- ✅ 适合小数据量(< 1MB)
- ✅ 无文件管理开销
**缺点:**
- ⚠️ 大数据量可能阻塞
- ⚠️ 不能传递二进制数据
- ⚠️ 需要与日志输出区分
**实现示例:**
```java
// Java端读取标准输出
StringBuilder result = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
String line;
while ((line = reader.readLine()) != null) {
// 约定:以特定标记区分结果和日志
if (line.startsWith("RESULT:")) {
result.append(line.substring(7)); // 去掉"RESULT:"前缀
} else {
logger.info("Python日志: {}", line);
}
}
}
// 解析JSON结果
String jsonResult = result.toString();
ObjectMapper mapper = new ObjectMapper();
Map<String, Object> data = mapper.readValue(jsonResult, Map.class);
```
```python
# Python端输出结果
import json
import sys
# 日志输出到stderr
print("开始爬取...", file=sys.stderr)
# 结果输出到stdout带标记
result = {"status": "success", "data": [...]}
print("RESULT:" + json.dumps(result, ensure_ascii=False))
```
### 方式3: 标准输入传递参数(双向通信)
**优点:**
- ✅ 可以传递复杂参数
- ✅ 支持交互式通信
**缺点:**
- ⚠️ 实现复杂
- ⚠️ 需要处理流关闭时机
**实现示例:**
```java
// Java端通过标准输入传递参数
ProcessBuilder pb = new ProcessBuilder("python", "script.py");
Process process = pb.start();
// 写入参数到标准输入
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(process.getOutputStream(), "UTF-8"))) {
String params = "{\"category\":\"politics\",\"limit\":20}";
writer.write(params);
writer.newLine();
writer.flush();
}
// 关闭输入流告诉Python输入结束
process.getOutputStream().close();
// 读取输出
// ... 同方式2
```
```python
# Python端从标准输入读取参数
import json
import sys
# 读取参数
params_json = sys.stdin.readline().strip()
params = json.loads(params_json)
category = params.get("category", "politics")
limit = params.get("limit", 20)
# 执行爬取
result = crawl_news(category, limit)
# 输出结果
print(json.dumps(result, ensure_ascii=False))
```
## 四、完整优化实现
### 改进版实现(支持多种方式)
```java
package org.xyzh.crontab.task.newsTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
 * Reference implementation for invoking the Python crawler from Java.
 *
 * <p>Three ways of exchanging data with the script are offered:
 * <ul>
 *   <li>{@link #executeByFile} - the script writes a JSON file that is read back afterwards;</li>
 *   <li>{@link #executeByStdout} - the script prints the JSON result on its standard output;</li>
 *   <li>{@link #executeByStdin} - parameters are sent as one JSON line on the script's standard input.</li>
 * </ul>
 *
 * <p>All three share {@link #executeProcess}, which drains the child's output on a background
 * task so that the configured timeout is always enforced and the child can never block on a
 * full pipe buffer.
 */
@Component("newsCrewerTask")
public class NewsCrewerTask {

    private static final Logger logger = LoggerFactory.getLogger(NewsCrewerTask.class);

    private final ObjectMapper objectMapper = new ObjectMapper();

    /** Python interpreter to launch. */
    @Value("${crewer.python.path:python}")
    private String pythonPath;

    /** Directory containing the crawler scripts; used as the child's working directory. */
    @Value("${crewer.script.path:../schoolNewsCrewer}")
    private String scriptPath;

    /** Maximum run time of one script invocation, in seconds. */
    @Value("${crewer.timeout:300}")
    private int timeout;

    /**
     * Runs {@code main.py} and reads the result from the JSON file the script writes.
     *
     * @param category news category passed to the script
     * @param limit    maximum number of items passed to the script
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or its output cannot be read
     */
    public List<Map<String, Object>> executeByFile(String category, int limit) {
        logger.info("执行爬虫任务 - 文件方式");
        try {
            List<String> command = buildCommand("main.py", category, String.valueOf(limit));

            // A timestamped file name keeps concurrent runs from overwriting each other.
            String timestamp = String.valueOf(System.currentTimeMillis());
            String outputFile = String.format("output/news_%s_%s.json", category, timestamp);
            command.add(outputFile);

            ProcessResult result = executeProcess(command, null);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败: " + result.getOutput());
            }

            Path outputPath = Paths.get(scriptPath).resolve(outputFile);
            if (!Files.exists(outputPath)) {
                throw new RuntimeException("输出文件不存在: " + outputFile);
            }
            String jsonContent = Files.readString(outputPath, StandardCharsets.UTF_8);
            // The result file is intentionally kept for debugging; delete it here if that is not wanted:
            // Files.deleteIfExists(outputPath);
            return parseNewsList(jsonContent);
        } catch (Exception e) {
            throw taskFailure(e);
        }
    }

    /**
     * Runs {@code main_stdout.py} and takes the last output line that starts with
     * <code>{</code> or <code>[</code> as the JSON result.
     *
     * @param category news category passed to the script
     * @param limit    maximum number of items passed to the script
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or prints no JSON line
     */
    public List<Map<String, Object>> executeByStdout(String category, int limit) {
        logger.info("执行爬虫任务 - 标准输出方式");
        try {
            List<String> command = buildCommand("main_stdout.py", category, String.valueOf(limit));

            ProcessResult result = executeProcess(command, null);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败: " + result.getOutput());
            }

            String jsonLine = findLastJsonLine(result.getOutput());
            if (jsonLine == null) {
                throw new RuntimeException("未找到JSON结果");
            }
            return parseNewsList(jsonLine);
        } catch (Exception e) {
            throw taskFailure(e);
        }
    }

    /**
     * Runs {@code main_stdin.py}, sending the parameters as a single JSON line on the
     * script's standard input and parsing its whole output as the JSON result.
     *
     * @param category news category sent to the script
     * @param limit    maximum number of items sent to the script
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or its output is not valid JSON
     */
    public List<Map<String, Object>> executeByStdin(String category, int limit) {
        logger.info("执行爬虫任务 - 标准输入方式");
        try {
            List<String> command = buildCommand("main_stdin.py");
            Map<String, Object> params = Map.of(
                    "category", category,
                    "limit", limit
            );
            String paramsJson = objectMapper.writeValueAsString(params);

            ProcessResult result = executeProcess(command, paramsJson);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败退出码: " + result.getExitCode());
            }
            return parseNewsList(result.getOutput().trim());
        } catch (Exception e) {
            throw taskFailure(e);
        }
    }

    /**
     * Starts the command in the script directory, optionally feeds one line to its standard
     * input, and waits for it to finish within the configured timeout.
     *
     * <p>stderr is merged into stdout. The output is read on a background task started
     * before waiting, so the wait is never blocked by an undrained pipe.
     *
     * @param command      full command line, one argument per element
     * @param stdinPayload line to write to the child's standard input (the stream is closed
     *                     afterwards), or {@code null} to leave standard input untouched
     * @return exit code, captured output and duration of the run
     * @throws IOException          if the process cannot be started, written to, or read from
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws RuntimeException     if the process does not finish within the timeout
     */
    private ProcessResult executeProcess(List<String> command, String stdinPayload)
            throws IOException, InterruptedException {
        long startTime = System.currentTimeMillis();

        ProcessBuilder pb = new ProcessBuilder(command);
        pb.directory(Paths.get(scriptPath).toFile());
        pb.redirectErrorStream(true);
        logger.info("执行命令: {}", String.join(" ", command));

        Process process = pb.start();
        try {
            // Start draining before anything else so neither side can stall on a full buffer.
            CompletableFuture<String> outputFuture =
                    CompletableFuture.supplyAsync(() -> readAllOutput(process));

            if (stdinPayload != null) {
                // Closing the writer signals end-of-input to the script.
                try (BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
                    writer.write(stdinPayload);
                    writer.newLine();
                }
            }

            if (!process.waitFor(timeout, TimeUnit.SECONDS)) {
                // The finally block below kills the still-running process.
                throw new RuntimeException("任务超时(超过" + timeout + "秒)");
            }

            String outputStr;
            try {
                outputStr = outputFuture.get(5, TimeUnit.SECONDS);
            } catch (ExecutionException | TimeoutException e) {
                throw new IOException("读取Python输出失败", e);
            }

            int exitCode = process.exitValue();
            long duration = System.currentTimeMillis() - startTime;
            logger.info("进程执行完成 - 退出码: {}, 耗时: {}ms", exitCode, duration);
            return new ProcessResult(exitCode, outputStr, duration);
        } finally {
            // Covers timeout, interruption and I/O failures alike.
            if (process.isAlive()) {
                process.destroyForcibly();
            }
        }
    }

    /**
     * Reads the child's (merged) output to the end as UTF-8 text.
     *
     * @return all lines joined with {@code \n}, or an empty string if reading fails
     */
    private String readAllOutput(Process process) {
        StringBuilder collected = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                collected.append(line).append("\n");
                logger.debug("Python输出: {}", line);
            }
            return collected.toString();
        } catch (IOException e) {
            logger.error("读取输出失败", e);
            return "";
        }
    }

    /**
     * Scans the output from the last line backwards for a line that looks like JSON.
     *
     * @return the trimmed line, or {@code null} if no line starts with <code>{</code> or <code>[</code>
     */
    private static String findLastJsonLine(String output) {
        String[] lines = output.split("\n");
        for (int i = lines.length - 1; i >= 0; i--) {
            String candidate = lines[i].trim();
            if (candidate.startsWith("{") || candidate.startsWith("[")) {
                return candidate;
            }
        }
        return null;
    }

    /** Parses a JSON array of objects into a list of maps. */
    private List<Map<String, Object>> parseNewsList(String json) throws IOException {
        return objectMapper.readValue(
                json,
                objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class)
        );
    }

    /**
     * Logs the failure and wraps it for the caller; restores the interrupt flag when the
     * cause is an {@link InterruptedException} so the scheduler thread can still observe it.
     */
    private RuntimeException taskFailure(Exception e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        logger.error("执行失败", e);
        return new RuntimeException("爬虫任务执行失败", e);
    }

    /**
     * Builds the command line: the interpreter (through {@code cmd /c} on Windows) followed
     * by the given script name and arguments.
     */
    private List<String> buildCommand(String... args) {
        List<String> command = new ArrayList<>();
        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            command.add("cmd");
            command.add("/c");
        }
        command.add(pythonPath);
        for (String arg : args) {
            command.add(arg);
        }
        return command;
    }

    /** Immutable outcome of one process run. */
    private static class ProcessResult {
        private final int exitCode;
        private final String output;
        private final long duration;

        public ProcessResult(int exitCode, String output, long duration) {
            this.exitCode = exitCode;
            this.output = output;
            this.duration = duration;
        }

        /** @return the process exit code */
        public int getExitCode() {
            return exitCode;
        }

        /** @return the captured stdout/stderr text */
        public String getOutput() {
            return output;
        }

        /** @return the elapsed time in milliseconds */
        public long getDuration() {
            return duration;
        }
    }
}
```
## 五、关键注意事项
### 1. 必须读取输出流
**错误示例:**
```java
Process process = pb.start();
int exitCode = process.waitFor(); // 可能永远阻塞!
```
**原因:** 如果输出缓冲区满了,Python进程会阻塞,等待Java端读取
**正确做法:**
```java
Process process = pb.start();
// 必须读取输出流
Thread outputThread = new Thread(() -> {
try (BufferedReader reader = ...) {
// 读取输出
}
});
outputThread.start();
process.waitFor();
```
### 2. 处理编码问题
```java
// 指定UTF-8编码避免中文乱码
new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)
new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8)
```
### 3. 超时控制
```java
// 使用带超时的waitFor
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
if (!finished) {
process.destroyForcibly(); // 强制终止
}
```
### 4. 资源清理
```java
try {
// 执行逻辑
} finally {
if (process != null && process.isAlive()) {
process.destroyForcibly();
}
// 关闭流
process.getInputStream().close();
process.getOutputStream().close();
process.getErrorStream().close();
}
```
### 5. 错误处理
```java
// 检查退出码
if (exitCode != 0) {
// 读取错误输出
String error = readErrorStream(process);
throw new RuntimeException("执行失败: " + error);
}
```
## 六、性能优化建议
1. **使用线程池**:如果频繁调用,使用线程池管理进程
2. **连接复用**:考虑Python服务模式(HTTP/gRPC)
3. **异步执行**:使用CompletableFuture异步执行
4. **缓存结果**:对相同参数的请求缓存结果
## 七、总结
- **文件传递**:适合大数据量,当前实现方式
- **标准输出**:适合小数据量,实时传输
- **标准输入**:适合复杂参数,双向通信
根据实际需求选择合适的方式,当前的文件传递方式已经足够好。

View File

@@ -25,6 +25,11 @@
<artifactId>api-crontab</artifactId>
<version>${school-news.version}</version>
</dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>api-news</artifactId>
<version>${school-news.version}</version>
</dependency>
<!-- Common模块依赖 -->
<dependency>
@@ -38,6 +43,10 @@
<artifactId>system</artifactId>
<version>${school-news.version}</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<!-- Spring Boot Web -->
<dependency>

View File

@@ -0,0 +1,21 @@
package org.xyzh.crontab.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.xyzh.crontab.task.newsTask.ScriptDomain;
import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import java.util.List;
/**
 * Holder for crawler settings, declared under the {@code crawler} property prefix.
 * Accessors are generated by Lombok's {@code @Data}.
 *
 * NOTE(review): the fields combine {@code @Value} placeholders with
 * {@code @ConfigurationProperties}. {@code @Value} does not normally convert a property
 * into a list of custom objects, so confirm that {@code scripts} is actually populated;
 * if prefix binding is intended instead, the bound keys would differ from the ones below.
 */
@Data
@ConfigurationProperties(prefix = "crawler")
public class CrawlerProperties {
// Injected from the crawler.base.path property.
@Value("${crawler.base.path}")
private String basePath;
// Injected from the crawler.script property.
@Value("${crawler.script}")
private List<ScriptDomain> scripts;
}

View File

@@ -0,0 +1,8 @@
package org.xyzh.crontab.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Placeholder configuration class for the {@code crontab} property prefix; it declares
 * no properties yet.
 *
 * NOTE(review): the class name looks like a typo of "CrontabProperties"; renaming it
 * would also require renaming the file and any references.
 */
@ConfigurationProperties(prefix = "crontab")
public class CrontabPrpperties {
}

View File

@@ -10,6 +10,13 @@ import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.core.page.PageRequest;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.common.dto.crontab.TbCrontabLog;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.crontab.pojo.CrontabItem;
import java.util.Date;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
/**
* @description 定时任务控制器
@@ -27,204 +34,64 @@ public class CrontabController {
@Autowired
private CrontabService crontabService;
// ----------------定时任务管理--------------------------------
/**
* @description 创建定时任务
* @param task 任务对象
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 获取可创建定时任务
* @return
*/
@PostMapping("/task")
public ResultDomain<TbCrontabTask> createTask(@RequestBody TbCrontabTask task) {
return crontabService.createTask(task);
@GetMapping("/getEnabledCrontabList")
public ResultDomain<CrontabItem> getEnabledCrontabList(@RequestParam String param) {
return null;
}
/**
* @description 更新定时任务
* @param task 任务对象
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 创建定时任务
* @param crontabItem
* @return
*/
@PutMapping("/task")
public ResultDomain<TbCrontabTask> updateTask(@RequestBody TbCrontabTask task) {
return crontabService.updateTask(task);
@PostMapping("/crontabTask")
public ResultDomain<TbCrontabTask> createCrontab(@RequestBody TbCrontabTask crontabItem) {
return null;
}
/**
* @description 删除定时任务
* @param task 任务对象
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 更新定时任务
* @param crontabItem
* @return
*/
@DeleteMapping("/task")
public ResultDomain<TbCrontabTask> deleteTask(@RequestBody TbCrontabTask task) {
return crontabService.deleteTask(task.getID());
@PutMapping("/crontabTask")
public ResultDomain<TbCrontabTask> updateCrontab(@RequestBody TbCrontabTask crontabItem) {
return null;
}
/**
* @description 根据ID查询任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 删除定时任务
* @param crontabItem
* @return
*/
@GetMapping("/task/{taskId}")
public ResultDomain<TbCrontabTask> getTaskById(@PathVariable(value = "taskId") String taskId) {
return crontabService.getTaskById(taskId);
@DeleteMapping("/crontabTask")
public ResultDomain<TbCrontabTask> deleteCrontab(@RequestBody TbCrontabTask crontabItem) {
return null;
}
/**
* @description 查询任务列表
* @param filter 过滤条件
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 获取定时任务分页列表
* @param pageParam
* @return
*/
@PostMapping("/task/list")
public ResultDomain<TbCrontabTask> getTaskList(@RequestBody TbCrontabTask filter) {
return crontabService.getTaskList(filter);
@PostMapping("/crontabTaskPage")
public ResultDomain<TbCrontabTask> getCrontabTask(@RequestBody PageRequest<TbCrontabTask> pageRequest) {
return null;
}
/**
* @description 分页查询任务列表
* @param pageRequest 分页请求对象
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
* 获取定时任务日志分页列表
* @param pageRequest
* @return
*/
@PostMapping("/task/page")
public ResultDomain<TbCrontabTask> getTaskPage(@RequestBody PageRequest<TbCrontabTask> pageRequest) {
TbCrontabTask filter = pageRequest.getFilter();
PageParam pageParam = pageRequest.getPageParam();
return crontabService.getTaskPage(filter, pageParam);
}
/**
* @description 启动定时任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/start/{taskId}")
public ResultDomain<TbCrontabTask> startTask(@PathVariable(value = "taskId") String taskId) {
return crontabService.startTask(taskId);
}
/**
* @description 暂停定时任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/pause/{taskId}")
public ResultDomain<TbCrontabTask> pauseTask(@PathVariable(value = "taskId") String taskId) {
return crontabService.pauseTask(taskId);
}
/**
* @description 立即执行一次任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/execute/{taskId}")
public ResultDomain<TbCrontabTask> executeTaskOnce(@PathVariable(value = "taskId") String taskId) {
return crontabService.executeTaskOnce(taskId);
}
/**
* @description 验证Cron表达式
* @param cronExpression Cron表达式
* @return ResultDomain<String>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/task/validate")
public ResultDomain<String> validateCronExpression(@RequestParam String cronExpression) {
return crontabService.validateCronExpression(cronExpression);
}
// ----------------定时任务日志--------------------------------
/**
* @description 根据任务ID查询日志
* @param taskId 任务ID
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/log/task/{taskId}")
public ResultDomain<TbCrontabLog> getLogsByTaskId(@PathVariable(value = "taskId") String taskId) {
return crontabService.getLogsByTaskId(taskId);
}
/**
* @description 查询日志列表
* @param filter 过滤条件
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/log/list")
public ResultDomain<TbCrontabLog> getLogList(@RequestBody TbCrontabLog filter) {
return crontabService.getLogList(filter);
}
/**
* @description 分页查询日志列表
* @param pageRequest 分页请求对象
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/log/page")
public ResultDomain<TbCrontabLog> getLogPage(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
TbCrontabLog filter = pageRequest.getFilter();
PageParam pageParam = pageRequest.getPageParam();
return crontabService.getLogPage(filter, pageParam);
}
/**
* @description 根据ID查询日志详情
* @param logId 日志ID
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/log/{logId}")
public ResultDomain<TbCrontabLog> getLogById(@PathVariable(value = "logId") String logId) {
return crontabService.getLogById(logId);
}
/**
* @description 清理指定天数之前的日志
* @param days 天数
* @return ResultDomain<Integer>
* @author yslg
* @since 2025-10-25
*/
@DeleteMapping("/log/clean/{days}")
public ResultDomain<Integer> cleanLogs(@PathVariable(value = "days") Integer days) {
return crontabService.cleanLogs(days);
}
/**
* @description 删除日志
* @param log 日志对象
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@DeleteMapping("/log")
public ResultDomain<TbCrontabLog> deleteLog(@RequestBody TbCrontabLog log) {
return crontabService.deleteLog(log.getID());
@PostMapping("/crontabTaskLogPage")
public ResultDomain<TbCrontabLog> getCrontabTaskLog(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
return null;
}
}

View File

@@ -0,0 +1,62 @@
package org.xyzh.crontab.controller;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.core.page.PageRequest;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.vo.DataCollectionItemVO;
import java.util.List;
/**
 * REST controller for data collection items, mapped under {@code /crontab/collection/item}.
 * All endpoints are currently unimplemented stubs that return {@code null}.
 *
 * @description Data collection item controller
 * @filename DataCollectionItemController.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
@RestController
@RequestMapping("/crontab/collection/item")
public class DataCollectionItemController {
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemController.class);
// Injected service; not yet called by any of the stub endpoints below.
@Autowired
private DataCollectionItemService itemService;
/**
 * @description Lists all data collection items created by one task log.
 *              Not implemented yet: always returns null.
 * @param taskLogId ID of the task log, taken from the request path
 * @return currently always null
 */
@GetMapping("/task/{taskLogId}")
public ResultDomain<DataCollectionItemVO> getTaskLogDataCollectionItemList(@PathVariable String taskLogId) {
return null;
}
/**
 * @description Returns a page of data collection items.
 *              Not implemented yet: always returns null.
 * @param pageRequest paging request carried in the request body
 * @return currently always null
 */
@PostMapping("/page")
public ResultDomain<DataCollectionItemVO> getCollectionItemPage(@RequestBody PageRequest<DataCollectionItemVO> pageRequest) {
return null;
}
/**
 * @description Converts a data collection item into an article.
 *              Not implemented yet: always returns null.
 * @param dataCollectionItem the collection item carried in the request body
 * @return currently always null
 */
@PostMapping("/resource")
public ResultDomain<DataCollectionItemVO> convertToArticle(@RequestBody DataCollectionItemVO dataCollectionItem) {
return null;
}
}

View File

@@ -2,14 +2,17 @@ package org.xyzh.crontab.enums;
import java.util.Arrays;
import org.xyzh.crontab.task.DataBackupTask;
import org.xyzh.crontab.task.LogCleanTask;
import org.xyzh.crontab.task.SystemStatisticsTask;
import org.xyzh.crontab.task.newsTask.NewsCrawlerTask;
// import org.xyzh.crontab.task.DataBackupTask;
// import org.xyzh.crontab.task.LogCleanTask;
// import org.xyzh.crontab.task.SystemStatisticsTask;
public enum TaskEnums {
DATA_BACKUP("dataBackup", DataBackupTask.class),
LOG_CLEAN("logClean", LogCleanTask.class),
SystemStatistics("systemStatistics", SystemStatisticsTask.class);
// DATA_BACKUP("dataBackup", DataBackupTask.class),
// LOG_CLEAN("logClean", LogCleanTask.class),
// SystemStatistics("systemStatistics", SystemStatisticsTask.class);
NEWS_CRAWLER("newsCrawler", NewsCrawlerTask.class);
private String name;

View File

@@ -0,0 +1,86 @@
package org.xyzh.crontab.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import java.util.List;
/**
 * @description Data access layer for data collection items
 * @filename DataCollectionItemMapper.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
@Mapper
public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionItem> {
/**
 * @description Finds a collection item by its source URL (used for de-duplication)
 * @param sourceUrl source URL
 * @return TbDataCollectionItem the collection item
 * @author yslg
 * @since 2025-11-08
 */
TbDataCollectionItem selectBySourceUrl(@Param("sourceUrl") String sourceUrl);
/**
 * @description Lists the collection items belonging to a task
 * @param taskId task ID
 * @return List<TbDataCollectionItem> list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectByTaskId(@Param("taskId") String taskId);
/**
 * @description Lists collection items matching a filter
 * @param filter filter conditions
 * @return List<TbDataCollectionItem> list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectItemList(TbDataCollectionItem filter);
/**
 * @description Lists one page of collection items matching a filter
 * @param filter filter conditions
 * @param pageParam paging parameters
 * @return List<TbDataCollectionItem> list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectItemPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam);
/**
 * @description Counts the collection items matching a filter
 * @param filter filter conditions
 * @return long total count
 * @author yslg
 * @since 2025-11-08
 */
long countItems(@Param("filter") TbDataCollectionItem filter);
/**
 * @description Inserts collection items in batch
 * @param itemList list of collection items
 * @return int number of affected rows
 * @author yslg
 * @since 2025-11-08
 */
int batchInsertItems(@Param("itemList") List<TbDataCollectionItem> itemList);
/**
 * @description Counts collection items by status
 * @param taskId task ID (optional)
 * @param status status
 * @return long count
 * @author yslg
 * @since 2025-11-08
 */
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
}

View File

@@ -0,0 +1,22 @@
package org.xyzh.crontab.pojo;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
import java.util.Map;
/**
 * A named crontab entry together with the methods it offers.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
@NoArgsConstructor
public class CrontabItem {
    private String name;
    private List<CrontabMethod> methods;

    /**
     * One method entry of a {@link CrontabItem}.
     *
     * <p>Declared {@code static} so that it can be created without an enclosing
     * {@code CrontabItem} instance. A non-static inner class cannot be instantiated
     * reflectively (for example by JSON or configuration binding) and keeps a hidden
     * reference to its outer object.
     */
    @Data
    public static class CrontabMethod {
        private String name;
        private String clazz;
        private String path;
        private Map<String, Object> params;
    }
}

View File

@@ -65,7 +65,12 @@ public class TaskExecutor {
if (task.getMethodParams() != null && !task.getMethodParams().isEmpty()) {
// 如果有参数,需要解析参数类型
method = bean.getClass().getMethod(task.getMethodName(), String.class);
method.invoke(bean, task.getMethodParams());
// 如果是newsCrewerTask将taskId添加到参数前面
String methodParams = task.getMethodParams();
if ("newsCrewerTask".equals(task.getBeanName()) && task.getTaskId() != null) {
methodParams = task.getTaskId() + "|" + methodParams;
}
method.invoke(bean, methodParams);
} else {
// 无参方法
method = bean.getClass().getMethod(task.getMethodName());

View File

@@ -0,0 +1,15 @@
package org.xyzh.crontab.service;
import org.xyzh.api.crontab.DataCollectionItemService;
/**
 * @description Data collection item service interface; extends the API interface
 *              {@link DataCollectionItemService} and adds no methods of its own.
 * @filename NCDataCollectionItemService.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
public interface NCDataCollectionItemService extends DataCollectionItemService {
}

View File

@@ -0,0 +1,479 @@
package org.xyzh.crontab.service.impl;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.api.news.resource.ResourceService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.dto.resource.TbResource;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.common.vo.DataCollectionItemVO;
import org.xyzh.common.vo.ResourceVO;
import org.xyzh.crontab.mapper.DataCollectionItemMapper;
import org.xyzh.crontab.mapper.CrontabTaskMapper;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.system.utils.LoginUtil;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
/**
* @description 数据采集项服务实现类
* @filename DataCollectionItemServiceImpl.java
* @author yslg
* @copyright xyzh
* @since 2025-11-08
*/
@Service
public class DataCollectionItemServiceImpl implements DataCollectionItemService {
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemServiceImpl.class);
@Autowired
private DataCollectionItemMapper itemMapper;
@Autowired
private CrontabTaskMapper taskMapper;
@Autowired
private ResourceService resourceService;
private final ObjectMapper objectMapper = new ObjectMapper();
/**
 * Creates a single collection item.
 *
 * <p>An item whose non-empty source URL already exists is rejected. Otherwise a new ID,
 * the creation time and the not-deleted flag are set, missing status and crawl time get
 * defaults, and the item is inserted. Any exception is logged and reported as a failure.
 *
 * @param item the item to create
 * @return the created item on success, otherwise a failure result
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> createItem(TbDataCollectionItem item) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        // De-duplicate by source URL when one is present.
        boolean hasUrl = item.getSourceUrl() != null && !item.getSourceUrl().isEmpty();
        if (hasUrl && itemMapper.selectBySourceUrl(item.getSourceUrl()) != null) {
            response.fail("该文章已存在URL: " + item.getSourceUrl());
            return response;
        }

        item.setID(IDUtils.generateID());
        item.setCreateTime(new Date());
        item.setDeleted(false);

        // Defaults: status 0 (unprocessed) and crawl time "now".
        if (item.getStatus() == null) {
            item.setStatus(0);
        }
        if (item.getCrawlTime() == null) {
            item.setCrawlTime(new Date());
        }

        int affectedRows = itemMapper.insert(item);
        if (affectedRows <= 0) {
            response.fail("创建采集项失败");
        } else {
            logger.info("创建采集项成功: {}", item.getTitle());
            response.success("创建采集项成功", item);
        }
    } catch (Exception ex) {
        logger.error("创建采集项异常: ", ex);
        response.fail("创建采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Creates several collection items, one insert per item.
 *
 * <p>Null elements and items whose non-empty source URL already exists are skipped.
 * Every remaining item gets a new ID, the shared creation time, the not-deleted flag and
 * defaults for status (0) and crawl time. Only inserts that actually affected a row are
 * counted as successful, matching the check done in {@code createItem}.
 *
 * <p>NOTE(review): exceptions are caught and reported as a failure result, so the
 * surrounding transaction is not rolled back by them; items inserted before the failure
 * stay inserted. Confirm whether that best-effort behaviour is intended.
 *
 * @param itemList items to create
 * @return the number of items inserted, or a failure result if the list is null/empty
 *         or an exception occurs
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList) {
    ResultDomain<Integer> resultDomain = new ResultDomain<>();
    try {
        if (itemList == null || itemList.isEmpty()) {
            resultDomain.fail("采集项列表为空");
            return resultDomain;
        }
        int successCount = 0;
        Date now = new Date();
        for (TbDataCollectionItem item : itemList) {
            // A null element must not abort the rest of the batch.
            if (item == null) {
                logger.debug("跳过空的采集项");
                continue;
            }
            // De-duplicate by source URL when one is present.
            String sourceUrl = item.getSourceUrl();
            if (sourceUrl != null && !sourceUrl.isEmpty()
                    && itemMapper.selectBySourceUrl(sourceUrl) != null) {
                logger.debug("跳过已存在的采集项: {}", sourceUrl);
                continue;
            }
            item.setID(IDUtils.generateID());
            item.setCreateTime(now);
            item.setDeleted(false);
            if (item.getStatus() == null) {
                item.setStatus(0);
            }
            if (item.getCrawlTime() == null) {
                item.setCrawlTime(now);
            }
            // Count the item only when the insert really wrote a row.
            if (itemMapper.insert(item) > 0) {
                successCount++;
            } else {
                logger.warn("采集项插入失败: {}", sourceUrl);
            }
        }
        logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
        resultDomain.success("批量创建采集项成功", successCount);
    } catch (Exception e) {
        logger.error("批量创建采集项异常: ", e);
        resultDomain.fail("批量创建采集项异常: " + e.getMessage());
    }
    return resultDomain;
}
/**
 * Updates a collection item by ID after stamping its update time.
 *
 * @param item the item to update; its ID must be set
 * @return the updated item on success, otherwise a failure result (missing ID, no row
 *         updated, or an exception)
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> updateItem(TbDataCollectionItem item) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        if (item.getID() == null) {
            response.fail("采集项ID不能为空");
            return response;
        }
        item.setUpdateTime(new Date());
        int affectedRows = itemMapper.updateById(item);
        if (affectedRows <= 0) {
            response.fail("更新采集项失败");
        } else {
            logger.info("更新采集项成功: {}", item.getID());
            response.success("更新采集项成功", item);
        }
    } catch (Exception ex) {
        logger.error("更新采集项异常: ", ex);
        response.fail("更新采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Deletes a collection item by ID through the mapper's {@code deleteById}.
 *
 * @param itemId ID of the item to delete; must be non-empty
 * @return a success result without data, or a failure result (empty ID, no row deleted,
 *         or an exception)
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> deleteItem(String itemId) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        boolean missingId = itemId == null || itemId.isEmpty();
        if (missingId) {
            response.fail("采集项ID不能为空");
            return response;
        }
        int affectedRows = itemMapper.deleteById(itemId);
        if (affectedRows <= 0) {
            response.fail("删除采集项失败");
        } else {
            logger.info("删除采集项成功ID: {}", itemId);
            // The cast selects the single-object overload of success(...).
            response.success("删除采集项成功", (TbDataCollectionItem) null);
        }
    } catch (Exception ex) {
        logger.error("删除采集项异常: ", ex);
        response.fail("删除采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Loads one collection item by ID and converts it to its view object.
 *
 * @param itemId ID of the item; must be non-empty
 * @return the item's VO on success, otherwise a failure result (empty ID, item not
 *         found, or an exception)
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemById(String itemId) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        boolean missingId = itemId == null || itemId.isEmpty();
        if (missingId) {
            response.fail("采集项ID不能为空");
            return response;
        }
        TbDataCollectionItem entity = itemMapper.selectById(itemId);
        if (entity == null) {
            response.fail("采集项不存在");
        } else {
            response.success("查询成功", buildVO(entity));
        }
    } catch (Exception ex) {
        logger.error("查询采集项异常: ", ex);
        response.fail("查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Lists collection items matching the filter, excluding logically deleted rows.
 *
 * <p>The filter is mutated: its {@code deleted} flag is forced to {@code false}. A null
 * filter is replaced by an empty one.
 *
 * @param filter query conditions; may be null
 * @return a result carrying the matching items as VOs, or a failure result on exception
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemList(TbDataCollectionItem filter) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        TbDataCollectionItem criteria = (filter != null) ? filter : new TbDataCollectionItem();
        criteria.setDeleted(false);
        List<TbDataCollectionItem> rows = itemMapper.selectItemList(criteria);
        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());
        response.success("查询成功", views);
    } catch (Exception ex) {
        logger.error("查询采集项列表异常: ", ex);
        response.fail("查询采集项列表异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Returns one page of collection items matching the filter, excluding logically deleted rows.
 *
 * <p>Null arguments are replaced by fresh instances. The filter's {@code deleted} flag is
 * forced to {@code false}, and the page parameter is updated in place with the total element
 * count and the total page count (total divided by page size, rounded up).
 *
 * @param filter    query conditions; may be null
 * @param pageParam paging parameters; may be null
 * @return a result carrying the page of VOs, or a failure result on exception
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemPage(TbDataCollectionItem filter, PageParam pageParam) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        TbDataCollectionItem criteria = (filter != null) ? filter : new TbDataCollectionItem();
        criteria.setDeleted(false);
        PageParam paging = (pageParam != null) ? pageParam : new PageParam();

        List<TbDataCollectionItem> rows = itemMapper.selectItemPage(criteria, paging);
        long total = itemMapper.countItems(criteria);
        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());

        PageDomain<DataCollectionItemVO> page = new PageDomain<>();
        page.setDataList(views);
        paging.setTotalElements(total);
        int totalPages = (int) Math.ceil((double) total / paging.getPageSize());
        paging.setTotalPages(totalPages);
        page.setPageParam(paging);
        response.success("查询成功", page);
    } catch (Exception ex) {
        logger.error("分页查询采集项异常: ", ex);
        response.fail("分页查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Lists all collection items produced by the given scheduled task.
 *
 * @param taskId ID of the task; must be non-null and non-empty
 * @return a result carrying the task's items as VOs, or a failure result when the ID is
 *         missing or the query throws
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemsByTaskId(String taskId) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        boolean taskIdMissing = taskId == null || taskId.isEmpty();
        if (taskIdMissing) {
            response.fail("任务ID不能为空");
            return response;
        }
        List<TbDataCollectionItem> rows = itemMapper.selectByTaskId(taskId);
        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());
        response.success("查询成功", views);
    } catch (Exception ex) {
        logger.error("根据任务ID查询采集项异常: ", ex);
        response.fail("根据任务ID查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Converts a collected item into a published resource under the given tag.
 *
 * <p>Steps: validate the arguments, load the item, reject it if it is already converted
 * (status 1), create the resource through {@code resourceService}, then mark the item as
 * converted and record the resource ID, processing time and current user.
 *
 * <p>The status check is null-safe: an item whose status is null is treated as unprocessed,
 * consistent with how {@code buildVO} reports {@code canConvert}.
 *
 * <p>NOTE(review): exceptions are caught and returned as a failure result, so the
 * {@code @Transactional} rollback rule is not triggered by them. If the item update fails
 * after the resource was created, the resource may remain — confirm whether that is intended.
 *
 * @param itemId ID of the item to convert; must be non-null and non-empty
 * @param tagId  tag assigned to the new resource; must be non-null and non-empty
 * @return a result carrying the new resource ID, or a failure result
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<String> convertToResource(String itemId, String tagId) {
    ResultDomain<String> resultDomain = new ResultDomain<>();
    try {
        if (itemId == null || itemId.isEmpty()) {
            resultDomain.fail("采集项ID不能为空");
            return resultDomain;
        }
        if (tagId == null || tagId.isEmpty()) {
            resultDomain.fail("标签ID不能为空");
            return resultDomain;
        }
        // Load the item to convert.
        TbDataCollectionItem item = itemMapper.selectById(itemId);
        if (item == null) {
            resultDomain.fail("采集项不存在");
            return resultDomain;
        }
        // equals() instead of ==: status may be null for legacy rows, and unboxing would throw.
        if (Integer.valueOf(1).equals(item.getStatus())) {
            resultDomain.fail("该采集项已转换为资源");
            return resultDomain;
        }

        // One timestamp for the whole conversion keeps the stored times consistent.
        Date now = new Date();

        // Build the resource from the item's content.
        TbResource resource = new TbResource();
        resource.setResourceID(IDUtils.generateID());
        resource.setTitle(item.getTitle());
        resource.setContent(item.getContent());
        resource.setSummary(item.getSummary());
        resource.setCoverImage(item.getCoverImage());
        resource.setTagID(tagId);
        resource.setAuthor(item.getAuthor());
        resource.setSource(item.getSource());
        resource.setSourceUrl(item.getSourceUrl());
        resource.setPublishTime(item.getPublishTime() != null ? item.getPublishTime() : now);
        resource.setStatus(1); // published
        resource.setViewCount(0);
        resource.setLikeCount(0);
        resource.setCollectCount(0);
        resource.setIsRecommend(false);
        resource.setIsBanner(false);
        resource.setCreateTime(now);
        resource.setDeleted(false);

        ResourceVO resourceVO = new ResourceVO();
        resourceVO.setResource(resource);
        ResultDomain<ResourceVO> createResult = resourceService.createResource(resourceVO);
        if (!createResult.isSuccess()) {
            resultDomain.fail("转换为资源失败: " + createResult.getMessage());
            return resultDomain;
        }

        // Mark the item as converted and link it to the new resource.
        item.setStatus(1); // converted to resource
        item.setResourceId(resource.getResourceID());
        item.setProcessTime(now);
        item.setProcessor(LoginUtil.getCurrentUserId());
        itemMapper.updateById(item);

        logger.info("采集项转换为资源成功采集项ID: {}, 资源ID: {}", itemId, resource.getResourceID());
        resultDomain.success("转换为资源成功", resource.getResourceID());
    } catch (Exception e) {
        logger.error("转换为资源异常: ", e);
        resultDomain.fail("转换为资源异常: " + e.getMessage());
    }
    return resultDomain;
}
/**
 * Converts several collected items into resources under one tag.
 *
 * <p>Each ID is passed to {@code convertToResource}; items that fail to convert are skipped
 * and only the successful conversions are counted.
 *
 * @param itemIds IDs of the items to convert; must be non-null and non-empty
 * @param tagId   tag assigned to every new resource; must be non-null and non-empty
 * @return a result carrying the number of successfully converted items, or a failure result
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchConvertToResource(List<String> itemIds, String tagId) {
    ResultDomain<Integer> response = new ResultDomain<>();
    try {
        if (itemIds == null || itemIds.isEmpty()) {
            response.fail("采集项ID列表为空");
            return response;
        }
        if (tagId == null || tagId.isEmpty()) {
            response.fail("标签ID不能为空");
            return response;
        }
        int converted = 0;
        for (String id : itemIds) {
            if (convertToResource(id, tagId).isSuccess()) {
                converted++;
            }
        }
        logger.info("批量转换为资源完成,共{}条,成功{}条", itemIds.size(), converted);
        response.success("批量转换为资源完成", converted);
    } catch (Exception ex) {
        logger.error("批量转换为资源异常: ", ex);
        response.fail("批量转换为资源异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Marks a collection item as ignored (status 2) and records who processed it and when.
 *
 * @param itemId ID of the item to ignore; must be non-null and non-empty
 * @return a result carrying the updated item, or a failure result when the ID is missing,
 *         the item does not exist, or an exception occurs
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> ignoreItem(String itemId) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        boolean idMissing = itemId == null || itemId.isEmpty();
        if (idMissing) {
            response.fail("采集项ID不能为空");
            return response;
        }
        TbDataCollectionItem target = itemMapper.selectById(itemId);
        if (target == null) {
            response.fail("采集项不存在");
            return response;
        }
        target.setStatus(2); // ignored
        target.setProcessTime(new Date());
        target.setProcessor(LoginUtil.getCurrentUserId());
        itemMapper.updateById(target);
        logger.info("忽略采集项成功ID: {}", itemId);
        response.success("忽略采集项成功", target);
    } catch (Exception ex) {
        logger.error("忽略采集项异常: ", ex);
        response.fail("忽略采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Counts collection items by task and status.
 *
 * <p>Both arguments are passed to the mapper unchanged; no validation is done here.
 *
 * @param taskId task ID forwarded to the mapper
 * @param status status value forwarded to the mapper
 * @return a result carrying the count, or a failure result if the query throws
 */
@Override
public ResultDomain<Long> countByStatus(String taskId, Integer status) {
    ResultDomain<Long> response = new ResultDomain<>();
    try {
        long matched = itemMapper.countByStatus(taskId, status);
        response.success("统计成功", matched);
    } catch (Exception ex) {
        logger.error("统计采集项数量异常: ", ex);
        response.fail("统计采集项数量异常: " + ex.getMessage());
    }
    return response;
}
/**
 * @description Builds the view object for a collection item: attaches the related scheduled
 *              task (when the item has a task ID), a status label, and the edit/convert flags.
 *              A null status is treated like status 0 (unprocessed).
 * @param item the collection item
 * @return DataCollectionItemVO the populated view object
 * @author yslg
 * @since 2025-11-08
 */
private DataCollectionItemVO buildVO(TbDataCollectionItem item) {
    DataCollectionItemVO view = new DataCollectionItemVO();
    view.setItem(item);

    // Attach the scheduled task that produced this item, if any.
    boolean hasTask = item.getTaskId() != null && !item.getTaskId().isEmpty();
    if (hasTask) {
        TbCrontabTask relatedTask = taskMapper.selectTaskById(item.getTaskId());
        view.setTask(relatedTask);
    }

    // Status label: 0 = unprocessed, 1 = converted to resource, 2 = ignored.
    String label;
    if (item.getStatus() == null || item.getStatus() == 0) {
        label = "未处理";
    } else if (item.getStatus() == 1) {
        label = "已转换为资源";
    } else if (item.getStatus() == 2) {
        label = "已忽略";
    } else {
        label = "未知";
    }
    view.setStatusText(label);

    // Permissions: unprocessed and ignored items are editable; only unprocessed ones convert.
    boolean unprocessed = item.getStatus() == null || item.getStatus() == 0;
    view.setCanEdit(unprocessed || item.getStatus() == 2);
    view.setCanConvert(unprocessed);
    return view;
}
}

View File

@@ -1,60 +1,60 @@
package org.xyzh.crontab.task;
// package org.xyzh.crontab.task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
// import org.slf4j.Logger;
// import org.slf4j.LoggerFactory;
// import org.springframework.stereotype.Component;
import java.text.SimpleDateFormat;
import java.util.Date;
// import java.text.SimpleDateFormat;
// import java.util.Date;
/**
* @description 数据备份任务
* @filename DataBackupTask.java
* @author yslg
* @copyright xyzh
* @since 2025-10-25
*/
@Component("dataBackupTask")
public class DataBackupTask {
// /**
// * @description 数据备份任务
// * @filename DataBackupTask.java
// * @author yslg
// * @copyright xyzh
// * @since 2025-10-25
// */
// @Component("dataBackupTask")
// public class DataBackupTask {
private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class);
// private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class);
/**
* @description 执行数据备份
* @author yslg
* @since 2025-10-25
*/
public void execute() {
logger.info("开始执行数据备份任务...");
// /**
// * @description 执行数据备份
// * @author yslg
// * @since 2025-10-25
// */
// public void execute() {
// logger.info("开始执行数据备份任务...");
try {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");
String backupTime = sdf.format(new Date());
// try {
// SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");
// String backupTime = sdf.format(new Date());
// TODO: 实现数据备份逻辑
// 1. 备份数据库
// 2. 备份文件
// 3. 压缩备份文件
// 4. 上传到备份服务器或云存储
// // TODO: 实现数据备份逻辑
// // 1. 备份数据库
// // 2. 备份文件
// // 3. 压缩备份文件
// // 4. 上传到备份服务器或云存储
Thread.sleep(2000); // 模拟执行
// Thread.sleep(2000); // 模拟执行
logger.info("数据备份任务执行完成,备份标识: {}", backupTime);
} catch (Exception e) {
logger.error("数据备份任务执行失败: ", e);
throw new RuntimeException("数据备份任务执行失败", e);
}
}
// logger.info("数据备份任务执行完成,备份标识: {}", backupTime);
// } catch (Exception e) {
// logger.error("数据备份任务执行失败: ", e);
// throw new RuntimeException("数据备份任务执行失败", e);
// }
// }
/**
* @description 执行带参数的备份任务
* @param params 参数备份类型full-全量incremental-增量)
* @author yslg
* @since 2025-10-25
*/
public void execute(String params) {
logger.info("开始执行数据备份任务,备份类型: {}", params);
execute();
}
}
// /**
// * @description 执行带参数的备份任务
// * @param params 参数备份类型full-全量incremental-增量)
// * @author yslg
// * @since 2025-10-25
// */
// public void execute(String params) {
// logger.info("开始执行数据备份任务,备份类型: {}", params);
// execute();
// }
// }

View File

@@ -1,68 +1,68 @@
package org.xyzh.crontab.task;
// package org.xyzh.crontab.task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.xyzh.crontab.mapper.CrontabLogMapper;
// import org.slf4j.Logger;
// import org.slf4j.LoggerFactory;
// import org.springframework.beans.factory.annotation.Autowired;
// import org.springframework.stereotype.Component;
// import org.xyzh.crontab.mapper.CrontabLogMapper;
import java.util.Calendar;
import java.util.Date;
// import java.util.Calendar;
// import java.util.Date;
/**
* @description 清理过期日志任务
* @filename LogCleanTask.java
* @author yslg
* @copyright xyzh
* @since 2025-10-25
*/
@Component("logCleanTask")
public class LogCleanTask {
// /**
// * @description 清理过期日志任务
// * @filename LogCleanTask.java
// * @author yslg
// * @copyright xyzh
// * @since 2025-10-25
// */
// @Component("logCleanTask")
// public class LogCleanTask {
private static final Logger logger = LoggerFactory.getLogger(LogCleanTask.class);
// private static final Logger logger = LoggerFactory.getLogger(LogCleanTask.class);
@Autowired
private CrontabLogMapper logMapper;
// @Autowired
// private CrontabLogMapper logMapper;
/**
* @description 执行日志清理默认清理30天前的日志
* @author yslg
* @since 2025-10-25
*/
public void execute() {
execute("30");
}
// /**
// * @description 执行日志清理默认清理30天前的日志
// * @author yslg
// * @since 2025-10-25
// */
// public void execute() {
// execute("30");
// }
/**
* @description 执行日志清理
* @param params 天数参数
* @author yslg
* @since 2025-10-25
*/
public void execute(String params) {
logger.info("开始执行日志清理任务...");
// /**
// * @description 执行日志清理
// * @param params 天数参数
// * @author yslg
// * @since 2025-10-25
// */
// public void execute(String params) {
// logger.info("开始执行日志清理任务...");
try {
int days = 30; // 默认30天
if (params != null && !params.isEmpty()) {
try {
days = Integer.parseInt(params);
} catch (NumberFormatException e) {
logger.warn("参数格式错误使用默认值30天");
}
}
// try {
// int days = 30; // 默认30天
// if (params != null && !params.isEmpty()) {
// try {
// days = Integer.parseInt(params);
// } catch (NumberFormatException e) {
// logger.warn("参数格式错误使用默认值30天");
// }
// }
Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.DAY_OF_MONTH, -days);
Date beforeDate = calendar.getTime();
// Calendar calendar = Calendar.getInstance();
// calendar.add(Calendar.DAY_OF_MONTH, -days);
// Date beforeDate = calendar.getTime();
int count = logMapper.cleanLogsByDate(beforeDate);
// int count = logMapper.cleanLogsByDate(beforeDate);
logger.info("日志清理任务执行完成,共清理{}条日志", count);
} catch (Exception e) {
logger.error("日志清理任务执行失败: ", e);
throw new RuntimeException("日志清理任务执行失败", e);
}
}
}
// logger.info("日志清理任务执行完成,共清理{}条日志", count);
// } catch (Exception e) {
// logger.error("日志清理任务执行失败: ", e);
// throw new RuntimeException("日志清理任务执行失败", e);
// }
// }
// }

View File

@@ -1,54 +1,54 @@
package org.xyzh.crontab.task;
// package org.xyzh.crontab.task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
// import org.slf4j.Logger;
// import org.slf4j.LoggerFactory;
// import org.springframework.stereotype.Component;
/**
* @description 系统数据统计任务
* @filename SystemStatisticsTask.java
* @author yslg
* @copyright xyzh
* @since 2025-10-25
*/
@Component("systemStatisticsTask")
public class SystemStatisticsTask {
// /**
// * @description 系统数据统计任务
// * @filename SystemStatisticsTask.java
// * @author yslg
// * @copyright xyzh
// * @since 2025-10-25
// */
// @Component("systemStatisticsTask")
// public class SystemStatisticsTask {
private static final Logger logger = LoggerFactory.getLogger(SystemStatisticsTask.class);
// private static final Logger logger = LoggerFactory.getLogger(SystemStatisticsTask.class);
/**
* @description 执行系统数据统计
* @author yslg
* @since 2025-10-25
*/
public void execute() {
logger.info("开始执行系统数据统计任务...");
// /**
// * @description 执行系统数据统计
// * @author yslg
// * @since 2025-10-25
// */
// public void execute() {
// logger.info("开始执行系统数据统计任务...");
try {
// TODO: 实现系统数据统计逻辑
// 1. 统计用户数据
// 2. 统计资源数据
// 3. 统计访问数据
// 4. 生成统计报告
// try {
// // TODO: 实现系统数据统计逻辑
// // 1. 统计用户数据
// // 2. 统计资源数据
// // 3. 统计访问数据
// // 4. 生成统计报告
Thread.sleep(1000); // 模拟执行
// Thread.sleep(1000); // 模拟执行
logger.info("系统数据统计任务执行完成");
} catch (Exception e) {
logger.error("系统数据统计任务执行失败: ", e);
throw new RuntimeException("系统数据统计任务执行失败", e);
}
}
// logger.info("系统数据统计任务执行完成");
// } catch (Exception e) {
// logger.error("系统数据统计任务执行失败: ", e);
// throw new RuntimeException("系统数据统计任务执行失败", e);
// }
// }
/**
* @description 执行带参数的统计任务
* @param params 参数
* @author yslg
* @since 2025-10-25
*/
public void execute(String params) {
logger.info("开始执行系统数据统计任务,参数: {}", params);
execute();
}
}
// /**
// * @description 执行带参数的统计任务
// * @param params 参数
// * @author yslg
// * @since 2025-10-25
// */
// public void execute(String params) {
// logger.info("开始执行系统数据统计任务,参数: {}", params);
// execute();
// }
// }

View File

@@ -0,0 +1,46 @@
package org.xyzh.crontab.task.newsTask;
import java.util.List;
import org.xyzh.common.dto.resource.TbResource;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * @description Article structure returned by the crawler.
 * @filename ArticleStruct.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-10
 */
@Data
@NoArgsConstructor
public class ArticleStruct {

    /** Article title. */
    private String title;
    /** Article URL. */
    private String url;
    /** Publish time as text; the format is not fixed by this class. */
    private String publishTime;
    /** Article author. */
    private String author;
    /** Article source. */
    private String source;
    /** Article body, one entry per content row. */
    private List<RowStruct> contentRows;

    /**
     * One row of article content.
     *
     * <p>Declared {@code static} so it carries no hidden reference to an enclosing
     * {@link ArticleStruct}. This also lets JSON mappers such as Jackson instantiate it
     * through its no-arg constructor, which is not possible for a non-static inner class.
     */
    @Data
    @NoArgsConstructor
    public static class RowStruct {
        /** The complete {@code <p>} tag including its inline style. */
        private String content;
    }

    /**
     * Converts this article into a {@link TbResource}.
     *
     * <p>Only the title is mapped at the moment.
     * TODO: map url, publishTime, author, source and contentRows once the target fields are settled.
     *
     * @return a new resource carrying this article's title
     */
    public TbResource toTbResource() {
        TbResource tbResource = new TbResource();
        tbResource.setTitle(this.title);
        return tbResource;
    }
}

View File

@@ -0,0 +1,328 @@
package org.xyzh.crontab.task.newsTask;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
/**
 * @description Scheduled task that runs the external Python news crawler and stores the
 *              crawled articles as data-collection items.
 *
 *              The crawler is launched as {@code <python> main.py <category> <limit> <outputFile>}
 *              with the script directory as working directory. The JSON file it writes is then
 *              parsed and passed to {@code DataCollectionItemService.batchCreateItems}.
 *
 *              NOTE(review): the property prefix "crewer", the default directory
 *              "../schoolNewsCrewer" and the bean name "newsCrewerTask" look like misspellings
 *              of "crawler". They are kept as-is because existing configuration or stored task
 *              definitions may reference them — confirm before renaming.
 * @filename NewsCrawlerTask.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
@Component("newsCrewerTask")
public class NewsCrawlerTask {

    private static final Logger logger = LoggerFactory.getLogger(NewsCrawlerTask.class);

    /** Characters allowed in source/category: they end up on a command line and in a file name. */
    private static final Pattern SAFE_TOKEN = Pattern.compile("[A-Za-z0-9_-]+");

    /** A positive integer of at most nine digits. */
    private static final Pattern POSITIVE_INT = Pattern.compile("[1-9][0-9]{0,8}");

    /** How long to wait for the output-reader thread after the process has exited. */
    private static final long OUTPUT_JOIN_MILLIS = 5_000L;

    @Value("${crewer.python.path:python}")
    private String pythonPath;

    @Value("${crewer.script.path:../schoolNewsCrewer}")
    private String scriptPath;

    /** Maximum crawler run time in seconds. */
    @Value("${crewer.timeout:300}")
    private int timeout;

    @Autowired
    private DataCollectionItemService itemService;

    private final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * @description Runs the crawler with the default arguments "rmrb,politics,20".
     * @author yslg
     * @since 2025-11-08
     */
    public void execute() {
        execute("rmrb,politics,20");
    }

    /**
     * @description Runs the crawler and, when a task ID is supplied, saves the results.
     * @param params format "source,category,limit" or "taskId|source,category,limit".
     *               Missing or blank parts fall back to "rmrb", "politics" and "20";
     *               a null value means all defaults.
     *               source: news source (rmrb = People's Daily)
     *               category: category (politics, society, ...)
     *               limit: number of articles to crawl, a positive integer
     * @throws RuntimeException if the arguments are invalid, the script directory is missing,
     *                          the crawler times out or exits with a non-zero code, or the
     *                          result cannot be read
     * @author yslg
     * @since 2025-11-08
     */
    public void execute(String params) {
        logger.info("开始执行新闻爬虫任务,参数: {}", params);
        Process process = null;
        try {
            // Split off the optional "taskId|" prefix.
            String taskId = null;
            String actualParams = params == null ? "" : params;
            int separator = actualParams.indexOf('|');
            if (separator >= 0) {
                taskId = actualParams.substring(0, separator).trim();
                actualParams = actualParams.substring(separator + 1);
            }

            String[] paramArray = actualParams.split(",");
            String source = argumentOrDefault(paramArray, 0, "rmrb");
            String category = argumentOrDefault(paramArray, 1, "politics");
            String limit = argumentOrDefault(paramArray, 2, "20");

            // These values reach "cmd /c" on Windows and a file name, so restrict them.
            requireSafeToken(source);
            requireSafeToken(category);
            if (!POSITIVE_INT.matcher(limit).matches()) {
                throw new IllegalArgumentException("爬取数量必须为正整数: " + limit);
            }
            logger.info("爬虫参数 - 来源: {}, 分类: {}, 数量: {}", source, category, limit);

            Path scriptDir = Paths.get(scriptPath);
            if (!Files.isDirectory(scriptDir)) {
                throw new RuntimeException("爬虫脚本目录不存在: " + scriptPath);
            }

            String timestamp = String.valueOf(System.currentTimeMillis());
            String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp);
            List<String> command = buildCommand(category, limit, outputFile);
            logger.info("执行命令: {}", String.join(" ", command));

            ProcessBuilder processBuilder = new ProcessBuilder(command);
            processBuilder.directory(scriptDir.toFile());
            processBuilder.redirectErrorStream(true);
            process = processBuilder.start();

            // Drain the output on a separate thread. Reading it here would block until the
            // process exits, so the timeout below could never fire.
            StringBuffer output = new StringBuffer();
            Thread outputReader = startOutputReader(process, output);

            if (!process.waitFor(timeout, TimeUnit.SECONDS)) {
                // The finally block kills the process tree.
                throw new RuntimeException("爬虫任务超时(超过" + timeout + "秒)");
            }
            outputReader.join(OUTPUT_JOIN_MILLIS);

            int exitCode = process.exitValue();
            if (exitCode != 0) {
                logger.error("新闻爬虫任务执行失败,退出码: {}", exitCode);
                logger.error("输出内容:\n{}", output);
                throw new RuntimeException("爬虫任务执行失败,退出码: " + exitCode);
            }
            logger.info("新闻爬虫任务执行成功");
            processOutputFile(scriptDir.resolve(outputFile), outputFile, taskId, source, category);
        } catch (InterruptedException e) {
            // Restore the flag so the scheduler's thread can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("新闻爬虫任务执行异常: ", e);
            throw new RuntimeException("新闻爬虫任务执行异常", e);
        } catch (Exception e) {
            logger.error("新闻爬虫任务执行异常: ", e);
            throw new RuntimeException("新闻爬虫任务执行异常", e);
        } finally {
            terminate(process);
        }
    }

    /**
     * @description Checks that the configured Python interpreter can be started and logs its
     *              version. Failures are logged, never thrown.
     * @author yslg
     * @since 2025-11-08
     */
    public void testPythonEnvironment() {
        logger.info("测试Python环境...");
        try {
            ProcessBuilder pb = new ProcessBuilder(pythonPath, "--version");
            // Some interpreters print the version on stderr; merge it so it is not lost.
            pb.redirectErrorStream(true);
            Process process = pb.start();
            String version;
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                version = reader.readLine();
                // Drain any remaining output so the process can exit.
                while (reader.readLine() != null) {
                    // discard
                }
            }
            int exitCode = process.waitFor();
            if (exitCode == 0) {
                logger.info("Python环境正常: {}", version);
            } else {
                logger.error("Python环境异常");
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("测试Python环境失败: ", e);
        } catch (Exception e) {
            logger.error("测试Python环境失败: ", e);
        }
    }

    /** Returns the trimmed argument at {@code index}, or {@code fallback} if absent or blank. */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        if (index >= args.length) {
            return fallback;
        }
        String value = args[index].trim();
        return value.isEmpty() ? fallback : value;
    }

    /** Rejects values that could alter the command line or the output file path. */
    private static void requireSafeToken(String value) {
        if (!SAFE_TOKEN.matcher(value).matches()) {
            throw new IllegalArgumentException("爬虫参数包含非法字符: " + value);
        }
    }

    /** Builds the crawler command line; on Windows the interpreter is started through cmd. */
    private List<String> buildCommand(String category, String limit, String outputFile) {
        List<String> command = new ArrayList<>();
        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            command.add("cmd");
            command.add("/c");
        }
        command.add(pythonPath);
        command.add("main.py");
        command.add(category);
        command.add(limit);
        command.add(outputFile);
        return command;
    }

    /** Starts a daemon thread that copies the process output into {@code sink} line by line. */
    private Thread startOutputReader(Process process, StringBuffer sink) {
        Thread reader = new Thread(() -> {
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    sink.append(line).append("\n");
                    logger.debug("Python输出: {}", line);
                }
            } catch (IOException e) {
                // Expected when the process is killed and its stream is closed under us.
                logger.debug("Python输出读取结束: {}", e.getMessage());
            }
        }, "news-crawler-output");
        reader.setDaemon(true);
        reader.start();
        return reader;
    }

    /** Kills the process and its children (e.g. python started by cmd) if still running. */
    private void terminate(Process process) {
        if (process == null || !process.isAlive()) {
            return;
        }
        process.descendants().forEach(ProcessHandle::destroyForcibly);
        process.destroyForcibly();
    }

    /** Parses the crawler's JSON result file and saves it when a task ID is available. */
    private void processOutputFile(Path outputPath, String outputFile, String taskId,
                                   String source, String category) throws IOException {
        if (!Files.exists(outputPath)) {
            logger.warn("输出文件不存在: {}", outputFile);
            return;
        }
        String jsonContent = Files.readString(outputPath);
        List<Map<String, Object>> newsList = objectMapper.readValue(
                jsonContent, new TypeReference<List<Map<String, Object>>>() { });
        if (newsList == null) {
            // A literal JSON "null" parses to null; treat it as an empty result.
            newsList = new ArrayList<>();
        }
        logger.info("成功爬取 {} 条新闻", newsList.size());
        if (taskId != null && !taskId.isEmpty()) {
            saveNewsToDatabase(newsList, taskId, source, category);
        } else {
            logger.warn("未提供任务ID跳过数据保存");
        }
    }

    /**
     * @description Converts the crawled news entries to collection items and saves them in one
     *              batch. Entries that fail to convert are logged and skipped; save failures
     *              are logged, not thrown.
     * @param newsList crawled news entries
     * @param taskId   ID of the task that produced them
     * @param source   news source code
     * @param category news category
     * @author yslg
     * @since 2025-11-08
     */
    private void saveNewsToDatabase(List<Map<String, Object>> newsList, String taskId, String source, String category) {
        logger.info("开始保存 {} 条新闻到数据库任务ID: {}", newsList.size(), taskId);
        try {
            List<TbDataCollectionItem> itemList = new ArrayList<>();
            Date now = new Date();
            // Local instance: SimpleDateFormat is not thread-safe and must not be shared.
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            for (Map<String, Object> news : newsList) {
                try {
                    TbDataCollectionItem item = new TbDataCollectionItem();
                    // Basic fields
                    item.setTaskId(taskId);
                    item.setTitle(getStringValue(news, "title"));
                    item.setContent(getStringValue(news, "content"));
                    item.setSummary(getStringValue(news, "summary"));
                    item.setSource(source.equals("rmrb") ? "人民日报" : source);
                    item.setSourceUrl(getStringValue(news, "url"));
                    item.setCategory(category);
                    item.setAuthor(getStringValue(news, "author"));

                    // Publish time; falls back to the crawl time when missing or unparsable.
                    String publishTimeStr = getStringValue(news, "publish_time");
                    if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
                        try {
                            item.setPublishTime(dateFormat.parse(publishTimeStr));
                        } catch (Exception e) {
                            logger.warn("解析发布时间失败: {}", publishTimeStr);
                            item.setPublishTime(now);
                        }
                    } else {
                        item.setPublishTime(now);
                    }

                    // Cover image
                    item.setCoverImage(getStringValue(news, "cover_image"));

                    // Image list, stored as a JSON string
                    Object imagesObj = news.get("images");
                    if (imagesObj instanceof List) {
                        item.setImages(objectMapper.writeValueAsString(imagesObj));
                    } else if (imagesObj instanceof String) {
                        item.setImages((String) imagesObj);
                    }

                    // Tags, stored comma-separated. Elements are converted with toString()
                    // because the parsed JSON list may hold non-string values.
                    Object tagsObj = news.get("tags");
                    if (tagsObj instanceof List) {
                        List<String> tags = new ArrayList<>();
                        for (Object tag : (List<?>) tagsObj) {
                            if (tag != null) {
                                tags.add(tag.toString());
                            }
                        }
                        item.setTags(String.join(",", tags));
                    } else if (tagsObj instanceof String) {
                        item.setTags((String) tagsObj);
                    }

                    // Status and crawl time
                    item.setStatus(0); // unprocessed
                    item.setCrawlTime(now);
                    itemList.add(item);
                } catch (Exception e) {
                    logger.error("转换新闻数据失败: ", e);
                }
            }

            // Batch save
            if (!itemList.isEmpty()) {
                ResultDomain<Integer> result = itemService.batchCreateItems(itemList);
                if (result.isSuccess()) {
                    logger.info("成功保存 {} 条新闻到数据库", result.getData());
                } else {
                    logger.error("保存新闻到数据库失败: {}", result.getMessage());
                }
            } else {
                logger.warn("没有有效的新闻数据需要保存");
            }
        } catch (Exception e) {
            logger.error("保存新闻数据到数据库异常: ", e);
        }
    }

    /**
     * @description Reads a value from the map as a string.
     * @param map source map
     * @param key key to look up
     * @return String the value's {@code toString()}, or null when the key is absent or null
     * @author yslg
     * @since 2025-11-08
     */
    private String getStringValue(Map<String, Object> map, String key) {
        Object value = map.get(key);
        return value == null ? null : value.toString();
    }
}

View File

@@ -5,8 +5,9 @@ abstract public class NewsTask {
// 爬取网站目标
private String target;
// 爬取标题
private String title;
// 爬取搜索
private String query;
}

View File

@@ -0,0 +1,234 @@
package org.xyzh.crontab.task.newsTask;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Simplified examples of calling Python from Java, intended for learning the core mechanics.
 *
 * <p>Every example starts a child process with {@link ProcessBuilder}. Two rules recur:
 * always consume (or redirect) the child's output, otherwise it blocks once the pipe buffer
 * is full; and never read the output to EOF on the same thread before a timed wait, otherwise
 * the timeout can never fire.
 */
public class PythonExecutorExample {

    /**
     * Example 1: the simplest call — start, print each output line, wait for exit.
     */
    public static void example1_Simple() throws Exception {
        // 1. Build the command
        ProcessBuilder pb = new ProcessBuilder("python", "script.py", "arg1", "arg2");
        // 2. Start the process
        Process process = pb.start();
        // 3. Read the output (the reader is closed even if reading fails)
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println("Python输出: " + line);
            }
        }
        // 4. Wait for the process to finish
        int exitCode = process.waitFor();
        System.out.println("退出码: " + exitCode);
    }

    /**
     * Example 2: return the script's standard output as the result.
     *
     * @return all output lines concatenated without line separators
     */
    public static String example2_GetResult() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        Process process = pb.start();
        StringBuilder result = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
        }
        process.waitFor();
        return result.toString();
    }

    /**
     * Example 3: timeout control. Output is discarded so a chatty script cannot stall on a
     * full pipe and be mistaken for a hang.
     */
    public static void example3_WithTimeout() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.DISCARD);
        Process process = pb.start();
        // Timed wait (5 seconds)
        boolean finished = process.waitFor(5, TimeUnit.SECONDS);
        if (!finished) {
            // Timed out: kill the process
            process.destroyForcibly();
            System.out.println("任务超时");
        } else {
            int exitCode = process.exitValue();
            System.out.println("执行完成,退出码: " + exitCode);
        }
    }

    /**
     * Example 4: pass arguments on the command line.
     */
    public static void example4_PassArgs() throws Exception {
        List<String> command = new ArrayList<>();
        command.add("python");
        command.add("script.py");
        command.add("category=politics");
        command.add("limit=20");
        ProcessBuilder pb = new ProcessBuilder(command);
        pb.redirectErrorStream(true);
        Process process = pb.start();
        // Consume the output so the child cannot block on a full pipe.
        copyOutput(process, new StringBuffer());
        process.waitFor();
    }

    /**
     * Example 5: pass arguments through standard input.
     */
    public static void example5_PassArgsByStdin() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        Process process = pb.start();
        // Write the arguments to stdin. Closing the writer also closes the underlying
        // stream, which tells Python that the input has ended.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
            writer.write("{\"category\":\"politics\",\"limit\":20}");
            writer.newLine();
            writer.flush();
        }
        // Read the first output line as the result
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String result = reader.readLine();
            System.out.println("结果: " + result);
        }
        process.waitFor();
    }

    /**
     * Example 6: handle the Windows/Linux difference.
     */
    public static void example6_CrossPlatform() throws Exception {
        List<String> command = new ArrayList<>();
        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            // Windows: run through cmd
            command.add("cmd");
            command.add("/c");
            command.add("python");
        } else {
            // Linux/Mac: run directly
            command.add("python3");
        }
        command.add("script.py");
        ProcessBuilder pb = new ProcessBuilder(command);
        pb.redirectErrorStream(true);
        Process process = pb.start();
        // Consume the output so the child cannot block on a full pipe.
        copyOutput(process, new StringBuffer());
        process.waitFor();
    }

    /**
     * Example 7: complete error handling — merged output, working directory, an effective
     * timeout, exit-code check and cleanup.
     */
    public static void example7_Complete() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        // Merge stdout and stderr
        pb.redirectErrorStream(true);
        // Set the working directory
        pb.directory(new File("/path/to/script"));
        Process process = null;
        try {
            process = pb.start();
            // Collect output on a background thread; reading it here would block until the
            // process exits and make the timed wait below pointless.
            final Process running = process;
            final StringBuffer output = new StringBuffer();
            Thread collector = new Thread(() -> {
                try {
                    copyOutput(running, output);
                } catch (IOException ignored) {
                    // The stream is closed when the process is killed; nothing more to read.
                }
            });
            collector.setDaemon(true);
            collector.start();

            // Wait for exit, with timeout
            boolean finished = process.waitFor(30, TimeUnit.SECONDS);
            if (!finished) {
                process.destroyForcibly();
                throw new RuntimeException("任务超时");
            }
            collector.join(5_000L);

            int exitCode = process.exitValue();
            if (exitCode == 0) {
                System.out.println("执行成功");
                System.out.println("输出: " + output.toString());
            } else {
                System.err.println("执行失败,退出码: " + exitCode);
                System.err.println("错误输出: " + output.toString());
                throw new RuntimeException("Python执行失败");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            throw new RuntimeException("执行异常", e);
        } catch (Exception e) {
            throw new RuntimeException("执行异常", e);
        } finally {
            // Clean up
            if (process != null && process.isAlive()) {
                process.destroyForcibly();
            }
        }
    }

    /**
     * Example 8: asynchronous execution — the caller is not blocked.
     */
    public static void example8_Async() {
        new Thread(() -> {
            try {
                ProcessBuilder pb = new ProcessBuilder("python", "script.py");
                Process process = pb.start();
                // Read the output on this background thread
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println("后台输出: " + line);
                    }
                }
                process.waitFor();
                System.out.println("后台任务完成");
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();
        System.out.println("主线程继续执行...");
    }

    /** Copies the process's standard output into {@code sink}, one line at a time. */
    private static void copyOutput(Process process, StringBuffer sink) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sink.append(line).append("\n");
            }
        }
    }
}

View File

@@ -0,0 +1,15 @@
package org.xyzh.crontab.task.newsTask;
import lombok.Data;
/**
 * Plain data holder describing one crawler script.
 *
 * NOTE(review): the field names match the keys of the {@code crawler.script} entries in the
 * YAML config added in the same commit (name, path, method, param, output) — presumably this
 * class is bound to those entries; confirm against the configuration binding.
 */
@Data
public class ScriptDomain {
// Script display name.
private String name;
// Script path; presumably relative to the crawler base path — TODO confirm.
private String path;
// Method to invoke; exact semantics are not visible here — TODO confirm.
private String method;
// Parameter passed to the script; format is not visible here — TODO confirm.
private String param;
// Output of the script; whether this is a file path or a format is not visible here — TODO confirm.
private String output;
}

View File

@@ -0,0 +1,34 @@
crawler:
python:
path: C:/Python312/python.exe
base:
path: F:/Project/schoolNews/schoolNewsCrawler
script:
- name: xxx爬虫
path: crawler/xxx.py
method: xxx
param: xxx
output: xxx
crontab:
items: #可供前端选择的定时任务列表
- name: 人民日报新闻爬取
methods: #爬取方式
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
params:
query: String #搜索关键字
total: Integer #总新闻数量
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
params:
startDate: String #开始日期
endDate: String #结束日期
isYestoday: Boolean #是否是昨天