temp定时任务修改
This commit is contained in:
652
schoolNewsServ/crontab/Java调用Python详解.md
Normal file
652
schoolNewsServ/crontab/Java调用Python详解.md
Normal file
@@ -0,0 +1,652 @@
|
||||
# Java调用Python并获取返回结果详解
|
||||
|
||||
## 一、核心原理
|
||||
|
||||
Java通过 `ProcessBuilder` 或 `Runtime.exec()` 创建操作系统进程来执行Python脚本,然后通过进程的标准输入/输出流进行通信。
|
||||
|
||||
## 二、当前实现详解
|
||||
|
||||
### 1. 构建命令
|
||||
|
||||
```java
|
||||
// 步骤1: 构建命令列表
|
||||
List<String> command = new ArrayList<>();
|
||||
|
||||
// 步骤2: 处理Windows/Linux系统差异
|
||||
String os = System.getProperty("os.name").toLowerCase();
|
||||
if (os.contains("win")) {
|
||||
// Windows系统需要通过cmd执行
|
||||
command.add("cmd"); // 命令解释器
|
||||
command.add("/c"); // /c表示执行后关闭
|
||||
command.add(pythonPath); // python或python3
|
||||
} else {
|
||||
// Linux/Mac系统直接执行
|
||||
command.add(pythonPath);
|
||||
}
|
||||
|
||||
// 步骤3: 添加Python脚本和参数
|
||||
command.add("main.py"); // Python脚本
|
||||
command.add(category); // 参数1: 分类
|
||||
command.add(limit); // 参数2: 数量
|
||||
command.add(outputFile); // 参数3: 输出文件
|
||||
```
|
||||
|
||||
**命令示例:**
|
||||
- Windows: `cmd /c python main.py politics 20 output/news.json`
|
||||
- Linux: `python3 main.py politics 20 output/news.json`
|
||||
|
||||
### 2. 创建进程
|
||||
|
||||
```java
|
||||
// 创建进程构建器
|
||||
ProcessBuilder processBuilder = new ProcessBuilder(command);
|
||||
|
||||
// 设置工作目录(Python脚本所在目录)
|
||||
processBuilder.directory(scriptDir.toFile());
|
||||
|
||||
// 合并标准输出和错误输出(便于统一读取)
|
||||
processBuilder.redirectErrorStream(true);
|
||||
|
||||
// 启动进程
|
||||
Process process = processBuilder.start();
|
||||
```
|
||||
|
||||
**关键点:**
|
||||
- `directory()`: 设置工作目录,确保Python脚本能找到相对路径的资源
|
||||
- `redirectErrorStream(true)`: 将stderr合并到stdout,方便统一读取
|
||||
- `start()`: 异步启动进程,不会阻塞
|
||||
|
||||
### 3. 读取输出流
|
||||
|
||||
```java
|
||||
// 读取标准输出(Python的print输出)
|
||||
StringBuilder output = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
logger.debug("Python输出: {}", line);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**重要说明:**
|
||||
- `process.getInputStream()`: 获取Python进程的标准输出
|
||||
- 必须读取输出流,否则缓冲区满会导致进程阻塞
|
||||
- 使用UTF-8编码避免中文乱码
|
||||
|
||||
### 4. 等待进程结束
|
||||
|
||||
```java
|
||||
// 方式1: 带超时的等待(推荐)
|
||||
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
|
||||
|
||||
if (!finished) {
|
||||
// 超时后强制终止进程
|
||||
process.destroy(); // 或 process.destroyForcibly() 强制终止
|
||||
throw new RuntimeException("任务超时");
|
||||
}
|
||||
|
||||
// 方式2: 无限等待(不推荐,可能导致死锁)
|
||||
int exitCode = process.waitFor();
|
||||
```
|
||||
|
||||
**退出码说明:**
|
||||
- `0`: 执行成功
|
||||
- `非0`: 执行失败(通常是错误码)
|
||||
|
||||
### 5. 获取返回结果
|
||||
|
||||
当前实现通过**文件传递**方式获取结果:
|
||||
|
||||
```java
|
||||
// Python脚本将结果写入JSON文件
|
||||
Path outputPath = scriptDir.resolve(outputFile);
|
||||
|
||||
// Java读取文件内容
|
||||
String jsonContent = Files.readString(outputPath);
|
||||
|
||||
// 解析JSON
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
List<Map<String, Object>> newsList = mapper.readValue(
|
||||
jsonContent,
|
||||
List.class
|
||||
);
|
||||
```
|
||||
|
||||
## 三、三种数据传递方式对比
|
||||
|
||||
### 方式1: 文件传递(当前实现)
|
||||
|
||||
**优点:**
|
||||
- ✅ 适合大数据量
|
||||
- ✅ 数据持久化,便于调试
|
||||
- ✅ 实现简单
|
||||
|
||||
**缺点:**
|
||||
- ⚠️ 需要文件I/O操作
|
||||
- ⚠️ 需要管理临时文件
|
||||
- ⚠️ 可能有并发问题(文件名冲突)
|
||||
|
||||
**实现示例:**
|
||||
|
||||
```java
// Java端
String outputFile = "output/result_" + System.currentTimeMillis() + ".json";
command.add(outputFile);
```

```python
# Python端
import json
import sys

result = {"status": "success", "data": [...]}
with open(sys.argv[1], 'w', encoding='utf-8') as f:
    json.dump(result, f, ensure_ascii=False)
```
|
||||
|
||||
### 方式2: 标准输出传递(适合小数据)
|
||||
|
||||
**优点:**
|
||||
- ✅ 实时传输,无需文件
|
||||
- ✅ 适合小数据量(< 1MB)
|
||||
- ✅ 无文件管理开销
|
||||
|
||||
**缺点:**
|
||||
- ⚠️ 大数据量可能阻塞
|
||||
- ⚠️ 不能传递二进制数据
|
||||
- ⚠️ 需要与日志输出区分
|
||||
|
||||
**实现示例:**
|
||||
|
||||
```java
|
||||
// Java端:读取标准输出
|
||||
StringBuilder result = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
// 约定:以特定标记区分结果和日志
|
||||
if (line.startsWith("RESULT:")) {
|
||||
result.append(line.substring(7)); // 去掉"RESULT:"前缀
|
||||
} else {
|
||||
logger.info("Python日志: {}", line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 解析JSON结果
|
||||
String jsonResult = result.toString();
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
Map<String, Object> data = mapper.readValue(jsonResult, Map.class);
|
||||
```
|
||||
|
||||
```python
|
||||
# Python端:输出结果
|
||||
import json
|
||||
import sys
|
||||
|
||||
# 日志输出到stderr
|
||||
print("开始爬取...", file=sys.stderr)
|
||||
|
||||
# 结果输出到stdout(带标记)
|
||||
result = {"status": "success", "data": [...]}
|
||||
print("RESULT:" + json.dumps(result, ensure_ascii=False))
|
||||
```
|
||||
|
||||
### 方式3: 标准输入传递参数(双向通信)
|
||||
|
||||
**优点:**
|
||||
- ✅ 可以传递复杂参数
|
||||
- ✅ 支持交互式通信
|
||||
|
||||
**缺点:**
|
||||
- ⚠️ 实现复杂
|
||||
- ⚠️ 需要处理流关闭时机
|
||||
|
||||
**实现示例:**
|
||||
|
||||
```java
|
||||
// Java端:通过标准输入传递参数
|
||||
ProcessBuilder pb = new ProcessBuilder("python", "script.py");
|
||||
Process process = pb.start();
|
||||
|
||||
// 写入参数到标准输入
|
||||
try (BufferedWriter writer = new BufferedWriter(
|
||||
new OutputStreamWriter(process.getOutputStream(), "UTF-8"))) {
|
||||
String params = "{\"category\":\"politics\",\"limit\":20}";
|
||||
writer.write(params);
|
||||
writer.newLine();
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
// 关闭输入流(告诉Python输入结束)
|
||||
process.getOutputStream().close();
|
||||
|
||||
// 读取输出
|
||||
// ... 同方式2
|
||||
```
|
||||
|
||||
```python
|
||||
# Python端:从标准输入读取参数
|
||||
import json
|
||||
import sys
|
||||
|
||||
# 读取参数
|
||||
params_json = sys.stdin.readline().strip()
|
||||
params = json.loads(params_json)
|
||||
|
||||
category = params.get("category", "politics")
|
||||
limit = params.get("limit", 20)
|
||||
|
||||
# 执行爬取
|
||||
result = crawl_news(category, limit)
|
||||
|
||||
# 输出结果
|
||||
print(json.dumps(result, ensure_ascii=False))
|
||||
```
|
||||
|
||||
## 四、完整优化实现
|
||||
|
||||
### 改进版实现(支持多种方式)
|
||||
|
||||
```java
|
||||
package org.xyzh.crontab.task.newsTask;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
/**
|
||||
* Java调用Python的完整实现
|
||||
*/
|
||||
@Component("newsCrewerTask")
|
||||
public class NewsCrewerTask {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(NewsCrewerTask.class);
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
@Value("${crewer.python.path:python}")
|
||||
private String pythonPath;
|
||||
|
||||
@Value("${crewer.script.path:../schoolNewsCrewer}")
|
||||
private String scriptPath;
|
||||
|
||||
@Value("${crewer.timeout:300}")
|
||||
private int timeout;
|
||||
|
||||
/**
|
||||
* 方式1: 通过文件传递结果(当前实现,适合大数据)
|
||||
*/
|
||||
public List<Map<String, Object>> executeByFile(String category, int limit) {
|
||||
logger.info("执行爬虫任务 - 文件方式");
|
||||
|
||||
try {
|
||||
// 1. 构建命令
|
||||
List<String> command = buildCommand("main.py", category, String.valueOf(limit));
|
||||
|
||||
// 2. 生成输出文件
|
||||
String timestamp = String.valueOf(System.currentTimeMillis());
|
||||
String outputFile = String.format("output/news_%s_%s.json", category, timestamp);
|
||||
command.add(outputFile);
|
||||
|
||||
// 3. 执行进程
|
||||
ProcessResult result = executeProcess(command);
|
||||
|
||||
if (result.getExitCode() != 0) {
|
||||
throw new RuntimeException("Python执行失败: " + result.getOutput());
|
||||
}
|
||||
|
||||
// 4. 读取结果文件
|
||||
Path outputPath = Paths.get(scriptPath).resolve(outputFile);
|
||||
if (!Files.exists(outputPath)) {
|
||||
throw new RuntimeException("输出文件不存在: " + outputFile);
|
||||
}
|
||||
|
||||
String jsonContent = Files.readString(outputPath, StandardCharsets.UTF_8);
|
||||
List<Map<String, Object>> newsList = objectMapper.readValue(
|
||||
jsonContent,
|
||||
objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class)
|
||||
);
|
||||
|
||||
// 5. 清理临时文件(可选)
|
||||
// Files.deleteIfExists(outputPath);
|
||||
|
||||
return newsList;
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("执行失败", e);
|
||||
throw new RuntimeException("爬虫任务执行失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 方式2: 通过标准输出传递结果(适合小数据)
|
||||
*/
|
||||
public List<Map<String, Object>> executeByStdout(String category, int limit) {
|
||||
logger.info("执行爬虫任务 - 标准输出方式");
|
||||
|
||||
try {
|
||||
// 1. 构建命令(使用特殊脚本,输出JSON到stdout)
|
||||
List<String> command = buildCommand("main_stdout.py", category, String.valueOf(limit));
|
||||
|
||||
// 2. 执行进程
|
||||
ProcessResult result = executeProcess(command);
|
||||
|
||||
if (result.getExitCode() != 0) {
|
||||
throw new RuntimeException("Python执行失败: " + result.getOutput());
|
||||
}
|
||||
|
||||
// 3. 从输出中提取JSON(约定:最后一行是JSON结果)
|
||||
String output = result.getOutput();
|
||||
String[] lines = output.split("\n");
|
||||
|
||||
// 查找JSON行(以{或[开头)
|
||||
String jsonLine = null;
|
||||
for (int i = lines.length - 1; i >= 0; i--) {
|
||||
String line = lines[i].trim();
|
||||
if (line.startsWith("{") || line.startsWith("[")) {
|
||||
jsonLine = line;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (jsonLine == null) {
|
||||
throw new RuntimeException("未找到JSON结果");
|
||||
}
|
||||
|
||||
// 4. 解析JSON
|
||||
List<Map<String, Object>> newsList = objectMapper.readValue(
|
||||
jsonLine,
|
||||
objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class)
|
||||
);
|
||||
|
||||
return newsList;
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("执行失败", e);
|
||||
throw new RuntimeException("爬虫任务执行失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 方式3: 通过标准输入传递参数(双向通信)
|
||||
*/
|
||||
public List<Map<String, Object>> executeByStdin(String category, int limit) {
|
||||
logger.info("执行爬虫任务 - 标准输入方式");
|
||||
|
||||
Process process = null;
|
||||
try {
|
||||
// 1. 构建命令
|
||||
List<String> command = buildCommand("main_stdin.py");
|
||||
ProcessBuilder pb = new ProcessBuilder(command);
|
||||
pb.directory(Paths.get(scriptPath).toFile());
|
||||
pb.redirectErrorStream(true);
|
||||
|
||||
// 2. 启动进程
|
||||
process = pb.start();
|
||||
|
||||
// 3. 写入参数到标准输入
|
||||
try (BufferedWriter writer = new BufferedWriter(
|
||||
new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
|
||||
|
||||
Map<String, Object> params = Map.of(
|
||||
"category", category,
|
||||
"limit", limit
|
||||
);
|
||||
|
||||
String paramsJson = objectMapper.writeValueAsString(params);
|
||||
writer.write(paramsJson);
|
||||
writer.newLine();
|
||||
writer.flush();
|
||||
}
|
||||
|
||||
// 4. 关闭输入流(重要!)
|
||||
process.getOutputStream().close();
|
||||
|
||||
// 5. 读取输出
|
||||
StringBuilder output = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// 6. 等待进程结束
|
||||
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
|
||||
if (!finished) {
|
||||
process.destroyForcibly();
|
||||
throw new RuntimeException("任务超时");
|
||||
}
|
||||
|
||||
int exitCode = process.exitValue();
|
||||
if (exitCode != 0) {
|
||||
throw new RuntimeException("Python执行失败,退出码: " + exitCode);
|
||||
}
|
||||
|
||||
// 7. 解析结果
|
||||
String jsonResult = output.toString().trim();
|
||||
List<Map<String, Object>> newsList = objectMapper.readValue(
|
||||
jsonResult,
|
||||
objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class)
|
||||
);
|
||||
|
||||
return newsList;
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("执行失败", e);
|
||||
throw new RuntimeException("爬虫任务执行失败", e);
|
||||
} finally {
|
||||
if (process != null && process.isAlive()) {
|
||||
process.destroyForcibly();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 通用进程执行方法
|
||||
*/
|
||||
private ProcessResult executeProcess(List<String> command) throws IOException, InterruptedException {
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 创建进程构建器
|
||||
ProcessBuilder pb = new ProcessBuilder(command);
|
||||
pb.directory(Paths.get(scriptPath).toFile());
|
||||
pb.redirectErrorStream(true);
|
||||
|
||||
logger.info("执行命令: {}", String.join(" ", command));
|
||||
|
||||
// 启动进程
|
||||
Process process = pb.start();
|
||||
|
||||
// 读取输出(必须在单独线程中,避免阻塞)
|
||||
StringBuilder output = new StringBuilder();
|
||||
StringBuilder error = new StringBuilder();
|
||||
|
||||
// 使用CompletableFuture异步读取,避免死锁
|
||||
CompletableFuture<String> outputFuture = CompletableFuture.supplyAsync(() -> {
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
logger.debug("Python输出: {}", line);
|
||||
}
|
||||
return output.toString();
|
||||
} catch (IOException e) {
|
||||
logger.error("读取输出失败", e);
|
||||
return "";
|
||||
}
|
||||
});
|
||||
|
||||
// 等待进程结束(带超时)
|
||||
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
|
||||
|
||||
if (!finished) {
|
||||
process.destroyForcibly();
|
||||
throw new RuntimeException("任务超时(超过" + timeout + "秒)");
|
||||
}
|
||||
|
||||
// 获取输出
|
||||
String outputStr = outputFuture.get(5, TimeUnit.SECONDS);
|
||||
|
||||
int exitCode = process.exitValue();
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
|
||||
logger.info("进程执行完成 - 退出码: {}, 耗时: {}ms", exitCode, duration);
|
||||
|
||||
return new ProcessResult(exitCode, outputStr, duration);
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建命令列表
|
||||
*/
|
||||
private List<String> buildCommand(String... args) {
|
||||
List<String> command = new ArrayList<>();
|
||||
|
||||
String os = System.getProperty("os.name").toLowerCase();
|
||||
if (os.contains("win")) {
|
||||
command.add("cmd");
|
||||
command.add("/c");
|
||||
command.add(pythonPath);
|
||||
} else {
|
||||
command.add(pythonPath);
|
||||
}
|
||||
|
||||
for (String arg : args) {
|
||||
command.add(arg);
|
||||
}
|
||||
|
||||
return command;
|
||||
}
|
||||
|
||||
/**
|
||||
* 进程执行结果
|
||||
*/
|
||||
private static class ProcessResult {
|
||||
private final int exitCode;
|
||||
private final String output;
|
||||
private final long duration;
|
||||
|
||||
public ProcessResult(int exitCode, String output, long duration) {
|
||||
this.exitCode = exitCode;
|
||||
this.output = output;
|
||||
this.duration = duration;
|
||||
}
|
||||
|
||||
public int getExitCode() {
|
||||
return exitCode;
|
||||
}
|
||||
|
||||
public String getOutput() {
|
||||
return output;
|
||||
}
|
||||
|
||||
public long getDuration() {
|
||||
return duration;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 五、关键注意事项
|
||||
|
||||
### 1. 必须读取输出流
|
||||
|
||||
**错误示例:**
|
||||
```java
|
||||
Process process = pb.start();
|
||||
int exitCode = process.waitFor(); // 可能永远阻塞!
|
||||
```
|
||||
|
||||
**原因:** 子进程的输出管道缓冲区容量有限。如果Java端不读取,缓冲区写满后Python进程会阻塞在写操作上而无法退出;与此同时Java又阻塞在 `waitFor()` 上等待它退出,双方互相等待,形成死锁。
|
||||
|
||||
**正确做法:**
|
||||
```java
|
||||
Process process = pb.start();
|
||||
|
||||
// 必须读取输出流
|
||||
Thread outputThread = new Thread(() -> {
|
||||
try (BufferedReader reader = ...) {
|
||||
// 读取输出
|
||||
}
|
||||
});
|
||||
outputThread.start();
|
||||
|
||||
process.waitFor();
|
||||
```
|
||||
|
||||
### 2. 处理编码问题
|
||||
|
||||
```java
|
||||
// 指定UTF-8编码,避免中文乱码
|
||||
new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)
|
||||
new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8)
|
||||
```
|
||||
|
||||
### 3. 超时控制
|
||||
|
||||
```java
|
||||
// 使用带超时的waitFor
|
||||
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
|
||||
if (!finished) {
|
||||
process.destroyForcibly(); // 强制终止
|
||||
}
|
||||
```
|
||||
|
||||
### 4. 资源清理
|
||||
|
||||
```java
|
||||
try {
    // run the process logic here
} finally {
    // process stays null when start-up itself failed, so every access is guarded.
    if (process != null) {
        if (process.isAlive()) {
            process.destroyForcibly();
        }
        // Close all three streams; close() throws a checked IOException, and one failing
        // close must not prevent the others from being attempted.
        for (Closeable stream : List.of(
                process.getInputStream(),
                process.getOutputStream(),
                process.getErrorStream())) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // best-effort cleanup: nothing useful can be done at this point
            }
        }
    }
}
|
||||
```
|
||||
|
||||
### 5. 错误处理
|
||||
|
||||
```java
|
||||
// 检查退出码
|
||||
if (exitCode != 0) {
|
||||
// 读取错误输出
|
||||
String error = readErrorStream(process);
|
||||
throw new RuntimeException("执行失败: " + error);
|
||||
}
|
||||
```
|
||||
|
||||
## 六、性能优化建议
|
||||
|
||||
1. **使用线程池**:如果频繁调用,使用线程池管理进程
|
||||
2. **连接复用**:考虑Python服务模式(HTTP/gRPC)
|
||||
3. **异步执行**:使用CompletableFuture异步执行
|
||||
4. **缓存结果**:对相同参数的请求缓存结果
|
||||
|
||||
## 七、总结
|
||||
|
||||
- **文件传递**:适合大数据量,当前实现方式
|
||||
- **标准输出**:适合小数据量,实时传输
|
||||
- **标准输入**:适合复杂参数,双向通信
|
||||
|
||||
根据实际需求选择合适的方式,当前的文件传递方式已经足够好!
|
||||
|
||||
@@ -25,6 +25,11 @@
|
||||
<artifactId>api-crontab</artifactId>
|
||||
<version>${school-news.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.xyzh</groupId>
|
||||
<artifactId>api-news</artifactId>
|
||||
<version>${school-news.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Common模块依赖 -->
|
||||
<dependency>
|
||||
@@ -38,6 +43,10 @@
|
||||
<artifactId>system</artifactId>
|
||||
<version>${school-news.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spring Boot Web -->
|
||||
<dependency>
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
package org.xyzh.crontab.config;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.xyzh.crontab.task.newsTask.ScriptDomain;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Configuration holder for crawler settings, declared with the {@code crawler}
 * properties prefix. Lombok's {@code @Data} supplies the accessors.
 *
 * NOTE(review): the fields carry {@code @Value} placeholders in addition to the
 * class-level {@code @ConfigurationProperties}; presumably only one of the two
 * mechanisms is intended — confirm which property keys are actually expected.
 */
@Data
|
||||
@ConfigurationProperties(prefix = "crawler")
|
||||
public class CrawlerProperties {
|
||||
|
||||
// Injected from the "crawler.base.path" placeholder.
@Value("${crawler.base.path}")
|
||||
private String basePath;
|
||||
|
||||
// Injected from the "crawler.script" placeholder.
// NOTE(review): the target type is a list of ScriptDomain objects; verify that the
// placeholder value can be converted to that type in this project.
@Value("${crawler.script}")
|
||||
private List<ScriptDomain> scripts;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package org.xyzh.crontab.config;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
/**
 * Configuration properties holder declared with the {@code crontab} prefix.
 * It currently declares no fields, so there is nothing to bind yet.
 *
 * NOTE(review): the class name looks like a misspelling of "CrontabProperties";
 * confirm no other code depends on the current spelling before renaming.
 */
@ConfigurationProperties(prefix = "crontab")
|
||||
public class CrontabPrpperties {
|
||||
|
||||
}
|
||||
@@ -10,6 +10,13 @@ import org.xyzh.common.core.page.PageParam;
|
||||
import org.xyzh.common.core.page.PageRequest;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabTask;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabLog;
|
||||
import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.crontab.pojo.CrontabItem;
|
||||
|
||||
import java.util.Date;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
|
||||
|
||||
/**
|
||||
* @description 定时任务控制器
|
||||
@@ -27,204 +34,64 @@ public class CrontabController {
|
||||
@Autowired
|
||||
private CrontabService crontabService;
|
||||
|
||||
// ----------------定时任务管理--------------------------------
|
||||
|
||||
/**
|
||||
* @description 创建定时任务
|
||||
* @param task 任务对象
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 获取可创建的定时任务
|
||||
* @return
|
||||
*/
|
||||
@PostMapping("/task")
|
||||
public ResultDomain<TbCrontabTask> createTask(@RequestBody TbCrontabTask task) {
|
||||
return crontabService.createTask(task);
|
||||
@GetMapping("/getEnabledCrontabList")
|
||||
public ResultDomain<CrontabItem> getEnabledCrontabList(@RequestParam String param) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 更新定时任务
|
||||
* @param task 任务对象
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 创建定时任务
|
||||
* @param crontabItem
|
||||
* @return
|
||||
*/
|
||||
@PutMapping("/task")
|
||||
public ResultDomain<TbCrontabTask> updateTask(@RequestBody TbCrontabTask task) {
|
||||
return crontabService.updateTask(task);
|
||||
@PostMapping("/crontabTask")
|
||||
public ResultDomain<TbCrontabTask> createCrontab(@RequestBody TbCrontabTask crontabItem) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 删除定时任务
|
||||
* @param task 任务对象
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 更新定时任务
|
||||
* @param crontabItem
|
||||
* @return
|
||||
*/
|
||||
@DeleteMapping("/task")
|
||||
public ResultDomain<TbCrontabTask> deleteTask(@RequestBody TbCrontabTask task) {
|
||||
return crontabService.deleteTask(task.getID());
|
||||
@PutMapping("/crontabTask")
|
||||
public ResultDomain<TbCrontabTask> updateCrontab(@RequestBody TbCrontabTask crontabItem) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 根据ID查询任务
|
||||
* @param taskId 任务ID
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 删除定时任务
|
||||
* @param crontabItem
|
||||
* @return
|
||||
*/
|
||||
@GetMapping("/task/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> getTaskById(@PathVariable(value = "taskId") String taskId) {
|
||||
return crontabService.getTaskById(taskId);
|
||||
@DeleteMapping("/crontabTask")
|
||||
public ResultDomain<TbCrontabTask> deleteCrontab(@RequestBody TbCrontabTask crontabItem) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 查询任务列表
|
||||
* @param filter 过滤条件
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 获取定时任务分页列表
|
||||
* @param pageParam
|
||||
* @return
|
||||
*/
|
||||
@PostMapping("/task/list")
|
||||
public ResultDomain<TbCrontabTask> getTaskList(@RequestBody TbCrontabTask filter) {
|
||||
return crontabService.getTaskList(filter);
|
||||
@PostMapping("/crontabTaskPage")
|
||||
public ResultDomain<TbCrontabTask> getCrontabTask(@RequestBody PageRequest<TbCrontabTask> pageRequest) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 分页查询任务列表
|
||||
* @param pageRequest 分页请求对象
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
* 获取定时任务日志分页列表
|
||||
* @param pageRequest
|
||||
* @return
|
||||
*/
|
||||
@PostMapping("/task/page")
|
||||
public ResultDomain<TbCrontabTask> getTaskPage(@RequestBody PageRequest<TbCrontabTask> pageRequest) {
|
||||
TbCrontabTask filter = pageRequest.getFilter();
|
||||
PageParam pageParam = pageRequest.getPageParam();
|
||||
return crontabService.getTaskPage(filter, pageParam);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 启动定时任务
|
||||
* @param taskId 任务ID
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@PostMapping("/task/start/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> startTask(@PathVariable(value = "taskId") String taskId) {
|
||||
return crontabService.startTask(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 暂停定时任务
|
||||
* @param taskId 任务ID
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@PostMapping("/task/pause/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> pauseTask(@PathVariable(value = "taskId") String taskId) {
|
||||
return crontabService.pauseTask(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 立即执行一次任务
|
||||
* @param taskId 任务ID
|
||||
* @return ResultDomain<TbCrontabTask>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@PostMapping("/task/execute/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> executeTaskOnce(@PathVariable(value = "taskId") String taskId) {
|
||||
return crontabService.executeTaskOnce(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 验证Cron表达式
|
||||
* @param cronExpression Cron表达式
|
||||
* @return ResultDomain<String>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@GetMapping("/task/validate")
|
||||
public ResultDomain<String> validateCronExpression(@RequestParam String cronExpression) {
|
||||
return crontabService.validateCronExpression(cronExpression);
|
||||
}
|
||||
|
||||
// ----------------定时任务日志--------------------------------
|
||||
|
||||
/**
|
||||
* @description 根据任务ID查询日志
|
||||
* @param taskId 任务ID
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@GetMapping("/log/task/{taskId}")
|
||||
public ResultDomain<TbCrontabLog> getLogsByTaskId(@PathVariable(value = "taskId") String taskId) {
|
||||
return crontabService.getLogsByTaskId(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 查询日志列表
|
||||
* @param filter 过滤条件
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@PostMapping("/log/list")
|
||||
public ResultDomain<TbCrontabLog> getLogList(@RequestBody TbCrontabLog filter) {
|
||||
return crontabService.getLogList(filter);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 分页查询日志列表
|
||||
* @param pageRequest 分页请求对象
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@PostMapping("/log/page")
|
||||
public ResultDomain<TbCrontabLog> getLogPage(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
|
||||
TbCrontabLog filter = pageRequest.getFilter();
|
||||
PageParam pageParam = pageRequest.getPageParam();
|
||||
return crontabService.getLogPage(filter, pageParam);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 根据ID查询日志详情
|
||||
* @param logId 日志ID
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@GetMapping("/log/{logId}")
|
||||
public ResultDomain<TbCrontabLog> getLogById(@PathVariable(value = "logId") String logId) {
|
||||
return crontabService.getLogById(logId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 清理指定天数之前的日志
|
||||
* @param days 天数
|
||||
* @return ResultDomain<Integer>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@DeleteMapping("/log/clean/{days}")
|
||||
public ResultDomain<Integer> cleanLogs(@PathVariable(value = "days") Integer days) {
|
||||
return crontabService.cleanLogs(days);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 删除日志
|
||||
* @param log 日志对象
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@DeleteMapping("/log")
|
||||
public ResultDomain<TbCrontabLog> deleteLog(@RequestBody TbCrontabLog log) {
|
||||
return crontabService.deleteLog(log.getID());
|
||||
@PostMapping("/crontabTaskLogPage")
|
||||
public ResultDomain<TbCrontabLog> getCrontabTaskLog(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
package org.xyzh.crontab.controller;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
import org.xyzh.common.core.page.PageParam;
|
||||
import org.xyzh.common.core.page.PageRequest;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
import org.xyzh.common.vo.DataCollectionItemVO;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * REST controller for data collection items, mapped under
 * {@code /crontab/collection/item}.
 *
 * <p>Every handler below is currently a stub that returns {@code null}.
 *
 * @description Data collection item controller
 * @filename DataCollectionItemController.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
|
||||
@RestController
|
||||
@RequestMapping("/crontab/collection/item")
|
||||
public class DataCollectionItemController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemController.class);
|
||||
|
||||
@Autowired
|
||||
private DataCollectionItemService itemService;
|
||||
|
||||
/**
 * Lists all data collection items created by one task log.
 * TODO: not implemented yet — currently returns {@code null}.
 *
 * @description View all data collection items created for one task log
 * @param taskLogId id of the task log, taken from the URL path
 * @return currently always {@code null}
 */
|
||||
@GetMapping("/task/{taskLogId}")
|
||||
public ResultDomain<DataCollectionItemVO> getTaskLogDataCollectionItemList(@PathVariable String taskLogId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Returns a page of data collection items.
 * TODO: not implemented yet — currently returns {@code null}.
 *
 * @description Get a paged list of data collection items
 * @param pageRequest paging request read from the request body
 * @return currently always {@code null}
 */
|
||||
@PostMapping("/page")
|
||||
public ResultDomain<DataCollectionItemVO> getCollectionItemPage(@RequestBody PageRequest<DataCollectionItemVO> pageRequest) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Converts a data collection item into an article.
 * TODO: not implemented yet — currently returns {@code null}.
 *
 * @description Convert to article
 * @param dataCollectionItem the item to convert, read from the request body
 * @return currently always {@code null}
 */
|
||||
@PostMapping("/resource")
|
||||
public ResultDomain<DataCollectionItemVO> convertToArticle(@RequestBody DataCollectionItemVO dataCollectionItem) {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,14 +2,17 @@ package org.xyzh.crontab.enums;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.xyzh.crontab.task.DataBackupTask;
|
||||
import org.xyzh.crontab.task.LogCleanTask;
|
||||
import org.xyzh.crontab.task.SystemStatisticsTask;
|
||||
import org.xyzh.crontab.task.newsTask.NewsCrawlerTask;
|
||||
|
||||
// import org.xyzh.crontab.task.DataBackupTask;
|
||||
// import org.xyzh.crontab.task.LogCleanTask;
|
||||
// import org.xyzh.crontab.task.SystemStatisticsTask;
|
||||
|
||||
public enum TaskEnums {
|
||||
DATA_BACKUP("dataBackup", DataBackupTask.class),
|
||||
LOG_CLEAN("logClean", LogCleanTask.class),
|
||||
SystemStatistics("systemStatistics", SystemStatisticsTask.class);
|
||||
// DATA_BACKUP("dataBackup", DataBackupTask.class),
|
||||
// LOG_CLEAN("logClean", LogCleanTask.class),
|
||||
// SystemStatistics("systemStatistics", SystemStatisticsTask.class);
|
||||
NEWS_CRAWLER("newsCrawler", NewsCrawlerTask.class);
|
||||
|
||||
|
||||
private String name;
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
package org.xyzh.crontab.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.xyzh.common.core.page.PageParam;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @description 数据采集项数据访问层
|
||||
* @filename DataCollectionItemMapper.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
@Mapper
|
||||
public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionItem> {
|
||||
|
||||
/**
|
||||
* @description 根据来源URL查询采集项(用于去重)
|
||||
* @param sourceUrl 来源URL
|
||||
* @return TbDataCollectionItem 采集项
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
TbDataCollectionItem selectBySourceUrl(@Param("sourceUrl") String sourceUrl);
|
||||
|
||||
/**
|
||||
* @description 根据任务ID查询采集项列表
|
||||
* @param taskId 任务ID
|
||||
* @return List<TbDataCollectionItem> 采集项列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<TbDataCollectionItem> selectByTaskId(@Param("taskId") String taskId);
|
||||
|
||||
/**
|
||||
* @description 查询采集项列表
|
||||
* @param filter 过滤条件
|
||||
* @return List<TbDataCollectionItem> 采集项列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<TbDataCollectionItem> selectItemList(TbDataCollectionItem filter);
|
||||
|
||||
/**
|
||||
* @description 分页查询采集项列表
|
||||
* @param filter 过滤条件
|
||||
* @param pageParam 分页参数
|
||||
* @return List<TbDataCollectionItem> 采集项列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<TbDataCollectionItem> selectItemPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam);
|
||||
|
||||
/**
|
||||
* @description 统计采集项总数
|
||||
* @param filter 过滤条件
|
||||
* @return long 总数
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
long countItems(@Param("filter") TbDataCollectionItem filter);
|
||||
|
||||
/**
|
||||
* @description 批量插入采集项
|
||||
* @param itemList 采集项列表
|
||||
* @return int 影响行数
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
int batchInsertItems(@Param("itemList") List<TbDataCollectionItem> itemList);
|
||||
|
||||
/**
|
||||
* @description 根据状态统计数量
|
||||
* @param taskId 任务ID(可选)
|
||||
* @param status 状态
|
||||
* @return long 数量
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package org.xyzh.crontab.pojo;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class CrontabItem {
|
||||
|
||||
private String name;
|
||||
private List<CrontabMethod> methods;
|
||||
|
||||
@Data
|
||||
public class CrontabMethod {
|
||||
private String name;
|
||||
private String clazz;
|
||||
private String path;
|
||||
private Map<String, Object> params;
|
||||
}
|
||||
}
|
||||
@@ -65,7 +65,12 @@ public class TaskExecutor {
|
||||
if (task.getMethodParams() != null && !task.getMethodParams().isEmpty()) {
|
||||
// 如果有参数,需要解析参数类型
|
||||
method = bean.getClass().getMethod(task.getMethodName(), String.class);
|
||||
method.invoke(bean, task.getMethodParams());
|
||||
// 如果是newsCrewerTask,将taskId添加到参数前面
|
||||
String methodParams = task.getMethodParams();
|
||||
if ("newsCrewerTask".equals(task.getBeanName()) && task.getTaskId() != null) {
|
||||
methodParams = task.getTaskId() + "|" + methodParams;
|
||||
}
|
||||
method.invoke(bean, methodParams);
|
||||
} else {
|
||||
// 无参方法
|
||||
method = bean.getClass().getMethod(task.getMethodName());
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
package org.xyzh.crontab.service;
|
||||
|
||||
import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
|
||||
/**
|
||||
* @description Service interface for collected data items (extends the API interface and adds no members of its own)
|
||||
* @filename NCDataCollectionItemService.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
public interface NCDataCollectionItemService extends DataCollectionItemService {
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,479 @@
|
||||
package org.xyzh.crontab.service.impl;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.api.news.resource.ResourceService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
import org.xyzh.common.core.page.PageDomain;
|
||||
import org.xyzh.common.core.page.PageParam;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
import org.xyzh.common.dto.resource.TbResource;
|
||||
import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.common.vo.DataCollectionItemVO;
|
||||
import org.xyzh.common.vo.ResourceVO;
|
||||
import org.xyzh.crontab.mapper.DataCollectionItemMapper;
|
||||
import org.xyzh.crontab.mapper.CrontabTaskMapper;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabTask;
|
||||
import org.xyzh.system.utils.LoginUtil;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* @description 数据采集项服务实现类
|
||||
* @filename DataCollectionItemServiceImpl.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
@Service
|
||||
public class DataCollectionItemServiceImpl implements DataCollectionItemService {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemServiceImpl.class);
|
||||
|
||||
@Autowired
|
||||
private DataCollectionItemMapper itemMapper;
|
||||
|
||||
@Autowired
|
||||
private CrontabTaskMapper taskMapper;
|
||||
|
||||
@Autowired
|
||||
private ResourceService resourceService;
|
||||
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<TbDataCollectionItem> createItem(TbDataCollectionItem item) {
|
||||
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
// 检查URL是否已存在(去重)
|
||||
if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
|
||||
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(item.getSourceUrl());
|
||||
if (existing != null) {
|
||||
resultDomain.fail("该文章已存在,URL: " + item.getSourceUrl());
|
||||
return resultDomain;
|
||||
}
|
||||
}
|
||||
|
||||
// 生成ID
|
||||
item.setID(IDUtils.generateID());
|
||||
item.setCreateTime(new Date());
|
||||
item.setDeleted(false);
|
||||
|
||||
// 默认值
|
||||
if (item.getStatus() == null) {
|
||||
item.setStatus(0); // 默认未处理
|
||||
}
|
||||
if (item.getCrawlTime() == null) {
|
||||
item.setCrawlTime(new Date());
|
||||
}
|
||||
|
||||
int result = itemMapper.insert(item);
|
||||
if (result > 0) {
|
||||
logger.info("创建采集项成功: {}", item.getTitle());
|
||||
resultDomain.success("创建采集项成功", item);
|
||||
} else {
|
||||
resultDomain.fail("创建采集项失败");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("创建采集项异常: ", e);
|
||||
resultDomain.fail("创建采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList) {
|
||||
ResultDomain<Integer> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemList == null || itemList.isEmpty()) {
|
||||
resultDomain.fail("采集项列表为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
int successCount = 0;
|
||||
Date now = new Date();
|
||||
|
||||
for (TbDataCollectionItem item : itemList) {
|
||||
// 检查URL是否已存在(去重)
|
||||
if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
|
||||
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(item.getSourceUrl());
|
||||
if (existing != null) {
|
||||
logger.debug("跳过已存在的采集项: {}", item.getSourceUrl());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// 设置默认值
|
||||
item.setID(IDUtils.generateID());
|
||||
item.setCreateTime(now);
|
||||
item.setDeleted(false);
|
||||
if (item.getStatus() == null) {
|
||||
item.setStatus(0);
|
||||
}
|
||||
if (item.getCrawlTime() == null) {
|
||||
item.setCrawlTime(now);
|
||||
}
|
||||
|
||||
itemMapper.insert(item);
|
||||
successCount++;
|
||||
}
|
||||
|
||||
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
|
||||
resultDomain.success("批量创建采集项成功", successCount);
|
||||
} catch (Exception e) {
|
||||
logger.error("批量创建采集项异常: ", e);
|
||||
resultDomain.fail("批量创建采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<TbDataCollectionItem> updateItem(TbDataCollectionItem item) {
|
||||
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (item.getID() == null) {
|
||||
resultDomain.fail("采集项ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
item.setUpdateTime(new Date());
|
||||
int result = itemMapper.updateById(item);
|
||||
|
||||
if (result > 0) {
|
||||
logger.info("更新采集项成功: {}", item.getID());
|
||||
resultDomain.success("更新采集项成功", item);
|
||||
} else {
|
||||
resultDomain.fail("更新采集项失败");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("更新采集项异常: ", e);
|
||||
resultDomain.fail("更新采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<TbDataCollectionItem> deleteItem(String itemId) {
|
||||
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemId == null || itemId.isEmpty()) {
|
||||
resultDomain.fail("采集项ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
int result = itemMapper.deleteById(itemId);
|
||||
if (result > 0) {
|
||||
logger.info("删除采集项成功,ID: {}", itemId);
|
||||
resultDomain.success("删除采集项成功", (TbDataCollectionItem) null);
|
||||
} else {
|
||||
resultDomain.fail("删除采集项失败");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("删除采集项异常: ", e);
|
||||
resultDomain.fail("删除采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<DataCollectionItemVO> getItemById(String itemId) {
|
||||
ResultDomain<DataCollectionItemVO> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemId == null || itemId.isEmpty()) {
|
||||
resultDomain.fail("采集项ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
TbDataCollectionItem item = itemMapper.selectById(itemId);
|
||||
if (item != null) {
|
||||
DataCollectionItemVO vo = buildVO(item);
|
||||
resultDomain.success("查询成功", vo);
|
||||
} else {
|
||||
resultDomain.fail("采集项不存在");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("查询采集项异常: ", e);
|
||||
resultDomain.fail("查询采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<DataCollectionItemVO> getItemList(TbDataCollectionItem filter) {
|
||||
ResultDomain<DataCollectionItemVO> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (filter == null) {
|
||||
filter = new TbDataCollectionItem();
|
||||
}
|
||||
filter.setDeleted(false);
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectItemList(filter);
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
resultDomain.success("查询成功", voList);
|
||||
} catch (Exception e) {
|
||||
logger.error("查询采集项列表异常: ", e);
|
||||
resultDomain.fail("查询采集项列表异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<DataCollectionItemVO> getItemPage(TbDataCollectionItem filter, PageParam pageParam) {
|
||||
ResultDomain<DataCollectionItemVO> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (filter == null) {
|
||||
filter = new TbDataCollectionItem();
|
||||
}
|
||||
filter.setDeleted(false);
|
||||
|
||||
if (pageParam == null) {
|
||||
pageParam = new PageParam();
|
||||
}
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectItemPage(filter, pageParam);
|
||||
long total = itemMapper.countItems(filter);
|
||||
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
PageDomain<DataCollectionItemVO> pageDomain = new PageDomain<>();
|
||||
pageDomain.setDataList(voList);
|
||||
pageParam.setTotalElements(total);
|
||||
pageParam.setTotalPages((int) Math.ceil((double) total / pageParam.getPageSize()));
|
||||
pageDomain.setPageParam(pageParam);
|
||||
|
||||
resultDomain.success("查询成功", pageDomain);
|
||||
} catch (Exception e) {
|
||||
logger.error("分页查询采集项异常: ", e);
|
||||
resultDomain.fail("分页查询采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<DataCollectionItemVO> getItemsByTaskId(String taskId) {
|
||||
ResultDomain<DataCollectionItemVO> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (taskId == null || taskId.isEmpty()) {
|
||||
resultDomain.fail("任务ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectByTaskId(taskId);
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
resultDomain.success("查询成功", voList);
|
||||
} catch (Exception e) {
|
||||
logger.error("根据任务ID查询采集项异常: ", e);
|
||||
resultDomain.fail("根据任务ID查询采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<String> convertToResource(String itemId, String tagId) {
|
||||
ResultDomain<String> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemId == null || itemId.isEmpty()) {
|
||||
resultDomain.fail("采集项ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
if (tagId == null || tagId.isEmpty()) {
|
||||
resultDomain.fail("标签ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
// 查询采集项
|
||||
TbDataCollectionItem item = itemMapper.selectById(itemId);
|
||||
if (item == null) {
|
||||
resultDomain.fail("采集项不存在");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
if (item.getStatus() == 1) {
|
||||
resultDomain.fail("该采集项已转换为资源");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
// 创建资源
|
||||
TbResource resource = new TbResource();
|
||||
resource.setResourceID(IDUtils.generateID());
|
||||
resource.setTitle(item.getTitle());
|
||||
resource.setContent(item.getContent());
|
||||
resource.setSummary(item.getSummary());
|
||||
resource.setCoverImage(item.getCoverImage());
|
||||
resource.setTagID(tagId);
|
||||
resource.setAuthor(item.getAuthor());
|
||||
resource.setSource(item.getSource());
|
||||
resource.setSourceUrl(item.getSourceUrl());
|
||||
resource.setPublishTime(item.getPublishTime() != null ? item.getPublishTime() : new Date());
|
||||
resource.setStatus(1); // 已发布
|
||||
resource.setViewCount(0);
|
||||
resource.setLikeCount(0);
|
||||
resource.setCollectCount(0);
|
||||
resource.setIsRecommend(false);
|
||||
resource.setIsBanner(false);
|
||||
resource.setCreateTime(new Date());
|
||||
resource.setDeleted(false);
|
||||
|
||||
ResourceVO resourceVO = new ResourceVO();
|
||||
resourceVO.setResource(resource);
|
||||
|
||||
ResultDomain<ResourceVO> createResult = resourceService.createResource(resourceVO);
|
||||
if (!createResult.isSuccess()) {
|
||||
resultDomain.fail("转换为资源失败: " + createResult.getMessage());
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
// 更新采集项状态
|
||||
item.setStatus(1); // 已转换为资源
|
||||
item.setResourceId(resource.getResourceID());
|
||||
item.setProcessTime(new Date());
|
||||
item.setProcessor(LoginUtil.getCurrentUserId());
|
||||
itemMapper.updateById(item);
|
||||
|
||||
logger.info("采集项转换为资源成功,采集项ID: {}, 资源ID: {}", itemId, resource.getResourceID());
|
||||
resultDomain.success("转换为资源成功", resource.getResourceID());
|
||||
} catch (Exception e) {
|
||||
logger.error("转换为资源异常: ", e);
|
||||
resultDomain.fail("转换为资源异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<Integer> batchConvertToResource(List<String> itemIds, String tagId) {
|
||||
ResultDomain<Integer> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemIds == null || itemIds.isEmpty()) {
|
||||
resultDomain.fail("采集项ID列表为空");
|
||||
return resultDomain;
|
||||
}
|
||||
if (tagId == null || tagId.isEmpty()) {
|
||||
resultDomain.fail("标签ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
int successCount = 0;
|
||||
for (String itemId : itemIds) {
|
||||
ResultDomain<String> convertResult = convertToResource(itemId, tagId);
|
||||
if (convertResult.isSuccess()) {
|
||||
successCount++;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("批量转换为资源完成,共{}条,成功{}条", itemIds.size(), successCount);
|
||||
resultDomain.success("批量转换为资源完成", successCount);
|
||||
} catch (Exception e) {
|
||||
logger.error("批量转换为资源异常: ", e);
|
||||
resultDomain.fail("批量转换为资源异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<TbDataCollectionItem> ignoreItem(String itemId) {
|
||||
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemId == null || itemId.isEmpty()) {
|
||||
resultDomain.fail("采集项ID不能为空");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
TbDataCollectionItem item = itemMapper.selectById(itemId);
|
||||
if (item == null) {
|
||||
resultDomain.fail("采集项不存在");
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
item.setStatus(2); // 已忽略
|
||||
item.setProcessTime(new Date());
|
||||
item.setProcessor(LoginUtil.getCurrentUserId());
|
||||
itemMapper.updateById(item);
|
||||
|
||||
logger.info("忽略采集项成功,ID: {}", itemId);
|
||||
resultDomain.success("忽略采集项成功", item);
|
||||
} catch (Exception e) {
|
||||
logger.error("忽略采集项异常: ", e);
|
||||
resultDomain.fail("忽略采集项异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<Long> countByStatus(String taskId, Integer status) {
|
||||
ResultDomain<Long> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
long count = itemMapper.countByStatus(taskId, status);
|
||||
resultDomain.success("统计成功", count);
|
||||
} catch (Exception e) {
|
||||
logger.error("统计采集项数量异常: ", e);
|
||||
resultDomain.fail("统计采集项数量异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 构建VO对象
|
||||
* @param item 采集项
|
||||
* @return DataCollectionItemVO
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
private DataCollectionItemVO buildVO(TbDataCollectionItem item) {
|
||||
DataCollectionItemVO vo = new DataCollectionItemVO();
|
||||
vo.setItem(item);
|
||||
|
||||
// 查询关联的定时任务
|
||||
if (item.getTaskId() != null && !item.getTaskId().isEmpty()) {
|
||||
TbCrontabTask task = taskMapper.selectTaskById(item.getTaskId());
|
||||
vo.setTask(task);
|
||||
}
|
||||
|
||||
// 设置状态文本
|
||||
String statusText = "未处理";
|
||||
if (item.getStatus() != null) {
|
||||
switch (item.getStatus()) {
|
||||
case 0:
|
||||
statusText = "未处理";
|
||||
break;
|
||||
case 1:
|
||||
statusText = "已转换为资源";
|
||||
break;
|
||||
case 2:
|
||||
statusText = "已忽略";
|
||||
break;
|
||||
default:
|
||||
statusText = "未知";
|
||||
}
|
||||
}
|
||||
vo.setStatusText(statusText);
|
||||
|
||||
// 设置操作权限
|
||||
vo.setCanEdit(item.getStatus() == null || item.getStatus() == 0 || item.getStatus() == 2);
|
||||
vo.setCanConvert(item.getStatus() == null || item.getStatus() == 0);
|
||||
|
||||
return vo;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,60 +1,60 @@
|
||||
package org.xyzh.crontab.task;
|
||||
// package org.xyzh.crontab.task;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
// import org.slf4j.Logger;
|
||||
// import org.slf4j.LoggerFactory;
|
||||
// import org.springframework.stereotype.Component;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
// import java.text.SimpleDateFormat;
|
||||
// import java.util.Date;
|
||||
|
||||
/**
|
||||
* @description 数据备份任务
|
||||
* @filename DataBackupTask.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@Component("dataBackupTask")
|
||||
public class DataBackupTask {
|
||||
// /**
|
||||
// * @description 数据备份任务
|
||||
// * @filename DataBackupTask.java
|
||||
// * @author yslg
|
||||
// * @copyright xyzh
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// @Component("dataBackupTask")
|
||||
// public class DataBackupTask {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class);
|
||||
// private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class);
|
||||
|
||||
/**
|
||||
* @description 执行数据备份
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute() {
|
||||
logger.info("开始执行数据备份任务...");
|
||||
// /**
|
||||
// * @description 执行数据备份
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute() {
|
||||
// logger.info("开始执行数据备份任务...");
|
||||
|
||||
try {
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");
|
||||
String backupTime = sdf.format(new Date());
|
||||
// try {
|
||||
// SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");
|
||||
// String backupTime = sdf.format(new Date());
|
||||
|
||||
// TODO: 实现数据备份逻辑
|
||||
// 1. 备份数据库
|
||||
// 2. 备份文件
|
||||
// 3. 压缩备份文件
|
||||
// 4. 上传到备份服务器或云存储
|
||||
// // TODO: 实现数据备份逻辑
|
||||
// // 1. 备份数据库
|
||||
// // 2. 备份文件
|
||||
// // 3. 压缩备份文件
|
||||
// // 4. 上传到备份服务器或云存储
|
||||
|
||||
Thread.sleep(2000); // 模拟执行
|
||||
// Thread.sleep(2000); // 模拟执行
|
||||
|
||||
logger.info("数据备份任务执行完成,备份标识: {}", backupTime);
|
||||
} catch (Exception e) {
|
||||
logger.error("数据备份任务执行失败: ", e);
|
||||
throw new RuntimeException("数据备份任务执行失败", e);
|
||||
}
|
||||
}
|
||||
// logger.info("数据备份任务执行完成,备份标识: {}", backupTime);
|
||||
// } catch (Exception e) {
|
||||
// logger.error("数据备份任务执行失败: ", e);
|
||||
// throw new RuntimeException("数据备份任务执行失败", e);
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* @description 执行带参数的备份任务
|
||||
* @param params 参数(备份类型:full-全量,incremental-增量)
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute(String params) {
|
||||
logger.info("开始执行数据备份任务,备份类型: {}", params);
|
||||
execute();
|
||||
}
|
||||
}
|
||||
// /**
|
||||
// * @description 执行带参数的备份任务
|
||||
// * @param params 参数(备份类型:full-全量,incremental-增量)
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute(String params) {
|
||||
// logger.info("开始执行数据备份任务,备份类型: {}", params);
|
||||
// execute();
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
@@ -1,68 +1,68 @@
|
||||
package org.xyzh.crontab.task;
|
||||
// package org.xyzh.crontab.task;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.xyzh.crontab.mapper.CrontabLogMapper;
|
||||
// import org.slf4j.Logger;
|
||||
// import org.slf4j.LoggerFactory;
|
||||
// import org.springframework.beans.factory.annotation.Autowired;
|
||||
// import org.springframework.stereotype.Component;
|
||||
// import org.xyzh.crontab.mapper.CrontabLogMapper;
|
||||
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
// import java.util.Calendar;
|
||||
// import java.util.Date;
|
||||
|
||||
/**
|
||||
* @description 清理过期日志任务
|
||||
* @filename LogCleanTask.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@Component("logCleanTask")
|
||||
public class LogCleanTask {
|
||||
// /**
|
||||
// * @description 清理过期日志任务
|
||||
// * @filename LogCleanTask.java
|
||||
// * @author yslg
|
||||
// * @copyright xyzh
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// @Component("logCleanTask")
|
||||
// public class LogCleanTask {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(LogCleanTask.class);
|
||||
// private static final Logger logger = LoggerFactory.getLogger(LogCleanTask.class);
|
||||
|
||||
@Autowired
|
||||
private CrontabLogMapper logMapper;
|
||||
// @Autowired
|
||||
// private CrontabLogMapper logMapper;
|
||||
|
||||
/**
|
||||
* @description 执行日志清理,默认清理30天前的日志
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute() {
|
||||
execute("30");
|
||||
}
|
||||
// /**
|
||||
// * @description 执行日志清理,默认清理30天前的日志
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute() {
|
||||
// execute("30");
|
||||
// }
|
||||
|
||||
/**
|
||||
* @description 执行日志清理
|
||||
* @param params 天数参数
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute(String params) {
|
||||
logger.info("开始执行日志清理任务...");
|
||||
// /**
|
||||
// * @description 执行日志清理
|
||||
// * @param params 天数参数
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute(String params) {
|
||||
// logger.info("开始执行日志清理任务...");
|
||||
|
||||
try {
|
||||
int days = 30; // 默认30天
|
||||
if (params != null && !params.isEmpty()) {
|
||||
try {
|
||||
days = Integer.parseInt(params);
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("参数格式错误,使用默认值30天");
|
||||
}
|
||||
}
|
||||
// try {
|
||||
// int days = 30; // 默认30天
|
||||
// if (params != null && !params.isEmpty()) {
|
||||
// try {
|
||||
// days = Integer.parseInt(params);
|
||||
// } catch (NumberFormatException e) {
|
||||
// logger.warn("参数格式错误,使用默认值30天");
|
||||
// }
|
||||
// }
|
||||
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
calendar.add(Calendar.DAY_OF_MONTH, -days);
|
||||
Date beforeDate = calendar.getTime();
|
||||
// Calendar calendar = Calendar.getInstance();
|
||||
// calendar.add(Calendar.DAY_OF_MONTH, -days);
|
||||
// Date beforeDate = calendar.getTime();
|
||||
|
||||
int count = logMapper.cleanLogsByDate(beforeDate);
|
||||
// int count = logMapper.cleanLogsByDate(beforeDate);
|
||||
|
||||
logger.info("日志清理任务执行完成,共清理{}条日志", count);
|
||||
} catch (Exception e) {
|
||||
logger.error("日志清理任务执行失败: ", e);
|
||||
throw new RuntimeException("日志清理任务执行失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
// logger.info("日志清理任务执行完成,共清理{}条日志", count);
|
||||
// } catch (Exception e) {
|
||||
// logger.error("日志清理任务执行失败: ", e);
|
||||
// throw new RuntimeException("日志清理任务执行失败", e);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
@@ -1,54 +1,54 @@
|
||||
package org.xyzh.crontab.task;
|
||||
// package org.xyzh.crontab.task;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
// import org.slf4j.Logger;
|
||||
// import org.slf4j.LoggerFactory;
|
||||
// import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* @description 系统数据统计任务
|
||||
* @filename SystemStatisticsTask.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
@Component("systemStatisticsTask")
|
||||
public class SystemStatisticsTask {
|
||||
// /**
|
||||
// * @description 系统数据统计任务
|
||||
// * @filename SystemStatisticsTask.java
|
||||
// * @author yslg
|
||||
// * @copyright xyzh
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// @Component("systemStatisticsTask")
|
||||
// public class SystemStatisticsTask {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SystemStatisticsTask.class);
|
||||
// private static final Logger logger = LoggerFactory.getLogger(SystemStatisticsTask.class);
|
||||
|
||||
/**
|
||||
* @description 执行系统数据统计
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute() {
|
||||
logger.info("开始执行系统数据统计任务...");
|
||||
// /**
|
||||
// * @description 执行系统数据统计
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute() {
|
||||
// logger.info("开始执行系统数据统计任务...");
|
||||
|
||||
try {
|
||||
// TODO: 实现系统数据统计逻辑
|
||||
// 1. 统计用户数据
|
||||
// 2. 统计资源数据
|
||||
// 3. 统计访问数据
|
||||
// 4. 生成统计报告
|
||||
// try {
|
||||
// // TODO: 实现系统数据统计逻辑
|
||||
// // 1. 统计用户数据
|
||||
// // 2. 统计资源数据
|
||||
// // 3. 统计访问数据
|
||||
// // 4. 生成统计报告
|
||||
|
||||
Thread.sleep(1000); // 模拟执行
|
||||
// Thread.sleep(1000); // 模拟执行
|
||||
|
||||
logger.info("系统数据统计任务执行完成");
|
||||
} catch (Exception e) {
|
||||
logger.error("系统数据统计任务执行失败: ", e);
|
||||
throw new RuntimeException("系统数据统计任务执行失败", e);
|
||||
}
|
||||
}
|
||||
// logger.info("系统数据统计任务执行完成");
|
||||
// } catch (Exception e) {
|
||||
// logger.error("系统数据统计任务执行失败: ", e);
|
||||
// throw new RuntimeException("系统数据统计任务执行失败", e);
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* @description 执行带参数的统计任务
|
||||
* @param params 参数
|
||||
* @author yslg
|
||||
* @since 2025-10-25
|
||||
*/
|
||||
public void execute(String params) {
|
||||
logger.info("开始执行系统数据统计任务,参数: {}", params);
|
||||
execute();
|
||||
}
|
||||
}
|
||||
// /**
|
||||
// * @description 执行带参数的统计任务
|
||||
// * @param params 参数
|
||||
// * @author yslg
|
||||
// * @since 2025-10-25
|
||||
// */
|
||||
// public void execute(String params) {
|
||||
// logger.info("开始执行系统数据统计任务,参数: {}", params);
|
||||
// execute();
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
package org.xyzh.crontab.task.newsTask;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.xyzh.common.dto.resource.TbResource;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* @description 爬虫返回文章结构
|
||||
* @filename ArticleStruct.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-11-10
|
||||
*/
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class ArticleStruct {
|
||||
|
||||
private String title;
|
||||
private String url;
|
||||
private String publishTime;
|
||||
private String author;
|
||||
private String source;
|
||||
private List<RowStruct> contentRows;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class RowStruct {
|
||||
// private String tag;
|
||||
// private String style; // ttext-indent: 2em;->\t\t
|
||||
private String content; // 完整携带样式的p标签
|
||||
}
|
||||
|
||||
public TbResource toTbResource(){
|
||||
TbResource tbResource = new TbResource();
|
||||
tbResource.setTitle(this.title);
|
||||
// tbResource.setUrl(this.url);
|
||||
// tbResource.setPublishTime(this.publishTime);
|
||||
// tbResource.setAuthor(this.author);
|
||||
// tbResource.setSource(this.source);
|
||||
// tbResource.setContentRows(this.contentRows);
|
||||
return tbResource;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,328 @@
|
||||
package org.xyzh.crontab.task.newsTask;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* @description 新闻爬虫定时任务
|
||||
* @filename NewsCrewerTask.java
|
||||
* @author yslg
|
||||
* @copyright xyzh
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
@Component("newsCrewerTask")
|
||||
public class NewsCrawlerTask {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(NewsCrawlerTask.class);
|
||||
|
||||
@Value("${crewer.python.path:python}")
|
||||
private String pythonPath;
|
||||
|
||||
@Value("${crewer.script.path:../schoolNewsCrewer}")
|
||||
private String scriptPath;
|
||||
|
||||
@Value("${crewer.timeout:300}")
|
||||
private int timeout;
|
||||
|
||||
@Autowired
|
||||
private DataCollectionItemService itemService;
|
||||
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
/**
|
||||
* @description 执行新闻爬虫任务(默认爬取人民日报政治类新闻20条)
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
public void execute() {
|
||||
execute("rmrb,politics,20");
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 执行新闻爬虫任务
|
||||
* @param params 参数格式: "source,category,limit" 或 "taskId|source,category,limit"
|
||||
* 如果包含taskId,格式为: "taskId|source,category,limit"
|
||||
* source: 新闻源(rmrb-人民日报)
|
||||
* category: 分类(politics-政治, society-社会等)
|
||||
* limit: 爬取数量
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
public void execute(String params) {
|
||||
logger.info("开始执行新闻爬虫任务,参数: {}", params);
|
||||
|
||||
try {
|
||||
// 解析参数(支持taskId|source,category,limit格式)
|
||||
String taskId = null;
|
||||
String actualParams = params;
|
||||
|
||||
if (params.contains("|")) {
|
||||
String[] parts = params.split("\\|", 2);
|
||||
taskId = parts[0];
|
||||
actualParams = parts[1];
|
||||
}
|
||||
|
||||
String[] paramArray = actualParams.split(",");
|
||||
String source = paramArray.length > 0 ? paramArray[0] : "rmrb";
|
||||
String category = paramArray.length > 1 ? paramArray[1] : "politics";
|
||||
String limit = paramArray.length > 2 ? paramArray[2] : "20";
|
||||
|
||||
logger.info("爬虫参数 - 来源: {}, 分类: {}, 数量: {}", source, category, limit);
|
||||
|
||||
// 验证Python和脚本路径
|
||||
Path scriptDir = Paths.get(scriptPath);
|
||||
if (!Files.exists(scriptDir)) {
|
||||
throw new RuntimeException("爬虫脚本目录不存在: " + scriptPath);
|
||||
}
|
||||
|
||||
// 构建Python命令
|
||||
List<String> command = new ArrayList<>();
|
||||
|
||||
// 检查是否是Windows系统
|
||||
String os = System.getProperty("os.name").toLowerCase();
|
||||
if (os.contains("win")) {
|
||||
command.add("cmd");
|
||||
command.add("/c");
|
||||
command.add(pythonPath);
|
||||
} else {
|
||||
command.add(pythonPath);
|
||||
}
|
||||
|
||||
command.add("main.py");
|
||||
command.add(category);
|
||||
command.add(limit);
|
||||
|
||||
// 生成输出文件名
|
||||
String timestamp = String.valueOf(System.currentTimeMillis());
|
||||
String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp);
|
||||
command.add(outputFile);
|
||||
|
||||
logger.info("执行命令: {}", String.join(" ", command));
|
||||
|
||||
// 创建进程构建器
|
||||
ProcessBuilder processBuilder = new ProcessBuilder(command);
|
||||
processBuilder.directory(scriptDir.toFile());
|
||||
processBuilder.redirectErrorStream(true);
|
||||
|
||||
// 启动进程
|
||||
Process process = processBuilder.start();
|
||||
|
||||
// 读取输出
|
||||
StringBuilder output = new StringBuilder();
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
output.append(line).append("\n");
|
||||
logger.debug("Python输出: {}", line);
|
||||
}
|
||||
}
|
||||
|
||||
// 等待进程结束
|
||||
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
|
||||
|
||||
if (!finished) {
|
||||
process.destroy();
|
||||
throw new RuntimeException("爬虫任务超时(超过" + timeout + "秒)");
|
||||
}
|
||||
|
||||
int exitCode = process.exitValue();
|
||||
|
||||
if (exitCode == 0) {
|
||||
logger.info("新闻爬虫任务执行成功");
|
||||
|
||||
// 读取并解析结果文件
|
||||
Path outputPath = scriptDir.resolve(outputFile);
|
||||
if (Files.exists(outputPath)) {
|
||||
String jsonContent = Files.readString(outputPath);
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
List<Map<String, Object>> newsList = mapper.readValue(
|
||||
jsonContent,
|
||||
List.class
|
||||
);
|
||||
|
||||
logger.info("成功爬取 {} 条新闻", newsList.size());
|
||||
|
||||
// 保存新闻数据到数据库
|
||||
if (taskId != null && !taskId.isEmpty()) {
|
||||
saveNewsToDatabase(newsList, taskId, source, category);
|
||||
} else {
|
||||
logger.warn("未提供任务ID,跳过数据保存");
|
||||
}
|
||||
|
||||
} else {
|
||||
logger.warn("输出文件不存在: {}", outputFile);
|
||||
}
|
||||
|
||||
} else {
|
||||
logger.error("新闻爬虫任务执行失败,退出码: {}", exitCode);
|
||||
logger.error("输出内容:\n{}", output.toString());
|
||||
throw new RuntimeException("爬虫任务执行失败,退出码: " + exitCode);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("新闻爬虫任务执行异常: ", e);
|
||||
throw new RuntimeException("新闻爬虫任务执行异常", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 测试Python环境
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
public void testPythonEnvironment() {
|
||||
logger.info("测试Python环境...");
|
||||
|
||||
try {
|
||||
ProcessBuilder pb = new ProcessBuilder(pythonPath, "--version");
|
||||
Process process = pb.start();
|
||||
|
||||
BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream())
|
||||
);
|
||||
|
||||
String version = reader.readLine();
|
||||
int exitCode = process.waitFor();
|
||||
|
||||
if (exitCode == 0) {
|
||||
logger.info("Python环境正常: {}", version);
|
||||
} else {
|
||||
logger.error("Python环境异常");
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("测试Python环境失败: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 将新闻数据保存到数据库
|
||||
* @param newsList 新闻列表
|
||||
* @param taskId 任务ID
|
||||
* @param source 新闻来源
|
||||
* @param category 分类
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
private void saveNewsToDatabase(List<Map<String, Object>> newsList, String taskId, String source, String category) {
|
||||
logger.info("开始保存 {} 条新闻到数据库,任务ID: {}", newsList.size(), taskId);
|
||||
|
||||
try {
|
||||
List<TbDataCollectionItem> itemList = new ArrayList<>();
|
||||
Date now = new Date();
|
||||
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
for (Map<String, Object> news : newsList) {
|
||||
try {
|
||||
TbDataCollectionItem item = new TbDataCollectionItem();
|
||||
|
||||
// 基本信息
|
||||
item.setTaskId(taskId);
|
||||
item.setTitle(getStringValue(news, "title"));
|
||||
item.setContent(getStringValue(news, "content"));
|
||||
item.setSummary(getStringValue(news, "summary"));
|
||||
item.setSource(source.equals("rmrb") ? "人民日报" : source);
|
||||
item.setSourceUrl(getStringValue(news, "url"));
|
||||
item.setCategory(category);
|
||||
item.setAuthor(getStringValue(news, "author"));
|
||||
|
||||
// 发布时间
|
||||
String publishTimeStr = getStringValue(news, "publish_time");
|
||||
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
|
||||
try {
|
||||
item.setPublishTime(dateFormat.parse(publishTimeStr));
|
||||
} catch (Exception e) {
|
||||
logger.warn("解析发布时间失败: {}", publishTimeStr);
|
||||
item.setPublishTime(now);
|
||||
}
|
||||
} else {
|
||||
item.setPublishTime(now);
|
||||
}
|
||||
|
||||
// 封面图片
|
||||
item.setCoverImage(getStringValue(news, "cover_image"));
|
||||
|
||||
// 图片列表(JSON格式)
|
||||
Object imagesObj = news.get("images");
|
||||
if (imagesObj != null) {
|
||||
if (imagesObj instanceof List) {
|
||||
item.setImages(objectMapper.writeValueAsString(imagesObj));
|
||||
} else if (imagesObj instanceof String) {
|
||||
item.setImages((String) imagesObj);
|
||||
}
|
||||
}
|
||||
|
||||
// 标签
|
||||
Object tagsObj = news.get("tags");
|
||||
if (tagsObj != null) {
|
||||
if (tagsObj instanceof List) {
|
||||
List<String> tags = (List<String>) tagsObj;
|
||||
item.setTags(String.join(",", tags));
|
||||
} else if (tagsObj instanceof String) {
|
||||
item.setTags((String) tagsObj);
|
||||
}
|
||||
}
|
||||
|
||||
// 状态和时间
|
||||
item.setStatus(0); // 未处理
|
||||
item.setCrawlTime(now);
|
||||
|
||||
itemList.add(item);
|
||||
} catch (Exception e) {
|
||||
logger.error("转换新闻数据失败: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
// 批量保存
|
||||
if (!itemList.isEmpty()) {
|
||||
ResultDomain<Integer> result = itemService.batchCreateItems(itemList);
|
||||
if (result.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", result.getData());
|
||||
} else {
|
||||
logger.error("保存新闻到数据库失败: {}", result.getMessage());
|
||||
}
|
||||
} else {
|
||||
logger.warn("没有有效的新闻数据需要保存");
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("保存新闻数据到数据库异常: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 从Map中安全获取字符串值
|
||||
* @param map Map对象
|
||||
* @param key 键
|
||||
* @return String 值
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
private String getStringValue(Map<String, Object> map, String key) {
|
||||
Object value = map.get(key);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return value.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,8 +5,9 @@ abstract public class NewsTask {
|
||||
|
||||
// 爬取网站目标
|
||||
private String target;
|
||||
// 爬取标题
|
||||
private String title;
|
||||
|
||||
// 爬取搜索
|
||||
private String query;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
package org.xyzh.crontab.task.newsTask;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * Simplified examples of calling Python from Java, meant for learning the core mechanics.
 *
 * <p>Rule followed by every example: each output pipe of the child process is either read,
 * redirected, or merged. An unread pipe eventually fills up and blocks the child forever.
 */
public class PythonExecutorExample {

    /**
     * Sends the child's stdout and stderr to this JVM's stdout, for examples that do not
     * read the output themselves.
     */
    private static ProcessBuilder forwardOutputToConsole(ProcessBuilder pb) {
        pb.redirectErrorStream(true);
        pb.redirectOutput(ProcessBuilder.Redirect.INHERIT);
        return pb;
    }

    /**
     * Example 1: the simplest call. Prints every stdout line and then the exit code.
     */
    public static void example1_Simple() throws Exception {
        // 1. Build the command.
        ProcessBuilder pb = new ProcessBuilder("python", "script.py", "arg1", "arg2");
        // stderr is not read below; hand it to the JVM's stderr so it cannot fill up.
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);

        // 2. Start the process.
        Process process = pb.start();

        // 3. Read the output; the reader is closed even if reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println("Python输出: " + line);
            }
        }

        // 4. Wait for the process to end.
        int exitCode = process.waitFor();
        System.out.println("退出码: " + exitCode);
    }

    /**
     * Example 2: obtain the result through standard output.
     *
     * @return all stdout lines concatenated without line separators
     */
    public static String example2_GetResult() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = pb.start();

        StringBuilder result = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
        }

        process.waitFor();
        return result.toString();
    }

    /**
     * Example 3: timeout control. The process is killed when it runs longer than 5 seconds.
     */
    public static void example3_WithTimeout() throws Exception {
        ProcessBuilder pb = forwardOutputToConsole(new ProcessBuilder("python", "script.py"));
        Process process = pb.start();

        // Timed wait (5 seconds).
        boolean finished = process.waitFor(5, TimeUnit.SECONDS);

        if (!finished) {
            // Timed out: terminate forcibly.
            process.destroyForcibly();
            System.out.println("任务超时");
        } else {
            int exitCode = process.exitValue();
            System.out.println("执行完成,退出码: " + exitCode);
        }
    }

    /**
     * Example 4: pass arguments on the command line.
     */
    public static void example4_PassArgs() throws Exception {
        List<String> command = new ArrayList<>();
        command.add("python");
        command.add("script.py");
        command.add("category=politics");
        command.add("limit=20");

        // Output is forwarded to the console here; read it from the process instead when
        // the caller needs the value (see example 2).
        ProcessBuilder pb = forwardOutputToConsole(new ProcessBuilder(command));
        Process process = pb.start();

        process.waitFor();
    }

    /**
     * Example 5: pass arguments through standard input and print the first output line.
     */
    public static void example5_PassArgsByStdin() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = pb.start();

        // Write the arguments to the child's stdin. Closing the writer also closes the
        // underlying stream, which is what signals end-of-input to Python.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
            writer.write("{\"category\":\"politics\",\"limit\":20}");
            writer.newLine();
            writer.flush();
        }

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String result = reader.readLine();
            System.out.println("结果: " + result);
        }

        process.waitFor();
    }

    /**
     * Example 6: handle the Windows/Linux difference.
     */
    public static void example6_CrossPlatform() throws Exception {
        List<String> command = new ArrayList<>();

        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            // On Windows, run through cmd.
            command.add("cmd");
            command.add("/c");
            command.add("python");
        } else {
            // On Linux/Mac, run the interpreter directly.
            command.add("python3");
        }

        command.add("script.py");

        ProcessBuilder pb = forwardOutputToConsole(new ProcessBuilder(command));
        Process process = pb.start();
        process.waitFor();
    }

    /**
     * Example 7: complete error handling with an effective timeout.
     *
     * <p>The merged output is captured in a temp file instead of being read from the pipe.
     * Reading a pipe to EOF only returns once the child exits, so a timed wait placed after
     * such a read could never time out.
     *
     * @throws RuntimeException on timeout, non-zero exit code, or any other failure
     */
    public static void example7_Complete() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");

        // Merge stdout and stderr.
        pb.redirectErrorStream(true);

        // Set the working directory.
        pb.directory(new File("/path/to/script"));

        Process process = null;
        File capturedOutput = null;
        try {
            capturedOutput = File.createTempFile("python-output-", ".log");
            pb.redirectOutput(capturedOutput);
            process = pb.start();

            // Wait for the end (with timeout).
            boolean finished = process.waitFor(30, TimeUnit.SECONDS);

            if (!finished) {
                process.destroyForcibly();
                throw new RuntimeException("任务超时");
            }

            // The process has ended, so the captured output is complete.
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(capturedOutput), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append("\n");
                }
            }

            int exitCode = process.exitValue();

            if (exitCode == 0) {
                System.out.println("执行成功");
                System.out.println("输出: " + output.toString());
            } else {
                System.err.println("执行失败,退出码: " + exitCode);
                System.err.println("错误输出: " + output.toString());
                throw new RuntimeException("Python执行失败");
            }

        } catch (InterruptedException e) {
            // Restore the interrupt flag before translating the exception.
            Thread.currentThread().interrupt();
            throw new RuntimeException("执行异常", e);
        } catch (Exception e) {
            throw new RuntimeException("执行异常", e);
        } finally {
            // Clean up: never leave the child or the temp file behind.
            if (process != null && process.isAlive()) {
                process.destroyForcibly();
            }
            if (capturedOutput != null && !capturedOutput.delete()) {
                capturedOutput.deleteOnExit();
            }
        }
    }

    /**
     * Example 8: asynchronous execution; the calling thread is not blocked.
     */
    public static void example8_Async() {
        new Thread(() -> {
            try {
                ProcessBuilder pb = new ProcessBuilder("python", "script.py");
                pb.redirectError(ProcessBuilder.Redirect.INHERIT);
                Process process = pb.start();

                // Read the output on the background thread.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println("后台输出: " + line);
                    }
                }

                process.waitFor();
                System.out.println("后台任务完成");

            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();

        System.out.println("主线程继续执行...");
    }
}
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
package org.xyzh.crontab.task.newsTask;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class ScriptDomain {
|
||||
|
||||
private String name;
|
||||
private String path;
|
||||
private String method;
|
||||
private String param;
|
||||
private String output;
|
||||
|
||||
|
||||
}
|
||||
34
schoolNewsServ/crontab/src/main/resources/appliaction.yml
Normal file
34
schoolNewsServ/crontab/src/main/resources/appliaction.yml
Normal file
@@ -0,0 +1,34 @@
|
||||
crawler:
|
||||
python:
|
||||
path: C:/Python312/python.exe
|
||||
base:
|
||||
path: F:/Project/schoolNews/schoolNewsCrawler
|
||||
script:
|
||||
- name: xxx爬虫
|
||||
path: crawler/xxx.py
|
||||
method: xxx
|
||||
param: xxx
|
||||
output: xxx
|
||||
|
||||
crontab:
|
||||
items: #可供前端选择的定时任务列表
|
||||
- name: 人民日报新闻爬取
|
||||
methods: #爬取方式
|
||||
- name: 关键字搜索爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/xxx.py
|
||||
params:
|
||||
query: String #搜索关键字
|
||||
total: Integer #总新闻数量
|
||||
- name: 排行榜爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/xxx.py
|
||||
- name: 往日精彩头条爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/xxx.py
|
||||
params:
|
||||
startDate: String #开始日期
|
||||
endDate: String #结束日期
|
||||
isYestoday: Boolean #是否是昨天
|
||||
|
||||
|
||||
Reference in New Issue
Block a user