temp定时任务修改

This commit is contained in:
2025-11-10 16:03:50 +08:00
parent e8b76278e9
commit 9adbd6d365
38 changed files with 2710 additions and 2032 deletions

View File

@@ -1,18 +0,0 @@
-- ========================================
-- Add the embedding_model_provider column
-- ========================================
-- Purpose: add the embedding-model provider column to tb_ai_knowledge
-- Date:    2025-11-06
-- Note:    safe to re-run; the ALTER is skipped when the column already exists
-- ========================================
USE `school_news`;

-- MySQL has no "ADD COLUMN IF NOT EXISTS", so look the column up first.
SET @column_exists := (
    SELECT COUNT(*)
    FROM information_schema.COLUMNS
    WHERE TABLE_SCHEMA = DATABASE()
      AND TABLE_NAME = 'tb_ai_knowledge'
      AND COLUMN_NAME = 'embedding_model_provider'
);

-- Run the ALTER only when the column is missing; otherwise execute a no-op.
SET @ddl := IF(
    @column_exists = 0,
    'ALTER TABLE `tb_ai_knowledge` ADD COLUMN `embedding_model_provider` VARCHAR(100) DEFAULT NULL COMMENT ''向量模型提供商'' AFTER `embedding_model`',
    'DO 0'
);
PREPARE add_column_stmt FROM @ddl;
EXECUTE add_column_stmt;
DEALLOCATE PREPARE add_column_stmt;

-- Report what actually happened.
SELECT IF(
    @column_exists = 0,
    'embedding_model_provider 字段添加成功!',
    'embedding_model_provider 字段已存在,已跳过'
) AS message;

View File

@@ -57,3 +57,37 @@ CREATE TABLE `tb_crontab_log` (
KEY `idx_start_time` (`start_time`), KEY `idx_start_time` (`start_time`),
KEY `idx_deleted` (`deleted`) KEY `idx_deleted` (`deleted`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='定时任务执行日志表'; ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='定时任务执行日志表';
-- ====================================================
-- Data collection item table (stores what the scheduled crawl tasks collected)
-- ====================================================
DROP TABLE IF EXISTS `tb_data_collection_item`;
CREATE TABLE `tb_data_collection_item` (
    `id` VARCHAR(64) NOT NULL COMMENT '主键ID',
    `task_id` VARCHAR(64) NOT NULL COMMENT '关联任务ID',
    `title` VARCHAR(255) NOT NULL COMMENT '文章标题',
    `content` LONGTEXT DEFAULT NULL COMMENT '文章内容HTML',
    `summary` VARCHAR(500) DEFAULT NULL COMMENT '文章摘要',
    `source` VARCHAR(255) DEFAULT NULL COMMENT '来源(如 人民日报)',
    `source_url` VARCHAR(500) DEFAULT NULL COMMENT '来源URL用于去重',
    `category` VARCHAR(100) DEFAULT NULL COMMENT '分类politics/society等',
    `author` VARCHAR(100) DEFAULT NULL COMMENT '作者',
    `publish_time` DATETIME DEFAULT NULL COMMENT '发布时间',
    `cover_image` VARCHAR(500) DEFAULT NULL COMMENT '封面图片URL',
    `images` TEXT DEFAULT NULL COMMENT '图片列表JSON',
    `tags` VARCHAR(500) DEFAULT NULL COMMENT '标签(逗号分隔)',
    -- Plain TINYINT rather than TINYINT(1): this column holds 0/1/2, and MySQL
    -- Connector/J by default (tinyInt1isBit=true) reports TINYINT(1) columns as
    -- BIT/boolean, so a generic read of the value 2 would come back as TRUE.
    `status` TINYINT NOT NULL DEFAULT 0 COMMENT '状态0未处理 1已转换为资源 2已忽略',
    `resource_id` VARCHAR(64) DEFAULT NULL COMMENT '转换后的资源ID',
    `crawl_time` DATETIME DEFAULT NULL COMMENT '爬取时间',
    `process_time` DATETIME DEFAULT NULL COMMENT '处理时间',
    `processor` VARCHAR(64) DEFAULT NULL COMMENT '处理人',
    `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    `update_time` DATETIME DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    `delete_time` DATETIME DEFAULT NULL COMMENT '删除时间',
    -- Soft-delete flag: a genuine 0/1 value, so TINYINT(1) is kept here.
    `deleted` TINYINT(1) NOT NULL DEFAULT 0 COMMENT '是否删除0:否 1:是)',
    PRIMARY KEY (`id`),
    KEY `idx_task_id` (`task_id`),
    KEY `idx_status` (`status`),
    KEY `idx_publish_time` (`publish_time`),
    -- Non-unique lookup index; the column comment says source_url is used for de-duplication.
    KEY `idx_source_url` (`source_url`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='数据采集项表';

View File

@@ -111,40 +111,3 @@ CREATE TABLE `tb_resource_tag` (
KEY `idx_resource` (`resource_id`), KEY `idx_resource` (`resource_id`),
KEY `idx_tag` (`tag_id`) KEY `idx_tag` (`tag_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='资源标签关联表'; ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='资源标签关联表';
-- Data collection configuration table: one row per configured collection source.
DROP TABLE IF EXISTS `tb_data_collection_config`;
CREATE TABLE `tb_data_collection_config` (
    -- identity and source definition
    `id`                VARCHAR(50)  NOT NULL                COMMENT '配置ID',
    `name`              VARCHAR(100) NOT NULL                COMMENT '配置名称',
    `source_url`        VARCHAR(500) NOT NULL                COMMENT '采集源URL',
    `source_type`       VARCHAR(50)  DEFAULT NULL            COMMENT '采集源类型',
    `frequency`         VARCHAR(20)  DEFAULT 'daily'         COMMENT '采集频率daily每天 weekly每周',
    `tag_id`            VARCHAR(50)  DEFAULT NULL            COMMENT '默认标签ID文章分类标签tagType=1',
    `status`            INT(4)       DEFAULT 1               COMMENT '状态0禁用 1启用',
    `last_collect_time` TIMESTAMP    NULL DEFAULT NULL       COMMENT '最后采集时间',
    -- audit columns
    `creator`           VARCHAR(50)  DEFAULT NULL            COMMENT '创建者',
    `updater`           VARCHAR(50)  DEFAULT NULL            COMMENT '更新者',
    `create_time`       TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    `update_time`       TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- soft delete
    `delete_time`       TIMESTAMP    NULL DEFAULT NULL       COMMENT '删除时间',
    `deleted`           TINYINT(1)   NOT NULL DEFAULT 0      COMMENT '是否删除',
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='数据采集配置表';
-- Data collection log table: one row per collection run of a configuration.
DROP TABLE IF EXISTS `tb_data_collection_log`;
CREATE TABLE `tb_data_collection_log` (
    `id`            VARCHAR(50) NOT NULL COMMENT '记录ID',
    `config_id`     VARCHAR(50) NOT NULL COMMENT '配置ID',
    -- per-run counters
    `collect_count` INT(11)     DEFAULT 0 COMMENT '采集数量',
    `success_count` INT(11)     DEFAULT 0 COMMENT '成功数量',
    `fail_count`    INT(11)     DEFAULT 0 COMMENT '失败数量',
    `status`        INT(4)      DEFAULT 1 COMMENT '状态0失败 1成功 2部分成功',
    `message`       TEXT        COMMENT '采集消息',
    `collect_time`  TIMESTAMP   NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '采集时间',
    PRIMARY KEY (`id`),
    -- INDEX is MySQL's synonym for KEY
    INDEX `idx_config` (`config_id`),
    INDEX `idx_collect_time` (`collect_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci COMMENT='数据采集记录表';

View File

@@ -0,0 +1,129 @@
package org.xyzh.api.crontab;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.vo.DataCollectionItemVO;
/**
 * @description Service interface for data collection items
 * @filename DataCollectionItemService.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
public interface DataCollectionItemService {

    // ---------------- create / update / delete ----------------

    /**
     * @description Create a collection item
     * @param item the collection item
     * @return ResultDomain<TbDataCollectionItem> creation result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<TbDataCollectionItem> createItem(TbDataCollectionItem item);

    /**
     * @description Create collection items in batch
     * @param itemList list of collection items
     * @return ResultDomain<Integer> number of items created
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<Integer> batchCreateItems(java.util.List<TbDataCollectionItem> itemList);

    /**
     * @description Update a collection item
     * @param item the collection item
     * @return ResultDomain<TbDataCollectionItem> update result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<TbDataCollectionItem> updateItem(TbDataCollectionItem item);

    /**
     * @description Delete a collection item
     * @param itemId collection item ID
     * @return ResultDomain<TbDataCollectionItem> deletion result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<TbDataCollectionItem> deleteItem(String itemId);

    // ---------------- queries ----------------

    /**
     * @description Look up a collection item by ID
     * @param itemId collection item ID
     * @return ResultDomain<DataCollectionItemVO> query result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<DataCollectionItemVO> getItemById(String itemId);

    /**
     * @description Query the list of collection items
     * @param filter filter conditions
     * @return ResultDomain<DataCollectionItemVO> query result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<DataCollectionItemVO> getItemList(TbDataCollectionItem filter);

    /**
     * @description Query collection items page by page
     * @param filter filter conditions
     * @param pageParam paging parameters
     * @return ResultDomain<DataCollectionItemVO> query result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<DataCollectionItemVO> getItemPage(TbDataCollectionItem filter, PageParam pageParam);

    /**
     * @description Query the collection items that belong to a task
     * @param taskId task ID
     * @return ResultDomain<DataCollectionItemVO> query result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<DataCollectionItemVO> getItemsByTaskId(String taskId);

    // ---------------- processing ----------------

    /**
     * @description Convert a collection item into a resource
     * @param itemId collection item ID
     * @param tagId tag ID (article category)
     * @return ResultDomain<String> ID of the resulting resource
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<String> convertToResource(String itemId, String tagId);

    /**
     * @description Convert collection items into resources in batch
     * @param itemIds list of collection item IDs
     * @param tagId tag ID (article category)
     * @return ResultDomain<Integer> number of items converted
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<Integer> batchConvertToResource(java.util.List<String> itemIds, String tagId);

    /**
     * @description Mark a collection item as ignored
     * @param itemId collection item ID
     * @return ResultDomain<TbDataCollectionItem> operation result
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<TbDataCollectionItem> ignoreItem(String itemId);

    /**
     * @description Count collection items by status
     * @param taskId task ID (optional)
     * @param status status
     * @return ResultDomain<Long> item count
     * @author yslg
     * @since 2025-11-08
     */
    ResultDomain<Long> countByStatus(String taskId, Integer status);
}

View File

@@ -1,163 +0,0 @@
package org.xyzh.api.news.collection;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.resource.TbDataCollectionConfig;
import org.xyzh.common.dto.resource.TbDataCollectionLog;
import java.util.Date;
import java.util.List;
/**
 * @description Service interface for data collection
 * @filename DataCollectionService.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
public interface DataCollectionService {

    // ---------------- collection configuration ----------------

    /**
     * @description Get the list of collection configurations
     * @param status status (optional)
     * @return ResultDomain<TbDataCollectionConfig> configuration list
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> getConfigList(Integer status);

    /**
     * @description Get the details of a configuration by ID
     * @param configID configuration ID
     * @return ResultDomain<TbDataCollectionConfig> configuration details
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> getConfigById(String configID);

    /**
     * @description Create a collection configuration
     * @param config configuration data
     * @return ResultDomain<TbDataCollectionConfig> creation result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> createConfig(TbDataCollectionConfig config);

    /**
     * @description Update a collection configuration
     * @param config configuration data
     * @return ResultDomain<TbDataCollectionConfig> update result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> updateConfig(TbDataCollectionConfig config);

    /**
     * @description Delete a collection configuration
     * @param configID configuration ID
     * @return ResultDomain<Boolean> deletion result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<Boolean> deleteConfig(String configID);

    /**
     * @description Update the status of a configuration
     * @param configID configuration ID
     * @param status status
     * @return ResultDomain<TbDataCollectionConfig> update result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> updateConfigStatus(String configID, Integer status);

    /**
     * @description Update the last collection time of a configuration
     * @param configID configuration ID
     * @param lastCollectTime last collection time
     * @return ResultDomain<TbDataCollectionConfig> update result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionConfig> updateLastCollectTime(String configID, Date lastCollectTime);

    // ---------------- collection logs ----------------

    /**
     * @description Get the list of collection logs
     * @param configID configuration ID (optional)
     * @param startDate start date (optional)
     * @param endDate end date (optional)
     * @return ResultDomain<TbDataCollectionLog> log list
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> getLogList(String configID, Date startDate, Date endDate);

    /**
     * @description Get the details of a log entry by ID
     * @param logID log ID
     * @return ResultDomain<TbDataCollectionLog> log details
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> getLogById(String logID);

    /**
     * @description Create a collection log entry
     * @param log log data
     * @return ResultDomain<TbDataCollectionLog> creation result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> createLog(TbDataCollectionLog log);

    /**
     * @description Get the collection statistics of a configuration
     * @param configID configuration ID
     * @return ResultDomain<TbDataCollectionLog> collection statistics
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> getConfigStatistics(String configID);

    // ---------------- collection operations ----------------

    /**
     * @description Run a data collection
     * @param configID configuration ID
     * @return ResultDomain<TbDataCollectionLog> collection result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> executeCollection(String configID);

    /**
     * @description Run data collections in batch
     * @param configIDs list of configuration IDs
     * @return ResultDomain<TbDataCollectionLog> list of collection results
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<TbDataCollectionLog> batchExecuteCollection(List<String> configIDs);

    /**
     * @description Stop a collection task
     * @param configID configuration ID
     * @return ResultDomain<Boolean> stop result
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<Boolean> stopCollection(String configID);

    /**
     * @description Get the status of a collection task
     * @param configID configuration ID
     * @return ResultDomain<String> task status
     * @author yslg
     * @since 2025-10-15
     */
    ResultDomain<String> getCollectionStatus(String configID);
}

View File

@@ -0,0 +1,239 @@
package org.xyzh.common.dto.crontab;
import org.xyzh.common.dto.BaseDTO;
import java.util.Date;
/**
 * @description Data collection item (stores crawled article data)
 * @filename TbDataCollectionItem.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
public class TbDataCollectionItem extends BaseDTO {

    private static final long serialVersionUID = 1L;

    /** ID of the associated scheduled task. */
    private String taskId;

    /** Article title. */
    private String title;

    /** Article content, in HTML format. */
    private String content;

    /** Article summary. */
    private String summary;

    /** Source outlet (People's Daily, Xinhua News Agency, ...). */
    private String source;

    /** Source URL, used for de-duplication. */
    private String sourceUrl;

    /** Category (politics, society, international, ...). */
    private String category;

    /** Author. */
    private String author;

    /** Publication time. */
    private Date publishTime;

    /** Cover image URL. */
    private String coverImage;

    /** Image list: a JSON-formatted array of image URLs. */
    private String images;

    /** Tags; multiple tags are separated by commas. */
    private String tags;

    /** Status (0: unprocessed, 1: converted to a resource, 2: ignored). */
    private Integer status;

    /** ID of the resulting resource, if the item has been converted. */
    private String resourceId;

    /** Crawl time. */
    private Date crawlTime;

    /** Processing time. */
    private Date processTime;

    /** Person who processed the item. */
    private String processor;

    // ---- plain accessors, in field declaration order ----

    public String getTaskId() { return taskId; }

    public void setTaskId(String taskId) { this.taskId = taskId; }

    public String getTitle() { return title; }

    public void setTitle(String title) { this.title = title; }

    public String getContent() { return content; }

    public void setContent(String content) { this.content = content; }

    public String getSummary() { return summary; }

    public void setSummary(String summary) { this.summary = summary; }

    public String getSource() { return source; }

    public void setSource(String source) { this.source = source; }

    public String getSourceUrl() { return sourceUrl; }

    public void setSourceUrl(String sourceUrl) { this.sourceUrl = sourceUrl; }

    public String getCategory() { return category; }

    public void setCategory(String category) { this.category = category; }

    public String getAuthor() { return author; }

    public void setAuthor(String author) { this.author = author; }

    public Date getPublishTime() { return publishTime; }

    public void setPublishTime(Date publishTime) { this.publishTime = publishTime; }

    public String getCoverImage() { return coverImage; }

    public void setCoverImage(String coverImage) { this.coverImage = coverImage; }

    public String getImages() { return images; }

    public void setImages(String images) { this.images = images; }

    public String getTags() { return tags; }

    public void setTags(String tags) { this.tags = tags; }

    public Integer getStatus() { return status; }

    public void setStatus(Integer status) { this.status = status; }

    public String getResourceId() { return resourceId; }

    public void setResourceId(String resourceId) { this.resourceId = resourceId; }

    public Date getCrawlTime() { return crawlTime; }

    public void setCrawlTime(Date crawlTime) { this.crawlTime = crawlTime; }

    public Date getProcessTime() { return processTime; }

    public void setProcessTime(Date processTime) { this.processTime = processTime; }

    public String getProcessor() { return processor; }

    public void setProcessor(String processor) { this.processor = processor; }
}

View File

@@ -1,146 +0,0 @@
package org.xyzh.common.dto.resource;
import org.xyzh.common.dto.BaseDTO;
import java.util.Date;
/**
 * @description Data collection configuration
 * @filename TbDataCollectionConfig.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
public class TbDataCollectionConfig extends BaseDTO {

    private static final long serialVersionUID = 1L;

    /** Configuration name. */
    private String name;

    /** URL of the collection source. */
    private String sourceUrl;

    /** Type of the collection source. */
    private String sourceType;

    /** Collection frequency: "daily" or "weekly". */
    private String frequency;

    /** Default tag ID (article category tag, tagType=1). */
    private String tagID;

    /** Status: 0 = disabled, 1 = enabled. */
    private Integer status;

    /** Time of the last collection. */
    private Date lastCollectTime;

    /** Creator. */
    private String creator;

    /** Last updater. */
    private String updater;

    // ---- plain accessors, in field declaration order ----

    public String getName() { return name; }

    public void setName(String name) { this.name = name; }

    public String getSourceUrl() { return sourceUrl; }

    public void setSourceUrl(String sourceUrl) { this.sourceUrl = sourceUrl; }

    public String getSourceType() { return sourceType; }

    public void setSourceType(String sourceType) { this.sourceType = sourceType; }

    public String getFrequency() { return frequency; }

    public void setFrequency(String frequency) { this.frequency = frequency; }

    public String getTagID() { return tagID; }

    public void setTagID(String tagID) { this.tagID = tagID; }

    public Integer getStatus() { return status; }

    public void setStatus(Integer status) { this.status = status; }

    public Date getLastCollectTime() { return lastCollectTime; }

    public void setLastCollectTime(Date lastCollectTime) { this.lastCollectTime = lastCollectTime; }

    public String getCreator() { return creator; }

    public void setCreator(String creator) { this.creator = creator; }

    public String getUpdater() { return updater; }

    public void setUpdater(String updater) { this.updater = updater; }

    /**
     * Renders the ID, name, source URL/type, frequency, status and last
     * collection time; tagID, creator and updater are not included.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TbDataCollectionConfig{");
        text.append("id=").append(getID());
        text.append(", name='").append(name).append('\'');
        text.append(", sourceUrl='").append(sourceUrl).append('\'');
        text.append(", sourceType='").append(sourceType).append('\'');
        text.append(", frequency='").append(frequency).append('\'');
        text.append(", status=").append(status);
        text.append(", lastCollectTime=").append(lastCollectTime);
        text.append('}');
        return text.toString();
    }
}

View File

@@ -1,120 +0,0 @@
package org.xyzh.common.dto.resource;
import org.xyzh.common.dto.BaseDTO;
import java.util.Date;
/**
 * @description Data collection log record
 * @filename TbDataCollectionLog.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
public class TbDataCollectionLog extends BaseDTO {

    private static final long serialVersionUID = 1L;

    /** ID of the configuration this run belongs to. */
    private String configID;

    /** Number of items collected. */
    private Integer collectCount;

    /** Number of successes. */
    private Integer successCount;

    /** Number of failures. */
    private Integer failCount;

    /** Status: 0 = failed, 1 = succeeded, 2 = partially succeeded. */
    private Integer status;

    /** Collection message. */
    private String message;

    /** Collection time. */
    private Date collectTime;

    // ---- plain accessors, in field declaration order ----

    public String getConfigID() { return configID; }

    public void setConfigID(String configID) { this.configID = configID; }

    public Integer getCollectCount() { return collectCount; }

    public void setCollectCount(Integer collectCount) { this.collectCount = collectCount; }

    public Integer getSuccessCount() { return successCount; }

    public void setSuccessCount(Integer successCount) { this.successCount = successCount; }

    public Integer getFailCount() { return failCount; }

    public void setFailCount(Integer failCount) { this.failCount = failCount; }

    public Integer getStatus() { return status; }

    public void setStatus(Integer status) { this.status = status; }

    public String getMessage() { return message; }

    public void setMessage(String message) { this.message = message; }

    public Date getCollectTime() { return collectTime; }

    public void setCollectTime(Date collectTime) { this.collectTime = collectTime; }

    /**
     * Renders the ID, configuration ID, counters, status and collection time;
     * the message field is not included.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TbDataCollectionLog{");
        text.append("id=").append(getID());
        text.append(", configID='").append(configID).append('\'');
        text.append(", collectCount=").append(collectCount);
        text.append(", successCount=").append(successCount);
        text.append(", failCount=").append(failCount);
        text.append(", status=").append(status);
        text.append(", collectTime=").append(collectTime);
        text.append('}');
        return text.toString();
    }
}

View File

@@ -0,0 +1,84 @@
package org.xyzh.common.vo;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import java.io.Serializable;
/**
 * @description View object for a data collection item
 * @filename DataCollectionItemVO.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
public class DataCollectionItemVO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** The collection item data. */
    private TbDataCollectionItem item;

    /** The associated scheduled task. */
    private TbCrontabTask task;

    /** Status text, for display in the front end. */
    private String statusText;

    /** Whether the item can be edited (unprocessed and ignored items can). */
    private Boolean canEdit;

    /** Whether the item can be converted to a resource (unprocessed items can). */
    private Boolean canConvert;

    // ---- plain accessors, in field declaration order ----

    public TbDataCollectionItem getItem() { return item; }

    public void setItem(TbDataCollectionItem item) { this.item = item; }

    public TbCrontabTask getTask() { return task; }

    public void setTask(TbCrontabTask task) { this.task = task; }

    public String getStatusText() { return statusText; }

    public void setStatusText(String statusText) { this.statusText = statusText; }

    public Boolean getCanEdit() { return canEdit; }

    public void setCanEdit(Boolean canEdit) { this.canEdit = canEdit; }

    public Boolean getCanConvert() { return canConvert; }

    public void setCanConvert(Boolean canConvert) { this.canConvert = canConvert; }
}

View File

@@ -0,0 +1,652 @@
# Java调用Python并获取返回结果详解
## 一、核心原理
Java通过 `ProcessBuilder` 或 `Runtime.exec()` 创建操作系统进程来执行Python脚本,然后通过进程的标准输入/输出流进行通信。
## 二、当前实现详解
### 1. 构建命令
```java
// 步骤1: 构建命令列表
List<String> command = new ArrayList<>();
// 步骤2: 处理Windows/Linux系统差异
String os = System.getProperty("os.name").toLowerCase();
if (os.contains("win")) {
// Windows系统需要通过cmd执行
command.add("cmd"); // 命令解释器
command.add("/c"); // /c表示执行后关闭
command.add(pythonPath); // python或python3
} else {
// Linux/Mac系统直接执行
command.add(pythonPath);
}
// 步骤3: 添加Python脚本和参数
command.add("main.py"); // Python脚本
command.add(category); // 参数1: 分类
command.add(limit); // 参数2: 数量
command.add(outputFile); // 参数3: 输出文件
```
**命令示例:**
- Windows: `cmd /c python main.py politics 20 output/news.json`
- Linux: `python3 main.py politics 20 output/news.json`
### 2. 创建进程
```java
// 创建进程构建器
ProcessBuilder processBuilder = new ProcessBuilder(command);
// 设置工作目录Python脚本所在目录
processBuilder.directory(scriptDir.toFile());
// 合并标准输出和错误输出(便于统一读取)
processBuilder.redirectErrorStream(true);
// 启动进程
Process process = processBuilder.start();
```
**关键点:**
- `directory()`: 设置工作目录,确保Python脚本能找到相对路径的资源
- `redirectErrorStream(true)`: 将stderr合并到stdout方便统一读取
- `start()`: 异步启动进程,不会阻塞
### 3. 读取输出流
```java
// 读取标准输出Python的print输出
StringBuilder output = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append("\n");
logger.debug("Python输出: {}", line);
}
}
```
**重要说明:**
- `process.getInputStream()`: 获取Python进程的标准输出
- 必须读取输出流,否则缓冲区满会导致进程阻塞
- 使用UTF-8编码避免中文乱码
### 4. 等待进程结束
```java
// 方式1: 带超时的等待(推荐)
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
if (!finished) {
// 超时后强制终止进程
process.destroy(); // 或 process.destroyForcibly() 强制终止
throw new RuntimeException("任务超时");
}
// 方式2: 无限等待(不推荐,可能导致死锁)
int exitCode = process.waitFor();
```
**退出码说明:**
- `0`: 执行成功
- `非0`: 执行失败(通常是错误码)
### 5. 获取返回结果
当前实现通过**文件传递**方式获取结果:
```java
// Python脚本将结果写入JSON文件
Path outputPath = scriptDir.resolve(outputFile);
// Java读取文件内容
String jsonContent = Files.readString(outputPath);
// 解析JSON
ObjectMapper mapper = new ObjectMapper();
List<Map<String, Object>> newsList = mapper.readValue(
jsonContent,
List.class
);
```
## 三、三种数据传递方式对比
### 方式1: 文件传递(当前实现)
**优点:**
- ✅ 适合大数据量
- ✅ 数据持久化,便于调试
- ✅ 实现简单
**缺点:**
- ⚠️ 需要文件I/O操作
- ⚠️ 需要管理临时文件
- ⚠️ 可能有并发问题(文件名冲突)
**实现示例:**
```java
// Java端
String outputFile = "output/result_" + System.currentTimeMillis() + ".json";
command.add(outputFile);
// Python端
import json
import sys
result = {"status": "success", "data": [...]}
with open(sys.argv[1], 'w', encoding='utf-8') as f:
json.dump(result, f, ensure_ascii=False)
```
### 方式2: 标准输出传递(适合小数据)
**优点:**
- ✅ 实时传输,无需文件
- ✅ 适合小数据量(< 1MB)
- ✅ 无文件管理开销
**缺点:**
- ⚠️ 大数据量可能阻塞
- ⚠️ 不能传递二进制数据
- ⚠️ 需要与日志输出区分
**实现示例:**
```java
// Java端读取标准输出
StringBuilder result = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
String line;
while ((line = reader.readLine()) != null) {
// 约定:以特定标记区分结果和日志
if (line.startsWith("RESULT:")) {
result.append(line.substring(7)); // 去掉"RESULT:"前缀
} else {
logger.info("Python日志: {}", line);
}
}
}
// 解析JSON结果
String jsonResult = result.toString();
ObjectMapper mapper = new ObjectMapper();
Map<String, Object> data = mapper.readValue(jsonResult, Map.class);
```
```python
# Python端输出结果
import json
import sys
# 日志输出到stderr
print("开始爬取...", file=sys.stderr)
# 结果输出到stdout带标记
result = {"status": "success", "data": [...]}
print("RESULT:" + json.dumps(result, ensure_ascii=False))
```
### 方式3: 标准输入传递参数(双向通信)
**优点:**
- ✅ 可以传递复杂参数
- ✅ 支持交互式通信
**缺点:**
- ⚠️ 实现复杂
- ⚠️ 需要处理流关闭时机
**实现示例:**
```java
// Java端通过标准输入传递参数
ProcessBuilder pb = new ProcessBuilder("python", "script.py");
Process process = pb.start();
// 写入参数到标准输入
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(process.getOutputStream(), "UTF-8"))) {
String params = "{\"category\":\"politics\",\"limit\":20}";
writer.write(params);
writer.newLine();
writer.flush();
}
// 关闭输入流告诉Python输入结束
process.getOutputStream().close();
// 读取输出
// ... 同方式2
```
```python
# Python端从标准输入读取参数
import json
import sys
# 读取参数
params_json = sys.stdin.readline().strip()
params = json.loads(params_json)
category = params.get("category", "politics")
limit = params.get("limit", 20)
# 执行爬取
result = crawl_news(category, limit)
# 输出结果
print(json.dumps(result, ensure_ascii=False))
```
## 四、完整优化实现
### 改进版实现(支持多种方式)
```java
package org.xyzh.crontab.task.newsTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
 * Runs the Python news crawler as a child process and returns the crawled
 * news as a list of maps.
 *
 * Three ways of exchanging data with the script are shown: result via an
 * output file, result via stdout, and parameters via stdin. All three go
 * through the same process runner, so they share the same timeout handling,
 * output draining and cleanup.
 */
@Component("newsCrewerTask")
public class NewsCrewerTask {

    private static final Logger logger = LoggerFactory.getLogger(NewsCrewerTask.class);

    /** How long to wait for the output reader to finish once the process has exited. */
    private static final long OUTPUT_DRAIN_TIMEOUT_SECONDS = 5;

    private final ObjectMapper objectMapper = new ObjectMapper();

    /** Python executable; property crewer.python.path, default "python". */
    @Value("${crewer.python.path:python}")
    private String pythonPath;

    /** Directory holding the crawler scripts; also used as the child's working directory. */
    @Value("${crewer.script.path:../schoolNewsCrewer}")
    private String scriptPath;

    /** Maximum run time of the child process, in seconds. */
    @Value("${crewer.timeout:300}")
    private int timeout;

    /**
     * Approach 1: the script writes its result to a JSON file which is then
     * read back (suited to large results).
     *
     * @param category news category passed to main.py
     * @param limit    maximum number of articles passed to main.py
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or produces no output file
     */
    public List<Map<String, Object>> executeByFile(String category, int limit) {
        logger.info("执行爬虫任务 - 文件方式");
        try {
            List<String> command = buildCommand("main.py", category, String.valueOf(limit));

            // A timestamped name keeps runs from overwriting each other's output.
            String timestamp = String.valueOf(System.currentTimeMillis());
            String outputFile = String.format("output/news_%s_%s.json", category, timestamp);
            command.add(outputFile);

            ProcessResult result = executeProcess(command);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败: " + result.getOutput());
            }

            // The script runs with scriptPath as working directory, so the
            // relative output path is resolved against it.
            Path outputPath = Paths.get(scriptPath).resolve(outputFile);
            if (!Files.exists(outputPath)) {
                throw new RuntimeException("输出文件不存在: " + outputFile);
            }
            String jsonContent = Files.readString(outputPath, StandardCharsets.UTF_8);

            // The output file is left on disk on purpose (handy for debugging);
            // call Files.deleteIfExists(outputPath) here if it should be cleaned up.
            return parseNewsList(jsonContent);
        } catch (Exception e) {
            throw wrapFailure(e);
        }
    }

    /**
     * Approach 2: the script prints its result to stdout (suited to small results).
     * The result is taken from the last output line that starts with '{' or '['.
     *
     * @param category news category passed to main_stdout.py
     * @param limit    maximum number of articles passed to main_stdout.py
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or prints no JSON line
     */
    public List<Map<String, Object>> executeByStdout(String category, int limit) {
        logger.info("执行爬虫任务 - 标准输出方式");
        try {
            List<String> command = buildCommand("main_stdout.py", category, String.valueOf(limit));

            ProcessResult result = executeProcess(command);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败: " + result.getOutput());
            }

            String jsonLine = findLastJsonLine(result.getOutput());
            if (jsonLine == null) {
                throw new RuntimeException("未找到JSON结果");
            }
            return parseNewsList(jsonLine);
        } catch (Exception e) {
            throw wrapFailure(e);
        }
    }

    /**
     * Approach 3: the parameters are sent to the script as one JSON line on
     * stdin, and the whole of its output is parsed as the JSON result.
     *
     * @param category news category, sent as the "category" parameter
     * @param limit    maximum number of articles, sent as the "limit" parameter
     * @return the parsed news list
     * @throws RuntimeException if the script fails, times out, or prints invalid JSON
     */
    public List<Map<String, Object>> executeByStdin(String category, int limit) {
        logger.info("执行爬虫任务 - 标准输入方式");
        try {
            List<String> command = buildCommand("main_stdin.py");

            Map<String, Object> params = Map.of(
                "category", category,
                "limit", limit
            );
            String paramsJson = objectMapper.writeValueAsString(params);

            // Going through the shared runner means the output is drained on a
            // separate thread, so the timeout below it is actually enforced
            // (a blocking read-to-EOF before waitFor would defeat it).
            ProcessResult result = executeProcess(command, paramsJson);
            if (result.getExitCode() != 0) {
                throw new RuntimeException("Python执行失败退出码: " + result.getExitCode());
            }
            return parseNewsList(result.getOutput().trim());
        } catch (Exception e) {
            throw wrapFailure(e);
        }
    }

    /**
     * Runs a command without sending anything to its stdin.
     */
    private ProcessResult executeProcess(List<String> command) throws IOException, InterruptedException {
        return executeProcess(command, null);
    }

    /**
     * Runs a command in the script directory, optionally feeding one line to
     * its stdin, and waits for it to finish within the configured timeout.
     * stderr is merged into stdout. The child is force-killed if it is still
     * alive when this method returns or throws.
     *
     * @param command      the full command line
     * @param stdinPayload text written to the child's stdin followed by a newline,
     *                     after which stdin is closed; null to leave stdin untouched
     * @return exit code, captured output and duration
     * @throws IOException          if the process cannot be started, stdin cannot be
     *                              written, or the output cannot be collected
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws RuntimeException     if the process exceeds the timeout
     */
    private ProcessResult executeProcess(List<String> command, String stdinPayload)
            throws IOException, InterruptedException {
        long startTime = System.currentTimeMillis();

        ProcessBuilder pb = new ProcessBuilder(command);
        pb.directory(Paths.get(scriptPath).toFile());
        pb.redirectErrorStream(true);

        logger.info("执行命令: {}", String.join(" ", command));

        Process process = pb.start();
        try {
            // Start draining the output before anything else: if nobody reads,
            // the child blocks as soon as its output buffer fills up.
            CompletableFuture<String> outputFuture =
                CompletableFuture.supplyAsync(() -> readOutput(process));

            if (stdinPayload != null) {
                // Closing the writer closes the child's stdin, which tells the
                // script that the input is complete.
                try (BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
                    writer.write(stdinPayload);
                    writer.newLine();
                }
            }

            boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
            if (!finished) {
                // The finally block below kills the process.
                throw new RuntimeException("任务超时(超过" + timeout + "秒)");
            }

            String outputStr;
            try {
                outputStr = outputFuture.get(OUTPUT_DRAIN_TIMEOUT_SECONDS, TimeUnit.SECONDS);
            } catch (ExecutionException | TimeoutException e) {
                // Future.get's checked exceptions are folded into IOException so
                // the declared throws clause stays as it was.
                throw new IOException("读取输出失败", e);
            }

            int exitCode = process.exitValue();
            long duration = System.currentTimeMillis() - startTime;
            logger.info("进程执行完成 - 退出码: {}, 耗时: {}ms", exitCode, duration);
            return new ProcessResult(exitCode, outputStr, duration);
        } finally {
            if (process.isAlive()) {
                process.destroyForcibly();
            }
        }
    }

    /**
     * Reads the child's (merged) output to the end as UTF-8 text, one "\n" per line.
     * If reading fails part-way, the error is logged and whatever was read so far
     * is returned.
     */
    private String readOutput(Process process) {
        StringBuilder output = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line).append("\n");
                logger.debug("Python输出: {}", line);
            }
        } catch (IOException e) {
            logger.error("读取输出失败", e);
        }
        return output.toString();
    }

    /**
     * Returns the last line of the output that starts with '{' or '[' (after
     * trimming), or null if there is none.
     */
    private String findLastJsonLine(String output) {
        String[] lines = output.split("\n");
        for (int i = lines.length - 1; i >= 0; i--) {
            String line = lines[i].trim();
            if (line.startsWith("{") || line.startsWith("[")) {
                return line;
            }
        }
        return null;
    }

    /**
     * Parses a JSON array of objects into a list of maps.
     */
    private List<Map<String, Object>> parseNewsList(String json) throws IOException {
        return objectMapper.readValue(
            json,
            objectMapper.getTypeFactory().constructCollectionType(List.class, Map.class)
        );
    }

    /**
     * Logs a failure and wraps it in the RuntimeException reported to callers.
     * The interrupt flag is restored when the cause is an InterruptedException,
     * so code further up can still see the interruption.
     */
    private RuntimeException wrapFailure(Exception e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        logger.error("执行失败", e);
        return new RuntimeException("爬虫任务执行失败", e);
    }

    /**
     * Builds the command line: the Python executable (run through "cmd /c" on
     * Windows) followed by the given script name and arguments.
     */
    private List<String> buildCommand(String... args) {
        List<String> command = new ArrayList<>();

        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            command.add("cmd");
            command.add("/c");
        }
        command.add(pythonPath);

        for (String arg : args) {
            command.add(arg);
        }
        return command;
    }

    /**
     * Outcome of one child-process run.
     */
    private static class ProcessResult {
        private final int exitCode;
        private final String output;
        private final long duration;

        public ProcessResult(int exitCode, String output, long duration) {
            this.exitCode = exitCode;
            this.output = output;
            this.duration = duration;
        }

        /** Exit code of the process. */
        public int getExitCode() {
            return exitCode;
        }

        /** Captured output (stdout with stderr merged in). */
        public String getOutput() {
            return output;
        }

        /** Wall-clock duration of the run, in milliseconds. */
        public long getDuration() {
            return duration;
        }
    }
}
```
## 五、关键注意事项
### 1. 必须读取输出流
**错误示例:**
```java
Process process = pb.start();
int exitCode = process.waitFor(); // 可能永远阻塞!
```
**原因:** 如果输出缓冲区满了，Python进程会阻塞在写操作上，等待Java端读取输出；而Java端此时正阻塞在 `waitFor()` 上，双方互相等待。
**正确做法:**
```java
Process process = pb.start();
// 必须读取输出流
Thread outputThread = new Thread(() -> {
try (BufferedReader reader = ...) {
// 读取输出
}
});
outputThread.start();
process.waitFor();
```
### 2. 处理编码问题
```java
// 指定UTF-8编码避免中文乱码
new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)
new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8)
```
### 3. 超时控制
```java
// 使用带超时的waitFor
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
if (!finished) {
process.destroyForcibly(); // 强制终止
}
```
### 4. 资源清理
```java
try {
    // Run the actual work here
} finally {
    // Guard every access: process is null when pb.start() itself failed.
    if (process != null) {
        if (process.isAlive()) {
            process.destroyForcibly();
        }
        // Close the streams; close() can throw, and a failure here must not mask the
        // exception (if any) that is already propagating out of the try block.
        try {
            process.getInputStream().close();
            process.getOutputStream().close();
            process.getErrorStream().close();
        } catch (IOException ignored) {
            // best-effort cleanup
        }
    }
}
```
### 5. 错误处理
```java
// 检查退出码
if (exitCode != 0) {
// 读取错误输出
String error = readErrorStream(process);
throw new RuntimeException("执行失败: " + error);
}
```
## 六、性能优化建议
1. **使用线程池**：如果频繁调用，使用线程池管理进程
2. **连接复用**：考虑Python服务模式（HTTP/gRPC）
3. **异步执行**：使用CompletableFuture异步执行
4. **缓存结果**：对相同参数的请求缓存结果
## 七、总结
- **文件传递**：适合大数据量（当前实现方式）
- **标准输出**：适合小数据量、实时传输
- **标准输入**：适合复杂参数、双向通信
根据实际需求选择合适的方式；当前的文件传递方式已经足够好。

View File

@@ -25,6 +25,11 @@
<artifactId>api-crontab</artifactId> <artifactId>api-crontab</artifactId>
<version>${school-news.version}</version> <version>${school-news.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>api-news</artifactId>
<version>${school-news.version}</version>
</dependency>
<!-- Common模块依赖 --> <!-- Common模块依赖 -->
<dependency> <dependency>
@@ -38,6 +43,10 @@
<artifactId>system</artifactId> <artifactId>system</artifactId>
<version>${school-news.version}</version> <version>${school-news.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<!-- Spring Boot Web --> <!-- Spring Boot Web -->
<dependency> <dependency>

View File

@@ -0,0 +1,21 @@
package org.xyzh.crontab.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.xyzh.crontab.task.newsTask.ScriptDomain;
import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import java.util.List;
/**
 * Holder for crawler settings, declared under the {@code crawler} property prefix.
 * Lombok's {@code @Data} generates the accessors.
 *
 * NOTE(review): the class combines {@code @ConfigurationProperties} with per-field
 * {@code @Value} placeholders. Prefix binding would look up {@code crawler.base-path} and
 * {@code crawler.scripts}, which differ from the placeholder keys used below - confirm
 * against the application configuration which mechanism is intended.
 * NOTE(review): no bean registration annotation is visible here; presumably the class is
 * registered elsewhere (e.g. {@code @EnableConfigurationProperties}) - verify.
 */
@Data
@ConfigurationProperties(prefix = "crawler")
public class CrawlerProperties {
// Injected from the "crawler.base.path" placeholder.
@Value("${crawler.base.path}")
private String basePath;
// Injected from the "crawler.script" placeholder.
// NOTE(review): a @Value placeholder normally resolves to a String; converting it to a
// list of ScriptDomain objects likely needs prefix binding or a converter - TODO confirm.
@Value("${crawler.script}")
private List<ScriptDomain> scripts;
}

View File

@@ -0,0 +1,8 @@
package org.xyzh.crontab.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Properties class declared for the {@code crontab} prefix. It currently declares no
 * fields, so nothing is bound yet.
 *
 * NOTE(review): the class name looks like a typo of "CrontabProperties"; renaming it
 * requires renaming the file and updating every reference.
 */
@ConfigurationProperties(prefix = "crontab")
public class CrontabPrpperties {
}

View File

@@ -10,6 +10,13 @@ import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.core.page.PageRequest; import org.xyzh.common.core.page.PageRequest;
import org.xyzh.common.dto.crontab.TbCrontabTask; import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.common.dto.crontab.TbCrontabLog; import org.xyzh.common.dto.crontab.TbCrontabLog;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.crontab.pojo.CrontabItem;
import java.util.Date;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
/** /**
* @description 定时任务控制器 * @description 定时任务控制器
@@ -27,204 +34,64 @@ public class CrontabController {
@Autowired @Autowired
private CrontabService crontabService; private CrontabService crontabService;
// ----------------定时任务管理--------------------------------
/** /**
* @description 创建定时任务 * 获取可创建定时任务
* @param task 任务对象 * @return
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/ */
@PostMapping("/task") @GetMapping("/getEnabledCrontabList")
public ResultDomain<TbCrontabTask> createTask(@RequestBody TbCrontabTask task) { public ResultDomain<CrontabItem> getEnabledCrontabList(@RequestParam String param) {
return crontabService.createTask(task); return null;
} }
/** /**
* @description 更新定时任务 * 创建定时任务
* @param task 任务对象 * @param crontabItem
* @return ResultDomain<TbCrontabTask> * @return
* @author yslg
* @since 2025-10-25
*/ */
@PutMapping("/task") @PostMapping("/crontabTask")
public ResultDomain<TbCrontabTask> updateTask(@RequestBody TbCrontabTask task) { public ResultDomain<TbCrontabTask> createCrontab(@RequestBody TbCrontabTask crontabItem) {
return crontabService.updateTask(task); return null;
} }
/** /**
* @description 删除定时任务 * 更新定时任务
* @param task 任务对象 * @param crontabItem
* @return ResultDomain<TbCrontabTask> * @return
* @author yslg
* @since 2025-10-25
*/ */
@DeleteMapping("/task") @PutMapping("/crontabTask")
public ResultDomain<TbCrontabTask> deleteTask(@RequestBody TbCrontabTask task) { public ResultDomain<TbCrontabTask> updateCrontab(@RequestBody TbCrontabTask crontabItem) {
return crontabService.deleteTask(task.getID()); return null;
} }
/** /**
* @description 根据ID查询任务 * 删除定时任务
* @param taskId 任务ID * @param crontabItem
* @return ResultDomain<TbCrontabTask> * @return
* @author yslg
* @since 2025-10-25
*/ */
@GetMapping("/task/{taskId}") @DeleteMapping("/crontabTask")
public ResultDomain<TbCrontabTask> getTaskById(@PathVariable(value = "taskId") String taskId) { public ResultDomain<TbCrontabTask> deleteCrontab(@RequestBody TbCrontabTask crontabItem) {
return crontabService.getTaskById(taskId); return null;
} }
/** /**
* @description 查询任务列表 * 获取定时任务分页列表
* @param filter 过滤条件 * @param pageParam
* @return ResultDomain<TbCrontabTask> * @return
* @author yslg
* @since 2025-10-25
*/ */
@PostMapping("/task/list") @PostMapping("/crontabTaskPage")
public ResultDomain<TbCrontabTask> getTaskList(@RequestBody TbCrontabTask filter) { public ResultDomain<TbCrontabTask> getCrontabTask(@RequestBody PageRequest<TbCrontabTask> pageRequest) {
return crontabService.getTaskList(filter); return null;
} }
/** /**
* @description 分页查询任务列表 * 获取定时任务日志分页列表
* @param pageRequest 分页请求对象 * @param pageRequest
* @return ResultDomain<TbCrontabTask> * @return
* @author yslg
* @since 2025-10-25
*/ */
@PostMapping("/task/page") @PostMapping("/crontabTaskLogPage")
public ResultDomain<TbCrontabTask> getTaskPage(@RequestBody PageRequest<TbCrontabTask> pageRequest) { public ResultDomain<TbCrontabLog> getCrontabTaskLog(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
TbCrontabTask filter = pageRequest.getFilter(); return null;
PageParam pageParam = pageRequest.getPageParam();
return crontabService.getTaskPage(filter, pageParam);
} }
/**
* @description 启动定时任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/start/{taskId}")
public ResultDomain<TbCrontabTask> startTask(@PathVariable(value = "taskId") String taskId) {
return crontabService.startTask(taskId);
}
/**
* @description 暂停定时任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/pause/{taskId}")
public ResultDomain<TbCrontabTask> pauseTask(@PathVariable(value = "taskId") String taskId) {
return crontabService.pauseTask(taskId);
} }
/**
* @description 立即执行一次任务
* @param taskId 任务ID
* @return ResultDomain<TbCrontabTask>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/task/execute/{taskId}")
public ResultDomain<TbCrontabTask> executeTaskOnce(@PathVariable(value = "taskId") String taskId) {
return crontabService.executeTaskOnce(taskId);
}
/**
* @description 验证Cron表达式
* @param cronExpression Cron表达式
* @return ResultDomain<String>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/task/validate")
public ResultDomain<String> validateCronExpression(@RequestParam String cronExpression) {
return crontabService.validateCronExpression(cronExpression);
}
// ----------------定时任务日志--------------------------------
/**
* @description 根据任务ID查询日志
* @param taskId 任务ID
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/log/task/{taskId}")
public ResultDomain<TbCrontabLog> getLogsByTaskId(@PathVariable(value = "taskId") String taskId) {
return crontabService.getLogsByTaskId(taskId);
}
/**
* @description 查询日志列表
* @param filter 过滤条件
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/log/list")
public ResultDomain<TbCrontabLog> getLogList(@RequestBody TbCrontabLog filter) {
return crontabService.getLogList(filter);
}
/**
* @description 分页查询日志列表
* @param pageRequest 分页请求对象
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@PostMapping("/log/page")
public ResultDomain<TbCrontabLog> getLogPage(@RequestBody PageRequest<TbCrontabLog> pageRequest) {
TbCrontabLog filter = pageRequest.getFilter();
PageParam pageParam = pageRequest.getPageParam();
return crontabService.getLogPage(filter, pageParam);
}
/**
* @description 根据ID查询日志详情
* @param logId 日志ID
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@GetMapping("/log/{logId}")
public ResultDomain<TbCrontabLog> getLogById(@PathVariable(value = "logId") String logId) {
return crontabService.getLogById(logId);
}
/**
* @description 清理指定天数之前的日志
* @param days 天数
* @return ResultDomain<Integer>
* @author yslg
* @since 2025-10-25
*/
@DeleteMapping("/log/clean/{days}")
public ResultDomain<Integer> cleanLogs(@PathVariable(value = "days") Integer days) {
return crontabService.cleanLogs(days);
}
/**
* @description 删除日志
* @param log 日志对象
* @return ResultDomain<TbCrontabLog>
* @author yslg
* @since 2025-10-25
*/
@DeleteMapping("/log")
public ResultDomain<TbCrontabLog> deleteLog(@RequestBody TbCrontabLog log) {
return crontabService.deleteLog(log.getID());
}
}

View File

@@ -0,0 +1,62 @@
package org.xyzh.crontab.controller;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.core.page.PageRequest;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.vo.DataCollectionItemVO;
import java.util.List;
/**
 * REST controller for data collection items, mapped under {@code /crontab/collection/item}.
 *
 * All three endpoints are currently stubs that return {@code null}; the injected
 * {@code itemService} is not used yet.
 *
 * @filename DataCollectionItemController.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
@RestController
@RequestMapping("/crontab/collection/item")
public class DataCollectionItemController {
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemController.class);
@Autowired
private DataCollectionItemService itemService;
/**
 * Lists every data collection item created by one task log.
 * TODO: not implemented - always returns {@code null}.
 *
 * @param taskLogId ID of the task log, taken from the URL path
 * @return currently {@code null}
 */
@GetMapping("/task/{taskLogId}")
public ResultDomain<DataCollectionItemVO> getTaskLogDataCollectionItemList(@PathVariable String taskLogId) {
return null;
}
/**
 * Returns a page of data collection items.
 * TODO: not implemented - always returns {@code null}.
 *
 * @param pageRequest paging request read from the request body
 * @return currently {@code null}
 */
@PostMapping("/page")
public ResultDomain<DataCollectionItemVO> getCollectionItemPage(@RequestBody PageRequest<DataCollectionItemVO> pageRequest) {
return null;
}
/**
 * Converts a data collection item into an article.
 * TODO: not implemented - always returns {@code null}.
 *
 * @param dataCollectionItem the item to convert, read from the request body
 * @return currently {@code null}
 */
@PostMapping("/resource")
public ResultDomain<DataCollectionItemVO> convertToArticle(@RequestBody DataCollectionItemVO dataCollectionItem) {
return null;
}
}

View File

@@ -2,14 +2,17 @@ package org.xyzh.crontab.enums;
import java.util.Arrays; import java.util.Arrays;
import org.xyzh.crontab.task.DataBackupTask; import org.xyzh.crontab.task.newsTask.NewsCrawlerTask;
import org.xyzh.crontab.task.LogCleanTask;
import org.xyzh.crontab.task.SystemStatisticsTask; // import org.xyzh.crontab.task.DataBackupTask;
// import org.xyzh.crontab.task.LogCleanTask;
// import org.xyzh.crontab.task.SystemStatisticsTask;
public enum TaskEnums { public enum TaskEnums {
DATA_BACKUP("dataBackup", DataBackupTask.class), // DATA_BACKUP("dataBackup", DataBackupTask.class),
LOG_CLEAN("logClean", LogCleanTask.class), // LOG_CLEAN("logClean", LogCleanTask.class),
SystemStatistics("systemStatistics", SystemStatisticsTask.class); // SystemStatistics("systemStatistics", SystemStatisticsTask.class);
NEWS_CRAWLER("newsCrawler", NewsCrawlerTask.class);
private String name; private String name;

View File

@@ -0,0 +1,86 @@
package org.xyzh.crontab.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import java.util.List;
/**
 * Data access layer for data collection items. Extends MyBatis-Plus {@code BaseMapper}
 * and adds the custom queries declared below.
 *
 * @filename DataCollectionItemMapper.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
@Mapper
public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionItem> {
/**
 * Looks up a collection item by its source URL (used for de-duplication).
 *
 * @param sourceUrl source URL
 * @return the matching collection item
 * @author yslg
 * @since 2025-11-08
 */
TbDataCollectionItem selectBySourceUrl(@Param("sourceUrl") String sourceUrl);
/**
 * Lists the collection items that belong to a task.
 *
 * @param taskId task ID
 * @return list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectByTaskId(@Param("taskId") String taskId);
/**
 * Lists collection items matching a filter.
 * NOTE(review): unlike the other methods, this parameter carries no {@code @Param}
 * name - confirm the mapper XML references its properties accordingly.
 *
 * @param filter filter conditions
 * @return list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectItemList(TbDataCollectionItem filter);
/**
 * Lists one page of collection items matching a filter.
 *
 * @param filter    filter conditions
 * @param pageParam paging parameters
 * @return list of collection items
 * @author yslg
 * @since 2025-11-08
 */
List<TbDataCollectionItem> selectItemPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam);
/**
 * Counts the collection items matching a filter.
 *
 * @param filter filter conditions
 * @return total count
 * @author yslg
 * @since 2025-11-08
 */
long countItems(@Param("filter") TbDataCollectionItem filter);
/**
 * Inserts collection items in bulk.
 *
 * @param itemList collection items to insert
 * @return number of affected rows
 * @author yslg
 * @since 2025-11-08
 */
int batchInsertItems(@Param("itemList") List<TbDataCollectionItem> itemList);
/**
 * Counts collection items by status.
 *
 * @param taskId task ID (optional)
 * @param status status value
 * @return count
 * @author yslg
 * @since 2025-11-08
 */
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
}

View File

@@ -0,0 +1,22 @@
package org.xyzh.crontab.pojo;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
import java.util.Map;
/**
 * Describes one entry of the cron-task catalogue: a display name plus the methods that
 * can be scheduled for it. Accessors are generated by Lombok.
 */
@Data
@NoArgsConstructor
public class CrontabItem {

    // Name of this entry.
    private String name;

    // Methods offered by this entry.
    private List<CrontabMethod> methods;

    /**
     * One method belonging to a {@link CrontabItem}.
     *
     * Declared {@code static} so it can be instantiated without an enclosing
     * {@code CrontabItem} instance; a non-static inner class cannot be created by
     * reflection-based tools such as JSON deserializers or property binders.
     */
    @Data
    public static class CrontabMethod {
        // Name of the method entry.
        private String name;
        // Class identifier - presumably a bean or class name; verify against callers.
        private String clazz;
        // Path associated with the method - presumably a script path; verify against callers.
        private String path;
        // Free-form parameters keyed by name.
        private Map<String, Object> params;
    }
}

View File

@@ -65,7 +65,12 @@ public class TaskExecutor {
if (task.getMethodParams() != null && !task.getMethodParams().isEmpty()) { if (task.getMethodParams() != null && !task.getMethodParams().isEmpty()) {
// 如果有参数,需要解析参数类型 // 如果有参数,需要解析参数类型
method = bean.getClass().getMethod(task.getMethodName(), String.class); method = bean.getClass().getMethod(task.getMethodName(), String.class);
method.invoke(bean, task.getMethodParams()); // 如果是newsCrewerTask将taskId添加到参数前面
String methodParams = task.getMethodParams();
if ("newsCrewerTask".equals(task.getBeanName()) && task.getTaskId() != null) {
methodParams = task.getTaskId() + "|" + methodParams;
}
method.invoke(bean, methodParams);
} else { } else {
// 无参方法 // 无参方法
method = bean.getClass().getMethod(task.getMethodName()); method = bean.getClass().getMethod(task.getMethodName());

View File

@@ -0,0 +1,15 @@
package org.xyzh.crontab.service;
import org.xyzh.api.crontab.DataCollectionItemService;
/**
 * Module-local service interface for data collection items. It extends the API-level
 * {@code DataCollectionItemService} and currently adds no methods of its own.
 *
 * @filename NCDataCollectionItemService.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-08
 */
public interface NCDataCollectionItemService extends DataCollectionItemService {
}

View File

@@ -0,0 +1,479 @@
package org.xyzh.crontab.service.impl;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.api.news.resource.ResourceService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.dto.resource.TbResource;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.common.vo.DataCollectionItemVO;
import org.xyzh.common.vo.ResourceVO;
import org.xyzh.crontab.mapper.DataCollectionItemMapper;
import org.xyzh.crontab.mapper.CrontabTaskMapper;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.system.utils.LoginUtil;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
/**
* @description 数据采集项服务实现类
* @filename DataCollectionItemServiceImpl.java
* @author yslg
* @copyright xyzh
* @since 2025-11-08
*/
@Service
public class DataCollectionItemServiceImpl implements DataCollectionItemService {
private static final Logger logger = LoggerFactory.getLogger(DataCollectionItemServiceImpl.class);
@Autowired
private DataCollectionItemMapper itemMapper;
@Autowired
private CrontabTaskMapper taskMapper;
@Autowired
private ResourceService resourceService;
private final ObjectMapper objectMapper = new ObjectMapper();
/**
 * Creates a single collection item.
 * An item whose source URL is already stored is rejected; otherwise the ID, creation
 * time and deleted flag are assigned, and status / crawl time are defaulted when absent.
 *
 * @param item the item to persist
 * @return success with the stored item, or a failure result with a message
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> createItem(TbDataCollectionItem item) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        // De-duplicate on the source URL when one is present.
        String sourceUrl = item.getSourceUrl();
        boolean hasUrl = sourceUrl != null && !sourceUrl.isEmpty();
        if (hasUrl && itemMapper.selectBySourceUrl(sourceUrl) != null) {
            response.fail("该文章已存在URL: " + sourceUrl);
            return response;
        }

        // Identity and bookkeeping fields.
        item.setID(IDUtils.generateID());
        item.setCreateTime(new Date());
        item.setDeleted(false);

        // Defaults: status 0 (unprocessed) and "crawled now".
        if (item.getStatus() == null) {
            item.setStatus(0);
        }
        if (item.getCrawlTime() == null) {
            item.setCrawlTime(new Date());
        }

        boolean inserted = itemMapper.insert(item) > 0;
        if (!inserted) {
            response.fail("创建采集项失败");
        } else {
            logger.info("创建采集项成功: {}", item.getTitle());
            response.success("创建采集项成功", item);
        }
    } catch (Exception ex) {
        logger.error("创建采集项异常: ", ex);
        response.fail("创建采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Creates several collection items in one transaction.
 * Items whose source URL is already stored are skipped. Every other item gets an ID,
 * creation time and deleted flag, plus default status / crawl time when absent.
 *
 * If any step throws, the transaction is marked rollback-only before the failure result
 * is returned, so no partially inserted batch is committed.
 *
 * @param itemList items to persist
 * @return success with the number of rows actually inserted, or a failure result
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList) {
    ResultDomain<Integer> resultDomain = new ResultDomain<>();
    try {
        if (itemList == null || itemList.isEmpty()) {
            resultDomain.fail("采集项列表为空");
            return resultDomain;
        }

        int successCount = 0;
        Date now = new Date();
        for (TbDataCollectionItem item : itemList) {
            // De-duplicate on the source URL when one is present.
            String sourceUrl = item.getSourceUrl();
            if (sourceUrl != null && !sourceUrl.isEmpty()
                    && itemMapper.selectBySourceUrl(sourceUrl) != null) {
                logger.debug("跳过已存在的采集项: {}", sourceUrl);
                continue;
            }

            // Identity, bookkeeping fields and defaults.
            item.setID(IDUtils.generateID());
            item.setCreateTime(now);
            item.setDeleted(false);
            if (item.getStatus() == null) {
                item.setStatus(0);
            }
            if (item.getCrawlTime() == null) {
                item.setCrawlTime(now);
            }

            // Count only rows that were really written, as createItem does.
            if (itemMapper.insert(item) > 0) {
                successCount++;
            }
        }

        logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
        resultDomain.success("批量创建采集项成功", successCount);
    } catch (Exception e) {
        // The exception is converted into a failure result instead of propagating, so the
        // transactional proxy never sees it; request the rollback explicitly.
        if (org.springframework.transaction.support.TransactionSynchronizationManager.isActualTransactionActive()) {
            try {
                org.springframework.transaction.interceptor.TransactionAspectSupport
                        .currentTransactionStatus().setRollbackOnly();
            } catch (RuntimeException rollbackFailure) {
                logger.warn("Unable to mark transaction rollback-only", rollbackFailure);
            }
        }
        logger.error("批量创建采集项异常: ", e);
        resultDomain.fail("批量创建采集项异常: " + e.getMessage());
    }
    return resultDomain;
}
/**
 * Updates a collection item by ID and stamps its update time.
 *
 * @param item the item carrying the ID and the new field values
 * @return success with the item, or a failure result when the ID is missing,
 *         no row was updated, or an exception occurred
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> updateItem(TbDataCollectionItem item) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        if (item.getID() == null) {
            response.fail("采集项ID不能为空");
            return response;
        }

        item.setUpdateTime(new Date());
        boolean updated = itemMapper.updateById(item) > 0;
        if (!updated) {
            response.fail("更新采集项失败");
        } else {
            logger.info("更新采集项成功: {}", item.getID());
            response.success("更新采集项成功", item);
        }
    } catch (Exception ex) {
        logger.error("更新采集项异常: ", ex);
        response.fail("更新采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Deletes a collection item by ID.
 *
 * @param itemId ID of the item to delete
 * @return success (with a null payload), or a failure result when the ID is blank,
 *         no row was deleted, or an exception occurred
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> deleteItem(String itemId) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        boolean missingId = itemId == null || itemId.isEmpty();
        if (missingId) {
            response.fail("采集项ID不能为空");
            return response;
        }

        int affected = itemMapper.deleteById(itemId);
        if (affected <= 0) {
            response.fail("删除采集项失败");
        } else {
            logger.info("删除采集项成功ID: {}", itemId);
            // The cast selects the single-object overload of success().
            response.success("删除采集项成功", (TbDataCollectionItem) null);
        }
    } catch (Exception ex) {
        logger.error("删除采集项异常: ", ex);
        response.fail("删除采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Loads one collection item by ID and wraps it in a view object.
 *
 * @param itemId ID of the item
 * @return success with the view object, or a failure result when the ID is blank,
 *         the item does not exist, or an exception occurred
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemById(String itemId) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        if (itemId == null || itemId.isEmpty()) {
            response.fail("采集项ID不能为空");
            return response;
        }

        TbDataCollectionItem found = itemMapper.selectById(itemId);
        if (found == null) {
            response.fail("采集项不存在");
        } else {
            DataCollectionItemVO view = buildVO(found);
            response.success("查询成功", view);
        }
    } catch (Exception ex) {
        logger.error("查询采集项异常: ", ex);
        response.fail("查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Lists collection items matching a filter. A null filter is replaced by an empty one,
 * and the filter's deleted flag is always set to false before querying.
 *
 * @param filter filter conditions, may be null
 * @return success with the list of view objects, or a failure result on exception
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemList(TbDataCollectionItem filter) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        TbDataCollectionItem criteria = (filter != null) ? filter : new TbDataCollectionItem();
        criteria.setDeleted(false);

        List<TbDataCollectionItem> rows = itemMapper.selectItemList(criteria);
        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());
        response.success("查询成功", views);
    } catch (Exception ex) {
        logger.error("查询采集项列表异常: ", ex);
        response.fail("查询采集项列表异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Returns one page of collection items matching a filter.
 * Null arguments are replaced by fresh instances; the filter's deleted flag is always set
 * to false. Total elements and total pages are written back into {@code pageParam}.
 *
 * @param filter    filter conditions, may be null
 * @param pageParam paging parameters, may be null
 * @return success with the page of view objects, or a failure result on exception
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemPage(TbDataCollectionItem filter, PageParam pageParam) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        if (filter == null) {
            filter = new TbDataCollectionItem();
        }
        filter.setDeleted(false);
        if (pageParam == null) {
            pageParam = new PageParam();
        }

        // Page rows first, then the overall count for the same filter.
        List<TbDataCollectionItem> rows = itemMapper.selectItemPage(filter, pageParam);
        long total = itemMapper.countItems(filter);

        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());

        double pageCount = Math.ceil((double) total / pageParam.getPageSize());
        pageParam.setTotalElements(total);
        pageParam.setTotalPages((int) pageCount);

        PageDomain<DataCollectionItemVO> page = new PageDomain<>();
        page.setDataList(views);
        page.setPageParam(pageParam);
        response.success("查询成功", page);
    } catch (Exception ex) {
        logger.error("分页查询采集项异常: ", ex);
        response.fail("分页查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Lists the collection items that belong to a task.
 *
 * @param taskId task ID
 * @return success with the list of view objects, or a failure result when the ID is
 *         blank or an exception occurred
 */
@Override
public ResultDomain<DataCollectionItemVO> getItemsByTaskId(String taskId) {
    ResultDomain<DataCollectionItemVO> response = new ResultDomain<>();
    try {
        boolean missingId = taskId == null || taskId.isEmpty();
        if (missingId) {
            response.fail("任务ID不能为空");
            return response;
        }

        List<TbDataCollectionItem> rows = itemMapper.selectByTaskId(taskId);
        List<DataCollectionItemVO> views = rows.stream()
                .map(row -> buildVO(row))
                .collect(Collectors.toList());
        response.success("查询成功", views);
    } catch (Exception ex) {
        logger.error("根据任务ID查询采集项异常: ", ex);
        response.fail("根据任务ID查询采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Converts a collection item into a published resource under the given tag, then marks
 * the item as converted (status 1) and records the resource ID, time and processor.
 *
 * An item with no status is treated as not yet converted, matching how the rest of this
 * class handles a null status.
 *
 * NOTE(review): exceptions are turned into a failure result, so the transactional proxy
 * does not see them; if the item update fails after the resource was created, the
 * resource creation is presumably not rolled back - confirm the intended behaviour.
 *
 * @param itemId ID of the collection item
 * @param tagId  ID of the tag assigned to the new resource
 * @return success with the new resource ID, or a failure result with a message
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<String> convertToResource(String itemId, String tagId) {
    ResultDomain<String> resultDomain = new ResultDomain<>();
    try {
        if (itemId == null || itemId.isEmpty()) {
            resultDomain.fail("采集项ID不能为空");
            return resultDomain;
        }
        if (tagId == null || tagId.isEmpty()) {
            resultDomain.fail("标签ID不能为空");
            return resultDomain;
        }

        // Load the collection item.
        TbDataCollectionItem item = itemMapper.selectById(itemId);
        if (item == null) {
            resultDomain.fail("采集项不存在");
            return resultDomain;
        }
        // Null-safe comparison: getStatus() returns a nullable Integer, and "== 1" would
        // unbox null and throw.
        if (Integer.valueOf(1).equals(item.getStatus())) {
            resultDomain.fail("该采集项已转换为资源");
            return resultDomain;
        }

        // Build the resource from the item's content.
        TbResource resource = new TbResource();
        resource.setResourceID(IDUtils.generateID());
        resource.setTitle(item.getTitle());
        resource.setContent(item.getContent());
        resource.setSummary(item.getSummary());
        resource.setCoverImage(item.getCoverImage());
        resource.setTagID(tagId);
        resource.setAuthor(item.getAuthor());
        resource.setSource(item.getSource());
        resource.setSourceUrl(item.getSourceUrl());
        // Fall back to "now" when the item carries no publish time.
        resource.setPublishTime(item.getPublishTime() != null ? item.getPublishTime() : new Date());
        resource.setStatus(1); // published
        resource.setViewCount(0);
        resource.setLikeCount(0);
        resource.setCollectCount(0);
        resource.setIsRecommend(false);
        resource.setIsBanner(false);
        resource.setCreateTime(new Date());
        resource.setDeleted(false);

        ResourceVO resourceVO = new ResourceVO();
        resourceVO.setResource(resource);
        ResultDomain<ResourceVO> createResult = resourceService.createResource(resourceVO);
        if (!createResult.isSuccess()) {
            resultDomain.fail("转换为资源失败: " + createResult.getMessage());
            return resultDomain;
        }

        // Mark the item as converted and link it to the resource.
        item.setStatus(1);
        item.setResourceId(resource.getResourceID());
        item.setProcessTime(new Date());
        item.setProcessor(LoginUtil.getCurrentUserId());
        itemMapper.updateById(item);

        logger.info("采集项转换为资源成功采集项ID: {}, 资源ID: {}", itemId, resource.getResourceID());
        resultDomain.success("转换为资源成功", resource.getResourceID());
    } catch (Exception e) {
        logger.error("转换为资源异常: ", e);
        resultDomain.fail("转换为资源异常: " + e.getMessage());
    }
    return resultDomain;
}
/**
 * Converts several collection items into resources under one tag by calling
 * {@code convertToResource} for each ID and counting the successful conversions.
 *
 * @param itemIds IDs of the items to convert
 * @param tagId   ID of the tag assigned to every new resource
 * @return success with the number of converted items, or a failure result when an
 *         argument is empty or an exception occurred
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchConvertToResource(List<String> itemIds, String tagId) {
    ResultDomain<Integer> response = new ResultDomain<>();
    try {
        if (itemIds == null || itemIds.isEmpty()) {
            response.fail("采集项ID列表为空");
            return response;
        }
        if (tagId == null || tagId.isEmpty()) {
            response.fail("标签ID不能为空");
            return response;
        }

        int converted = 0;
        for (String id : itemIds) {
            // A failed conversion is simply not counted; the loop carries on.
            if (convertToResource(id, tagId).isSuccess()) {
                converted++;
            }
        }

        logger.info("批量转换为资源完成,共{}条,成功{}条", itemIds.size(), converted);
        response.success("批量转换为资源完成", converted);
    } catch (Exception ex) {
        logger.error("批量转换为资源异常: ", ex);
        response.fail("批量转换为资源异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Marks a collection item as ignored (status 2) and records the processing time and the
 * current user as processor.
 *
 * @param itemId ID of the item
 * @return success with the updated item, or a failure result when the ID is blank,
 *         the item does not exist, or an exception occurred
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<TbDataCollectionItem> ignoreItem(String itemId) {
    ResultDomain<TbDataCollectionItem> response = new ResultDomain<>();
    try {
        if (itemId == null || itemId.isEmpty()) {
            response.fail("采集项ID不能为空");
            return response;
        }

        TbDataCollectionItem target = itemMapper.selectById(itemId);
        if (target == null) {
            response.fail("采集项不存在");
            return response;
        }

        target.setStatus(2); // ignored
        target.setProcessTime(new Date());
        target.setProcessor(LoginUtil.getCurrentUserId());
        itemMapper.updateById(target);

        logger.info("忽略采集项成功ID: {}", itemId);
        response.success("忽略采集项成功", target);
    } catch (Exception ex) {
        logger.error("忽略采集项异常: ", ex);
        response.fail("忽略采集项异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Counts collection items by status through the mapper.
 *
 * @param taskId task ID passed to the mapper
 * @param status status value to count
 * @return success with the count, or a failure result on exception
 */
@Override
public ResultDomain<Long> countByStatus(String taskId, Integer status) {
    ResultDomain<Long> response = new ResultDomain<>();
    try {
        long matched = itemMapper.countByStatus(taskId, status);
        response.success("统计成功", matched);
    } catch (Exception ex) {
        logger.error("统计采集项数量异常: ", ex);
        response.fail("统计采集项数量异常: " + ex.getMessage());
    }
    return response;
}
/**
 * Builds the view object for a collection item: attaches the item, its scheduled task
 * (looked up when the item has a task ID), a status label and the edit/convert flags.
 *
 * @param item the collection item
 * @return the populated view object
 * @author yslg
 * @since 2025-11-08
 */
private DataCollectionItemVO buildVO(TbDataCollectionItem item) {
    DataCollectionItemVO view = new DataCollectionItemVO();
    view.setItem(item);

    // Attach the related scheduled task, if any.
    String taskId = item.getTaskId();
    if (taskId != null && !taskId.isEmpty()) {
        view.setTask(taskMapper.selectTaskById(taskId));
    }

    // Status label: a missing status is shown the same as 0 (unprocessed).
    Integer status = item.getStatus();
    boolean unprocessed = status == null || status == 0;
    String label;
    if (unprocessed) {
        label = "未处理";
    } else if (status == 1) {
        label = "已转换为资源";
    } else if (status == 2) {
        label = "已忽略";
    } else {
        label = "未知";
    }
    view.setStatusText(label);

    // Unprocessed or ignored items stay editable; only unprocessed ones can be converted.
    view.setCanEdit(unprocessed || status == 2);
    view.setCanConvert(unprocessed);
    return view;
}
}

View File

@@ -1,60 +1,60 @@
package org.xyzh.crontab.task; // package org.xyzh.crontab.task;
import org.slf4j.Logger; // import org.slf4j.Logger;
import org.slf4j.LoggerFactory; // import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; // import org.springframework.stereotype.Component;
import java.text.SimpleDateFormat; // import java.text.SimpleDateFormat;
import java.util.Date; // import java.util.Date;
/** // /**
* @description 数据备份任务 // * @description 数据备份任务
* @filename DataBackupTask.java // * @filename DataBackupTask.java
* @author yslg // * @author yslg
* @copyright xyzh // * @copyright xyzh
* @since 2025-10-25 // * @since 2025-10-25
*/ // */
@Component("dataBackupTask") // @Component("dataBackupTask")
public class DataBackupTask { // public class DataBackupTask {
private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class); // private static final Logger logger = LoggerFactory.getLogger(DataBackupTask.class);
/** // /**
* @description 执行数据备份 // * @description 执行数据备份
* @author yslg // * @author yslg
* @since 2025-10-25 // * @since 2025-10-25
*/ // */
public void execute() { // public void execute() {
logger.info("开始执行数据备份任务..."); // logger.info("开始执行数据备份任务...");
try { // try {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss"); // SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss");
String backupTime = sdf.format(new Date()); // String backupTime = sdf.format(new Date());
// TODO: 实现数据备份逻辑 // // TODO: 实现数据备份逻辑
// 1. 备份数据库 // // 1. 备份数据库
// 2. 备份文件 // // 2. 备份文件
// 3. 压缩备份文件 // // 3. 压缩备份文件
// 4. 上传到备份服务器或云存储 // // 4. 上传到备份服务器或云存储
Thread.sleep(2000); // 模拟执行 // Thread.sleep(2000); // 模拟执行
logger.info("数据备份任务执行完成,备份标识: {}", backupTime); // logger.info("数据备份任务执行完成,备份标识: {}", backupTime);
} catch (Exception e) { // } catch (Exception e) {
logger.error("数据备份任务执行失败: ", e); // logger.error("数据备份任务执行失败: ", e);
throw new RuntimeException("数据备份任务执行失败", e); // throw new RuntimeException("数据备份任务执行失败", e);
} // }
} // }
/** // /**
* @description 执行带参数的备份任务 // * @description 执行带参数的备份任务
* @param params 参数备份类型full-全量incremental-增量) // * @param params 参数备份类型full-全量incremental-增量)
* @author yslg // * @author yslg
* @since 2025-10-25 // * @since 2025-10-25
*/ // */
public void execute(String params) { // public void execute(String params) {
logger.info("开始执行数据备份任务,备份类型: {}", params); // logger.info("开始执行数据备份任务,备份类型: {}", params);
execute(); // execute();
} // }
} // }

View File

@@ -1,68 +1,68 @@
package org.xyzh.crontab.task; // package org.xyzh.crontab.task;
import org.slf4j.Logger; // import org.slf4j.Logger;
import org.slf4j.LoggerFactory; // import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; // import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component; // import org.springframework.stereotype.Component;
import org.xyzh.crontab.mapper.CrontabLogMapper; // import org.xyzh.crontab.mapper.CrontabLogMapper;
import java.util.Calendar; // import java.util.Calendar;
import java.util.Date; // import java.util.Date;
/**
 * Scheduled task that removes expired crontab execution logs.
 *
 * @filename LogCleanTask.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-25
 */
@Component("logCleanTask")
public class LogCleanTask {

    private static final Logger logger = LoggerFactory.getLogger(LogCleanTask.class);

    /** Retention period, in days, used when no valid parameter is supplied. */
    private static final int DEFAULT_RETENTION_DAYS = 30;

    @Autowired
    private CrontabLogMapper logMapper;

    /**
     * Runs the clean-up with the default retention of 30 days.
     *
     * @author yslg
     * @since 2025-10-25
     */
    public void execute() {
        execute("30");
    }

    /**
     * Runs the clean-up, passing {@code now - days} to the mapper as the cut-off date.
     *
     * @param params retention period in days; {@code null}, blank, non-numeric or
     *               negative values fall back to 30 days
     * @throws RuntimeException if the mapper call fails (the cause is preserved)
     * @author yslg
     * @since 2025-10-25
     */
    public void execute(String params) {
        logger.info("开始执行日志清理任务...");
        try {
            int days = resolveRetentionDays(params);

            Calendar calendar = Calendar.getInstance();
            calendar.add(Calendar.DAY_OF_MONTH, -days);
            Date beforeDate = calendar.getTime();

            int count = logMapper.cleanLogsByDate(beforeDate);
            logger.info("日志清理任务执行完成,共清理{}条日志", count);
        } catch (Exception e) {
            logger.error("日志清理任务执行失败: ", e);
            throw new RuntimeException("日志清理任务执行失败", e);
        }
    }

    /**
     * Parses the retention parameter, falling back to the default on bad input.
     *
     * A negative value would move the cut-off date into the future, so it is
     * rejected instead of being passed on to the mapper.
     */
    private int resolveRetentionDays(String params) {
        if (params == null || params.trim().isEmpty()) {
            return DEFAULT_RETENTION_DAYS;
        }
        try {
            int days = Integer.parseInt(params.trim());
            if (days < 0) {
                logger.warn("天数参数不能为负数: {},使用默认值{}天", params, DEFAULT_RETENTION_DAYS);
                return DEFAULT_RETENTION_DAYS;
            }
            return days;
        } catch (NumberFormatException e) {
            logger.warn("参数格式错误使用默认值30天");
            return DEFAULT_RETENTION_DAYS;
        }
    }
}

View File

@@ -1,54 +1,54 @@
package org.xyzh.crontab.task; // package org.xyzh.crontab.task;
import org.slf4j.Logger; // import org.slf4j.Logger;
import org.slf4j.LoggerFactory; // import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; // import org.springframework.stereotype.Component;
/**
 * Scheduled task for system data statistics.
 *
 * The statistics logic is not implemented yet; the task currently only logs
 * and sleeps for one second as a placeholder.
 *
 * @filename SystemStatisticsTask.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-25
 */
@Component("systemStatisticsTask")
public class SystemStatisticsTask {

    private static final Logger logger = LoggerFactory.getLogger(SystemStatisticsTask.class);

    /**
     * Runs the statistics task.
     *
     * @throws RuntimeException if the task fails or the thread is interrupted
     * @author yslg
     * @since 2025-10-25
     */
    public void execute() {
        logger.info("开始执行系统数据统计任务...");
        try {
            // TODO: implement the statistics logic:
            //   1. collect user statistics
            //   2. collect resource statistics
            //   3. collect access statistics
            //   4. generate the statistics report
            Thread.sleep(1000); // placeholder that simulates the work
            logger.info("系统数据统计任务执行完成");
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the scheduler thread that called us.
            Thread.currentThread().interrupt();
            logger.error("系统数据统计任务执行失败: ", e);
            throw new RuntimeException("系统数据统计任务执行失败", e);
        } catch (Exception e) {
            logger.error("系统数据统计任务执行失败: ", e);
            throw new RuntimeException("系统数据统计任务执行失败", e);
        }
    }

    /**
     * Runs the statistics task with a parameter.
     *
     * The parameter is only logged; the work is delegated to {@link #execute()}.
     *
     * @param params task parameter (currently unused apart from logging)
     * @author yslg
     * @since 2025-10-25
     */
    public void execute(String params) {
        logger.info("开始执行系统数据统计任务,参数: {}", params);
        execute();
    }
}

View File

@@ -0,0 +1,46 @@
package org.xyzh.crontab.task.newsTask;
import java.util.List;
import org.xyzh.common.dto.resource.TbResource;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Article structure returned by the crawler.
 *
 * @filename ArticleStruct.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-11-10
 */
@Data
@NoArgsConstructor
public class ArticleStruct {

    private String title;
    private String url;
    private String publishTime;
    private String author;
    private String source;
    private List<RowStruct> contentRows;

    /**
     * One content row of an article.
     *
     * Declared {@code static} so it can be instantiated without an enclosing
     * {@link ArticleStruct}; a non-static inner class has no usable no-arg
     * constructor for JSON data binding.
     */
    @Data
    @NoArgsConstructor
    public static class RowStruct {
        // Planned but not yet enabled fields: tag, style
        // (e.g. mapping "text-indent: 2em;" to two tab characters).

        /** The complete {@code <p>} tag, including its styling. */
        private String content;
    }

    /**
     * Converts this article into a {@link TbResource}.
     *
     * Only the title is copied at the moment.
     * TODO: map url, publishTime, author, source and contentRows once the
     * corresponding TbResource setters are settled.
     *
     * @return a new resource carrying this article's title
     */
    public TbResource toTbResource() {
        TbResource tbResource = new TbResource();
        tbResource.setTitle(this.title);
        return tbResource;
    }
}

View File

@@ -0,0 +1,328 @@
package org.xyzh.crontab.task.newsTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* @description 新闻爬虫定时任务
* @filename NewsCrewerTask.java
* @author yslg
* @copyright xyzh
* @since 2025-11-08
*/
@Component("newsCrewerTask")
public class NewsCrawlerTask {
private static final Logger logger = LoggerFactory.getLogger(NewsCrawlerTask.class);
@Value("${crewer.python.path:python}")
private String pythonPath;
@Value("${crewer.script.path:../schoolNewsCrewer}")
private String scriptPath;
@Value("${crewer.timeout:300}")
private int timeout;
@Autowired
private DataCollectionItemService itemService;
private final ObjectMapper objectMapper = new ObjectMapper();
/**
* @description 执行新闻爬虫任务默认爬取人民日报政治类新闻20条
* @author yslg
* @since 2025-11-08
*/
public void execute() {
execute("rmrb,politics,20");
}
/**
* @description 执行新闻爬虫任务
* @param params 参数格式: "source,category,limit" 或 "taskId|source,category,limit"
* 如果包含taskId格式为: "taskId|source,category,limit"
* source: 新闻源rmrb-人民日报)
* category: 分类politics-政治, society-社会等)
* limit: 爬取数量
* @author yslg
* @since 2025-11-08
*/
public void execute(String params) {
logger.info("开始执行新闻爬虫任务,参数: {}", params);
try {
// 解析参数支持taskId|source,category,limit格式
String taskId = null;
String actualParams = params;
if (params.contains("|")) {
String[] parts = params.split("\\|", 2);
taskId = parts[0];
actualParams = parts[1];
}
String[] paramArray = actualParams.split(",");
String source = paramArray.length > 0 ? paramArray[0] : "rmrb";
String category = paramArray.length > 1 ? paramArray[1] : "politics";
String limit = paramArray.length > 2 ? paramArray[2] : "20";
logger.info("爬虫参数 - 来源: {}, 分类: {}, 数量: {}", source, category, limit);
// 验证Python和脚本路径
Path scriptDir = Paths.get(scriptPath);
if (!Files.exists(scriptDir)) {
throw new RuntimeException("爬虫脚本目录不存在: " + scriptPath);
}
// 构建Python命令
List<String> command = new ArrayList<>();
// 检查是否是Windows系统
String os = System.getProperty("os.name").toLowerCase();
if (os.contains("win")) {
command.add("cmd");
command.add("/c");
command.add(pythonPath);
} else {
command.add(pythonPath);
}
command.add("main.py");
command.add(category);
command.add(limit);
// 生成输出文件名
String timestamp = String.valueOf(System.currentTimeMillis());
String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp);
command.add(outputFile);
logger.info("执行命令: {}", String.join(" ", command));
// 创建进程构建器
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.directory(scriptDir.toFile());
processBuilder.redirectErrorStream(true);
// 启动进程
Process process = processBuilder.start();
// 读取输出
StringBuilder output = new StringBuilder();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream(), "UTF-8"))) {
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append("\n");
logger.debug("Python输出: {}", line);
}
}
// 等待进程结束
boolean finished = process.waitFor(timeout, TimeUnit.SECONDS);
if (!finished) {
process.destroy();
throw new RuntimeException("爬虫任务超时(超过" + timeout + "秒)");
}
int exitCode = process.exitValue();
if (exitCode == 0) {
logger.info("新闻爬虫任务执行成功");
// 读取并解析结果文件
Path outputPath = scriptDir.resolve(outputFile);
if (Files.exists(outputPath)) {
String jsonContent = Files.readString(outputPath);
ObjectMapper mapper = new ObjectMapper();
List<Map<String, Object>> newsList = mapper.readValue(
jsonContent,
List.class
);
logger.info("成功爬取 {} 条新闻", newsList.size());
// 保存新闻数据到数据库
if (taskId != null && !taskId.isEmpty()) {
saveNewsToDatabase(newsList, taskId, source, category);
} else {
logger.warn("未提供任务ID跳过数据保存");
}
} else {
logger.warn("输出文件不存在: {}", outputFile);
}
} else {
logger.error("新闻爬虫任务执行失败,退出码: {}", exitCode);
logger.error("输出内容:\n{}", output.toString());
throw new RuntimeException("爬虫任务执行失败,退出码: " + exitCode);
}
} catch (Exception e) {
logger.error("新闻爬虫任务执行异常: ", e);
throw new RuntimeException("新闻爬虫任务执行异常", e);
}
}
/**
* @description 测试Python环境
* @author yslg
* @since 2025-11-08
*/
public void testPythonEnvironment() {
logger.info("测试Python环境...");
try {
ProcessBuilder pb = new ProcessBuilder(pythonPath, "--version");
Process process = pb.start();
BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream())
);
String version = reader.readLine();
int exitCode = process.waitFor();
if (exitCode == 0) {
logger.info("Python环境正常: {}", version);
} else {
logger.error("Python环境异常");
}
} catch (Exception e) {
logger.error("测试Python环境失败: ", e);
}
}
/**
* @description 将新闻数据保存到数据库
* @param newsList 新闻列表
* @param taskId 任务ID
* @param source 新闻来源
* @param category 分类
* @author yslg
* @since 2025-11-08
*/
private void saveNewsToDatabase(List<Map<String, Object>> newsList, String taskId, String source, String category) {
logger.info("开始保存 {} 条新闻到数据库任务ID: {}", newsList.size(), taskId);
try {
List<TbDataCollectionItem> itemList = new ArrayList<>();
Date now = new Date();
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
for (Map<String, Object> news : newsList) {
try {
TbDataCollectionItem item = new TbDataCollectionItem();
// 基本信息
item.setTaskId(taskId);
item.setTitle(getStringValue(news, "title"));
item.setContent(getStringValue(news, "content"));
item.setSummary(getStringValue(news, "summary"));
item.setSource(source.equals("rmrb") ? "人民日报" : source);
item.setSourceUrl(getStringValue(news, "url"));
item.setCategory(category);
item.setAuthor(getStringValue(news, "author"));
// 发布时间
String publishTimeStr = getStringValue(news, "publish_time");
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
try {
item.setPublishTime(dateFormat.parse(publishTimeStr));
} catch (Exception e) {
logger.warn("解析发布时间失败: {}", publishTimeStr);
item.setPublishTime(now);
}
} else {
item.setPublishTime(now);
}
// 封面图片
item.setCoverImage(getStringValue(news, "cover_image"));
// 图片列表JSON格式
Object imagesObj = news.get("images");
if (imagesObj != null) {
if (imagesObj instanceof List) {
item.setImages(objectMapper.writeValueAsString(imagesObj));
} else if (imagesObj instanceof String) {
item.setImages((String) imagesObj);
}
}
// 标签
Object tagsObj = news.get("tags");
if (tagsObj != null) {
if (tagsObj instanceof List) {
List<String> tags = (List<String>) tagsObj;
item.setTags(String.join(",", tags));
} else if (tagsObj instanceof String) {
item.setTags((String) tagsObj);
}
}
// 状态和时间
item.setStatus(0); // 未处理
item.setCrawlTime(now);
itemList.add(item);
} catch (Exception e) {
logger.error("转换新闻数据失败: ", e);
}
}
// 批量保存
if (!itemList.isEmpty()) {
ResultDomain<Integer> result = itemService.batchCreateItems(itemList);
if (result.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", result.getData());
} else {
logger.error("保存新闻到数据库失败: {}", result.getMessage());
}
} else {
logger.warn("没有有效的新闻数据需要保存");
}
} catch (Exception e) {
logger.error("保存新闻数据到数据库异常: ", e);
}
}
/**
* @description 从Map中安全获取字符串值
* @param map Map对象
* @param key 键
* @return String 值
* @author yslg
* @since 2025-11-08
*/
private String getStringValue(Map<String, Object> map, String key) {
Object value = map.get(key);
if (value == null) {
return null;
}
return value.toString();
}
}

View File

@@ -5,8 +5,9 @@ abstract public class NewsTask {
// 爬取网站目标 // 爬取网站目标
private String target; private String target;
// 爬取标题
private String title; // 爬取搜索
private String query;
} }

View File

@@ -0,0 +1,234 @@
package org.xyzh.crontab.task.newsTask;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Simplified examples of calling Python from Java, meant for learning the core mechanics.
 *
 * Every example makes sure the child's output is consumed or redirected: a child
 * whose stdout/stderr pipe is never read can block once the pipe buffer fills up.
 */
public class PythonExecutorExample {

    /**
     * Example 1: the simplest call - start, print stdout line by line, wait.
     */
    public static void example1_Simple() throws Exception {
        // 1. Build the command
        ProcessBuilder pb = new ProcessBuilder("python", "script.py", "arg1", "arg2");
        // stderr is passed through to this JVM's stderr so it cannot fill an unread pipe
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);

        // 2. Start the process
        Process process = pb.start();

        // 3. Read the output (the reader is closed when done)
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println("Python输出: " + line);
            }
        }

        // 4. Wait for the process to end
        int exitCode = process.waitFor();
        System.out.println("退出码: " + exitCode);
    }

    /**
     * Example 2: get the result from standard output.
     *
     * @return all stdout lines concatenated without line separators
     */
    public static String example2_GetResult() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = pb.start();

        StringBuilder result = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
        }

        process.waitFor();
        return result.toString();
    }

    /**
     * Example 3: timeout control.
     */
    public static void example3_WithTimeout() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        // Output is not read here, so hand it to this JVM's own stdout/stderr.
        pb.inheritIO();
        Process process = pb.start();

        // Wait at most 5 seconds
        boolean finished = process.waitFor(5, TimeUnit.SECONDS);
        if (!finished) {
            // Timed out: kill the process
            process.destroyForcibly();
            System.out.println("任务超时");
        } else {
            int exitCode = process.exitValue();
            System.out.println("执行完成,退出码: " + exitCode);
        }
    }

    /**
     * Example 4: passing arguments on the command line.
     */
    public static void example4_PassArgs() throws Exception {
        List<String> command = new ArrayList<>();
        command.add("python");
        command.add("script.py");
        command.add("category=politics");
        command.add("limit=20");

        ProcessBuilder pb = new ProcessBuilder(command);
        // Output is not read here, so hand it to this JVM's own stdout/stderr.
        pb.inheritIO();
        Process process = pb.start();
        process.waitFor();
    }

    /**
     * Example 5: passing arguments through standard input.
     */
    public static void example5_PassArgsByStdin() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = pb.start();

        // Write the arguments to the child's stdin. Closing the writer closes the
        // stream, which is what tells Python that the input has ended.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(process.getOutputStream(), StandardCharsets.UTF_8))) {
            writer.write("{\"category\":\"politics\",\"limit\":20}");
            writer.newLine();
            writer.flush();
        }

        // Read the first line of output
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String result = reader.readLine();
            System.out.println("结果: " + result);
        }

        process.waitFor();
    }

    /**
     * Example 6: handling Windows / Linux differences.
     */
    public static void example6_CrossPlatform() throws Exception {
        List<String> command = new ArrayList<>();

        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("win")) {
            // Windows: run through cmd
            command.add("cmd");
            command.add("/c");
            command.add("python");
        } else {
            // Linux / macOS: run the interpreter directly
            command.add("python3");
        }
        command.add("script.py");

        ProcessBuilder pb = new ProcessBuilder(command);
        // Output is not read here, so hand it to this JVM's own stdout/stderr.
        pb.inheritIO();
        Process process = pb.start();
        process.waitFor();
    }

    /**
     * Example 7: complete error handling with a timeout that really applies.
     *
     * The output goes to a temporary file instead of a pipe: reading a pipe to EOF
     * before {@code waitFor} would block until the child exits and defeat the timeout.
     *
     * @throws RuntimeException wrapping any failure (timeout, non-zero exit, I/O)
     */
    public static void example7_Complete() throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "script.py");
        // Merge stderr into stdout
        pb.redirectErrorStream(true);
        // Working directory of the script
        pb.directory(new File("/path/to/script"));

        Process process = null;
        File outputFile = null;
        try {
            outputFile = File.createTempFile("python-output", ".log");
            pb.redirectOutput(outputFile);
            process = pb.start();

            // Wait for the end, bounded by the timeout
            boolean finished = process.waitFor(30, TimeUnit.SECONDS);
            if (!finished) {
                process.destroyForcibly();
                throw new RuntimeException("任务超时");
            }

            // Collect what the child wrote
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(outputFile), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append("\n");
                }
            }

            int exitCode = process.exitValue();
            if (exitCode == 0) {
                System.out.println("执行成功");
                System.out.println("输出: " + output.toString());
            } else {
                System.err.println("执行失败,退出码: " + exitCode);
                System.err.println("错误输出: " + output.toString());
                throw new RuntimeException("Python执行失败");
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller's thread.
            Thread.currentThread().interrupt();
            throw new RuntimeException("执行异常", e);
        } catch (Exception e) {
            throw new RuntimeException("执行异常", e);
        } finally {
            // Clean up: no stray process, no stray temp file
            if (process != null && process.isAlive()) {
                process.destroyForcibly();
            }
            if (outputFile != null) {
                outputFile.delete(); // best effort
            }
        }
    }

    /**
     * Example 8: asynchronous execution - the caller is not blocked.
     */
    public static void example8_Async() {
        new Thread(() -> {
            try {
                ProcessBuilder pb = new ProcessBuilder("python", "script.py");
                pb.redirectError(ProcessBuilder.Redirect.INHERIT);
                Process process = pb.start();

                // Read the output on the background thread
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println("后台输出: " + line);
                    }
                }

                process.waitFor();
                System.out.println("后台任务完成");
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();

        System.out.println("主线程继续执行...");
    }
}

View File

@@ -0,0 +1,15 @@
package org.xyzh.crontab.task.newsTask;
import lombok.Data;
/**
 * Plain data holder describing one script entry; Lombok's @Data generates the accessors.
 *
 * NOTE(review): the field names match the keys of the "script" list in the crawler
 * YAML of the same commit (name, path, method, param, output) - presumably this class
 * is the binding target for those entries; confirm against the configuration loader.
 */
@Data
public class ScriptDomain {
// Display name of the script.
private String name;
// Script path (the YAML example uses a relative path such as crawler/xxx.py).
private String path;
// Method identifier - semantics not visible here; TODO confirm.
private String method;
// Parameter value - format not visible here; TODO confirm.
private String param;
// Output value - presumably an output location; TODO confirm.
private String output;
}

View File

@@ -0,0 +1,34 @@
# Crawler settings: interpreter, project base path and script definitions.
crawler:
python:
path: C:/Python312/python.exe
base:
path: F:/Project/schoolNews/schoolNewsCrawler
script:
- name: xxx爬虫
path: crawler/xxx.py
method: xxx
param: xxx
output: xxx
# Scheduled-task catalogue.
crontab:
items: # scheduled tasks the front end can choose from
- name: 人民日报新闻爬取
methods: # crawl modes
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
params:
query: String # search keyword
total: Integer # total number of articles
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/xxx.py
params:
startDate: String # start date
endDate: String # end date
# NOTE(review): key is spelled "isYestoday" (likely meant "isYesterday"); check every consumer before renaming.
isYestoday: Boolean # whether the target day is yesterday

View File

@@ -1,120 +0,0 @@
package org.xyzh.news.controller;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.xyzh.api.news.collection.DataCollectionService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.resource.TbDataCollectionConfig;
import org.xyzh.common.dto.resource.TbDataCollectionLog;
/**
 * REST controller for data-collection configurations and logs, mapped under /news/collection.
 *
 * Most handlers delegate directly to DataCollectionService. The two list handlers,
 * the log-delete handler and the active-config handler are stubs that currently
 * return null (their service calls are commented out).
 *
 * @filename DataCollectionController.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
@RestController
@RequestMapping("/news/collection")
public class DataCollectionController {
// Declared but not used by any handler in this class.
private static final Logger logger = LoggerFactory.getLogger(DataCollectionController.class);
@Autowired
private DataCollectionService dataCollectionService;
/**
 * Lists configurations.
 * Stub: always returns null; the service call below is commented out.
 *
 * @param filter filter criteria bound from the request (currently ignored)
 */
@GetMapping("/config/list")
public ResultDomain<TbDataCollectionConfig> getConfigList(TbDataCollectionConfig filter) {
return null;
// return dataCollectionService.getConfigList(filter);
}
/**
 * Returns the configuration with the given ID.
 *
 * @param configID configuration ID taken from the path
 * @return the result of DataCollectionService.getConfigById
 */
@GetMapping("/config/{configID}")
public ResultDomain<TbDataCollectionConfig> getConfigById(@PathVariable String configID) {
return dataCollectionService.getConfigById(configID);
}
/**
 * Creates a configuration.
 *
 * @param config configuration read from the request body
 * @return the result of DataCollectionService.createConfig
 */
@PostMapping("/config/create")
public ResultDomain<TbDataCollectionConfig> createConfig(@RequestBody TbDataCollectionConfig config) {
return dataCollectionService.createConfig(config);
}
/**
 * Updates a configuration.
 *
 * @param config configuration read from the request body
 * @return the result of DataCollectionService.updateConfig
 */
@PutMapping("/config/update")
public ResultDomain<TbDataCollectionConfig> updateConfig(@RequestBody TbDataCollectionConfig config) {
return dataCollectionService.updateConfig(config);
}
/**
 * Deletes the configuration with the given ID.
 *
 * @param configID configuration ID taken from the path
 * @return the result of DataCollectionService.deleteConfig
 */
@DeleteMapping("/config/{configID}")
public ResultDomain<Boolean> deleteConfig(@PathVariable String configID) {
return dataCollectionService.deleteConfig(configID);
}
/**
 * Updates the status of a configuration.
 *
 * @param configID configuration ID taken from the path
 * @param status new status, taken from the request parameter
 * @return the result of DataCollectionService.updateConfigStatus
 */
@PutMapping("/config/{configID}/status")
public ResultDomain<TbDataCollectionConfig> updateConfigStatus(
@PathVariable String configID,
@RequestParam Integer status) {
return dataCollectionService.updateConfigStatus(configID, status);
}
/**
 * Lists logs.
 * Stub: always returns null; the service call below is commented out.
 *
 * @param filter filter criteria bound from the request (currently ignored)
 */
@GetMapping("/log/list")
public ResultDomain<TbDataCollectionLog> getLogList(TbDataCollectionLog filter) {
return null;
// return dataCollectionService.getLogList(filter);
}
/**
 * Returns the log with the given ID.
 *
 * @param logID log ID taken from the path
 * @return the result of DataCollectionService.getLogById
 */
@GetMapping("/log/{logID}")
public ResultDomain<TbDataCollectionLog> getLogById(@PathVariable String logID) {
return dataCollectionService.getLogById(logID);
}
/**
 * Creates a log.
 *
 * @param log log read from the request body
 * @return the result of DataCollectionService.createLog
 */
@PostMapping("/log/create")
public ResultDomain<TbDataCollectionLog> createLog(@RequestBody TbDataCollectionLog log) {
return dataCollectionService.createLog(log);
}
/**
 * Deletes the log with the given ID.
 * Stub: always returns null; the service call below is commented out.
 *
 * @param logID log ID taken from the path (currently ignored)
 */
@DeleteMapping("/log/{logID}")
public ResultDomain<Boolean> deleteLog(@PathVariable String logID) {
return null;
// return dataCollectionService.deleteLog(logID);
}
/**
 * Returns the active configurations.
 * Stub: always returns null; the service call below is commented out.
 * NOTE(review): the declared element type is TbDataCollectionLog although the
 * handler is about configurations - confirm the intended type.
 */
@GetMapping("/active")
public ResultDomain<TbDataCollectionLog> getActiveConfigs() {
return null;
// return dataCollectionService.getActiveConfigs();
}
}

View File

@@ -1,253 +0,0 @@
package org.xyzh.news.controller;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.*;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.resource.TbResource;
import org.xyzh.common.dto.resource.TbDataCollectionConfig;
import java.util.Date;
import java.util.Map;
/**
 * REST controller for resource management, mapped under /news/management.
 *
 * Every handler in this class is an unimplemented stub that returns null; the
 * Javadoc on each one describes the intended feature, not current behavior.
 *
 * @filename ResourceManagementController.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
@RestController
@RequestMapping("/news/management")
public class ResourceManagementController {
// Declared but not used by any handler in this class.
private static final Logger logger = LoggerFactory.getLogger(ResourceManagementController.class);
// ==================== Data collection management ====================
/**
 * Configures a collection source. Not implemented: returns null.
 */
@PostMapping("/collection/config-source")
public ResultDomain<TbDataCollectionConfig> configCollectionSource(@RequestBody Map<String, Object> configData) {
// TODO: implement configuring a collection source
return null;
}
/**
 * Sets the collection frequency. Not implemented: returns null.
 */
@PutMapping("/collection/frequency")
public ResultDomain<Boolean> setCollectionFrequency(@RequestBody Map<String, Object> params) {
// TODO: implement setting the collection frequency (daily/weekly)
return null;
}
/**
 * Triggers a collection manually. Not implemented: returns null.
 */
@PostMapping("/collection/manual-trigger")
public ResultDomain<Boolean> manualTriggerCollection(@RequestParam String configID) {
// TODO: implement manually triggering a collection
return null;
}
/**
 * Returns the list of collection configurations. Not implemented: returns null.
 */
@GetMapping("/collection/config-list")
public ResultDomain<TbDataCollectionConfig> getCollectionConfigList() {
// TODO: implement fetching the collection configuration list
return null;
}
/**
 * Updates a collection configuration. Not implemented: returns null.
 */
@PutMapping("/collection/config-update")
public ResultDomain<TbDataCollectionConfig> updateCollectionConfig(@RequestBody TbDataCollectionConfig config) {
// TODO: implement updating a collection configuration
return null;
}
/**
 * Deletes a collection configuration. Not implemented: returns null.
 */
@DeleteMapping("/collection/config/{configID}")
public ResultDomain<Boolean> deleteCollectionConfig(@PathVariable String configID) {
// TODO: implement deleting a collection configuration
return null;
}
// ==================== Article editing management ====================
/**
 * Creates an article manually. Not implemented: returns null.
 */
@PostMapping("/article/create")
public ResultDomain<TbResource> createArticle(@RequestBody Map<String, Object> articleData) {
// TODO: implement manual article creation (rich-text editor, inserting images/links)
return null;
}
/**
 * Edits article content. Not implemented: returns null.
 */
@PutMapping("/article/edit")
public ResultDomain<TbResource> editArticle(@RequestBody TbResource article) {
// TODO: implement editing article content
return null;
}
/**
 * Deletes an article. Not implemented: returns null.
 */
@DeleteMapping("/article/{articleID}")
public ResultDomain<Boolean> deleteArticle(@PathVariable String articleID) {
// TODO: implement deleting an article
return null;
}
/**
 * Sets the article status. Not implemented: returns null.
 */
@PutMapping("/article/status")
public ResultDomain<Boolean> setArticleStatus(@RequestBody Map<String, Object> params) {
// TODO: implement setting the article status (draft/published)
return null;
}
/**
 * Uploads an article image. Not implemented: returns null.
 * NOTE(review): the "file" parameter is bound as a String, not as an uploaded
 * file part - confirm the intended type before implementing.
 */
@PostMapping("/article/upload-image")
public ResultDomain<String> uploadArticleImage(@RequestParam("file") String file) {
// TODO: implement uploading an article image
return null;
}
/**
 * Inserts a link into an article. Not implemented: returns null.
 */
@PutMapping("/article/insert-link")
public ResultDomain<Boolean> insertArticleLink(@RequestBody Map<String, Object> params) {
// TODO: implement inserting an article link
return null;
}
/**
 * Returns the edit history of an article. Not implemented: returns null.
 */
@GetMapping("/article/edit-history/{articleID}")
public ResultDomain<Map<String, Object>> getArticleEditHistory(@PathVariable String articleID) {
// TODO: implement fetching the article edit history
return null;
}
// ==================== Data record management ====================
/**
 * Records data-collection information. Not implemented: returns null.
 */
@PostMapping("/record/collection")
public ResultDomain<Boolean> recordCollectionData(@RequestBody Map<String, Object> recordData) {
// TODO: implement recording collection time, collected count and collection status
return null;
}
/**
 * Records article publishing information. Not implemented: returns null.
 */
@PostMapping("/record/publish")
public ResultDomain<Boolean> recordPublishData(@RequestBody Map<String, Object> publishData) {
// TODO: implement recording publish time, publisher and modification history
return null;
}
/**
 * Returns the list of collection records. Not implemented: returns null.
 */
@GetMapping("/record/collection-list")
public ResultDomain<Map<String, Object>> getCollectionRecordList(
@RequestParam(required = false) Date startDate,
@RequestParam(required = false) Date endDate) {
// TODO: implement fetching the collection record list
return null;
}
/**
 * Returns the list of publish records. Not implemented: returns null.
 */
@GetMapping("/record/publish-list")
public ResultDomain<Map<String, Object>> getPublishRecordList(
@RequestParam(required = false) String publisher,
@RequestParam(required = false) Date startDate,
@RequestParam(required = false) Date endDate) {
// TODO: implement fetching the publish record list
return null;
}
// ==================== Auto-publish management ====================
/**
 * Configures the automatic publish time of articles. Not implemented: returns null.
 */
@PutMapping("/auto-publish/schedule")
public ResultDomain<Boolean> scheduleAutoPublish(@RequestBody Map<String, Object> scheduleData) {
// TODO: implement configuring the automatic publish time
return null;
}
/**
 * Sets the pre-publish verification rules. Not implemented: returns null.
 */
@PutMapping("/auto-publish/verification")
public ResultDomain<Boolean> setVerificationRules(@RequestBody Map<String, Object> rules) {
// TODO: implement pre-publish verification rules (e.g. content review)
return null;
}
/**
 * Configures the notification method. Not implemented: returns null.
 */
@PutMapping("/auto-publish/notification")
public ResultDomain<Boolean> configNotification(@RequestBody Map<String, Object> notificationConfig) {
// TODO: implement notification method (email/in-app message) and reminder format
return null;
}
/**
 * Turns automatic publishing on or off. Not implemented: returns null.
 */
@PutMapping("/auto-publish/toggle")
public ResultDomain<Boolean> toggleAutoPublish(@RequestBody Map<String, Object> params) {
// TODO: implement support for turning automatic publishing off
return null;
}
/**
 * Returns the auto-publish configuration. Not implemented: returns null.
 */
@GetMapping("/auto-publish/config")
public ResultDomain<Map<String, Object>> getAutoPublishConfig() {
// TODO: implement fetching the auto-publish configuration
return null;
}
/**
 * Returns the list of auto-publish tasks. Not implemented: returns null.
 */
@GetMapping("/auto-publish/task-list")
public ResultDomain<Map<String, Object>> getAutoPublishTaskList() {
// TODO: implement fetching the auto-publish task list
return null;
}
/**
 * Runs an auto-publish task manually. Not implemented: returns null.
 */
@PostMapping("/auto-publish/execute")
public ResultDomain<Boolean> executeAutoPublishTask(@RequestParam String taskID) {
// TODO: implement manually running an auto-publish task
return null;
}
}

View File

@@ -1,147 +0,0 @@
package org.xyzh.news.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.resource.TbDataCollectionConfig;
import java.util.List;
/**
 * @description Data access layer for data-collection configurations.
 *              The SQL behind each method is defined outside this interface;
 *              the descriptions below restate the author's documented intent.
 * @filename DataCollectionConfigMapper.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
@Mapper
public interface DataCollectionConfigMapper extends BaseMapper<TbDataCollectionConfig> {
/**
 * @description Queries the list of data-collection configurations
 * @param filter filter criteria
 * @return List<TbDataCollectionConfig> list of data-collection configurations
 * @author yslg
 * @since 2025-10-15
 */
List<TbDataCollectionConfig> selectDataCollectionConfigs(TbDataCollectionConfig filter);
/**
 * @description Queries a configuration by its configuration ID
 * @param configId configuration ID
 * @return TbDataCollectionConfig the configuration
 * @author yslg
 * @since 2025-10-15
 */
TbDataCollectionConfig selectByConfigId(@Param("configId") String configId);
/**
 * @description Queries a configuration by name
 * @param name configuration name
 * @return TbDataCollectionConfig the configuration
 * @author yslg
 * @since 2025-10-15
 */
TbDataCollectionConfig selectByName(@Param("name") String name);
/**
 * @description Queries configurations by status
 * @param status status
 * @return List<TbDataCollectionConfig> list of configurations
 * @author yslg
 * @since 2025-10-15
 */
List<TbDataCollectionConfig> selectByStatus(@Param("status") Integer status);
/**
 * @description Queries configurations by type
 * @param type type
 * @return List<TbDataCollectionConfig> list of configurations
 * @author yslg
 * @since 2025-10-15
 */
List<TbDataCollectionConfig> selectByType(@Param("type") Integer type);
/**
 * @description Queries the enabled configurations
 * @return List<TbDataCollectionConfig> list of configurations
 * @author yslg
 * @since 2025-10-15
 */
List<TbDataCollectionConfig> selectActiveConfigs();
/**
 * @description Checks whether a configuration name already exists
 * @param name configuration name
 * @param excludeId configuration ID to exclude (used on update to exclude the record itself)
 * @return int number of matching records
 * @author yslg
 * @since 2025-10-15
 */
int countByName(@Param("name") String name, @Param("excludeId") String excludeId);
/**
 * @description Inserts a data-collection configuration
 * @param dataCollectionConfig data-collection configuration
 * @return int number of affected rows
 * @author yslg
 * @since 2025-10-15
 */
int insertDataCollectionConfig(TbDataCollectionConfig dataCollectionConfig);
/**
 * @description Updates a data-collection configuration
 * @param dataCollectionConfig data-collection configuration
 * @return int number of affected rows
 * @author yslg
 * @since 2025-10-15
 */
int updateDataCollectionConfig(TbDataCollectionConfig dataCollectionConfig);
/**
 * @description Deletes a data-collection configuration
 * @param dataCollectionConfig data-collection configuration
 * @return int number of affected rows
 * @author yslg
 * @since 2025-10-15
 */
int deleteDataCollectionConfig(TbDataCollectionConfig dataCollectionConfig);
/**
 * @description Inserts data-collection configurations in a batch
 * @param dataCollectionConfigList list of data-collection configurations
 * @return int number of affected rows
 * @author yslg
 * @since 2025-10-15
 */
int batchInsertDataCollectionConfigs(@Param("dataCollectionConfigList") List<TbDataCollectionConfig> dataCollectionConfigList);
/**
 * @description Deletes data-collection configurations in a batch
 * @param ids list of configuration IDs
 * @return int number of affected rows
 * @author yslg
 * @since 2025-10-15
 */
int batchDeleteDataCollectionConfigs(@Param("ids") List<String> ids);
/**
 * @description Queries data-collection configurations page by page
 * @param filter filter criteria
 * @param pageParam paging parameters
 * @return List<TbDataCollectionConfig> list of data-collection configurations
 * @author yslg
 * @since 2025-10-15
 */
List<TbDataCollectionConfig> selectDataCollectionConfigsPage(@Param("filter") TbDataCollectionConfig filter, @Param("pageParam") PageParam pageParam);
/**
 * @description Counts the data-collection configurations
 * @param filter filter criteria
 * @return long total count
 * @author yslg
 * @since 2025-10-15
 */
long countDataCollectionConfigs(@Param("filter") TbDataCollectionConfig filter);
}

View File

@@ -1,147 +0,0 @@
package org.xyzh.news.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.resource.TbDataCollectionLog;
import java.util.List;
/**
* @description DataCollectionLogMapper.java - data access layer for data collection log
*              records (table tb_data_collection_log)
* @filename DataCollectionLogMapper.java
* @author yslg
* @copyright xyzh
* @since 2025-10-15
*/
@Mapper
public interface DataCollectionLogMapper extends BaseMapper<TbDataCollectionLog> {
/**
* @description Query data collection log records, newest first (by collect_time).
* @param filter filter conditions; the mapped statement filters on configID and status when set
* @return List<TbDataCollectionLog> matching log records
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectDataCollectionLogs(TbDataCollectionLog filter);
/**
* @description Query a single log record by its id.
* @param logId log record id
* @return TbDataCollectionLog the log record
* @author yslg
* @since 2025-10-15
*/
TbDataCollectionLog selectByLogId(@Param("logId") String logId);
/**
* @description Query the log records of one configuration, newest first.
* @param configId configuration id
* @return List<TbDataCollectionLog> log records of that configuration
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectByConfigId(@Param("configId") String configId);
/**
* @description Query log records by status, newest first.
* @param status status value to match
* @return List<TbDataCollectionLog> log records with that status
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectByStatus(@Param("status") Integer status);
/**
* @description Query log records by type.
*              NOTE(review): the XML statement mapped to this method does not reference
*              the type argument and returns every log row - confirm the intended filter.
* @param type type value
* @return List<TbDataCollectionLog> log records
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectByType(@Param("type") Integer type);
/**
* @description Query the most recent log records (ordered by collect_time descending).
* @param limit maximum number of records; the mapped statement applies no LIMIT when it is
*              null or not positive
* @return List<TbDataCollectionLog> log records
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectLatestLogs(@Param("limit") Integer limit);
/**
* @description Query aggregated collection statistics for one configuration: the sums of
*              collect_count, success_count and fail_count plus the latest collect_time.
* @param configId configuration id
* @return TbDataCollectionLog object carrying the aggregated values
* @author yslg
* @since 2025-10-15
*/
TbDataCollectionLog selectCollectionStatistics(@Param("configId") String configId);
/**
* @description Insert a data collection log record.
* @param dataCollectionLog log record to insert
* @return int number of affected rows
* @author yslg
* @since 2025-10-15
*/
int insertDataCollectionLog(TbDataCollectionLog dataCollectionLog);
/**
* @description Update a data collection log record. The mapped statement targets the row
*              whose id equals the argument's id and only writes fields that are set.
* @param dataCollectionLog log record carrying the id and the new values
* @return int number of affected rows
* @author yslg
* @since 2025-10-15
*/
int updateDataCollectionLog(TbDataCollectionLog dataCollectionLog);
/**
* @description Delete a data collection log record by the id of the given object.
* @param dataCollectionLog log record whose id identifies the row to delete
* @return int number of affected rows
* @author yslg
* @since 2025-10-15
*/
int deleteDataCollectionLog(TbDataCollectionLog dataCollectionLog);
/**
* @description Insert several data collection log records with one multi-row INSERT.
* @param dataCollectionLogList log records to insert
* @return int number of affected rows
* @author yslg
* @since 2025-10-15
*/
int batchInsertDataCollectionLogs(@Param("dataCollectionLogList") List<TbDataCollectionLog> dataCollectionLogList);
/**
* @description Delete several data collection log records by id.
* @param ids ids of the log records to delete
* @return int number of affected rows
* @author yslg
* @since 2025-10-15
*/
int batchDeleteDataCollectionLogs(@Param("ids") List<String> ids);
/**
* @description Query one page of data collection log records, newest first (by collect_time).
* @param filter filter conditions
* @param pageParam paging parameters; the mapped statement reads pageSize and offset from it
* @return List<TbDataCollectionLog> log records on the requested page
* @author yslg
* @since 2025-10-15
*/
List<TbDataCollectionLog> selectDataCollectionLogsPage(@Param("filter") TbDataCollectionLog filter, @Param("pageParam") PageParam pageParam);
/**
* @description Count the data collection log records that match the filter.
* @param filter filter conditions
* @return long number of matching log records
* @author yslg
* @since 2025-10-15
*/
long countDataCollectionLogs(@Param("filter") TbDataCollectionLog filter);
}

View File

@@ -1,14 +0,0 @@
package org.xyzh.news.service;
import org.xyzh.api.news.collection.DataCollectionService;
/**
* @description Data collection service interface of the news module. It declares no
*              members of its own; it only extends the shared DataCollectionService API.
* @filename NCDataCollectionService.java
* @author yslg
* @copyright xyzh
* @since 2025-10-15
*/
public interface NCDataCollectionService extends DataCollectionService {
}

View File

@@ -1,126 +0,0 @@
package org.xyzh.news.service.impl;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.resource.TbDataCollectionConfig;
import org.xyzh.common.dto.resource.TbDataCollectionLog;
import org.xyzh.news.mapper.DataCollectionConfigMapper;
import org.xyzh.news.mapper.DataCollectionLogMapper;
import org.xyzh.api.news.collection.DataCollectionService;
/**
 * @description Data collection service implementation. Every operation in this class is
 *              still an unimplemented stub that returns {@code null}; the injected mappers
 *              and the logger are declared but not used yet.
 * @filename NCDataCollectionServiceImpl.java
 * @author yslg
 * @copyright xyzh
 * @since 2025-10-15
 */
@Service
public class NCDataCollectionServiceImpl implements DataCollectionService {

    private static final Logger logger = LoggerFactory.getLogger(NCDataCollectionServiceImpl.class);

    /** Mapper for collection configurations (injected, currently unused). */
    @Autowired
    private DataCollectionConfigMapper dataCollectionConfigMapper;

    /** Mapper for collection log records (injected, currently unused). */
    @Autowired
    private DataCollectionLogMapper dataCollectionLogMapper;

    // ------------------------------------------------------------------
    // Configuration management (all stubs)
    // ------------------------------------------------------------------

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> createConfig(TbDataCollectionConfig config) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> updateConfig(TbDataCollectionConfig config) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<Boolean> deleteConfig(String configID) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> getConfigById(String configID) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> getConfigList(Integer status) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> updateConfigStatus(String configID, Integer status) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionConfig> updateLastCollectTime(String configID, Date lastCollectTime) {
        return null;
    }

    // ------------------------------------------------------------------
    // Collection execution (all stubs)
    // ------------------------------------------------------------------

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> executeCollection(String configID) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> batchExecuteCollection(List<String> configIDs) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<Boolean> stopCollection(String configID) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<String> getCollectionStatus(String configID) {
        return null;
    }

    // ------------------------------------------------------------------
    // Collection logs and statistics (all stubs)
    // ------------------------------------------------------------------

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> createLog(TbDataCollectionLog log) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> getLogById(String logID) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> getLogList(String configID, Date startDate, Date endDate) {
        return null;
    }

    /** Stub: not implemented, always returns null. */
    @Override
    public ResultDomain<TbDataCollectionLog> getConfigStatistics(String configID) {
        return null;
    }
}

View File

@@ -1,216 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.xyzh.news.mapper.DataCollectionConfigMapper">

    <!--
        SQL mappings for tb_data_collection_config (data collection configuration).
        Read statements only return rows with deleted = 0. The two delete statements
        issue a real DELETE rather than setting the deleted flag.
    -->

    <!-- Base result map: table columns to TbDataCollectionConfig properties -->
    <resultMap id="BaseResultMap" type="org.xyzh.common.dto.resource.TbDataCollectionConfig">
        <id column="id" property="id" jdbcType="VARCHAR"/>
        <result column="name" property="name" jdbcType="VARCHAR"/>
        <result column="source_url" property="sourceUrl" jdbcType="VARCHAR"/>
        <result column="source_type" property="sourceType" jdbcType="VARCHAR"/>
        <result column="frequency" property="frequency" jdbcType="VARCHAR"/>
        <result column="tag_id" property="tagID" jdbcType="VARCHAR"/>
        <result column="status" property="status" jdbcType="INTEGER"/>
        <result column="last_collect_time" property="lastCollectTime" jdbcType="TIMESTAMP"/>
        <result column="creator" property="creator" jdbcType="VARCHAR"/>
        <result column="updater" property="updater" jdbcType="VARCHAR"/>
        <result column="create_time" property="createTime" jdbcType="TIMESTAMP"/>
        <result column="update_time" property="updateTime" jdbcType="TIMESTAMP"/>
        <result column="delete_time" property="deleteTime" jdbcType="TIMESTAMP"/>
        <result column="deleted" property="deleted" jdbcType="BOOLEAN"/>
    </resultMap>

    <!-- Base column list shared by the SELECT statements -->
    <sql id="Base_Column_List">
        id, name, source_url, source_type, frequency, tag_id, status,
        last_collect_time, creator, updater, create_time, update_time,
        delete_time, deleted
    </sql>

    <!--
        Filter conditions addressed by bare property name. Only valid for statements whose
        parameter object is the filter itself (no @Param wrapper).
        Assumes selectDataCollectionConfigs is declared that way; TODO confirm against the
        mapper interface.
    -->
    <sql id="Where_Clause">
        <where>
            deleted = 0
            <if test="name != null and name != ''">
                AND name LIKE CONCAT('%', #{name}, '%')
            </if>
            <if test="sourceType != null and sourceType != ''">
                AND source_type = #{sourceType}
            </if>
            <if test="frequency != null and frequency != ''">
                AND frequency = #{frequency}
            </if>
            <if test="tagID != null and tagID != ''">
                AND tag_id = #{tagID}
            </if>
            <if test="status != null">
                AND status = #{status}
            </if>
        </where>
    </sql>

    <!--
        Same conditions for statements that receive the filter as @Param("filter").
        With a named parameter the bare names used in Where_Clause are not present in the
        MyBatis parameter map, so every property must be reached through "filter.".
        A null filter simply applies no extra condition.
    -->
    <sql id="Filter_Where_Clause">
        <where>
            deleted = 0
            <if test="filter != null">
                <if test="filter.name != null and filter.name != ''">
                    AND name LIKE CONCAT('%', #{filter.name}, '%')
                </if>
                <if test="filter.sourceType != null and filter.sourceType != ''">
                    AND source_type = #{filter.sourceType}
                </if>
                <if test="filter.frequency != null and filter.frequency != ''">
                    AND frequency = #{filter.frequency}
                </if>
                <if test="filter.tagID != null and filter.tagID != ''">
                    AND tag_id = #{filter.tagID}
                </if>
                <if test="filter.status != null">
                    AND status = #{filter.status}
                </if>
            </if>
        </where>
    </sql>

    <!-- List configurations matching the filter, newest first -->
    <select id="selectDataCollectionConfigs" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List"/>
        FROM tb_data_collection_config
        <include refid="Where_Clause"/>
        ORDER BY create_time DESC
    </select>

    <!-- Look up one configuration by id -->
    <select id="selectByConfigId" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        WHERE id = #{configId} AND deleted = 0
    </select>

    <!-- Look up a configuration by exact name -->
    <select id="selectByName" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        WHERE name = #{name} AND deleted = 0
    </select>

    <!-- List configurations with the given status, newest first -->
    <select id="selectByStatus" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        WHERE status = #{status} AND deleted = 0
        ORDER BY create_time DESC
    </select>

    <!-- List configurations with the given source type, newest first -->
    <select id="selectByType" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        WHERE source_type = #{type} AND deleted = 0
        ORDER BY create_time DESC
    </select>

    <!-- List enabled configurations (status = 1), newest first -->
    <select id="selectActiveConfigs" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        WHERE status = 1 AND deleted = 0
        ORDER BY create_time DESC
    </select>

    <!-- Count configurations using a name, optionally ignoring one id (duplicate-name check) -->
    <select id="countByName" resultType="int">
        SELECT COUNT(1)
        FROM tb_data_collection_config
        WHERE name = #{name} AND deleted = 0
        <if test="excludeId != null and excludeId != ''">
            AND id != #{excludeId}
        </if>
    </select>

    <!-- Insert one configuration; every column is written from the parameter object -->
    <insert id="insertDataCollectionConfig" parameterType="org.xyzh.common.dto.resource.TbDataCollectionConfig">
        INSERT INTO tb_data_collection_config (
            id, name, source_url, source_type, frequency, tag_id, status,
            last_collect_time, creator, updater, create_time, update_time,
            delete_time, deleted
        ) VALUES (
            #{id}, #{name}, #{sourceUrl}, #{sourceType}, #{frequency}, #{tagID}, #{status},
            #{lastCollectTime}, #{creator}, #{updater}, #{createTime}, #{updateTime},
            #{deleteTime}, #{deleted}
        )
    </insert>

    <!-- Update one configuration by id; null or empty fields are left untouched -->
    <update id="updateDataCollectionConfig" parameterType="org.xyzh.common.dto.resource.TbDataCollectionConfig">
        UPDATE tb_data_collection_config
        <set>
            <if test="name != null and name != ''">
                name = #{name},
            </if>
            <if test="sourceUrl != null and sourceUrl != ''">
                source_url = #{sourceUrl},
            </if>
            <if test="sourceType != null and sourceType != ''">
                source_type = #{sourceType},
            </if>
            <if test="frequency != null and frequency != ''">
                frequency = #{frequency},
            </if>
            <if test="tagID != null and tagID != ''">
                tag_id = #{tagID},
            </if>
            <if test="status != null">
                status = #{status},
            </if>
            <if test="lastCollectTime != null">
                last_collect_time = #{lastCollectTime},
            </if>
            <if test="updater != null and updater != ''">
                updater = #{updater},
            </if>
            <if test="updateTime != null">
                update_time = #{updateTime},
            </if>
            <if test="deleteTime != null">
                delete_time = #{deleteTime},
            </if>
            <if test="deleted != null">
                deleted = #{deleted},
            </if>
        </set>
        WHERE id = #{id}
    </update>

    <!-- Delete one configuration by id (physical DELETE) -->
    <delete id="deleteDataCollectionConfig" parameterType="org.xyzh.common.dto.resource.TbDataCollectionConfig">
        DELETE FROM tb_data_collection_config
        WHERE id = #{id}
    </delete>

    <!-- Insert several configurations with one multi-row INSERT -->
    <insert id="batchInsertDataCollectionConfigs" parameterType="java.util.List">
        INSERT INTO tb_data_collection_config (
            id, name, source_url, source_type, frequency, tag_id, status,
            last_collect_time, creator, updater, create_time, update_time,
            delete_time, deleted
        ) VALUES
        <foreach collection="dataCollectionConfigList" item="item" separator=",">
            (
            #{item.id}, #{item.name}, #{item.sourceUrl}, #{item.sourceType}, #{item.frequency},
            #{item.tagID}, #{item.status}, #{item.lastCollectTime}, #{item.creator},
            #{item.updater}, #{item.createTime}, #{item.updateTime}, #{item.deleteTime}, #{item.deleted}
            )
        </foreach>
    </insert>

    <!-- Delete several configurations by id (physical DELETE) -->
    <delete id="batchDeleteDataCollectionConfigs">
        DELETE FROM tb_data_collection_config
        WHERE id IN
        <foreach collection="ids" item="id" open="(" separator="," close=")">
            #{id}
        </foreach>
    </delete>

    <!--
        Page through configurations matching the filter, newest first.
        The filter arrives as @Param("filter"), hence Filter_Where_Clause.
        id is added as a tiebreaker so that rows sharing a create_time keep a fixed
        position and cannot repeat or go missing between pages.
    -->
    <select id="selectDataCollectionConfigsPage" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_config
        <include refid="Filter_Where_Clause" />
        ORDER BY create_time DESC, id DESC
        LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
    </select>

    <!-- Count configurations matching the filter (filter arrives as @Param("filter")) -->
    <select id="countDataCollectionConfigs" resultType="long">
        SELECT COUNT(1)
        FROM tb_data_collection_config
        <include refid="Filter_Where_Clause" />
    </select>
</mapper>

View File

@@ -1,188 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.xyzh.news.mapper.DataCollectionLogMapper">

    <!-- SQL mappings for tb_data_collection_log (one row per data collection run) -->

    <!-- Base result map: table columns to TbDataCollectionLog properties -->
    <resultMap id="BaseResultMap" type="org.xyzh.common.dto.resource.TbDataCollectionLog">
        <id column="id" property="id" jdbcType="VARCHAR"/>
        <result column="config_id" property="configID" jdbcType="VARCHAR"/>
        <result column="collect_count" property="collectCount" jdbcType="INTEGER"/>
        <result column="success_count" property="successCount" jdbcType="INTEGER"/>
        <result column="fail_count" property="failCount" jdbcType="INTEGER"/>
        <result column="status" property="status" jdbcType="INTEGER"/>
        <result column="message" property="message" jdbcType="LONGVARCHAR"/>
        <result column="collect_time" property="collectTime" jdbcType="TIMESTAMP"/>
    </resultMap>

    <!-- Base column list shared by the SELECT statements -->
    <sql id="Base_Column_List">
        id, config_id, collect_count, success_count, fail_count, status,
        message, collect_time
    </sql>

    <!--
        Filter conditions addressed by bare property name. Only valid for statements whose
        parameter object is the filter itself, i.e. selectDataCollectionLogs, which the
        mapper interface declares without @Param.
    -->
    <sql id="Where_Clause">
        <where>
            <if test="configID != null and configID != ''">
                AND config_id = #{configID}
            </if>
            <if test="status != null">
                AND status = #{status}
            </if>
        </where>
    </sql>

    <!--
        Same conditions for statements that receive the filter as @Param("filter").
        With a named parameter the bare names used in Where_Clause are not present in the
        MyBatis parameter map, so every property must be reached through "filter.".
        A null filter simply applies no condition.
    -->
    <sql id="Filter_Where_Clause">
        <where>
            <if test="filter != null">
                <if test="filter.configID != null and filter.configID != ''">
                    AND config_id = #{filter.configID}
                </if>
                <if test="filter.status != null">
                    AND status = #{filter.status}
                </if>
            </if>
        </where>
    </sql>

    <!-- List log records matching the filter, newest first -->
    <select id="selectDataCollectionLogs" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List"/>
        FROM tb_data_collection_log
        <include refid="Where_Clause"/>
        ORDER BY collect_time DESC
    </select>

    <!-- Look up one log record by id -->
    <select id="selectByLogId" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        WHERE id = #{logId}
    </select>

    <!-- List the log records of one configuration, newest first -->
    <select id="selectByConfigId" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        WHERE config_id = #{configId}
        ORDER BY collect_time DESC
    </select>

    <!-- List log records with the given status, newest first -->
    <select id="selectByStatus" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        WHERE status = #{status}
        ORDER BY collect_time DESC
    </select>

    <!--
        List log records by type.
        NOTE(review): the "type" argument of the mapper method is not referenced here, so
        this returns every log row. No type column appears in this mapper's column list;
        confirm what the intended filter is before relying on this statement.
    -->
    <select id="selectByType" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        ORDER BY collect_time DESC
    </select>

    <!-- List the most recent log records; LIMIT is applied only for a positive limit -->
    <select id="selectLatestLogs" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        ORDER BY collect_time DESC
        <if test="limit != null and limit > 0">
            LIMIT #{limit}
        </if>
    </select>

    <!--
        Aggregate statistics for one configuration: summed counters and latest collect
        time. id, status and message are not selected, so they stay unset in the result.
    -->
    <select id="selectCollectionStatistics" resultMap="BaseResultMap">
        SELECT
            config_id,
            SUM(collect_count) as collect_count,
            SUM(success_count) as success_count,
            SUM(fail_count) as fail_count,
            MAX(collect_time) as collect_time
        FROM tb_data_collection_log
        WHERE config_id = #{configId}
        GROUP BY config_id
    </select>

    <!-- Insert one log record -->
    <insert id="insertDataCollectionLog" parameterType="org.xyzh.common.dto.resource.TbDataCollectionLog">
        INSERT INTO tb_data_collection_log (
            id, config_id, collect_count, success_count, fail_count, status,
            message, collect_time
        ) VALUES (
            #{id}, #{configID}, #{collectCount}, #{successCount}, #{failCount}, #{status},
            #{message}, #{collectTime}
        )
    </insert>

    <!-- Update one log record by id; null or empty fields are left untouched -->
    <update id="updateDataCollectionLog" parameterType="org.xyzh.common.dto.resource.TbDataCollectionLog">
        UPDATE tb_data_collection_log
        <set>
            <if test="configID != null and configID != ''">
                config_id = #{configID},
            </if>
            <if test="collectCount != null">
                collect_count = #{collectCount},
            </if>
            <if test="successCount != null">
                success_count = #{successCount},
            </if>
            <if test="failCount != null">
                fail_count = #{failCount},
            </if>
            <if test="status != null">
                status = #{status},
            </if>
            <if test="message != null and message != ''">
                message = #{message},
            </if>
            <if test="collectTime != null">
                collect_time = #{collectTime},
            </if>
        </set>
        WHERE id = #{id}
    </update>

    <!-- Delete one log record by id -->
    <delete id="deleteDataCollectionLog" parameterType="org.xyzh.common.dto.resource.TbDataCollectionLog">
        DELETE FROM tb_data_collection_log
        WHERE id = #{id}
    </delete>

    <!-- Insert several log records with one multi-row INSERT -->
    <insert id="batchInsertDataCollectionLogs" parameterType="java.util.List">
        INSERT INTO tb_data_collection_log (
            id, config_id, collect_count, success_count, fail_count, status,
            message, collect_time
        ) VALUES
        <foreach collection="dataCollectionLogList" item="item" separator=",">
            (
            #{item.id}, #{item.configID}, #{item.collectCount}, #{item.successCount},
            #{item.failCount}, #{item.status}, #{item.message}, #{item.collectTime}
            )
        </foreach>
    </insert>

    <!-- Delete several log records by id -->
    <delete id="batchDeleteDataCollectionLogs">
        DELETE FROM tb_data_collection_log
        WHERE id IN
        <foreach collection="ids" item="id" open="(" separator="," close=")">
            #{id}
        </foreach>
    </delete>

    <!--
        Page through log records matching the filter, newest first.
        The filter arrives as @Param("filter"), hence Filter_Where_Clause.
        id is added as a tiebreaker so that rows sharing a collect_time keep a fixed
        position and cannot repeat or go missing between pages.
    -->
    <select id="selectDataCollectionLogsPage" resultMap="BaseResultMap">
        SELECT
        <include refid="Base_Column_List" />
        FROM tb_data_collection_log
        <include refid="Filter_Where_Clause" />
        ORDER BY collect_time DESC, id DESC
        LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
    </select>

    <!-- Count log records matching the filter (filter arrives as @Param("filter")) -->
    <select id="countDataCollectionLogs" resultType="long">
        SELECT COUNT(1)
        FROM tb_data_collection_log
        <include refid="Filter_Where_Clause" />
    </select>
</mapper>