搜索关键字爬虫

This commit is contained in:
2025-11-12 16:10:34 +08:00
parent 7be02fe396
commit 675e6da7d7
37 changed files with 3382 additions and 572 deletions

View File

@@ -66,7 +66,7 @@ CREATE TABLE `tb_data_collection_item` (
`id` VARCHAR(64) NOT NULL COMMENT '主键ID',
`task_id` VARCHAR(64) NOT NULL COMMENT '关联任务ID',
`log_id` VARCHAR(64) NOT NULL COMMENT '关联执行日志ID',
`title` VARCHAR(255) NOT NULL COMMENT '文章标题',
`title` VARCHAR(255) DEFAULT NULL COMMENT '文章标题',
`content` LONGTEXT DEFAULT NULL COMMENT '文章内容HTML',
`summary` VARCHAR(500) DEFAULT NULL COMMENT '文章摘要',
`source` VARCHAR(255) DEFAULT NULL COMMENT '来源(如 人民日报)',

View File

@@ -114,35 +114,49 @@ school-news:
crawler:
python:
path: F:\Environment\Conda\envs\shoolNewsCrewer
base:
path: F:/Project/schoolNews/schoolNewsCrawler
# Python 可执行文件路径。Windows 建议指向 python.exe;如已在 PATH,可直接用 "python"
pythonPath: F:/Environment/Conda/envs/schoolNewsCrawler/python.exe
# 爬虫脚本根目录(NewsCrawlerTask 的工作目录)
basePath: F:/Project/schoolNews/schoolNewsCrawler
crontab:
items: #可供前端选择的定时任务列表
- name: 人民日报新闻爬取
methods: #爬取方式
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbSearch.py
params:
query: String #搜索关键字
total: Integer #总新闻数量
- name: query
description: 搜索关键字
type: String
value: ""
- name: total
description: 总新闻数量
type: Integer
value: 10
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbHotPoint.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbTrending.py
params:
startDate: String #开始日期
endDate: String #结束日期
isYestoday: Boolean #是否是昨天
- name: startDate
description: 开始日期
type: String
value: ""
- name: endDate
description: 结束日期
type: String
value: ""
- name: yesterday
description: 是否是昨天
type: Boolean
value: true
# 文件存储配置
file:

View File

@@ -111,6 +111,9 @@
<Logger name="org.xyzh.news.mapper" level="debug" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
<Logger name="org.xyzh.crontab.mapper" level="debug" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
<!-- 项目包日志配置 - Auth模块 -->
<Logger name="org.xyzh.auth" level="debug" additivity="false">
@@ -162,6 +165,15 @@
<AppenderRef ref="DatabaseAppender"/>
</Logger>
<Logger name="org.xyzh.crontab" level="debug" additivity="false">
<AppenderRef ref="Console"/>
<AppenderRef ref="Filelog"/>
<AppenderRef ref="RollingFileInfo"/>
<AppenderRef ref="RollingFileWarn"/>
<AppenderRef ref="RollingFileError"/>
<AppenderRef ref="DatabaseAppender"/>
</Logger>
<root level="info">
<appender-ref ref="Console"/>
<appender-ref ref="Filelog"/>

View File

@@ -1,5 +1,7 @@
package org.xyzh.api.crontab;
import java.util.List;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
@@ -30,7 +32,7 @@ public interface DataCollectionItemService {
* @author yslg
* @since 2025-11-08
*/
ResultDomain<Integer> batchCreateItems(java.util.List<TbDataCollectionItem> itemList);
ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList);
/**
* @description 更新采集项

View File

@@ -1,68 +1,427 @@
package org.xyzh.common.vo;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import java.io.Serializable;
import java.util.Date;
/**
* @description 数据采集项VO
* @description 数据采集项VO (平铺结构,包含关联的任务和日志信息)
* @filename DataCollectionItemVO.java
* @author yslg
* @copyright xyzh
* @since 2025-11-08
*/
public class DataCollectionItemVO implements Serializable {
private static final long serialVersionUID = 1L;
// ==================== 采集项基本信息 ====================
/**
* @description 采集项数据
* 采集项ID
*/
private TbDataCollectionItem item;
private String id;
/**
* @description 关联的定时任务信息
* 任务ID
*/
private TbCrontabTask task;
private String taskId;
/**
* @description 状态文本(用于前端显示)
* 日志ID
*/
private String statusText;
private String logId;
/**
* @description 是否可以编辑(未处理和已忽略的可以编辑)
* 文章标题
*/
private String title;
/**
* 文章内容(HTML)
*/
private String content;
/**
* 文章摘要
*/
private String summary;
/**
* 来源
*/
private String source;
/**
* 来源URL
*/
private String sourceUrl;
/**
* 分类
*/
private String category;
/**
* 作者
*/
private String author;
/**
* 发布时间
*/
private Date publishTime;
/**
* 封面图片URL
*/
private String coverImage;
/**
* 图片列表(JSON)
*/
private String images;
/**
* 标签
*/
private String tags;
/**
* 状态(0:未处理 1:已转换为资源 2:已忽略)
*/
private Integer status;
/**
* 转换后的资源ID
*/
private String resourceId;
/**
* 爬取时间
*/
private Date crawlTime;
/**
* 处理时间
*/
private Date processTime;
/**
* 处理人
*/
private String processor;
/**
* 创建时间
*/
private Date createTime;
/**
* 更新时间
*/
private Date updateTime;
// ==================== 关联的任务信息 ====================
/**
* 任务名称
*/
private String taskName;
/**
* 任务分组
*/
private String taskGroup;
/**
* Bean名称
*/
private String beanName;
/**
* 方法名称
*/
private String methodName;
/**
* 方法参数
*/
private String methodParams;
// ==================== 关联的日志信息 ====================
/**
* 执行状态(0:失败 1:成功)
*/
private Integer executeStatus;
/**
* 执行时长(ms)
*/
private Long executeDuration;
/**
* 开始时间
*/
private Date startTime;
/**
* 结束时间
*/
private Date endTime;
// ==================== 扩展字段 ====================
/**
* 是否可以编辑(未处理和已忽略的可以编辑)
*/
private Boolean canEdit;
/**
* @description 是否可以转换为资源未处理的可以转换
* 是否可以转换为资源(未处理的可以转换)
*/
private Boolean canConvert;
public TbDataCollectionItem getItem() {
return item;
// ==================== Getter/Setter ====================
public String getId() {
return id;
}
public void setItem(TbDataCollectionItem item) {
this.item = item;
public void setId(String id) {
this.id = id;
}
public TbCrontabTask getTask() {
return task;
public String getTaskId() {
return taskId;
}
public void setTask(TbCrontabTask task) {
this.task = task;
public void setTaskId(String taskId) {
this.taskId = taskId;
}
public String getStatusText() {
return statusText;
public String getLogId() {
return logId;
}
public void setStatusText(String statusText) {
this.statusText = statusText;
public void setLogId(String logId) {
this.logId = logId;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
public String getSourceUrl() {
return sourceUrl;
}
public void setSourceUrl(String sourceUrl) {
this.sourceUrl = sourceUrl;
}
public String getCategory() {
return category;
}
public void setCategory(String category) {
this.category = category;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public Date getPublishTime() {
return publishTime;
}
public void setPublishTime(Date publishTime) {
this.publishTime = publishTime;
}
public String getCoverImage() {
return coverImage;
}
public void setCoverImage(String coverImage) {
this.coverImage = coverImage;
}
public String getImages() {
return images;
}
public void setImages(String images) {
this.images = images;
}
public String getTags() {
return tags;
}
public void setTags(String tags) {
this.tags = tags;
}
public Integer getStatus() {
return status;
}
public void setStatus(Integer status) {
this.status = status;
}
public String getResourceId() {
return resourceId;
}
public void setResourceId(String resourceId) {
this.resourceId = resourceId;
}
public Date getCrawlTime() {
return crawlTime;
}
public void setCrawlTime(Date crawlTime) {
this.crawlTime = crawlTime;
}
public Date getProcessTime() {
return processTime;
}
public void setProcessTime(Date processTime) {
this.processTime = processTime;
}
public String getProcessor() {
return processor;
}
public void setProcessor(String processor) {
this.processor = processor;
}
public Date getCreateTime() {
return createTime;
}
public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
public Date getUpdateTime() {
return updateTime;
}
public void setUpdateTime(Date updateTime) {
this.updateTime = updateTime;
}
public String getTaskName() {
return taskName;
}
public void setTaskName(String taskName) {
this.taskName = taskName;
}
public String getTaskGroup() {
return taskGroup;
}
public void setTaskGroup(String taskGroup) {
this.taskGroup = taskGroup;
}
public String getBeanName() {
return beanName;
}
public void setBeanName(String beanName) {
this.beanName = beanName;
}
public String getMethodName() {
return methodName;
}
public void setMethodName(String methodName) {
this.methodName = methodName;
}
public String getMethodParams() {
return methodParams;
}
public void setMethodParams(String methodParams) {
this.methodParams = methodParams;
}
public Integer getExecuteStatus() {
return executeStatus;
}
public void setExecuteStatus(Integer executeStatus) {
this.executeStatus = executeStatus;
}
public Long getExecuteDuration() {
return executeDuration;
}
public void setExecuteDuration(Long executeDuration) {
this.executeDuration = executeDuration;
}
public Date getStartTime() {
return startTime;
}
public void setStartTime(Date startTime) {
this.startTime = startTime;
}
public Date getEndTime() {
return endTime;
}
public void setEndTime(Date endTime) {
this.endTime = endTime;
}
public Boolean getCanEdit() {

View File

@@ -1,5 +1,6 @@
package org.xyzh.crontab.config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import lombok.Data;
import org.springframework.stereotype.Component;
@@ -9,8 +10,10 @@ import org.springframework.stereotype.Component;
@Component
public class CrawlerProperties {
// Holder for the crawler settings read from the `crawler.*` configuration keys.
// Each field below is injected through a Spring @Value placeholder.

// Value of `crawler.pythonPath`; the configuration comments describe it as the path
// of the Python executable (or just "python" when it is already on PATH).
@Value("${crawler.pythonPath}")
private String pythonPath;
// Value of `crawler.basePath`; the configuration comments describe it as the crawler
// script root directory, used as the working directory of NewsCrawlerTask.
@Value("${crawler.basePath}")
private String basePath;
}

View File

@@ -12,6 +12,10 @@ import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.common.dto.crontab.TbCrontabLog;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.crontab.pojo.CrontabItem;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import org.xyzh.common.utils.spring.SpringContextUtil;
import org.xyzh.crontab.config.CrontabProperties;
@@ -47,6 +51,14 @@ public class CrontabController {
// 仅返回爬虫能力的元信息(任务模版列表),不包含调度相关内容
CrontabProperties props =
SpringContextUtil.getBean(CrontabProperties.class);
String jString = JSON.toJSONString(props);
props = JSON.parseObject(jString, CrontabProperties.class);
props.getItems().forEach(item->item.getMethods().forEach(
method->{
method.setClazz(null);
method.setExcuete_method(null);
method.setPath(null);
}));
rd.success("ok", props.getItems());
} catch (Exception e) {
rd.fail("获取可创建定时任务失败: " + e.getMessage());
@@ -63,6 +75,25 @@ public class CrontabController {
public ResultDomain<TbCrontabTask> createCrontab(@RequestBody TbCrontabTask crontabItem) {
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
try {
// 根据taskGroup和methodName查找配置并填充beanName和methodName
if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) {
CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName(
crontabItem.getTaskGroup(),
crontabItem.getMethodName()
);
if (method != null) {
crontabItem.setBeanName(method.getClazz()); // 设置Bean名称
crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名
JSONObject methodParams = JSON.parseObject(crontabItem.getMethodParams());
methodParams.put("scriptPath", method.getPath());
crontabItem.setMethodParams(methodParams.toJSONString());
} else {
rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup()
+ ", methodName=" + crontabItem.getMethodName());
return rd;
}
}
return crontabService.createTask(crontabItem);
} catch (Exception e) {
logger.error("创建定时任务失败", e);
@@ -71,6 +102,27 @@ public class CrontabController {
}
}
/**
 * Looks up the configured crawler method for a task-group / method-name pair.
 *
 * <p>The items exposed by {@link CrontabProperties} are scanned in order: every item whose
 * name equals {@code taskGroup} is searched for a method whose name equals
 * {@code methodName}, and the first hit is returned.
 *
 * <p>The lookup is null-safe: a {@code null} argument, an item without a name or without a
 * {@code methods} list, and a method without a name are all treated as "no match" rather
 * than raising a {@link NullPointerException}. Callers already report a {@code null}
 * result as "configuration not found", which is the accurate message in those cases.
 *
 * @param taskGroup  name of the configured item (task group) to search in
 * @param methodName name of the configured method inside that item
 * @return the matching method configuration, or {@code null} when nothing matches
 */
private CrontabItem.CrontabMethod findMethodByTaskGroupAndMethodName(String taskGroup, String methodName) {
    // Nothing can match a missing key; equals(null) was already false in the old code path.
    if (taskGroup == null || methodName == null) {
        return null;
    }
    CrontabProperties props = SpringContextUtil.getBean(CrontabProperties.class);
    if (props == null || props.getItems() == null) {
        return null;
    }
    for (CrontabItem item : props.getItems()) {
        // Skip incomplete configuration entries (no name / no methods list) instead of failing.
        if (item == null || item.getMethods() == null || !taskGroup.equals(item.getName())) {
            continue;
        }
        for (CrontabItem.CrontabMethod method : item.getMethods()) {
            if (method != null && methodName.equals(method.getName())) {
                return method;
            }
        }
    }
    return null;
}
/**
* 更新定时任务
* @param crontabItem
@@ -80,6 +132,21 @@ public class CrontabController {
public ResultDomain<TbCrontabTask> updateCrontab(@RequestBody TbCrontabTask crontabItem) {
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
try {
// 根据taskGroup和methodName查找配置并填充beanName和methodName
if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) {
CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName(
crontabItem.getTaskGroup(),
crontabItem.getMethodName()
);
if (method != null) {
crontabItem.setBeanName(method.getClazz()); // 设置Bean名称
crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名
} else {
rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup()
+ ", methodName=" + crontabItem.getMethodName());
return rd;
}
}
return crontabService.updateTask(crontabItem);
} catch (Exception e) {
logger.error("更新定时任务失败", e);
@@ -146,6 +213,88 @@ public class CrontabController {
return rd;
}
}
/**
 * Returns the detail of one execution log.
 * Delegates to {@code crontabService.getLogById}; if that call throws, the exception is
 * logged and a failed result carrying the exception message is returned instead.
 *
 * @param logId log ID taken from the request path
 * @return the service result, or a failed {@code ResultDomain} when an exception occurred
 */
@GetMapping("/log/{logId}")
public ResultDomain<TbCrontabLog> getLogById(@PathVariable(required = true, name="logId") String logId) {
    try {
        return crontabService.getLogById(logId);
    } catch (Exception ex) {
        logger.error("获取日志详情失败", ex);
        // Build the failure response only when it is actually needed.
        ResultDomain<TbCrontabLog> failure = new ResultDomain<>();
        failure.fail("获取日志详情失败: " + ex.getMessage());
        return failure;
    }
}
/**
 * Validates a cron expression.
 * Delegates to {@code crontabService.validateCronExpression}; if that call throws, the
 * exception is logged and a failed result carrying the exception message is returned.
 *
 * @param cronExpression the expression to check, taken from the {@code cronExpression} query parameter
 * @return the service result, or a failed {@code ResultDomain} when an exception occurred
 */
@GetMapping("/task/validate")
public ResultDomain<String> validateCronExpression(@RequestParam(required = true, name="cronExpression") String cronExpression) {
    try {
        return crontabService.validateCronExpression(cronExpression);
    } catch (Exception ex) {
        logger.error("验证Cron表达式失败", ex);
        // Build the failure response only when it is actually needed.
        ResultDomain<String> failure = new ResultDomain<>();
        failure.fail("验证Cron表达式失败: " + ex.getMessage());
        return failure;
    }
}
/**
 * @description Start a scheduled task. Delegates to {@code crontabService.startTask};
 *              an exception is logged and turned into a failed result.
 * @param taskId task ID taken from the request path
 * @return the service result, or a failed {@code ResultDomain} when an exception occurred
 * @author yslg
 * @since 2025-11-11
 */
@PostMapping("/task/start/{taskId}")
public ResultDomain<TbCrontabTask> startTask(@PathVariable(required = true, name="taskId") String taskId) {
    try {
        return crontabService.startTask(taskId);
    } catch (Exception ex) {
        logger.error("启动定时任务失败", ex);
        // Build the failure response only when it is actually needed.
        ResultDomain<TbCrontabTask> failure = new ResultDomain<>();
        failure.fail("启动定时任务失败: " + ex.getMessage());
        return failure;
    }
}
/**
 * @description Pause a scheduled task. Delegates to {@code crontabService.pauseTask};
 *              an exception is logged and turned into a failed result.
 * @param taskId task ID taken from the request path
 * @return the service result, or a failed {@code ResultDomain} when an exception occurred
 * @author yslg
 * @since 2025-11-11
 */
@PostMapping("/task/pause/{taskId}")
public ResultDomain<TbCrontabTask> pauseTask(@PathVariable(required = true, name="taskId") String taskId) {
    try {
        return crontabService.pauseTask(taskId);
    } catch (Exception ex) {
        logger.error("暂停定时任务失败", ex);
        // Build the failure response only when it is actually needed.
        ResultDomain<TbCrontabTask> failure = new ResultDomain<>();
        failure.fail("暂停定时任务失败: " + ex.getMessage());
        return failure;
    }
}
/**
 * @description Run a task once, immediately. Delegates to
 *              {@code crontabService.executeTaskOnce}; an exception is logged and
 *              turned into a failed result.
 * @param taskId task ID taken from the request path
 * @return the service result, or a failed {@code ResultDomain} when an exception occurred
 * @author yslg
 * @since 2025-11-11
 */
@PostMapping("/task/execute/{taskId}")
public ResultDomain<TbCrontabTask> executeTaskOnce(@PathVariable(required = true, name="taskId") String taskId) {
    try {
        return crontabService.executeTaskOnce(taskId);
    } catch (Exception ex) {
        logger.error("执行定时任务失败", ex);
        // Build the failure response only when it is actually needed.
        ResultDomain<TbCrontabTask> failure = new ResultDomain<>();
        failure.fail("执行定时任务失败: " + ex.getMessage());
        return failure;
    }
}
}

View File

@@ -5,6 +5,7 @@ import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.xyzh.common.core.page.PageParam;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.vo.DataCollectionItemVO;
import java.util.List;
@@ -82,5 +83,45 @@ public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionIte
* @since 2025-11-08
*/
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
// ==================== VO查询方法(使用JOIN返回完整VO) ====================
/**
 * @description Query a single collection-item VO by ID, including the associated
 *              task and log information. The service layer treats a {@code null}
 *              result as "collection item does not exist".
 * @param itemId collection item ID
 * @return DataCollectionItemVO the collection-item VO
 * @author yslg
 * @since 2025-11-08
 */
DataCollectionItemVO selectVOById(@Param("itemId") String itemId);
/**
 * @description Query the list of collection-item VOs, including the associated
 *              task and log information.
 *              NOTE(review): unlike the sibling VO queries, {@code filter} carries no
 *              {@code @Param} name - confirm the mapper XML references its properties
 *              directly before changing this.
 * @param filter filter conditions
 * @return List<DataCollectionItemVO> list of collection-item VOs
 * @author yslg
 * @since 2025-11-08
 */
List<DataCollectionItemVO> selectVOList(TbDataCollectionItem filter);
/**
 * @description Query one page of collection-item VOs, including the associated
 *              task and log information.
 * @param filter filter conditions (bound in the mapper as {@code filter})
 * @param pageParam paging parameters (bound in the mapper as {@code pageParam})
 * @return List<DataCollectionItemVO> list of collection-item VOs
 * @author yslg
 * @since 2025-11-08
 */
List<DataCollectionItemVO> selectVOPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam);
/**
 * @description Query the collection-item VOs that belong to a task, including the
 *              associated task and log information.
 * @param taskId task ID
 * @return List<DataCollectionItemVO> list of collection-item VOs
 * @author yslg
 * @since 2025-11-08
 */
List<DataCollectionItemVO> selectVOByTaskId(@Param("taskId") String taskId);
}

View File

@@ -16,9 +16,17 @@ public class CrontabItem {
@Data
public static class CrontabMethod {
private String name;
@JSONField(name = "class")
private String clazz;
private String excuete_method;
private String path;
private Map<String, Object> params;
private List<CrontabParam> params;
}
/**
 * Descriptor of one parameter of a configured crawler method, matching the entries of
 * the {@code params} list in the crontab configuration (name / description / type / value).
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public static class CrontabParam {
/** Parameter name, e.g. {@code query} or {@code total} in the configuration. */
private String name;
/** Human-readable description of the parameter. */
private String description;
/** Declared type name; the configuration uses {@code String}, {@code Integer} and {@code Boolean}. */
private String type;
/** Configured value; presumably the default offered to the front end - TODO confirm. */
private Object value;
}
}

View File

@@ -11,9 +11,13 @@ import org.xyzh.common.utils.IDUtils;
import org.xyzh.crontab.mapper.CrontabLogMapper;
import org.xyzh.crontab.pojo.TaskParams;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.TypeReference;
import java.lang.reflect.Method;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
/**
* @description 任务执行器
@@ -138,25 +142,29 @@ public class TaskExecutor {
private String injectTaskContext(Object bean, TbCrontabTask task, TbCrontabLog log) {
String methodParams = task.getMethodParams();
// 如果Bean是BaseTask的子类注入taskId和logId到JSON参数中
if (bean instanceof org.xyzh.crontab.task.BaseTask) {
try {
TaskParams taskParams = TaskParams.fromJson(methodParams);
if (taskParams != null) {
// 注入taskId和logId
if (taskParams.getParams() == null) {
taskParams.setParams(new HashMap<>());
}
taskParams.getParams().put("taskId", task.getTaskId());
taskParams.getParams().put("logId", log.getID());
methodParams = taskParams.toJson();
logger.debug("已注入任务上下文: taskId={}, logId={}", task.getTaskId(), log.getID());
}
// 从task对象构建完整的TaskParams
TaskParams taskParams = new TaskParams();
taskParams.setTaskGroup(task.getTaskGroup()); // task表获取
taskParams.setMethodName(task.getMethodName()); // 从task表获取
// 将methodParams解析为Map并设置到params字段
Map<String, Object> params = JSON.parseObject(methodParams,
new TypeReference<Map<String, Object>>(){});
// 注入taskId和logId
params.put("taskId", task.getTaskId());
params.put("logId", log.getID());
taskParams.setParams(params);
methodParams = taskParams.toJson();
} catch (Exception e) {
logger.warn("注入任务上下文失败,使用原始参数: {}", e.getMessage());
logger.warn("构建TaskParams失败: {}", e.getMessage());
}
}
return methodParams;
}
}

View File

@@ -23,7 +23,6 @@ import org.xyzh.system.utils.LoginUtil;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
/**
* @description 数据采集项服务实现类
@@ -102,29 +101,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
int successCount = 0;
Date now = new Date();
for (TbDataCollectionItem item : itemList) {
// 检查URL是否已存在去重
if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(item.getSourceUrl());
if (existing != null) {
logger.debug("跳过已存在的采集项: {}", item.getSourceUrl());
continue;
}
}
// 设置默认值
item.setID(IDUtils.generateID());
item.setCreateTime(now);
item.setDeleted(false);
if (item.getStatus() == null) {
item.setStatus(0);
}
if (item.getCrawlTime() == null) {
item.setCrawlTime(now);
}
itemMapper.insert(item);
successCount++;
int result = itemMapper.batchInsertItems(itemList);
if (result > 0) {
successCount = result;
}
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
@@ -195,9 +174,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
return resultDomain;
}
TbDataCollectionItem item = itemMapper.selectById(itemId);
if (item != null) {
DataCollectionItemVO vo = buildVO(item);
DataCollectionItemVO vo = itemMapper.selectVOById(itemId);
if (vo != null) {
resultDomain.success("查询成功", vo);
} else {
resultDomain.fail("采集项不存在");
@@ -218,10 +196,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
}
filter.setDeleted(false);
List<TbDataCollectionItem> list = itemMapper.selectItemList(filter);
List<DataCollectionItemVO> voList = list.stream()
.map(this::buildVO)
.collect(Collectors.toList());
List<DataCollectionItemVO> voList = itemMapper.selectVOList(filter);
resultDomain.success("查询成功", voList);
} catch (Exception e) {
@@ -244,12 +220,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
pageParam = new PageParam();
}
List<TbDataCollectionItem> list = itemMapper.selectItemPage(filter, pageParam);
long total = itemMapper.countItems(filter);
List<DataCollectionItemVO> voList = itemMapper.selectVOPage(filter, pageParam);
List<DataCollectionItemVO> voList = list.stream()
.map(this::buildVO)
.collect(Collectors.toList());
long total = itemMapper.countItems(filter);
PageDomain<DataCollectionItemVO> pageDomain = new PageDomain<>();
pageDomain.setDataList(voList);
@@ -274,10 +247,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
return resultDomain;
}
List<TbDataCollectionItem> list = itemMapper.selectByTaskId(taskId);
List<DataCollectionItemVO> voList = list.stream()
.map(this::buildVO)
.collect(Collectors.toList());
List<DataCollectionItemVO> voList = itemMapper.selectVOByTaskId(taskId);
resultDomain.success("查询成功", voList);
} catch (Exception e) {
@@ -433,47 +404,5 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
return resultDomain;
}
/**
* @description 构建VO对象
* @param item 采集项
* @return DataCollectionItemVO
* @author yslg
* @since 2025-11-08
*/
private DataCollectionItemVO buildVO(TbDataCollectionItem item) {
DataCollectionItemVO vo = new DataCollectionItemVO();
vo.setItem(item);
// 查询关联的定时任务
if (item.getTaskId() != null && !item.getTaskId().isEmpty()) {
TbCrontabTask task = taskMapper.selectTaskById(item.getTaskId());
vo.setTask(task);
}
// 设置状态文本
String statusText = "未处理";
if (item.getStatus() != null) {
switch (item.getStatus()) {
case 0:
statusText = "未处理";
break;
case 1:
statusText = "已转换为资源";
break;
case 2:
statusText = "已忽略";
break;
default:
statusText = "未知";
}
}
vo.setStatusText(statusText);
// 设置操作权限
vo.setCanEdit(item.getStatus() == null || item.getStatus() == 0 || item.getStatus() == 2);
vo.setCanConvert(item.getStatus() == null || item.getStatus() == 0);
return vo;
}
}

View File

@@ -8,6 +8,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
@@ -41,6 +42,11 @@ public abstract class CommandTask extends BaseTask {
processBuilder.directory(workDir.toFile());
processBuilder.redirectErrorStream(true);
// 设置环境变量强制Python使用UTF-8编码(解决Windows GBK编码问题)
Map<String, String> env = processBuilder.environment();
env.put("PYTHONIOENCODING", "utf-8"); // Python I/O编码
env.put("PYTHONUTF8", "1"); // Python 3.7+ UTF-8模式
// 启动进程
Process process = processBuilder.start();

View File

@@ -18,7 +18,6 @@ public abstract class PythonCommandTask extends CommandTask {
@Autowired
protected CrawlerProperties crawlerProperties;
/**
* 获取Python可执行文件路径
*/
@@ -47,18 +46,16 @@ public abstract class PythonCommandTask extends CommandTask {
/**
* 构建Python命令
*
* 注意: 不使用 cmd /c 或 bash -c直接调用Python可执行文件
* 这样可以避免shell对JSON参数中的引号进行错误处理
* ProcessBuilder可以直接启动exe文件参数会正确传递
*/
@Override
protected List<String> buildCommand(TaskParams taskParams) throws Exception {
List<String> command = new ArrayList<>();
// 检查操作系统
String os = System.getProperty("os.name").toLowerCase();
if (os.contains("win")) {
command.add("cmd");
command.add("/c");
}
// 直接调用Python可执行文件不使用shell
command.add(getPythonPath());
// 添加Python脚本和参数由子类实现

View File

@@ -7,6 +7,7 @@ import org.springframework.stereotype.Component;
import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.crontab.config.CrontabProperties;
import org.xyzh.crontab.pojo.TaskParams;
import org.xyzh.crontab.task.PythonCommandTask;
@@ -17,7 +18,9 @@ import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @description 新闻爬虫定时任务
@@ -42,43 +45,58 @@ public class NewsCrawlerTask extends PythonCommandTask {
protected List<String> buildPythonArgs(TaskParams taskParams) throws Exception {
List<String> args = new ArrayList<>();
String methodName = taskParams.getMethodName();
String source = "rmrb";
String category = "politics";
String limit = "20";
// 根据不同的方法名称构建不同的参数
if ("关键字搜索爬取".equals(methodName)) {
String query = taskParams.getParamAsString("query");
Integer total = taskParams.getParamAsInt("total");
category = query != null ? query : "politics";
limit = total != null ? total.toString() : "20";
} else if ("排行榜爬取".equals(methodName)) {
category = "ranking";
} else if ("往日精彩头条爬取".equals(methodName)) {
String startDate = taskParams.getParamAsString("startDate");
String endDate = taskParams.getParamAsString("endDate");
Boolean isYesterday = taskParams.getParamAsBoolean("isYesterday");
category = "history";
// 这里可以将日期参数传递给Python脚本
// 1. 从params获取scriptPath
String scriptPath = taskParams.getParamAsString("scriptPath");
if (scriptPath == null || scriptPath.isEmpty()) {
throw new Exception("scriptPath参数缺失");
}
// 生成输出文件名
// 2. 生成输出文件名
String timestamp = String.valueOf(System.currentTimeMillis());
String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp);
String outputFile = String.format("output/news_%s.json", timestamp);
// 保存输出文件路径到params中供handleResult使用
taskParams.setParam("_outputFile", outputFile);
// 添加脚本和参数
args.add("main.py");
args.add(category);
args.add(limit);
// 4. 构建命令参数
args.add(scriptPath); // 动态脚本路径
// 5. 遍历params动态构建命令行参数
if (taskParams.getParams() != null) {
for (Map.Entry<String, Object> entry : taskParams.getParams().entrySet()) {
String key = entry.getKey();
Object value = entry.getValue();
// 跳过特殊参数
if (key.startsWith("_") || key.equals("scriptPath") ||
key.equals("taskId") || key.equals("logId")) {
continue;
}
// 获取对应的Python参数名
String pythonArg = "--"+key;
if (pythonArg != null && value != null) {
if (value instanceof Boolean) {
// Boolean类型: true时只传参数名false时不传
if ((Boolean) value) {
args.add(pythonArg);
}
} else {
// String/Integer类型: 传参数名+值
args.add(pythonArg);
args.add(value.toString());
}
}
}
}
// 6. 统一添加output参数
args.add("--output");
args.add(outputFile);
logger.info("爬虫参数 - 来源: {}, 分类: {}, 数: {}", source, category, limit);
logger.info("Python脚本: {}, 命令行参数: {}", scriptPath, String.join(" ", args.subList(1, args.size())));
return args;
}
@@ -98,11 +116,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
// 读取并解析结果文件
String jsonContent = Files.readString(outputPath);
List<ArticleStruct> newsList = JSON.parseObject(
jsonContent,
new TypeReference<List<ArticleStruct>>() {}
);
ResultDomain<ArticleStruct> result = JSON.parseObject(jsonContent, new TypeReference<ResultDomain<ArticleStruct>>(){});
if (!result.isSuccess()) {
logger.error("爬取新闻失败: {}", result.getMessage());
return;
}
List<ArticleStruct> newsList = result.getDataList();
logger.info("成功爬取 {} 条新闻", newsList.size());
// 获取taskId和logId
@@ -126,6 +145,8 @@ public class NewsCrawlerTask extends PythonCommandTask {
try {
List<TbDataCollectionItem> itemList = new ArrayList<>();
Date now = new Date();
SimpleDateFormat parser = new SimpleDateFormat("yyyy年MM月dd日HH:mm");
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
for (ArticleStruct news : newsList) {
@@ -133,6 +154,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
TbDataCollectionItem item = new TbDataCollectionItem();
// 基本信息
item.setID(IDUtils.generateID());
item.setTaskId(taskId);
item.setLogId(logId);
item.setTitle(news.getTitle());
@@ -156,7 +178,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
String publishTimeStr = news.getPublishTime();
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
try {
item.setPublishTime(dateFormat.parse(publishTimeStr));
item.setPublishTime(dateFormat.parse(dateFormat.format(parser.parse(publishTimeStr))));
} catch (Exception e) {
logger.warn("解析发布时间失败: {}", publishTimeStr);
item.setPublishTime(now);

View File

@@ -1,28 +0,0 @@
crawler:
python:
path: F:\Environment\Conda\envs\shoolNewsCrewer
base:
path: F:/Project/schoolNews/schoolNewsCrawler
crontab:
items: #可供前端选择的定时任务列表
- name: 人民日报新闻爬取
methods: #爬取方式
- name: 关键字搜索爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbSearch.py
params:
query: String #搜索关键字
total: Integer #总新闻数量
- name: 排行榜爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbHotPoint.py
- name: 往日精彩头条爬取
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
path: crawler/RmrbTrending.py
params:
startDate: String #开始日期
endDate: String #结束日期
isYestoday: Boolean #是否是昨天

View File

@@ -0,0 +1,47 @@
crawler:
# Python 可执行文件路径。Windows 建议指向 python.exe;如已在 PATH,可直接用 "python"
pythonPath: F:/Environment/Conda/envs/schoolNewsCrawler/python.exe
# 爬虫脚本根目录(NewsCrawlerTask 的工作目录)
basePath: F:/Project/schoolNews/schoolNewsCrawler
# 下面为原有的定时任务清单(保持不变,仅修正到正确文件)
crontab:
items:
- name: 人民日报新闻爬取
methods:
- name: 关键字搜索爬取
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbSearch.py
params:
- name: query
description: 搜索关键字
type: String
value: ""
- name: total
description: 总新闻数量
type: Integer
value: 10
- name: 排行榜爬取
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbHotPoint.py
- name: 往日精彩头条爬取
clazz: newsCrewerTask
excuete_method: execute
path: crawler/RmrbTrending.py
params:
- name: startDate
description: 开始日期
type: String
value: ""
- name: endDate
description: 结束日期
type: String
value: ""
- name: yesterday
description: 是否是昨天
type: Boolean
value: true

View File

@@ -186,7 +186,7 @@
UPDATE tb_crontab_task
SET deleted = 1,
delete_time = NOW()
WHERE id = #{taskId} AND deleted = 0
WHERE task_id=#{taskId} AND deleted = 0
</update>
<!-- 根据ID查询任务 -->
@@ -194,7 +194,7 @@
SELECT
<include refid="Base_Column_List" />
FROM tb_crontab_task
WHERE id = #{taskId} AND deleted = 0
WHERE task_id=#{taskId} AND deleted = 0
</select>
<!-- 根据过滤条件查询任务列表 -->
@@ -272,7 +272,7 @@
UPDATE tb_crontab_task
SET status = #{status},
update_time = NOW()
WHERE id = #{taskId} AND deleted = 0
WHERE task_id=#{taskId} AND deleted = 0
</update>
<!-- 根据Bean名称和方法名称查询任务 -->

View File

@@ -0,0 +1,400 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.xyzh.crontab.mapper.DataCollectionItemMapper">
<!-- Base result map: maps each snake_case column selected by Base_Column_List onto the
camelCase property of TbDataCollectionItem; the id column is flagged as the identifier. -->
<resultMap id="BaseResultMap" type="org.xyzh.common.dto.crontab.TbDataCollectionItem">
<id column="id" property="id" />
<result column="task_id" property="taskId" />
<result column="log_id" property="logId" />
<result column="title" property="title" />
<result column="content" property="content" />
<result column="summary" property="summary" />
<result column="source" property="source" />
<result column="source_url" property="sourceUrl" />
<result column="category" property="category" />
<result column="author" property="author" />
<result column="publish_time" property="publishTime" />
<result column="cover_image" property="coverImage" />
<result column="images" property="images" />
<result column="tags" property="tags" />
<result column="status" property="status" />
<result column="resource_id" property="resourceId" />
<result column="crawl_time" property="crawlTime" />
<result column="process_time" property="processTime" />
<result column="processor" property="processor" />
<result column="create_time" property="createTime" />
<result column="update_time" property="updateTime" />
<result column="delete_time" property="deleteTime" />
<result column="deleted" property="deleted" />
</resultMap>
<!-- Flat VO result map: collection item columns plus the joined task and execution-log
     columns, all mapped onto a single DataCollectionItemVO (no nested objects).
     Column names match the aliases produced by VO_Column_List. -->
<resultMap id="VOResultMap" type="org.xyzh.common.vo.DataCollectionItemVO">
    <!-- Collection item fields. item_id is the item's primary key, so it is declared
         as <id> (consistent with BaseResultMap) rather than as a plain <result>. -->
    <id column="item_id" property="id" />
    <result column="task_id" property="taskId" />
    <result column="log_id" property="logId" />
    <result column="title" property="title" />
    <result column="content" property="content" />
    <result column="summary" property="summary" />
    <result column="source" property="source" />
    <result column="source_url" property="sourceUrl" />
    <result column="category" property="category" />
    <result column="author" property="author" />
    <result column="publish_time" property="publishTime" />
    <result column="cover_image" property="coverImage" />
    <result column="images" property="images" />
    <result column="tags" property="tags" />
    <result column="status" property="status" />
    <result column="resource_id" property="resourceId" />
    <result column="crawl_time" property="crawlTime" />
    <result column="process_time" property="processTime" />
    <result column="processor" property="processor" />
    <result column="item_create_time" property="createTime" />
    <result column="item_update_time" property="updateTime" />
    <!-- Fields taken from the joined task row -->
    <result column="task_name" property="taskName" />
    <result column="task_group" property="taskGroup" />
    <result column="bean_name" property="beanName" />
    <result column="method_name" property="methodName" />
    <result column="method_params" property="methodParams" />
    <!-- Fields taken from the joined execution-log row -->
    <result column="execute_status" property="executeStatus" />
    <result column="execute_duration" property="executeDuration" />
    <result column="start_time" property="startTime" />
    <result column="end_time" property="endTime" />
</resultMap>
<!-- Column list shared by the single-table queries; the columns are unqualified and
correspond one-to-one to the mappings in BaseResultMap. -->
<sql id="Base_Column_List">
id, task_id, log_id, title, content, summary, source, source_url, category, author,
publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time,
processor, create_time, update_time, delete_time, deleted
</sql>
<!-- Column list for the VO queries (includes the joined tables). The aliases i, t and l are
the ones the VO selects in this mapper give to tb_data_collection_item, tb_crontab_task and
tb_crontab_log. The item's id, create_time and update_time are re-aliased (item_id,
item_create_time, item_update_time) to match the column names used in VOResultMap. -->
<sql id="VO_Column_List">
i.id as item_id,
i.task_id,
i.log_id,
i.title,
i.content,
i.summary,
i.source,
i.source_url,
i.category,
i.author,
i.publish_time,
i.cover_image,
i.images,
i.tags,
i.status,
i.resource_id,
i.crawl_time,
i.process_time,
i.processor,
i.create_time as item_create_time,
i.update_time as item_update_time,
t.task_name,
t.task_group,
t.bean_name,
t.method_name,
t.method_params,
l.execute_status,
l.execute_duration,
l.start_time,
l.end_time
</sql>
<!-- Dynamic WHERE clause for methods whose filter object is bound as @Param("filter").
Always restricts to deleted = 0; every non-null, non-empty filter property adds one AND
condition. title and author are matched as substrings via LIKE, all other properties by
equality; status is only checked for null. Column names are unqualified, so this fragment
is used by the single-table queries only. -->
<sql id="Filter_Where_Clause">
<where>
deleted = 0
<if test="filter != null">
<if test="filter.id != null and filter.id != ''">
AND id = #{filter.id}
</if>
<if test="filter.taskId != null and filter.taskId != ''">
AND task_id = #{filter.taskId}
</if>
<if test="filter.logId != null and filter.logId != ''">
AND log_id = #{filter.logId}
</if>
<if test="filter.title != null and filter.title != ''">
AND title LIKE CONCAT('%', #{filter.title}, '%')
</if>
<if test="filter.source != null and filter.source != ''">
AND source = #{filter.source}
</if>
<if test="filter.sourceUrl != null and filter.sourceUrl != ''">
AND source_url = #{filter.sourceUrl}
</if>
<if test="filter.category != null and filter.category != ''">
AND category = #{filter.category}
</if>
<if test="filter.author != null and filter.author != ''">
AND author LIKE CONCAT('%', #{filter.author}, '%')
</if>
<if test="filter.status != null">
AND status = #{filter.status}
</if>
<if test="filter.resourceId != null and filter.resourceId != ''">
AND resource_id = #{filter.resourceId}
</if>
<if test="filter.processor != null and filter.processor != ''">
AND processor = #{filter.processor}
</if>
</if>
</where>
</sql>
<!-- Dynamic WHERE clause for methods without an @Param annotation: the filter object is the
statement parameter itself, so its properties are referenced directly by name.
Same conditions as Filter_Where_Clause: always deleted = 0, LIKE substring matching for
title and author, equality for the remaining properties. -->
<sql id="Item_Where_Clause">
<where>
deleted = 0
<if test="_parameter != null">
<if test="id != null and id != ''">
AND id = #{id}
</if>
<if test="taskId != null and taskId != ''">
AND task_id = #{taskId}
</if>
<if test="logId != null and logId != ''">
AND log_id = #{logId}
</if>
<if test="title != null and title != ''">
AND title LIKE CONCAT('%', #{title}, '%')
</if>
<if test="source != null and source != ''">
AND source = #{source}
</if>
<if test="sourceUrl != null and sourceUrl != ''">
AND source_url = #{sourceUrl}
</if>
<if test="category != null and category != ''">
AND category = #{category}
</if>
<if test="author != null and author != ''">
AND author LIKE CONCAT('%', #{author}, '%')
</if>
<if test="status != null">
AND status = #{status}
</if>
<if test="resourceId != null and resourceId != ''">
AND resource_id = #{resourceId}
</if>
<if test="processor != null and processor != ''">
AND processor = #{processor}
</if>
</if>
</where>
</sql>
<!-- Looks up a non-deleted collection item by its source URL (used for de-duplication).
LIMIT 1 caps the result at one row; there is no ORDER BY, so if several rows share the URL
the one returned is whichever the database yields first. -->
<select id="selectBySourceUrl" resultMap="BaseResultMap">
SELECT
<include refid="Base_Column_List" />
FROM tb_data_collection_item
WHERE source_url = #{sourceUrl}
AND deleted = 0
LIMIT 1
</select>
<!-- Lists all non-deleted collection items of one task, newest first (by create_time). -->
<select id="selectByTaskId" resultMap="BaseResultMap">
SELECT
<include refid="Base_Column_List" />
FROM tb_data_collection_item
WHERE task_id = #{taskId}
AND deleted = 0
ORDER BY create_time DESC
</select>
<!-- Lists non-deleted collection items matching the optional conditions of
Item_Where_Clause (filter object passed as the bare statement parameter), newest first.
The result is not paginated. -->
<select id="selectItemList" resultMap="BaseResultMap">
SELECT
<include refid="Base_Column_List" />
FROM tb_data_collection_item
<include refid="Item_Where_Clause" />
ORDER BY create_time DESC
</select>
<!-- Returns one page of non-deleted collection items matching the "filter" parameter
     (see Filter_Where_Clause), newest first. Page size and offset come from the
     "pageParam" parameter. -->
<select id="selectItemPage" resultMap="BaseResultMap">
    SELECT
    <include refid="Base_Column_List" />
    FROM tb_data_collection_item
    <include refid="Filter_Where_Clause" />
    <!-- create_time is not unique (batchInsertItems stamps every row of a batch with the
         same NOW()), so id is added as a tie-breaker; without it the order of tied rows is
         unspecified and LIMIT/OFFSET pages could repeat or skip rows. -->
    ORDER BY create_time DESC, id DESC
    LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
</select>
<!-- Counts the non-deleted collection items matching the "filter" parameter; uses the same
Filter_Where_Clause as selectItemPage, so the two statements apply identical conditions. -->
<select id="countItems" resultType="long">
SELECT COUNT(*)
FROM tb_data_collection_item
<include refid="Filter_Where_Clause" />
</select>
<!-- Counts non-deleted collection items, optionally narrowed by task ID and/or status.
Either condition is skipped when its parameter is null (taskId also when empty), so with
both omitted the statement counts every non-deleted item. -->
<select id="countByStatus" resultType="long">
SELECT COUNT(*)
FROM tb_data_collection_item
WHERE deleted = 0
<if test="taskId != null and taskId != ''">
AND task_id = #{taskId}
</if>
<if test="status != null">
AND status = #{status}
</if>
</select>
<!-- Inserts every element of itemList with a single multi-row INSERT.
create_time and update_time are set to NOW() and deleted to 0 by the statement itself,
whatever values the items carry; delete_time is not written.
NOTE(review): an empty itemList renders "VALUES" with no tuples, which is invalid SQL;
presumably callers skip the call for empty lists (confirm). -->
<insert id="batchInsertItems">
INSERT INTO tb_data_collection_item (
id, task_id, log_id, title, content, summary, source, source_url,
category, author, publish_time, cover_image, images, tags, status,
resource_id, crawl_time, process_time, processor,
create_time, update_time, deleted
)
VALUES
<foreach collection="itemList" item="item" separator=",">
(
#{item.id}, #{item.taskId}, #{item.logId}, #{item.title}, #{item.content},
#{item.summary}, #{item.source}, #{item.sourceUrl}, #{item.category},
#{item.author}, #{item.publishTime}, #{item.coverImage}, #{item.images},
#{item.tags}, #{item.status}, #{item.resourceId}, #{item.crawlTime},
#{item.processTime}, #{item.processor},
NOW(), NOW(), 0
)
</foreach>
</insert>
<!-- ==================== VO queries (JOINs that return the complete VO) ==================== -->
<!-- Fetches one non-deleted collection item by ID as a VO. The task and log tables are
LEFT JOINed, so the item is still returned (with null task/log fields) when no matching
task or log row exists. The joins do not filter on the deleted flag of the task or log. -->
<select id="selectVOById" resultMap="VOResultMap">
SELECT
<include refid="VO_Column_List" />
FROM tb_data_collection_item i
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
WHERE i.id = #{itemId}
AND i.deleted = 0
</select>
<!-- Lists non-deleted collection items as VOs, newest first, without pagination.
The filter object is the bare statement parameter (no @Param), so its properties are
referenced directly. The conditions mirror Item_Where_Clause but are written out here with
the "i." qualifier because the query joins the task and log tables. -->
<select id="selectVOList" resultMap="VOResultMap">
SELECT
<include refid="VO_Column_List" />
FROM tb_data_collection_item i
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
<where>
i.deleted = 0
<if test="_parameter != null">
<if test="id != null and id != ''">
AND i.id = #{id}
</if>
<if test="taskId != null and taskId != ''">
AND i.task_id = #{taskId}
</if>
<if test="logId != null and logId != ''">
AND i.log_id = #{logId}
</if>
<if test="title != null and title != ''">
AND i.title LIKE CONCAT('%', #{title}, '%')
</if>
<if test="source != null and source != ''">
AND i.source = #{source}
</if>
<if test="sourceUrl != null and sourceUrl != ''">
AND i.source_url = #{sourceUrl}
</if>
<if test="category != null and category != ''">
AND i.category = #{category}
</if>
<if test="author != null and author != ''">
AND i.author LIKE CONCAT('%', #{author}, '%')
</if>
<if test="status != null">
AND i.status = #{status}
</if>
<if test="resourceId != null and resourceId != ''">
AND i.resource_id = #{resourceId}
</if>
<if test="processor != null and processor != ''">
AND i.processor = #{processor}
</if>
</if>
</where>
ORDER BY i.create_time DESC
</select>
<!-- Returns one page of non-deleted collection items as VOs (item columns plus the
     LEFT JOINed task and log columns), newest first. Conditions come from the "filter"
     parameter and mirror Filter_Where_Clause with the "i." qualifier; page size and offset
     come from the "pageParam" parameter. -->
<select id="selectVOPage" resultMap="VOResultMap">
    SELECT
    <include refid="VO_Column_List" />
    FROM tb_data_collection_item i
    LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
    LEFT JOIN tb_crontab_log l ON i.log_id = l.id
    <where>
        i.deleted = 0
        <if test="filter != null">
            <if test="filter.id != null and filter.id != ''">
                AND i.id = #{filter.id}
            </if>
            <if test="filter.taskId != null and filter.taskId != ''">
                AND i.task_id = #{filter.taskId}
            </if>
            <if test="filter.logId != null and filter.logId != ''">
                AND i.log_id = #{filter.logId}
            </if>
            <if test="filter.title != null and filter.title != ''">
                AND i.title LIKE CONCAT('%', #{filter.title}, '%')
            </if>
            <if test="filter.source != null and filter.source != ''">
                AND i.source = #{filter.source}
            </if>
            <if test="filter.sourceUrl != null and filter.sourceUrl != ''">
                AND i.source_url = #{filter.sourceUrl}
            </if>
            <if test="filter.category != null and filter.category != ''">
                AND i.category = #{filter.category}
            </if>
            <if test="filter.author != null and filter.author != ''">
                AND i.author LIKE CONCAT('%', #{filter.author}, '%')
            </if>
            <if test="filter.status != null">
                AND i.status = #{filter.status}
            </if>
            <if test="filter.resourceId != null and filter.resourceId != ''">
                AND i.resource_id = #{filter.resourceId}
            </if>
            <if test="filter.processor != null and filter.processor != ''">
                AND i.processor = #{filter.processor}
            </if>
        </if>
    </where>
    <!-- create_time is not unique (batchInsertItems stamps every row of a batch with the
         same NOW()), so i.id is added as a tie-breaker; without it the order of tied rows
         is unspecified and LIMIT/OFFSET pages could repeat or skip rows. -->
    ORDER BY i.create_time DESC, i.id DESC
    LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
</select>
<!-- Lists all non-deleted collection items of one task as VOs (item columns plus the
LEFT JOINed task and log columns), newest first by the item's create_time. -->
<select id="selectVOByTaskId" resultMap="VOResultMap">
SELECT
<include refid="VO_Column_List" />
FROM tb_data_collection_item i
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
WHERE i.task_id = #{taskId}
AND i.deleted = 0
ORDER BY i.create_time DESC
</select>
</mapper>

View File

@@ -26,6 +26,7 @@ import org.xyzh.common.vo.UserDeptRoleVO;
import org.xyzh.common.core.enums.ResourceType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
@@ -270,7 +271,7 @@ public class NCResourceServiceImpl implements ResourceService {
}
// 检查资源是否存在
TbResource existing = resourceMapper.selectById(resource.getResourceID());
TbResource existing = resourceMapper.selectByResourceId(resource.getResourceID());
if (existing == null || existing.getDeleted()) {
resultDomain.fail("资源不存在");
return resultDomain;
@@ -286,33 +287,17 @@ public class NCResourceServiceImpl implements ResourceService {
}
}
Date now = new Date();
// 原始tags
TbResourceTag filter = new TbResourceTag();
filter.setResourceID(resource.getResourceID());
List<TagVO> originalTagVOs = resourceTagMapper.selectResourceTags(filter);
List<TbResourceTag> originalTags = originalTagVOs.stream().map(TagVO::getResourceTag).collect(Collectors.toList());
// 当前tags
List<TbTag> currentTags = resourceVO.getTags();
// 新增tags
List<TbTag> tagsToAdd = currentTags.stream()
.filter(tag -> originalTags.stream().noneMatch(originalTag -> originalTag.getTagID().equals(tag.getID())))
.collect(Collectors.toList());
// 删除tags
List<TbResourceTag> tagsToDelete = originalTags.stream()
.filter(originalTag -> currentTags.stream().noneMatch(tag -> tag.getID().equals(originalTag.getTagID())))
.collect(Collectors.toList());
resourceTagMapper.batchDeleteResourceTags(tagsToDelete.stream().map(TbResourceTag::getID).collect(Collectors.toList()));
resourceTagMapper.batchInsertResourceTags(tagsToAdd.stream().map(tag -> {
TbResourceTag resourceTag = new TbResourceTag();
resourceTag.setResourceID(resource.getResourceID());
resourceTag.setTagID(tag.getID());
resourceTag.setID(IDUtils.generateID());
resourceTag.setCreator(user.getID());
resourceTag.setCreateTime(now);
return resourceTag;
}).collect(Collectors.toList()));
// tag先删后增
TbResourceTag resourceTag = new TbResourceTag();
resourceTag.setID(IDUtils.generateID());
resourceTag.setResourceID(resource.getResourceID());
resourceTag.setCreator(user.getID());
resourceTag.setCreateTime(now);
resourceTag.setDeleted(false);
resourceTag.setTagID(resourceVO.getResource().getTagID());
resourceTagMapper.deleteByResourceId(resource.getResourceID());
resourceTagMapper.batchInsertResourceTags(Arrays.asList(resourceTag));
// 更新时间
resource.setUpdateTime(now);
@@ -321,10 +306,10 @@ public class NCResourceServiceImpl implements ResourceService {
if (result > 0) {
logger.info("更新资源成功: {}", resource.getResourceID());
// 重新查询返回完整数据
TbResource updated = resourceMapper.selectById(resource.getResourceID());
TbResource updated = resourceMapper.selectByResourceId(resource.getResourceID());
ResourceVO updatedResourceVO = new ResourceVO();
updatedResourceVO.setResource(updated);
updatedResourceVO.setTags(currentTags);
updatedResourceVO.setTags(resourceVO.getTags());
resultDomain.success("更新资源成功", updatedResourceVO);
return resultDomain;
} else {
@@ -403,7 +388,7 @@ public class NCResourceServiceImpl implements ResourceService {
if (result > 0) {
logger.info("更新资源状态成功: {}", resourceID);
// 重新查询返回完整数据
TbResource updated = resourceMapper.selectById(resource.getID());
TbResource updated = resourceMapper.selectByResourceId(resource.getID());
resultDomain.success("更新资源状态成功", updated);
return resultDomain;
} else {
@@ -553,7 +538,7 @@ public class NCResourceServiceImpl implements ResourceService {
if (result > 0) {
logger.info("增加资源点赞次数成功: {}", resourceID);
// 重新查询返回完整数据
TbResource updated = resourceMapper.selectById(resource.getID());
TbResource updated = resourceMapper.selectByResourceId(resource.getID());
resultDomain.success("增加点赞次数成功", updated);
return resultDomain;
} else {
@@ -625,7 +610,7 @@ public class NCResourceServiceImpl implements ResourceService {
if (result > 0) {
logger.info("设置资源推荐状态成功: {} -> {}", resourceID, isRecommend);
// 重新查询返回完整数据
TbResource updated = resourceMapper.selectById(resource.getID());
TbResource updated = resourceMapper.selectByResourceId(resource.getID());
resultDomain.success("设置推荐状态成功", updated);
return resultDomain;
} else {
@@ -669,7 +654,7 @@ public class NCResourceServiceImpl implements ResourceService {
if (result > 0) {
logger.info("设置资源轮播状态成功: {} -> {}", resourceID, isBanner);
// 重新查询返回完整数据
TbResource updated = resourceMapper.selectById(resource.getID());
TbResource updated = resourceMapper.selectByResourceId(resource.getID());
resultDomain.success("设置轮播状态成功", updated);
return resultDomain;
} else {