搜索关键字爬虫
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
package org.xyzh.crontab.config;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import lombok.Data;
|
||||
import org.springframework.stereotype.Component;
|
||||
@@ -9,8 +10,10 @@ import org.springframework.stereotype.Component;
|
||||
@Component
|
||||
public class CrawlerProperties {
|
||||
|
||||
@Value("${crawler.pythonPath}")
|
||||
private String pythonPath;
|
||||
|
||||
@Value("${crawler.basePath}")
|
||||
private String basePath;
|
||||
|
||||
}
|
||||
|
||||
@@ -12,6 +12,10 @@ import org.xyzh.common.dto.crontab.TbCrontabTask;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabLog;
|
||||
import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.crontab.pojo.CrontabItem;
|
||||
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.alibaba.fastjson2.JSONObject;
|
||||
|
||||
import org.xyzh.common.utils.spring.SpringContextUtil;
|
||||
import org.xyzh.crontab.config.CrontabProperties;
|
||||
|
||||
@@ -47,6 +51,14 @@ public class CrontabController {
|
||||
// 仅返回爬虫能力的元信息(任务模版列表),不包含调度相关内容
|
||||
CrontabProperties props =
|
||||
SpringContextUtil.getBean(CrontabProperties.class);
|
||||
String jString = JSON.toJSONString(props);
|
||||
props = JSON.parseObject(jString, CrontabProperties.class);
|
||||
props.getItems().forEach(item->item.getMethods().forEach(
|
||||
method->{
|
||||
method.setClazz(null);
|
||||
method.setExcuete_method(null);
|
||||
method.setPath(null);
|
||||
}));
|
||||
rd.success("ok", props.getItems());
|
||||
} catch (Exception e) {
|
||||
rd.fail("获取可创建定时任务失败: " + e.getMessage());
|
||||
@@ -63,6 +75,25 @@ public class CrontabController {
|
||||
public ResultDomain<TbCrontabTask> createCrontab(@RequestBody TbCrontabTask crontabItem) {
|
||||
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
|
||||
try {
|
||||
// 根据taskGroup和methodName查找配置并填充beanName和methodName
|
||||
if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) {
|
||||
CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName(
|
||||
crontabItem.getTaskGroup(),
|
||||
crontabItem.getMethodName()
|
||||
);
|
||||
if (method != null) {
|
||||
crontabItem.setBeanName(method.getClazz()); // 设置Bean名称
|
||||
crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名
|
||||
JSONObject methodParams = JSON.parseObject(crontabItem.getMethodParams());
|
||||
methodParams.put("scriptPath", method.getPath());
|
||||
crontabItem.setMethodParams(methodParams.toJSONString());
|
||||
|
||||
} else {
|
||||
rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup()
|
||||
+ ", methodName=" + crontabItem.getMethodName());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
return crontabService.createTask(crontabItem);
|
||||
} catch (Exception e) {
|
||||
logger.error("创建定时任务失败", e);
|
||||
@@ -71,6 +102,27 @@ public class CrontabController {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据taskGroup和methodName查找对应的方法配置
|
||||
*/
|
||||
private CrontabItem.CrontabMethod findMethodByTaskGroupAndMethodName(String taskGroup, String methodName) {
|
||||
CrontabProperties props = SpringContextUtil.getBean(CrontabProperties.class);
|
||||
if (props == null || props.getItems() == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for (CrontabItem item : props.getItems()) {
|
||||
if (item.getName().equals(taskGroup)) {
|
||||
for (CrontabItem.CrontabMethod method : item.getMethods()) {
|
||||
if (method.getName().equals(methodName)) {
|
||||
return method;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新定时任务
|
||||
* @param crontabItem
|
||||
@@ -80,6 +132,21 @@ public class CrontabController {
|
||||
public ResultDomain<TbCrontabTask> updateCrontab(@RequestBody TbCrontabTask crontabItem) {
|
||||
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
|
||||
try {
|
||||
// 根据taskGroup和methodName查找配置并填充beanName和methodName
|
||||
if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) {
|
||||
CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName(
|
||||
crontabItem.getTaskGroup(),
|
||||
crontabItem.getMethodName()
|
||||
);
|
||||
if (method != null) {
|
||||
crontabItem.setBeanName(method.getClazz()); // 设置Bean名称
|
||||
crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名
|
||||
} else {
|
||||
rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup()
|
||||
+ ", methodName=" + crontabItem.getMethodName());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
return crontabService.updateTask(crontabItem);
|
||||
} catch (Exception e) {
|
||||
logger.error("更新定时任务失败", e);
|
||||
@@ -146,6 +213,88 @@ public class CrontabController {
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据ID查询日志详情
|
||||
* @param logId 日志ID
|
||||
* @return ResultDomain<TbCrontabLog>
|
||||
*/
|
||||
@GetMapping("/log/{logId}")
|
||||
public ResultDomain<TbCrontabLog> getLogById(@PathVariable(required = true, name="logId") String logId) {
|
||||
ResultDomain<TbCrontabLog> rd = new ResultDomain<>();
|
||||
try {
|
||||
return crontabService.getLogById(logId);
|
||||
} catch (Exception e) {
|
||||
logger.error("获取日志详情失败", e);
|
||||
rd.fail("获取日志详情失败: " + e.getMessage());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
|
||||
@GetMapping("/task/validate")
|
||||
public ResultDomain<String> validateCronExpression(@RequestParam(required = true, name="cronExpression") String cronExpression) {
|
||||
ResultDomain<String> rd = new ResultDomain<>();
|
||||
try {
|
||||
return crontabService.validateCronExpression(cronExpression);
|
||||
} catch (Exception e) {
|
||||
logger.error("验证Cron表达式失败", e);
|
||||
rd.fail("验证Cron表达式失败: " + e.getMessage());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @description 启动定时任务
|
||||
* @param
|
||||
* @author yslg
|
||||
* @since 2025-11-11
|
||||
*/
|
||||
@PostMapping("/task/start/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> startTask(@PathVariable(required = true, name="taskId") String taskId) {
|
||||
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
|
||||
try {
|
||||
return crontabService.startTask(taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("启动定时任务失败", e);
|
||||
rd.fail("启动定时任务失败: " + e.getMessage());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 暂停定时任务
|
||||
* @param
|
||||
* @author yslg
|
||||
* @since 2025-11-11
|
||||
*/
|
||||
@PostMapping("/task/pause/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> pauseTask(@PathVariable(required = true, name="taskId") String taskId) {
|
||||
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
|
||||
try {
|
||||
return crontabService.pauseTask(taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("暂停定时任务失败", e);
|
||||
rd.fail("暂停定时任务失败: " + e.getMessage());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 立即执行一次任务
|
||||
* @param
|
||||
* @author yslg
|
||||
* @since 2025-11-11
|
||||
*/
|
||||
@PostMapping("/task/execute/{taskId}")
|
||||
public ResultDomain<TbCrontabTask> executeTaskOnce(@PathVariable(required = true, name="taskId") String taskId) {
|
||||
ResultDomain<TbCrontabTask> rd = new ResultDomain<>();
|
||||
try {
|
||||
return crontabService.executeTaskOnce(taskId);
|
||||
} catch (Exception e) {
|
||||
logger.error("执行定时任务失败", e);
|
||||
rd.fail("执行定时任务失败: " + e.getMessage());
|
||||
return rd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.xyzh.common.core.page.PageParam;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
import org.xyzh.common.vo.DataCollectionItemVO;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -82,5 +83,45 @@ public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionIte
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
|
||||
|
||||
// ==================== VO查询方法(使用JOIN返回完整VO) ====================
|
||||
|
||||
/**
|
||||
* @description 根据ID查询采集项VO(包含关联的任务和日志信息)
|
||||
* @param itemId 采集项ID
|
||||
* @return DataCollectionItemVO 采集项VO
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
DataCollectionItemVO selectVOById(@Param("itemId") String itemId);
|
||||
|
||||
/**
|
||||
* @description 查询采集项VO列表(包含关联的任务和日志信息)
|
||||
* @param filter 过滤条件
|
||||
* @return List<DataCollectionItemVO> 采集项VO列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<DataCollectionItemVO> selectVOList(TbDataCollectionItem filter);
|
||||
|
||||
/**
|
||||
* @description 分页查询采集项VO列表(包含关联的任务和日志信息)
|
||||
* @param filter 过滤条件
|
||||
* @param pageParam 分页参数
|
||||
* @return List<DataCollectionItemVO> 采集项VO列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<DataCollectionItemVO> selectVOPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam);
|
||||
|
||||
/**
|
||||
* @description 根据任务ID查询采集项VO列表(包含关联的任务和日志信息)
|
||||
* @param taskId 任务ID
|
||||
* @return List<DataCollectionItemVO> 采集项VO列表
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
List<DataCollectionItemVO> selectVOByTaskId(@Param("taskId") String taskId);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -16,9 +16,17 @@ public class CrontabItem {
|
||||
@Data
|
||||
public static class CrontabMethod {
|
||||
private String name;
|
||||
@JSONField(name = "class")
|
||||
private String clazz;
|
||||
private String excuete_method;
|
||||
private String path;
|
||||
private Map<String, Object> params;
|
||||
private List<CrontabParam> params;
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class CrontabParam {
|
||||
private String name;
|
||||
private String description;
|
||||
private String type;
|
||||
private Object value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,9 +11,13 @@ import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.crontab.mapper.CrontabLogMapper;
|
||||
import org.xyzh.crontab.pojo.TaskParams;
|
||||
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.alibaba.fastjson2.TypeReference;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @description 任务执行器
|
||||
@@ -138,25 +142,29 @@ public class TaskExecutor {
|
||||
private String injectTaskContext(Object bean, TbCrontabTask task, TbCrontabLog log) {
|
||||
String methodParams = task.getMethodParams();
|
||||
|
||||
// 如果Bean是BaseTask的子类,注入taskId和logId到JSON参数中
|
||||
if (bean instanceof org.xyzh.crontab.task.BaseTask) {
|
||||
try {
|
||||
TaskParams taskParams = TaskParams.fromJson(methodParams);
|
||||
if (taskParams != null) {
|
||||
// 注入taskId和logId
|
||||
if (taskParams.getParams() == null) {
|
||||
taskParams.setParams(new HashMap<>());
|
||||
}
|
||||
taskParams.getParams().put("taskId", task.getTaskId());
|
||||
taskParams.getParams().put("logId", log.getID());
|
||||
methodParams = taskParams.toJson();
|
||||
logger.debug("已注入任务上下文: taskId={}, logId={}", task.getTaskId(), log.getID());
|
||||
}
|
||||
// 从task对象构建完整的TaskParams
|
||||
TaskParams taskParams = new TaskParams();
|
||||
taskParams.setTaskGroup(task.getTaskGroup()); // 从task表获取
|
||||
taskParams.setMethodName(task.getMethodName()); // 从task表获取
|
||||
|
||||
// 将methodParams解析为Map并设置到params字段
|
||||
Map<String, Object> params = JSON.parseObject(methodParams,
|
||||
new TypeReference<Map<String, Object>>(){});
|
||||
|
||||
// 注入taskId和logId
|
||||
params.put("taskId", task.getTaskId());
|
||||
params.put("logId", log.getID());
|
||||
|
||||
taskParams.setParams(params);
|
||||
|
||||
methodParams = taskParams.toJson();
|
||||
} catch (Exception e) {
|
||||
logger.warn("注入任务上下文失败,使用原始参数: {}", e.getMessage());
|
||||
logger.warn("构建TaskParams失败: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return methodParams;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ import org.xyzh.system.utils.LoginUtil;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* @description 数据采集项服务实现类
|
||||
@@ -102,29 +101,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
int successCount = 0;
|
||||
Date now = new Date();
|
||||
|
||||
for (TbDataCollectionItem item : itemList) {
|
||||
// 检查URL是否已存在(去重)
|
||||
if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
|
||||
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(item.getSourceUrl());
|
||||
if (existing != null) {
|
||||
logger.debug("跳过已存在的采集项: {}", item.getSourceUrl());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// 设置默认值
|
||||
item.setID(IDUtils.generateID());
|
||||
item.setCreateTime(now);
|
||||
item.setDeleted(false);
|
||||
if (item.getStatus() == null) {
|
||||
item.setStatus(0);
|
||||
}
|
||||
if (item.getCrawlTime() == null) {
|
||||
item.setCrawlTime(now);
|
||||
}
|
||||
|
||||
itemMapper.insert(item);
|
||||
successCount++;
|
||||
int result = itemMapper.batchInsertItems(itemList);
|
||||
if (result > 0) {
|
||||
successCount = result;
|
||||
}
|
||||
|
||||
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
|
||||
@@ -195,9 +174,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
TbDataCollectionItem item = itemMapper.selectById(itemId);
|
||||
if (item != null) {
|
||||
DataCollectionItemVO vo = buildVO(item);
|
||||
DataCollectionItemVO vo = itemMapper.selectVOById(itemId);
|
||||
if (vo != null) {
|
||||
resultDomain.success("查询成功", vo);
|
||||
} else {
|
||||
resultDomain.fail("采集项不存在");
|
||||
@@ -218,10 +196,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
}
|
||||
filter.setDeleted(false);
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectItemList(filter);
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
List<DataCollectionItemVO> voList = itemMapper.selectVOList(filter);
|
||||
|
||||
|
||||
resultDomain.success("查询成功", voList);
|
||||
} catch (Exception e) {
|
||||
@@ -244,12 +220,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
pageParam = new PageParam();
|
||||
}
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectItemPage(filter, pageParam);
|
||||
long total = itemMapper.countItems(filter);
|
||||
List<DataCollectionItemVO> voList = itemMapper.selectVOPage(filter, pageParam);
|
||||
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
long total = itemMapper.countItems(filter);
|
||||
|
||||
PageDomain<DataCollectionItemVO> pageDomain = new PageDomain<>();
|
||||
pageDomain.setDataList(voList);
|
||||
@@ -274,10 +247,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
List<TbDataCollectionItem> list = itemMapper.selectByTaskId(taskId);
|
||||
List<DataCollectionItemVO> voList = list.stream()
|
||||
.map(this::buildVO)
|
||||
.collect(Collectors.toList());
|
||||
List<DataCollectionItemVO> voList = itemMapper.selectVOByTaskId(taskId);
|
||||
|
||||
|
||||
resultDomain.success("查询成功", voList);
|
||||
} catch (Exception e) {
|
||||
@@ -433,47 +404,5 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 构建VO对象
|
||||
* @param item 采集项
|
||||
* @return DataCollectionItemVO
|
||||
* @author yslg
|
||||
* @since 2025-11-08
|
||||
*/
|
||||
private DataCollectionItemVO buildVO(TbDataCollectionItem item) {
|
||||
DataCollectionItemVO vo = new DataCollectionItemVO();
|
||||
vo.setItem(item);
|
||||
|
||||
// 查询关联的定时任务
|
||||
if (item.getTaskId() != null && !item.getTaskId().isEmpty()) {
|
||||
TbCrontabTask task = taskMapper.selectTaskById(item.getTaskId());
|
||||
vo.setTask(task);
|
||||
}
|
||||
|
||||
// 设置状态文本
|
||||
String statusText = "未处理";
|
||||
if (item.getStatus() != null) {
|
||||
switch (item.getStatus()) {
|
||||
case 0:
|
||||
statusText = "未处理";
|
||||
break;
|
||||
case 1:
|
||||
statusText = "已转换为资源";
|
||||
break;
|
||||
case 2:
|
||||
statusText = "已忽略";
|
||||
break;
|
||||
default:
|
||||
statusText = "未知";
|
||||
}
|
||||
}
|
||||
vo.setStatusText(statusText);
|
||||
|
||||
// 设置操作权限
|
||||
vo.setCanEdit(item.getStatus() == null || item.getStatus() == 0 || item.getStatus() == 2);
|
||||
vo.setCanConvert(item.getStatus() == null || item.getStatus() == 0);
|
||||
|
||||
return vo;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
@@ -41,6 +42,11 @@ public abstract class CommandTask extends BaseTask {
|
||||
processBuilder.directory(workDir.toFile());
|
||||
processBuilder.redirectErrorStream(true);
|
||||
|
||||
// 设置环境变量强制Python使用UTF-8编码(解决Windows GBK编码问题)
|
||||
Map<String, String> env = processBuilder.environment();
|
||||
env.put("PYTHONIOENCODING", "utf-8"); // Python I/O编码
|
||||
env.put("PYTHONUTF8", "1"); // Python 3.7+ UTF-8模式
|
||||
|
||||
// 启动进程
|
||||
Process process = processBuilder.start();
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ public abstract class PythonCommandTask extends CommandTask {
|
||||
|
||||
@Autowired
|
||||
protected CrawlerProperties crawlerProperties;
|
||||
|
||||
/**
|
||||
* 获取Python可执行文件路径
|
||||
*/
|
||||
@@ -47,18 +46,16 @@ public abstract class PythonCommandTask extends CommandTask {
|
||||
|
||||
/**
|
||||
* 构建Python命令
|
||||
*
|
||||
* 注意: 不使用 cmd /c 或 bash -c,直接调用Python可执行文件
|
||||
* 这样可以避免shell对JSON参数中的引号进行错误处理
|
||||
* ProcessBuilder可以直接启动exe文件,参数会正确传递
|
||||
*/
|
||||
@Override
|
||||
protected List<String> buildCommand(TaskParams taskParams) throws Exception {
|
||||
List<String> command = new ArrayList<>();
|
||||
|
||||
// 检查操作系统
|
||||
String os = System.getProperty("os.name").toLowerCase();
|
||||
if (os.contains("win")) {
|
||||
command.add("cmd");
|
||||
command.add("/c");
|
||||
}
|
||||
|
||||
// 直接调用Python可执行文件,不使用shell
|
||||
command.add(getPythonPath());
|
||||
|
||||
// 添加Python脚本和参数(由子类实现)
|
||||
|
||||
@@ -7,6 +7,7 @@ import org.springframework.stereotype.Component;
|
||||
import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.crontab.config.CrontabProperties;
|
||||
import org.xyzh.crontab.pojo.TaskParams;
|
||||
import org.xyzh.crontab.task.PythonCommandTask;
|
||||
@@ -17,7 +18,9 @@ import java.nio.file.Paths;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @description 新闻爬虫定时任务
|
||||
@@ -42,43 +45,58 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
protected List<String> buildPythonArgs(TaskParams taskParams) throws Exception {
|
||||
List<String> args = new ArrayList<>();
|
||||
|
||||
String methodName = taskParams.getMethodName();
|
||||
String source = "rmrb";
|
||||
String category = "politics";
|
||||
String limit = "20";
|
||||
|
||||
// 根据不同的方法名称构建不同的参数
|
||||
if ("关键字搜索爬取".equals(methodName)) {
|
||||
String query = taskParams.getParamAsString("query");
|
||||
Integer total = taskParams.getParamAsInt("total");
|
||||
category = query != null ? query : "politics";
|
||||
limit = total != null ? total.toString() : "20";
|
||||
|
||||
} else if ("排行榜爬取".equals(methodName)) {
|
||||
category = "ranking";
|
||||
|
||||
} else if ("往日精彩头条爬取".equals(methodName)) {
|
||||
String startDate = taskParams.getParamAsString("startDate");
|
||||
String endDate = taskParams.getParamAsString("endDate");
|
||||
Boolean isYesterday = taskParams.getParamAsBoolean("isYesterday");
|
||||
category = "history";
|
||||
// 这里可以将日期参数传递给Python脚本
|
||||
// 1. 从params获取scriptPath
|
||||
String scriptPath = taskParams.getParamAsString("scriptPath");
|
||||
if (scriptPath == null || scriptPath.isEmpty()) {
|
||||
throw new Exception("scriptPath参数缺失");
|
||||
}
|
||||
|
||||
// 生成输出文件名
|
||||
// 2. 生成输出文件名
|
||||
String timestamp = String.valueOf(System.currentTimeMillis());
|
||||
String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp);
|
||||
String outputFile = String.format("output/news_%s.json", timestamp);
|
||||
|
||||
// 保存输出文件路径到params中,供handleResult使用
|
||||
taskParams.setParam("_outputFile", outputFile);
|
||||
|
||||
// 添加脚本和参数
|
||||
args.add("main.py");
|
||||
args.add(category);
|
||||
args.add(limit);
|
||||
|
||||
|
||||
// 4. 构建命令参数
|
||||
args.add(scriptPath); // 动态脚本路径
|
||||
|
||||
// 5. 遍历params,动态构建命令行参数
|
||||
if (taskParams.getParams() != null) {
|
||||
for (Map.Entry<String, Object> entry : taskParams.getParams().entrySet()) {
|
||||
String key = entry.getKey();
|
||||
Object value = entry.getValue();
|
||||
|
||||
// 跳过特殊参数
|
||||
if (key.startsWith("_") || key.equals("scriptPath") ||
|
||||
key.equals("taskId") || key.equals("logId")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 获取对应的Python参数名
|
||||
String pythonArg = "--"+key;
|
||||
if (pythonArg != null && value != null) {
|
||||
if (value instanceof Boolean) {
|
||||
// Boolean类型: true时只传参数名,false时不传
|
||||
if ((Boolean) value) {
|
||||
args.add(pythonArg);
|
||||
}
|
||||
} else {
|
||||
// String/Integer类型: 传参数名+值
|
||||
args.add(pythonArg);
|
||||
args.add(value.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. 统一添加output参数
|
||||
args.add("--output");
|
||||
args.add(outputFile);
|
||||
|
||||
logger.info("爬虫参数 - 来源: {}, 分类: {}, 数量: {}", source, category, limit);
|
||||
logger.info("Python脚本: {}, 命令行参数: {}", scriptPath, String.join(" ", args.subList(1, args.size())));
|
||||
|
||||
return args;
|
||||
}
|
||||
@@ -98,11 +116,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
|
||||
// 读取并解析结果文件
|
||||
String jsonContent = Files.readString(outputPath);
|
||||
List<ArticleStruct> newsList = JSON.parseObject(
|
||||
jsonContent,
|
||||
new TypeReference<List<ArticleStruct>>() {}
|
||||
);
|
||||
|
||||
ResultDomain<ArticleStruct> result = JSON.parseObject(jsonContent, new TypeReference<ResultDomain<ArticleStruct>>(){});
|
||||
if (!result.isSuccess()) {
|
||||
logger.error("爬取新闻失败: {}", result.getMessage());
|
||||
return;
|
||||
}
|
||||
List<ArticleStruct> newsList = result.getDataList();
|
||||
logger.info("成功爬取 {} 条新闻", newsList.size());
|
||||
|
||||
// 获取taskId和logId
|
||||
@@ -126,6 +145,8 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
try {
|
||||
List<TbDataCollectionItem> itemList = new ArrayList<>();
|
||||
Date now = new Date();
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy年MM月dd日HH:mm");
|
||||
|
||||
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
for (ArticleStruct news : newsList) {
|
||||
@@ -133,6 +154,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
TbDataCollectionItem item = new TbDataCollectionItem();
|
||||
|
||||
// 基本信息
|
||||
item.setID(IDUtils.generateID());
|
||||
item.setTaskId(taskId);
|
||||
item.setLogId(logId);
|
||||
item.setTitle(news.getTitle());
|
||||
@@ -156,7 +178,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
String publishTimeStr = news.getPublishTime();
|
||||
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
|
||||
try {
|
||||
item.setPublishTime(dateFormat.parse(publishTimeStr));
|
||||
item.setPublishTime(dateFormat.parse(dateFormat.format(parser.parse(publishTimeStr))));
|
||||
} catch (Exception e) {
|
||||
logger.warn("解析发布时间失败: {}", publishTimeStr);
|
||||
item.setPublishTime(now);
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
crawler:
|
||||
python:
|
||||
path: F:\Environment\Conda\envs\shoolNewsCrewer
|
||||
base:
|
||||
path: F:/Project/schoolNews/schoolNewsCrawler
|
||||
|
||||
crontab:
|
||||
items: #可供前端选择的定时任务列表
|
||||
- name: 人民日报新闻爬取
|
||||
methods: #爬取方式
|
||||
- name: 关键字搜索爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/RmrbSearch.py
|
||||
params:
|
||||
query: String #搜索关键字
|
||||
total: Integer #总新闻数量
|
||||
- name: 排行榜爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/RmrbHotPoint.py
|
||||
- name: 往日精彩头条爬取
|
||||
class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask
|
||||
path: crawler/RmrbTrending.py
|
||||
params:
|
||||
startDate: String #开始日期
|
||||
endDate: String #结束日期
|
||||
isYestoday: Boolean #是否是昨天
|
||||
|
||||
|
||||
47
schoolNewsServ/crontab/src/main/resources/application.yml
Normal file
47
schoolNewsServ/crontab/src/main/resources/application.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
crawler:
|
||||
# Python 可执行文件路径(Windows 建议指向 python.exe;如已在 PATH,可直接用 "python")
|
||||
pythonPath: F:/Environment/Conda/envs/schoolNewsCrawler/python.exe
|
||||
# 爬虫脚本根目录(NewsCrawlerTask 的工作目录)
|
||||
basePath: F:/Project/schoolNews/schoolNewsCrawler
|
||||
|
||||
# 下面为原有的定时任务清单(保持不变,仅修正到正确文件)
|
||||
crontab:
|
||||
items:
|
||||
- name: 人民日报新闻爬取
|
||||
methods:
|
||||
- name: 关键字搜索爬取
|
||||
clazz: newsCrewerTask
|
||||
excuete_method: execute
|
||||
path: crawler/RmrbSearch.py
|
||||
params:
|
||||
- name: query
|
||||
description: 搜索关键字
|
||||
type: String
|
||||
value: ""
|
||||
- name: total
|
||||
description: 总新闻数量
|
||||
type: Integer
|
||||
value: 10
|
||||
- name: 排行榜爬取
|
||||
clazz: newsCrewerTask
|
||||
excuete_method: execute
|
||||
path: crawler/RmrbHotPoint.py
|
||||
- name: 往日精彩头条爬取
|
||||
clazz: newsCrewerTask
|
||||
excuete_method: execute
|
||||
path: crawler/RmrbTrending.py
|
||||
params:
|
||||
- name: startDate
|
||||
description: 开始日期
|
||||
type: String
|
||||
value: ""
|
||||
- name: endDate
|
||||
description: 结束日期
|
||||
type: String
|
||||
value: ""
|
||||
- name: yesterday
|
||||
description: 是否是昨天
|
||||
type: Boolean
|
||||
value: true
|
||||
|
||||
|
||||
@@ -186,7 +186,7 @@
|
||||
UPDATE tb_crontab_task
|
||||
SET deleted = 1,
|
||||
delete_time = NOW()
|
||||
WHERE id = #{taskId} AND deleted = 0
|
||||
WHERE task_id=#{taskId} AND deleted = 0
|
||||
</update>
|
||||
|
||||
<!-- 根据ID查询任务 -->
|
||||
@@ -194,7 +194,7 @@
|
||||
SELECT
|
||||
<include refid="Base_Column_List" />
|
||||
FROM tb_crontab_task
|
||||
WHERE id = #{taskId} AND deleted = 0
|
||||
WHERE task_id=#{taskId} AND deleted = 0
|
||||
</select>
|
||||
|
||||
<!-- 根据过滤条件查询任务列表 -->
|
||||
@@ -272,7 +272,7 @@
|
||||
UPDATE tb_crontab_task
|
||||
SET status = #{status},
|
||||
update_time = NOW()
|
||||
WHERE id = #{taskId} AND deleted = 0
|
||||
WHERE task_id=#{taskId} AND deleted = 0
|
||||
</update>
|
||||
|
||||
<!-- 根据Bean名称和方法名称查询任务 -->
|
||||
|
||||
@@ -0,0 +1,400 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE mapper
|
||||
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
<mapper namespace="org.xyzh.crontab.mapper.DataCollectionItemMapper">
|
||||
|
||||
<!-- 结果映射 -->
|
||||
<resultMap id="BaseResultMap" type="org.xyzh.common.dto.crontab.TbDataCollectionItem">
|
||||
<id column="id" property="id" />
|
||||
<result column="task_id" property="taskId" />
|
||||
<result column="log_id" property="logId" />
|
||||
<result column="title" property="title" />
|
||||
<result column="content" property="content" />
|
||||
<result column="summary" property="summary" />
|
||||
<result column="source" property="source" />
|
||||
<result column="source_url" property="sourceUrl" />
|
||||
<result column="category" property="category" />
|
||||
<result column="author" property="author" />
|
||||
<result column="publish_time" property="publishTime" />
|
||||
<result column="cover_image" property="coverImage" />
|
||||
<result column="images" property="images" />
|
||||
<result column="tags" property="tags" />
|
||||
<result column="status" property="status" />
|
||||
<result column="resource_id" property="resourceId" />
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
<result column="processor" property="processor" />
|
||||
<result column="create_time" property="createTime" />
|
||||
<result column="update_time" property="updateTime" />
|
||||
<result column="delete_time" property="deleteTime" />
|
||||
<result column="deleted" property="deleted" />
|
||||
</resultMap>
|
||||
|
||||
<!-- VO结果映射(平铺结构,包含关联的任务和日志信息) -->
|
||||
<resultMap id="VOResultMap" type="org.xyzh.common.vo.DataCollectionItemVO">
|
||||
<!-- 采集项基本信息 -->
|
||||
<result column="item_id" property="id" />
|
||||
<result column="task_id" property="taskId" />
|
||||
<result column="log_id" property="logId" />
|
||||
<result column="title" property="title" />
|
||||
<result column="content" property="content" />
|
||||
<result column="summary" property="summary" />
|
||||
<result column="source" property="source" />
|
||||
<result column="source_url" property="sourceUrl" />
|
||||
<result column="category" property="category" />
|
||||
<result column="author" property="author" />
|
||||
<result column="publish_time" property="publishTime" />
|
||||
<result column="cover_image" property="coverImage" />
|
||||
<result column="images" property="images" />
|
||||
<result column="tags" property="tags" />
|
||||
<result column="status" property="status" />
|
||||
<result column="resource_id" property="resourceId" />
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
<result column="processor" property="processor" />
|
||||
<result column="item_create_time" property="createTime" />
|
||||
<result column="item_update_time" property="updateTime" />
|
||||
|
||||
<!-- 关联的任务信息 -->
|
||||
<result column="task_name" property="taskName" />
|
||||
<result column="task_group" property="taskGroup" />
|
||||
<result column="bean_name" property="beanName" />
|
||||
<result column="method_name" property="methodName" />
|
||||
<result column="method_params" property="methodParams" />
|
||||
|
||||
<!-- 关联的日志信息 -->
|
||||
<result column="execute_status" property="executeStatus" />
|
||||
<result column="execute_duration" property="executeDuration" />
|
||||
<result column="start_time" property="startTime" />
|
||||
<result column="end_time" property="endTime" />
|
||||
</resultMap>
|
||||
|
||||
<!-- 字段列表 -->
|
||||
<sql id="Base_Column_List">
|
||||
id, task_id, log_id, title, content, summary, source, source_url, category, author,
|
||||
publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time,
|
||||
processor, create_time, update_time, delete_time, deleted
|
||||
</sql>
|
||||
|
||||
<!-- VO查询字段列表(包含关联表) -->
|
||||
<sql id="VO_Column_List">
|
||||
i.id as item_id,
|
||||
i.task_id,
|
||||
i.log_id,
|
||||
i.title,
|
||||
i.content,
|
||||
i.summary,
|
||||
i.source,
|
||||
i.source_url,
|
||||
i.category,
|
||||
i.author,
|
||||
i.publish_time,
|
||||
i.cover_image,
|
||||
i.images,
|
||||
i.tags,
|
||||
i.status,
|
||||
i.resource_id,
|
||||
i.crawl_time,
|
||||
i.process_time,
|
||||
i.processor,
|
||||
i.create_time as item_create_time,
|
||||
i.update_time as item_update_time,
|
||||
t.task_name,
|
||||
t.task_group,
|
||||
t.bean_name,
|
||||
t.method_name,
|
||||
t.method_params,
|
||||
l.execute_status,
|
||||
l.execute_duration,
|
||||
l.start_time,
|
||||
l.end_time
|
||||
</sql>
|
||||
|
||||
<!-- 动态查询条件(用于有@Param("filter")的方法) -->
|
||||
<sql id="Filter_Where_Clause">
|
||||
<where>
|
||||
deleted = 0
|
||||
<if test="filter != null">
|
||||
<if test="filter.id != null and filter.id != ''">
|
||||
AND id = #{filter.id}
|
||||
</if>
|
||||
<if test="filter.taskId != null and filter.taskId != ''">
|
||||
AND task_id = #{filter.taskId}
|
||||
</if>
|
||||
<if test="filter.logId != null and filter.logId != ''">
|
||||
AND log_id = #{filter.logId}
|
||||
</if>
|
||||
<if test="filter.title != null and filter.title != ''">
|
||||
AND title LIKE CONCAT('%', #{filter.title}, '%')
|
||||
</if>
|
||||
<if test="filter.source != null and filter.source != ''">
|
||||
AND source = #{filter.source}
|
||||
</if>
|
||||
<if test="filter.sourceUrl != null and filter.sourceUrl != ''">
|
||||
AND source_url = #{filter.sourceUrl}
|
||||
</if>
|
||||
<if test="filter.category != null and filter.category != ''">
|
||||
AND category = #{filter.category}
|
||||
</if>
|
||||
<if test="filter.author != null and filter.author != ''">
|
||||
AND author LIKE CONCAT('%', #{filter.author}, '%')
|
||||
</if>
|
||||
<if test="filter.status != null">
|
||||
AND status = #{filter.status}
|
||||
</if>
|
||||
<if test="filter.resourceId != null and filter.resourceId != ''">
|
||||
AND resource_id = #{filter.resourceId}
|
||||
</if>
|
||||
<if test="filter.processor != null and filter.processor != ''">
|
||||
AND processor = #{filter.processor}
|
||||
</if>
|
||||
</if>
|
||||
</where>
|
||||
</sql>
|
||||
|
||||
<!-- 动态查询条件(用于没有@Param注解的方法,直接使用参数名) -->
|
||||
<sql id="Item_Where_Clause">
|
||||
<where>
|
||||
deleted = 0
|
||||
<if test="_parameter != null">
|
||||
<if test="id != null and id != ''">
|
||||
AND id = #{id}
|
||||
</if>
|
||||
<if test="taskId != null and taskId != ''">
|
||||
AND task_id = #{taskId}
|
||||
</if>
|
||||
<if test="logId != null and logId != ''">
|
||||
AND log_id = #{logId}
|
||||
</if>
|
||||
<if test="title != null and title != ''">
|
||||
AND title LIKE CONCAT('%', #{title}, '%')
|
||||
</if>
|
||||
<if test="source != null and source != ''">
|
||||
AND source = #{source}
|
||||
</if>
|
||||
<if test="sourceUrl != null and sourceUrl != ''">
|
||||
AND source_url = #{sourceUrl}
|
||||
</if>
|
||||
<if test="category != null and category != ''">
|
||||
AND category = #{category}
|
||||
</if>
|
||||
<if test="author != null and author != ''">
|
||||
AND author LIKE CONCAT('%', #{author}, '%')
|
||||
</if>
|
||||
<if test="status != null">
|
||||
AND status = #{status}
|
||||
</if>
|
||||
<if test="resourceId != null and resourceId != ''">
|
||||
AND resource_id = #{resourceId}
|
||||
</if>
|
||||
<if test="processor != null and processor != ''">
|
||||
AND processor = #{processor}
|
||||
</if>
|
||||
</if>
|
||||
</where>
|
||||
</sql>
|
||||
|
||||
<!-- 根据来源URL查询采集项(用于去重) -->
|
||||
<select id="selectBySourceUrl" resultMap="BaseResultMap">
|
||||
SELECT
|
||||
<include refid="Base_Column_List" />
|
||||
FROM tb_data_collection_item
|
||||
WHERE source_url = #{sourceUrl}
|
||||
AND deleted = 0
|
||||
LIMIT 1
|
||||
</select>
|
||||
|
||||
<!-- 根据任务ID查询采集项列表 -->
|
||||
<select id="selectByTaskId" resultMap="BaseResultMap">
|
||||
SELECT
|
||||
<include refid="Base_Column_List" />
|
||||
FROM tb_data_collection_item
|
||||
WHERE task_id = #{taskId}
|
||||
AND deleted = 0
|
||||
ORDER BY create_time DESC
|
||||
</select>
|
||||
|
||||
<!-- 查询采集项列表 -->
|
||||
<select id="selectItemList" resultMap="BaseResultMap">
|
||||
SELECT
|
||||
<include refid="Base_Column_List" />
|
||||
FROM tb_data_collection_item
|
||||
<include refid="Item_Where_Clause" />
|
||||
ORDER BY create_time DESC
|
||||
</select>
|
||||
|
||||
<!-- 分页查询采集项列表 -->
|
||||
<select id="selectItemPage" resultMap="BaseResultMap">
|
||||
SELECT
|
||||
<include refid="Base_Column_List" />
|
||||
FROM tb_data_collection_item
|
||||
<include refid="Filter_Where_Clause" />
|
||||
ORDER BY create_time DESC
|
||||
LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
|
||||
</select>
|
||||
|
||||
<!-- 统计采集项总数 -->
|
||||
<select id="countItems" resultType="long">
|
||||
SELECT COUNT(*)
|
||||
FROM tb_data_collection_item
|
||||
<include refid="Filter_Where_Clause" />
|
||||
</select>
|
||||
|
||||
<!-- 根据状态统计数量 -->
|
||||
<select id="countByStatus" resultType="long">
|
||||
SELECT COUNT(*)
|
||||
FROM tb_data_collection_item
|
||||
WHERE deleted = 0
|
||||
<if test="taskId != null and taskId != ''">
|
||||
AND task_id = #{taskId}
|
||||
</if>
|
||||
<if test="status != null">
|
||||
AND status = #{status}
|
||||
</if>
|
||||
</select>
|
||||
|
||||
<!-- 批量插入采集项 -->
|
||||
<insert id="batchInsertItems">
|
||||
INSERT INTO tb_data_collection_item (
|
||||
id, task_id, log_id, title, content, summary, source, source_url,
|
||||
category, author, publish_time, cover_image, images, tags, status,
|
||||
resource_id, crawl_time, process_time, processor,
|
||||
create_time, update_time, deleted
|
||||
)
|
||||
VALUES
|
||||
<foreach collection="itemList" item="item" separator=",">
|
||||
(
|
||||
#{item.id}, #{item.taskId}, #{item.logId}, #{item.title}, #{item.content},
|
||||
#{item.summary}, #{item.source}, #{item.sourceUrl}, #{item.category},
|
||||
#{item.author}, #{item.publishTime}, #{item.coverImage}, #{item.images},
|
||||
#{item.tags}, #{item.status}, #{item.resourceId}, #{item.crawlTime},
|
||||
#{item.processTime}, #{item.processor},
|
||||
NOW(), NOW(), 0
|
||||
)
|
||||
</foreach>
|
||||
</insert>
|
||||
|
||||
<!-- ==================== VO查询方法(使用JOIN返回完整VO) ==================== -->
|
||||
|
||||
<!-- 根据ID查询采集项VO -->
|
||||
<select id="selectVOById" resultMap="VOResultMap">
|
||||
SELECT
|
||||
<include refid="VO_Column_List" />
|
||||
FROM tb_data_collection_item i
|
||||
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
|
||||
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
|
||||
WHERE i.id = #{itemId}
|
||||
AND i.deleted = 0
|
||||
</select>
|
||||
|
||||
<!-- 查询采集项VO列表 -->
|
||||
<select id="selectVOList" resultMap="VOResultMap">
|
||||
SELECT
|
||||
<include refid="VO_Column_List" />
|
||||
FROM tb_data_collection_item i
|
||||
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
|
||||
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
|
||||
<where>
|
||||
i.deleted = 0
|
||||
<if test="_parameter != null">
|
||||
<if test="id != null and id != ''">
|
||||
AND i.id = #{id}
|
||||
</if>
|
||||
<if test="taskId != null and taskId != ''">
|
||||
AND i.task_id = #{taskId}
|
||||
</if>
|
||||
<if test="logId != null and logId != ''">
|
||||
AND i.log_id = #{logId}
|
||||
</if>
|
||||
<if test="title != null and title != ''">
|
||||
AND i.title LIKE CONCAT('%', #{title}, '%')
|
||||
</if>
|
||||
<if test="source != null and source != ''">
|
||||
AND i.source = #{source}
|
||||
</if>
|
||||
<if test="sourceUrl != null and sourceUrl != ''">
|
||||
AND i.source_url = #{sourceUrl}
|
||||
</if>
|
||||
<if test="category != null and category != ''">
|
||||
AND i.category = #{category}
|
||||
</if>
|
||||
<if test="author != null and author != ''">
|
||||
AND i.author LIKE CONCAT('%', #{author}, '%')
|
||||
</if>
|
||||
<if test="status != null">
|
||||
AND i.status = #{status}
|
||||
</if>
|
||||
<if test="resourceId != null and resourceId != ''">
|
||||
AND i.resource_id = #{resourceId}
|
||||
</if>
|
||||
<if test="processor != null and processor != ''">
|
||||
AND i.processor = #{processor}
|
||||
</if>
|
||||
</if>
|
||||
</where>
|
||||
ORDER BY i.create_time DESC
|
||||
</select>
|
||||
|
||||
<!-- 分页查询采集项VO列表 -->
|
||||
<select id="selectVOPage" resultMap="VOResultMap">
|
||||
SELECT
|
||||
<include refid="VO_Column_List" />
|
||||
FROM tb_data_collection_item i
|
||||
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
|
||||
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
|
||||
<where>
|
||||
i.deleted = 0
|
||||
<if test="filter != null">
|
||||
<if test="filter.id != null and filter.id != ''">
|
||||
AND i.id = #{filter.id}
|
||||
</if>
|
||||
<if test="filter.taskId != null and filter.taskId != ''">
|
||||
AND i.task_id = #{filter.taskId}
|
||||
</if>
|
||||
<if test="filter.logId != null and filter.logId != ''">
|
||||
AND i.log_id = #{filter.logId}
|
||||
</if>
|
||||
<if test="filter.title != null and filter.title != ''">
|
||||
AND i.title LIKE CONCAT('%', #{filter.title}, '%')
|
||||
</if>
|
||||
<if test="filter.source != null and filter.source != ''">
|
||||
AND i.source = #{filter.source}
|
||||
</if>
|
||||
<if test="filter.sourceUrl != null and filter.sourceUrl != ''">
|
||||
AND i.source_url = #{filter.sourceUrl}
|
||||
</if>
|
||||
<if test="filter.category != null and filter.category != ''">
|
||||
AND i.category = #{filter.category}
|
||||
</if>
|
||||
<if test="filter.author != null and filter.author != ''">
|
||||
AND i.author LIKE CONCAT('%', #{filter.author}, '%')
|
||||
</if>
|
||||
<if test="filter.status != null">
|
||||
AND i.status = #{filter.status}
|
||||
</if>
|
||||
<if test="filter.resourceId != null and filter.resourceId != ''">
|
||||
AND i.resource_id = #{filter.resourceId}
|
||||
</if>
|
||||
<if test="filter.processor != null and filter.processor != ''">
|
||||
AND i.processor = #{filter.processor}
|
||||
</if>
|
||||
</if>
|
||||
</where>
|
||||
ORDER BY i.create_time DESC
|
||||
LIMIT #{pageParam.pageSize} OFFSET #{pageParam.offset}
|
||||
</select>
|
||||
|
||||
<!-- 根据任务ID查询采集项VO列表 -->
|
||||
<select id="selectVOByTaskId" resultMap="VOResultMap">
|
||||
SELECT
|
||||
<include refid="VO_Column_List" />
|
||||
FROM tb_data_collection_item i
|
||||
LEFT JOIN tb_crontab_task t ON i.task_id = t.task_id
|
||||
LEFT JOIN tb_crontab_log l ON i.log_id = l.id
|
||||
WHERE i.task_id = #{taskId}
|
||||
AND i.deleted = 0
|
||||
ORDER BY i.create_time DESC
|
||||
</select>
|
||||
|
||||
</mapper>
|
||||
Reference in New Issue
Block a user