调试修改爬虫
This commit is contained in:
@@ -63,6 +63,11 @@ public class DataCollectionItemController {
|
||||
return itemService.convertToResource(request.getItemId(), request.getTagId());
|
||||
}
|
||||
|
||||
@PutMapping("/{itemId}/status/{status}")
|
||||
public ResultDomain<String> updateItemStatus(@PathVariable(name = "itemId") String itemId, @PathVariable(name = "status") int status) {
|
||||
return itemService.updateItemStatus(itemId, status);
|
||||
}
|
||||
|
||||
/**
|
||||
* @description 转换请求
|
||||
*/
|
||||
|
||||
@@ -28,6 +28,15 @@ public interface CrontabLogMapper extends BaseMapper<TbCrontabLog> {
|
||||
*/
|
||||
int insertLog(@Param("log") TbCrontabLog log);
|
||||
|
||||
/**
 * @description Update an execution log record
 * @param log log information
 * @return int number of affected rows
 * @author yslg
 * @since 2025-11-12
 */
|
||||
int updateLog(@Param("log") TbCrontabLog log);
|
||||
|
||||
/**
|
||||
* @description 根据ID查询日志
|
||||
* @param logId 日志ID
|
||||
|
||||
@@ -84,6 +84,16 @@ public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionIte
|
||||
*/
|
||||
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
|
||||
|
||||
/**
 * @description Update the status of a collection item
 * @param itemId collection item ID
 * @param status status
 * @return int number of affected rows
 * @author yslg
 * @since 2025-11-08
 */
|
||||
int updateItemStatus(@Param("itemId") String itemId, @Param("status") Integer status);
|
||||
|
||||
// ==================== VO查询方法(使用JOIN返回完整VO) ====================
|
||||
|
||||
/**
|
||||
|
||||
@@ -58,6 +58,10 @@ public class TaskExecutor {
|
||||
log.setDeleted(false);
|
||||
|
||||
try {
|
||||
log.setExecuteStatus(0);
|
||||
log.setExecuteMessage("执行中");
|
||||
int i = logMapper.insertLog(log);
|
||||
|
||||
// 检查是否允许并发执行
|
||||
if (task.getConcurrent() == 0) {
|
||||
// TODO: 可以添加分布式锁来防止并发执行
|
||||
@@ -84,7 +88,7 @@ public class TaskExecutor {
|
||||
log.setEndTime(endTime);
|
||||
log.setExecuteDuration((int) (endTime.getTime() - startTime.getTime()));
|
||||
log.setExecuteStatus(1);
|
||||
log.setExecuteMessage("执行成功");
|
||||
log.setExecuteMessage(null);
|
||||
|
||||
logger.info("任务执行成功: {} [{}ms]", task.getTaskName(), log.getExecuteDuration());
|
||||
} catch (Exception e) {
|
||||
@@ -100,7 +104,7 @@ public class TaskExecutor {
|
||||
} finally {
|
||||
// 保存执行日志
|
||||
try {
|
||||
logMapper.insertLog(log);
|
||||
logMapper.updateLog(log);
|
||||
} catch (Exception e) {
|
||||
logger.error("保存任务执行日志失败: {}", task.getTaskName(), e);
|
||||
}
|
||||
|
||||
@@ -17,10 +17,13 @@ import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.common.vo.DataCollectionItemVO;
|
||||
import org.xyzh.common.vo.ResourceVO;
|
||||
import org.xyzh.crontab.mapper.DataCollectionItemMapper;
|
||||
import org.xyzh.crontab.mapper.CrontabLogMapper;
|
||||
import org.xyzh.crontab.mapper.CrontabTaskMapper;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabLog;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabTask;
|
||||
import org.xyzh.system.utils.LoginUtil;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
@@ -42,6 +45,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
@Autowired
|
||||
private CrontabTaskMapper taskMapper;
|
||||
|
||||
@Autowired
|
||||
private CrontabLogMapper logMapper;
|
||||
|
||||
@Autowired
|
||||
private ResourceService resourceService;
|
||||
|
||||
@@ -100,11 +106,23 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
|
||||
int successCount = 0;
|
||||
Date now = new Date();
|
||||
|
||||
int result = itemMapper.batchInsertItems(itemList);
|
||||
if (result > 0) {
|
||||
successCount = result;
|
||||
List<TbDataCollectionItem> newItems = new ArrayList<>();
|
||||
for (TbDataCollectionItem it : itemList) {
|
||||
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(it.getSourceUrl());
|
||||
if (existing == null) {
|
||||
newItems.add(it);
|
||||
}
|
||||
}
|
||||
|
||||
if (!newItems.isEmpty()) {
|
||||
successCount = itemMapper.batchInsertItems(newItems);
|
||||
}
|
||||
String logId = itemList.get(0).getLogId();
|
||||
TbCrontabLog log = new TbCrontabLog();
|
||||
log.setID(logId);
|
||||
log.setExecuteStatus(1);
|
||||
log.setExecuteMessage("爬取成功,共" + itemList.size() + "条,新增" + successCount + "条");
|
||||
int i = logMapper.updateLog(log);
|
||||
|
||||
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
|
||||
resultDomain.success("批量创建采集项成功", successCount);
|
||||
@@ -404,5 +422,21 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
return resultDomain;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultDomain<String> updateItemStatus(String itemId, int status) {
|
||||
ResultDomain<String> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
int result = itemMapper.updateItemStatus(itemId, status);
|
||||
if (result > 0) {
|
||||
resultDomain.success("更新采集项状态成功", itemId);
|
||||
} else {
|
||||
resultDomain.fail("更新采集项状态失败");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("更新采集项状态异常: ", e);
|
||||
resultDomain.fail("更新采集项状态异常: " + e.getMessage());
|
||||
}
|
||||
return resultDomain;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,9 @@ public class ArticleStruct {
|
||||
private String publishTime;
|
||||
private String author;
|
||||
private String source;
|
||||
private String logId;
|
||||
private Integer executeStatus;
|
||||
private String executeMessage;
|
||||
private List<RowStruct> contentRows;
|
||||
|
||||
@Data
|
||||
|
||||
@@ -158,7 +158,8 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
item.setTaskId(taskId);
|
||||
item.setLogId(logId);
|
||||
item.setTitle(news.getTitle());
|
||||
|
||||
item.setExecuteStatus(news.getExecuteStatus());
|
||||
item.setExecuteMessage(news.getExecuteMessage());
|
||||
// 拼接HTML内容
|
||||
if (news.getContentRows() != null && !news.getContentRows().isEmpty()) {
|
||||
StringBuilder html = new StringBuilder();
|
||||
|
||||
@@ -99,6 +99,20 @@
|
||||
</trim>
|
||||
</insert>
|
||||
|
||||
<!-- Update a log row by ID: only non-null fields are written, update_time is always refreshed -->
<update id="updateLog">
    UPDATE tb_crontab_log
    <set>
        <if test="log.executeStatus != null">execute_status = #{log.executeStatus},</if>
        <if test="log.executeMessage != null">execute_message = #{log.executeMessage},</if>
        <if test="log.exceptionInfo != null">exception_info = #{log.exceptionInfo},</if>
        <if test="log.endTime != null">end_time = #{log.endTime},</if>
        <if test="log.executeDuration != null">execute_duration = #{log.executeDuration},</if>
        update_time = NOW()
    </set>
    WHERE id = #{log.ID} AND deleted = 0
</update>
|
||||
|
||||
<!-- 根据ID查询日志 -->
|
||||
<select id="selectLogById" resultMap="BaseResultMap">
|
||||
SELECT
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
<result column="processor" property="processor" />
|
||||
<result column="execute_status" property="executeStatus" />
|
||||
<result column="execute_message" property="executeMessage" />
|
||||
<result column="create_time" property="createTime" />
|
||||
<result column="update_time" property="updateTime" />
|
||||
<result column="delete_time" property="deleteTime" />
|
||||
@@ -53,6 +55,8 @@
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
<result column="processor" property="processor" />
|
||||
<result column="item_execute_status" property="itemExecuteStatus" />
|
||||
<result column="item_execute_message" property="itemExecuteMessage" />
|
||||
<result column="item_create_time" property="createTime" />
|
||||
<result column="item_update_time" property="updateTime" />
|
||||
|
||||
@@ -74,7 +78,7 @@
|
||||
<sql id="Base_Column_List">
|
||||
id, task_id, log_id, title, content, summary, source, source_url, category, author,
|
||||
publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time,
|
||||
processor, create_time, update_time, delete_time, deleted
|
||||
processor, execute_status, execute_message, create_time, update_time, delete_time, deleted
|
||||
</sql>
|
||||
|
||||
<!-- VO查询字段列表(包含关联表) -->
|
||||
@@ -98,6 +102,8 @@
|
||||
i.crawl_time,
|
||||
i.process_time,
|
||||
i.processor,
|
||||
i.execute_status as item_execute_status,
|
||||
i.execute_message as item_execute_message,
|
||||
i.create_time as item_create_time,
|
||||
i.update_time as item_update_time,
|
||||
t.task_name,
|
||||
@@ -259,7 +265,7 @@
|
||||
INSERT INTO tb_data_collection_item (
|
||||
id, task_id, log_id, title, content, summary, source, source_url,
|
||||
category, author, publish_time, cover_image, images, tags, status,
|
||||
resource_id, crawl_time, process_time, processor,
|
||||
resource_id, crawl_time, process_time, processor, execute_status, execute_message,
|
||||
create_time, update_time, deleted
|
||||
)
|
||||
VALUES
|
||||
@@ -269,7 +275,7 @@
|
||||
#{item.summary}, #{item.source}, #{item.sourceUrl}, #{item.category},
|
||||
#{item.author}, #{item.publishTime}, #{item.coverImage}, #{item.images},
|
||||
#{item.tags}, #{item.status}, #{item.resourceId}, #{item.crawlTime},
|
||||
#{item.processTime}, #{item.processor},
|
||||
#{item.processTime}, #{item.processor}, #{item.executeStatus}, #{item.executeMessage},
|
||||
NOW(), NOW(), 0
|
||||
)
|
||||
</foreach>
|
||||
@@ -397,4 +403,12 @@
|
||||
ORDER BY i.create_time DESC
|
||||
</select>
|
||||
|
||||
<!-- Update the status of one non-deleted collection item.
     update_time is refreshed as well, matching updateLog and the insert statements
     that maintain this column explicitly. -->
<update id="updateItemStatus">
    UPDATE tb_data_collection_item
    SET status = #{status},
        update_time = NOW()
    WHERE id = #{itemId}
      AND deleted = 0
</update>
|
||||
</mapper>
|
||||
Reference in New Issue
Block a user