调试修改爬虫

This commit is contained in:
2025-11-12 19:16:50 +08:00
parent 675e6da7d7
commit e55a52f20b
27 changed files with 1023 additions and 601 deletions

View File

@@ -63,6 +63,11 @@ public class DataCollectionItemController {
return itemService.convertToResource(request.getItemId(), request.getTagId());
}
/**
 * @description Updates the status of a single collection item by delegating to the item service.
 * @param itemId collection item ID, bound from the {itemId} path segment
 * @param status status value, bound from the {status} path segment
 * @return ResultDomain<String> the result object returned by the item service
 */
@PutMapping("/{itemId}/status/{status}")
public ResultDomain<String> updateItemStatus(
        @PathVariable(name = "itemId") String itemId,
        @PathVariable(name = "status") int status) {
    // The controller adds no logic of its own; the service builds the response.
    ResultDomain<String> outcome = itemService.updateItemStatus(itemId, status);
    return outcome;
}
/**
* @description 转换请求
*/

View File

@@ -28,6 +28,15 @@ public interface CrontabLogMapper extends BaseMapper<TbCrontabLog> {
*/
int insertLog(@Param("log") TbCrontabLog log);
/**
 * @description Updates a log record.
 *              NOTE(review): which column identifies the row and which columns are
 *              written is decided by the mapper SQL, which is not visible here - confirm.
 * @param log log information
 * @return int number of affected rows
 * @author yslg
 * @since 2025-11-12
 */
int updateLog(@Param("log") TbCrontabLog log);
/**
* @description 根据ID查询日志
* @param logId 日志ID

View File

@@ -84,6 +84,16 @@ public interface DataCollectionItemMapper extends BaseMapper<TbDataCollectionIte
*/
long countByStatus(@Param("taskId") String taskId, @Param("status") Integer status);
/**
 * @description Updates the status of a collection item.
 * @param itemId collection item ID
 * @param status status value (NOTE(review): the set of valid status codes is not
 *               visible here - confirm against the entity/SQL definition)
 * @return int number of affected rows
 * @author yslg
 * @since 2025-11-08
 */
int updateItemStatus(@Param("itemId") String itemId, @Param("status") Integer status);
// ==================== VO查询方法(使用JOIN返回完整VO) ====================
/**

View File

@@ -58,6 +58,10 @@ public class TaskExecutor {
log.setDeleted(false);
try {
log.setExecuteStatus(0);
log.setExecuteMessage("执行中");
int i = logMapper.insertLog(log);
// 检查是否允许并发执行
if (task.getConcurrent() == 0) {
// TODO: 可以添加分布式锁来防止并发执行
@@ -84,7 +88,7 @@ public class TaskExecutor {
log.setEndTime(endTime);
log.setExecuteDuration((int) (endTime.getTime() - startTime.getTime()));
log.setExecuteStatus(1);
log.setExecuteMessage("执行成功");
log.setExecuteMessage(null);
logger.info("任务执行成功: {} [{}ms]", task.getTaskName(), log.getExecuteDuration());
} catch (Exception e) {
@@ -100,7 +104,7 @@ public class TaskExecutor {
} finally {
// 保存执行日志
try {
logMapper.insertLog(log);
logMapper.updateLog(log);
} catch (Exception e) {
logger.error("保存任务执行日志失败: {}", task.getTaskName(), e);
}

View File

@@ -17,10 +17,13 @@ import org.xyzh.common.utils.IDUtils;
import org.xyzh.common.vo.DataCollectionItemVO;
import org.xyzh.common.vo.ResourceVO;
import org.xyzh.crontab.mapper.DataCollectionItemMapper;
import org.xyzh.crontab.mapper.CrontabLogMapper;
import org.xyzh.crontab.mapper.CrontabTaskMapper;
import org.xyzh.common.dto.crontab.TbCrontabLog;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.system.utils.LoginUtil;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@@ -42,6 +45,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
@Autowired
private CrontabTaskMapper taskMapper;
@Autowired
private CrontabLogMapper logMapper;
@Autowired
private ResourceService resourceService;
@@ -100,11 +106,23 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
int successCount = 0;
Date now = new Date();
int result = itemMapper.batchInsertItems(itemList);
if (result > 0) {
successCount = result;
List<TbDataCollectionItem> newItems = new ArrayList<>();
for (TbDataCollectionItem it : itemList) {
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(it.getSourceUrl());
if (existing == null) {
newItems.add(it);
}
}
if (!newItems.isEmpty()) {
successCount = itemMapper.batchInsertItems(newItems);
}
String logId = itemList.get(0).getLogId();
TbCrontabLog log = new TbCrontabLog();
log.setID(logId);
log.setExecuteStatus(1);
log.setExecuteMessage("爬取成功,共" + itemList.size() + "条,新增" + successCount + "");
int i = logMapper.updateLog(log);
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
resultDomain.success("批量创建采集项成功", successCount);
@@ -404,5 +422,21 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
return resultDomain;
}
/**
 * @description Updates the status of a collection item and reports the outcome.
 *              Succeeds when the mapper reports at least one affected row; otherwise
 *              (or when any exception is thrown) the result is marked as failed.
 * @param itemId collection item ID
 * @param status new status value
 * @return ResultDomain<String> success result carrying the item ID, or a failure result
 */
@Override
public ResultDomain<String> updateItemStatus(String itemId, int status) {
    ResultDomain<String> response = new ResultDomain<>();
    try {
        boolean updated = itemMapper.updateItemStatus(itemId, status) > 0;
        if (!updated) {
            // No row was affected by the update.
            response.fail("更新采集项状态失败");
            return response;
        }
        response.success("更新采集项状态成功", itemId);
    } catch (Exception ex) {
        // Failures are logged and reported in the result instead of being rethrown.
        logger.error("更新采集项状态异常: ", ex);
        response.fail("更新采集项状态异常: " + ex.getMessage());
    }
    return response;
}
}

View File

@@ -23,6 +23,9 @@ public class ArticleStruct {
private String publishTime;
private String author;
private String source;
private String logId;
private Integer executeStatus;
private String executeMessage;
private List<RowStruct> contentRows;
@Data

View File

@@ -158,7 +158,8 @@ public class NewsCrawlerTask extends PythonCommandTask {
item.setTaskId(taskId);
item.setLogId(logId);
item.setTitle(news.getTitle());
item.setExecuteStatus(news.getExecuteStatus());
item.setExecuteMessage(news.getExecuteMessage());
// 拼接HTML内容
if (news.getContentRows() != null && !news.getContentRows().isEmpty()) {
StringBuilder html = new StringBuilder();