自动发布文章,权限插入
This commit is contained in:
@@ -96,8 +96,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList) {
|
||||
ResultDomain<Integer> resultDomain = new ResultDomain<>();
|
||||
public ResultDomain<TbDataCollectionItem> batchCreateItems(List<TbDataCollectionItem> itemList) {
|
||||
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
|
||||
try {
|
||||
if (itemList == null || itemList.isEmpty()) {
|
||||
resultDomain.fail("采集项列表为空");
|
||||
@@ -105,18 +105,17 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
}
|
||||
|
||||
int successCount = 0;
|
||||
Date now = new Date();
|
||||
List<TbDataCollectionItem> newItems = new ArrayList<>();
|
||||
List<TbDataCollectionItem> createdItems = new ArrayList<>();
|
||||
// 改为逐条创建,避免单条失败导致整批回滚
|
||||
for (TbDataCollectionItem it : itemList) {
|
||||
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(it.getSourceUrl());
|
||||
if (existing == null) {
|
||||
newItems.add(it);
|
||||
ResultDomain<TbDataCollectionItem> createResult = createItem(it);
|
||||
if (createResult.isSuccess()) {
|
||||
successCount++;
|
||||
if (createResult.getData() != null) {
|
||||
createdItems.add(createResult.getData());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!newItems.isEmpty()) {
|
||||
successCount = itemMapper.batchInsertItems(newItems);
|
||||
}
|
||||
String logId = itemList.get(0).getLogId();
|
||||
TbCrontabLog log = new TbCrontabLog();
|
||||
log.setID(logId);
|
||||
@@ -125,7 +124,7 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
|
||||
int i = logMapper.updateLog(log);
|
||||
|
||||
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
|
||||
resultDomain.success("批量创建采集项成功", successCount);
|
||||
resultDomain.success("批量创建采集项成功", createdItems);
|
||||
} catch (Exception e) {
|
||||
logger.error("批量创建采集项异常: ", e);
|
||||
resultDomain.fail("批量创建采集项异常: " + e.getMessage());
|
||||
|
||||
@@ -9,14 +9,21 @@ import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.api.crontab.EmailDefaultService;
|
||||
import org.xyzh.api.crontab.EmailRecipientService;
|
||||
import org.xyzh.api.crontab.TaskMetaService;
|
||||
import org.xyzh.api.news.resource.ResourceService;
|
||||
import org.xyzh.api.system.role.RoleService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabEmailDefault;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabEmailRecipient;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabTask;
|
||||
import org.xyzh.common.dto.crontab.TbCrontabTaskMeta;
|
||||
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
|
||||
import org.xyzh.common.dto.resource.TbResource;
|
||||
import org.xyzh.common.utils.EmailUtils;
|
||||
import org.xyzh.common.utils.IDUtils;
|
||||
import org.xyzh.common.utils.NonUtils;
|
||||
import org.xyzh.common.vo.DataCollectionItemVO;
|
||||
import org.xyzh.common.vo.ResourceVO;
|
||||
import org.xyzh.common.vo.UserDeptRoleVO;
|
||||
import org.xyzh.crontab.pojo.TaskParams;
|
||||
import org.xyzh.crontab.task.PythonCommandTask;
|
||||
|
||||
@@ -25,9 +32,13 @@ import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
@@ -58,6 +69,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
@Autowired
|
||||
private EmailUtils emailUtils;
|
||||
|
||||
@Autowired
|
||||
private ResourceService resourceService;
|
||||
|
||||
@Autowired
|
||||
private RoleService roleService;
|
||||
|
||||
/**
|
||||
* 构建Python脚本参数
|
||||
*/
|
||||
@@ -165,9 +182,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
ResultDomain<TbCrontabTask> taskResult = crontabService.getTaskById(taskId);
|
||||
|
||||
saveNewsToDatabase(newsList, taskResult.getData(), logId);
|
||||
|
||||
// 发送邮件通知
|
||||
sendEmailNotification(taskId, taskResult.getData(), newsList);
|
||||
|
||||
} else {
|
||||
logger.warn("未提供任务ID或日志ID,跳过数据保存");
|
||||
}
|
||||
@@ -218,11 +233,22 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
item.setSourceUrl(news.getUrl());
|
||||
item.setAuthor(news.getAuthor());
|
||||
|
||||
// 发布时间
|
||||
// 发布时间,兼容多种格式
|
||||
String publishTimeStr = news.getPublishTime();
|
||||
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
|
||||
try {
|
||||
item.setPublishTime(dateFormat.parse(dateFormat.format(parser.parse(publishTimeStr))));
|
||||
Date parsed;
|
||||
if (publishTimeStr.contains("年")) {
|
||||
// 原始格式:yyyy年MM月dd日HH:mm
|
||||
parsed = parser.parse(publishTimeStr);
|
||||
} else if (publishTimeStr.length() == 10 && publishTimeStr.charAt(4) == '-' && publishTimeStr.charAt(7) == '-') {
|
||||
// 仅日期:yyyy-MM-dd
|
||||
parsed = new SimpleDateFormat("yyyy-MM-dd").parse(publishTimeStr);
|
||||
} else {
|
||||
// 其它情况按原parser尝试
|
||||
parsed = parser.parse(publishTimeStr);
|
||||
}
|
||||
item.setPublishTime(dateFormat.parse(dateFormat.format(parsed)));
|
||||
} catch (Exception e) {
|
||||
logger.warn("解析发布时间失败: {}", publishTimeStr);
|
||||
item.setPublishTime(now);
|
||||
@@ -242,20 +268,28 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
}
|
||||
|
||||
// 批量保存
|
||||
Set<String> insertedUrls = new HashSet<>();
|
||||
ResultDomain<TbDataCollectionItem> dataResult = new ResultDomain<>();
|
||||
if (!itemList.isEmpty()) {
|
||||
ResultDomain<Integer> result = itemService.batchCreateItems(itemList);
|
||||
if (result.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", result.getData());
|
||||
dataResult = itemService.batchCreateItems(itemList);
|
||||
if (dataResult.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", itemList.size());
|
||||
insertedUrls.addAll(dataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
|
||||
} else {
|
||||
logger.error("保存新闻到数据库失败: {}", result.getMessage());
|
||||
logger.error("保存新闻到数据库失败: {}", dataResult.getMessage());
|
||||
}
|
||||
} else {
|
||||
logger.warn("没有有效的新闻数据需要保存");
|
||||
}
|
||||
if (taskMeta.getAutoPublish()){
|
||||
publishNewsToArticle(newsList, task, logId);
|
||||
// 自动发布并记录成功发布的 URL 集合
|
||||
Set<String> publishedUrls = new HashSet<>();
|
||||
if (taskMeta.getAutoPublish().booleanValue()){
|
||||
publishedUrls = publishNewsToArticle(dataResult.getDataList(), task, logId);
|
||||
}
|
||||
|
||||
// 发送邮件通知,包含自动发布与新增信息
|
||||
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("保存新闻数据到数据库异常: ", e);
|
||||
}
|
||||
@@ -264,7 +298,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
/**
|
||||
* 发送邮件通知
|
||||
*/
|
||||
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList) {
|
||||
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList,
|
||||
Set<String> insertedUrls,
|
||||
Set<String> publishedUrls) {
|
||||
try {
|
||||
List<String> recipients = new ArrayList<>();
|
||||
|
||||
@@ -300,7 +336,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
|
||||
// 5. 构建邮件内容
|
||||
String subject = "【新闻爬虫通知】" + task.getTaskName() + " 执行完成";
|
||||
String content = buildEmailContent(task.getTaskName(), newsList);
|
||||
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls);
|
||||
|
||||
// 6. 发送邮件
|
||||
int successCount = 0;
|
||||
@@ -320,7 +356,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
/**
|
||||
* 构建邮件HTML内容
|
||||
*/
|
||||
private String buildEmailContent(String taskName, List<ArticleStruct> newsList) {
|
||||
private String buildEmailContent(String taskName, List<ArticleStruct> newsList,
|
||||
java.util.Set<String> insertedUrls,
|
||||
java.util.Set<String> publishedUrls) {
|
||||
StringBuilder html = new StringBuilder();
|
||||
html.append("<!DOCTYPE html>")
|
||||
.append("<html>")
|
||||
@@ -350,17 +388,21 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
.append("<div class='content'>");
|
||||
|
||||
// 摘要信息
|
||||
int insertedCount = (insertedUrls != null) ? insertedUrls.size() : 0;
|
||||
int totalCount = newsList.size();
|
||||
html.append("<div class='summary'>")
|
||||
.append("<p><strong>任务名称:</strong>").append(taskName).append("</p>")
|
||||
.append("<p><strong>执行时间:</strong>").append(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())).append("</p>")
|
||||
.append("<p><strong>爬取数量:</strong>").append(newsList.size()).append(" 条</p>")
|
||||
.append("<p><strong>爬取数量:</strong>").append(totalCount).append(" 条</p>")
|
||||
.append("<p><strong>入库情况:</strong>")
|
||||
.append("本次新增 ").append(insertedCount).append(" 条,已有历史记录的新闻不会重复入库。</p>")
|
||||
.append("</div>");
|
||||
|
||||
// 新闻列表
|
||||
|
||||
// 新闻列表(包含自动发布标记)
|
||||
html.append("<div class='news-list'>")
|
||||
.append("<h3>爬取内容:</h3>");
|
||||
|
||||
int count = Math.min(newsList.size(), 10); // 最多显示10条
|
||||
int count = newsList.size(); // 最多显示10条
|
||||
for (int i = 0; i < count; i++) {
|
||||
ArticleStruct news = newsList.get(i);
|
||||
html.append("<div class='news-item'>")
|
||||
@@ -369,11 +411,26 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
.append("来源:").append(news.getSource() != null ? news.getSource() : "未知")
|
||||
.append(" | ")
|
||||
.append("发布时间:").append(news.getPublishTime() != null ? news.getPublishTime() : "未知");
|
||||
|
||||
|
||||
if (news.getUrl() != null && !news.getUrl().isEmpty()) {
|
||||
html.append(" | <a href='").append(news.getUrl()).append("' class='news-link' target='_blank'>查看原文</a>");
|
||||
}
|
||||
|
||||
|
||||
// 入库标记(新增 / 历史已存在)
|
||||
if (news.getUrl() != null && !news.getUrl().isEmpty() && insertedUrls != null) {
|
||||
if (insertedUrls.contains(news.getUrl())) {
|
||||
html.append(" | <span style='color:#2e7d32;font-weight:bold;'>【本次新增】</span>");
|
||||
} else {
|
||||
html.append(" | <span style='color:#666;'>【历史已存在,未重复入库】</span>");
|
||||
}
|
||||
}
|
||||
|
||||
// 如果该新闻已自动发布,追加标记
|
||||
if (publishedUrls != null && !publishedUrls.isEmpty()
|
||||
&& news.getUrl() != null && publishedUrls.contains(news.getUrl())) {
|
||||
html.append(" | <span style='color:#C62828;font-weight:bold;'>【已自动发布】</span>");
|
||||
}
|
||||
|
||||
html.append("</div>")
|
||||
.append("</div>");
|
||||
}
|
||||
@@ -399,8 +456,123 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
return html.toString();
|
||||
}
|
||||
|
||||
// TODO 自动发布功能,把采集的数据发布到文章表
|
||||
private void publishNewsToArticle(List<ArticleStruct> newsList, TbCrontabTask task, String logId) {
|
||||
|
||||
// 自动发布功能,把采集的数据发布到文章资源表
|
||||
private Set<String> publishNewsToArticle(List<TbDataCollectionItem> itemList, TbCrontabTask task, String logId) {
|
||||
if (itemList == null || itemList.isEmpty()) {
|
||||
logger.info("自动发布:itemList 为空,跳过发布,任务ID: {},日志ID: {}", task.getTaskId(), logId);
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
logger.info("自动发布开始,共 {} 条,任务ID: {},日志ID: {}", itemList.size(), task.getTaskId(), logId);
|
||||
List<UserDeptRoleVO> userDeptRoleVOs = null;
|
||||
try {
|
||||
ResultDomain<UserDeptRoleVO> roleResult = roleService.getDeptRolesByUserId(task.getCreator());
|
||||
if (roleResult != null && roleResult.isSuccess()) {
|
||||
userDeptRoleVOs = roleResult.getDataList();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.warn("根据任务创建者获取部门角色失败,taskId: {},creator: {},原因: {}", task.getTaskId(), task.getCreator(), e.getMessage());
|
||||
}
|
||||
Date now = new Date();
|
||||
int successCount = 0;
|
||||
// 记录 url -> resourceId,用于后续更新采集表
|
||||
Map<String, String> urlToResourceId = new HashMap<>();
|
||||
|
||||
for (TbDataCollectionItem item : itemList) {
|
||||
if(item.getContent().isEmpty()) continue;
|
||||
try {
|
||||
TbResource resource = new TbResource();
|
||||
resource.setID(IDUtils.generateID());
|
||||
resource.setResourceID(IDUtils.generateID());
|
||||
resource.setTitle(item.getTitle());
|
||||
// 创建者使用任务创建者
|
||||
resource.setCreator(task.getCreator());
|
||||
|
||||
// 内容直接使用采集表中的 HTML
|
||||
resource.setContent(item.getContent());
|
||||
|
||||
resource.setSummary(null);
|
||||
resource.setCoverImage(null);
|
||||
// 统一打标签为专题报告(tag_article_005)
|
||||
resource.setTagID("tag_article_005");
|
||||
resource.setAuthor(item.getAuthor());
|
||||
resource.setSource(item.getSource());
|
||||
resource.setSourceUrl(item.getSourceUrl());
|
||||
|
||||
// 发布时间:优先使用采集表中的时间
|
||||
Date publishTime = item.getPublishTime() != null ? item.getPublishTime() : now;
|
||||
resource.setPublishTime(publishTime);
|
||||
|
||||
// 状态:已发布
|
||||
resource.setStatus(1);
|
||||
resource.setViewCount(0);
|
||||
resource.setLikeCount(0);
|
||||
resource.setCollectCount(0);
|
||||
resource.setIsRecommend(false);
|
||||
resource.setIsBanner(false);
|
||||
resource.setCreateTime(now);
|
||||
resource.setUpdateTime(now);
|
||||
resource.setDeleted(false);
|
||||
|
||||
// 逐条创建资源,避免整批回滚
|
||||
ResourceVO vo = new ResourceVO();
|
||||
vo.setResource(resource);
|
||||
|
||||
ResultDomain<ResourceVO> createResult = resourceService.createResource(vo, userDeptRoleVOs);
|
||||
if (createResult.isSuccess()) {
|
||||
successCount++;
|
||||
// 记录 url -> resourceId,用于回写采集表
|
||||
if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
|
||||
urlToResourceId.put(item.getSourceUrl(), resource.getResourceID());
|
||||
}
|
||||
} else {
|
||||
logger.warn("自动发布:创建资源失败,标题: {},原因: {}", item.getTitle(), createResult.getMessage());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("自动发布单条新闻异常,标题: {}", item.getTitle(), e);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("自动发布完成,共 {} 条,成功 {} 条,任务ID: {},日志ID: {}", itemList.size(), successCount, task.getTaskId(), logId);
|
||||
|
||||
// 成功创建资源后,同步更新采集表状态
|
||||
try {
|
||||
TbDataCollectionItem filter = new TbDataCollectionItem();
|
||||
filter.setTaskId(task.getTaskId());
|
||||
filter.setLogId(logId);
|
||||
|
||||
ResultDomain<DataCollectionItemVO> itemResult = itemService.getItemList(filter);
|
||||
if (itemResult.isSuccess() && itemResult.getDataList() != null) {
|
||||
for (DataCollectionItemVO itemVO : itemResult.getDataList()) {
|
||||
// 只处理未转换的采集项
|
||||
if (itemVO.getStatus() != null && itemVO.getStatus() != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String sourceUrl = itemVO.getSourceUrl();
|
||||
if (sourceUrl == null || sourceUrl.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String resourceId = urlToResourceId.get(sourceUrl);
|
||||
if (resourceId == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
TbDataCollectionItem update = new TbDataCollectionItem();
|
||||
update.setID(itemVO.getId());
|
||||
update.setStatus(1); // 已转换为资源
|
||||
update.setResourceId(resourceId);
|
||||
update.setProcessTime(now);
|
||||
update.setProcessor(task.getCreator());
|
||||
|
||||
itemService.updateItem(update);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("自动发布后更新采集表状态异常,任务ID: {},日志ID: {}", task.getTaskId(), logId, e);
|
||||
}
|
||||
|
||||
return urlToResourceId.keySet();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user