自动发布文章,权限插入

This commit is contained in:
2025-11-19 13:25:32 +08:00
parent 0dcee95a45
commit 4ab8877b80
7 changed files with 307 additions and 54 deletions

View File

@@ -28,11 +28,11 @@ public interface DataCollectionItemService {
/**
* @description 批量创建采集项
* @param itemList 采集项列表
* @return ResultDomain<Integer> 创建数量
* @return ResultDomain<TbDataCollectionItem> 成功创建的采集项列表dataList
* @author yslg
* @since 2025-11-08
*/
ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList);
ResultDomain<TbDataCollectionItem> batchCreateItems(List<TbDataCollectionItem> itemList);
/**
* @description 更新采集项

View File

@@ -7,6 +7,7 @@ import org.xyzh.common.dto.resource.TbResource;
import org.xyzh.common.dto.usercenter.TbUserCollection;
import org.xyzh.common.vo.ResourceVO;
import org.xyzh.common.vo.TaskItemVO;
import org.xyzh.common.vo.UserDeptRoleVO;
import java.util.List;
@@ -56,14 +57,24 @@ public interface ResourceService {
ResultDomain<ResourceVO> getResourceById(String resourceID);
/**
* @description 创建资源
* @param resource 资源信息
* @return ResultDomain<TbResource> 创建结果
* @author yslg
* @since 2025-10-15
* @description 创建资源(使用当前登录用户部门角色创建权限)
*/
ResultDomain<ResourceVO> createResource(ResourceVO resource);
/**
* @description 创建资源(显式指定创建者部门角色,适用于定时任务等无登录用户场景)
* @param resource 资源信息
* @param userDeptRoles 创建者部门角色列表
*/
ResultDomain<ResourceVO> createResource(ResourceVO resource, java.util.List<UserDeptRoleVO> userDeptRoles);
/**
* @description 批量创建资源
* @param resources 资源列表
* @return ResultDomain<Integer> 成功创建的数量
*/
ResultDomain<Integer> batchCreateResources(List<TbResource> resources);
/**
* @description 更新资源
* @param resource 资源信息

View File

@@ -30,6 +30,11 @@
<artifactId>api-news</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.xyzh</groupId>
<artifactId>api-system</artifactId>
<version>1.0.0</version>
</dependency>
<!-- Common模块依赖 -->
<dependency>

View File

@@ -96,8 +96,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchCreateItems(List<TbDataCollectionItem> itemList) {
ResultDomain<Integer> resultDomain = new ResultDomain<>();
public ResultDomain<TbDataCollectionItem> batchCreateItems(List<TbDataCollectionItem> itemList) {
ResultDomain<TbDataCollectionItem> resultDomain = new ResultDomain<>();
try {
if (itemList == null || itemList.isEmpty()) {
resultDomain.fail("采集项列表为空");
@@ -105,18 +105,17 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
}
int successCount = 0;
Date now = new Date();
List<TbDataCollectionItem> newItems = new ArrayList<>();
List<TbDataCollectionItem> createdItems = new ArrayList<>();
// 改为逐条创建,避免单条失败导致整批回滚
for (TbDataCollectionItem it : itemList) {
TbDataCollectionItem existing = itemMapper.selectBySourceUrl(it.getSourceUrl());
if (existing == null) {
newItems.add(it);
ResultDomain<TbDataCollectionItem> createResult = createItem(it);
if (createResult.isSuccess()) {
successCount++;
if (createResult.getData() != null) {
createdItems.add(createResult.getData());
}
}
}
if (!newItems.isEmpty()) {
successCount = itemMapper.batchInsertItems(newItems);
}
String logId = itemList.get(0).getLogId();
TbCrontabLog log = new TbCrontabLog();
log.setID(logId);
@@ -125,7 +124,7 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService
int i = logMapper.updateLog(log);
logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount);
resultDomain.success("批量创建采集项成功", successCount);
resultDomain.success("批量创建采集项成功", createdItems);
} catch (Exception e) {
logger.error("批量创建采集项异常: ", e);
resultDomain.fail("批量创建采集项异常: " + e.getMessage());

View File

@@ -9,14 +9,21 @@ import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.api.crontab.EmailDefaultService;
import org.xyzh.api.crontab.EmailRecipientService;
import org.xyzh.api.crontab.TaskMetaService;
import org.xyzh.api.news.resource.ResourceService;
import org.xyzh.api.system.role.RoleService;
import org.xyzh.common.core.domain.ResultDomain;
import org.xyzh.common.dto.crontab.TbCrontabEmailDefault;
import org.xyzh.common.dto.crontab.TbCrontabEmailRecipient;
import org.xyzh.common.dto.crontab.TbCrontabTask;
import org.xyzh.common.dto.crontab.TbCrontabTaskMeta;
import org.xyzh.common.dto.crontab.TbDataCollectionItem;
import org.xyzh.common.dto.resource.TbResource;
import org.xyzh.common.utils.EmailUtils;
import org.xyzh.common.utils.IDUtils;
import org.xyzh.common.utils.NonUtils;
import org.xyzh.common.vo.DataCollectionItemVO;
import org.xyzh.common.vo.ResourceVO;
import org.xyzh.common.vo.UserDeptRoleVO;
import org.xyzh.crontab.pojo.TaskParams;
import org.xyzh.crontab.task.PythonCommandTask;
@@ -25,9 +32,13 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -58,6 +69,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
@Autowired
private EmailUtils emailUtils;
@Autowired
private ResourceService resourceService;
@Autowired
private RoleService roleService;
/**
* 构建Python脚本参数
*/
@@ -166,8 +183,6 @@ public class NewsCrawlerTask extends PythonCommandTask {
saveNewsToDatabase(newsList, taskResult.getData(), logId);
// 发送邮件通知
sendEmailNotification(taskId, taskResult.getData(), newsList);
} else {
logger.warn("未提供任务ID或日志ID跳过数据保存");
}
@@ -218,11 +233,22 @@ public class NewsCrawlerTask extends PythonCommandTask {
item.setSourceUrl(news.getUrl());
item.setAuthor(news.getAuthor());
// 发布时间
// 发布时间,兼容多种格式
String publishTimeStr = news.getPublishTime();
if (publishTimeStr != null && !publishTimeStr.isEmpty()) {
try {
item.setPublishTime(dateFormat.parse(dateFormat.format(parser.parse(publishTimeStr))));
Date parsed;
if (publishTimeStr.contains("")) {
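// Original crawler format: yyyy年MM月dd日HH:mm
// NOTE(review): the contains("") test above is true for every string, so the
// yyyy-MM-dd branch below can never run; presumably the literal should be "年" - confirm.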
parsed = parser.parse(publishTimeStr);
} else if (publishTimeStr.length() == 10 && publishTimeStr.charAt(4) == '-' && publishTimeStr.charAt(7) == '-') {
// 仅日期yyyy-MM-dd
parsed = new SimpleDateFormat("yyyy-MM-dd").parse(publishTimeStr);
} else {
// 其它情况按原parser尝试
parsed = parser.parse(publishTimeStr);
}
item.setPublishTime(dateFormat.parse(dateFormat.format(parsed)));
} catch (Exception e) {
logger.warn("解析发布时间失败: {}", publishTimeStr);
item.setPublishTime(now);
@@ -242,20 +268,28 @@ public class NewsCrawlerTask extends PythonCommandTask {
}
// 批量保存
Set<String> insertedUrls = new HashSet<>();
ResultDomain<TbDataCollectionItem> dataResult = new ResultDomain<>();
if (!itemList.isEmpty()) {
ResultDomain<Integer> result = itemService.batchCreateItems(itemList);
if (result.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", result.getData());
dataResult = itemService.batchCreateItems(itemList);
if (dataResult.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", itemList.size());
insertedUrls.addAll(dataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
} else {
logger.error("保存新闻到数据库失败: {}", result.getMessage());
logger.error("保存新闻到数据库失败: {}", dataResult.getMessage());
}
} else {
logger.warn("没有有效的新闻数据需要保存");
}
if (taskMeta.getAutoPublish()){
publishNewsToArticle(newsList, task, logId);
// 自动发布并记录成功发布的 URL 集合
Set<String> publishedUrls = new HashSet<>();
if (taskMeta.getAutoPublish().booleanValue()){
publishedUrls = publishNewsToArticle(dataResult.getDataList(), task, logId);
}
// 发送邮件通知,包含自动发布与新增信息
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls);
} catch (Exception e) {
logger.error("保存新闻数据到数据库异常: ", e);
}
@@ -264,7 +298,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
/**
* 发送邮件通知
*/
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList) {
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList,
Set<String> insertedUrls,
Set<String> publishedUrls) {
try {
List<String> recipients = new ArrayList<>();
@@ -300,7 +336,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
// 5. 构建邮件内容
String subject = "【新闻爬虫通知】" + task.getTaskName() + " 执行完成";
String content = buildEmailContent(task.getTaskName(), newsList);
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls);
// 6. 发送邮件
int successCount = 0;
@@ -320,7 +356,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
/**
* 构建邮件HTML内容
*/
private String buildEmailContent(String taskName, List<ArticleStruct> newsList) {
private String buildEmailContent(String taskName, List<ArticleStruct> newsList,
java.util.Set<String> insertedUrls,
java.util.Set<String> publishedUrls) {
StringBuilder html = new StringBuilder();
html.append("<!DOCTYPE html>")
.append("<html>")
@@ -350,17 +388,21 @@ public class NewsCrawlerTask extends PythonCommandTask {
.append("<div class='content'>");
// 摘要信息
int insertedCount = (insertedUrls != null) ? insertedUrls.size() : 0;
int totalCount = newsList.size();
html.append("<div class='summary'>")
.append("<p><strong>任务名称:</strong>").append(taskName).append("</p>")
.append("<p><strong>执行时间:</strong>").append(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())).append("</p>")
.append("<p><strong>爬取数量:</strong>").append(newsList.size()).append(" 条</p>")
.append("<p><strong>爬取数量:</strong>").append(totalCount).append(" 条</p>")
.append("<p><strong>入库情况:</strong>")
.append("本次新增 ").append(insertedCount).append(" 条,已有历史记录的新闻不会重复入库。</p>")
.append("</div>");
// 新闻列表
// 新闻列表(包含自动发布标记)
html.append("<div class='news-list'>")
.append("<h3>爬取内容:</h3>");
int count = Math.min(newsList.size(), 10); // 最多显示10条
int count = newsList.size(); // list every crawled item (the former 10-item cap no longer applies)
for (int i = 0; i < count; i++) {
ArticleStruct news = newsList.get(i);
html.append("<div class='news-item'>")
@@ -374,6 +416,21 @@ public class NewsCrawlerTask extends PythonCommandTask {
html.append(" | <a href='").append(news.getUrl()).append("' class='news-link' target='_blank'>查看原文</a>");
}
// 入库标记(新增 / 历史已存在)
if (news.getUrl() != null && !news.getUrl().isEmpty() && insertedUrls != null) {
if (insertedUrls.contains(news.getUrl())) {
html.append(" | <span style='color:#2e7d32;font-weight:bold;'>【本次新增】</span>");
} else {
html.append(" | <span style='color:#666;'>【历史已存在,未重复入库】</span>");
}
}
// 如果该新闻已自动发布,追加标记
if (publishedUrls != null && !publishedUrls.isEmpty()
&& news.getUrl() != null && publishedUrls.contains(news.getUrl())) {
html.append(" | <span style='color:#C62828;font-weight:bold;'>【已自动发布】</span>");
}
html.append("</div>")
.append("</div>");
}
@@ -399,8 +456,123 @@ public class NewsCrawlerTask extends PythonCommandTask {
return html.toString();
}
// TODO 自动发布功能,把采集的数据发布到文章表
private void publishNewsToArticle(List<ArticleStruct> newsList, TbCrontabTask task, String logId) {
/**
 * Auto-publish: creates one article resource per collected item and then
 * writes the generated resource ID back to the matching collection rows.
 *
 * <p>Resources are created one at a time so that a failure on a single item
 * is logged and skipped instead of aborting the remaining items. Items that
 * are {@code null} or carry no content are skipped.
 *
 * @param itemList collected items to publish; may be {@code null} or empty
 * @param task     crontab task that produced the items; its creator is used as
 *                 resource creator and as the user whose dept roles are looked up
 * @param logId    ID of the execution log, used to find the collection rows to update
 * @return a new set holding the source URLs of all items whose resource was
 *         created successfully (items without a source URL are not included);
 *         never {@code null}
 */
private Set<String> publishNewsToArticle(List<TbDataCollectionItem> itemList, TbCrontabTask task, String logId) {
    if (itemList == null || itemList.isEmpty()) {
        logger.info("自动发布itemList 为空跳过发布任务ID: {}日志ID: {}", task.getTaskId(), logId);
        return new HashSet<>();
    }

    logger.info("自动发布开始,共 {} 条任务ID: {}日志ID: {}", itemList.size(), task.getTaskId(), logId);

    // Best effort: a failed role lookup leaves the list null and publishing continues.
    List<UserDeptRoleVO> userDeptRoleVOs = null;
    try {
        ResultDomain<UserDeptRoleVO> roleResult = roleService.getDeptRolesByUserId(task.getCreator());
        if (roleResult != null && roleResult.isSuccess()) {
            userDeptRoleVOs = roleResult.getDataList();
        }
    } catch (Exception e) {
        logger.warn("根据任务创建者获取部门角色失败taskId: {}creator: {},原因: {}", task.getTaskId(), task.getCreator(), e.getMessage());
    }

    Date now = new Date();
    int successCount = 0;
    // source URL -> resource ID, used afterwards to update the collection table
    Map<String, String> urlToResourceId = new HashMap<>();

    for (TbDataCollectionItem item : itemList) {
        // Null-safe skip: previously a null content threw an NPE outside the
        // per-item try block and aborted the whole publish run.
        if (item == null || item.getContent() == null || item.getContent().isEmpty()) {
            continue;
        }
        try {
            TbResource resource = buildPublishedResource(item, task, now);

            // Create resources one by one so a single failure does not affect the others.
            ResourceVO vo = new ResourceVO();
            vo.setResource(resource);
            ResultDomain<ResourceVO> createResult = resourceService.createResource(vo, userDeptRoleVOs);
            if (createResult.isSuccess()) {
                successCount++;
                if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) {
                    urlToResourceId.put(item.getSourceUrl(), resource.getResourceID());
                }
            } else {
                logger.warn("自动发布:创建资源失败,标题: {},原因: {}", item.getTitle(), createResult.getMessage());
            }
        } catch (Exception e) {
            logger.error("自动发布单条新闻异常,标题: {}", item.getTitle(), e);
        }
    }

    logger.info("自动发布完成,共 {} 条,成功 {} 条任务ID: {}日志ID: {}", itemList.size(), successCount, task.getTaskId(), logId);

    // Nothing to write back when no resource was recorded, so skip the lookup query.
    if (!urlToResourceId.isEmpty()) {
        markItemsConverted(urlToResourceId, task, logId, now);
    }

    // Return a copy rather than a live view of the local map's key set.
    return new HashSet<>(urlToResourceId.keySet());
}

/** Builds the resource that represents one collected item, marked as published. */
private TbResource buildPublishedResource(TbDataCollectionItem item, TbCrontabTask task, Date now) {
    TbResource resource = new TbResource();
    resource.setID(IDUtils.generateID());
    resource.setResourceID(IDUtils.generateID());
    resource.setTitle(item.getTitle());
    // The task creator becomes the resource creator.
    resource.setCreator(task.getCreator());
    // The collected HTML is used as resource content unchanged.
    resource.setContent(item.getContent());
    resource.setSummary(null);
    resource.setCoverImage(null);
    // Every auto-published resource gets the same fixed tag.
    resource.setTagID("tag_article_005");
    resource.setAuthor(item.getAuthor());
    resource.setSource(item.getSource());
    resource.setSourceUrl(item.getSourceUrl());
    // Prefer the collected publish time; fall back to the current time.
    Date publishTime = item.getPublishTime() != null ? item.getPublishTime() : now;
    resource.setPublishTime(publishTime);
    // Status 1: published.
    resource.setStatus(1);
    resource.setViewCount(0);
    resource.setLikeCount(0);
    resource.setCollectCount(0);
    resource.setIsRecommend(false);
    resource.setIsBanner(false);
    resource.setCreateTime(now);
    resource.setUpdateTime(now);
    resource.setDeleted(false);
    return resource;
}

/** Marks the collection rows of this task/log whose URL was published as converted (status 1). */
private void markItemsConverted(Map<String, String> urlToResourceId, TbCrontabTask task, String logId, Date now) {
    try {
        TbDataCollectionItem filter = new TbDataCollectionItem();
        filter.setTaskId(task.getTaskId());
        filter.setLogId(logId);
        ResultDomain<DataCollectionItemVO> itemResult = itemService.getItemList(filter);
        if (!itemResult.isSuccess() || itemResult.getDataList() == null) {
            return;
        }
        for (DataCollectionItemVO itemVO : itemResult.getDataList()) {
            // Only rows that are still unconverted (status null or 0) are updated.
            if (itemVO.getStatus() != null && itemVO.getStatus() != 0) {
                continue;
            }
            String sourceUrl = itemVO.getSourceUrl();
            if (sourceUrl == null || sourceUrl.isEmpty()) {
                continue;
            }
            String resourceId = urlToResourceId.get(sourceUrl);
            if (resourceId == null) {
                continue;
            }

            TbDataCollectionItem update = new TbDataCollectionItem();
            update.setID(itemVO.getId());
            update.setStatus(1); // converted to a resource
            update.setResourceId(resourceId);
            update.setProcessTime(now);
            update.setProcessor(task.getCreator());
            itemService.updateItem(update);
        }
    } catch (Exception e) {
        // Best effort: the resources already exist, so a failed write-back is only logged.
        logger.error("自动发布后更新采集表状态异常任务ID: {}日志ID: {}", task.getTaskId(), logId, e);
    }
}
}

View File

@@ -73,6 +73,65 @@ public class NCResourceServiceImpl implements ResourceService {
}
}
/**
 * Inserts the given resources with a single mapper call after filling in
 * any bookkeeping fields the caller left unset.
 *
 * @param resources resources to insert; a {@code null} or empty list yields a failed result
 * @return a successful result carrying the row count reported by the mapper,
 *         or a failed result when the list is empty, no row was inserted,
 *         or an exception was raised (exceptions are caught and logged here)
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<Integer> batchCreateResources(List<TbResource> resources) {
    ResultDomain<Integer> outcome = new ResultDomain<>();
    try {
        boolean nothingToInsert = resources == null || resources.isEmpty();
        if (nothingToInsert) {
            outcome.fail("资源列表不能为空");
            return outcome;
        }

        // One shared timestamp so every row of the batch gets the same create/update time.
        Date timestamp = new Date();
        for (TbResource entry : resources) {
            applyInsertDefaults(entry, timestamp);
        }

        int inserted = resourceMapper.batchInsertResources(resources);
        if (inserted <= 0) {
            outcome.fail("批量创建资源失败");
        } else {
            logger.info("批量创建资源成功,共 {} 条", inserted);
            outcome.success("批量创建资源成功", inserted);
        }
    } catch (Exception ex) {
        logger.error("批量创建资源异常: {}", ex.getMessage(), ex);
        outcome.fail("批量创建资源失败: " + ex.getMessage());
    }
    return outcome;
}

/** Fills every unset ID, timestamp, status, counter and flag of one resource with its insert default. */
private void applyInsertDefaults(TbResource entry, Date timestamp) {
    if (entry.getID() == null) {
        entry.setID(IDUtils.generateID());
    }
    if (entry.getResourceID() == null) {
        entry.setResourceID(IDUtils.generateID());
    }
    if (entry.getCreateTime() == null) {
        entry.setCreateTime(timestamp);
    }
    if (entry.getUpdateTime() == null) {
        entry.setUpdateTime(timestamp);
    }
    if (entry.getStatus() == null) {
        entry.setStatus(0);
    }
    if (entry.getViewCount() == null) {
        entry.setViewCount(0);
    }
    if (entry.getLikeCount() == null) {
        entry.setLikeCount(0);
    }
    if (entry.getCollectCount() == null) {
        entry.setCollectCount(0);
    }
    if (entry.getIsRecommend() == null) {
        entry.setIsRecommend(false);
    }
    if (entry.getIsBanner() == null) {
        entry.setIsBanner(false);
    }
}
@Override
public ResultDomain<TbResource> getResourcePage(TbResource filter, PageParam pageParam) {
ResultDomain<TbResource> resultDomain = new ResultDomain<>();
@@ -161,6 +220,19 @@ public class NCResourceServiceImpl implements ResourceService {
/**
 * Creates a resource on behalf of the currently logged-in user.
 *
 * <p>Looks up the current user's dept roles and delegates to
 * {@link #createResource(ResourceVO, List)}. If the lookup throws, a warning
 * is logged and {@code null} is passed as the role list.
 *
 * @param resourceVO resource to create
 * @return the result produced by the two-argument overload
 */
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<ResourceVO> createResource(ResourceVO resourceVO) {
    return createResource(resourceVO, currentDeptRolesOrNull());
}

/** Returns the dept roles of the logged-in user, or {@code null} when they cannot be resolved. */
private List<UserDeptRoleVO> currentDeptRolesOrNull() {
    try {
        return LoginUtil.getCurrentDeptRole();
    } catch (Exception ex) {
        logger.warn("获取当前用户部门角色失败: {}", ex.getMessage());
        return null;
    }
}
@Override
@Transactional(rollbackFor = Exception.class)
public ResultDomain<ResourceVO> createResource(ResourceVO resourceVO, List<UserDeptRoleVO> userDeptRoles) {
ResultDomain<ResourceVO> resultDomain = new ResultDomain<>();
try {
// 参数验证
@@ -169,14 +241,6 @@ public class NCResourceServiceImpl implements ResourceService {
return resultDomain;
}
// 检查标题是否已存在
List<UserDeptRoleVO> userDeptRoles = LoginUtil.getCurrentDeptRole();
int count = resourceMapper.countByTitle(resourceVO.getResource().getTitle(), null, userDeptRoles);
if (count > 0) {
resultDomain.fail("资源标题已存在");
return resultDomain;
}
// 设置默认值
if (resourceVO.getResource().getID() == null) {
resourceVO.getResource().setID(IDUtils.generateID());

View File

@@ -288,13 +288,15 @@
<insert id="batchInsertResources" parameterType="java.util.List">
INSERT INTO tb_resource (
id, resource_id, title, content, summary, cover_image, tag_id, author, source,
source_url, creator,create_time
source_url, status, is_recommend, is_banner, publish_time,
creator, updater, create_time, update_time, deleted
) VALUES
<foreach collection="resourceList" item="item" separator=",">
(
#{item.id}, #{item.resourceID}, #{item.title}, #{item.content}, #{item.summary}, #{item.coverImage},
#{item.tagID}, #{item.author}, #{item.source}, #{item.sourceUrl},
#{item.creator}, #{item.createTime}
#{item.status}, #{item.isRecommend}, #{item.isBanner}, #{item.publishTime},
#{item.creator}, #{item.updater}, #{item.createTime}, #{item.updateTime}, #{item.deleted}
)
</foreach>
</insert>