课程、文章审核
This commit is contained in:
@@ -9,6 +9,7 @@ import org.xyzh.api.crontab.DataCollectionItemService;
|
||||
import org.xyzh.api.crontab.EmailDefaultService;
|
||||
import org.xyzh.api.crontab.EmailRecipientService;
|
||||
import org.xyzh.api.crontab.TaskMetaService;
|
||||
import org.xyzh.api.news.resource.ResourceAuditService;
|
||||
import org.xyzh.api.news.resource.ResourceService;
|
||||
import org.xyzh.api.system.role.RoleService;
|
||||
import org.xyzh.common.core.domain.ResultDomain;
|
||||
@@ -65,6 +66,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
|
||||
@Autowired
|
||||
private EmailRecipientService emailRecipientService;
|
||||
|
||||
@Autowired
|
||||
private ResourceAuditService auditService;
|
||||
|
||||
@Autowired
|
||||
private EmailUtils emailUtils;
|
||||
@@ -196,7 +200,6 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
logger.info("开始保存 {} 条新闻到数据库,任务ID: {},日志ID: {}", newsList.size(), taskId, logId);
|
||||
|
||||
try {
|
||||
List<TbDataCollectionItem> itemList = new ArrayList<>();
|
||||
ResultDomain<TbCrontabTaskMeta> metaResult = taskMetaService.getTaskMetaByTaskId(taskId);
|
||||
if (!metaResult.isSuccess() || metaResult.getData() == null) {
|
||||
throw new Exception("未找到任务元数据: taskId=" + taskId);
|
||||
@@ -206,7 +209,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
Date now = new Date();
|
||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy年MM月dd日HH:mm");
|
||||
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
List<TbDataCollectionItem> itemList = new ArrayList<>();
|
||||
List<TbDataCollectionItem> notPassList = new ArrayList<>();
|
||||
List<TbDataCollectionItem> passList = new ArrayList<>();
|
||||
for (ArticleStruct news : newsList) {
|
||||
try {
|
||||
TbDataCollectionItem item = new TbDataCollectionItem();
|
||||
@@ -260,7 +265,14 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
// 状态和时间
|
||||
item.setStatus(0); // 未处理
|
||||
item.setCrawlTime(now);
|
||||
|
||||
ResultDomain<Boolean> pass = auditService.auditText(item.getContent());
|
||||
if(pass.isSuccess() && pass.getData()){
|
||||
item.setIsAudited(true);
|
||||
passList.add(item);
|
||||
}else{
|
||||
item.setIsAudited(false);
|
||||
notPassList.add(item);
|
||||
}
|
||||
itemList.add(item);
|
||||
} catch (Exception e) {
|
||||
logger.error("转换新闻数据失败: ", e);
|
||||
@@ -269,26 +281,40 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
|
||||
// 批量保存
|
||||
Set<String> insertedUrls = new HashSet<>();
|
||||
ResultDomain<TbDataCollectionItem> dataResult = new ResultDomain<>();
|
||||
if (!itemList.isEmpty()) {
|
||||
dataResult = itemService.batchCreateItems(itemList);
|
||||
if (dataResult.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", itemList.size());
|
||||
insertedUrls.addAll(dataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
|
||||
ResultDomain<TbDataCollectionItem> passDataResult = new ResultDomain<>();
|
||||
if (!passList.isEmpty()) {
|
||||
passDataResult = itemService.batchCreateItems(passList);
|
||||
if (passDataResult.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", passList.size());
|
||||
insertedUrls.addAll(passDataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
|
||||
} else {
|
||||
logger.error("保存新闻到数据库失败: {}", dataResult.getMessage());
|
||||
logger.error("保存新闻到数据库失败: {}", passDataResult.getMessage());
|
||||
}
|
||||
} else {
|
||||
logger.warn("没有有效的新闻数据需要保存");
|
||||
}
|
||||
ResultDomain<TbDataCollectionItem> notPassDataResult = new ResultDomain<>();
|
||||
if (!notPassList.isEmpty()) {
|
||||
notPassDataResult = itemService.batchCreateItems(notPassList);
|
||||
if (notPassDataResult.isSuccess()) {
|
||||
logger.info("成功保存 {} 条新闻到数据库", notPassList.size());
|
||||
insertedUrls.addAll(notPassDataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
|
||||
} else {
|
||||
logger.error("保存新闻到数据库失败: {}", notPassDataResult.getMessage());
|
||||
}
|
||||
} else {
|
||||
logger.warn("没有有效的新闻数据需要保存");
|
||||
}
|
||||
|
||||
// 自动发布并记录成功发布的 URL 集合
|
||||
Set<String> publishedUrls = new HashSet<>();
|
||||
if (taskMeta.getAutoPublish().booleanValue()){
|
||||
publishedUrls = publishNewsToArticle(dataResult.getDataList(), task, logId);
|
||||
publishedUrls = publishNewsToArticle(passDataResult.getDataList(), task, logId);
|
||||
}
|
||||
|
||||
Set<String> notPathUrls = new HashSet<>(notPassList.stream().map(TbDataCollectionItem::getSourceUrl).toList());
|
||||
// 发送邮件通知,包含自动发布与新增信息
|
||||
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls);
|
||||
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls, notPathUrls);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("保存新闻数据到数据库异常: ", e);
|
||||
@@ -300,7 +326,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
*/
|
||||
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList,
|
||||
Set<String> insertedUrls,
|
||||
Set<String> publishedUrls) {
|
||||
Set<String> publishedUrls,
|
||||
Set<String> notPassUrls
|
||||
) {
|
||||
try {
|
||||
List<String> recipients = new ArrayList<>();
|
||||
|
||||
@@ -336,7 +364,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
|
||||
// 5. 构建邮件内容
|
||||
String subject = "【新闻爬虫通知】" + task.getTaskName() + " 执行完成";
|
||||
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls);
|
||||
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls, notPassUrls);
|
||||
|
||||
// 6. 发送邮件
|
||||
int successCount = 0;
|
||||
@@ -357,8 +385,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
* 构建邮件HTML内容
|
||||
*/
|
||||
private String buildEmailContent(String taskName, List<ArticleStruct> newsList,
|
||||
java.util.Set<String> insertedUrls,
|
||||
java.util.Set<String> publishedUrls) {
|
||||
Set<String> insertedUrls,
|
||||
Set<String> publishedUrls,
|
||||
Set<String> notPathUrls) {
|
||||
StringBuilder html = new StringBuilder();
|
||||
html.append("<!DOCTYPE html>")
|
||||
.append("<html>")
|
||||
@@ -416,6 +445,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
html.append(" | <a href='").append(news.getUrl()).append("' class='news-link' target='_blank'>查看原文</a>");
|
||||
}
|
||||
|
||||
|
||||
// 入库标记(新增 / 历史已存在)
|
||||
if (news.getUrl() != null && !news.getUrl().isEmpty() && insertedUrls != null) {
|
||||
if (insertedUrls.contains(news.getUrl())) {
|
||||
@@ -425,6 +455,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
}
|
||||
}
|
||||
|
||||
// 如果该新闻未通过审核,追加标记
|
||||
if (notPathUrls != null && !notPathUrls.isEmpty()
|
||||
&& news.getUrl() != null && notPathUrls.contains(news.getUrl())) {
|
||||
html.append(" | <span style='color:#ff0000;font-weight:bold;'>【未通过审核】</span>");
|
||||
}
|
||||
|
||||
// 如果该新闻已自动发布,追加标记
|
||||
if (publishedUrls != null && !publishedUrls.isEmpty()
|
||||
&& news.getUrl() != null && publishedUrls.contains(news.getUrl())) {
|
||||
@@ -435,12 +471,6 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
.append("</div>");
|
||||
}
|
||||
|
||||
if (newsList.size() > 10) {
|
||||
html.append("<p style='text-align: center; color: #666; margin-top: 15px;'>")
|
||||
.append("还有 ").append(newsList.size() - 10).append(" 条新闻未显示,请登录系统查看详情")
|
||||
.append("</p>");
|
||||
}
|
||||
|
||||
html.append("</div>"); // news-list
|
||||
html.append("</div>"); // content
|
||||
|
||||
@@ -498,7 +528,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
|
||||
resource.setAuthor(item.getAuthor());
|
||||
resource.setSource(item.getSource());
|
||||
resource.setSourceUrl(item.getSourceUrl());
|
||||
|
||||
resource.setIsAudited(true);
|
||||
// 发布时间:优先使用采集表中的时间
|
||||
Date publishTime = item.getPublishTime() != null ? item.getPublishTime() : now;
|
||||
resource.setPublishTime(publishTime);
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
<result column="images" property="images" />
|
||||
<result column="tags" property="tags" />
|
||||
<result column="status" property="status" />
|
||||
<result column="is_audited" property="isAudited" />
|
||||
<result column="resource_id" property="resourceId" />
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
@@ -51,6 +52,7 @@
|
||||
<result column="images" property="images" />
|
||||
<result column="tags" property="tags" />
|
||||
<result column="status" property="status" />
|
||||
<result column="is_audited" property="isAudited" />
|
||||
<result column="resource_id" property="resourceId" />
|
||||
<result column="crawl_time" property="crawlTime" />
|
||||
<result column="process_time" property="processTime" />
|
||||
@@ -77,7 +79,7 @@
|
||||
<!-- 字段列表 -->
|
||||
<sql id="Base_Column_List">
|
||||
id, task_id, log_id, title, content, summary, source, source_url, category, author,
|
||||
publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time,
|
||||
publish_time, cover_image, images, tags, status, is_audited, resource_id, crawl_time, process_time,
|
||||
processor, execute_status, execute_message, create_time, update_time, delete_time, deleted
|
||||
</sql>
|
||||
|
||||
@@ -98,6 +100,7 @@
|
||||
i.images,
|
||||
i.tags,
|
||||
i.status,
|
||||
i.is_audited,
|
||||
i.resource_id,
|
||||
i.crawl_time,
|
||||
i.process_time,
|
||||
|
||||
Reference in New Issue
Block a user