Course and article audit

2025-11-19 15:11:30 +08:00
parent 4ab8877b80
commit d568781ce9
24 changed files with 379 additions and 65 deletions


@@ -9,6 +9,7 @@ import org.xyzh.api.crontab.DataCollectionItemService;
import org.xyzh.api.crontab.EmailDefaultService;
import org.xyzh.api.crontab.EmailRecipientService;
import org.xyzh.api.crontab.TaskMetaService;
import org.xyzh.api.news.resource.ResourceAuditService;
import org.xyzh.api.news.resource.ResourceService;
import org.xyzh.api.system.role.RoleService;
import org.xyzh.common.core.domain.ResultDomain;
@@ -65,6 +66,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
@Autowired
private EmailRecipientService emailRecipientService;
@Autowired
private ResourceAuditService auditService;
@Autowired
private EmailUtils emailUtils;
@@ -196,7 +200,6 @@ public class NewsCrawlerTask extends PythonCommandTask {
logger.info("开始保存 {} 条新闻到数据库任务ID: {}日志ID: {}", newsList.size(), taskId, logId);
try {
List<TbDataCollectionItem> itemList = new ArrayList<>();
ResultDomain<TbCrontabTaskMeta> metaResult = taskMetaService.getTaskMetaByTaskId(taskId);
if (!metaResult.isSuccess() || metaResult.getData() == null) {
throw new Exception("未找到任务元数据: taskId=" + taskId);
@@ -206,7 +209,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
Date now = new Date();
SimpleDateFormat parser = new SimpleDateFormat("yyyy年MM月dd日HH:mm");
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
List<TbDataCollectionItem> itemList = new ArrayList<>();
List<TbDataCollectionItem> notPassList = new ArrayList<>();
List<TbDataCollectionItem> passList = new ArrayList<>();
for (ArticleStruct news : newsList) {
try {
TbDataCollectionItem item = new TbDataCollectionItem();
@@ -260,7 +265,14 @@ public class NewsCrawlerTask extends PythonCommandTask {
// Status and timestamps
item.setStatus(0); // 0 = unprocessed
item.setCrawlTime(now);
ResultDomain<Boolean> pass = auditService.auditText(item.getContent());
if (pass.isSuccess() && pass.getData()) {
item.setIsAudited(true);
passList.add(item);
} else {
item.setIsAudited(false);
notPassList.add(item);
}
itemList.add(item);
} catch (Exception e) {
logger.error("转换新闻数据失败: ", e);
@@ -269,26 +281,40 @@ public class NewsCrawlerTask extends PythonCommandTask {
// Batch save
Set<String> insertedUrls = new HashSet<>();
ResultDomain<TbDataCollectionItem> dataResult = new ResultDomain<>();
if (!itemList.isEmpty()) {
dataResult = itemService.batchCreateItems(itemList);
if (dataResult.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", itemList.size());
insertedUrls.addAll(dataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
ResultDomain<TbDataCollectionItem> passDataResult = new ResultDomain<>();
if (!passList.isEmpty()) {
passDataResult = itemService.batchCreateItems(passList);
if (passDataResult.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", passList.size());
insertedUrls.addAll(passDataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
} else {
logger.error("保存新闻到数据库失败: {}", dataResult.getMessage());
logger.error("保存新闻到数据库失败: {}", passDataResult.getMessage());
}
} else {
logger.warn("没有有效的新闻数据需要保存");
}
ResultDomain<TbDataCollectionItem> notPassDataResult = new ResultDomain<>();
if (!notPassList.isEmpty()) {
notPassDataResult = itemService.batchCreateItems(notPassList);
if (notPassDataResult.isSuccess()) {
logger.info("成功保存 {} 条新闻到数据库", notPassList.size());
insertedUrls.addAll(notPassDataResult.getDataList().stream().map(TbDataCollectionItem::getSourceUrl).toList());
} else {
logger.error("保存新闻到数据库失败: {}", notPassDataResult.getMessage());
}
} else {
logger.warn("没有有效的新闻数据需要保存");
}
// Auto-publish and record the set of successfully published URLs
Set<String> publishedUrls = new HashSet<>();
if (Boolean.TRUE.equals(taskMeta.getAutoPublish())) { // null-safe check on the autoPublish flag
publishedUrls = publishNewsToArticle(dataResult.getDataList(), task, logId);
publishedUrls = publishNewsToArticle(passDataResult.getDataList(), task, logId);
}
Set<String> notPassUrls = new HashSet<>(notPassList.stream().map(TbDataCollectionItem::getSourceUrl).toList());
// Send email notification, including auto-publish and insertion info
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls);
sendEmailNotification(task.getTaskId(), task, newsList, insertedUrls, publishedUrls, notPassUrls);
} catch (Exception e) {
logger.error("保存新闻数据到数据库异常: ", e);
@@ -300,7 +326,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
*/
private void sendEmailNotification(String taskId, TbCrontabTask task, List<ArticleStruct> newsList,
Set<String> insertedUrls,
Set<String> publishedUrls) {
Set<String> publishedUrls,
Set<String> notPassUrls
) {
try {
List<String> recipients = new ArrayList<>();
@@ -336,7 +364,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
// 5. 构建邮件内容
String subject = "[News Crawler Notification] " + task.getTaskName() + " execution completed";
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls);
String content = buildEmailContent(task.getTaskName(), newsList, insertedUrls, publishedUrls, notPassUrls);
// 6. 发送邮件
int successCount = 0;
@@ -357,8 +385,9 @@ public class NewsCrawlerTask extends PythonCommandTask {
* Build the HTML email content
*/
private String buildEmailContent(String taskName, List<ArticleStruct> newsList,
java.util.Set<String> insertedUrls,
java.util.Set<String> publishedUrls) {
Set<String> insertedUrls,
Set<String> publishedUrls,
Set<String> notPassUrls) {
StringBuilder html = new StringBuilder();
html.append("<!DOCTYPE html>")
.append("<html>")
@@ -416,6 +445,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
html.append(" | <a href='").append(news.getUrl()).append("' class='news-link' target='_blank'>查看原文</a>");
}
// Insertion badge (newly added / already existed in history)
if (news.getUrl() != null && !news.getUrl().isEmpty() && insertedUrls != null) {
if (insertedUrls.contains(news.getUrl())) {
@@ -425,6 +455,12 @@ public class NewsCrawlerTask extends PythonCommandTask {
}
}
// If this item did not pass the audit, append a badge
if (notPassUrls != null && !notPassUrls.isEmpty()
&& news.getUrl() != null && notPassUrls.contains(news.getUrl())) {
html.append(" | <span style='color:#ff0000;font-weight:bold;'>[Audit Failed]</span>");
}
// If this news item was auto-published, append a badge
if (publishedUrls != null && !publishedUrls.isEmpty()
&& news.getUrl() != null && publishedUrls.contains(news.getUrl())) {
@@ -435,12 +471,6 @@ public class NewsCrawlerTask extends PythonCommandTask {
.append("</div>");
}
if (newsList.size() > 10) {
html.append("<p style='text-align: center; color: #666; margin-top: 15px;'>")
.append("还有 ").append(newsList.size() - 10).append(" 条新闻未显示,请登录系统查看详情")
.append("</p>");
}
html.append("</div>"); // news-list
html.append("</div>"); // content
@@ -498,7 +528,7 @@ public class NewsCrawlerTask extends PythonCommandTask {
resource.setAuthor(item.getAuthor());
resource.setSource(item.getSource());
resource.setSourceUrl(item.getSourceUrl());
resource.setIsAudited(true);
// Publish time: prefer the time from the collection record
Date publishTime = item.getPublishTime() != null ? item.getPublishTime() : now;
resource.setPublishTime(publishTime);
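
Since only the audit-passed batch (passDataResult.getDataList()) reaches publishNewsToArticle, hard-coding setIsAudited(true) here is consistent with the new flow. A slightly more defensive variant, should this method ever receive a mixed batch, would carry the flag over from the item (illustrative only):

// Propagate the item's audit flag instead of hard-coding true.
resource.setIsAudited(Boolean.TRUE.equals(item.getIsAudited()));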


@@ -21,6 +21,7 @@
<result column="images" property="images" />
<result column="tags" property="tags" />
<result column="status" property="status" />
<result column="is_audited" property="isAudited" />
<result column="resource_id" property="resourceId" />
<result column="crawl_time" property="crawlTime" />
<result column="process_time" property="processTime" />
@@ -51,6 +52,7 @@
<result column="images" property="images" />
<result column="tags" property="tags" />
<result column="status" property="status" />
<result column="is_audited" property="isAudited" />
<result column="resource_id" property="resourceId" />
<result column="crawl_time" property="crawlTime" />
<result column="process_time" property="processTime" />
@@ -77,7 +79,7 @@
<!-- Column list -->
<sql id="Base_Column_List">
id, task_id, log_id, title, content, summary, source, source_url, category, author,
publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time,
publish_time, cover_image, images, tags, status, is_audited, resource_id, crawl_time, process_time,
processor, execute_status, execute_message, create_time, update_time, delete_time, deleted
</sql>
@@ -98,6 +100,7 @@
i.images,
i.tags,
i.status,
i.is_audited,
i.resource_id,
i.crawl_time,
i.process_time,
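
The mapper changes above assume a matching property on the entity. A minimal sketch of that addition, assuming TbDataCollectionItem uses the conventional getter/setter pattern implied by setIsAudited(...) in the task code (field placement is illustrative):

// New column is_audited maps to a Boolean field on TbDataCollectionItem.
private Boolean isAudited;

public Boolean getIsAudited() { return isAudited; }
public void setIsAudited(Boolean isAudited) { this.isAudited = isAudited; }

The underlying table would also need the column itself (for example via an ALTER TABLE ... ADD COLUMN is_audited migration), which is not part of the files shown here.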