diff --git a/output/out.json b/output/out.json new file mode 100644 index 0000000..f1515f1 --- /dev/null +++ b/output/out.json @@ -0,0 +1,7 @@ +{ + "code": "0", + "message": "获取搜索结果失败", + "success": false, + "data": null, + "dataList": [] +} \ No newline at end of file diff --git a/output/output.json b/output/output.json new file mode 100644 index 0000000..c943667 --- /dev/null +++ b/output/output.json @@ -0,0 +1,324 @@ +{ + "code": 0, + "message": "", + "success": true, + "data": null, + "dataList": [ + { + "title": "", + "contentRows": [], + "url": "http://cpc.people.com.cn/n1/2025/1109/c435113-40599647.html", + "publishTime": "", + "author": "", + "source": "人民网", + "category": "" + }, + { + "title": "习近平在广东考察", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "img", + "content": "" + }, + { + "tag": "p", + "content": "

  11月7日至8日,中共中央总书记、国家主席、中央军委主席习近平在广东考察。这是7日下午,习近平在位于梅州市梅县区雁洋镇的叶剑英纪念馆,参观叶剑英生平事迹陈列。

" + }, + { + "tag": "p", + "content": "

  新华社记者 谢环驰 摄

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://pic.people.com.cn/n1/2025/1108/c426981-40599554.html", + "publishTime": "2025年11月08日17:22", + "author": "", + "source": "新华社", + "category": "" + }, + { + "title": "", + "contentRows": [], + "url": "http://cpc.people.com.cn/n1/2025/1031/c64094-40593715.html", + "publishTime": "", + "author": "", + "source": "人民网", + "category": "" + }, + { + "title": "习近平抵达韩国", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "img", + "content": "" + }, + { + "tag": "p", + "content": "

当地时间十月三十日上午,国家主席习近平乘专机抵达韩国,应大韩民国总统李在明邀请,出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。这是习近平抵达釜山金海国际机场时,韩国外长赵显等高级官员热情迎接。新华社记者 黄敬文摄

" + }, + { + "tag": "p", + "content": "

  本报韩国釜山10月30日电 (记者莽九晨、杨翘楚)当地时间10月30日上午,国家主席习近平乘专机抵达韩国,应大韩民国总统李在明邀请,出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。

" + }, + { + "tag": "p", + "content": "

  习近平抵达釜山金海国际机场时,韩国外长赵显等高级官员热情迎接。礼兵分列红地毯两侧致敬,军乐团演奏行进乐,机场鸣放21响礼炮。

" + }, + { + "tag": "p", + "content": "

  蔡奇、王毅、何立峰等陪同人员同机抵达。

" + }, + { + "tag": "p", + "content": "

  先期抵达的香港特别行政区行政长官李家超、中国驻韩国大使戴兵也到机场迎接。

" + }, + { + "tag": "p", + "content": "

  中国留学生和中资企业代表挥舞中韩两国国旗,热烈欢迎习近平到访。

" + }, + { + "tag": "p", + "content": "

  本报北京10月30日电 10月30日上午,国家主席习近平乘专机离开北京,应大韩民国总统李在明邀请,赴韩国庆州出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。

" + }, + { + "tag": "p", + "content": "

  陪同习近平出访的有:中共中央政治局常委、中央办公厅主任蔡奇,中共中央政治局委员、外交部部长王毅,中共中央政治局委员、国务院副总理何立峰等。

" + }, + { + "tag": "p", + "content": "

  《人民日报》(2025年10月31日 第01版)

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://korea.people.com.cn/n1/2025/1031/c407366-40594082.html", + "publishTime": "2025年10月31日13:38", + "author": "", + "source": "人民网-人民日报", + "category": "" + }, + { + "title": "习近平抵达韩国", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

  当地时间十月三十日上午,国家主席习近平乘专机抵达韩国,应大韩民国总统李在明邀请,出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。这是习近平抵达釜山金海国际机场时,韩国外长赵显等高级官员热情迎接。
  新华社记者 黄敬文摄

" + }, + { + "tag": "p", + "content": "

  本报韩国釜山10月30日电  (记者莽九晨、杨翘楚)当地时间10月30日上午,国家主席习近平乘专机抵达韩国,应大韩民国总统李在明邀请,出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。

" + }, + { + "tag": "p", + "content": "

  习近平抵达釜山金海国际机场时,韩国外长赵显等高级官员热情迎接。礼兵分列红地毯两侧致敬,军乐团演奏行进乐,机场鸣放21响礼炮。

" + }, + { + "tag": "p", + "content": "

  蔡奇、王毅、何立峰等陪同人员同机抵达。

" + }, + { + "tag": "p", + "content": "

  先期抵达的香港特别行政区行政长官李家超、中国驻韩国大使戴兵也到机场迎接。

" + }, + { + "tag": "p", + "content": "

  中国留学生和中资企业代表挥舞中韩两国国旗,热烈欢迎习近平到访。

" + }, + { + "tag": "p", + "content": "

  本报北京10月30日电  10月30日上午,国家主席习近平乘专机离开北京,应大韩民国总统李在明邀请,赴韩国庆州出席亚太经合组织第三十二次领导人非正式会议并对韩国进行国事访问。

" + }, + { + "tag": "p", + "content": "

  陪同习近平出访的有:中共中央政治局常委、中央办公厅主任蔡奇,中共中央政治局委员、外交部部长王毅,中共中央政治局委员、国务院副总理何立峰等。

" + }, + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

  《 人民日报 》( 2025年10月31日 01 版)

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://politics.people.com.cn/n1/2025/1031/c1024-40593454.html", + "publishTime": "2025年10月31日06:10", + "author": "", + "source": "人民网-人民日报", + "category": "" + }, + { + "title": "习近平回到北京", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

本报北京11月1日电  11月1日晚,国家主席习近平结束出席亚太经合组织第三十二次领导人非正式会议和对韩国的国事访问后回到北京。

" + }, + { + "tag": "p", + "content": "

中共中央政治局常委、中央办公厅主任蔡奇,中共中央政治局委员、外交部部长王毅等陪同人员同机返回。

" + }, + { + "tag": "p", + "content": "

本报韩国釜山11月1日电  (记者王嵘、朱笑熺)当地时间11月1日晚,国家主席习近平结束出席亚太经合组织第三十二次领导人非正式会议和对韩国的国事访问返回北京。

" + }, + { + "tag": "p", + "content": "

离开釜山时,韩国外长赵显等高级官员到机场送行。

" + }, + { + "tag": "p", + "content": "

前往机场途中,中国留学生和中资企业代表在道路两旁挥舞中韩两国国旗,热烈祝贺习近平主席访问圆满成功。

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://gd.people.com.cn/n2/2025/1102/c123932-41398959.html", + "publishTime": "2025年11月02日11:15", + "author": "", + "source": "人民网-人民日报", + "category": "" + }, + { + "title": "习近平回到北京", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

  本报北京11月1日电  11月1日晚,国家主席习近平结束出席亚太经合组织第三十二次领导人非正式会议和对韩国的国事访问后回到北京。

" + }, + { + "tag": "p", + "content": "

  中共中央政治局常委、中央办公厅主任蔡奇,中共中央政治局委员、外交部部长王毅等陪同人员同机返回。

" + }, + { + "tag": "p", + "content": "

  本报韩国釜山11月1日电  (记者王嵘、朱笑熺)当地时间11月1日晚,国家主席习近平结束出席亚太经合组织第三十二次领导人非正式会议和对韩国的国事访问返回北京。

" + }, + { + "tag": "p", + "content": "

  离开釜山时,韩国外长赵显等高级官员到机场送行。

" + }, + { + "tag": "p", + "content": "

  前往机场途中,中国留学生和中资企业代表在道路两旁挥舞中韩两国国旗,热烈祝贺习近平主席访问圆满成功。

" + }, + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

  《 人民日报 》( 2025年11月02日 01 版)

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://politics.people.com.cn/n1/2025/1102/c1024-40594763.html", + "publishTime": "2025年11月02日05:46", + "author": "", + "source": "人民网-人民日报", + "category": "" + }, + { + "title": "", + "contentRows": [], + "url": "http://cpc.people.com.cn/n1/2025/1102/c64094-40594809.html", + "publishTime": "", + "author": "", + "source": "人民网", + "category": "" + }, + { + "title": "《习近平的文化情缘》《习近平经济思想系列讲读》在澳门启播", + "contentRows": [ + { + "tag": "p", + "content": "

" + }, + { + "tag": "p", + "content": "

人民网澳门9月28日电 (记者富子梅)《习近平的文化情缘》及《习近平经济思想系列讲读》两部专题片在澳门启播仪式28日举行。澳门特区行政长官岑浩辉,中宣部副部长、中央广播电视总台台长兼总编辑慎海雄,中央政府驻澳门特区联络办公室主任郑新聪出席活动并致辞。

" + }, + { + "tag": "img", + "content": "" + }, + { + "tag": "p", + "content": "

《习近平的文化情缘》《习近平经济思想系列讲读》澳门启播仪式。(澳门特区政府新闻局供图)

" + }, + { + "tag": "p", + "content": "

岑浩辉表示,《习近平的文化情缘》《习近平经济思想系列讲读》在澳门落地启播,高度契合澳门中西荟萃、内联外通的优势和功能,具有重大而且深远的意义。期待以此为契机,持续深化推动广大澳门同胞和海内外人士对习近平新时代中国特色社会主义思想的关注、理解和实践,共同讲好中国故事、促进国际交流、不断扩大“朋友圈”

" + }, + { + "tag": "p", + "content": "

慎海雄指出,两部精品节目是助力澳门各界更好学习领会领袖思想的一次生动实践,是让澳门居民深切感悟中华文明深厚底蕴和新时代伟大成就的一场文化盛宴。

" + }, + { + "tag": "p", + "content": "

郑新聪表示,两部精品节目在澳门播出,有力促进习近平文化思想、习近平经济思想的宣传普及、落地生根,将为澳门打造中西文明交流互鉴的重要窗口、推动经济适度多元发展提供精神动力和科学指引。

" + }, + { + "tag": "p", + "content": "

9月28日起,电视专题片《习近平的文化情缘》在澳门广播电视股份有限公司的澳视澳门频道、澳门有线电视股份有限公司互动新闻台、澳门莲花卫视传媒有限公司网站,以及《澳门日报》《大众报》《市民日报》《濠江日报》《正报》《澳门商报》《澳门焦点报》《莲花时报》等媒体的新媒体平台陆续上线。大型专题节目《习近平经济思想系列讲读》9月28日起在澳广视旗下电视频道及新媒体平台上线播出。

" + }, + { + "tag": "p", + "content": "

启播仪式后举行的“盛世莲开颂华章 - 中央广播电视总台与澳门各界深化合作仪式”上,双方代表分别交换《中央广播电视总台与澳门特别行政区政府深化战略合作框架协议》、《国家电影局与澳门特别行政区政府社会文化司关于电影产业合作框架协议》、《十五运会和残特奥会澳门赛区筹备办公室与中央广播电视总台合作意向书》、《中央广播电视总台与澳门广播电视股份有限公司关于整频道转播央视CCTV-5体育频道的协议》、《中央广播电视总台亚太总站与澳门大学深化战略合作框架协议》等5份合作文件。

" + }, + { + "tag": "img", + "content": "" + } + ], + "url": "http://gba.people.cn/n1/2025/0928/c42272-40573895.html", + "publishTime": "2025年09月28日16:44", + "author": "", + "source": "人民网-大湾区频道", + "category": "" + }, + { + "title": "", + "contentRows": [], + "url": "http://cpc.people.com.cn/n1/2025/0926/c64094-40572435.html", + "publishTime": "", + "author": "", + "source": "人民网", + "category": "" + } + ] +} \ No newline at end of file diff --git a/schoolNewsCrawler/crawler/BaseCrawler.py b/schoolNewsCrawler/crawler/BaseCrawler.py index 6046c15..f20d190 100644 --- a/schoolNewsCrawler/crawler/BaseCrawler.py +++ b/schoolNewsCrawler/crawler/BaseCrawler.py @@ -66,7 +66,7 @@ class BaseCrawler(ABC): self.session.headers.update(config.headers) logger.info(f"初始化爬虫: {self.__class__.__name__}") - def fetch(self, url: str, method: str = "GET", data: Optional[Dict[str, Any]] = None, **kwargs) -> Optional[requests.Response]: + def fetch(self, url: str, method: str = "GET", data: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, **kwargs) -> Optional[requests.Response]: """ 发送HTTP请求 @@ -74,6 +74,7 @@ class BaseCrawler(ABC): url: 请求URL method: 请求方法 data: 请求数据 + headers: 额外的请求头,将与默认请求头合并(额外的优先) **kwargs: 其他请求参数 Returns: @@ -82,11 +83,20 @@ class BaseCrawler(ABC): for attempt in range(self.config.retry_times): try: logger.info(f"请求URL: {url} (尝试 {attempt + 1}/{self.config.retry_times})") + + # 合并默认headers与调用方headers(调用方覆盖默认) + request_headers = dict(self.config.headers or {}) + if headers: + request_headers.update(headers) + # 如果kwargs中意外包含headers,合并后移除,避免重复传参 + extra_headers = kwargs.pop("headers", None) + if extra_headers: + request_headers.update(extra_headers) response = self.session.request( method=method, url=url, - headers=self.config.headers, + headers=request_headers, data=data, timeout=self.config.timeout, proxies={'http': self.config.proxy, 'https': self.config.proxy} if self.config.proxy else None, diff --git a/schoolNewsCrawler/crawler/RmrbCrawler.py b/schoolNewsCrawler/crawler/RmrbCrawler.py index bb15c19..b8f4fa7 100644 --- a/schoolNewsCrawler/crawler/RmrbCrawler.py +++ b/schoolNewsCrawler/crawler/RmrbCrawler.py @@ -100,14 +100,25 @@ class RmrbCrawler(BaseCrawler): search_data["page"] = page response = self.fetch(search_config.url, method=search_config.method, json=search_data, headers=search_config.headers) response_json = response.json() - if response_json.get("code") == 0: + if response_json.get("code") == '0': records = response_json.get("data", {}).get("records", []) for record in records: news = self.parse_news_detail(record.get("url")) + if news['title'] == '': + news['title'] = record.get("title") + if news['contentRows'] == []: + news['contentRows'] = record.get("contentOriginal") + if news['publishTime'] == '': + news['publishTime'] = datetime.datetime.fromtimestamp(record.get("displayTime") / 1000).date() + if news['author'] == '': + news['author'] = record.get("author") + if news['source'] == '': + news['source'] = record.get("originName") + news_list.append(news) else: resultDomain.code = response_json.get("code") - resultDomain.message = "获取搜索结果失败" + response_json.get("message") + resultDomain.message = f"获取搜索结果失败{response_json.get('message') or ''}" resultDomain.success = False return resultDomain page += 1 @@ -143,14 +154,14 @@ class RmrbCrawler(BaseCrawler): response = self.fetch(hot_point_rank_config.url, method=hot_point_rank_config.method, headers=hot_point_rank_config.headers) response_json = response.json() - if response_json.get("code") == 0: + if response_json.get("code") == '0': records = response_json.get("data", []) for record in records: news = self.parse_news_detail(record.get("url")) news_list.append(news) else: resultDomain.code = response_json.get("code") - resultDomain.message = "获取人民日报热点排行失败" + response_json.get("message") + resultDomain.message = f"获取人民日报热点排行失败{response_json.get('message') or ''}" resultDomain.success = False return resultDomain resultDomain.success = True @@ -160,7 +171,7 @@ class RmrbCrawler(BaseCrawler): except Exception as e: logger.error(f"获取人民日报热点排行失败: {str(e)}") resultDomain.code = 0 - resultDomain.message = "获取人民日报热点排行失败" + str(e) + resultDomain.message = f"获取人民日报热点排行失败{str(e)}" resultDomain.success = False return resultDomain @@ -178,19 +189,19 @@ class RmrbCrawler(BaseCrawler): date_str = date.strftime("%Y%m%d") one_day_trending_news_config = self.config.urls.get("one_day_trending_news") - one_day_trending_news_config.url = one_day_trending_news_config.url.format(date_str) + one_day_trending_news_config.url = one_day_trending_news_config.url.format(date=date_str) response = self.fetch(one_day_trending_news_config.url, method=one_day_trending_news_config.method, headers=one_day_trending_news_config.headers) if not response: logger.error(f"获取响应失败: {one_day_trending_news_config.url}") resultDomain.code = 0 - resultDomain.message = "获取响应失败" + one_day_trending_news_config.url + resultDomain.message = f"获取响应失败{one_day_trending_news_config.url or ''}" resultDomain.success = False return resultDomain soup = self.parse_html(response.content) if not soup: logger.error(f"解析HTML失败: {one_day_trending_news_config.url}") resultDomain.code = 0 - resultDomain.message = "解析HTML失败" + one_day_trending_news_config.url + resultDomain.message = f"解析HTML失败{one_day_trending_news_config.url or ''}" resultDomain.success = False return resultDomain @@ -215,7 +226,7 @@ class RmrbCrawler(BaseCrawler): except Exception as e: logger.error(f"获取人民日报一天内的热点新闻失败: {str(e)}") resultDomain.code = 0 - resultDomain.message = "获取人民日报一天内的热点新闻失败" + str(e) + resultDomain.message = f"获取人民日报一天内的热点新闻失败{str(e)}" resultDomain.success = False return resultDomain @@ -243,7 +254,7 @@ class RmrbCrawler(BaseCrawler): except Exception as e: logger.error(f"获取人民日报多天内的热点新闻失败: {str(e)}") resultDomain.code = 0 - resultDomain.message = "获取人民日报多天内的热点新闻失败" + str(e) + resultDomain.message = f"获取人民日报多天内的热点新闻失败{str(e)}" resultDomain.success = False return resultDomain @@ -259,29 +270,37 @@ class RmrbCrawler(BaseCrawler): """ try: response = self.fetch(url) - + news = NewsItem( + title="", + contentRows=[], # 修复:使用 contents 而不是 content + url=url, + publishTime="", + author="", + source="人民网", + category="" + ) if not response: logger.error(f"获取响应失败: {url}") - return None + return news # BeautifulSoup 可以自动检测并解码编码,直接传入字节数据即可 # 它会从 HTML 的 标签或响应头自动检测编码 soup = self.parse_html(response.content) if not soup: logger.error("解析HTML失败") - return None + return news # 提取主内容区域 main_div = soup.find("div", class_="layout rm_txt cf") if not main_div: logger.error("未找到主内容区域") - return None + return news # 提取文章区域 article_div = main_div.find("div", class_="col col-1") if not article_div: logger.error("未找到文章区域") - return None + return news # 提取标题 title_tag = article_div.select_one("h1") @@ -347,15 +366,14 @@ class RmrbCrawler(BaseCrawler): "content": content }) - news = NewsItem( - title=title, - contentRows=contents, # 修复:使用 contents 而不是 content - url=url, - publishTime=publish_time, - author=author, - source=source or "人民网", - category="" - ) + + news.title=title + news.contentRows=contents # 修复:使用 contents 而不是 content + news.url=url + news.publishTime=publish_time + news.author=author + news.source=source or "人民网" + news.category="" logger.info(f"成功解析新闻: {title}") return news diff --git a/schoolNewsCrawler/crawler/RmrbHotPoint.py b/schoolNewsCrawler/crawler/RmrbHotPoint.py index ff3794d..bbd93c7 100644 --- a/schoolNewsCrawler/crawler/RmrbHotPoint.py +++ b/schoolNewsCrawler/crawler/RmrbHotPoint.py @@ -25,20 +25,27 @@ def main(): epilog=""" 示例: python RmrbHotPoint.py + python RmrbHotPoint.py --output "output/hotpoint.json" """ ) + # 添加输出文件参数 + parser.add_argument( + '--output', '-o', + type=str, + help='输出文件路径' + ) + args = parser.parse_args() + output_file = args.output + logger.info("使用直接参数模式") + try: - # 创建爬虫实例 logger.info("开始获取人民日报热点排行") crawler = RmrbCrawler() - - # 执行获取热点排行 result = crawler.hotPointRank() - # 输出JSON结果 output = { "code": result.code, "message": result.message, @@ -47,12 +54,15 @@ def main(): "dataList": [item.dict() for item in result.dataList] if result.dataList else [] } + if output_file: + output_path = Path(output_file) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(output, f, ensure_ascii=False, indent=2) + logger.info(f"结果已保存到: {output_file}") + print(json.dumps(output, ensure_ascii=False, indent=2)) - - # 关闭爬虫 crawler.close() - - # 退出码: 成功=0, 失败=1 sys.exit(0 if result.success else 1) except Exception as e: @@ -67,7 +77,6 @@ def main(): print(json.dumps(error_output, ensure_ascii=False, indent=2)) sys.exit(1) -" " + if __name__ == "__main__": - main() - + main() \ No newline at end of file diff --git a/schoolNewsCrawler/crawler/RmrbSearch.py b/schoolNewsCrawler/crawler/RmrbSearch.py index 6e4fbfe..2e67a2c 100644 --- a/schoolNewsCrawler/crawler/RmrbSearch.py +++ b/schoolNewsCrawler/crawler/RmrbSearch.py @@ -25,7 +25,8 @@ def main(): epilog=""" 示例: python RmrbSearch.py --key "教育改革" --total 20 - python RmrbSearch.py -k "科技创新" -t 15 -n 1 + python RmrbSearch.py -k "科技创新" -t 15 --type 1 + python RmrbSearch.py --key "AI" --total 5 --output "out.json" 新闻类型说明: 0 - 所有类型 (默认) @@ -38,53 +39,72 @@ def main(): ) parser.add_argument( - '--key', '-k', + '--query', '-q', type=str, required=True, - help='搜索关键词 (必需)' + help='搜索关键词' ) parser.add_argument( '--total', '-t', type=int, default=10, - help='获取新闻总数 (默认: 10)' + help='抓取数量 (默认: 10)' ) parser.add_argument( '--type', '-n', type=int, default=0, - choices=[0, 1, 2, 3, 4, 5], - help='新闻类型: 0=全部, 1=新闻, 2=互动, 3=报刊, 4=图片, 5=视频 (默认: 0)' + help='新闻类型 (默认: 0=所有类型)' + ) + + parser.add_argument( + '--output', '-o', + type=str, + help='输出文件路径' ) args = parser.parse_args() + # 获取参数 + key = args.query + total = args.total + news_type = args.type + output_file = args.output + + logger.info("使用直接参数模式") + + # 关键校验:key 必须存在 + if not key or not key.strip(): + parser.error("搜索关键词不能为空!") try: - # 创建爬虫实例 - logger.info(f"开始搜索: 关键词='{args.key}', 数量={args.total}, 类型={args.type}") + logger.info(f"开始搜索: 关键词='{key}', 数量={total}, 类型={news_type}") crawler = RmrbCrawler() + # result = crawler.search(key=key.strip(), total=total, news_type=news_type) + result = None + with open("../output/output.json", "r", encoding="utf-8") as f: + result = json.load(f) - # 执行搜索 - result = crawler.search(key=args.key, total=args.total, news_type=args.type) + output = result + # output = { + # "code": result["code"], + # "message": result["message"], + # "success": result["success"], + # "data": None, + # "dataList": [item.model_dump() for item in result["dataList"]] if result["dataList"] else [] + # } - # 输出JSON结果 - output = { - "code": result.code, - "message": result.message, - "success": result.success, - "data": None, - "dataList": [item.dict() for item in result.dataList] if result.dataList else [] - } + if output_file: + output_path = Path(output_file) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(output, f, ensure_ascii=False, indent=2) + logger.info(f"结果已保存到: {output_file}") print(json.dumps(output, ensure_ascii=False, indent=2)) - - # 关闭爬虫 crawler.close() - - # 退出码: 成功=0, 失败=1 - sys.exit(0 if result.success else 1) + sys.exit(0 if result["success"] else 1) except Exception as e: logger.error(f"执行失败: {str(e)}") @@ -100,4 +120,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/schoolNewsCrawler/crawler/RmrbTrending.py b/schoolNewsCrawler/crawler/RmrbTrending.py index 98c0658..b2dc3bb 100644 --- a/schoolNewsCrawler/crawler/RmrbTrending.py +++ b/schoolNewsCrawler/crawler/RmrbTrending.py @@ -10,7 +10,7 @@ import argparse import json import sys -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path # Add parent directory to path to import crawler @@ -20,20 +20,29 @@ from crawler.RmrbCrawler import RmrbCrawler from loguru import logger -def parse_date(date_str: str) -> datetime: +def parse_date(date_str) -> datetime: """ - 解析日期字符串为datetime对象 + 解析日期字符串或数字为datetime对象 (格式: YYYYMMDD) Args: - date_str: 日期字符串,格式为YYYYMMDD + date_str: 可为字符串或整数,如 "20250110" 或 20250110 Returns: datetime对象 + + Raises: + ValueError: 格式错误 """ + # 统一转为字符串并清理 + if date_str is None: + raise ValueError("日期不能为空") + date_str = str(date_str).strip() + if len(date_str) != 8 or not date_str.isdigit(): + raise ValueError(f"日期格式错误: '{date_str}',正确格式为YYYYMMDD,例如: '20250110'") try: return datetime.strptime(date_str, "%Y%m%d") except ValueError: - raise ValueError(f"日期格式错误: {date_str},正确格式为YYYYMMDD,例如: 20250110") + raise ValueError(f"日期格式错误: '{date_str}',正确格式为YYYYMMDD,例如: '20250110'") def main(): @@ -51,68 +60,73 @@ def main(): python RmrbTrending.py --start-date 20250101 --end-date 20250110 python RmrbTrending.py -s 20250101 -e 20250110 - # 不指定日期则获取今天的热点新闻 + # 不指定日期则根据 isYesterday 决定(默认昨日) python RmrbTrending.py """ ) - parser.add_argument( - '--date', '-d', - type=str, - help='指定日期 (格式: YYYYMMDD,例如: 20250110)' - ) - - parser.add_argument( - '--start-date', '-s', - type=str, - help='开始日期 (格式: YYYYMMDD,需与--end-date一起使用)' - ) - - parser.add_argument( - '--end-date', '-e', - type=str, - help='结束日期 (格式: YYYYMMDD,需与--start-date一起使用)' - ) + parser.add_argument('--date', '-d', type=str, help='指定日期 (格式: YYYYMMDD)') + parser.add_argument('--startDate', '-s', type=str, help='开始日期 (需与--end-date一起使用)') + parser.add_argument('--endDate', '-e', type=str, help='结束日期 (需与--start-date一起使用)') + parser.add_argument('--yesterday', '-y', action='store_true', help='查询昨日 (默认行为)') + parser.add_argument('--output', '-o', type=str, help='输出文件路径') args = parser.parse_args() + # 初始化变量 + output_file = args.output + date = args.date + start_date = args.startDate + end_date = args.endDate + is_yesterday = args.yesterday if args.yesterday else True # 默认查昨日 + + logger.info("使用直接参数模式") + + # 辅助函数:清理空字符串 + def clean(s): + return s.strip() if s and isinstance(s, str) and s.strip() else None + + date = clean(date) + start_date = clean(start_date) + end_date = clean(end_date) + try: - # 创建爬虫实例 crawler = RmrbCrawler() - # 判断使用哪种模式 - if args.date: - # 单日模式 - if args.start_date or args.end_date: - raise ValueError("不能同时使用--date和--start-date/--end-date参数") - - target_date = parse_date(args.date) - logger.info(f"获取单日热点新闻: {args.date}") + # 单日模式 + if date: + if start_date or end_date: + raise ValueError("不能同时使用 date 和 startDate/endDate 参数") + target_date = parse_date(date) + logger.info(f"获取单日热点新闻: {target_date.strftime('%Y-%m-%d')}") result = crawler.getOneDayTrendingNews(target_date) - elif args.start_date and args.end_date: - # 日期范围模式 - start_date = parse_date(args.start_date) - end_date = parse_date(args.end_date) - - if start_date > end_date: + # 日期范围模式 + elif start_date and end_date: + if date: + raise ValueError("不能同时使用 date 和 startDate/endDate 参数") + start_dt = parse_date(start_date) + end_dt = parse_date(end_date) + if start_dt > end_dt: raise ValueError("开始日期不能晚于结束日期") + logger.info(f"获取日期范围热点新闻: {start_dt.strftime('%Y-%m-%d')} 至 {end_dt.strftime('%Y-%m-%d')}") + result = crawler.getDaysTrendingNews(start_dt, end_dt) - logger.info(f"获取日期范围热点新闻: {args.start_date} 至 {args.end_date}") - result = crawler.getDaysTrendingNews(start_date, end_date) - - elif args.start_date or args.end_date: - # 只指定了一个日期 - raise ValueError("--start-date和--end-date必须同时使用") + # 只给一个边界 + elif start_date or end_date: + raise ValueError("--start-date 和 --end-date 必须同时指定") + # 默认模式 else: - # 默认使用今天的日期 - today = datetime.now() - today_str = today.strftime("%Y%m%d") - logger.info(f"获取今日热点新闻: {today_str}") - result = crawler.getOneDayTrendingNews(today) + if is_yesterday: + target_date = datetime.now() - timedelta(days=1) + logger.info(f"获取昨日热点新闻: {target_date.strftime('%Y-%m-%d')}") + else: + target_date = datetime.now() + logger.info(f"获取今日热点新闻: {target_date.strftime('%Y-%m-%d')}") + result = crawler.getOneDayTrendingNews(target_date) - # 输出JSON结果 + # 构造输出 output = { "code": result.code, "message": result.message, @@ -121,12 +135,16 @@ def main(): "dataList": [item.dict() for item in result.dataList] if result.dataList else [] } + # 保存到文件 + if output_file: + output_path = Path(output_file) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(output, f, ensure_ascii=False, indent=2) + logger.info(f"结果已保存到: {output_file}") + print(json.dumps(output, ensure_ascii=False, indent=2)) - - # 关闭爬虫 crawler.close() - - # 退出码: 成功=0, 失败=1 sys.exit(0 if result.success else 1) except ValueError as e: @@ -155,4 +173,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/schoolNewsCrawler/lxml b/schoolNewsCrawler/lxml new file mode 100644 index 0000000..e69de29 diff --git a/schoolNewsCrawler/main.py b/schoolNewsCrawler/main.py index dc9103f..6400b05 100644 --- a/schoolNewsCrawler/main.py +++ b/schoolNewsCrawler/main.py @@ -5,7 +5,9 @@ import sys import json +import argparse from typing import List +from pathlib import Path from loguru import logger from crawler.RmrbCrawler import RmrbCrawler from crawler.BaseCrawler import NewsItem @@ -83,36 +85,81 @@ def save_to_json(news_list: List[dict], output_file: str = "output/news.json"): def main(): """主函数""" - # 解析命令行参数 - category = "politics" - limit = 20 - output_file = "output/news.json" - - if len(sys.argv) > 1: - category = sys.argv[1] - if len(sys.argv) > 2: - limit = int(sys.argv[2]) - if len(sys.argv) > 3: - output_file = sys.argv[3] - + # 创建参数解析器 + parser = argparse.ArgumentParser( + description='人民日报新闻爬虫主程序', + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + # 添加位置参数(保持向后兼容) + parser.add_argument( + 'category', + nargs='?', + default='politics', + help='新闻分类 (默认: politics)' + ) + + parser.add_argument( + 'limit', + nargs='?', + type=int, + default=20, + help='爬取数量 (默认: 20)' + ) + + parser.add_argument( + 'output_file', + nargs='?', + default='output/news.json', + help='输出文件路径 (默认: output/news.json)' + ) + + # 添加JSON参数支持 + parser.add_argument( + '--json', '-j', + type=str, + help='JSON格式参数 (优先级高于其他参数)' + ) + + args = parser.parse_args() + + # 解析参数: JSON参数优先 + if args.json: + try: + json_data = json.loads(args.json) + params = json_data.get('params', {}) + category = params.get('category', 'politics') + limit = params.get('limit', 20) + output_file = json_data.get('outputFile', 'output/news.json') + logger.info("使用JSON参数模式") + except Exception as e: + logger.error(f"JSON参数解析失败: {e}") + sys.exit(1) + else: + # 使用命令行参数 + category = args.category + limit = args.limit + output_file = args.output_file + logger.info("使用命令行参数模式") + logger.info("=" * 60) logger.info("新闻爬虫程序启动") logger.info("=" * 60) - + # 爬取新闻 news_list = crawl_rmrb_news(category=category, limit=limit) - + # 保存结果 if news_list: save_to_json(news_list, output_file) - + # 输出统计信息 logger.info(f"爬取统计:") logger.info(f" - 成功: {len(news_list)} 条") logger.info(f" - 失败: {limit - len(news_list)} 条") else: logger.warning("未获取到任何新闻") - + logger.info("=" * 60) logger.info("新闻爬虫程序结束") logger.info("=" * 60) diff --git a/schoolNewsServ/.bin/mysql/sql/createTableCrontab.sql b/schoolNewsServ/.bin/mysql/sql/createTableCrontab.sql index 041e048..81fb13b 100644 --- a/schoolNewsServ/.bin/mysql/sql/createTableCrontab.sql +++ b/schoolNewsServ/.bin/mysql/sql/createTableCrontab.sql @@ -66,7 +66,7 @@ CREATE TABLE `tb_data_collection_item` ( `id` VARCHAR(64) NOT NULL COMMENT '主键ID', `task_id` VARCHAR(64) NOT NULL COMMENT '关联任务ID', `log_id` VARCHAR(64) NOT NULL COMMENT '关联执行日志ID', - `title` VARCHAR(255) NOT NULL COMMENT '文章标题', + `title` VARCHAR(255) DEFAULT NULL COMMENT '文章标题', `content` LONGTEXT DEFAULT NULL COMMENT '文章内容(HTML)', `summary` VARCHAR(500) DEFAULT NULL COMMENT '文章摘要', `source` VARCHAR(255) DEFAULT NULL COMMENT '来源(如 人民日报)', diff --git a/schoolNewsServ/admin/src/main/resources/application.yml b/schoolNewsServ/admin/src/main/resources/application.yml index b3c2d10..e2d73dd 100644 --- a/schoolNewsServ/admin/src/main/resources/application.yml +++ b/schoolNewsServ/admin/src/main/resources/application.yml @@ -114,35 +114,49 @@ school-news: crawler: - python: - path: F:\Environment\Conda\envs\shoolNewsCrewer - base: - path: F:/Project/schoolNews/schoolNewsCrawler - + # Python 可执行文件路径(Windows 建议指向 python.exe;如已在 PATH,可直接用 "python") + pythonPath: F:/Environment/Conda/envs/schoolNewsCrawler/python.exe + # 爬虫脚本根目录(NewsCrawlerTask 的工作目录) + basePath: F:/Project/schoolNews/schoolNewsCrawler crontab: items: #可供前端选择的定时任务列表 - name: 人民日报新闻爬取 methods: #爬取方式 - name: 关键字搜索爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask + clazz: newsCrewerTask + excuete_method: execute path: crawler/RmrbSearch.py params: - query: String #搜索关键字 - total: Integer #总新闻数量 + - name: query + description: 搜索关键字 + type: String + value: "" + - name: total + description: 总新闻数量 + type: Integer + value: 10 - name: 排行榜爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask + clazz: newsCrewerTask + excuete_method: execute path: crawler/RmrbHotPoint.py - name: 往日精彩头条爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask + clazz: newsCrewerTask + excuete_method: execute path: crawler/RmrbTrending.py params: - startDate: String #开始日期 - endDate: String #结束日期 - isYestoday: Boolean #是否是昨天 - - - + - name: startDate + description: 开始日期 + type: String + value: "" + - name: endDate + description: 结束日期 + type: String + value: "" + - name: yesterday + description: 是否是昨天 + type: Boolean + value: true # 文件存储配置 file: diff --git a/schoolNewsServ/admin/src/main/resources/log4j2-spring.xml b/schoolNewsServ/admin/src/main/resources/log4j2-spring.xml index fa73629..8d50c49 100644 --- a/schoolNewsServ/admin/src/main/resources/log4j2-spring.xml +++ b/schoolNewsServ/admin/src/main/resources/log4j2-spring.xml @@ -111,6 +111,9 @@ + + + @@ -162,6 +165,15 @@ + + + + + + + + + diff --git a/schoolNewsServ/api/api-crontab/src/main/java/org/xyzh/api/crontab/DataCollectionItemService.java b/schoolNewsServ/api/api-crontab/src/main/java/org/xyzh/api/crontab/DataCollectionItemService.java index 0747fdb..138bfaa 100644 --- a/schoolNewsServ/api/api-crontab/src/main/java/org/xyzh/api/crontab/DataCollectionItemService.java +++ b/schoolNewsServ/api/api-crontab/src/main/java/org/xyzh/api/crontab/DataCollectionItemService.java @@ -1,5 +1,7 @@ package org.xyzh.api.crontab; +import java.util.List; + import org.xyzh.common.core.domain.ResultDomain; import org.xyzh.common.core.page.PageParam; import org.xyzh.common.dto.crontab.TbDataCollectionItem; @@ -30,7 +32,7 @@ public interface DataCollectionItemService { * @author yslg * @since 2025-11-08 */ - ResultDomain batchCreateItems(java.util.List itemList); + ResultDomain batchCreateItems(List itemList); /** * @description 更新采集项 diff --git a/schoolNewsServ/common/common-dto/src/main/java/org/xyzh/common/vo/DataCollectionItemVO.java b/schoolNewsServ/common/common-dto/src/main/java/org/xyzh/common/vo/DataCollectionItemVO.java index 052ba8b..82cff8b 100644 --- a/schoolNewsServ/common/common-dto/src/main/java/org/xyzh/common/vo/DataCollectionItemVO.java +++ b/schoolNewsServ/common/common-dto/src/main/java/org/xyzh/common/vo/DataCollectionItemVO.java @@ -1,68 +1,427 @@ package org.xyzh.common.vo; -import org.xyzh.common.dto.crontab.TbDataCollectionItem; -import org.xyzh.common.dto.crontab.TbCrontabTask; - import java.io.Serializable; +import java.util.Date; /** - * @description 数据采集项VO + * @description 数据采集项VO (平铺结构,包含关联的任务和日志信息) * @filename DataCollectionItemVO.java * @author yslg * @copyright xyzh * @since 2025-11-08 */ public class DataCollectionItemVO implements Serializable { - + private static final long serialVersionUID = 1L; - + + // ==================== 采集项基本信息 ==================== + /** - * @description 采集项数据 + * 采集项ID */ - private TbDataCollectionItem item; - + private String id; + /** - * @description 关联的定时任务信息 + * 任务ID */ - private TbCrontabTask task; - + private String taskId; + /** - * @description 状态文本(用于前端显示) + * 日志ID */ - private String statusText; - + private String logId; + /** - * @description 是否可以编辑(未处理和已忽略的可以编辑) + * 文章标题 + */ + private String title; + + /** + * 文章内容(HTML) + */ + private String content; + + /** + * 文章摘要 + */ + private String summary; + + /** + * 来源 + */ + private String source; + + /** + * 来源URL + */ + private String sourceUrl; + + /** + * 分类 + */ + private String category; + + /** + * 作者 + */ + private String author; + + /** + * 发布时间 + */ + private Date publishTime; + + /** + * 封面图片URL + */ + private String coverImage; + + /** + * 图片列表(JSON) + */ + private String images; + + /** + * 标签 + */ + private String tags; + + /** + * 状态(0:未处理 1:已转换为资源 2:已忽略) + */ + private Integer status; + + /** + * 转换后的资源ID + */ + private String resourceId; + + /** + * 爬取时间 + */ + private Date crawlTime; + + /** + * 处理时间 + */ + private Date processTime; + + /** + * 处理人 + */ + private String processor; + + /** + * 创建时间 + */ + private Date createTime; + + /** + * 更新时间 + */ + private Date updateTime; + + // ==================== 关联的任务信息 ==================== + + /** + * 任务名称 + */ + private String taskName; + + /** + * 任务分组 + */ + private String taskGroup; + + /** + * Bean名称 + */ + private String beanName; + + /** + * 方法名称 + */ + private String methodName; + + /** + * 方法参数 + */ + private String methodParams; + + // ==================== 关联的日志信息 ==================== + + /** + * 执行状态(0:失败 1:成功) + */ + private Integer executeStatus; + + /** + * 执行时长(ms) + */ + private Long executeDuration; + + /** + * 开始时间 + */ + private Date startTime; + + /** + * 结束时间 + */ + private Date endTime; + + // ==================== 扩展字段 ==================== + + /** + * 是否可以编辑(未处理和已忽略的可以编辑) */ private Boolean canEdit; - + /** - * @description 是否可以转换为资源(未处理的可以转换) + * 是否可以转换为资源(未处理的可以转换) */ private Boolean canConvert; - public TbDataCollectionItem getItem() { - return item; + // ==================== Getter/Setter ==================== + + public String getId() { + return id; } - public void setItem(TbDataCollectionItem item) { - this.item = item; + public void setId(String id) { + this.id = id; } - public TbCrontabTask getTask() { - return task; + public String getTaskId() { + return taskId; } - public void setTask(TbCrontabTask task) { - this.task = task; + public void setTaskId(String taskId) { + this.taskId = taskId; } - public String getStatusText() { - return statusText; + public String getLogId() { + return logId; } - public void setStatusText(String statusText) { - this.statusText = statusText; + public void setLogId(String logId) { + this.logId = logId; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public String getSummary() { + return summary; + } + + public void setSummary(String summary) { + this.summary = summary; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public String getSourceUrl() { + return sourceUrl; + } + + public void setSourceUrl(String sourceUrl) { + this.sourceUrl = sourceUrl; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public String getAuthor() { + return author; + } + + public void setAuthor(String author) { + this.author = author; + } + + public Date getPublishTime() { + return publishTime; + } + + public void setPublishTime(Date publishTime) { + this.publishTime = publishTime; + } + + public String getCoverImage() { + return coverImage; + } + + public void setCoverImage(String coverImage) { + this.coverImage = coverImage; + } + + public String getImages() { + return images; + } + + public void setImages(String images) { + this.images = images; + } + + public String getTags() { + return tags; + } + + public void setTags(String tags) { + this.tags = tags; + } + + public Integer getStatus() { + return status; + } + + public void setStatus(Integer status) { + this.status = status; + } + + public String getResourceId() { + return resourceId; + } + + public void setResourceId(String resourceId) { + this.resourceId = resourceId; + } + + public Date getCrawlTime() { + return crawlTime; + } + + public void setCrawlTime(Date crawlTime) { + this.crawlTime = crawlTime; + } + + public Date getProcessTime() { + return processTime; + } + + public void setProcessTime(Date processTime) { + this.processTime = processTime; + } + + public String getProcessor() { + return processor; + } + + public void setProcessor(String processor) { + this.processor = processor; + } + + public Date getCreateTime() { + return createTime; + } + + public void setCreateTime(Date createTime) { + this.createTime = createTime; + } + + public Date getUpdateTime() { + return updateTime; + } + + public void setUpdateTime(Date updateTime) { + this.updateTime = updateTime; + } + + public String getTaskName() { + return taskName; + } + + public void setTaskName(String taskName) { + this.taskName = taskName; + } + + public String getTaskGroup() { + return taskGroup; + } + + public void setTaskGroup(String taskGroup) { + this.taskGroup = taskGroup; + } + + public String getBeanName() { + return beanName; + } + + public void setBeanName(String beanName) { + this.beanName = beanName; + } + + public String getMethodName() { + return methodName; + } + + public void setMethodName(String methodName) { + this.methodName = methodName; + } + + public String getMethodParams() { + return methodParams; + } + + public void setMethodParams(String methodParams) { + this.methodParams = methodParams; + } + + public Integer getExecuteStatus() { + return executeStatus; + } + + public void setExecuteStatus(Integer executeStatus) { + this.executeStatus = executeStatus; + } + + public Long getExecuteDuration() { + return executeDuration; + } + + public void setExecuteDuration(Long executeDuration) { + this.executeDuration = executeDuration; + } + + public Date getStartTime() { + return startTime; + } + + public void setStartTime(Date startTime) { + this.startTime = startTime; + } + + public Date getEndTime() { + return endTime; + } + + public void setEndTime(Date endTime) { + this.endTime = endTime; } public Boolean getCanEdit() { diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/config/CrawlerProperties.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/config/CrawlerProperties.java index 768b984..72960ee 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/config/CrawlerProperties.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/config/CrawlerProperties.java @@ -1,5 +1,6 @@ package org.xyzh.crontab.config; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.ConfigurationProperties; import lombok.Data; import org.springframework.stereotype.Component; @@ -9,8 +10,10 @@ import org.springframework.stereotype.Component; @Component public class CrawlerProperties { + @Value("${crawler.pythonPath}") private String pythonPath; + @Value("${crawler.basePath}") private String basePath; } diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/controller/CrontabController.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/controller/CrontabController.java index 0f3ae18..835eb97 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/controller/CrontabController.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/controller/CrontabController.java @@ -12,6 +12,10 @@ import org.xyzh.common.dto.crontab.TbCrontabTask; import org.xyzh.common.dto.crontab.TbCrontabLog; import org.xyzh.common.utils.IDUtils; import org.xyzh.crontab.pojo.CrontabItem; + +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; + import org.xyzh.common.utils.spring.SpringContextUtil; import org.xyzh.crontab.config.CrontabProperties; @@ -47,6 +51,14 @@ public class CrontabController { // 仅返回爬虫能力的元信息(任务模版列表),不包含调度相关内容 CrontabProperties props = SpringContextUtil.getBean(CrontabProperties.class); + String jString = JSON.toJSONString(props); + props = JSON.parseObject(jString, CrontabProperties.class); + props.getItems().forEach(item->item.getMethods().forEach( + method->{ + method.setClazz(null); + method.setExcuete_method(null); + method.setPath(null); + })); rd.success("ok", props.getItems()); } catch (Exception e) { rd.fail("获取可创建定时任务失败: " + e.getMessage()); @@ -63,6 +75,25 @@ public class CrontabController { public ResultDomain createCrontab(@RequestBody TbCrontabTask crontabItem) { ResultDomain rd = new ResultDomain<>(); try { + // 根据taskGroup和methodName查找配置并填充beanName和methodName + if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) { + CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName( + crontabItem.getTaskGroup(), + crontabItem.getMethodName() + ); + if (method != null) { + crontabItem.setBeanName(method.getClazz()); // 设置Bean名称 + crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名 + JSONObject methodParams = JSON.parseObject(crontabItem.getMethodParams()); + methodParams.put("scriptPath", method.getPath()); + crontabItem.setMethodParams(methodParams.toJSONString()); + + } else { + rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup() + + ", methodName=" + crontabItem.getMethodName()); + return rd; + } + } return crontabService.createTask(crontabItem); } catch (Exception e) { logger.error("创建定时任务失败", e); @@ -71,6 +102,27 @@ public class CrontabController { } } + /** + * 根据taskGroup和methodName查找对应的方法配置 + */ + private CrontabItem.CrontabMethod findMethodByTaskGroupAndMethodName(String taskGroup, String methodName) { + CrontabProperties props = SpringContextUtil.getBean(CrontabProperties.class); + if (props == null || props.getItems() == null) { + return null; + } + + for (CrontabItem item : props.getItems()) { + if (item.getName().equals(taskGroup)) { + for (CrontabItem.CrontabMethod method : item.getMethods()) { + if (method.getName().equals(methodName)) { + return method; + } + } + } + } + return null; + } + /** * 更新定时任务 * @param crontabItem @@ -80,6 +132,21 @@ public class CrontabController { public ResultDomain updateCrontab(@RequestBody TbCrontabTask crontabItem) { ResultDomain rd = new ResultDomain<>(); try { + // 根据taskGroup和methodName查找配置并填充beanName和methodName + if (crontabItem.getBeanName() == null || crontabItem.getBeanName().isEmpty()) { + CrontabItem.CrontabMethod method = findMethodByTaskGroupAndMethodName( + crontabItem.getTaskGroup(), + crontabItem.getMethodName() + ); + if (method != null) { + crontabItem.setBeanName(method.getClazz()); // 设置Bean名称 + crontabItem.setMethodName(method.getExcuete_method()); // 设置执行方法名 + } else { + rd.fail("未找到对应的配置: taskGroup=" + crontabItem.getTaskGroup() + + ", methodName=" + crontabItem.getMethodName()); + return rd; + } + } return crontabService.updateTask(crontabItem); } catch (Exception e) { logger.error("更新定时任务失败", e); @@ -146,6 +213,88 @@ public class CrontabController { return rd; } } + + /** + * 根据ID查询日志详情 + * @param logId 日志ID + * @return ResultDomain + */ + @GetMapping("/log/{logId}") + public ResultDomain getLogById(@PathVariable(required = true, name="logId") String logId) { + ResultDomain rd = new ResultDomain<>(); + try { + return crontabService.getLogById(logId); + } catch (Exception e) { + logger.error("获取日志详情失败", e); + rd.fail("获取日志详情失败: " + e.getMessage()); + return rd; + } + } + + @GetMapping("/task/validate") + public ResultDomain validateCronExpression(@RequestParam(required = true, name="cronExpression") String cronExpression) { + ResultDomain rd = new ResultDomain<>(); + try { + return crontabService.validateCronExpression(cronExpression); + } catch (Exception e) { + logger.error("验证Cron表达式失败", e); + rd.fail("验证Cron表达式失败: " + e.getMessage()); + return rd; + } + } - + + /** + * @description 启动定时任务 + * @param + * @author yslg + * @since 2025-11-11 + */ + @PostMapping("/task/start/{taskId}") + public ResultDomain startTask(@PathVariable(required = true, name="taskId") String taskId) { + ResultDomain rd = new ResultDomain<>(); + try { + return crontabService.startTask(taskId); + } catch (Exception e) { + logger.error("启动定时任务失败", e); + rd.fail("启动定时任务失败: " + e.getMessage()); + return rd; + } + } + + /** + * @description 暂停定时任务 + * @param + * @author yslg + * @since 2025-11-11 + */ + @PostMapping("/task/pause/{taskId}") + public ResultDomain pauseTask(@PathVariable(required = true, name="taskId") String taskId) { + ResultDomain rd = new ResultDomain<>(); + try { + return crontabService.pauseTask(taskId); + } catch (Exception e) { + logger.error("暂停定时任务失败", e); + rd.fail("暂停定时任务失败: " + e.getMessage()); + return rd; + } + } + + /** + * @description 立即执行一次任务 + * @param + * @author yslg + * @since 2025-11-11 + */ + @PostMapping("/task/execute/{taskId}") + public ResultDomain executeTaskOnce(@PathVariable(required = true, name="taskId") String taskId) { + ResultDomain rd = new ResultDomain<>(); + try { + return crontabService.executeTaskOnce(taskId); + } catch (Exception e) { + logger.error("执行定时任务失败", e); + rd.fail("执行定时任务失败: " + e.getMessage()); + return rd; + } + } } diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/mapper/DataCollectionItemMapper.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/mapper/DataCollectionItemMapper.java index 304a23b..b5eed75 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/mapper/DataCollectionItemMapper.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/mapper/DataCollectionItemMapper.java @@ -5,6 +5,7 @@ import org.apache.ibatis.annotations.Mapper; import org.apache.ibatis.annotations.Param; import org.xyzh.common.core.page.PageParam; import org.xyzh.common.dto.crontab.TbDataCollectionItem; +import org.xyzh.common.vo.DataCollectionItemVO; import java.util.List; @@ -82,5 +83,45 @@ public interface DataCollectionItemMapper extends BaseMapper 采集项VO列表 + * @author yslg + * @since 2025-11-08 + */ + List selectVOList(TbDataCollectionItem filter); + + /** + * @description 分页查询采集项VO列表(包含关联的任务和日志信息) + * @param filter 过滤条件 + * @param pageParam 分页参数 + * @return List 采集项VO列表 + * @author yslg + * @since 2025-11-08 + */ + List selectVOPage(@Param("filter") TbDataCollectionItem filter, @Param("pageParam") PageParam pageParam); + + /** + * @description 根据任务ID查询采集项VO列表(包含关联的任务和日志信息) + * @param taskId 任务ID + * @return List 采集项VO列表 + * @author yslg + * @since 2025-11-08 + */ + List selectVOByTaskId(@Param("taskId") String taskId); } + diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/pojo/CrontabItem.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/pojo/CrontabItem.java index b753729..05fade7 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/pojo/CrontabItem.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/pojo/CrontabItem.java @@ -16,9 +16,17 @@ public class CrontabItem { @Data public static class CrontabMethod { private String name; - @JSONField(name = "class") private String clazz; + private String excuete_method; private String path; - private Map params; + private List params; + } + + @Data + public static class CrontabParam { + private String name; + private String description; + private String type; + private Object value; } } diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/scheduler/TaskExecutor.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/scheduler/TaskExecutor.java index 15c2c8e..96a8f61 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/scheduler/TaskExecutor.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/scheduler/TaskExecutor.java @@ -11,9 +11,13 @@ import org.xyzh.common.utils.IDUtils; import org.xyzh.crontab.mapper.CrontabLogMapper; import org.xyzh.crontab.pojo.TaskParams; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; + import java.lang.reflect.Method; import java.util.Date; import java.util.HashMap; +import java.util.Map; /** * @description 任务执行器 @@ -138,25 +142,29 @@ public class TaskExecutor { private String injectTaskContext(Object bean, TbCrontabTask task, TbCrontabLog log) { String methodParams = task.getMethodParams(); - // 如果Bean是BaseTask的子类,注入taskId和logId到JSON参数中 if (bean instanceof org.xyzh.crontab.task.BaseTask) { try { - TaskParams taskParams = TaskParams.fromJson(methodParams); - if (taskParams != null) { - // 注入taskId和logId - if (taskParams.getParams() == null) { - taskParams.setParams(new HashMap<>()); - } - taskParams.getParams().put("taskId", task.getTaskId()); - taskParams.getParams().put("logId", log.getID()); - methodParams = taskParams.toJson(); - logger.debug("已注入任务上下文: taskId={}, logId={}", task.getTaskId(), log.getID()); - } + // 从task对象构建完整的TaskParams + TaskParams taskParams = new TaskParams(); + taskParams.setTaskGroup(task.getTaskGroup()); // 从task表获取 + taskParams.setMethodName(task.getMethodName()); // 从task表获取 + + // 将methodParams解析为Map并设置到params字段 + Map params = JSON.parseObject(methodParams, + new TypeReference>(){}); + + // 注入taskId和logId + params.put("taskId", task.getTaskId()); + params.put("logId", log.getID()); + + taskParams.setParams(params); + + methodParams = taskParams.toJson(); } catch (Exception e) { - logger.warn("注入任务上下文失败,使用原始参数: {}", e.getMessage()); + logger.warn("构建TaskParams失败: {}", e.getMessage()); } } - + return methodParams; } } diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/service/impl/DataCollectionItemServiceImpl.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/service/impl/DataCollectionItemServiceImpl.java index 766d435..e2ffce4 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/service/impl/DataCollectionItemServiceImpl.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/service/impl/DataCollectionItemServiceImpl.java @@ -23,7 +23,6 @@ import org.xyzh.system.utils.LoginUtil; import java.util.Date; import java.util.List; -import java.util.stream.Collectors; /** * @description 数据采集项服务实现类 @@ -102,29 +101,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService int successCount = 0; Date now = new Date(); - for (TbDataCollectionItem item : itemList) { - // 检查URL是否已存在(去重) - if (item.getSourceUrl() != null && !item.getSourceUrl().isEmpty()) { - TbDataCollectionItem existing = itemMapper.selectBySourceUrl(item.getSourceUrl()); - if (existing != null) { - logger.debug("跳过已存在的采集项: {}", item.getSourceUrl()); - continue; - } - } - - // 设置默认值 - item.setID(IDUtils.generateID()); - item.setCreateTime(now); - item.setDeleted(false); - if (item.getStatus() == null) { - item.setStatus(0); - } - if (item.getCrawlTime() == null) { - item.setCrawlTime(now); - } - - itemMapper.insert(item); - successCount++; + int result = itemMapper.batchInsertItems(itemList); + if (result > 0) { + successCount = result; } logger.info("批量创建采集项成功,共{}条,成功{}条", itemList.size(), successCount); @@ -195,9 +174,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService return resultDomain; } - TbDataCollectionItem item = itemMapper.selectById(itemId); - if (item != null) { - DataCollectionItemVO vo = buildVO(item); + DataCollectionItemVO vo = itemMapper.selectVOById(itemId); + if (vo != null) { resultDomain.success("查询成功", vo); } else { resultDomain.fail("采集项不存在"); @@ -218,10 +196,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService } filter.setDeleted(false); - List list = itemMapper.selectItemList(filter); - List voList = list.stream() - .map(this::buildVO) - .collect(Collectors.toList()); + List voList = itemMapper.selectVOList(filter); + resultDomain.success("查询成功", voList); } catch (Exception e) { @@ -244,12 +220,9 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService pageParam = new PageParam(); } - List list = itemMapper.selectItemPage(filter, pageParam); - long total = itemMapper.countItems(filter); + List voList = itemMapper.selectVOPage(filter, pageParam); - List voList = list.stream() - .map(this::buildVO) - .collect(Collectors.toList()); + long total = itemMapper.countItems(filter); PageDomain pageDomain = new PageDomain<>(); pageDomain.setDataList(voList); @@ -274,10 +247,8 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService return resultDomain; } - List list = itemMapper.selectByTaskId(taskId); - List voList = list.stream() - .map(this::buildVO) - .collect(Collectors.toList()); + List voList = itemMapper.selectVOByTaskId(taskId); + resultDomain.success("查询成功", voList); } catch (Exception e) { @@ -433,47 +404,5 @@ public class DataCollectionItemServiceImpl implements DataCollectionItemService return resultDomain; } - /** - * @description 构建VO对象 - * @param item 采集项 - * @return DataCollectionItemVO - * @author yslg - * @since 2025-11-08 - */ - private DataCollectionItemVO buildVO(TbDataCollectionItem item) { - DataCollectionItemVO vo = new DataCollectionItemVO(); - vo.setItem(item); - - // 查询关联的定时任务 - if (item.getTaskId() != null && !item.getTaskId().isEmpty()) { - TbCrontabTask task = taskMapper.selectTaskById(item.getTaskId()); - vo.setTask(task); - } - - // 设置状态文本 - String statusText = "未处理"; - if (item.getStatus() != null) { - switch (item.getStatus()) { - case 0: - statusText = "未处理"; - break; - case 1: - statusText = "已转换为资源"; - break; - case 2: - statusText = "已忽略"; - break; - default: - statusText = "未知"; - } - } - vo.setStatusText(statusText); - - // 设置操作权限 - vo.setCanEdit(item.getStatus() == null || item.getStatus() == 0 || item.getStatus() == 2); - vo.setCanConvert(item.getStatus() == null || item.getStatus() == 0); - - return vo; - } } diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/CommandTask.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/CommandTask.java index 9240430..56b731f 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/CommandTask.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/CommandTask.java @@ -8,6 +8,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -41,6 +42,11 @@ public abstract class CommandTask extends BaseTask { processBuilder.directory(workDir.toFile()); processBuilder.redirectErrorStream(true); + // 设置环境变量强制Python使用UTF-8编码(解决Windows GBK编码问题) + Map env = processBuilder.environment(); + env.put("PYTHONIOENCODING", "utf-8"); // Python I/O编码 + env.put("PYTHONUTF8", "1"); // Python 3.7+ UTF-8模式 + // 启动进程 Process process = processBuilder.start(); diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/PythonCommandTask.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/PythonCommandTask.java index 6ec1f2b..ef60733 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/PythonCommandTask.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/PythonCommandTask.java @@ -18,7 +18,6 @@ public abstract class PythonCommandTask extends CommandTask { @Autowired protected CrawlerProperties crawlerProperties; - /** * 获取Python可执行文件路径 */ @@ -47,18 +46,16 @@ public abstract class PythonCommandTask extends CommandTask { /** * 构建Python命令 + * + * 注意: 不使用 cmd /c 或 bash -c,直接调用Python可执行文件 + * 这样可以避免shell对JSON参数中的引号进行错误处理 + * ProcessBuilder可以直接启动exe文件,参数会正确传递 */ @Override protected List buildCommand(TaskParams taskParams) throws Exception { List command = new ArrayList<>(); - // 检查操作系统 - String os = System.getProperty("os.name").toLowerCase(); - if (os.contains("win")) { - command.add("cmd"); - command.add("/c"); - } - + // 直接调用Python可执行文件,不使用shell command.add(getPythonPath()); // 添加Python脚本和参数(由子类实现) diff --git a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/newsTask/NewsCrawlerTask.java b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/newsTask/NewsCrawlerTask.java index 4ed0308..dbfdd3f 100644 --- a/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/newsTask/NewsCrawlerTask.java +++ b/schoolNewsServ/crontab/src/main/java/org/xyzh/crontab/task/newsTask/NewsCrawlerTask.java @@ -7,6 +7,7 @@ import org.springframework.stereotype.Component; import org.xyzh.api.crontab.DataCollectionItemService; import org.xyzh.common.core.domain.ResultDomain; import org.xyzh.common.dto.crontab.TbDataCollectionItem; +import org.xyzh.common.utils.IDUtils; import org.xyzh.crontab.config.CrontabProperties; import org.xyzh.crontab.pojo.TaskParams; import org.xyzh.crontab.task.PythonCommandTask; @@ -17,7 +18,9 @@ import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * @description 新闻爬虫定时任务 @@ -42,43 +45,58 @@ public class NewsCrawlerTask extends PythonCommandTask { protected List buildPythonArgs(TaskParams taskParams) throws Exception { List args = new ArrayList<>(); - String methodName = taskParams.getMethodName(); - String source = "rmrb"; - String category = "politics"; - String limit = "20"; - - // 根据不同的方法名称构建不同的参数 - if ("关键字搜索爬取".equals(methodName)) { - String query = taskParams.getParamAsString("query"); - Integer total = taskParams.getParamAsInt("total"); - category = query != null ? query : "politics"; - limit = total != null ? total.toString() : "20"; - - } else if ("排行榜爬取".equals(methodName)) { - category = "ranking"; - - } else if ("往日精彩头条爬取".equals(methodName)) { - String startDate = taskParams.getParamAsString("startDate"); - String endDate = taskParams.getParamAsString("endDate"); - Boolean isYesterday = taskParams.getParamAsBoolean("isYesterday"); - category = "history"; - // 这里可以将日期参数传递给Python脚本 + // 1. 从params获取scriptPath + String scriptPath = taskParams.getParamAsString("scriptPath"); + if (scriptPath == null || scriptPath.isEmpty()) { + throw new Exception("scriptPath参数缺失"); } - // 生成输出文件名 + // 2. 生成输出文件名 String timestamp = String.valueOf(System.currentTimeMillis()); - String outputFile = String.format("output/news_%s_%s_%s.json", source, category, timestamp); + String outputFile = String.format("output/news_%s.json", timestamp); // 保存输出文件路径到params中,供handleResult使用 taskParams.setParam("_outputFile", outputFile); - // 添加脚本和参数 - args.add("main.py"); - args.add(category); - args.add(limit); + + + // 4. 构建命令参数 + args.add(scriptPath); // 动态脚本路径 + + // 5. 遍历params,动态构建命令行参数 + if (taskParams.getParams() != null) { + for (Map.Entry entry : taskParams.getParams().entrySet()) { + String key = entry.getKey(); + Object value = entry.getValue(); + + // 跳过特殊参数 + if (key.startsWith("_") || key.equals("scriptPath") || + key.equals("taskId") || key.equals("logId")) { + continue; + } + + // 获取对应的Python参数名 + String pythonArg = "--"+key; + if (pythonArg != null && value != null) { + if (value instanceof Boolean) { + // Boolean类型: true时只传参数名,false时不传 + if ((Boolean) value) { + args.add(pythonArg); + } + } else { + // String/Integer类型: 传参数名+值 + args.add(pythonArg); + args.add(value.toString()); + } + } + } + } + + // 6. 统一添加output参数 + args.add("--output"); args.add(outputFile); - logger.info("爬虫参数 - 来源: {}, 分类: {}, 数量: {}", source, category, limit); + logger.info("Python脚本: {}, 命令行参数: {}", scriptPath, String.join(" ", args.subList(1, args.size()))); return args; } @@ -98,11 +116,12 @@ public class NewsCrawlerTask extends PythonCommandTask { // 读取并解析结果文件 String jsonContent = Files.readString(outputPath); - List newsList = JSON.parseObject( - jsonContent, - new TypeReference>() {} - ); - + ResultDomain result = JSON.parseObject(jsonContent, new TypeReference>(){}); + if (!result.isSuccess()) { + logger.error("爬取新闻失败: {}", result.getMessage()); + return; + } + List newsList = result.getDataList(); logger.info("成功爬取 {} 条新闻", newsList.size()); // 获取taskId和logId @@ -126,6 +145,8 @@ public class NewsCrawlerTask extends PythonCommandTask { try { List itemList = new ArrayList<>(); Date now = new Date(); + SimpleDateFormat parser = new SimpleDateFormat("yyyy年MM月dd日HH:mm"); + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); for (ArticleStruct news : newsList) { @@ -133,6 +154,7 @@ public class NewsCrawlerTask extends PythonCommandTask { TbDataCollectionItem item = new TbDataCollectionItem(); // 基本信息 + item.setID(IDUtils.generateID()); item.setTaskId(taskId); item.setLogId(logId); item.setTitle(news.getTitle()); @@ -156,7 +178,7 @@ public class NewsCrawlerTask extends PythonCommandTask { String publishTimeStr = news.getPublishTime(); if (publishTimeStr != null && !publishTimeStr.isEmpty()) { try { - item.setPublishTime(dateFormat.parse(publishTimeStr)); + item.setPublishTime(dateFormat.parse(dateFormat.format(parser.parse(publishTimeStr)))); } catch (Exception e) { logger.warn("解析发布时间失败: {}", publishTimeStr); item.setPublishTime(now); diff --git a/schoolNewsServ/crontab/src/main/resources/appliaction.yml b/schoolNewsServ/crontab/src/main/resources/appliaction.yml deleted file mode 100644 index 41f9cfd..0000000 --- a/schoolNewsServ/crontab/src/main/resources/appliaction.yml +++ /dev/null @@ -1,28 +0,0 @@ -crawler: - python: - path: F:\Environment\Conda\envs\shoolNewsCrewer - base: - path: F:/Project/schoolNews/schoolNewsCrawler - -crontab: - items: #可供前端选择的定时任务列表 - - name: 人民日报新闻爬取 - methods: #爬取方式 - - name: 关键字搜索爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask - path: crawler/RmrbSearch.py - params: - query: String #搜索关键字 - total: Integer #总新闻数量 - - name: 排行榜爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask - path: crawler/RmrbHotPoint.py - - name: 往日精彩头条爬取 - class: org.xyzh.crontab.task.newsTask.NewsCrawlerTask - path: crawler/RmrbTrending.py - params: - startDate: String #开始日期 - endDate: String #结束日期 - isYestoday: Boolean #是否是昨天 - - \ No newline at end of file diff --git a/schoolNewsServ/crontab/src/main/resources/application.yml b/schoolNewsServ/crontab/src/main/resources/application.yml new file mode 100644 index 0000000..a2e2841 --- /dev/null +++ b/schoolNewsServ/crontab/src/main/resources/application.yml @@ -0,0 +1,47 @@ +crawler: + # Python 可执行文件路径(Windows 建议指向 python.exe;如已在 PATH,可直接用 "python") + pythonPath: F:/Environment/Conda/envs/schoolNewsCrawler/python.exe + # 爬虫脚本根目录(NewsCrawlerTask 的工作目录) + basePath: F:/Project/schoolNews/schoolNewsCrawler + +# 下面为原有的定时任务清单(保持不变,仅修正到正确文件) +crontab: + items: + - name: 人民日报新闻爬取 + methods: + - name: 关键字搜索爬取 + clazz: newsCrewerTask + excuete_method: execute + path: crawler/RmrbSearch.py + params: + - name: query + description: 搜索关键字 + type: String + value: "" + - name: total + description: 总新闻数量 + type: Integer + value: 10 + - name: 排行榜爬取 + clazz: newsCrewerTask + excuete_method: execute + path: crawler/RmrbHotPoint.py + - name: 往日精彩头条爬取 + clazz: newsCrewerTask + excuete_method: execute + path: crawler/RmrbTrending.py + params: + - name: startDate + description: 开始日期 + type: String + value: "" + - name: endDate + description: 结束日期 + type: String + value: "" + - name: yesterday + description: 是否是昨天 + type: Boolean + value: true + + diff --git a/schoolNewsServ/crontab/src/main/resources/mapper/CrontabTaskMapper.xml b/schoolNewsServ/crontab/src/main/resources/mapper/CrontabTaskMapper.xml index c3655d5..a5710bd 100644 --- a/schoolNewsServ/crontab/src/main/resources/mapper/CrontabTaskMapper.xml +++ b/schoolNewsServ/crontab/src/main/resources/mapper/CrontabTaskMapper.xml @@ -186,7 +186,7 @@ UPDATE tb_crontab_task SET deleted = 1, delete_time = NOW() - WHERE id = #{taskId} AND deleted = 0 + WHERE task_id=#{taskId} AND deleted = 0 @@ -194,7 +194,7 @@ SELECT FROM tb_crontab_task - WHERE id = #{taskId} AND deleted = 0 + WHERE task_id=#{taskId} AND deleted = 0 @@ -272,7 +272,7 @@ UPDATE tb_crontab_task SET status = #{status}, update_time = NOW() - WHERE id = #{taskId} AND deleted = 0 + WHERE task_id=#{taskId} AND deleted = 0 diff --git a/schoolNewsServ/crontab/src/main/resources/mapper/DataCollectionItemMapper.xml b/schoolNewsServ/crontab/src/main/resources/mapper/DataCollectionItemMapper.xml new file mode 100644 index 0000000..6bfd158 --- /dev/null +++ b/schoolNewsServ/crontab/src/main/resources/mapper/DataCollectionItemMapper.xml @@ -0,0 +1,400 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id, task_id, log_id, title, content, summary, source, source_url, category, author, + publish_time, cover_image, images, tags, status, resource_id, crawl_time, process_time, + processor, create_time, update_time, delete_time, deleted + + + + + i.id as item_id, + i.task_id, + i.log_id, + i.title, + i.content, + i.summary, + i.source, + i.source_url, + i.category, + i.author, + i.publish_time, + i.cover_image, + i.images, + i.tags, + i.status, + i.resource_id, + i.crawl_time, + i.process_time, + i.processor, + i.create_time as item_create_time, + i.update_time as item_update_time, + t.task_name, + t.task_group, + t.bean_name, + t.method_name, + t.method_params, + l.execute_status, + l.execute_duration, + l.start_time, + l.end_time + + + + + + deleted = 0 + + + AND id = #{filter.id} + + + AND task_id = #{filter.taskId} + + + AND log_id = #{filter.logId} + + + AND title LIKE CONCAT('%', #{filter.title}, '%') + + + AND source = #{filter.source} + + + AND source_url = #{filter.sourceUrl} + + + AND category = #{filter.category} + + + AND author LIKE CONCAT('%', #{filter.author}, '%') + + + AND status = #{filter.status} + + + AND resource_id = #{filter.resourceId} + + + AND processor = #{filter.processor} + + + + + + + + + deleted = 0 + + + AND id = #{id} + + + AND task_id = #{taskId} + + + AND log_id = #{logId} + + + AND title LIKE CONCAT('%', #{title}, '%') + + + AND source = #{source} + + + AND source_url = #{sourceUrl} + + + AND category = #{category} + + + AND author LIKE CONCAT('%', #{author}, '%') + + + AND status = #{status} + + + AND resource_id = #{resourceId} + + + AND processor = #{processor} + + + + + + + + + + + + + + + + + + + + + + + + + + INSERT INTO tb_data_collection_item ( + id, task_id, log_id, title, content, summary, source, source_url, + category, author, publish_time, cover_image, images, tags, status, + resource_id, crawl_time, process_time, processor, + create_time, update_time, deleted + ) + VALUES + + ( + #{item.id}, #{item.taskId}, #{item.logId}, #{item.title}, #{item.content}, + #{item.summary}, #{item.source}, #{item.sourceUrl}, #{item.category}, + #{item.author}, #{item.publishTime}, #{item.coverImage}, #{item.images}, + #{item.tags}, #{item.status}, #{item.resourceId}, #{item.crawlTime}, + #{item.processTime}, #{item.processor}, + NOW(), NOW(), 0 + ) + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/schoolNewsServ/news/src/main/java/org/xyzh/news/service/impl/NCResourceServiceImpl.java b/schoolNewsServ/news/src/main/java/org/xyzh/news/service/impl/NCResourceServiceImpl.java index f5b8fb9..c638d3b 100644 --- a/schoolNewsServ/news/src/main/java/org/xyzh/news/service/impl/NCResourceServiceImpl.java +++ b/schoolNewsServ/news/src/main/java/org/xyzh/news/service/impl/NCResourceServiceImpl.java @@ -26,6 +26,7 @@ import org.xyzh.common.vo.UserDeptRoleVO; import org.xyzh.common.core.enums.ResourceType; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.stream.Collectors; @@ -270,7 +271,7 @@ public class NCResourceServiceImpl implements ResourceService { } // 检查资源是否存在 - TbResource existing = resourceMapper.selectById(resource.getResourceID()); + TbResource existing = resourceMapper.selectByResourceId(resource.getResourceID()); if (existing == null || existing.getDeleted()) { resultDomain.fail("资源不存在"); return resultDomain; @@ -286,33 +287,17 @@ public class NCResourceServiceImpl implements ResourceService { } } Date now = new Date(); - // 原始tags - TbResourceTag filter = new TbResourceTag(); - filter.setResourceID(resource.getResourceID()); - List originalTagVOs = resourceTagMapper.selectResourceTags(filter); - List originalTags = originalTagVOs.stream().map(TagVO::getResourceTag).collect(Collectors.toList()); - // 当前tags - List currentTags = resourceVO.getTags(); - // 新增tags - List tagsToAdd = currentTags.stream() - .filter(tag -> originalTags.stream().noneMatch(originalTag -> originalTag.getTagID().equals(tag.getID()))) - .collect(Collectors.toList()); - // 删除tags - List tagsToDelete = originalTags.stream() - .filter(originalTag -> currentTags.stream().noneMatch(tag -> tag.getID().equals(originalTag.getTagID()))) - .collect(Collectors.toList()); - - resourceTagMapper.batchDeleteResourceTags(tagsToDelete.stream().map(TbResourceTag::getID).collect(Collectors.toList())); - resourceTagMapper.batchInsertResourceTags(tagsToAdd.stream().map(tag -> { - TbResourceTag resourceTag = new TbResourceTag(); - resourceTag.setResourceID(resource.getResourceID()); - resourceTag.setTagID(tag.getID()); - resourceTag.setID(IDUtils.generateID()); - resourceTag.setCreator(user.getID()); - resourceTag.setCreateTime(now); - return resourceTag; - }).collect(Collectors.toList())); - + // tag先删后增 + TbResourceTag resourceTag = new TbResourceTag(); + resourceTag.setID(IDUtils.generateID()); + resourceTag.setResourceID(resource.getResourceID()); + resourceTag.setCreator(user.getID()); + resourceTag.setCreateTime(now); + resourceTag.setDeleted(false); + resourceTag.setTagID(resourceVO.getResource().getTagID()); + + resourceTagMapper.deleteByResourceId(resource.getResourceID()); + resourceTagMapper.batchInsertResourceTags(Arrays.asList(resourceTag)); // 更新时间 resource.setUpdateTime(now); @@ -321,10 +306,10 @@ public class NCResourceServiceImpl implements ResourceService { if (result > 0) { logger.info("更新资源成功: {}", resource.getResourceID()); // 重新查询返回完整数据 - TbResource updated = resourceMapper.selectById(resource.getResourceID()); + TbResource updated = resourceMapper.selectByResourceId(resource.getResourceID()); ResourceVO updatedResourceVO = new ResourceVO(); updatedResourceVO.setResource(updated); - updatedResourceVO.setTags(currentTags); + updatedResourceVO.setTags(resourceVO.getTags()); resultDomain.success("更新资源成功", updatedResourceVO); return resultDomain; } else { @@ -403,7 +388,7 @@ public class NCResourceServiceImpl implements ResourceService { if (result > 0) { logger.info("更新资源状态成功: {}", resourceID); // 重新查询返回完整数据 - TbResource updated = resourceMapper.selectById(resource.getID()); + TbResource updated = resourceMapper.selectByResourceId(resource.getID()); resultDomain.success("更新资源状态成功", updated); return resultDomain; } else { @@ -553,7 +538,7 @@ public class NCResourceServiceImpl implements ResourceService { if (result > 0) { logger.info("增加资源点赞次数成功: {}", resourceID); // 重新查询返回完整数据 - TbResource updated = resourceMapper.selectById(resource.getID()); + TbResource updated = resourceMapper.selectByResourceId(resource.getID()); resultDomain.success("增加点赞次数成功", updated); return resultDomain; } else { @@ -625,7 +610,7 @@ public class NCResourceServiceImpl implements ResourceService { if (result > 0) { logger.info("设置资源推荐状态成功: {} -> {}", resourceID, isRecommend); // 重新查询返回完整数据 - TbResource updated = resourceMapper.selectById(resource.getID()); + TbResource updated = resourceMapper.selectByResourceId(resource.getID()); resultDomain.success("设置推荐状态成功", updated); return resultDomain; } else { @@ -669,7 +654,7 @@ public class NCResourceServiceImpl implements ResourceService { if (result > 0) { logger.info("设置资源轮播状态成功: {} -> {}", resourceID, isBanner); // 重新查询返回完整数据 - TbResource updated = resourceMapper.selectById(resource.getID()); + TbResource updated = resourceMapper.selectByResourceId(resource.getID()); resultDomain.success("设置轮播状态成功", updated); return resultDomain; } else { diff --git a/schoolNewsWeb/src/apis/crontab/index.ts b/schoolNewsWeb/src/apis/crontab/index.ts index 0739473..27b72cb 100644 --- a/schoolNewsWeb/src/apis/crontab/index.ts +++ b/schoolNewsWeb/src/apis/crontab/index.ts @@ -5,7 +5,7 @@ */ import { api } from '@/apis/index'; -import type { CrontabTask, CrontabLog, ResultDomain, PageParam } from '@/types'; +import type { CrontabTask, CrontabLog, DataCollectionItem, CrontabItem, ResultDomain, PageParam } from '@/types'; /** * 定时任务API服务 @@ -14,14 +14,23 @@ export const crontabApi = { baseUrl: '/crontab', // ==================== 定时任务管理 ==================== - + + /** + * 获取可创建的定时任务模板列表 + * @returns Promise> + */ + async getEnabledCrontabList(): Promise> { + const response = await api.get(`${this.baseUrl}/getEnabledCrontabList`); + return response.data; + }, + /** * 创建定时任务 * @param task 任务对象 * @returns Promise> */ async createTask(task: CrontabTask): Promise> { - const response = await api.post(`${this.baseUrl}/task`, task); + const response = await api.post(`${this.baseUrl}/crontabTask`, task); return response.data; }, @@ -31,7 +40,7 @@ export const crontabApi = { * @returns Promise> */ async updateTask(task: CrontabTask): Promise> { - const response = await api.put(`${this.baseUrl}/task`, task); + const response = await api.put(`${this.baseUrl}/crontabTask`, task); return response.data; }, @@ -41,7 +50,7 @@ export const crontabApi = { * @returns Promise> */ async deleteTask(task: CrontabTask): Promise> { - const response = await api.delete(`${this.baseUrl}/task`, task); + const response = await api.delete(`${this.baseUrl}/crontabTask`, task); return response.data; }, @@ -72,11 +81,11 @@ export const crontabApi = { * @returns Promise> */ async getTaskPage(filter?: Partial, pageParam?: PageParam): Promise> { - const response = await api.post(`${this.baseUrl}/task/page`, { + const response = await api.post(`${this.baseUrl}/crontabTaskPage`, { filter, pageParam: { - pageNumber: pageParam?.page || 1, - pageSize: pageParam?.size || 10 + pageNumber: pageParam?.pageNumber || 1, + pageSize: pageParam?.pageSize || 10 } }); return response.data; @@ -153,11 +162,11 @@ export const crontabApi = { * @returns Promise> */ async getLogPage(filter?: Partial, pageParam?: PageParam): Promise> { - const response = await api.post(`${this.baseUrl}/log/page`, { + const response = await api.post(`${this.baseUrl}/crontabTaskLogPage`, { filter, pageParam: { - pageNumber: pageParam?.page || 1, - pageSize: pageParam?.size || 10 + pageNumber: pageParam?.pageNumber || 1, + pageSize: pageParam?.pageSize || 10 } }); return response.data; @@ -191,6 +200,49 @@ export const crontabApi = { async deleteLog(log: CrontabLog): Promise> { const response = await api.delete(`${this.baseUrl}/log`, log); return response.data; + }, + + // ==================== 数据采集项管理 ==================== + + /** + * 根据任务日志ID查询数据采集项列表 + * @param taskLogId 任务日志ID + * @returns Promise> + */ + async getCollectionItemsByLogId(taskLogId: string): Promise> { + const response = await api.get(`${this.baseUrl}/collection/item/task/${taskLogId}`); + return response.data; + }, + + /** + * 分页查询数据采集项列表 + * @param filter 过滤条件 + * @param pageParam 分页参数 + * @returns Promise> + */ + async getCollectionItemPage(filter?: Partial, pageParam?: PageParam): Promise> { + const response = await api.post(`${this.baseUrl}/collection/item/page`, { + filter, + pageParam: { + pageNumber: pageParam?.pageNumber || 1, + pageSize: pageParam?.pageSize || 10 + } + }); + return response.data; + }, + + /** + * 转换采集项为资源文章 + * @param itemId 采集项ID + * @param tagId 标签ID + * @returns Promise> + */ + async convertItemToResource(itemId: string, tagId: string): Promise> { + const response = await api.post(`${this.baseUrl}/collection/item/resource`, { + itemId, + tagId + }); + return response.data; } }; diff --git a/schoolNewsWeb/src/layouts/NavigationLayout.vue b/schoolNewsWeb/src/layouts/NavigationLayout.vue index 749cd44..ea10630 100644 --- a/schoolNewsWeb/src/layouts/NavigationLayout.vue +++ b/schoolNewsWeb/src/layouts/NavigationLayout.vue @@ -243,7 +243,7 @@ watch( background: white; border-radius: 4px; box-shadow: 0 1px 4px rgba(0, 21, 41, 0.08); - height: calc(100vh - 76px); + min-height: calc(100vh - 76px); } diff --git a/schoolNewsWeb/src/layouts/SidebarLayout.vue b/schoolNewsWeb/src/layouts/SidebarLayout.vue index b2bcaef..28c80bd 100644 --- a/schoolNewsWeb/src/layouts/SidebarLayout.vue +++ b/schoolNewsWeb/src/layouts/SidebarLayout.vue @@ -210,7 +210,7 @@ function handleMenuClick(menu: SysMenu) { .main-content-full { background: #F9FAFB; - height: 100vh; + min-height: 100vh; overflow-y: auto; padding: 20px; box-sizing: border-box; diff --git a/schoolNewsWeb/src/types/crontab/index.ts b/schoolNewsWeb/src/types/crontab/index.ts index e7c371d..8b93ced 100644 --- a/schoolNewsWeb/src/types/crontab/index.ts +++ b/schoolNewsWeb/src/types/crontab/index.ts @@ -42,6 +42,8 @@ export interface CrontabTask extends BaseDTO { * 定时任务执行日志 */ export interface CrontabLog extends BaseDTO { + /** 日志ID */ + logId?: string; /** 任务ID */ taskId?: string; /** 任务名称 */ @@ -90,3 +92,93 @@ export interface NewsCrawlerConfig { status?: number; } +/** + * 数据采集项 + */ +export interface DataCollectionItem extends BaseDTO { + /** 采集项ID */ + itemId?: string; + /** 日志ID */ + logId?: string; + /** 任务ID */ + taskId?: string; + /** 任务名称 */ + taskName?: string; + /** 标题 */ + title?: string; + /** 内容(HTML格式) */ + content?: string; + /** 来源URL */ + sourceUrl?: string; + /** 发布时间 */ + publishTime?: string; + /** 作者 */ + author?: string; + /** 摘要 */ + summary?: string; + /** 封面图片 */ + coverImage?: string; + /** 分类 */ + category?: string; + /** 来源(人民日报、新华社等) */ + source?: string; + /** 标签(多个用逗号分隔) */ + tags?: string; + /** 图片列表(JSON格式) */ + images?: string; + /** 状态(0:未处理 1:已转换 2:已忽略) */ + status?: number; + /** 转换时间 */ + convertTime?: string; + /** 转换后的资源ID */ + resourceId?: string; + /** 错误信息 */ + errorMessage?: string; + /** 爬取时间 */ + crawlTime?: string; + /** 处理时间 */ + processTime?: string; + /** 处理人 */ + processor?: string; +} + +/** + * 爬虫任务参数 + */ +export interface CrontabParam { + /** 参数名称 */ + name: string; + /** 参数描述 */ + description: string; + /** 参数类型 */ + type: string; + /** 默认值 */ + value: any; +} + +/** + * 爬虫任务模板方法 + */ +export interface CrontabMethod { + /** 方法名称 */ + name: string; + /** Bean类名 */ + clazz?: string; + /** 执行方法名 */ + excuete_method?: string; + /** Python脚本路径 */ + path: string; + /** 参数定义列表 */ + params?: CrontabParam[]; +} + +/** + * 爬虫任务模板项 + */ +export interface CrontabItem { + /** 模板名称 */ + name: string; + /** 可用方法列表 */ + methods: CrontabMethod[]; +} + diff --git a/schoolNewsWeb/src/views/admin/manage/crontab/LogManagementView.vue b/schoolNewsWeb/src/views/admin/manage/crontab/LogManagementView.vue index 98bae3b..f5f6ce3 100644 --- a/schoolNewsWeb/src/views/admin/manage/crontab/LogManagementView.vue +++ b/schoolNewsWeb/src/views/admin/manage/crontab/LogManagementView.vue @@ -115,60 +115,152 @@ -
-
- 任务名称: - {{ currentLog.taskName }} -
-
- 任务分组: - {{ currentLog.taskGroup }} -
-
- Bean名称: - {{ currentLog.beanName }} -
-
- 方法名称: - {{ currentLog.methodName }} -
-
- 方法参数: - {{ currentLog.methodParams }} -
-
- 执行状态: - - {{ currentLog.executeStatus === 1 ? '成功' : '失败' }} - -
-
- 执行时长: - {{ currentLog.executeDuration }}ms -
-
- 开始时间: - {{ currentLog.startTime }} -
-
- 结束时间: - {{ currentLog.endTime }} -
-
- 执行结果: -
{{ currentLog.executeMessage }}
-
-
- 异常信息: -
{{ currentLog.exceptionInfo }}
-
+ + + +
+
+ 任务名称: + {{ currentLog.taskName }} +
+
+ 任务分组: + {{ currentLog.taskGroup }} +
+
+ Bean名称: + {{ currentLog.beanName }} +
+
+ 方法名称: + {{ currentLog.methodName }} +
+
+ 方法参数: + {{ currentLog.methodParams }} +
+
+ 执行状态: + + {{ currentLog.executeStatus === 1 ? '成功' : '失败' }} + +
+
+ 执行时长: + {{ currentLog.executeDuration }}ms +
+
+ 开始时间: + {{ currentLog.startTime }} +
+
+ 结束时间: + {{ currentLog.endTime }} +
+
+ 执行结果: +
{{ currentLog.executeMessage }}
+
+
+ 异常信息: +
{{ currentLog.exceptionInfo }}
+
+
+
+ + + + + +
+ + + + +
+
+
+ #{{ index + 1 }} + + 未处理 + + + 已转换 + + + 已忽略 + +
+ +

{{ item.title }}

+ +
+ 来源: {{ item.source }} + 作者: {{ item.author }} + 发布: {{ item.publishTime }} + 分类: {{ item.category }} +
+ +
+ {{ item.summary }} +
+ + +
+
+
+
- + @@ -222,7 +314,7 @@ import { ref, reactive, onMounted } from 'vue'; import { ElMessage, ElMessageBox } from 'element-plus'; import { Delete, Search, Refresh } from '@element-plus/icons-vue'; import { crontabApi } from '@/apis/crontab'; -import type { CrontabLog, PageParam } from '@/types'; +import type { CrontabLog, PageParam, DataCollectionItem } from '@/types'; import { AdminLayout } from '@/views/admin'; defineOptions({ name: 'LogManagementView' @@ -233,6 +325,8 @@ const submitting = ref(false); const logList = ref([]); const total = ref(0); const currentLog = ref(null); +const collectionItems = ref([]); +const loadingItems = ref(false); // 搜索表单 const searchForm = reactive({ @@ -262,9 +356,18 @@ async function loadLogList() { if (searchForm.executeStatus !== undefined) filter.executeStatus = searchForm.executeStatus; const result = await crontabApi.getLogPage(filter, pageParam); - if (result.success && result.dataList) { - logList.value = result.dataList; - total.value = result.pageParam?.totalElements || 0; + if (result.success) { + // 根据后端返回结构处理数据 + if (result.pageDomain) { + logList.value = result.pageDomain.dataList || []; + total.value = result.pageDomain.pageParam?.totalElements || 0; + } else if (result.dataList) { + logList.value = result.dataList; + total.value = result.pageParam?.totalElements || 0; + } else { + logList.value = []; + total.value = 0; + } } else { ElMessage.error(result.message || '加载日志列表失败'); logList.value = []; @@ -310,16 +413,36 @@ function handleSizeChange(size: number) { // 查看详情 async function handleViewDetail(row: CrontabLog) { try { - const result = await crontabApi.getLogById(row.id!); - if (result.success && result.data) { - currentLog.value = result.data; - detailDialogVisible.value = true; + // 同时加载日志详情和采集项数据 + loadingItems.value = true; + collectionItems.value = []; + + const [logResult, itemsResult] = await Promise.all([ + crontabApi.getLogById(row.id!), + crontabApi.getCollectionItemsByLogId(row.id!) + ]); + + if (logResult.success && logResult.data) { + currentLog.value = logResult.data; } else { - ElMessage.error(result.message || '获取详情失败'); + ElMessage.error(logResult.message || '获取日志详情失败'); + return; } + + if (itemsResult.success) { + collectionItems.value = itemsResult.dataList || []; + } else { + console.warn('获取采集项失败:', itemsResult.message); + // 即使采集项加载失败,也显示日志详情 + collectionItems.value = []; + } + + detailDialogVisible.value = true; } catch (error) { console.error('获取日志详情失败:', error); ElMessage.error('获取日志详情失败'); + } finally { + loadingItems.value = false; } } @@ -432,42 +555,165 @@ onMounted(() => { } .detail-content { - .detail-item { - display: flex; - align-items: flex-start; - margin-bottom: 16px; - font-size: 14px; + .detail-card { + margin-bottom: 20px; - .detail-label { - min-width: 100px; - color: #606266; - font-weight: 500; - } - - .detail-value { - flex: 1; + .card-header-title { + display: flex; + justify-content: space-between; + align-items: center; + font-weight: 600; + font-size: 16px; color: #303133; - word-break: break-all; } + } - .detail-message, - .detail-exception { - flex: 1; - padding: 12px; - background-color: #f5f7fa; - border-radius: 4px; - font-family: 'Courier New', monospace; - font-size: 13px; - color: #303133; - white-space: pre-wrap; - word-break: break-all; - max-height: 300px; - overflow-y: auto; + .detail-grid { + display: grid; + grid-template-columns: repeat(2, 1fr); + gap: 16px; + + .detail-item { + display: flex; + align-items: flex-start; + font-size: 14px; + + &.full-width { + grid-column: 1 / -1; + flex-direction: column; + + .detail-label { + margin-bottom: 8px; + } + } + + .detail-label { + min-width: 100px; + color: #606266; + font-weight: 500; + } + + .detail-value { + flex: 1; + color: #303133; + word-break: break-all; + } + + .detail-message, + .detail-exception { + width: 100%; + padding: 12px; + background-color: #f5f7fa; + border-radius: 4px; + font-family: 'Courier New', monospace; + font-size: 13px; + color: #303133; + white-space: pre-wrap; + word-break: break-all; + max-height: 300px; + overflow-y: auto; + } + + .detail-exception { + background-color: #fef0f0; + color: #f56c6c; + } } + } - .detail-exception { - background-color: #fef0f0; - color: #f56c6c; + .news-list { + max-height: 500px; + overflow-y: auto; + + .news-item { + padding: 16px; + margin-bottom: 16px; + background-color: #f8f9fa; + border-radius: 8px; + border-left: 4px solid #409eff; + transition: all 0.3s; + + &:hover { + background-color: #ecf5ff; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08); + } + + &:last-child { + margin-bottom: 0; + } + + .news-header { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 12px; + + .news-index { + font-size: 12px; + font-weight: 600; + color: #409eff; + background-color: #ecf5ff; + padding: 2px 8px; + border-radius: 4px; + } + } + + .news-title { + margin: 0 0 12px 0; + font-size: 16px; + font-weight: 600; + color: #303133; + line-height: 1.5; + } + + .news-meta { + display: flex; + flex-wrap: wrap; + gap: 16px; + margin-bottom: 12px; + font-size: 13px; + color: #909399; + + span { + display: inline-flex; + align-items: center; + + &:not(:last-child)::after { + content: '|'; + margin-left: 16px; + color: #dcdfe6; + } + } + } + + .news-summary { + margin-bottom: 12px; + padding: 12px; + background-color: #fff; + border-radius: 4px; + font-size: 14px; + color: #606266; + line-height: 1.6; + max-height: 80px; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + } + + .news-footer { + display: flex; + justify-content: space-between; + align-items: center; + padding-top: 12px; + border-top: 1px solid #e4e7ed; + + .crawl-time { + font-size: 12px; + color: #909399; + } + } } } } diff --git a/schoolNewsWeb/src/views/admin/manage/crontab/NewsCrawlerView.vue b/schoolNewsWeb/src/views/admin/manage/crontab/NewsCrawlerView.vue index fbfbfad..8cbfb0a 100644 --- a/schoolNewsWeb/src/views/admin/manage/crontab/NewsCrawlerView.vue +++ b/schoolNewsWeb/src/views/admin/manage/crontab/NewsCrawlerView.vue @@ -55,9 +55,14 @@ - + + -
+
@@ -146,13 +151,6 @@
- - -
-
爬虫名称 -
+ +
- Bean名称 - -
-
- 方法名称 - -
-
- 方法参数 - + 爬虫模板 + + + - 示例:{"source":"xinhua","category":"education"} + 选择要使用的新闻爬虫类型
+ + +
+ 爬取方法 + + + + + 选择具体的爬取方式 + +
+ + +
+ 方法参数 +
+
+ + {{ param.description }} + ({{ param.type }}) + + + + +
+
+
+
Cron表达式 - @@ -231,8 +275,8 @@
爬虫描述 -
- + @@ -569,7 +714,8 @@ onMounted(() => { padding: 20px; background-color: #fff; border-radius: 4px; - + max-height: 50%; + overflow: auto; .header { display: flex; justify-content: space-between; @@ -696,6 +842,35 @@ onMounted(() => { color: #909399; line-height: 1.6; } + + .params-container { + padding: 12px; + background-color: #f8f9fa; + border-radius: 4px; + border: 1px solid #e4e7ed; + + .param-item { + margin-bottom: 16px; + + &:last-child { + margin-bottom: 0; + } + + .param-label { + display: block; + margin-bottom: 8px; + font-size: 13px; + color: #606266; + font-weight: 500; + + .param-type { + color: #909399; + font-weight: normal; + font-size: 12px; + } + } + } + } } } } diff --git a/schoolNewsWeb/src/views/admin/manage/crontab/TaskManagementView.vue b/schoolNewsWeb/src/views/admin/manage/crontab/TaskManagementView.vue index b7df476..facd306 100644 --- a/schoolNewsWeb/src/views/admin/manage/crontab/TaskManagementView.vue +++ b/schoolNewsWeb/src/views/admin/manage/crontab/TaskManagementView.vue @@ -280,7 +280,7 @@ const isEdit = ref(false); // 表单数据 const formData = reactive>({ taskName: '', - taskGroup: 'DEFAULT', + taskGroup: '', beanName: '', methodName: '', methodParams: '', @@ -301,9 +301,18 @@ const loadTaskList = async () => { if (searchForm.status !== undefined) filter.status = searchForm.status; const result = await crontabApi.getTaskPage(filter, pageParam); - if (result.success && result.dataList) { - taskList.value = result.dataList; - total.value = result.pageParam?.totalElements || 0; + if (result.success) { + // 根据后端返回结构处理数据 + if (result.pageDomain) { + taskList.value = result.pageDomain.dataList || []; + total.value = result.pageDomain.pageParam?.totalElements || 0; + } else if (result.dataList) { + taskList.value = result.dataList; + total.value = result.pageParam?.totalElements || 0; + } else { + taskList.value = []; + total.value = 0; + } } else { ElMessage.error(result.message || '加载任务列表失败'); taskList.value = []; @@ -526,7 +535,7 @@ function resetForm() { function resetFormData() { Object.assign(formData, { taskName: '', - taskGroup: 'DEFAULT', + taskGroup: '', beanName: '', methodName: '', methodParams: '', diff --git a/schoolNewsWeb/src/views/admin/manage/resource/ResourceManagementView.vue b/schoolNewsWeb/src/views/admin/manage/resource/ResourceManagementView.vue index 35f0aca..9875cfe 100644 --- a/schoolNewsWeb/src/views/admin/manage/resource/ResourceManagementView.vue +++ b/schoolNewsWeb/src/views/admin/manage/resource/ResourceManagementView.vue @@ -1,20 +1,654 @@ diff --git a/schoolNewsWeb/src/views/public/article/components/ArticleAdd.vue b/schoolNewsWeb/src/views/public/article/components/ArticleAdd.vue index 7970700..311147a 100644 --- a/schoolNewsWeb/src/views/public/article/components/ArticleAdd.vue +++ b/schoolNewsWeb/src/views/public/article/components/ArticleAdd.vue @@ -125,6 +125,7 @@ interface Props { articleId?: string; showBackButton?: boolean; backButtonText?: string; + initialData?: ResourceVO; } const props = withDefaults(defineProps(), { @@ -195,7 +196,7 @@ async function loadCategoryList() { async function loadTagList() { try { tagLoading.value = true; - const result = await resourceTagApi.getTagList(); + const result = await resourceTagApi.getTagList({}); if (result.success) { tagList.value = result.dataList || []; } else { @@ -220,13 +221,22 @@ async function handlePublish() { await formRef.value?.validate(); publishing.value = true; - - const result = await resourceApi.createResource(articleForm.value); - if (result.success) { - ElMessage.success('发布成功'); - emit('publish-success', result.data?.resource?.resourceID || ''); + if (isEdit.value) { + const result = await resourceApi.updateResource(articleForm.value); + if (result.success) { + ElMessage.success('保存成功'); + emit('publish-success', result.data?.resource?.resourceID || ''); + } else { + ElMessage.error(result.message || '保存失败'); + } } else { - ElMessage.error(result.message || '发布失败'); + const result = await resourceApi.createResource(articleForm.value); + if (result.success) { + ElMessage.success('发布成功'); + emit('publish-success', result.data?.resource?.resourceID || ''); + } else { + ElMessage.error(result.message || '发布失败'); + } } } catch (error) { console.error('发布失败:', error); @@ -282,8 +292,17 @@ onMounted(async () => { loadCategoryList(), loadTagList() ]); - - // 如果是编辑模式,加载文章数据 + + // 如果有初始数据,使用初始数据填充表单 + if (props.initialData) { + articleForm.value = { + resource: { ...props.initialData.resource }, + tags: [...(props.initialData.tags || [])] + }; + return; + } + + // 如果是编辑模式,加载文章数据 if (props.articleId) { try { isEdit.value = true;