重要新闻

This commit is contained in:
2025-11-21 14:55:50 +08:00
parent 0e7cee3070
commit 7ccec2b624
4 changed files with 2018 additions and 219 deletions

View File

@@ -0,0 +1,42 @@
"""
测试学习强国重要新闻爬虫
"""
from XxqgCrawler import XxqgCrawler
from loguru import logger
def test_crawl_important():
    """Smoke-test crawling of the Xuexi Qiangguo "important news" section.

    Builds an ``XxqgCrawler``, requests at most 10 articles through
    ``crawl_important`` and logs the outcome: on success the total count
    plus title, source, publish time and number of content rows for the
    first three articles; on failure the message carried by the result.

    Any exception raised along the way is logged with its traceback via
    ``logger.exception`` and is not re-raised. Once the crawler has been
    constructed, its browser driver is shut down on every exit path, so a
    failed crawl does not leave a browser process behind.
    """
    try:
        # Initialise the crawler.
        logger.info("初始化学习强国爬虫...")
        crawler = XxqgCrawler()
        try:
            # Per the original note, crawl_important defaults to at most
            # 60 articles; the test caps it at 10 to keep the run short.
            logger.info("开始爬取重要新闻...")
            result = crawler.crawl_important(max_count=10)

            # Inspect the result.
            if result.success:
                logger.info(f"爬取成功!{result.message}")
                logger.info(f"共爬取到 {len(result.dataList)} 篇新闻")
                # Log the details of the first three articles only.
                for idx, news in enumerate(result.dataList[:3], 1):
                    logger.info(f"{idx}. {news.title}")
                    logger.info(f" 来源: {news.source}")
                    logger.info(f" 发布时间: {news.publishTime}")
                    logger.info(f" 内容行数: {len(news.contentRows)}")
                    logger.info("")
            else:
                logger.error(f"爬取失败: {result.message}")
        finally:
            # Close the browser even when crawling or result handling
            # raised; otherwise the driver process would be leaked.
            if crawler.driver:
                crawler.driver.quit()
                logger.info("浏览器已关闭")
    except Exception as e:
        # Top-level boundary of the test script: report and swallow.
        logger.exception(f"测试过程中发生错误: {e}")
# Run the smoke test only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_crawl_important()