重要新闻
This commit is contained in:
42
schoolNewsCrawler/crawler/xxqg/test_important_crawler.py
Normal file
42
schoolNewsCrawler/crawler/xxqg/test_important_crawler.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""
|
||||
测试学习强国重要新闻爬虫
|
||||
"""
|
||||
from XxqgCrawler import XxqgCrawler
|
||||
from loguru import logger
|
||||
|
||||
def test_crawl_important():
    """Smoke-test the XXQG (学习强国) "important news" crawler.

    Initializes ``XxqgCrawler``, crawls up to 10 articles, logs a summary
    of the first three on success, and always shuts the browser down —
    even when crawling raises.

    Returns:
        None. Results are reported via ``loguru`` log output only.
    """
    crawler = None  # bound before try so the finally block can test it
    try:
        # Initialize the crawler (this presumably starts a browser
        # driver — confirm against XxqgCrawler.__init__).
        logger.info("初始化学习强国爬虫...")
        crawler = XxqgCrawler()

        # Crawl "important news"; the crawler's own default is up to 60
        # articles, capped to 10 here to keep the test quick.
        logger.info("开始爬取重要新闻...")
        result = crawler.crawl_important(max_count=10)

        if result.success:
            logger.info(f"爬取成功!{result.message}")
            logger.info(f"共爬取到 {len(result.dataList)} 篇新闻")

            # Log the first three articles as a sanity check.
            for idx, news in enumerate(result.dataList[:3], 1):
                logger.info(f"{idx}. {news.title}")
                logger.info(f" 来源: {news.source}")
                logger.info(f" 发布时间: {news.publishTime}")
                logger.info(f" 内容行数: {len(news.contentRows)}")
                logger.info("")
        else:
            logger.error(f"爬取失败: {result.message}")
    except Exception as e:
        logger.exception(f"测试过程中发生错误: {str(e)}")
    finally:
        # Bug fix: the original quit the driver inside the try body, so a
        # failure during construction or crawling leaked the browser
        # process. Cleanup now runs on every exit path.
        if crawler is not None and crawler.driver:
            crawler.driver.quit()
            logger.info("浏览器已关闭")
|
||||
|
||||
# Script entry point: run the smoke test when executed directly.
if __name__ == "__main__":
    test_crawl_important()
|
||||
Reference in New Issue
Block a user