"""
Test the 学习强国 (Xuexi Qiangguo) important-news crawler.
"""
from XxqgCrawler import XxqgCrawler
from loguru import logger


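# Interface assumed by this test (inferred from the calls below): XxqgCrawler
# exposes crawl_important(max_count) returning a result with .success,
# .message, and a .dataList of articles (.title, .source, .publishTime,
# .contentRows), plus a Selenium-style .driver attribute.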
def test_crawl_important():
    """Test crawling important news."""
    crawler = None
    try:
        # Initialize the crawler
        logger.info("Initializing 学习强国 crawler...")
        crawler = XxqgCrawler()

        # Crawl important news (defaults to at most 60 articles)
        logger.info("Starting to crawl important news...")
        result = crawler.crawl_important(max_count=10)  # fetch only 10 while testing

        # Check the result
        if result.success:
            logger.info(f"Crawl succeeded! {result.message}")
            logger.info(f"Fetched {len(result.dataList)} articles in total")

            # Print the titles of the first three articles
            for idx, news in enumerate(result.dataList[:3], 1):
                logger.info(f"{idx}. {news.title}")
                logger.info(f"   Source: {news.source}")
                logger.info(f"   Published: {news.publishTime}")
                logger.info(f"   Content rows: {len(news.contentRows)}")
                logger.info("")
        else:
            logger.error(f"Crawl failed: {result.message}")

    except Exception as e:
        logger.exception(f"Error during the test: {e}")
    finally:
        # Close the browser even if the crawl raised, so the session is not leaked
        if crawler and crawler.driver:
            crawler.driver.quit()
            logger.info("Browser closed")


if __name__ == "__main__":
    test_crawl_important()
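
# Note: since the function follows the test_* naming convention, pytest can
# also collect and run it automatically, provided this module's filename
# matches pytest's test_*.py pattern.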