# NOTE(review): removed web-UI scrape residue (repository file listing,
# "Raw Blame History" toolbar text, and the hosting site's ambiguous-Unicode
# warning). None of it was part of the source file
# schoolNews/schoolNewsCrawler/crawler/RmrbTrending.py.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
人民日报热点新闻爬虫命令行工具
用法:
python RmrbTrending.py --date 20250110
python RmrbTrending.py --start-date 20250101 --end-date 20250110
"""
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
# Add parent directory to path to import crawler
sys.path.insert(0, str(Path(__file__).parent.parent))
from crawler.RmrbCrawler import RmrbCrawler
from loguru import logger
def parse_date(date_str: str) -> datetime:
    """Parse a date string in YYYYMMDD form into a ``datetime``.

    Args:
        date_str: Date string, e.g. ``"20250110"``.

    Returns:
        A ``datetime`` at midnight of the given day.

    Raises:
        ValueError: If ``date_str`` is not a valid YYYYMMDD date.
    """
    try:
        parsed = datetime.strptime(date_str, "%Y%m%d")
    except ValueError:
        # Re-raise with a user-facing (Chinese) message for the CLI.
        raise ValueError(f"日期格式错误: {date_str}正确格式为YYYYMMDD例如: 20250110")
    return parsed
def main():
    """CLI entry point: fetch People's Daily trending news and print JSON.

    Three modes, selected by flags:
      * ``--date``/``-d``                       — single-day mode;
      * ``--start-date``/``-s`` + ``--end-date``/``-e`` — date-range mode
        (both must be given together);
      * no flags                                — defaults to today.

    Prints a JSON envelope (``code``/``message``/``success``/``data``/
    ``dataList``) to stdout and exits with status 0 on success, 1 on any
    failure (including argument errors, which are reported as code 400,
    and unexpected errors, reported as code 500).
    """
    parser = argparse.ArgumentParser(
        description='人民日报热点新闻获取工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 获取单日热点新闻
python RmrbTrending.py --date 20250110
python RmrbTrending.py -d 20250110
# 获取日期范围内的热点新闻
python RmrbTrending.py --start-date 20250101 --end-date 20250110
python RmrbTrending.py -s 20250101 -e 20250110
# 不指定日期则获取今天的热点新闻
python RmrbTrending.py
"""
    )
    parser.add_argument(
        '--date', '-d',
        type=str,
        help='指定日期 (格式: YYYYMMDD例如: 20250110)'
    )
    parser.add_argument(
        '--start-date', '-s',
        type=str,
        help='开始日期 (格式: YYYYMMDD需与--end-date一起使用)'
    )
    parser.add_argument(
        '--end-date', '-e',
        type=str,
        help='结束日期 (格式: YYYYMMDD需与--start-date一起使用)'
    )
    args = parser.parse_args()

    # Sentinel so the finally-block can tell whether construction succeeded.
    crawler = None
    try:
        crawler = RmrbCrawler()

        # Decide which fetch mode the flags select.
        if args.date:
            # Single-day mode: mutually exclusive with the range flags.
            if args.start_date or args.end_date:
                raise ValueError("不能同时使用--date和--start-date/--end-date参数")
            target_date = parse_date(args.date)
            logger.info(f"获取单日热点新闻: {args.date}")
            result = crawler.getOneDayTrendingNews(target_date)
        elif args.start_date and args.end_date:
            # Date-range mode: both endpoints given.
            start_date = parse_date(args.start_date)
            end_date = parse_date(args.end_date)
            if start_date > end_date:
                raise ValueError("开始日期不能晚于结束日期")
            logger.info(f"获取日期范围热点新闻: {args.start_date}{args.end_date}")
            result = crawler.getDaysTrendingNews(start_date, end_date)
        elif args.start_date or args.end_date:
            # Only one endpoint given — reject.
            raise ValueError("--start-date和--end-date必须同时使用")
        else:
            # No flags: default to today's date.
            today = datetime.now()
            today_str = today.strftime("%Y%m%d")
            logger.info(f"获取今日热点新闻: {today_str}")
            result = crawler.getOneDayTrendingNews(today)

        # Emit the result as a JSON envelope on stdout.
        output = {
            "code": result.code,
            "message": result.message,
            "success": result.success,
            "data": None,
            "dataList": [item.dict() for item in result.dataList] if result.dataList else []
        }
        print(json.dumps(output, ensure_ascii=False, indent=2))

        # Exit code: success=0, failure=1. (SystemExit bypasses the
        # except clauses below but still runs the finally block.)
        sys.exit(0 if result.success else 1)
    except ValueError as e:
        # Argument/validation errors: report as code 400.
        logger.error(f"参数错误: {str(e)}")
        error_output = {
            "code": 400,
            "message": f"参数错误: {str(e)}",
            "success": False,
            "data": None,
            "dataList": []
        }
        print(json.dumps(error_output, ensure_ascii=False, indent=2))
        sys.exit(1)
    except Exception as e:
        # Anything unexpected: report as code 500.
        logger.error(f"执行失败: {str(e)}")
        error_output = {
            "code": 500,
            "message": f"执行失败: {str(e)}",
            "success": False,
            "data": None,
            "dataList": []
        }
        print(json.dumps(error_output, ensure_ascii=False, indent=2))
        sys.exit(1)
    finally:
        # BUGFIX: the original closed the crawler only on the success path,
        # leaking its resources on every error exit. Close unconditionally
        # once it has been constructed.
        if crawler is not None:
            crawler.close()


if __name__ == "__main__":
    main()