搜索关键字爬虫

This commit is contained in:
2025-11-12 16:10:34 +08:00
parent 7be02fe396
commit 675e6da7d7
37 changed files with 3382 additions and 572 deletions

View File

@@ -25,20 +25,27 @@ def main():
epilog="""
示例:
python RmrbHotPoint.py
python RmrbHotPoint.py --output "output/hotpoint.json"
"""
)
# 添加输出文件参数
parser.add_argument(
'--output', '-o',
type=str,
help='输出文件路径'
)
args = parser.parse_args()
output_file = args.output
logger.info("使用直接参数模式")
try:
# 创建爬虫实例
logger.info("开始获取人民日报热点排行")
crawler = RmrbCrawler()
# 执行获取热点排行
result = crawler.hotPointRank()
# 输出JSON结果
output = {
"code": result.code,
"message": result.message,
@@ -47,12 +54,15 @@ def main():
"dataList": [item.dict() for item in result.dataList] if result.dataList else []
}
if output_file:
output_path = Path(output_file)
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(output, f, ensure_ascii=False, indent=2)
logger.info(f"结果已保存到: {output_file}")
print(json.dumps(output, ensure_ascii=False, indent=2))
# 关闭爬虫
crawler.close()
# 退出码: 成功=0, 失败=1
sys.exit(0 if result.success else 1)
except Exception as e:
@@ -67,7 +77,6 @@ def main():
print(json.dumps(error_output, ensure_ascii=False, indent=2))
sys.exit(1)
" "
if __name__ == "__main__":
main()
main()