调试修改爬虫

This commit is contained in:
2025-11-12 19:16:50 +08:00
parent 675e6da7d7
commit e55a52f20b
27 changed files with 1023 additions and 601 deletions

View File

@@ -81,20 +81,19 @@ def main():
try:
logger.info(f"开始搜索: 关键词='{key}', 数量={total}, 类型={news_type}")
crawler = RmrbCrawler()
# result = crawler.search(key=key.strip(), total=total, news_type=news_type)
result = crawler.search(key=key.strip(), total=total, news_type=news_type)
output = {
"code": result.code,
"message": result.message,
"success": result.success,
"data": None,
"dataList": [item.model_dump() for item in result.dataList] if result.dataList else []
}
result = None
with open("../output/output.json", "r", encoding="utf-8") as f:
with open("F:\Project\schoolNews\schoolNewsCrawler\output\output.json", "r", encoding="utf-8") as f:
result = json.load(f)
print(result)
output = result
# output = {
# "code": result["code"],
# "message": result["message"],
# "success": result["success"],
# "data": None,
# "dataList": [item.model_dump() for item in result["dataList"]] if result["dataList"] else []
# }
if output_file:
output_path = Path(output_file)
output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -102,8 +101,11 @@ def main():
json.dump(output, f, ensure_ascii=False, indent=2)
logger.info(f"结果已保存到: {output_file}")
print(json.dumps(output, ensure_ascii=False, indent=2))
crawler.close()
# sys.exit(0 if result.success else 1)
# print(json.dumps(output, ensure_ascii=False, indent=2))
sys.exit(0 if result["success"] else 1)
except Exception as e: