151 lines
7.6 KiB
Plaintext
151 lines
7.6 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "948be230",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"项目根目录: f:\\Project\\schoolNews\n",
|
||
"✓ 已启用自动重载模块功能 - 修改 .py 文件后会自动生效\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Auto-reload modules (refresh automatically after a file is modified)\n",
|
||
"%reload_ext autoreload\n",
|
||
"%autoreload 2\n",
|
||
"\n",
|
||
"import sys\n",
|
||
"import os\n",
|
||
"\n",
|
||
"# Add the project root to sys.path first (must happen before the imports)\n",
|
||
"project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))\n",
|
||
"if project_root not in sys.path:\n",
|
||
" sys.path.insert(0, project_root)\n",
|
||
"\n",
|
||
"# Then import the modules\n",
|
||
"from crawler.xhw.XhwCrawler import XhwCrawler\n",
|
||
"from crawler.BaseCrawler import NewsItem\n",
|
||
"from loguru import logger\n",
|
||
"import json\n",
|
||
"from pprint import pprint\n",
|
||
"\n",
|
||
"print(f\"项目根目录: {project_root}\")\n",
|
||
"print(\"✓ 已启用自动重载模块功能 - 修改 .py 文件后会自动生效\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "31a8a0dd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\u001b[32m2025-11-20 16:06:16.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.BaseCrawler\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m71\u001b[0m - \u001b[1m初始化爬虫: XhwCrawler\u001b[0m\n",
|
||
"\u001b[32m2025-11-20 16:06:17.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1mChrome浏览器初始化成功\u001b[0m\n",
|
||
"\u001b[32m2025-11-20 16:06:17.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1m访问主页获取初始Cookie\u001b[0m\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"crawler = XhwCrawler()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "e5a6e91c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#crawler.search(\"大学\", 1)\n",
|
||
"# crawler.search(\"中国\", 10, \"xhsz\")\n",
|
||
"# crawler.search(\"中国\", 10, \"news\")\n",
|
||
"# crawler.search(\"中国\", 10, \"xhsz\")\n",
|
||
"# crawler.search(\"中国\", 10, \"news\")\n",
|
||
"# crawler.search(\"中国\", 10, \"news\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "7e0f56fa",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# crawler.parse_xhsz_news_detail(\"https://xhsz.news.cn/focus_news/detail?id=9752\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "47327ebf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"NewsItem(title='全国精神文明建设表彰大会在京召开', contentRows=[{'tag': 'img', 'content': \"<img src='https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/202505239fc5e377b19047918dfe0eca2aad5c67_202505232bbfd057329e448b9540fc5dbf050283.jpg' />\"}, {'tag': 'p', 'content': '<p>\\u2003\\u20035月23日,全国精神文明建设表彰大会在北京召开。中央宣传思想文化工作领导小组决定,授予202个城市(区)全国文明城市(区)称号,授予3316个村镇全国文明村镇称号,授予4688个单位全国文明单位称号,授予798户家庭全国文明家庭称号,授予890所学校全国文明校园称号;授予60名(组)同志第九届全国道德模范荣誉称号,授予239名(组)同志第九届全国道德模范提名奖。</p>'}, {'tag': 'p', 'content': '<p>\\u2003\\u2003新华社记者 翟健岚 摄</p>'}, {'tag': 'img', 'content': \"<img src='https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/202505239fc5e377b19047918dfe0eca2aad5c67_20250523ead9d290ee4c45928fc7ca35e227727b.jpg' />\"}, {'tag': 'p', 'content': '<p>\\u2003\\u20035月23日,全国精神文明建设表彰大会在北京召开。中央宣传思想文化工作领导小组决定,授予202个城市(区)全国文明城市(区)称号,授予3316个村镇全国文明村镇称号,授予4688个单位全国文明单位称号,授予798户家庭全国文明家庭称号,授予890所学校全国文明校园称号;授予60名(组)同志第九届全国道德模范荣誉称号,授予239名(组)同志第九届全国道德模范提名奖。</p>'}, {'tag': 'p', 'content': '<p>\\u2003\\u2003新华社记者 翟健岚 摄</p>'}, {'tag': 'img', 'content': \"<img src='https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/202505239fc5e377b19047918dfe0eca2aad5c67_20250523971c4e94467b451c93cd4810992e2025.jpg' />\"}, {'tag': 'p', 'content': '<p>\\u2003\\u20035月23日,全国精神文明建设表彰大会在北京召开。中央宣传思想文化工作领导小组决定,授予202个城市(区)全国文明城市(区)称号,授予3316个村镇全国文明村镇称号,授予4688个单位全国文明单位称号,授予798户家庭全国文明家庭称号,授予890所学校全国文明校园称号;授予60名(组)同志第九届全国道德模范荣誉称号,授予239名(组)同志第九届全国道德模范提名奖。</p>'}, {'tag': 'p', 'content': '<p>\\u2003\\u2003新华社记者 翟健岚 摄</p>'}, {'tag': 'img', 'content': \"<img src='https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/202505239fc5e377b19047918dfe0eca2aad5c67_20250523cf0f3bf322184d339a91049891f265bb.jpg' />\"}, {'tag': 'p', 'content': '<p>\\u2003\\u20035月23日,全国精神文明建设表彰大会在北京召开。中央宣传思想文化工作领导小组决定,授予202个城市(区)全国文明城市(区)称号,授予3316个村镇全国文明村镇称号,授予4688个单位全国文明单位称号,授予798户家庭全国文明家庭称号,授予890所学校全国文明校园称号;授予60名(组)同志第九届全国道德模范荣誉称号,授予239名(组)同志第九届全国道德模范提名奖。</p>'}, {'tag': 'p', 'content': 
'<p>\\u2003\\u2003新华社记者 翟健岚 摄</p>'}], url='https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/c.html', viewCount=None, publishTime='2025-05-23 18:33:10', author=None, source='新华网', category=None, executeStatus=0, executeMessage=None)"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#crawler.parse_xh_news_detail(\"https://www.news.cn/politics/leaders/20250224/5384be3d47c643b3a68e3bb724656152/c.html\")\n",
|
||
"# crawler.parse_xh_news_detail(\"https://www.news.cn/politics/leaders/20240207/2819fe60663140eab9599581dcae8c1e/c.html\") # video\n",
|
||
"crawler.parse_xh_news_detail(\"https://www.news.cn/photo/20250523/9fc5e377b19047918dfe0eca2aad5c67/c.html\") # paginated article"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "fa359d5b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# crawler.commend()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ebabbd5b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "schoolNewsCrawler",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|