新华网搜索爬虫+新闻内容提取
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 18,
|
||||
"id": "948be230",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -41,7 +41,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 19,
|
||||
"id": "31a8a0dd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -49,11 +49,11 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-11-19 19:03:54.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.BaseCrawler\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m71\u001b[0m - \u001b[1m初始化爬虫: XhwCrawler\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:03:55.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1mChrome浏览器初始化成功\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:03:55.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1m访问主页获取初始Cookie\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:03:55.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m准备访问URL: https://xhsz.news.cn/\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:03:57.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1m成功访问URL: https://xhsz.news.cn/\u001b[0m\n"
|
||||
"\u001b[32m2025-11-20 14:39:07.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.BaseCrawler\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m71\u001b[0m - \u001b[1m初始化爬虫: XhwCrawler\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 14:39:08.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1mChrome浏览器初始化成功\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 14:39:08.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m24\u001b[0m - \u001b[1m访问主页获取初始Cookie\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 14:39:08.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m25\u001b[0m - \u001b[1m准备访问URL: https://xhsz.news.cn/\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 14:39:10.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36m_init_driver\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1m成功访问URL: https://xhsz.news.cn/\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -63,7 +63,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "e5a6e91c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -71,34 +71,42 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-11-19 19:04:12.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m174\u001b[0m - \u001b[1m请求URL: https://xhsz.news.cn/s?k=%E4%B9%A0%E8%BF%91%E5%B9%B3&action=news&page=1\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:04:15.858\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m188\u001b[0m - \u001b[33m\u001b[1m检测到验证页面,尝试手动处理验证\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:04:15.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m189\u001b[0m - \u001b[1m请在30秒内手动完成验证...\u001b[0m\n",
|
||||
"\u001b[32m2025-11-19 19:04:48.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m215\u001b[0m - \u001b[1m解析后的HTML内容: <html lang=\"en\"><head>\n",
|
||||
"<meta charset=\"utf-8\"/>\n",
|
||||
"<meta content=\"IE=edge\" http-equiv=\"X-UA-Compatible\"/>\n",
|
||||
"<meta content=\"webkit\" name=\"renderer\"/>\n",
|
||||
"<title>新华网新华思政-全国高校课程思政教学资源服务平台</title>\n",
|
||||
"<meta content=\"新华思政,课程思政,全国高校课程思政教学资源服务平台,新华网,新华教育,思政教育.\" name=\"keywords\"/>\n",
|
||||
"<meta content=\"新华网作为党和国家重要的网上舆论阵地,适时推出新华思政—全国高校课程思政教学资源服务平台,为全国高校教师针对课程思政建设、交流、学习和共享于一体的教学服务平台,旨在推广课程思政建设先进经验和做法,助力高校课程思政教学资源需求,深入挖掘课程思政元素,助力广泛开展课程思政建设的良好氛围,提升教师开展课程思政建设的意识和能力。\" name=\"description\"/>\n",
|
||||
"<link href=\"/static/skin4/favicon.ico\" rel...\u001b[0m\n"
|
||||
"\u001b[32m2025-11-20 13:19:51.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m请求URL: https://xhsz.news.cn/s?k=%E5%A4%A7%E5%AD%A6&action=news&page=1\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:20:15.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:20:20.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.8B0C5F90441ED5455E088CF6DF7032DE.e.84\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:20:36.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:20:41.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.D41E40A40777EF2D881878B18F35342A.e.114\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:20:57.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:21:02.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.2BA293A49BA4DA88D492D8BDC1E07365.e.157\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:21:18.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:21:23.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.DDC416596722BE8B22A5E84011EA59C3.e.198\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:22:32.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:22:37.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.B9E24DEEF281C700F90635CABAA2B108.e.230\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:22:53.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:22:58.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.EECC90A746E37A0994443791EFF7C402.e.290\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:23:15.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:23:20.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.0188441312BE753DFF48394C16A44F8F.e.330\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:23:36.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:23:41.057\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.F7A148D8A30D006FFCDAC45B01A2E7B5.e.374\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:23:56.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:24:01.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.5A632E0B79568A5FFC8E29FFD5B09507.e.396\")>\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:24:17.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m26\u001b[0m - \u001b[1m未发现滑动验证,直接继续\u001b[0m\n",
|
||||
"\u001b[32m2025-11-20 13:24:22.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcrawler.xhw.XhwCrawler\u001b[0m:\u001b[36mparse_xhsz_news_detail\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m找到新闻主体部分: <selenium.webdriver.remote.webelement.WebElement (session=\"11360ade0a59af3938c0f8faa9b88abf\", element=\"f.6B13A7AB92BA3CB5CE0964EB246896F9.d.6B5B529215D1C2221EEF1597FF0C3D0A.e.445\")>\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "AttributeError",
|
||||
"evalue": "'NoneType' object has no attribute 'find'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mcrawler\u001b[49m\u001b[43m.\u001b[49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m习近平\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[38;5;66;03m# crawler.search(\"中国\", 10, \"xhsz\")\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;66;03m# crawler.search(\"中国\", 10, \"news\")\u001b[39;00m\n\u001b[32m 4\u001b[39m \u001b[38;5;66;03m# crawler.search(\"中国\", 10, \"xhsz\")\u001b[39;00m\n\u001b[32m 5\u001b[39m \u001b[38;5;66;03m# crawler.search(\"中国\", 10, \"news\")\u001b[39;00m\n\u001b[32m 6\u001b[39m \u001b[38;5;66;03m# crawler.search(\"中国\", 10, \"news\")\u001b[39;00m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32mf:\\Project\\schoolNews\\schoolNewsCrawler\\crawler\\xhw\\XhwCrawler.py:241\u001b[39m, in \u001b[36msearch\u001b[39m\u001b[34m(self, key, total, action)\u001b[39m\n\u001b[32m 239\u001b[39m news_info = news.find(\u001b[33m\"\u001b[39m\u001b[33mdiv.head\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 240\u001b[39m news_title = news_info.find(\u001b[33m\"\u001b[39m\u001b[33mdiv.title\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m241\u001b[39m news_date = news_info.find(\u001b[33m\"\u001b[39m\u001b[33mdiv.date\u001b[39m\u001b[33m\"\u001b[39m).text.strip()\n\u001b[32m 242\u001b[39m url = news_title.find(\u001b[33m\"\u001b[39m\u001b[33ma\u001b[39m\u001b[33m\"\u001b[39m).get(\u001b[33m\"\u001b[39m\u001b[33mhref\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 243\u001b[39m url_base_map[url] = {\u001b[33m\"\u001b[39m\u001b[33mtitle\u001b[39m\u001b[33m\"\u001b[39m: news_title.get_text(strip=\u001b[38;5;28;01mTrue\u001b[39;00m), \u001b[33m\"\u001b[39m\u001b[33mdate\u001b[39m\u001b[33m\"\u001b[39m: news_date}\n",
|
||||
"\u001b[31mAttributeError\u001b[39m: 'NoneType' object has no attribute 'find'"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ResultDomain(code=0, message='', success=True, data=None, dataList=[])"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"crawler.search(\"习近平\", 10)\n",
|
||||
"crawler.search(\"大学\", 1)\n",
|
||||
"# crawler.search(\"中国\", 10, \"xhsz\")\n",
|
||||
"# crawler.search(\"中国\", 10, \"news\")\n",
|
||||
"# crawler.search(\"中国\", 10, \"xhsz\")\n",
|
||||
@@ -108,10 +116,41 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"id": "7e0f56fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# crawler.parse_xhsz_news_detail(\"https://xhsz.news.cn/focus_news/detail?id=9752\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "47327ebf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"NewsItem(title='《习近平总书记关于党的建设的重要思想概论》出版座谈会在北京召开', contentRows=[{'tag': 'p', 'content': '<p>\\u3000\\u3000新华社北京2月24日电\\u3000《习近平总书记关于党的建设的重要思想概论》出版座谈会2月24日在京召开。与会代表结合《概论》主要内容,交流学习贯彻习近平总书记关于党的建设的重要思想的认识和体会。</p>'}, {'tag': 'p', 'content': '<p>\\u3000\\u3000会议认为,《概论》是广大党员、干部深入学习领会习近平总书记关于党的建设的重要思想的权威辅助读物。习近平总书记关于党的建设的重要思想,是一个逻辑严密、内涵丰富、系统全面、博大精深的科学体系,是对中国化的马克思主义党建理论体系的继承发展,构成习近平新时代中国特色社会主义思想的“党建篇”。在这一重要思想的科学指引下,我们党成功开辟百年大党自我革命新境界,推动党和国家事业取得历史性成就、发生历史性变革,为世界政党建设提供了重要借鉴。</p>'}, {'tag': 'p', 'content': '<p>\\u3000\\u3000会议指出,要以学好用好《概论》为契机,进一步把习近平总书记关于党的建设的重要思想领会深、把握准、落到位,深刻领会其科学体系、理论品质和实践指向,更加深刻领悟“两个确立”的决定性意义,增强“四个意识”、坚定“四个自信”、做到“两个维护”。要不断深化体系化研究、学理化阐释,深刻把握这一重要思想蕴含的深刻道理、透彻学理、深邃哲理。要坚持用这一重要思想武装头脑、指导实践、推动工作,把学习成果转化为工作实效,推进党建研究高质量发展,以党建研究新成果推进党的建设和组织工作高质量发展,为以中国式现代化全面推进强国建设、民族复兴伟业提供坚强组织保证。</p>'}, {'tag': 'p', 'content': '<p>\\u3000\\u3000座谈会由全国党建研究会举办,中央和国家机关有关部门,各省区市和新疆生产建设兵团党建研究会(学会),部分中管企业、高校有关负责同志,党史党建专家代表参加座谈会。</p>'}], url='https://www.news.cn/politics/leaders/20250224/5384be3d47c643b3a68e3bb724656152/c.html', viewCount=None, publishTime='2025-02-24 22:44:25', author=None, source='新华网', category=None, executeStatus=0, executeMessage=None)"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"crawler.parse_xh_news_detail(\"https://www.news.cn/politics/leaders/20250224/5384be3d47c643b3a68e3bb724656152/c.html\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fa359d5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user