样式修改

This commit is contained in:
2026-01-09 13:33:39 +08:00
parent 38c575fa30
commit 30e3d86c9f
8 changed files with 198 additions and 75 deletions

File diff suppressed because one or more lines are too long

View File

@@ -490,6 +490,7 @@ class XhwCrawler(BaseCrawler):
search_data = search_config.params.copy()
search_data["k"] = key
search_data["action"] = action
max_page = 1
try:
# 获取新闻url
@@ -523,7 +524,20 @@ class XhwCrawler(BaseCrawler):
time.sleep(5)
except Exception as e:
logger.info(f"滑动验证处理失败或未出现: {e}")
# 获取最大分页数并校验
try:
search_foot = self.driver.find_element(By.CSS_SELECTOR, "div.pagebar")
if search_foot:
page_nums = search_foot.find_elements(By.CSS_SELECTOR, "a.num")
if page_nums:
max_page = int(page_nums[-1].text)
if page > max_page:
logger.info(f"当前页 {page} 超过最大页数 {max_page},停止翻页")
break
except Exception as e:
logger.warning(f"获取最大分页失败: {e}")
# 提取新闻列表
try:
search_main = self.driver.find_element(By.CSS_SELECTOR, "div.page-search-main")
@@ -549,6 +563,8 @@ class XhwCrawler(BaseCrawler):
# 从新闻url中获取新闻详情
count = 0
total_urls = len(news_urls)
logger.info(f"开始解析新闻详情,共 {total_urls} 条URL目标获取 {total}")
for news_url in news_urls:
try:
news = self.parse_news_detail(news_url)
@@ -557,11 +573,14 @@ class XhwCrawler(BaseCrawler):
news.publishTime = url_base_map.get(news_url, {}).get("date") or news.publishTime
news_list.append(news)
count += 1
if count % 10 == 0 or count >= total:
logger.info(f"解析进度: {count}/{total}")
if count >= total:
break
except Exception as e:
logger.warning(f"解析新闻失败: {news_url}, {e}")
continue
logger.info(f"新闻详情解析完成,共获取 {count}")
except Exception as e:
logger.error(f"搜索过程整体异常: {e}")