样式修改
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -490,6 +490,7 @@ class XhwCrawler(BaseCrawler):
|
||||
search_data = search_config.params.copy()
|
||||
search_data["k"] = key
|
||||
search_data["action"] = action
|
||||
max_page = 1
|
||||
|
||||
try:
|
||||
# 获取新闻url
|
||||
@@ -523,7 +524,20 @@ class XhwCrawler(BaseCrawler):
|
||||
time.sleep(5)
|
||||
except Exception as e:
|
||||
logger.info(f"滑动验证处理失败或未出现: {e}")
|
||||
|
||||
|
||||
# 获取最大分页数并校验
|
||||
try:
|
||||
search_foot = self.driver.find_element(By.CSS_SELECTOR, "div.pagebar")
|
||||
if search_foot:
|
||||
page_nums = search_foot.find_elements(By.CSS_SELECTOR, "a.num")
|
||||
if page_nums:
|
||||
max_page = int(page_nums[-1].text)
|
||||
if page > max_page:
|
||||
logger.info(f"当前页 {page} 超过最大页数 {max_page},停止翻页")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"获取最大分页失败: {e}")
|
||||
|
||||
# 提取新闻列表
|
||||
try:
|
||||
search_main = self.driver.find_element(By.CSS_SELECTOR, "div.page-search-main")
|
||||
@@ -549,6 +563,8 @@ class XhwCrawler(BaseCrawler):
|
||||
|
||||
# 从新闻url中获取新闻详情
|
||||
count = 0
|
||||
total_urls = len(news_urls)
|
||||
logger.info(f"开始解析新闻详情,共 {total_urls} 条URL,目标获取 {total} 条")
|
||||
for news_url in news_urls:
|
||||
try:
|
||||
news = self.parse_news_detail(news_url)
|
||||
@@ -557,11 +573,14 @@ class XhwCrawler(BaseCrawler):
|
||||
news.publishTime = url_base_map.get(news_url, {}).get("date") or news.publishTime
|
||||
news_list.append(news)
|
||||
count += 1
|
||||
if count % 10 == 0 or count >= total:
|
||||
logger.info(f"解析进度: {count}/{total} 条")
|
||||
if count >= total:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"解析新闻失败: {news_url}, {e}")
|
||||
continue
|
||||
logger.info(f"新闻详情解析完成,共获取 {count} 条")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"搜索过程整体异常: {e}")
|
||||
|
||||
Reference in New Issue
Block a user