无头浏览器

This commit is contained in:
2025-11-25 19:32:23 +08:00
parent 780ed31a1d
commit 6649d956aa
574 changed files with 66559 additions and 15 deletions

View File

@@ -110,26 +110,41 @@ class XhwCrawler(BaseCrawler):
self.driver = self._init_driver()
def _init_driver(self):
"""初始化并返回Chrome WebDriver实例"""
"""初始化并返回Chrome WebDriver实例(无头模式)"""
chrome_options = Options()
# 确保浏览器可见,不使用无头模式
# 或者完全删除这行,因为默认就是有界面模式
# 无头模式配置
chrome_options.add_argument('--headless=new') # 使用新的headless模式
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--disable-gpu')
# 设置窗口大小(headless模式必需)
chrome_options.add_argument('--window-size=1920,1080')
# 反检测配置
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
# 确保浏览器可见
chrome_options.add_argument('--start-maximized')
chrome_options.add_argument('--disable-gpu')
# 其他安全配置
chrome_options.add_argument('--disable-web-security')
chrome_options.add_argument('--allow-running-insecure-content')
chrome_options.add_argument('--disable-features=VizDisplayCompositor')
# 根据系统选择chromedriver路径和chrome二进制文件路径
chrome_driver_path = 'win/chromedriver.exe'
chrome_binary_path = 'win/chrome-headless/chrome-headless-shell-win64/chrome-headless-shell.exe'
if platform.system() == 'Linux':
chrome_driver_path = 'linux/chromedriver'
chrome_binary_path = 'linux/chrome-headless/chrome-headless/chrome-headless-shell' # Linux使用项目目录下自带的chrome-headless-shell(非系统安装的Chrome)
# 指定Chrome二进制文件路径(用于chrome-headless-shell)
if chrome_binary_path and os.path.exists(chrome_binary_path):
chrome_options.binary_location = chrome_binary_path
logger.info(f"使用Chrome二进制: {chrome_binary_path}")
service = Service(executable_path=chrome_driver_path)

View File

@@ -160,27 +160,41 @@ class XxqgCrawler(BaseCrawler):
self.driver = self._init_driver()
def _init_driver(self):
"""初始化并返回Chrome WebDriver实例"""
"""初始化并返回Chrome WebDriver实例(无头模式)"""
chrome_options = Options()
# 确保浏览器可见,不使用无头模式
# 或者完全删除这行,因为默认就是有界面模式
# 无头模式配置
chrome_options.add_argument('--headless=new') # 使用新的headless模式
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--disable-gpu')
# 设置窗口大小headless模式必需
chrome_options.add_argument('--window-size=1920,1080')
# 反检测配置
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
# 确保浏览器可见
chrome_options.add_argument('--start-maximized')
chrome_options.add_argument('--disable-gpu')
# 其他安全配置
chrome_options.add_argument('--disable-web-security')
chrome_options.add_argument('--allow-running-insecure-content')
chrome_options.add_argument('--disable-features=VizDisplayCompositor')
# 判断系统类型获取对应的chromedriver路径
# 根据系统选择chromedriver路径和chrome二进制文件路径
chrome_driver_path = 'win/chromedriver.exe'
chrome_binary_path = 'win/chrome-headless/chrome-headless-shell-win64/chrome-headless-shell.exe'
if platform.system() == 'Linux':
chrome_driver_path = 'linux/chromedriver'
chrome_binary_path = 'linux/chrome-headless/chrome-headless/chrome-headless-shell' # Linux使用项目目录下自带的chrome-headless-shell(非系统安装的Chrome)
# 指定Chrome二进制文件路径(用于chrome-headless-shell)
if chrome_binary_path and os.path.exists(chrome_binary_path):
chrome_options.binary_location = chrome_binary_path
logger.info(f"使用Chrome二进制: {chrome_binary_path}")
service = Service(executable_path=chrome_driver_path)

View File

@@ -0,0 +1,9 @@
Google Chrome
Copyright 2025 Google LLC. All rights reserved.
Chrome is made possible by the Chromium open source project
(https://www.chromium.org/) and other open source software
(chrome://credits).
See the Terms of Service at chrome://terms.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
ca-certificates
fonts-liberation
libasound2 (>= 1.0.17)
libatk-bridge2.0-0 (>= 2.5.3)
libatk1.0-0 (>= 2.11.90)
libatspi2.0-0 (>= 2.9.90)
libc6 (>= 2.17)
libcairo2 (>= 1.6.0)
libcups2 (>= 1.6.0)
libcurl3-gnutls | libcurl3-nss | libcurl4 | libcurl3
libdbus-1-3 (>= 1.9.14)
libexpat1 (>= 2.1~beta3)
libgbm1 (>= 17.1.0~rc2)
libglib2.0-0 (>= 2.39.4)
libgtk-3-0 (>= 3.9.10) | libgtk-4-1
libnspr4 (>= 2:4.9-2~)
libnss3 (>= 2:3.35)
libpango-1.0-0 (>= 1.14.0)
libudev1 (>= 183)
libvulkan1
libx11-6 (>= 2:1.4.99.1)
libxcb1 (>= 1.9.2)
libxcomposite1 (>= 1:0.4.4-1)
libxdamage1 (>= 1:1.1)
libxext6
libxfixes3
libxkbcommon0 (>= 0.5.0)
libxrandr2
wget
xdg-utils (>= 1.0.2)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More