无头浏览器
This commit is contained in:
@@ -110,26 +110,41 @@ class XhwCrawler(BaseCrawler):
|
||||
self.driver = self._init_driver()
|
||||
|
||||
def _init_driver(self):
|
||||
"""初始化并返回Chrome WebDriver实例"""
|
||||
"""初始化并返回Chrome WebDriver实例(无头模式)"""
|
||||
chrome_options = Options()
|
||||
# 确保浏览器可见,不使用无头模式
|
||||
# 或者完全删除这行,因为默认就是有界面模式
|
||||
|
||||
# 无头模式配置
|
||||
chrome_options.add_argument('--headless=new') # 使用新的headless模式
|
||||
chrome_options.add_argument('--no-sandbox')
|
||||
chrome_options.add_argument('--disable-dev-shm-usage')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
|
||||
# 设置窗口大小(headless模式必需)
|
||||
chrome_options.add_argument('--window-size=1920,1080')
|
||||
|
||||
# 反检测配置
|
||||
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
|
||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||
chrome_options.add_experimental_option('useAutomationExtension', False)
|
||||
chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
|
||||
# 确保浏览器可见
|
||||
chrome_options.add_argument('--start-maximized')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
|
||||
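# Other settings: these flags RELAX browser security (same-origin policy off, mixed content allowed) and disable the VizDisplayCompositor feature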
|
||||
chrome_options.add_argument('--disable-web-security')
|
||||
chrome_options.add_argument('--allow-running-insecure-content')
|
||||
chrome_options.add_argument('--disable-features=VizDisplayCompositor')
|
||||
|
||||
# 根据系统选择chromedriver路径和chrome二进制文件路径
|
||||
chrome_driver_path = 'win/chromedriver.exe'
|
||||
|
||||
chrome_binary_path = 'win/chrome-headless/chrome-headless-shell-win64/chrome-headless-shell.exe'
|
||||
|
||||
if platform.system() == 'Linux':
|
||||
chrome_driver_path = 'linux/chromedriver'
|
||||
chrome_binary_path = 'linux/chrome-headless/chrome-headless/chrome-headless-shell' # bundled chrome-headless-shell on Linux; binary_location is left unset below if this path does not exist
|
||||
|
||||
# 指定Chrome二进制文件路径(用于chrome-headless-shell)
|
||||
if chrome_binary_path and os.path.exists(chrome_binary_path):
|
||||
chrome_options.binary_location = chrome_binary_path
|
||||
logger.info(f"使用Chrome二进制: {chrome_binary_path}")
|
||||
|
||||
service = Service(executable_path=chrome_driver_path)
|
||||
|
||||
|
||||
@@ -160,27 +160,41 @@ class XxqgCrawler(BaseCrawler):
|
||||
self.driver = self._init_driver()
|
||||
|
||||
def _init_driver(self):
|
||||
"""初始化并返回Chrome WebDriver实例"""
|
||||
"""初始化并返回Chrome WebDriver实例(无头模式)"""
|
||||
chrome_options = Options()
|
||||
# 确保浏览器可见,不使用无头模式
|
||||
# 或者完全删除这行,因为默认就是有界面模式
|
||||
|
||||
# 无头模式配置
|
||||
chrome_options.add_argument('--headless=new') # 使用新的headless模式
|
||||
chrome_options.add_argument('--no-sandbox')
|
||||
chrome_options.add_argument('--disable-dev-shm-usage')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
|
||||
# 设置窗口大小(headless模式必需)
|
||||
chrome_options.add_argument('--window-size=1920,1080')
|
||||
|
||||
# 反检测配置
|
||||
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
|
||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||
chrome_options.add_experimental_option('useAutomationExtension', False)
|
||||
chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
|
||||
# 确保浏览器可见
|
||||
chrome_options.add_argument('--start-maximized')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
|
||||
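# Other settings: these flags RELAX browser security (same-origin policy off, mixed content allowed) and disable the VizDisplayCompositor feature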
|
||||
chrome_options.add_argument('--disable-web-security')
|
||||
chrome_options.add_argument('--allow-running-insecure-content')
|
||||
chrome_options.add_argument('--disable-features=VizDisplayCompositor')
|
||||
# 判断系统类型获取对应的chromedriver路径
|
||||
|
||||
# 根据系统选择chromedriver路径和chrome二进制文件路径
|
||||
chrome_driver_path = 'win/chromedriver.exe'
|
||||
|
||||
chrome_binary_path = 'win/chrome-headless/chrome-headless-shell-win64/chrome-headless-shell.exe'
|
||||
|
||||
if platform.system() == 'Linux':
|
||||
chrome_driver_path = 'linux/chromedriver'
|
||||
chrome_binary_path = 'linux/chrome-headless/chrome-headless/chrome-headless-shell' # bundled chrome-headless-shell on Linux; binary_location is left unset below if this path does not exist
|
||||
|
||||
# 指定Chrome二进制文件路径(用于chrome-headless-shell)
|
||||
if chrome_binary_path and os.path.exists(chrome_binary_path):
|
||||
chrome_options.binary_location = chrome_binary_path
|
||||
logger.info(f"使用Chrome二进制: {chrome_binary_path}")
|
||||
|
||||
service = Service(executable_path=chrome_driver_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user