This commit is contained in:
2025-11-10 19:13:54 +08:00
parent 81ec0f0fc9
commit 3d742bf322
7 changed files with 364 additions and 90 deletions

View File

@@ -1,5 +1,5 @@
# 定义基础爬虫类
from typing import Dict, Optional, List, Any
from typing import Callable, Dict, Optional, List, Any, Union
from abc import ABC, abstractmethod
import requests
from bs4 import BeautifulSoup
@@ -12,7 +12,7 @@ class UrlConfig(BaseModel):
url: str = Field(..., description="请求URL")
params: Optional[Dict[str, Any]] = Field(default=None, description="请求参数")
method: str = Field(default="GET", description="请求方法")
headers: Optional[Dict[str, str]] = Field(default=None, description="请求头")
class Config:
# 允许任意类型
arbitrary_types_allowed = True
@@ -123,15 +123,6 @@ class BaseCrawler(ABC):
logger.error(f"HTML解析失败: {str(e)}")
return None
@abstractmethod
def crawl(self) -> List[NewsItem]:
    """
    Crawl news items (every subclass must implement this method).

    Returns:
        The list of news items.
    """
    ...
@abstractmethod
def parse_news_detail(self, url: str) -> Optional[NewsItem]: