89 lines
2.8 KiB
Python
89 lines
2.8 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
Crawl the Zhejiang Public Resources Trading Center and pick three records for testing.
"""
|
||
import logging
|
||
import sys
|
||
import os
|
||
import random
|
||
|
||
# Add this script's own directory to the front of the module search path
|
||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
# Import the configuration, the spider, and the processing pipeline
|
||
from config import ZHEJIANG_CONFIG, SPIDER_CONFIG, DATA_DIR
|
||
from spiders import ZhejiangSpider
|
||
from processors import ProcessingPipeline
|
||
|
||
# Configure logging for the script: INFO level, "time - level - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Module-level logger used by main() below.
logger = logging.getLogger(__name__)
|
||
|
||
def main():
    """Crawl the Zhejiang site, sample a few records, process them, and log the result.

    Steps:
        1. Build a ``ZhejiangSpider`` from the imported configuration, crawl
           two pages of the "工程建设" category / "招标文件公示" notice type,
           and call ``save_to_csv()``.
        2. Pick three records at random from ``spider.results`` (or keep all
           of them when fewer than three were crawled).
        3. Pass the selection to ``ProcessingPipeline.process_results`` with
           ``upload=False``.
        4. Log a fixed set of fields for every processed record.

    Returns:
        None. Returns early, after logging an error, when the crawl produced
        no results.
    """
    # Single source of truth for the notice type used by both the crawl and
    # the processing step, so the two cannot drift apart.
    notice_type = "招标文件公示"
    sample_size = 3

    # (label shown in the log, key looked up in the record, fallback text).
    missing = "文档未提及"
    report_fields = (
        ("项目名称", "项目名称", missing),
        ("项目批准文号", "项目批准文号", missing),
        ("批准文号", "批准文号", missing),
        ("类型", "类型", missing),
        ("地区", "地区", missing),
        ("最高投标限价", "最高投标限价", missing),
        ("最高限价", "最高限价", missing),
        ("评标办法", "评标办法", missing),
        ("链接", "招标文件链接", "无"),
    )

    logger.info("开始爬取浙江公共资源交易中心")

    # 1. Crawl
    logger.info("1. 爬取数据:")
    spider = ZhejiangSpider(ZHEJIANG_CONFIG, SPIDER_CONFIG, DATA_DIR)

    # Crawl two result pages; no per-record limit is passed here, so the
    # number of records depends on what the spider returns for those pages.
    spider.crawl(
        max_pages=2,
        category="工程建设",
        notice_type=notice_type,
    )

    # Persist whatever was crawled before any sampling takes place.
    spider.save_to_csv()

    # Treat an unset result set (None) the same as an empty one.
    results = spider.results or []
    logger.info("爬取完成,共获取 %d 条数据", len(results))

    if not results:
        logger.error("爬取失败,无数据")
        return

    # 2. Sample: random.sample needs at least `sample_size` items, so fall
    # back to the full (unshuffled) result set when there are fewer.
    logger.info("\n2. 选择测试数据:")
    if len(results) >= sample_size:
        selected_results = random.sample(results, sample_size)
    else:
        selected_results = results

    logger.info("随机选择了 %d 条数据进行测试", len(selected_results))

    # 3. Process (upload disabled for this test run)
    logger.info("\n3. 处理数据:")
    pipeline = ProcessingPipeline()
    processed = pipeline.process_results(
        selected_results,
        site="zhejiang",
        notice_type=notice_type,
        upload=False,
    )

    # 4. Report
    logger.info("\n4. 测试结果:")
    separator = "-" * 60
    for i, record in enumerate(processed, 1):
        logger.info("\n%s", separator)
        logger.info("测试 %d", i)
        logger.info("%s", separator)
        for label, key, fallback in report_fields:
            logger.info("%s: %s", label, record.get(key, fallback))
|
||
|
||
# Run the crawl-and-test workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|