Files
ztb/test_crawl_three_items.py
2026-02-13 18:15:20 +08:00

89 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
Crawl the Zhejiang Public Resource Trading Center and pick three items for testing.
"""
import logging
import sys
import os
import random
# Put this file's own directory at the front of the module search path.
# This has to run before the project imports below
# (presumably config/spiders/processors live next to this file -- confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Import the configuration, the spider, and the processing pipeline
from config import ZHEJIANG_CONFIG, SPIDER_CONFIG, DATA_DIR
from spiders import ZhejiangSpider
from processors import ProcessingPipeline
# Configure logging: INFO level, "<timestamp> - <level> - <message>" lines
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used by main() for all progress and result output
logger = logging.getLogger(__name__)
def main():
    """Crawl, sample, process, and log up to three tender notices.

    Steps, in order:

    1. Build a ``ZhejiangSpider`` from the imported configuration, crawl two
       pages of the "工程建设" category / "招标文件公示" notice type, and ask
       the spider to save what it collected to CSV.
    2. Stop with an error log if ``spider.results`` is empty; otherwise take
       three random entries (or all of them when fewer than three exist).
    3. Run the selection through ``ProcessingPipeline.process_results`` with
       ``upload=False``.
    4. Log a fixed set of fields from every processed record.

    Returns:
        None. All output goes through the module logger.
    """
    notice_type = "招标文件公示"

    logger.info("开始爬取浙江公共资源交易中心")

    # Step 1: crawl. Only a page count is passed here; no per-item cap is
    # visible at this call site.
    logger.info("1. 爬取数据:")
    crawler = ZhejiangSpider(ZHEJIANG_CONFIG, SPIDER_CONFIG, DATA_DIR)
    crawler.crawl(max_pages=2, category="工程建设", notice_type=notice_type)
    crawler.save_to_csv()

    crawled = crawler.results
    logger.info(f"爬取完成,共获取 {len(crawled)} 条数据")
    if len(crawled) == 0:
        logger.error("爬取失败,无数据")
        return

    # Step 2: choose the test sample. With fewer than three results the
    # original list is used as-is (same object, same order).
    logger.info("\n2. 选择测试数据:")
    picked = crawled if len(crawled) < 3 else random.sample(crawled, 3)
    logger.info(f"随机选择了 {len(picked)} 条数据进行测试")

    # Step 3: process the sample; upload is explicitly switched off.
    logger.info("\n3. 处理数据:")
    records = ProcessingPipeline().process_results(
        picked,
        site="zhejiang",
        notice_type=notice_type,
        upload=False,
    )

    # Step 4: report. Every field below is logged as "<key>: <value>", with
    # the same placeholder whenever the record lacks that key.
    logger.info("\n4. 测试结果:")
    divider = '-' * 60
    placeholder = '文档未提及'
    reported_fields = (
        '项目名称',
        '项目批准文号',
        '批准文号',
        '类型',
        '地区',
        '最高投标限价',
        '最高限价',
        '评标办法',
    )
    for index, record in enumerate(records, start=1):
        logger.info(f"\n{divider}")
        logger.info(f"测试 {index}")
        logger.info(f"{divider}")
        for field in reported_fields:
            logger.info(f"{field}: {record.get(field, placeholder)}")
        # The link is stored under a different key than its label and
        # falls back to an empty string rather than the placeholder.
        logger.info(f"链接: {record.get('招标文件链接', '')}")
# Run the crawl-and-test workflow only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()