# ztb/process_csv.py — 69 lines, 2.4 KiB, Python (header captured from the repository file viewer)

import csv
import re
from itertools import islice
# Path of the CSV export examined by this script.
CSV_PATH = 'data/浙江省公共资源交易中心_20260213_172414.csv'
# Number of leading data rows sampled for the report.
SAMPLE_SIZE = 20
# Zero-based positions of the columns the report reads: the title, the
# project approval number and the project name.
TITLE_COL = 0
PROJECT_ID_COL = 6
PROJECT_NAME_COL = 7
# A bracketed group at the very end of a string, e.g. "name[id]".  The match
# starts at the leftmost "[" that can reach the final "]", so an approval
# number that itself contains brackets is captured whole.
TRAILING_ID_RE = re.compile(r'\[(.*?)\]$')
RULE = '-' * 100


def _load_sample(path, limit):
    """Return ``(headers, rows)`` holding at most ``limit`` data rows of ``path``.

    Only the needed rows are pulled from the reader, so the size of the file
    does not matter.  An empty file yields ``([], [])`` instead of raising
    ``StopIteration``.
    """
    # newline='' lets the csv module do its own newline handling, which keeps
    # quoted fields with embedded line breaks intact.
    with open(path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader, [])
        rows = list(islice(reader, limit))
    return headers, rows


def _cell(row, index):
    """Return ``row[index]``, or ``''`` when the row is too short.

    csv.reader yields ``[]`` for a blank line, so rows are not guaranteed to
    have every column.
    """
    return row[index] if index < len(row) else ''


def _split_title(title):
    """Split a ``name[id]`` title into ``(name, id)``; ``id`` is ``''`` if absent."""
    match = TRAILING_ID_RE.search(title)
    if match is None:
        return title.strip(), ''
    return title[:match.start()].strip(), match.group(1)


def _report_sample(rows):
    """Print every sampled row and a warning for each inconsistency.

    The approval number and the name extracted from the title are compared
    with the dedicated columns.  Returns ``(id_mismatches, name_mismatches)``.
    """
    print(f'\n前{SAMPLE_SIZE}条数据分析:')
    print(RULE)
    print(f'| {"序号":<4} | {"标题":<80} | {"项目批准文号":<30} | {"项目名称":<80} |')
    print(RULE)
    id_mismatches = 0
    name_mismatches = 0
    for number, row in enumerate(rows, start=1):
        title = _cell(row, TITLE_COL)
        project_id = _cell(row, PROJECT_ID_COL)
        project_name = _cell(row, PROJECT_NAME_COL)
        extracted_name, extracted_id = _split_title(title)
        print(f'| {number:<4} | {title} | {project_id} | {project_name} |')
        if project_id != extracted_id:
            id_mismatches += 1
            print(f' 警告: 项目批准文号不一致 - 标题中提取: {extracted_id}, 列中值: {project_id}')
        if project_name != extracted_name:
            name_mismatches += 1
            print(f' 警告: 项目名称不一致 - 标题中提取: {extracted_name}, 列中值: {project_name}')
    print(RULE)
    return id_mismatches, name_mismatches


def _report_name_column(rows):
    """Print whether each project name still ends with a bracketed id.

    Returns the number of names that do.
    """
    print('\n项目名称列检查:')
    print(RULE)
    print(f'| {"序号":<4} | {"项目名称":<80} | {"是否包含批准文号":<15} |')
    print(RULE)
    names_with_id = 0
    for number, row in enumerate(rows, start=1):
        project_name = _cell(row, PROJECT_NAME_COL)
        has_id = TRAILING_ID_RE.search(project_name) is not None
        names_with_id += has_id
        # Both branches used to be empty strings, leaving the column blank.
        marker = '是' if has_id else '否'
        print(f'| {number:<4} | {project_name} | {marker:<15} |')
    print(RULE)
    return names_with_id


def _print_summary(sample_count, id_mismatches, name_mismatches, names_with_id):
    """Print a summary that reflects what the checks actually found.

    The success statements are printed only when rows were sampled and no
    check failed; otherwise the problem counts are reported.
    """
    print('\n总结:')
    if sample_count == 0:
        print('1. 未读取到数据行,无法验证')
        return
    if id_mismatches or name_mismatches or names_with_id:
        print(
            f'1. 前{sample_count}条数据中发现: '
            f'项目批准文号不一致 {id_mismatches} 条, '
            f'项目名称不一致 {name_mismatches} 条, '
            f'项目名称列包含批准文号 {names_with_id} 条'
        )
        return
    print('1. 从CSV文件中可以看到项目批准文号和项目名称已经正确分离到不同列中')
    print('2. 标题列包含完整信息:项目名称[项目批准文号]')
    print('3. 项目批准文号列第7列只包含批准文号')
    print('4. 项目名称列第8列只包含纯项目名称不包含批准文号')
    print(f'5. 前{sample_count}条数据的项目名称和项目批准文号分离正确')


def main(csv_path=CSV_PATH):
    """Print the header list, the sample analysis, the name check and a summary.

    Args:
        csv_path: CSV file to inspect; defaults to the export named in CSV_PATH.

    Raises:
        OSError: if the file cannot be opened.
    """
    headers, rows = _load_sample(csv_path, SAMPLE_SIZE)

    print('\n原始表头:')
    for position, header in enumerate(headers, start=1):
        print(f'{position}. {header}')

    id_mismatches, name_mismatches = _report_sample(rows)
    names_with_id = _report_name_column(rows)
    _print_summary(len(rows), id_mismatches, name_mismatches, names_with_id)


if __name__ == '__main__':
    main()