69 lines
2.4 KiB
Python
69 lines
2.4 KiB
Python
import csv
|
||
import re
|
||
|
||
# Input CSV and the number of leading data rows this script inspects.
CSV_PATH = 'data/浙江省公共资源交易中心_20260213_172414.csv'
SAMPLE_SIZE = 20

# Zero-based column positions read by the checks.
TITLE_COL = 0
PROJECT_ID_COL = 6
PROJECT_NAME_COL = 7

# Horizontal rule printed around each table.
RULE = '-' * 100

# A bracketed group at the very end of a string, e.g. "name[approval-no]".
# The lazy group starts at the first "[" from which the rest of the string
# can close on a final "]", so an ID that itself contains brackets is kept
# whole. Compiled once because it is applied to every sampled row.
TRAILING_ID = re.compile(r'\[(.*?)\]$')


def load_sample(path=CSV_PATH, limit=SAMPLE_SIZE):
    """Read the header row and at most *limit* non-empty data rows.

    Returns ``(headers, rows)``; ``headers`` is ``[]`` when the file is
    empty. Reading stops as soon as *limit* rows have been collected, so
    the rest of the file is never loaded. Rows that csv.reader yields as
    ``[]`` (blank lines) are skipped.
    """
    # newline='' is what the csv module asks for so that line breaks inside
    # quoted fields survive. utf-8-sig reads plain UTF-8 too, but drops a
    # leading BOM that would otherwise stick to the first header.
    with open(path, 'r', encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader, [])
        data_rows = (row for row in reader if row)
        # zip() asks range() first, so no extra row is consumed at the limit.
        rows = [row for _, row in zip(range(limit), data_rows)]
    return headers, rows


def split_title(title):
    """Split *title* into ``(name, approval_id)``.

    ``approval_id`` is the text inside the trailing ``[...]`` group, or
    ``''`` when the title has none. ``name`` is the title with that group
    removed and surrounding whitespace stripped.
    """
    found = TRAILING_ID.search(title)
    approval_id = found.group(1) if found else ''
    name = TRAILING_ID.sub('', title).strip()
    return name, approval_id


def _cell(row, index):
    """Return ``row[index]``, or ``''`` when the row is too short."""
    return row[index] if index < len(row) else ''


def row_warnings(row):
    """Return the warning lines for one data row (empty list if consistent).

    A row with too few columns to hold the project-name column gets a single
    "not enough columns" warning instead of raising IndexError. Otherwise
    the approval number and project name extracted from the title are
    compared with the dedicated columns.
    """
    if len(row) <= PROJECT_NAME_COL:
        needed = PROJECT_NAME_COL + 1
        return [f' 警告: 列数不足 - 需要至少{needed}列, 实际{len(row)}列']

    project_id = row[PROJECT_ID_COL]
    project_name = row[PROJECT_NAME_COL]
    extracted_name, extracted_id = split_title(row[TITLE_COL])

    warnings = []
    if project_id != extracted_id:
        warnings.append(
            f' 警告: 项目批准文号不一致 - 标题中提取: {extracted_id}, 列中值: {project_id}'
        )
    if project_name != extracted_name:
        warnings.append(
            f' 警告: 项目名称不一致 - 标题中提取: {extracted_name}, 列中值: {project_name}'
        )
    return warnings


def summary_lines(checked, problems):
    """Return the summary text for *checked* rows, *problems* of them flagged.

    The "everything is separated correctly" conclusions are only returned
    when rows were actually checked and none of them was flagged.
    """
    if checked == 0:
        return ['没有读取到数据行,无法验证列分离结果']
    if problems:
        return [f'前{checked}条数据中有{problems}条存在问题,详见上面的警告和检查表']
    return [
        '1. 从CSV文件中可以看到,项目批准文号和项目名称已经正确分离到不同列中',
        '2. 标题列包含完整信息:项目名称[项目批准文号]',
        '3. 项目批准文号列(第7列)只包含批准文号',
        '4. 项目名称列(第8列)只包含纯项目名称,不包含批准文号',
        f'5. 前{checked}条数据的项目名称和项目批准文号分离正确',
    ]


def main(path=CSV_PATH, limit=SAMPLE_SIZE):
    """Print the header list, both check tables and the summary.

    Returns the number of sampled rows that had at least one problem: a
    warning from row_warnings(), or a project-name cell that still ends
    with a bracketed group.
    """
    headers, rows = load_sample(path, limit)
    flagged = set()

    # Header listing.
    print('\n原始表头:')
    for number, header in enumerate(headers, start=1):
        print(f'{number}. {header}')

    # Table 1: title vs. the approval-number and project-name columns.
    print(f'\n前{limit}条数据分析:')
    print(RULE)
    print(f'| {"序号":<4} | {"标题":<80} | {"项目批准文号":<30} | {"项目名称":<80} |')
    print(RULE)
    for number, row in enumerate(rows, start=1):
        title = _cell(row, TITLE_COL)
        project_id = _cell(row, PROJECT_ID_COL)
        project_name = _cell(row, PROJECT_NAME_COL)
        print(f'| {number:<4} | {title} | {project_id} | {project_name} |')

        # Spell out every inconsistency right below the row it belongs to.
        warnings = row_warnings(row)
        for warning in warnings:
            print(warning)
        if warnings:
            flagged.add(number)
    print(RULE)

    # Table 2: the project-name column must not carry an approval number.
    print('\n项目名称列检查:')
    print(RULE)
    print(f'| {"序号":<4} | {"项目名称":<80} | {"是否包含批准文号":<15} |')
    print(RULE)
    for number, row in enumerate(rows, start=1):
        project_name = _cell(row, PROJECT_NAME_COL)
        has_id = bool(TRAILING_ID.search(project_name))
        print(f'| {number:<4} | {project_name} | {"是" if has_id else "否":<15} |')
        if has_id:
            flagged.add(number)
    print(RULE)

    # Summary, driven by what the checks above actually found.
    print('\n总结:')
    for line in summary_lines(len(rows), len(flagged)):
        print(line)
    return len(flagged)


if __name__ == '__main__':
    main()
|