69 lines
2.4 KiB
Python
69 lines
2.4 KiB
Python
|
|
"""Check how a scraped CSV splits titles into approval number and project name.

The script reads the header row and the first few data rows of the CSV, then:

1. lists the header names,
2. compares, for every sampled row, the approval-number and project-name
   columns against the values extracted from the title column (titles are
   expected to look like ``name[approval number]``),
3. reports whether any project-name cell still ends with a bracketed suffix,
4. prints a summary derived from the results of steps 2 and 3.
"""
import csv
import re
from itertools import islice

# Input file and number of data rows analysed by default.
CSV_PATH = 'data/浙江省公共资源交易中心_20260213_172414.csv'
SAMPLE_SIZE = 20

# Zero-based positions of the columns this script reads from each data row.
TITLE_COL = 0
PROJECT_ID_COL = 6
PROJECT_NAME_COL = 7

# A row must have at least this many cells for all three columns to exist.
_MIN_COLUMNS = max(TITLE_COL, PROJECT_ID_COL, PROJECT_NAME_COL) + 1

_RULE = '-' * 100

# Trailing "[...]" group.  The lazy match starts at the FIRST "[" and is
# anchored at the end, so a bracketed value that itself contains brackets
# (e.g. "[甬发改[2023]1号]") is captured as a whole.
_TRAILING_BRACKET_RE = re.compile(r'\[(.*?)\]$')


def split_title(title):
    """Split ``title`` into ``(project_name, approval_id)``.

    ``approval_id`` is the content of the trailing ``[...]`` group, or ``''``
    when the title has none.  ``project_name`` is the title with that group
    removed and surrounding whitespace stripped.
    """
    match = _TRAILING_BRACKET_RE.search(title)
    approval_id = match.group(1) if match else ''
    project_name = _TRAILING_BRACKET_RE.sub('', title).strip()
    return project_name, approval_id


def load_sample(path, limit=SAMPLE_SIZE):
    """Return ``(headers, rows)`` with at most ``limit`` data rows from ``path``.

    An empty file yields ``([], [])`` instead of raising ``StopIteration``.
    Errors from ``open`` (e.g. ``FileNotFoundError``) propagate to the caller.
    """
    # newline='' is what the csv module requires so that quoted fields with
    # embedded line breaks are parsed correctly; utf-8-sig drops an optional
    # BOM and decodes BOM-less UTF-8 exactly like plain utf-8.
    with open(path, 'r', encoding='utf-8-sig', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader, None)
        if headers is None:
            return [], []
        # islice stops after `limit` rows instead of parsing the whole file.
        return headers, list(islice(reader, limit))


def print_headers(headers):
    """Print the header names as a 1-based numbered list."""
    print('\n原始表头:')
    for position, header in enumerate(headers, start=1):
        print(f'{position}. {header}')


def check_split(rows, limit=SAMPLE_SIZE):
    """Print the per-row comparison table and return the mismatch counts.

    Returns ``(id_mismatches, name_mismatches, short_rows)`` where
    ``short_rows`` counts rows that were skipped because they have fewer
    cells than the columns being compared.
    """
    print(f'\n前{limit}条数据分析:')
    print(_RULE)
    print(f'| {"序号":<4} | {"标题":<80} | {"项目批准文号":<30} | {"项目名称":<80} |')
    print(_RULE)

    id_mismatches = 0
    name_mismatches = 0
    short_rows = 0
    for number, row in enumerate(rows, start=1):
        if len(row) < _MIN_COLUMNS:
            # Report and skip instead of crashing with IndexError.
            short_rows += 1
            print(f' 警告: 第{number}条数据只有{len(row)}列,已跳过')
            continue

        title = row[TITLE_COL]
        project_id = row[PROJECT_ID_COL]
        project_name = row[PROJECT_NAME_COL]
        extracted_name, extracted_id = split_title(title)

        print(f'| {number:<4} | {title} | {project_id} | {project_name} |')

        # Print details only for rows where a column disagrees with the title.
        if project_id != extracted_id:
            id_mismatches += 1
            print(f' 警告: 项目批准文号不一致 - 标题中提取: {extracted_id}, 列中值: {project_id}')
        if project_name != extracted_name:
            name_mismatches += 1
            print(f' 警告: 项目名称不一致 - 标题中提取: {extracted_name}, 列中值: {project_name}')

    print(_RULE)
    return id_mismatches, name_mismatches, short_rows


def check_name_column(rows):
    """Print whether each project-name cell ends with ``[...]``; return the count.

    Rows too short to be compared by ``check_split`` are skipped here as well.
    """
    print('\n项目名称列检查:')
    print(_RULE)
    print(f'| {"序号":<4} | {"项目名称":<80} | {"是否包含批准文号":<15} |')
    print(_RULE)

    names_with_id = 0
    for number, row in enumerate(rows, start=1):
        if len(row) < _MIN_COLUMNS:
            continue
        project_name = row[PROJECT_NAME_COL]
        has_id = bool(_TRAILING_BRACKET_RE.search(project_name))
        if has_id:
            names_with_id += 1
        print(f'| {number:<4} | {project_name} | {"是" if has_id else "否":<15} |')

    print(_RULE)
    return names_with_id


def print_summary(checked, id_mismatches, name_mismatches, names_with_id,
                  short_rows):
    """Print a summary that reflects what the checks actually found.

    The "everything is separated correctly" conclusions are printed only when
    at least one row was checked and no check reported a problem; otherwise
    the individual problem counts are listed.
    """
    print('\n总结:')
    if short_rows:
        print(f'注意: {short_rows}条数据列数不足{_MIN_COLUMNS}列,未参与检查')
    if checked == 0:
        print('没有可检查的数据行,无法得出结论')
        return

    if id_mismatches or name_mismatches or names_with_id:
        print(f'1. 共检查{checked}条数据')
        print(f'2. 项目批准文号不一致: {id_mismatches}条')
        print(f'3. 项目名称不一致: {name_mismatches}条')
        print(f'4. 项目名称列包含批准文号: {names_with_id}条')
        return

    print('1. 从CSV文件中可以看到,项目批准文号和项目名称已经正确分离到不同列中')
    print('2. 标题列包含完整信息:项目名称[项目批准文号]')
    print('3. 项目批准文号列(第7列)只包含批准文号')
    print('4. 项目名称列(第8列)只包含纯项目名称,不包含批准文号')
    print(f'5. 已检查的{checked}条数据的项目名称和项目批准文号分离正确')


def main(path=CSV_PATH, limit=SAMPLE_SIZE):
    """Run the full report for the first ``limit`` data rows of ``path``."""
    headers, rows = load_sample(path, limit)
    print_headers(headers)
    id_mismatches, name_mismatches, short_rows = check_split(rows, limit)
    names_with_id = check_name_column(rows)
    print_summary(len(rows) - short_rows, id_mismatches, name_mismatches,
                  names_with_id, short_rows)


if __name__ == '__main__':
    main()
|