清理已删除的测试文件,准备云端部署

This commit is contained in:
ztb-system
2026-02-25 18:17:00 +08:00
parent 5f93dbe5e4
commit 305f6b342c
29 changed files with 143 additions and 2536 deletions

View File

@@ -12,11 +12,12 @@ r"""
3. Linux cron每天早上 8:00:
0 8 * * * cd /path/to/ztb && python scheduler.py >> logs/cron.log 2>&1
"""
import glob
import logging
import sys
import os
import traceback
from datetime import datetime
from datetime import datetime, timedelta
# 确保项目根目录在 sys.path 中
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -59,18 +60,40 @@ DAILY_TASKS = [
"process": True,
"upload": True,
},
# 台州 - 工程建设 - 招标文件公示
# 台州 - 工程建设 - 招标计划公示
{
"site": "taizhou",
"max_pages": 100,
"category": "工程建设",
"notice_type": "招标文件公示",
"notice_type": "招标计划公示",
"process": True,
"upload": True,
},
]
# Number of days a data file is kept before it becomes eligible for cleanup.
KEEP_DAYS = 30


def cleanup_old_files(directory: str, keep_days: int = KEEP_DAYS) -> None:
    """Delete CSV and JSON files in ``directory`` older than ``keep_days`` days.

    Only entries directly inside ``directory`` that match ``*.csv`` or
    ``*.json`` are considered; age is judged by the file's modification time.
    The cleanup is best-effort: a file that cannot be inspected or removed is
    logged and skipped, and never aborts the rest of the scan.

    Args:
        directory: Directory to scan. If it is not an existing directory the
            function returns without doing anything.
        keep_days: Files whose modification time is more than this many days
            in the past are removed.
    """
    if not os.path.isdir(directory):
        return

    # The cutoff does not change during the scan, so compute it once.
    cutoff_ts = (datetime.now() - timedelta(days=keep_days)).timestamp()
    removed = 0
    for pattern in ("*.csv", "*.json"):
        for filepath in glob.glob(os.path.join(directory, pattern)):
            try:
                # The stat call lives inside the try block as well: the file
                # may disappear between the glob listing and this point.
                if os.path.getmtime(filepath) >= cutoff_ts:
                    continue
                os.remove(filepath)
            except FileNotFoundError:
                # Already gone (e.g. removed by another process); nothing to do.
                continue
            except OSError as exc:
                # Keep going, but leave a trace instead of failing silently.
                logger.warning(f"清理文件失败 {filepath}: {exc}")
                continue
            removed += 1

    if removed:
        logger.info(f"清理 {directory}: {removed} 个超过 {keep_days} 天的文件")
def run_task(task: dict, date_filter: str = "yesterday") -> int:
"""执行单个爬取任务,返回采集条数"""
site = task["site"]
@@ -137,6 +160,9 @@ def run_daily():
logger.debug(traceback.format_exc())
errors.append(desc)
# 清理过期数据文件
cleanup_old_files(DATA_DIR)
elapsed = (datetime.now() - start).total_seconds()
logger.info("=" * 40)
logger.info(f"定时任务完成: 共 {total} 条, 耗时 {elapsed:.0f}s")