清理已删除的测试文件,准备云端部署
This commit is contained in:
32
scheduler.py
32
scheduler.py
@@ -12,11 +12,12 @@ r"""
|
||||
3. Linux cron(每天早上 8:00):
|
||||
0 8 * * * cd /path/to/ztb && python scheduler.py >> logs/cron.log 2>&1
|
||||
"""
|
||||
import glob
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# 确保项目根目录在 sys.path 中
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
@@ -59,18 +60,40 @@ DAILY_TASKS = [
|
||||
"process": True,
|
||||
"upload": True,
|
||||
},
|
||||
# 台州 - 工程建设 - 招标文件公示
|
||||
# 台州 - 工程建设 - 招标计划公示
|
||||
{
|
||||
"site": "taizhou",
|
||||
"max_pages": 100,
|
||||
"category": "工程建设",
|
||||
"notice_type": "招标文件公示",
|
||||
"notice_type": "招标计划公示",
|
||||
"process": True,
|
||||
"upload": True,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# Number of days to keep data files before they are eligible for cleanup
KEEP_DAYS = 30


def cleanup_old_files(directory: str, keep_days: int = KEEP_DAYS) -> None:
    """Delete CSV and JSON files in *directory* older than *keep_days* days.

    Only files directly inside *directory* that match ``*.csv`` or ``*.json``
    are considered; age is judged by the file's modification time. The
    cleanup is best-effort: a file that cannot be inspected or removed is
    skipped so that the remaining files are still processed.

    Args:
        directory: Directory to scan. If it is not an existing directory the
            function returns without doing anything.
        keep_days: Files whose modification time is more than this many days
            in the past are deleted.
    """
    if not os.path.isdir(directory):
        return

    # The cutoff does not change while scanning, so compute it once.
    cutoff_ts = (datetime.now() - timedelta(days=keep_days)).timestamp()

    removed = 0
    for pattern in ("*.csv", "*.json"):
        for filepath in glob.glob(os.path.join(directory, pattern)):
            try:
                # The stat call lives inside the try as well: the file may
                # vanish between glob() and getmtime().
                if os.path.getmtime(filepath) >= cutoff_ts:
                    continue
                os.remove(filepath)
            except FileNotFoundError:
                # Already gone (e.g. removed by another process); nothing to do.
                continue
            except OSError as exc:
                # Keep going, but leave a trace instead of failing silently.
                logger.warning(f"删除过期文件失败: {filepath} ({exc})")
                continue
            removed += 1

    if removed:
        logger.info(f"清理 {directory} 中 {removed} 个超过 {keep_days} 天的文件")
|
||||
|
||||
|
||||
def run_task(task: dict, date_filter: str = "yesterday") -> int:
|
||||
"""执行单个爬取任务,返回采集条数"""
|
||||
site = task["site"]
|
||||
@@ -137,6 +160,9 @@ def run_daily():
|
||||
logger.debug(traceback.format_exc())
|
||||
errors.append(desc)
|
||||
|
||||
# 清理过期数据文件
|
||||
cleanup_old_files(DATA_DIR)
|
||||
|
||||
elapsed = (datetime.now() - start).total_seconds()
|
||||
logger.info("=" * 40)
|
||||
logger.info(f"定时任务完成: 共 {total} 条, 耗时 {elapsed:.0f}s")
|
||||
|
||||
Reference in New Issue
Block a user