feat: 添加云端部署准备工作
- 添加 .env.example 环境变量模板
- 创建部署辅助脚本(run_crawler.sh、health_check.sh、backup.sh)
- 更新 .gitignore 排除备份目录
- 添加云端部署指南文档
This commit is contained in:
70
scripts/backup.sh
Normal file
70
scripts/backup.sh
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
# Data backup script.
#
# Archives the project's data/ and logs/ directories into
# <project>/backups as timestamped .tar.gz files, then deletes archives
# older than KEEP_DAYS days.
#
# Exit status: 0 when every step succeeded, 1 when any step failed.

# Resolve the project root: the parent of the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Abort when the project root is unreachable; the relative paths used below
# (data/, logs/) would otherwise resolve against the wrong directory.
cd "$PROJECT_DIR" || {
  echo "❌ 无法进入项目目录: $PROJECT_DIR" >&2
  exit 1
}

echo "=========================================="
echo "开始备份: $(date '+%Y-%m-%d %H:%M:%S')"
echo "=========================================="

# Configuration
BACKUP_DIR="${PROJECT_DIR}/backups"
DATE=$(date +"%Y%m%d_%H%M%S")
KEEP_DAYS=30
STATUS=0 # flipped to 1 as soon as any step fails

# Create the backup directory; nothing can be archived without it.
mkdir -p "$BACKUP_DIR" || {
  echo "❌ 无法创建备份目录: $BACKUP_DIR" >&2
  exit 1
}

echo "备份目录: $BACKUP_DIR"
echo "备份日期: $DATE"
echo ""

#######################################
# Archive one project sub-directory into BACKUP_DIR.
# Globals:   BACKUP_DIR (read), DATE (read)
# Arguments: $1 - directory name relative to the project root
#            $2 - label used in the progress messages
# Outputs:   progress on stdout, failure details on stderr
# Returns:   0 on success or when the directory is absent (skipped),
#            1 when tar reports an error
#######################################
backup_directory() {
  local src=$1
  local label=$2
  local archive="${BACKUP_DIR}/${src}_${DATE}.tar.gz"
  local tar_rc

  if [ ! -d "$src" ]; then
    echo "⚠️ ${src}目录不存在,跳过${label}备份"
    return 0
  fi

  tar -czf "$archive" "$src/"
  tar_rc=$?
  if [ "$tar_rc" -ne 0 ]; then
    # Keep whatever tar managed to write (it may still be partly usable),
    # but never report the step as successful.
    echo "❌ ${label}备份失败 (tar 退出码: ${tar_rc}),归档可能不完整: $archive" >&2
    return 1
  fi

  echo "✅ ${label}备份完成: $archive"
  echo " 文件大小: $(du -h "$archive" | cut -f1)"
}

# 1. Back up the data directory
echo "[1/3] 备份数据目录..."
backup_directory data "数据" || STATUS=1

# 2. Back up the logs directory (optional)
echo ""
echo "[2/3] 备份日志目录..."
backup_directory logs "日志" || STATUS=1

# 3. Prune old backups
echo ""
echo "[3/3] 清理${KEEP_DAYS}天前的旧备份..."
# One find pass: -print only fires after -delete succeeded, so the reported
# count is exactly the number of files that were removed.
OLD_BACKUPS=$(find "$BACKUP_DIR" -type f -name "*.tar.gz" \
  -mtime +"${KEEP_DAYS}" -delete -print | wc -l | tr -d '[:space:]')
if [ "$OLD_BACKUPS" -gt 0 ]; then
  echo "✅ 已删除 ${OLD_BACKUPS} 个旧备份文件"
else
  echo "✅ 没有需要清理的旧备份"
fi

# Summary
# Count archives with a glob instead of parsing ls; nullglob makes the
# array empty (count 0) when nothing matches.
shopt -s nullglob
ARCHIVES=("$BACKUP_DIR"/*.tar.gz)
shopt -u nullglob

echo ""
echo "=========================================="
echo "备份完成: $(date '+%Y-%m-%d %H:%M:%S')"
echo "备份文件数量: ${#ARCHIVES[@]}"
echo "总备份大小: $(du -sh "$BACKUP_DIR" 2>/dev/null | cut -f1)"
echo "=========================================="

echo ""
if [ "$STATUS" -eq 0 ]; then
  echo "✅ 备份任务完成!"
else
  echo "❌ 备份任务完成,但存在失败的步骤" >&2
fi
exit "$STATUS"
|
||||
104
scripts/health_check.sh
Normal file
104
scripts/health_check.sh
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/bin/bash
# Health check script.
#
# Reports on the crawler's runtime state: whether a main.py process is
# running, disk usage of the project's filesystem, presence of data and log
# files, and presence of the configuration files.
#
# Exit status: 0 when no hard problem was found, 1 otherwise.

# Resolve the project root: the parent of the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Every check below uses paths relative to the project root.
cd "$PROJECT_DIR" || {
  echo "❌ 无法进入项目目录: $PROJECT_DIR" >&2
  exit 1
}

echo "=========================================="
echo "健康检查: $(date '+%Y-%m-%d %H:%M:%S')"
echo "=========================================="

STATUS=0

# 1. Check for the Python process
echo ""
echo "[1/5] 检查Python进程..."
# Query once so the reported PIDs are the ones the decision was based on.
CRAWLER_PIDS=$(pgrep -f "main.py")
if [ -n "$CRAWLER_PIDS" ]; then
  echo "✅ 爬虫正在运行"
  echo " 进程ID: $CRAWLER_PIDS"
else
  echo "⚠️ 爬虫未运行"
fi

# 2. Check disk space
echo ""
echo "[2/5] 检查磁盘空间..."
# -P forces the POSIX single-line-per-filesystem layout; without it a long
# device name wraps onto its own line and field 5 of line 2 is no longer the
# capacity column.
DISK_USAGE=$(df -P . | awk 'NR==2 {sub(/%/, "", $5); print $5}')

if ! [[ "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
  # Do not report "normal" when the value could not be parsed.
  echo "⚠️ 无法获取磁盘使用率"
else
  echo " 当前使用率: ${DISK_USAGE}%"
  if [ "$DISK_USAGE" -gt 80 ]; then
    echo "❌ 警告:磁盘空间使用率超过80%!"
    STATUS=1
  elif [ "$DISK_USAGE" -gt 60 ]; then
    echo "⚠️ 磁盘空间使用率超过60%"
  else
    echo "✅ 磁盘空间正常"
  fi
fi

# 3. Check data files
echo ""
echo "[3/5] 检查数据文件..."
if [ -d "data" ]; then
  CSV_COUNT=$(find data -name "*.csv" 2>/dev/null | wc -l)
  JSON_COUNT=$(find data -name "*.json" 2>/dev/null | wc -l)
  echo " CSV文件数: $CSV_COUNT"
  echo " JSON文件数: $JSON_COUNT"
  TOTAL_COUNT=$((CSV_COUNT + JSON_COUNT))
  if [ "$TOTAL_COUNT" -gt 0 ]; then
    echo "✅ 数据文件存在"
  else
    echo "⚠️ 暂无数据文件"
  fi
else
  echo "❌ 警告:data目录不存在"
  STATUS=1
fi

# 4. Check log files
echo ""
echo "[4/5] 检查日志文件..."
if [ -d "logs" ]; then
  LOG_COUNT=$(find logs -name "*.log" 2>/dev/null | wc -l)
  echo " 日志文件数: $LOG_COUNT"

  # Find the most recently modified top-level log by comparing mtimes
  # directly instead of parsing ls output.
  LATEST_LOG=""
  for candidate in logs/*.log; do
    [ -f "$candidate" ] || continue # unmatched glob or non-file entry
    if [ -z "$LATEST_LOG" ] || [ "$candidate" -nt "$LATEST_LOG" ]; then
      LATEST_LOG=$candidate
    fi
  done

  if [ -n "$LATEST_LOG" ]; then
    echo " 最新日志: $LATEST_LOG"
    echo " 最后几行:"
    tail -n 5 "$LATEST_LOG" | sed 's/^/ /'
  fi
else
  echo "⚠️ logs目录不存在"
fi

# 5. Check configuration files
echo ""
echo "[5/5] 检查配置文件..."
for config_file in config.py config_fixed.py; do
  if [ -f "$config_file" ]; then
    echo "✅ ${config_file} 存在"
  else
    echo "❌ ${config_file} 不存在"
    STATUS=1
  fi
done

echo ""
echo "=========================================="
if [ "$STATUS" -eq 0 ]; then
  echo "✅ 健康检查完成,状态正常"
else
  echo "⚠️ 健康检查完成,存在警告"
fi
echo "=========================================="

exit "$STATUS"
|
||||
57
scripts/run_crawler.sh
Normal file
57
scripts/run_crawler.sh
Normal file
@@ -0,0 +1,57 @@
|
||||
#!/bin/bash
# Tender-announcement crawler runner.
#
# Intended for scheduled execution on a cloud server: switches to the
# project root, activates the virtualenv when one is available, runs
# main.py and captures its output in a timestamped file under logs/.
#
# Exit status: the exit code of the crawler, or 1 when setup fails.

# Resolve the project root: the parent of the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Abort instead of running main.py from whatever directory we started in.
cd "$PROJECT_DIR" || {
  echo "❌ 无法进入项目目录: $PROJECT_DIR" >&2
  exit 1
}

echo "=========================================="
echo "开始执行爬虫任务: $(date '+%Y-%m-%d %H:%M:%S')"
echo "项目目录: $PROJECT_DIR"
echo "=========================================="

# Activate the virtualenv. Test for the activate script itself so a missing
# or half-created venv produces the warning rather than a raw `source` error.
if [ -f "venv/bin/activate" ]; then
  echo "激活虚拟环境..."
  # shellcheck source=/dev/null
  source venv/bin/activate
else
  echo "警告: 虚拟环境不存在,将使用系统Python"
fi

# Prefer `python` (as before); fall back to `python3` only on hosts where
# `python` is not installed at all.
PYTHON_BIN=python
if ! command -v python >/dev/null 2>&1 && command -v python3 >/dev/null 2>&1; then
  PYTHON_BIN=python3
fi

# Create the log directory; without it no output can be captured.
mkdir -p logs || {
  echo "❌ 无法创建日志目录: ${PROJECT_DIR}/logs" >&2
  exit 1
}

# Timestamped log file for this run
DATE=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="logs/crawler_${DATE}.log"

echo "日志文件: $LOG_FILE"
{
  echo ""
  echo "=========================================="
  echo "开始执行爬虫任务: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "=========================================="
} >> "$LOG_FILE"

# Run the crawler (all sites, all announcement types, with AI processing and
# upload enabled, per the original script's description of these flags).
echo "执行爬虫命令..."
"$PYTHON_BIN" main.py -s all -P -U >> "$LOG_FILE" 2>&1
EXIT_CODE=$?

{
  echo ""
  echo "=========================================="
  echo "爬虫任务完成: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "退出码: $EXIT_CODE"
  echo "=========================================="
} >> "$LOG_FILE"

if [ "$EXIT_CODE" -eq 0 ]; then
  echo "✅ 爬虫任务执行成功!"
  echo "日志文件: $LOG_FILE"
else
  echo "❌ 爬虫任务执行失败!退出码: $EXIT_CODE"
  echo "请查看日志文件: $LOG_FILE"
fi

echo "=========================================="
exit "$EXIT_CODE"
|
||||
Reference in New Issue
Block a user