镜像制作

This commit is contained in:
2025-11-24 11:50:15 +08:00
parent 12592c5a24
commit 07bd166257
53 changed files with 3822 additions and 2140 deletions

View File

@@ -0,0 +1,182 @@
#!/bin/bash
##############################################
# Sensitive-word bulk import script (pure shell implementation)
# Purpose:   read the words listed in sensitive_word_dict.txt and load them
#            into the database.
# Advantage: no Python environment is needed, only the MySQL client.
##############################################

# Stop at the first unhandled command failure.
set -e

# ANSI colour codes. They are kept as literal backslash sequences and only
# turn into real escape characters when a logging helper prints them.
NC='\033[0m' # reset to the terminal default
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
#######################################
# Prints an informational message prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   the tagged message on stdout
#######################################
log_info() {
  # %b expands the escape sequences stored in the colour variables, while %s
  # keeps any backslashes inside the message literal (echo -e expanded them,
  # which corrupted messages containing e.g. "\n" or "\t").
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}
#######################################
# Prints a warning message prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   the tagged message on stdout
#######################################
log_warn() {
  # Only the colour variables go through %b; the message uses %s so that
  # backslashes in it are not interpreted as escape sequences.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"
}
#######################################
# Prints an error message prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   the tagged message on stderr
#######################################
log_error() {
  # Errors belong on stderr so they stay visible when stdout is redirected.
  # %s keeps backslashes in the message literal; only the colour variables
  # are expanded via %b.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
}
# Database connection settings. A value already present in the environment
# wins; an unset or empty variable receives the default shown here.
: "${DB_HOST:=localhost}"
: "${DB_PORT:=3306}"
: "${DB_USER:=root}"
: "${DB_PASSWORD:=123456}"
: "${DB_NAME:=school_news}"

# Absolute directory containing this script; the dictionary file is looked
# up next to it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
DICT_FILE=${SCRIPT_DIR}/sensitive_word_dict.txt
# Startup banner: tool title followed by the target database and the
# dictionary file that will be read.
banner_rule="=================================================="
echo "${banner_rule}"
echo "敏感词批量导入工具 (Shell版本)"
echo "${banner_rule}"
log_info "数据库: ${DB_HOST}:${DB_PORT}/${DB_NAME}"
log_info "敏感词文件: ${DICT_FILE}"
echo "${banner_rule}"
echo ""
# The dictionary must exist as a regular file before anything else happens.
if [ ! -f "${DICT_FILE}" ]; then
  log_error "敏感词文件不存在: ${DICT_FILE}"
  exit 1
fi

# Count the non-empty lines, i.e. the words that will be imported.
# grep -c exits 1 when the count is zero (a legitimate result here) and with
# a status above 1 on a real error such as an unreadable file. The status is
# captured explicitly so that a read failure is reported as an error instead
# of being mistaken for an empty dictionary.
grep_status=0
TOTAL_WORDS=$(grep -c -v '^$' -- "${DICT_FILE}") || grep_status=$?
if [ "${grep_status}" -gt 1 ]; then
  log_error "无法读取敏感词文件: ${DICT_FILE}"
  exit 1
fi
log_info "检测到 ${TOTAL_WORDS} 个敏感词"

# Nothing to import: leave successfully without touching the database.
if [ "${TOTAL_WORDS}" -eq 0 ]; then
  log_warn "敏感词文件为空"
  exit 0
fi
# Probe the server with a trivial query before any data is modified.
# The client's stdout is discarded, but its stderr is captured so that the
# actual reason for a failure (bad credentials, unreachable host, missing
# mysql binary, ...) can be shown instead of being silently dropped.
# NOTE(review): the password is passed on the command line, as elsewhere in
# this script; consider an option file if process-list exposure matters.
log_info "检查数据库连接..."
if ! conn_error=$(mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" -e "SELECT 1;" 2>&1 >/dev/null); then
  log_error "数据库连接失败"
  if [ -n "${conn_error}" ]; then
    printf '%s\n' "${conn_error}" >&2
  fi
  exit 1
fi
log_info "数据库连接成功"
echo ""
# Announce what is about to happen: the import replaces every existing
# word of type 'deny'.
log_warn "准备导入 ${TOTAL_WORDS} 个敏感词到数据库"
log_warn "这将清除现有的 deny 类型敏感词"
echo ""

# The prompt is skipped when the first argument is -y / --yes or when the
# AUTO_CONFIRM environment variable equals "true".
: "${AUTO_CONFIRM:=false}"
skip_prompt=false
case "$1" in
  -y | --yes) skip_prompt=true ;;
esac
if [ "${AUTO_CONFIRM}" = "true" ]; then
  skip_prompt=true
fi

if [ "${skip_prompt}" = "true" ]; then
  log_info "自动确认模式,开始导入..."
else
  # Read a single keypress; only "y" or "Y" continues.
  read -r -n 1 -p "是否继续?(y/N): "
  echo
  case "${REPLY}" in
    [Yy]) ;;
    *)
      log_warn "用户取消导入"
      exit 0
      ;;
  esac
fi
echo ""
# Start of the import phase; remember the start time for the duration report.
log_info "开始导入敏感词..."
START_TIME=$(date +%s)

# Scratch file that receives the generated SQL. The trap body is
# single-quoted so the variable is expanded (and quoted) when the trap fires;
# this keeps the cleanup correct even if the temp path contains spaces or
# glob characters.
TEMP_SQL=$(mktemp)
trap 'rm -f -- "${TEMP_SQL}"' EXIT
#######################################
# Escapes a value for use inside a single-quoted MySQL string literal.
# Backslashes are doubled first and single quotes second, so a word that
# ends in a backslash can no longer swallow the closing quote.
# NOTE(review): doubling backslashes assumes the server's default sql_mode
# (NO_BACKSLASH_ESCAPES not set) - confirm for this deployment.
# Arguments: $1 - raw text
# Outputs:   the escaped text on stdout, without a trailing newline
#######################################
sql_escape_literal() {
  local value=$1
  local quote="'"
  value=${value//\\/\\\\}
  value=${value//${quote}/${quote}${quote}}
  printf '%s' "${value}"
}

# Build the SQL script in ${TEMP_SQL}.
# DELETE and INSERT are wrapped in one transaction: the mysql client stops at
# the first error in batch mode, so a failing INSERT never reaches COMMIT and
# the previous deny list is rolled back instead of being left wiped.
# NOTE(review): rollback requires a transactional storage engine for
# tb_sensitive_word (e.g. InnoDB) - confirm against the schema.
log_info "生成SQL语句..."
cat > "${TEMP_SQL}" <<EOF
-- 敏感词批量导入
USE ${DB_NAME};
-- 设置字符集
SET NAMES utf8mb4;
START TRANSACTION;
-- 清除现有的deny类型敏感词
DELETE FROM tb_sensitive_word WHERE type = 'deny';
-- 批量插入敏感词
INSERT INTO tb_sensitive_word (word, type) VALUES
EOF

# Emit one "(word, 'deny')" row per non-empty dictionary line. Rows are
# written through fd 3 so the file is opened once for the whole loop while
# log_info keeps writing to stdout. The separator is placed before every row
# except the first, so the statement is well-formed no matter how many rows
# are produced. The script's earlier empty-file check ensures there is at
# least one row.
COUNTER=0
while IFS= read -r word || [ -n "$word" ]; do
  # Skip blank lines.
  if [ -z "$word" ]; then
    continue
  fi
  if [ "${COUNTER}" -gt 0 ]; then
    printf ',\n' >&3
  fi
  printf "('%s', 'deny')" "$(sql_escape_literal "${word}")" >&3
  COUNTER=$((COUNTER + 1))
  # Progress report every 1000 words.
  if [ $((COUNTER % 1000)) -eq 0 ]; then
    log_info "已处理 ${COUNTER}/${TOTAL_WORDS} 个敏感词..."
  fi
done < "${DICT_FILE}" 3>> "${TEMP_SQL}"

# Terminate the INSERT, commit, and append a count query whose result the
# mysql client prints after the import.
cat >> "${TEMP_SQL}" <<EOF
;
COMMIT;
-- 验证导入结果
SELECT COUNT(*) AS '导入数量' FROM tb_sensitive_word WHERE type = 'deny';
EOF
log_info "SQL语句生成完成(${COUNTER}个敏感词)"
echo ""
# Feed the generated script to the mysql client. On failure report the error
# and stop; on success print a summary with the number of words and the
# elapsed time.
log_info "执行数据库导入..."
if ! mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" < "${TEMP_SQL}"; then
  log_error "数据库导入失败"
  exit 1
fi

# START_TIME and END_TIME come from `date +%s`, so the difference is in
# seconds; the unit is now included in the message.
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
echo ""
echo "=================================================="
log_info "导入完成!"
log_info "成功导入: ${COUNTER} 个敏感词"
log_info "耗时: ${DURATION}秒"
echo "=================================================="
# Re-count the deny-type rows and compare with the number of words written
# to the SQL script. A failure of the query itself is reported explicitly;
# previously `set -e` ended the script here without any message.
log_info "验证导入结果..."
if ! IMPORTED_COUNT=$(mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" -D"${DB_NAME}" -sNe \
  "SELECT COUNT(*) FROM tb_sensitive_word WHERE type = 'deny';"); then
  log_error "验证查询失败"
  exit 1
fi
echo ""
log_info "数据库中当前有 ${IMPORTED_COUNT} 个 deny 类型敏感词"

# String comparison: both sides are plain decimal numbers, and an unexpected
# empty result falls into the mismatch branch instead of raising an
# "integer expression expected" error.
if [ "${IMPORTED_COUNT}" = "${COUNTER}" ]; then
  log_info "✅ 验证通过:导入数量与预期一致"
else
  log_warn "⚠️ 导入数量不匹配:预期 ${COUNTER},实际 ${IMPORTED_COUNT}"
fi
echo ""
log_info "敏感词导入完成!"