Files
schoolNews/schoolNewsServ/.bin/mysql/sql/sensitiveData/importSensitiveWords.sh
2025-11-24 16:53:17 +08:00

147 lines
4.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
##############################################
# 敏感词批量导入脚本 (纯Shell实现)
# 功能:从 sensitive_word_dict.txt 读取敏感词并导入数据库
# 优势不需要Python环境只需要MySQL客户端
##############################################
set -e
# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# 数据库配置
DB_HOST="${DB_HOST:-localhost}"
DB_PORT="${DB_PORT:-3306}"
DB_USER="${DB_USER:-root}"
DB_PASSWORD="${DB_PASSWORD:-123456}"
DB_NAME="${DB_NAME:-school_news}"
# 脚本目录
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DICT_FILE="${SCRIPT_DIR}/sensitive_word_dict.txt"
echo "=================================================="
echo "敏感词批量导入工具 (Shell版本)"
echo "=================================================="
log_info "数据库: ${DB_HOST}:${DB_PORT}/${DB_NAME}"
log_info "敏感词文件: ${DICT_FILE}"
echo "=================================================="
echo ""
# 检查敏感词文件
if [ ! -f "${DICT_FILE}" ]; then
log_error "敏感词文件不存在: ${DICT_FILE}"
exit 1
fi
# 统计敏感词数量
TOTAL_WORDS=$(grep -v '^$' "${DICT_FILE}" | wc -l)
log_info "检测到 ${TOTAL_WORDS} 个敏感词"
if [ ${TOTAL_WORDS} -eq 0 ]; then
log_warn "敏感词文件为空"
exit 0
fi
# 检查MySQL连接
log_info "检查数据库连接..."
if ! mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" -e "SELECT 1;" &>/dev/null; then
log_error "数据库连接失败"
exit 1
fi
log_info "数据库连接成功"
echo ""
# 确认导入
log_warn "准备导入 ${TOTAL_WORDS} 个敏感词到数据库"
log_warn "这将清除现有的 deny 类型敏感词"
echo ""
# 检查是否是自动模式(通过 -y 参数或环境变量)
AUTO_CONFIRM=${AUTO_CONFIRM:-false}
if [ "$1" = "-y" ] || [ "$1" = "--yes" ] || [ "${AUTO_CONFIRM}" = "true" ]; then
log_info "自动确认模式,开始导入..."
else
read -p "是否继续?(y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
log_warn "用户取消导入"
exit 0
fi
fi
echo ""
# 开始导入
log_info "开始导入敏感词..."
START_TIME=$(date +%s)
# 预处理转义单引号生成临时词表文件仅一列word
TEMP_WORDS=$(mktemp)
trap "rm -f ${TEMP_WORDS}" EXIT
sed "s/'/''/g" "${DICT_FILE}" > "${TEMP_WORDS}"
# 使用 LOAD DATA LOCAL INFILE 批量导入,极大提升导入性能
log_info "使用 LOAD DATA LOCAL INFILE 批量导入敏感词..."
if mysql --local-infile=1 -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" "${DB_NAME}" <<EOF
SET NAMES utf8mb4;
-- 清除现有的 deny 类型敏感词
DELETE FROM tb_sensitive_word WHERE type = 'deny';
-- 从预处理文件批量导入敏感词LOCAL 由客户端读取,避免 secure-file-priv 限制)
LOAD DATA LOCAL INFILE '${TEMP_WORDS}'
INTO TABLE tb_sensitive_word
CHARACTER SET utf8mb4
LINES TERMINATED BY '\n'
(word)
SET type = 'deny';
EOF
then
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
echo ""
echo "=================================================="
log_info "导入完成!"
log_info "成功导入: ${TOTAL_WORDS} 个敏感词(基于文件统计)"
log_info "耗时: ${DURATION}"
echo "=================================================="
else
log_error "数据库导入失败"
exit 1
fi
# 验证结果
log_info "验证导入结果..."
IMPORTED_COUNT=$(mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" -D"${DB_NAME}" -sNe \
"SELECT COUNT(*) FROM tb_sensitive_word WHERE type = 'deny';")
echo ""
log_info "数据库中当前有 ${IMPORTED_COUNT} 个 deny 类型敏感词"
if [ "${IMPORTED_COUNT}" -eq "${TOTAL_WORDS}" ]; then
log_info "✅ 验证通过:导入数量与预期一致"
else
log_warn "⚠️ 导入数量不匹配:预期 ${TOTAL_WORDS},实际 ${IMPORTED_COUNT}"
fi
echo ""
log_info "敏感词导入完成!"