镜像制作
This commit is contained in:
@@ -14,12 +14,12 @@ YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# 数据库配置
|
||||
DB_HOST="localhost"
|
||||
DB_PORT="3306"
|
||||
DB_USER="root"
|
||||
DB_PASSWORD="123456"
|
||||
DB_NAME="school_news"
|
||||
# 数据库配置(优先使用环境变量)
|
||||
DB_HOST="${DB_HOST:-localhost}"
|
||||
DB_PORT="${DB_PORT:-3306}"
|
||||
DB_USER="${DB_USER:-root}"
|
||||
DB_PASSWORD="${DB_PASSWORD:-123456}"
|
||||
DB_NAME="${DB_NAME:-school_news}"
|
||||
|
||||
# 脚本目录
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
@@ -101,19 +101,6 @@ execute_init_script() {
|
||||
import_sensitive_words() {
|
||||
print_message $BLUE "开始导入敏感词数据..."
|
||||
|
||||
# 检查conda是否可用
|
||||
if ! command -v conda &> /dev/null; then
|
||||
print_message $YELLOW "conda命令未找到,跳过敏感词导入"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 检查schoolNewsCrawler环境是否存在
|
||||
if ! conda env list | grep -q "schoolNewsCrawler"; then
|
||||
print_message $YELLOW "conda环境 'schoolNewsCrawler' 不存在,跳过敏感词导入"
|
||||
print_message $YELLOW "提示: 可以使用以下命令创建环境: conda create -n schoolNewsCrawler python=3.10"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 切换到敏感词脚本目录
|
||||
local sensitive_dir="$SCRIPT_DIR/sensitiveData"
|
||||
if [ ! -d "$sensitive_dir" ]; then
|
||||
@@ -121,25 +108,63 @@ import_sensitive_words() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ ! -f "$sensitive_dir/writeWord.py" ]; then
|
||||
print_message $YELLOW "敏感词脚本不存在: $sensitive_dir/writeWord.py"
|
||||
# 检查Shell脚本是否存在
|
||||
if [ -f "$sensitive_dir/importSensitiveWords.sh" ]; then
|
||||
print_message $BLUE "使用Shell脚本导入敏感词"
|
||||
cd "$sensitive_dir"
|
||||
|
||||
# 导出数据库配置环境变量
|
||||
export DB_HOST="$DB_HOST"
|
||||
export DB_PORT="$DB_PORT"
|
||||
export DB_USER="$DB_USER"
|
||||
export DB_PASSWORD="$DB_PASSWORD"
|
||||
export DB_NAME="$DB_NAME"
|
||||
export AUTO_CONFIRM=true
|
||||
|
||||
bash importSensitiveWords.sh -y
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
print_message $GREEN "敏感词数据导入成功"
|
||||
else
|
||||
print_message $YELLOW "敏感词数据导入失败,但不影响系统运行"
|
||||
fi
|
||||
|
||||
cd "$SCRIPT_DIR"
|
||||
return 0
|
||||
fi
|
||||
|
||||
print_message $BLUE "激活conda环境: schoolNewsCrawler"
|
||||
cd "$sensitive_dir"
|
||||
|
||||
# 使用conda run来在指定环境中执行命令,添加-y参数自动确认
|
||||
conda run -n schoolNewsCrawler python writeWord.py -y
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
print_message $GREEN "敏感词数据导入成功"
|
||||
else
|
||||
print_message $YELLOW "敏感词数据导入失败,但不影响系统运行"
|
||||
# 兼容旧的Python脚本(如果Shell脚本不存在)
|
||||
if [ -f "$sensitive_dir/writeWord.py" ]; then
|
||||
print_message $YELLOW "使用Python脚本导入敏感词(建议使用Shell版本)"
|
||||
|
||||
# 检查conda是否可用
|
||||
if ! command -v conda &> /dev/null; then
|
||||
print_message $YELLOW "conda命令未找到,跳过敏感词导入"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 检查schoolNewsCrawler环境是否存在
|
||||
if ! conda env list | grep -q "schoolNewsCrawler"; then
|
||||
print_message $YELLOW "conda环境 'schoolNewsCrawler' 不存在,跳过敏感词导入"
|
||||
return 0
|
||||
fi
|
||||
|
||||
cd "$sensitive_dir"
|
||||
conda run -n schoolNewsCrawler python writeWord.py -y
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
print_message $GREEN "敏感词数据导入成功"
|
||||
else
|
||||
print_message $YELLOW "敏感词数据导入失败,但不影响系统运行"
|
||||
fi
|
||||
|
||||
cd "$SCRIPT_DIR"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 返回脚本目录
|
||||
cd "$SCRIPT_DIR"
|
||||
# 如果两个脚本都不存在
|
||||
print_message $YELLOW "敏感词导入脚本不存在,跳过导入"
|
||||
print_message $YELLOW "提示: 可以使用 importSensitiveWords.sh (推荐) 或 writeWord.py"
|
||||
}
|
||||
|
||||
# 验证初始化结果
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
#!/bin/bash

##############################################
# Bulk sensitive-word import script (pure shell implementation).
# Purpose:   read sensitive words from sensitive_word_dict.txt and import
#            them into the database.
# Advantage: no Python environment is needed, only the MySQL client.
##############################################

# Abort on the first unhandled command failure.
set -e

# ANSI color codes. They are stored as literal backslash sequences and are
# turned into real escape characters by the logging helpers when printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
|
||||
|
||||
# Print an informational message to stdout, prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
log_info() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message as-is so backslashes in paths or words are not reinterpreted.
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}
|
||||
|
||||
# Print a warning message to stdout, prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
log_warn() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message as-is so backslashes in paths or words are not reinterpreted.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"
}
|
||||
|
||||
# Print an error message to stdout, prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - color escape sequences
# Arguments: $1 - message text, printed verbatim
log_error() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message as-is so backslashes in paths or words are not reinterpreted.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1"
}
|
||||
|
||||
# Database settings. Each value may be overridden by an environment variable
# of the same name; the literal after ":-" is the fallback.
# NOTE(review): the fallback password is a weak hard-coded default; it is
# kept for backward compatibility and should be overridden via DB_PASSWORD.
DB_HOST="${DB_HOST:-localhost}"
DB_PORT="${DB_PORT:-3306}"
DB_USER="${DB_USER:-root}"
DB_PASSWORD="${DB_PASSWORD:-123456}"
DB_NAME="${DB_NAME:-school_news}"

# Directory containing this script, and the dictionary file next to it.
# CDPATH is cleared for the cd so that a relative directory cannot be
# resolved through CDPATH (which would also make cd print to stdout and
# corrupt the captured value).
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
DICT_FILE="${SCRIPT_DIR}/sensitive_word_dict.txt"

# Start-up banner showing the effective target and input file.
echo "=================================================="
echo "敏感词批量导入工具 (Shell版本)"
echo "=================================================="
log_info "数据库: ${DB_HOST}:${DB_PORT}/${DB_NAME}"
log_info "敏感词文件: ${DICT_FILE}"
echo "=================================================="
echo ""
|
||||
|
||||
# The dictionary file must exist before anything else is attempted.
if [ ! -f "${DICT_FILE}" ]; then
  log_error "敏感词文件不存在: ${DICT_FILE}"
  exit 1
fi

# Count the non-empty lines of the dictionary.
# grep -c prints the count itself (no whitespace padding, no extra process)
# but exits with status 1 when the count is zero, so "|| true" keeps
# "set -e" from aborting before the empty-file message below.
TOTAL_WORDS=$(grep -c -v '^$' "${DICT_FILE}" || true)
TOTAL_WORDS="${TOTAL_WORDS:-0}"
log_info "检测到 ${TOTAL_WORDS} 个敏感词"

# Nothing to import: leave successfully without touching the database.
if [ "${TOTAL_WORDS}" -eq 0 ]; then
  log_warn "敏感词文件为空"
  exit 0
fi
|
||||
|
||||
# Verify that the MySQL client is installed and that the server accepts the
# configured credentials before asking for confirmation.
log_info "检查数据库连接..."

# Report a missing client explicitly instead of as a connection failure.
if ! command -v mysql >/dev/null 2>&1; then
  log_error "未找到 mysql 客户端命令"
  exit 1
fi

# "2>&1 >/dev/null" captures only stderr: the query result is discarded,
# while the client's diagnostics are kept so they can be shown on failure.
if ! CONNECT_ERROR=$(mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" \
  -p"${DB_PASSWORD}" -e "SELECT 1;" 2>&1 >/dev/null); then
  log_error "数据库连接失败"
  if [ -n "${CONNECT_ERROR}" ]; then
    printf '%s\n' "${CONNECT_ERROR}" >&2
  fi
  exit 1
fi
log_info "数据库连接成功"
echo ""
|
||||
|
||||
# Tell the user what is about to happen; the import replaces all existing
# rows of type 'deny'.
log_warn "准备导入 ${TOTAL_WORDS} 个敏感词到数据库"
log_warn "这将清除现有的 deny 类型敏感词"
echo ""

# Automatic mode is selected by a first argument of -y / --yes or by
# AUTO_CONFIRM=true in the environment; otherwise ask interactively.
AUTO_CONFIRM=${AUTO_CONFIRM:-false}
if [ "${1:-}" = "-y" ] || [ "${1:-}" = "--yes" ] || [ "${AUTO_CONFIRM}" = "true" ]; then
  log_info "自动确认模式,开始导入..."
else
  # read fails on EOF (e.g. stdin is not interactive). Without this check
  # "set -e" would terminate the script here with no explanation.
  if ! read -p "是否继续?(y/N): " -n 1 -r; then
    echo
    log_error "无法读取确认输入,请使用 -y 参数或设置 AUTO_CONFIRM=true"
    exit 1
  fi
  echo
  # Anything other than a single y/Y counts as "no".
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    log_warn "用户取消导入"
    exit 0
  fi
fi
echo ""
|
||||
|
||||
# Start the import and remember when it began (epoch seconds) so the
# elapsed time can be reported later.
log_info "开始导入敏感词..."
START_TIME=$(date +%s)

# Temporary file that will hold the generated SQL script.
TEMP_SQL=$(mktemp)
# Remove the temporary file on every exit path. Single quotes defer the
# expansion until the trap fires and keep the path quoted, so a temp
# directory containing spaces or glob characters is handled correctly.
trap 'rm -f -- "${TEMP_SQL}"' EXIT
|
||||
|
||||
# Build the SQL import script in ${TEMP_SQL}.
#
# The DELETE and all INSERTs are wrapped in one transaction. The mysql client
# stops at the first failing statement in batch mode, so COMMIT is never
# reached on error and the previous word list is kept.
# NOTE(review): this relies on tb_sensitive_word using a transactional
# storage engine (e.g. InnoDB) - confirm against the schema.
log_info "生成SQL语句..."
cat > "${TEMP_SQL}" <<EOF
-- 敏感词批量导入
USE \`${DB_NAME}\`;

-- 设置字符集
SET NAMES utf8mb4;

START TRANSACTION;

-- 清除现有的deny类型敏感词
DELETE FROM tb_sensitive_word WHERE type = 'deny';

-- 批量插入敏感词
EOF

# Read the dictionary line by line and append multi-row INSERT statements.
COUNTER=0        # number of words written so far
BATCH_SIZE=1000  # rows per INSERT statement, keeps each statement bounded
BATCH_ROWS=0     # rows in the INSERT statement currently being written
SQ="'"
BS='\'

# Keep the SQL file open on fd 3 so log output on stdout stays separate
# and the file is not reopened for every row.
exec 3>> "${TEMP_SQL}"
while IFS= read -r word || [ -n "$word" ]; do
  # Drop a trailing carriage return so CRLF dictionaries do not import
  # words ending in an invisible "\r".
  word=${word%$'\r'}

  # Skip empty lines.
  if [ -z "$word" ]; then
    continue
  fi

  # Escape for a single-quoted SQL string literal: backslashes first, then
  # single quotes. Done with parameter expansion, so no process is forked
  # per word.
  # NOTE(review): backslash doubling assumes the server does not run with
  # the NO_BACKSLASH_ESCAPES sql_mode - confirm.
  word=${word//"${BS}"/"${BS}${BS}"}
  word=${word//"${SQ}"/"${SQ}${SQ}"}

  # The first row of a batch opens a new INSERT; later rows are preceded by
  # a comma. The terminator therefore never depends on knowing the total
  # number of words in advance.
  if [ "${BATCH_ROWS}" -eq 0 ]; then
    printf 'INSERT INTO tb_sensitive_word (word, type) VALUES\n' >&3
  else
    printf ',\n' >&3
  fi
  printf "('%s', 'deny')" "${word}" >&3

  COUNTER=$((COUNTER + 1))
  BATCH_ROWS=$((BATCH_ROWS + 1))

  # Close the statement once the batch is full.
  if [ "${BATCH_ROWS}" -ge "${BATCH_SIZE}" ]; then
    printf ';\n' >&3
    BATCH_ROWS=0
  fi

  # Progress report every 1000 words.
  if [ $((COUNTER % 1000)) -eq 0 ]; then
    log_info "已处理 ${COUNTER}/${TOTAL_WORDS} 个敏感词..."
  fi
done < "${DICT_FILE}"

# Terminate a partially filled final batch.
if [ "${BATCH_ROWS}" -gt 0 ]; then
  printf ';\n' >&3
fi
exec 3>&-

# If every line turned out to be blank, do not run a script that would only
# delete the existing words.
if [ "${COUNTER}" -eq 0 ]; then
  log_warn "敏感词文件为空"
  exit 0
fi

# Commit the transaction and append a verification query.
cat >> "${TEMP_SQL}" <<'EOF'

COMMIT;

-- 验证导入结果
SELECT COUNT(*) AS '导入数量' FROM tb_sensitive_word WHERE type = 'deny';
EOF

log_info "SQL语句生成完成(${COUNTER}个敏感词)"
echo ""
|
||||
|
||||
# Feed the generated SQL script to the mysql client. Any failure is fatal.
log_info "执行数据库导入..."
if ! mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" -p"${DB_PASSWORD}" < "${TEMP_SQL}"; then
  log_error "数据库导入失败"
  exit 1
fi

# Elapsed wall-clock time in whole seconds since START_TIME.
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))

# Summary of the successful run.
echo ""
echo "=================================================="
log_info "导入完成!"
log_info "成功导入: ${COUNTER} 个敏感词"
log_info "耗时: ${DURATION} 秒"
echo "=================================================="
|
||||
|
||||
# Verify the result by counting the 'deny' rows now present in the table.
log_info "验证导入结果..."
# -s -N -e: silent output without column names, so only the bare number is
# captured. A failing query is reported explicitly instead of letting
# "set -e" end the script without any message.
if ! IMPORTED_COUNT=$(mysql -h"${DB_HOST}" -P"${DB_PORT}" -u"${DB_USER}" \
  -p"${DB_PASSWORD}" -D"${DB_NAME}" -sNe \
  "SELECT COUNT(*) FROM tb_sensitive_word WHERE type = 'deny';"); then
  log_error "验证查询失败"
  exit 1
fi

echo ""
log_info "数据库中当前有 ${IMPORTED_COUNT} 个 deny 类型敏感词"

# Compare the stored row count with the number of words written to the SQL
# script. A mismatch is only reported as a warning.
if [ "${IMPORTED_COUNT}" -eq "${COUNTER}" ]; then
  log_info "✅ 验证通过:导入数量与预期一致"
else
  log_warn "⚠️ 导入数量不匹配:预期 ${COUNTER},实际 ${IMPORTED_COUNT}"
fi

echo ""
log_info "敏感词导入完成!"
|
||||
Reference in New Issue
Block a user