#!/bin/bash
#
# Photography portfolio monitoring setup script.
# Purpose: configure log collection, performance monitoring, error reporting
# and health checks.
#
# Change note that accompanied this revision of the file:
#   - Complete the backend CI/CD deployment flow with systemd service management
#   - Configure the Caddy multi-domain reverse proxy (frontend / API / admin)
#   - Create a complete production monitoring system
#   - Add automated operations scripts and scheduled monitoring
#   - Improve security configuration and error handling
#   - Standardize backup, rollback and health-check procedures
#   Milestone: deployment and operations tooling complete, production ready
#   Progress: 65.0% (26/40 tasks done)

# Abort on the first command that fails.
set -e

# ANSI color escape sequences used by the print_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Print helpers

# Print an informational message to stdout, prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read) - color escape sequences
# Arguments: $1 - message text (printed literally)
print_info() {
    # %b expands the backslash escapes stored in the color variables;
    # %s keeps the message itself literal.
    printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}
# Print a warning message to stderr, prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read) - color escape sequences
# Arguments: $1 - message text (printed literally)
print_warn() {
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1" >&2
}
# Print an error message to stderr, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read) - color escape sequences
# Arguments: $1 - message text (printed literally)
print_error() {
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
}
# Permission check
#
# Abort the script with status 1 unless it runs with an effective UID of 0.
check_permissions() {
    if (( EUID != 0 )); then
        print_error "此脚本需要 root 权限运行"
        exit 1
    fi
}
# Dependency check
#
# Detect the system package manager and install any helper tool that is
# missing from PATH.
# Globals:   PACKAGE_MANAGER (written) - "apt-get" or "yum"
# Exits with status 1 when neither apt-get nor yum is available.
check_dependencies() {
    print_info "检查系统依赖..."

    # Detect the system package manager
    if command -v apt-get &> /dev/null; then
        PACKAGE_MANAGER="apt-get"
    elif command -v yum &> /dev/null; then
        PACKAGE_MANAGER="yum"
    else
        print_error "不支持的包管理器"
        exit 1
    fi

    # Required tools. netstat is included because the generated health-check
    # and dashboard scripts call it.
    local tools=("curl" "wget" "systemctl" "journalctl" "logrotate" "netstat")
    local tool package
    for tool in "${tools[@]}"; do
        if command -v "$tool" &> /dev/null; then
            continue
        fi

        # A binary name is not always the package name: systemctl and
        # journalctl ship with systemd, netstat ships with net-tools.
        case "$tool" in
            systemctl | journalctl) package="systemd" ;;
            netstat) package="net-tools" ;;
            *) package="$tool" ;;
        esac

        print_warn "$tool 未安装,正在安装..."
        "$PACKAGE_MANAGER" install -y "$package"
    done
}
# Monitoring directory layout
#
# Create the log, configuration and script directories used by the
# monitoring system, hand them to gitea:gitea and set the top-level
# directories to mode 755.
create_directories() {
    print_info "创建监控目录结构..."

    # Create the monitoring directories
    mkdir -p /var/log/photography/{frontend,backend,admin,monitoring}
    mkdir -p /etc/photography/monitoring
    mkdir -p /opt/photography/monitoring/{scripts,config}

    # Ownership and permissions
    local dir
    for dir in /var/log/photography /etc/photography /opt/photography; do
        chown -R gitea:gitea "$dir"
        chmod 755 "$dir"
    done
}
# Log collection
#
# Write the rsyslog routing rules and the logrotate policy for the project,
# then restart rsyslog so the new rules take effect.
setup_logging() {
    print_info "配置日志收集系统..."

    # Create the rsyslog configuration file.
    # Rule order matters: 'photography.iriver.top' is a substring of the admin
    # and API host names, and every rule ends with 'stop', so the more specific
    # hosts must be matched before the bare frontend host.
    cat > /etc/rsyslog.d/50-photography.conf << 'EOF'
# Photography Portfolio 日志配置
# Rules are ordered from most specific host name to least specific.

# 管理后台日志
if $programname == 'caddy' and $msg contains 'admin.photography.iriver.top' then /var/log/photography/admin/access.log
& stop

# API 访问日志
if $programname == 'caddy' and $msg contains 'api.photography.iriver.top' then /var/log/photography/backend/api.log
& stop

# 前端访问日志
if $programname == 'caddy' and $msg contains 'photography.iriver.top' then /var/log/photography/frontend/access.log
& stop

# 后端应用日志
if $programname == 'photography-backend' then /var/log/photography/backend/application.log
& stop
EOF

    # Restart rsyslog
    systemctl restart rsyslog

    # Create the logrotate configuration
    cat > /etc/logrotate.d/photography << 'EOF'
/var/log/photography/*/*.log {
    daily
    rotate 30
    compress
    delaycompress
    missingok
    notifempty
    sharedscripts
    postrotate
        /usr/bin/systemctl reload rsyslog > /dev/null 2>&1 || true
    endscript
}

/var/log/caddy/*.log {
    daily
    rotate 30
    compress
    delaycompress
    missingok
    notifempty
    sharedscripts
    postrotate
        /usr/bin/systemctl reload caddy > /dev/null 2>&1 || true
    endscript
}
EOF
}
# Performance monitoring
#
# Generate /opt/photography/monitoring/scripts/performance-monitor.sh, make it
# executable and hand it to gitea:gitea. The generated script appends system
# metrics, service states, HTTP probe timings and disk/memory threshold
# messages to /var/log/photography/monitoring/performance.log.
setup_performance_monitoring() {
    print_info "配置性能监控系统..."

    # Create the performance monitoring script. The heredoc delimiter is
    # quoted, so nothing inside it is expanded at install time.
    cat > /opt/photography/monitoring/scripts/performance-monitor.sh << 'EOF'
#!/bin/bash

# 摄影作品集性能监控脚本

LOG_FILE="/var/log/photography/monitoring/performance.log"
API_URL="http://localhost:8080"
FRONTEND_URL="https://photography.iriver.top"
ADMIN_URL="https://admin.photography.iriver.top"
API_PROXY_URL="https://api.photography.iriver.top"
# Upper bound in seconds for each HTTP probe, so a hung endpoint cannot stall the run
CURL_TIMEOUT=10

# 创建日志文件
touch "$LOG_FILE"

# 获取系统指标
get_system_metrics() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')
    local memory_usage=$(free | grep Mem | awk '{printf "%.2f", $3/$2 * 100.0}')
    # df -P keeps every filesystem on a single line, so column 5 is always the usage
    local disk_usage=$(df -P / | awk 'NR==2{print $5}' | sed 's/%//')
    local load_avg=$(uptime | awk -F'load average:' '{print $2}')

    echo "$timestamp [SYSTEM] CPU: ${cpu_usage}%, Memory: ${memory_usage}%, Disk: ${disk_usage}%, Load:${load_avg}" >> "$LOG_FILE"
}

# 检查服务状态
check_services() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local entry label unit state

    # Each entry is "<label in the log>:<systemd unit>"
    for entry in "Caddy:caddy" "Backend:photography-backend" "PostgreSQL:postgresql" "Redis:redis"; do
        label=${entry%%:*}
        unit=${entry#*:}
        if systemctl is-active --quiet "$unit"; then
            state="UP"
        else
            state="DOWN"
        fi
        echo "$timestamp [SERVICE] $label: $state" >> "$LOG_FILE"
    done
}

# Probe one URL with a single request.
# $1 = log tag, $2 = label, $3 = URL.
# Logs the total response time on HTTP 200, FAILED otherwise.
probe_url() {
    local tag=$1
    local label=$2
    local url=$3
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local result http_code response_time

    # One request yields both the status code and the timing
    result=$(curl -s -o /dev/null --max-time "$CURL_TIMEOUT" -w '%{http_code} %{time_total}' "$url" 2>/dev/null)
    http_code=${result%% *}
    response_time=${result##* }

    if [ "$http_code" = "200" ]; then
        echo "$timestamp [$tag] $label: ${response_time}s" >> "$LOG_FILE"
    else
        echo "$timestamp [$tag] $label: FAILED" >> "$LOG_FILE"
    fi
}

# 检查 API 响应时间
check_api_response() {
    # 检查后端 API
    probe_url "API" "Backend Health" "$API_URL/health"
    # 检查前端
    probe_url "WEB" "Frontend" "$FRONTEND_URL"
    # 检查管理后台
    probe_url "WEB" "Admin" "$ADMIN_URL"
    # 检查 API 代理
    probe_url "PROXY" "API" "$API_PROXY_URL/health"
}

# Log a usage percentage against the 80% / 90% thresholds.
# $1 = resource label (Disk / Memory), $2 = integer percentage.
report_usage() {
    local label=$1
    local value=$2
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # Refuse to compare when the measurement is missing or not an integer
    case "$value" in
        '' | *[!0-9]*)
            echo "$timestamp [WARN] $label usage unavailable" >> "$LOG_FILE"
            return 1
            ;;
    esac

    if [ "$value" -gt 90 ]; then
        echo "$timestamp [ALERT] $label usage critical: ${value}%" >> "$LOG_FILE"
    elif [ "$value" -gt 80 ]; then
        echo "$timestamp [WARN] $label usage high: ${value}%" >> "$LOG_FILE"
    else
        echo "$timestamp [INFO] $label usage normal: ${value}%" >> "$LOG_FILE"
    fi
}

# 检查磁盘空间
check_disk_space() {
    local disk_usage=$(df -P / | awk 'NR==2{print $5}' | sed 's/%//')
    report_usage "Disk" "$disk_usage"
}

# 检查内存使用
check_memory_usage() {
    local memory_usage=$(free | grep Mem | awk '{printf "%.0f", $3/$2 * 100.0}')
    report_usage "Memory" "$memory_usage"
}

# 主函数
main() {
    get_system_metrics
    check_services
    check_api_response
    check_disk_space
    check_memory_usage
}

# 执行监控
main
EOF

    # Permissions
    chmod +x /opt/photography/monitoring/scripts/performance-monitor.sh
    chown gitea:gitea /opt/photography/monitoring/scripts/performance-monitor.sh
}
# Error reporting
#
# Generate /opt/photography/monitoring/scripts/error-monitor.sh, make it
# executable and hand it to gitea:gitea. The generated script scans the
# backend log, the journal and the Caddy access logs and appends findings to
# /var/log/photography/monitoring/errors.log.
setup_error_reporting() {
    print_info "配置错误报告系统..."

    # Create the error monitoring script. The heredoc delimiter is quoted, so
    # nothing inside it is expanded at install time.
    cat > /opt/photography/monitoring/scripts/error-monitor.sh << 'EOF'
#!/bin/bash

# 摄影作品集错误监控脚本

LOG_FILE="/var/log/photography/monitoring/errors.log"

# 创建日志文件
touch "$LOG_FILE"

# 检查后端错误
check_backend_errors() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local error_count=0
    local backend_errors=0

    # 检查后端应用日志中的错误
    # Count matches within the last 100 lines. tail must come before grep -c:
    # grep -c prints a single number, so piping it into tail | wc -l always gave 1.
    if [ -f "/var/log/photography/backend/application.log" ]; then
        error_count=$(tail -n 100 /var/log/photography/backend/application.log | grep -c "ERROR\|FATAL")
    fi

    # 检查系统日志中的后端错误
    # grep -c already prints 0 when nothing matches; adding '|| echo 0' produced
    # two lines ("0" twice) and broke the arithmetic below.
    backend_errors=$(journalctl --quiet -u photography-backend --since "5 minutes ago" --no-pager | grep -c "ERROR\|FATAL")
    error_count=$((error_count + backend_errors))

    if [ "$error_count" -gt 0 ]; then
        echo "$timestamp [BACKEND] Found $error_count errors in the last 5 minutes" >> "$LOG_FILE"

        # 记录具体错误
        journalctl --quiet -u photography-backend --since "5 minutes ago" --no-pager | grep "ERROR\|FATAL" | tail -5 >> "$LOG_FILE"
    fi
}

# Print the number of lines in access log $1 whose JSON status is 4xx or 5xx
count_http_errors() {
    grep -cE '"status":[4-5][0-9][0-9]' "$1"
}

# 检查前端错误
check_frontend_errors() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # 检查 Caddy 日志中的 4xx 和 5xx 错误
    if [ -f "/var/log/caddy/photography.log" ]; then
        local error_count=$(count_http_errors /var/log/caddy/photography.log)

        if [ "$error_count" -gt 10 ]; then
            echo "$timestamp [FRONTEND] Found $error_count HTTP errors in access logs" >> "$LOG_FILE"
        fi
    fi
}

# 检查 API 错误
check_api_errors() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # 检查 API 访问日志中的错误
    if [ -f "/var/log/caddy/api.photography.log" ]; then
        local error_count=$(count_http_errors /var/log/caddy/api.photography.log)

        if [ "$error_count" -gt 5 ]; then
            echo "$timestamp [API] Found $error_count API errors in access logs" >> "$LOG_FILE"
        fi
    fi
}

# 检查系统错误
check_system_errors() {
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    # 检查系统日志中的严重错误
    # --quiet suppresses journalctl's informational lines so that only real
    # entries are counted.
    local system_errors=$(journalctl --quiet --since "5 minutes ago" --priority=err --no-pager | wc -l)

    if [ "$system_errors" -gt 0 ]; then
        echo "$timestamp [SYSTEM] Found $system_errors system errors in the last 5 minutes" >> "$LOG_FILE"

        # 记录具体错误
        journalctl --quiet --since "5 minutes ago" --priority=err --no-pager | tail -3 >> "$LOG_FILE"
    fi
}

# 主函数
main() {
    check_backend_errors
    check_frontend_errors
    check_api_errors
    check_system_errors
}

# 执行错误监控
main
EOF

    # Permissions
    chmod +x /opt/photography/monitoring/scripts/error-monitor.sh
    chown gitea:gitea /opt/photography/monitoring/scripts/error-monitor.sh
}
# Health checks
#
# Generate /opt/photography/monitoring/scripts/health-check.sh, make it
# executable and hand it to gitea:gitea. The generated script checks systemd
# units, listening ports and HTTP endpoints, logs each result to
# /var/log/photography/monitoring/health.log, and writes the names of failed
# checks to /tmp/photography-health-alert (removed again when all pass).
setup_health_checks() {
    print_info "配置健康检查系统..."

    # Create the health check script. The heredoc delimiter is quoted, so
    # nothing inside it is expanded at install time.
    cat > /opt/photography/monitoring/scripts/health-check.sh << 'EOF'
#!/bin/bash

# 摄影作品集健康检查脚本

LOG_FILE="/var/log/photography/monitoring/health.log"
ALERT_FILE="/tmp/photography-health-alert"
# Upper bound in seconds for each HTTP probe; this job is scheduled every minute
CURL_TIMEOUT=10

# 创建日志文件
touch "$LOG_FILE"

# 健康检查函数
# $1 is the name written to the log; the remaining arguments are the command
# to run. The command is executed directly (no eval) with its output discarded.
check_service_health() {
    local service_name=$1
    shift
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    if "$@" > /dev/null 2>&1; then
        echo "$timestamp [HEALTH] $service_name: OK" >> "$LOG_FILE"
        return 0
    else
        echo "$timestamp [HEALTH] $service_name: FAILED" >> "$LOG_FILE"
        return 1
    fi
}

# Succeeds when a TCP socket is listening on exactly port $1.
# The trailing whitespace in the pattern keeps ":80" from matching ":8080".
port_listening() {
    netstat -tln 2> /dev/null | grep -q ":$1[[:space:]]"
}

# Succeeds when URL $1 answers without an HTTP error within CURL_TIMEOUT seconds
http_ok() {
    curl -f -s -o /dev/null --max-time "$CURL_TIMEOUT" "$1"
}

# 检查各个服务
main() {
    local failed_services=()

    # 检查 Caddy
    check_service_health "Caddy" systemctl is-active --quiet caddy ||
        failed_services+=("Caddy")

    # 检查后端服务
    check_service_health "Backend" systemctl is-active --quiet photography-backend ||
        failed_services+=("Backend")

    # 检查 PostgreSQL
    check_service_health "PostgreSQL" systemctl is-active --quiet postgresql ||
        failed_services+=("PostgreSQL")

    # 检查 Redis
    check_service_health "Redis" systemctl is-active --quiet redis ||
        failed_services+=("Redis")

    # 检查端口监听
    local port
    for port in 80 443 8080; do
        check_service_health "Port $port" port_listening "$port" ||
            failed_services+=("Port $port")
    done

    # 检查 API 响应
    check_service_health "API Health" http_ok "http://localhost:8080/health" ||
        failed_services+=("API Health")

    # 检查前端可访问性
    check_service_health "Frontend" http_ok "https://photography.iriver.top" ||
        failed_services+=("Frontend")

    # 检查管理后台
    check_service_health "Admin" http_ok "https://admin.photography.iriver.top" ||
        failed_services+=("Admin")

    # 如果有失败的服务,记录警报
    if [ ${#failed_services[@]} -gt 0 ]; then
        echo "$(date '+%Y-%m-%d %H:%M:%S') [ALERT] Failed services: ${failed_services[*]}" >> "$LOG_FILE"
        echo "${failed_services[*]}" > "$ALERT_FILE"
    else
        rm -f "$ALERT_FILE"
    fi
}

# 执行健康检查
main
EOF

    # Permissions
    chmod +x /opt/photography/monitoring/scripts/health-check.sh
    chown gitea:gitea /opt/photography/monitoring/scripts/health-check.sh
}
# Cron jobs
#
# Install /etc/cron.d/photography-monitoring, which schedules the generated
# monitoring scripts as user gitea, then restart the cron daemon.
setup_cron_jobs() {
    print_info "配置定时任务..."

    # Create the cron configuration file
    cat > /etc/cron.d/photography-monitoring << 'EOF'
# Photography Portfolio 监控定时任务

# 每分钟执行健康检查
* * * * * gitea /opt/photography/monitoring/scripts/health-check.sh

# 每5分钟执行性能监控
*/5 * * * * gitea /opt/photography/monitoring/scripts/performance-monitor.sh

# 每5分钟执行错误监控
*/5 * * * * gitea /opt/photography/monitoring/scripts/error-monitor.sh

# 每小时执行一次清理(删除超过7天的监控日志)
0 * * * * gitea find /var/log/photography/monitoring/ -name "*.log" -mtime +7 -delete
EOF

    # Set the mode explicitly instead of relying on the current umask
    chmod 644 /etc/cron.d/photography-monitoring

    # Restart the cron service. The unit is "cron" on Debian/Ubuntu and "crond"
    # on yum-based systems, both of which check_dependencies accepts.
    if systemctl cat cron.service &> /dev/null; then
        systemctl restart cron
    else
        systemctl restart crond
    fi
}
# Monitoring dashboard
#
# Generate /opt/photography/monitoring/scripts/dashboard.sh, make it
# executable and hand it to gitea:gitea. The generated script prints a text
# summary: system info, service states, listening ports, disk and memory
# usage, and the last five lines of the error and health logs.
create_dashboard() {
    print_info "创建监控仪表板..."

    # Create the dashboard script. The heredoc delimiter is quoted, so nothing
    # inside it is expanded at install time.
    cat > /opt/photography/monitoring/scripts/dashboard.sh << 'EOF'
#!/bin/bash

# 摄影作品集监控仪表板

echo "=========================="
echo "摄影作品集系统监控仪表板"
echo "=========================="
echo ""

# 系统信息
echo "📊 系统信息:"
echo " 时间: $(date)"
echo " 运行时间: $(uptime -p)"
echo " 负载: $(uptime | awk -F'load average:' '{print $2}')"
echo ""

# 服务状态
echo "🔧 服务状态:"
services=("caddy" "photography-backend" "postgresql" "redis")
for service in "${services[@]}"; do
    if systemctl is-active --quiet "$service"; then
        echo " $service: ✅ 运行中"
    else
        echo " $service: ❌ 停止"
    fi
done
echo ""

# 端口监听
echo "🌐 端口监听:"
ports=("80" "443" "8080" "5432" "6379")
for port in "${ports[@]}"; do
    # -p is omitted: it is not needed here and only triggers a warning for non-root users
    if netstat -tln 2> /dev/null | grep -q ":$port "; then
        echo " 端口 $port: ✅ 监听中"
    else
        echo " 端口 $port: ❌ 未监听"
    fi
done
echo ""

# 磁盘使用
echo "💾 磁盘使用:"
df -h | grep -E '^/dev/' | awk '{print " " $1 ": " $5 " 已使用 (" $3 "/" $2 ")"}'
echo ""

# 内存使用
echo "🧠 内存使用:"
# The percentage comes from the raw numbers of plain `free`; dividing the
# human-readable values of `free -h` is wrong whenever the units differ
# (for example 512Mi used of 7.7Gi total).
mem_percent=$(free | awk '/^Mem:/ {printf "%d", $3/$2*100}')
free -h | awk -v pct="$mem_percent" '/^Mem:/ {print " 内存: " $3 "/" $2 " (" pct "% 已使用)"}'
echo ""

# 最近的错误
echo "⚠️ 最近的错误 (最近5条):"
if [ -f "/var/log/photography/monitoring/errors.log" ]; then
    tail -5 /var/log/photography/monitoring/errors.log | sed 's/^/ /'
else
    echo " 没有错误日志"
fi
echo ""

# 最近的健康检查
echo "💚 最近的健康检查 (最近5条):"
if [ -f "/var/log/photography/monitoring/health.log" ]; then
    tail -5 /var/log/photography/monitoring/health.log | sed 's/^/ /'
else
    echo " 没有健康检查日志"
fi
echo ""

echo "=========================="
echo "监控仪表板完成"
echo "=========================="
EOF

    # Permissions
    chmod +x /opt/photography/monitoring/scripts/dashboard.sh
    chown gitea:gitea /opt/photography/monitoring/scripts/dashboard.sh
}
# Monitoring configuration file
#
# Write /etc/photography/monitoring/config.yaml, owned by gitea:gitea with
# mode 644.
#
# NOTE(review): the YAML nesting below was reconstructed from the key order;
# in particular, whether services/endpoints/alerts belong at the top level or
# under "monitoring" should be confirmed against whatever consumes this file.
# NOTE(review): the file is world-readable (644) and has an SMTP password
# field; tighten the mode before storing a real credential in it.
create_monitoring_config() {
    print_info "创建监控配置文件..."

    cat > /etc/photography/monitoring/config.yaml << 'EOF'
# Photography Portfolio 监控配置

monitoring:
  # 日志配置
  logging:
    level: "info"
    max_size: "10MB"
    max_age: 30
    max_backups: 5
    compress: true

  # 性能监控
  performance:
    check_interval: 300 # 5分钟
    cpu_threshold: 80
    memory_threshold: 80
    disk_threshold: 80
    response_time_threshold: 2.0

  # 错误监控
  error_monitoring:
    check_interval: 300 # 5分钟
    max_errors_per_check: 10
    alert_threshold: 5

  # 健康检查
  health_check:
    check_interval: 60 # 1分钟
    timeout: 10
    retry_count: 3

# 服务列表
services:
  - name: "caddy"
    type: "systemd"
    critical: true
  - name: "photography-backend"
    type: "systemd"
    critical: true
  - name: "postgresql"
    type: "systemd"
    critical: true
  - name: "redis"
    type: "systemd"
    critical: false

# 端点检查
endpoints:
  - name: "Frontend"
    url: "https://photography.iriver.top"
    method: "GET"
    expected_status: 200
    timeout: 10
  - name: "Admin"
    url: "https://admin.photography.iriver.top"
    method: "GET"
    expected_status: 200
    timeout: 10
  - name: "API Health"
    url: "https://api.photography.iriver.top/health"
    method: "GET"
    expected_status: 200
    timeout: 5
  - name: "Backend Health"
    url: "http://localhost:8080/health"
    method: "GET"
    expected_status: 200
    timeout: 5

# 警报配置
alerts:
  enabled: true
  channels:
    - type: "log"
      enabled: true
      level: "error"
    - type: "email"
      enabled: false
      smtp_server: ""
      smtp_port: 587
      username: ""
      password: ""
      to: ""
      from: ""
EOF

    # Permissions
    chown gitea:gitea /etc/photography/monitoring/config.yaml
    chmod 644 /etc/photography/monitoring/config.yaml
}
# Main entry point
#
# Print a banner, run every setup step in order, then print a summary of the
# installed scripts, the log locations and usage notes.
main() {
    echo "==================================="
    echo "摄影作品集监控系统配置脚本"
    echo "==================================="
    echo ""

    # Setup steps, in dependency order: privileges and tools first, then the
    # directories the later steps write into.
    check_permissions
    check_dependencies
    create_directories
    setup_logging
    setup_performance_monitoring
    setup_error_reporting
    setup_health_checks
    setup_cron_jobs
    create_dashboard
    create_monitoring_config

    print_info "监控系统配置完成!"

    # The summary is printed verbatim; the quoted delimiter disables expansion.
    cat << 'EOF'

🔧 监控脚本位置:
 - 性能监控: /opt/photography/monitoring/scripts/performance-monitor.sh
 - 错误监控: /opt/photography/monitoring/scripts/error-monitor.sh
 - 健康检查: /opt/photography/monitoring/scripts/health-check.sh
 - 监控仪表板: /opt/photography/monitoring/scripts/dashboard.sh

📊 监控日志位置:
 - 性能日志: /var/log/photography/monitoring/performance.log
 - 错误日志: /var/log/photography/monitoring/errors.log
 - 健康日志: /var/log/photography/monitoring/health.log

📋 使用方法:
 - 查看监控仪表板: /opt/photography/monitoring/scripts/dashboard.sh
 - 手动执行健康检查: /opt/photography/monitoring/scripts/health-check.sh
 - 查看监控配置: /etc/photography/monitoring/config.yaml

⚠️ 注意事项:
 - 定时任务已配置,每分钟执行健康检查
 - 每5分钟执行性能和错误监控
 - 日志文件会自动轮转,保留30天
 - 可以根据需要修改配置文件

EOF
    print_info "监控系统安装完成!"
}
# Run the main function, forwarding all command-line arguments
main "$@"