nginx配置中,如何设置局域网中多个服务器,并且仅当第一个服务器失效时访问第二个服务器,依次类推

在 Nginx 中,您可以通过 upstream 模块实现服务器故障转移机制。这里有几种方法可以实现您的需求:

1. 基础故障转移配置

# Failover pool: one primary server plus three servers flagged `backup`.
# NOTE(review): per the nginx upstream documentation, backup servers only
# receive requests while the primary is unavailable, and several backup
# servers are balanced among themselves rather than tried in a strict
# 1 -> 2 -> 3 order — confirm before relying on ordered failover.
upstream backend_servers {
    # Primary server
    server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;
    
    # Backup server 1 (eligible once the primary is considered failed)
    server 192.168.1.11:8080 backup max_fails=3 fail_timeout=30s;
    
    # Backup server 2 (same eligibility as backup 1, not a later tier)
    server 192.168.1.12:8080 backup max_fails=3 fail_timeout=30s;
    
    # Backup server 3 (same eligibility as backup 1, not a later tier)
    server 192.168.1.13:8080 backup max_fails=3 fail_timeout=30s;
}

# Virtual host that forwards every request to the backend_servers group.
server {
    server_name example.com;
    listen 80;

    location / {
        # Short timeouts so an unresponsive backend is abandoned quickly.
        proxy_connect_timeout 2s;
        proxy_send_timeout 5s;
        proxy_read_timeout 5s;

        # Conditions under which the request is passed to the next server.
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;

        # Forward the original host name and client address.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;

        proxy_pass http://backend_servers;
    }
}

2. 完全按顺序的故障转移

要实现严格按顺序的故障转移(第一个失败 → 第二个 → 第三个),需要使用更复杂的配置:

# Primary server, kept in an upstream group of its own.
upstream primary_server {
    server 192.168.1.10:8080 max_fails=2 fail_timeout=30s;
}

# Backup server group.
# NOTE(review): the server block in this example proxies to the backup
# addresses directly and never references this group — confirm it is needed.
upstream backup_servers {
    server 192.168.1.11:8080;
    server 192.168.1.12:8080;
    server 192.168.1.13:8080;
}

# Strictly ordered failover: primary -> backup 1 -> backup 2 -> backup 3.
# Each stage turns a 5xx result into an internal redirect to the next stage.
server {
    listen 80;
    server_name example.com;

    # Allow error_page to fire again while an earlier error_page redirect is
    # being processed. Without this, nginx handles only the first redirect and
    # the chain would stop at @try_backup.
    recursive_error_pages on;

    location / {
        # Stage 0: try the primary server first.
        proxy_pass http://primary_server;
        # Let error_page handle 5xx responses returned by the upstream itself.
        proxy_intercept_errors on;
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;

        # On failure, hand the request to the first backup.
        error_page 500 502 503 504 = @try_backup;
    }

    location @try_backup {
        # Stage 1: backup server 1.
        proxy_pass http://192.168.1.11:8080;

        proxy_intercept_errors on;
        error_page 500 502 503 504 = @try_backup2;
    }

    location @try_backup2 {
        # Stage 2: backup server 2.
        proxy_pass http://192.168.1.12:8080;

        proxy_intercept_errors on;
        error_page 500 502 503 504 = @try_backup3;
    }

    location @try_backup3 {
        # Stage 3: backup server 3, the last resort; its errors reach the client.
        proxy_pass http://192.168.1.13:8080;
    }
}

3. 使用 Nginx Plus 的商业版功能

如果您使用 Nginx Plus,有更强大的故障转移功能:

# NGINX Plus: primary/backup group with active health checks.
upstream backend {
    # Shared-memory zone holding the group's run-time state; required for
    # active health checks.
    zone backend 64k;

    # No `state` file here: that directive cannot be combined with `server`
    # directives in the same group. No `resolve` either: it only applies to
    # servers given by domain name, and these are literal IP addresses.

    # Primary server
    server 192.168.1.10:8080;

    # Backup servers: used only while the primary is unavailable. Requests
    # are balanced across them; they are not tried in a fixed order.
    server 192.168.1.11:8080 backup;
    server 192.168.1.12:8080 backup;
    server 192.168.1.13:8080 backup;
}

server {
    listen 80;

    location / {
        proxy_pass http://backend;
        proxy_next_upstream error timeout http_500 http_502 http_503 http_504;

        # Active health check. The directive is valid in `location` context
        # (not inside `upstream`) and probes the group used by proxy_pass.
        health_check interval=5s fails=3 passes=2;
    }
}

4. 使用 Lua 脚本实现智能故障转移

需要安装 ngx_http_lua_module(例如使用 OpenResty),以及 lua-resty-healthcheck 库:

# Health-check driven failover with OpenResty: the first healthy server in
# priority order receives the request.
http {
    lua_package_path "/etc/nginx/lua/?.lua;;";

    # Shared-memory zones: "healthcheck" is the shm_name handed to the health
    # checker below; "worker_events" backs resty.worker.events.
    lua_shared_dict healthcheck 8m;
    lua_shared_dict worker_events 8m;

    # Not referenced by proxy_pass in this example; the Lua code below picks
    # the backend address itself.
    upstream backend_servers {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
        server 192.168.1.13:8080;
    }

    init_worker_by_lua_block {
        -- resty.healthcheck publishes status changes through
        -- resty.worker.events, which has to be configured first.
        local worker_events = require "resty.worker.events"
        local ok, err = worker_events.configure({
            shm = "worker_events",
            interval = 0.1,
        })
        if not ok then
            ngx.log(ngx.ERR, "failed to configure worker events: ", err)
            return
        end

        local health = require "resty.healthcheck"

        local checker, new_err = health.new({
            name = "backend",
            shm_name = "healthcheck",
            type = "http",
            checks = {
                active = {
                    timeout = 2,
                    http_path = "/health",
                    healthy = {
                        interval = 5,
                        successes = 2
                    },
                    unhealthy = {
                        interval = 1,
                        http_failures = 3
                    }
                }
            }
        })
        if not checker then
            ngx.log(ngx.ERR, "failed to create health checker: ", new_err)
            return
        end

        -- Servers in priority order: the request phase walks this list and
        -- uses the first healthy entry.
        local servers = {
            { host = "192.168.1.10", port = 8080 },
            { host = "192.168.1.11", port = 8080 },
            { host = "192.168.1.12", port = 8080 },
            { host = "192.168.1.13", port = 8080 },
        }

        for _, server in ipairs(servers) do
            local added, add_err = checker:add_target(server.host, server.port, nil)
            if not added then
                ngx.log(ngx.ERR, "failed to add target ", server.host, ":",
                        server.port, ": ", add_err)
            end
        end

        -- Publish the checker and the ordered list for the request phase.
        -- package.loaded lives in the worker's Lua VM, so later handlers in
        -- this worker see the same table.
        package.loaded["backend_failover"] = {
            checker = checker,
            servers = servers,
        }
    }

    server {
        listen 80;

        location / {
            # ngx.var.* can only be assigned to variables that already exist
            # in the nginx configuration.
            set $backend "";

            access_by_lua_block {
                local failover = package.loaded["backend_failover"]
                if not failover then
                    -- Health checker was not initialised in this worker.
                    return ngx.exit(503)
                end

                local checker = failover.checker
                for _, server in ipairs(failover.servers) do
                    -- Truthy only when the target is currently healthy.
                    if checker:get_target_status(server.host, server.port) then
                        ngx.var.backend = "http://" .. server.host .. ":" .. server.port
                        return
                    end
                end

                -- No healthy server left.
                return ngx.exit(503)
            }

            proxy_pass $backend;
        }

        # Liveness endpoint of this nginx instance itself.
        location /health {
            access_by_lua_block {
                ngx.status = 200
                ngx.say("OK")
                ngx.exit(200)
            }
        }
    }
}

5. 使用第三方模块:nginx-upstream-fair(其中的 check 系列健康检查指令来自 nginx_upstream_check_module)

# Primary/backup group using third-party modules.
# NOTE(review): `fair` is provided by the nginx-upstream-fair module, while the
# `check*` directives below match the syntax of the separate
# nginx_upstream_check_module — confirm both are compiled into the nginx build.
upstream backend {
    fair;
    
    # Primary server
    server 192.168.1.10:8080;
    
    # Backup servers
    server 192.168.1.11:8080 backup;
    server 192.168.1.12:8080 backup;
    server 192.168.1.13:8080 backup;
    
    # Active health check: sends the HEAD request below and treats 2xx/3xx
    # responses as alive.
    # NOTE(review): interval/timeout are presumably milliseconds and rise/fall
    # consecutive success/failure counts — confirm against the module docs.
    check interval=3000 rise=2 fall=5 timeout=1000 type=http;
    check_http_send "HEAD / HTTP/1.0\r\n\r\n";
    check_http_expect_alive http_2xx http_3xx;
}

6. 完整生产环境配置示例

# Main configuration file: nginx.conf
# Process-level settings: worker count, run-as user, pid file and error log.
worker_processes auto;
user nginx;
pid /var/run/nginx.pid;
error_log /var/log/nginx/error.log warn;

# Connection handling for each worker process.
events {
    multi_accept on;
    use epoll;
    worker_connections 1024;
}

http {
    # MIME type table and the fallback type used when no mapping matches.
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    
    # Access-log format "main"; its last line appends the upstream address,
    # the upstream status and the request time to every entry.
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'upstream: $upstream_addr $upstream_status $request_time';
    
    access_log /var/log/nginx/access.log main;
    
    # Transfer and client keep-alive settings.
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    
    # Server group: one primary and three backups.
    upstream backend_group {
        # Primary server
        server 192.168.1.10:8080 max_fails=3 fail_timeout=30s;

        # Backup servers: they receive traffic only while the primary is
        # unavailable, and are balanced among themselves (least_conn below),
        # not tried in a fixed order.
        server 192.168.1.11:8080 backup max_fails=3 fail_timeout=30s;
        server 192.168.1.12:8080 backup max_fails=3 fail_timeout=30s;
        server 192.168.1.13:8080 backup max_fails=3 fail_timeout=30s;

        # Balancing method: fewest active connections.
        least_conn;

        # Session persistence is left disabled. `sticky cookie` exists only
        # in NGINX Plus, so open-source nginx rejects it as an unknown
        # directive; pinning clients to a server also works against failover.
        # sticky cookie srv_id expires=1h domain=.example.com path=/;
    }
    
    # Upstream group used by the /health location.
    # NOTE(review): this is a plain upstream with no health-check directive;
    # a request proxied to it reaches whichever server is selected, so it does
    # not report per-server health — confirm this is the intended behaviour.
    upstream backend_check {
        server 192.168.1.10:8080;
        server 192.168.1.11:8080;
        server 192.168.1.12:8080;
        server 192.168.1.13:8080;
    }
    
    # Public virtual host: status endpoint, health probe and the main proxy.
    server {
        listen 80;
        server_name example.com;

        # nginx connection statistics, restricted to the LAN and the local
        # host (so `curl http://localhost/nginx_status` is not answered 403).
        location /nginx_status {
            stub_status on;
            access_log off;
            allow 127.0.0.1;
            allow 192.168.1.0/24;
            deny all;
        }

        # Forwards /health to the backend_check group.
        location /health {
            proxy_pass http://backend_check;
            proxy_set_header Host $host;
            proxy_next_upstream error timeout http_500 http_502 http_503 http_504;
            access_log off;
        }

        location / {
            # Proxy to the backend servers.
            proxy_pass http://backend_group;

            # Timeouts
            proxy_connect_timeout 3s;
            proxy_read_timeout 10s;
            proxy_send_timeout 10s;

            # Retry policy. The group holds 4 servers (1 primary + 3 backups),
            # so allow 4 tries; a limit of 3 would never reach the last one.
            # The retry budget covers 4 connect timeouts of 3s each.
            proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
            proxy_next_upstream_tries 4;
            proxy_next_upstream_timeout 15s;

            # Request headers passed to the backend
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;

            # Response buffering
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
            proxy_busy_buffers_size 8k;

            # Miscellaneous: HTTP/1.1 with an empty Connection header.
            proxy_redirect off;
            proxy_http_version 1.1;
            proxy_set_header Connection "";
        }

        # Static error page for 5xx responses
        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root /usr/share/nginx/html;
        }
    }
    
    # Monitoring endpoints on port 8080, reachable from the local host only.
    server {
        listen 8080;
        server_name localhost;

        location /stub_status {
            stub_status;
            access_log off;
            allow 127.0.0.1;
            deny all;
        }

        # Stock nginx has no `upstream_status` directive (it is rejected as
        # unknown and the configuration fails to load), so this endpoint is
        # left disabled. With an nginx build that includes
        # nginx_upstream_check_module, its `check_status;` handler can be
        # enabled here instead:
        #
        # location /upstream_status {
        #     check_status;
        #     access_log off;
        #     allow 127.0.0.1;
        #     deny all;
        # }
    }
}

7. 使用 Keepalived 实现高可用

如果需要在多个 Nginx 节点间实现高可用:

# keepalived.conf
# Tracking script: runs `pgrep nginx` with the interval, weight, fall and rise
# values below.
# NOTE(review): presumably interval is in seconds and fall/rise are the
# consecutive failure/success counts before the state flips — confirm against
# the keepalived documentation.
vrrp_script chk_nginx {
    script "/usr/bin/pgrep nginx"
    interval 2
    weight 2
    fall 2
    rise 2
}

# VRRP instance VI_1: declared MASTER on eth0 with priority 100, owns the
# virtual address 192.168.1.100/24 and tracks the chk_nginx script.
vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51
    priority 100
    advert_int 1
    
    # NOTE(review): auth_pass looks like a placeholder value — replace it
    # before real use.
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    
    virtual_ipaddress {
        192.168.1.100/24
    }
    
    track_script {
        chk_nginx
    }
}

8. 监控和日志配置

#!/bin/bash
# /etc/nginx/check_backend.sh
# Monitoring script: probes the /health endpoint of every backend and prints
# one timestamped UP/DOWN line per backend.
# The shebang has to be the first line of the file to take effect.

BACKENDS=("192.168.1.10:8080" "192.168.1.11:8080" "192.168.1.12:8080" "192.168.1.13:8080")

for backend in "${BACKENDS[@]}"; do
    # -f makes curl fail on HTTP error statuses; --max-time caps the whole
    # request at 2 seconds.
    if curl -f --max-time 2 "http://$backend/health" >/dev/null 2>&1; then
        echo "$(date): $backend is UP"
    else
        echo "$(date): $backend is DOWN"
    fi
done

9. 测试配置

# Validate the configuration files
nginx -t

# Reload the configuration
nginx -s reload

# Upstream status: served by the monitoring server on port 8080, and only
# when an upstream-status handler is actually configured there
curl http://localhost:8080/upstream_status

# nginx connection statistics: the monitoring server on port 8080 allows
# requests from 127.0.0.1
curl http://localhost:8080/stub_status

# Exercise failover: poll once per second while stopping backends
while true; do
    curl -s http://example.com/health
    sleep 1
done

10. 关键参数说明

| 参数 | 说明 | 建议值 |
| --- | --- | --- |
| max_fails | 最大失败次数 | 3 |
| fail_timeout | 失败后暂停时间 | 30s |
| backup | 标记为备份服务器 | |
| weight | 服务器权重 | 1-10 |
| down | 手动标记服务器下线 | |
| proxy_next_upstream | 触发切换到下一服务器的条件 | error timeout http_500… |
| proxy_next_upstream_tries | 最大重试次数 | 3 |
| proxy_connect_timeout | 连接超时 | 2-5s |
| proxy_read_timeout | 读取超时 | 10-30s |

11. 优化建议

  1. 健康检查:使用第三方模块或 Nginx Plus 进行主动健康检查
  2. 会话保持:如果应用需要会话,使用 sticky 模块
  3. 监控告警:设置监控,当服务器故障时发送告警
  4. 自动恢复:配置脚本自动重启失败的服务
  5. 日志分析:分析访问日志,了解故障转移频率

12. 注意事项

  1. 备份服务器只在所有主服务器都不可用时才会被使用
  2. 使用 least_conn、ip_hash 等算法时,备份服务器的行为可能不同(例如 backup 参数不能与 hash、ip_hash、random 负载均衡方法一起使用)
  3. 生产环境建议结合监控系统(如 Prometheus + Grafana)
  4. 定期进行故障转移测试
  5. 确保备份服务器的数据和配置与主服务器同步

这种配置确保了当第一个服务器失效时,Nginx 会自动尝试其他服务器,直到找到可用的服务器。需要注意:多个 backup 服务器之间是负载均衡关系,并不保证先后顺序;如果必须严格按"第一个失败 → 第二个 → 第三个"的顺序切换,请使用第 2 节的逐级 error_page 方案。

Comments

No comments yet. Why don’t you start the discussion?

发表回复