# 负载均衡与反向代理架构:Nginx配置实践与高可用设计
在现代Web架构中,负载均衡器和反向代理作为流量入口,承担着请求分发、安全防护和高可用保障的关键职责。Nginx凭借其高性能和灵活性,成为构建这一基础设施的主流选择。
## Nginx核心配置架构
Nginx配置采用模块化设计,主要包含全局配置、事件模块、HTTP核心模块等层次。
```nginx
# nginx.conf - main configuration file

# --- Process-level settings -------------------------------------------------
user nginx;
worker_processes auto;            # one worker per CPU core
worker_rlimit_nofile 65535;       # per-worker open-file limit
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

# --- Connection processing ---------------------------------------------------
events {
    worker_connections 4096;      # max simultaneous connections per worker
    multi_accept on;              # accept all pending connections at once
    use epoll;                    # Linux event notification method
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Access-log format: the standard fields plus request and upstream timings.
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time uct="$upstream_connect_time" '
                    'uht="$upstream_header_time" urt="$upstream_response_time"';
    access_log /var/log/nginx/access.log main buffer=32k flush=5s;

    # Transmission tuning.
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # Client keep-alive.
    keepalive_timeout 75;
    keepalive_requests 1000;

    # Request body limits.
    client_max_body_size 100m;
    client_body_buffer_size 128k;

    # Per-client-address connection cap, tracked in the shared zone "addr".
    limit_conn_zone $binary_remote_addr zone=addr:10m;
    limit_conn addr 100;

    # Pull in the per-site / per-feature configuration files.
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/sites-enabled/*;
}
```
## 负载均衡策略实现
Nginx支持多种负载均衡算法,可根据业务需求灵活选择。
```nginx
# upstream.conf - upstream server pools
upstream backend_cluster {
    # Shared-memory zone: keeps the group's configuration and run-time peer
    # state in memory shared by all worker processes. It does not perform
    # health checks by itself.
    zone backend_cluster 64k;

    # An upstream may use exactly ONE balancing method. The original snippet
    # declared both least_conn and ip_hash; nginx then warns
    # "load balancing method redefined" and only the last one (ip_hash) takes
    # effect. Keep a single method active and leave the alternatives
    # commented out:
    #
    #   (none)      weighted round-robin, the default
    #   least_conn; fewest active connections, weight-aware
    #   ip_hash;    session affinity keyed on the client address
    ip_hash;

    # max_fails / fail_timeout give passive health checking: after 3 failed
    # attempts within 30s the peer is considered unavailable for 30s.
    server 192.168.1.101:8080 weight=5 max_fails=3 fail_timeout=30s;
    server 192.168.1.102:8080 weight=3 max_fails=3 fail_timeout=30s;
    server 192.168.1.103:8080 weight=2 max_fails=3 fail_timeout=30s;
}
# Weighted least-connections: the least_conn method combined with
# per-server weights.
upstream weighted_least_conn {
    least_conn;

    server 192.168.1.101:8080 weight=10;
    server 192.168.1.102:8080 weight=5;
    server 192.168.1.103:8080 weight=3;
}
# URI-hash balancing: the hash key is $request_uri, and the "consistent"
# parameter selects consistent (ketama) hashing.
upstream uri_hash_backend {
    hash $request_uri consistent;

    server 192.168.1.101:8080;
    server 192.168.1.102:8080;
    server 192.168.1.103:8080;
}
# Dynamic balancing based on backend response time.
# NOTE(review): `fair` is not a directive of stock nginx; presumably this
# relies on the third-party nginx-upstream-fair module being compiled in.
# Confirm the module is available, otherwise nginx will reject this block.
upstream response_time_backend {
fair;
server 192.168.1.101:8080;
server 192.168.1.102:8080;
server 192.168.1.103:8080;
}
```
## 反向代理高级配置
```nginx
# api_gateway.conf - API gateway virtual host
# This file is expected to be included from the http{} context.

# limit_req_zone is valid only in the http{} context, so the zone is declared
# here rather than inside server{} (where nginx rejects it).
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;

# Send "Connection: upgrade" upstream only when the client actually asked for
# a protocol upgrade (WebSocket). Otherwise send an empty Connection header so
# ordinary requests can use upstream keep-alive connections.
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      "";
}

server {
    listen 443 ssl http2;
    server_name api.example.com;

    # --- TLS ---
    ssl_certificate     /etc/ssl/certs/api.example.com.crt;
    ssl_certificate_key /etc/ssl/private/api.example.com.key;
    ssl_protocols TLSv1.2 TLSv1.3;
    # AES-256-GCM suites are paired with SHA384; the "...-GCM-SHA512" names
    # used previously are not valid OpenSSL cipher names.
    ssl_ciphers ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES256-GCM-SHA384;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

    # --- Security headers ---
    # add_header is inherited by a location only if that location declares no
    # add_header of its own; locations that add headers must repeat these.
    add_header X-Frame-Options DENY always;
    add_header X-Content-Type-Options nosniff always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains" always;

    # --- Default rate limit for every location in this server ---
    limit_req zone=api_limit burst=20 nodelay;

    # --- API routes ---
    location /api/v1/users {
        # Overrides the server-level limit: larger burst, excess requests are
        # delayed instead of being served immediately.
        limit_req zone=api_limit burst=30;

        proxy_pass http://user_service_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Upstream timeouts.
        proxy_connect_timeout 5s;
        proxy_send_timeout 10s;
        proxy_read_timeout 30s;

        # Response buffering.
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
        proxy_busy_buffers_size 16k;

        # Retry on the next upstream peer. The retry budget must be longer
        # than proxy_connect_timeout: with the previous 2s budget a connect
        # timeout (5s) had already exhausted it, so "timeout" retries could
        # never happen.
        proxy_next_upstream error timeout http_500 http_502 http_503 http_504;
        proxy_next_upstream_timeout 12s;
        proxy_next_upstream_tries 3;

        # When retries are exhausted, hand 5xx responses to the fallback
        # service instead of returning them to the client.
        proxy_intercept_errors on;
        error_page 500 502 503 504 = @fallback;
    }

    location /api/v1/orders {
        proxy_pass http://order_service_backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-API-Version "v1";

        # WebSocket support. HTTP/1.1 is required both for upgrades and for
        # upstream keep-alive. Keep-Alive and Proxy-Connection are hop-by-hop
        # headers and are intentionally not forwarded.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
    }

    # --- Static assets ---
    location /static/ {
        # alias maps /static/<file> to /var/www/api/static/<file>.
        # With "root /var/www/api/static" the location prefix was appended a
        # second time (/var/www/api/static/static/<file>).
        alias /var/www/api/static/;
        expires 1y;
        access_log off;

        # This location adds its own header, so the server-level security
        # headers are not inherited and are repeated here.
        add_header Cache-Control "public, immutable";
        add_header X-Frame-Options DENY always;
        add_header X-Content-Type-Options nosniff always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Strict-Transport-Security "max-age=63072000; includeSubDomains" always;
    }

    # --- Health-check endpoint ---
    location /health {
        access_log off;
        # default_type sets the Content-Type of the response; add_header
        # would emit a second Content-Type header and also switch off
        # inheritance of the security headers.
        default_type text/plain;
        return 200 "healthy\n";
    }

    # --- Fallback service used by error_page above ---
    location @fallback {
        proxy_pass http://backup_service;
        proxy_set_header X-Fallback-Reason $upstream_status;
    }
}
```
## 高可用架构设计
通过Keepalived实现Nginx主备高可用。
```conf
# keepalived.conf - MASTER node configuration
global_defs {
    router_id nginx_master
    enable_script_security
    script_user nginx
}

# Health-check script definition. The name must match the entry in
# track_script exactly; the original text had it garbled as "ch k_nginx"
# and "c h k_nginx", so the tracked script did not match the definition.
vrrp_script chk_nginx {
    script "/usr/bin/pkill -0 nginx"   # signal 0: only tests that a process exists
    interval 2                         # run every 2 seconds
    weight 50                          # priority adjustment tied to the check result
    fall 3                             # 3 consecutive failures => considered down
    rise 2                             # 2 consecutive successes => considered up
}

vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51
    priority 150
    advert_int 1

    authentication {
        auth_type PASS
        # NOTE(review): placeholder secret; replace it before deployment.
        auth_pass 123456
    }

    # Virtual IP that moves between the nodes.
    virtual_ipaddress {
        192.168.1.100/24 dev eth0 label eth0:0
    }

    track_script {
        chk_nginx
    }

    # State-transition hooks.
    notify_master "/etc/keepalived/scripts/notify_master.sh"
    notify_backup "/etc/keepalived/scripts/notify_backup.sh"
    notify_fault "/etc/keepalived/scripts/notify_fault.sh"
}
```
```bash
#!/bin/bash
# notify_master.sh - run by keepalived when this node becomes MASTER.
#
# Logs the transition, makes sure Nginx is running, e-mails the administrator
# and publishes the node state for the node_exporter textfile collector.

LOG_FILE="/var/log/keepalived/notify.log"
PROM_FILE="/var/lib/node_exporter/textfile_collector/loadbalancer.prom"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')

# The log directory is not guaranteed to exist on a fresh host.
mkdir -p "$(dirname "${LOG_FILE}")"

echo "[${TIMESTAMP}] 切换为MASTER状态" >> "${LOG_FILE}"

# Start Nginx if it is not already running.
if ! systemctl is-active --quiet nginx; then
    systemctl start nginx
    echo "[${TIMESTAMP}] Nginx已启动" >> "${LOG_FILE}"
fi

# Update the DNS record (example):
# curl -X POST "https://api.dns.com/zones/example.com/records" \
#   -H "Authorization: Bearer ${API_TOKEN}" \
#   -d '{"type":"A","name":"lb.example.com","content":"192.168.1.100"}'

# Send the alert notification.
echo "Nginx负载均衡器已切换为主节点 - ${TIMESTAMP}" | \
    mail -s "负载均衡状态变更" admin@example.com

# Publish the state for monitoring. A Prometheus sample needs a numeric value,
# so the state is carried in a label; the previous "loadbalancer_state master"
# line is not parseable by the textfile collector. The file is written to a
# temporary name and renamed so the collector never reads a partial file.
PROM_TMP="${PROM_FILE}.$$"
echo 'loadbalancer_state{state="master"} 1' > "${PROM_TMP}" && mv "${PROM_TMP}" "${PROM_FILE}"
```
## 动态配置管理与服务发现
```nginx
# nginx.conf - dynamic upstream configuration
# content_by_lua_block requires the lua-nginx-module (e.g. OpenResty).
http {
    # Upstream whose members are meant to be changed at run time.
    upstream dynamic_backend {
        zone dynamic_backend 64k;
        server 127.0.0.1:11111;  # placeholder peer

        # NOTE(review): `resolver` inside upstream{} is not accepted by every
        # nginx build/version; confirm it is supported by the one deployed.
        resolver 8.8.8.8 valid=30s;
    }

    server {
        listen 8080;

        location /upstream_conf {
            # Management endpoint: internal network only.
            allow 192.168.1.0/24;
            deny all;

            # Add a server to an upstream via POST fields
            # upstream=<name>&server=<host:port>[&weight=<n>].
            if ($request_method = POST) {
                content_by_lua_block {
                    -- The body must be read before get_post_args() is used.
                    ngx.req.read_body()
                    local args, err = ngx.req.get_post_args()
                    if not args then
                        ngx.status = ngx.HTTP_BAD_REQUEST
                        ngx.say("invalid request body: ", err)
                        return
                    end

                    local upstream = args["upstream"]
                    local server = args["server"]
                    local weight = args["weight"] or "1"

                    -- The values are spliced into a configuration directive,
                    -- so accept only plain names, host:port and an integer.
                    local function matches(value, pattern)
                        return type(value) == "string" and value:match(pattern) ~= nil
                    end
                    if not (matches(upstream, "^[%w_%-]+$")
                            and matches(server, "^[%w%.%-]+:%d+$")
                            and matches(weight, "^%d+$")) then
                        ngx.status = ngx.HTTP_BAD_REQUEST
                        ngx.say("invalid upstream, server or weight")
                        return
                    end

                    local cmd = string.format("server %s weight=%s;", server, weight)
                    -- NOTE(review): /_nginx_status is not defined in this
                    -- snippet; it must exist as a location elsewhere.
                    local res = ngx.location.capture("/_nginx_status", {
                        method = ngx.HTTP_POST,
                        body = string.format("upstream=%s&add=%s", upstream, cmd)
                    })
                    ngx.say(res.body)
                }
            }
        }

        # Status endpoint (stub_status counters), local access only.
        location /health {
            stub_status on;
            access_log off;
            allow 127.0.0.1;
            deny all;
        }
    }
}
```
```python
# service_discovery.py - 服务发现客户端
import requests
import json
import time
import logging
from typing import List, Dict
class ServiceDiscoveryClient:
    """Consul-backed service discovery client that can render Nginx upstreams.

    Args:
        consul_host: Host name or address of the Consul agent.
        consul_port: HTTP API port of the Consul agent.
        timeout: Seconds to wait when connecting to Consul and when reading an
            ordinary (non-blocking) response.
    """

    # Server-side wait of a blocking query; the client read timeout used by
    # ``watch_services`` must be longer than this.
    WATCH_WAIT_SECONDS = 30
    # Pause before retrying after a failed watch request.
    RETRY_DELAY_SECONDS = 5

    def __init__(self, consul_host: str = "localhost", consul_port: int = 8500,
                 timeout: float = 5.0):
        self.consul_url = f"http://{consul_host}:{consul_port}"
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _parse_nodes(entries) -> List[Dict]:
        """Convert Consul health entries into address/port/tags/meta dicts."""
        nodes = []
        for entry in entries or []:
            node = entry.get("Node") or {}
            service_info = entry.get("Service") or {}
            nodes.append({
                # Use the service's own address when it is set and fall back
                # to the node address when it is empty.
                "address": service_info.get("Address") or node.get("Address"),
                "port": service_info.get("Port"),
                # The fields may be present with a null value; normalise them.
                "tags": service_info.get("Tags") or [],
                "meta": service_info.get("Meta") or {},
            })
        return nodes

    def discover_services(self, service_name: str) -> List[Dict]:
        """Return the passing instances of ``service_name``.

        Returns:
            A list of dicts with keys ``address``, ``port``, ``tags`` and
            ``meta``; an empty list when the request or decoding fails.
        """
        try:
            response = requests.get(
                f"{self.consul_url}/v1/health/service/{service_name}",
                params={"passing": True},
                timeout=self.timeout,  # never hang forever on a dead agent
            )
            response.raise_for_status()
            nodes = self._parse_nodes(response.json())
            self.logger.info(f"发现服务 {service_name}: {len(nodes)} 个节点")
            return nodes
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            self.logger.error(f"服务发现失败: {e}")
            return []

    def generate_nginx_upstream(self, service_name: str) -> str:
        """Render an Nginx ``upstream`` block for ``service_name``.

        Nodes without an address or port are skipped because they would
        produce a syntactically invalid ``server`` line.

        Returns:
            The upstream block as a newline-joined string.
        """
        nodes = self.discover_services(service_name)
        config_lines = [f"upstream {service_name} {{"]
        config_lines.append(" least_conn;")
        for node in nodes:
            if not node.get("address") or not node.get("port"):
                self.logger.warning(f"skipping node without address/port: {node}")
                continue
            server_line = f" server {node['address']}:{node['port']}"
            # Passive health-check parameters.
            server_line += " max_fails=3 fail_timeout=30s"
            # Optional weight taken from the service metadata.
            meta = node.get("meta") or {}
            if "weight" in meta:
                server_line += f" weight={meta['weight']}"
            config_lines.append(server_line + ";")
        config_lines.append(" zone {}_zone 64k;".format(service_name))
        config_lines.append("}")
        return "\n".join(config_lines)

    def watch_services(self, service_name: str, callback):
        """Watch ``service_name`` with blocking queries; runs until interrupted.

        ``callback(service_name, nodes)`` is invoked whenever the value of the
        ``X-Consul-Index`` response header changes. ``nodes`` is parsed from
        the same response that reported the change, so no second request is
        needed.
        """
        index = "0"
        while True:
            try:
                response = requests.get(
                    f"{self.consul_url}/v1/health/service/{service_name}",
                    params={
                        "passing": True,
                        "index": index,
                        "wait": f"{self.WATCH_WAIT_SECONDS}s",
                    },
                    # The read timeout has to outlast the server-side wait.
                    timeout=(self.timeout, self.WATCH_WAIT_SECONDS + self.timeout),
                )
                # Treat non-2xx as an error so it is logged and followed by a
                # pause instead of being retried in a tight loop.
                response.raise_for_status()
                current_index = response.headers.get("X-Consul-Index")
                # The header is a string; compare like with like.
                if current_index and current_index != index:
                    if int(current_index) < int(index):
                        # The index went backwards: restart from scratch.
                        index = "0"
                        continue
                    index = current_index
                    callback(service_name, self._parse_nodes(response.json()))
            except (requests.RequestException, ValueError) as e:
                self.logger.error(f"监控服务失败: {e}")
                time.sleep(self.RETRY_DELAY_SECONDS)
```
## 缓存与性能优化
```nginx
# cache_config.conf - proxy cache configuration

# On-disk cache store: two-level directory hash, 10 MB key zone "api_cache",
# at most 10 GB of data, entries dropped after 60 minutes without access.
proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=api_cache:10m
                 max_size=10g inactive=60m use_temp_path=off;

server {
    listen 80;
    server_name cache.example.com;

    # Cacheable API responses.
    location /api/cacheable {
        proxy_cache api_cache;
        proxy_cache_key "$scheme$request_method$host$request_uri$http_authorization";

        # Lifetime per response status.
        proxy_cache_valid 200 302 5m;
        proxy_cache_valid 404 1m;
        proxy_cache_valid any 10s;

        # Only one request at a time populates a missing entry; the others
        # wait, which avoids a stampede on the backend.
        proxy_cache_lock on;
        proxy_cache_lock_timeout 5s;
        proxy_cache_lock_age 10s;

        # Expose HIT / MISS / BYPASS ... to the client.
        add_header X-Cache-Status $upstream_cache_status;

        # Conditions under which the cache is bypassed or not filled.
        proxy_cache_bypass $http_cache_control;
        proxy_no_cache $http_pragma $http_authorization;

        proxy_pass http://backend;
    }

    # Long-lived browser caching for static assets.
    location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
        access_log off;
    }

    # Cache purge endpoint, internal network only.
    # NOTE(review): this two-argument form of proxy_cache_purge looks like the
    # third-party ngx_cache_purge module's syntax; confirm the module is
    # built in. The purge key is also built from $1 (the path without a query
    # string) and omits $http_authorization, so it presumably only matches
    # entries cached for unauthenticated requests without arguments.
    location ~ /purge(/.*) {
        allow 192.168.1.0/24;
        deny all;
        proxy_cache_purge api_cache "$scheme$request_method$host$1";
    }
}
```
## 安全防护配置
```nginx
# security.conf - security configuration
# This file is expected to be included from the http{} context.

# --- Request rate limits ---
limit_req_zone $binary_remote_addr zone=auth_limit:10m rate=5r/m;
# Named differently from the "api_limit" zone declared in api_gateway.conf:
# two limit_req_zone directives may not share one zone name.
limit_req_zone $binary_remote_addr zone=public_api_limit:10m rate=100r/s;

# --- IP blacklist: the included file sets $blocked_ip to 1 for listed ranges ---
geo $blocked_ip {
    default 0;
    include /etc/nginx/conf.d/ip_blacklist.conf;
}

# --- Minimal WAF rules ---
# Map regexes are written as "~*pattern" (case-insensitive), with the prefix
# attached to the pattern; "~* pattern" is parsed as two separate parameters.
# $request_uri is the raw, still percent-encoded URI, so encoded forms are
# matched explicitly and a literal space can never occur in it.
# A regex list like this is a coarse first filter, not a full WAF.
map $request_uri $waf_block {
    default 0;

    # SQL injection probes.
    "~*(union(%20|\+)+select|sleep(\(|%28)|benchmark|information_schema)" 1;

    # Cross-site scripting probes.
    # NOTE(review): the original rule was truncated in the source text; this
    # is a reconstruction of a typical XSS pattern. Adjust it as required.
    "~*((\x3c|%3c)script|javascript:|onerror=|onload=)" 1;

    # Path traversal. \x5c is a backslash: nginx's own parser turns "\\" into
    # a single "\", which would corrupt the regex, so the hex form is used.
    "~*(\.\./|\.\.\x5c|\.\.%2f|\.\.%5c|/etc/passwd)" 1;
}

server {
    listen 80;

    # Do not disclose the nginx version.
    server_tokens off;

    # Reject blacklisted clients.
    if ($blocked_ip) {
        return 403 "Access Denied";
    }

    # Reject requests flagged by the WAF map.
    if ($waf_block) {
        return 403 "Malicious Request Detected";
    }

    # Allow only the listed HTTP methods.
    if ($request_method !~ ^(GET|HEAD|POST|PUT|DELETE|OPTIONS)$) {
        return 405;
    }

    location /api/auth {
        # Brute-force protection: 5 requests/minute with a burst of 10,
        # answered with 429 when exceeded.
        limit_req zone=auth_limit burst=10 nodelay;
        limit_req_status 429;
        proxy_pass http://auth_service;
    }

    location /api/ {
        limit_req zone=public_api_limit burst=50 delay=20;

        # Per-client connection cap; uses the "addr" zone declared by
        # limit_conn_zone in the main nginx.conf.
        limit_conn addr 20;

        # Request body limits.
        client_max_body_size 10m;
        client_body_buffer_size 128k;

        proxy_pass http://api_backend;
    }
}
```
## 监控与日志分析
```nginx
# logging.conf - logging configuration

# Structured JSON access-log format; escape=json escapes variable values so
# each line stays valid JSON.
log_format json_combined escape=json '{'
    '"timestamp":"$time_iso8601",'
    '"remote_addr":"$remote_addr",'
    '"remote_user":"$remote_user",'
    '"request":"$request",'
    '"status":"$status",'
    '"body_bytes_sent":"$body_bytes_sent",'
    '"request_time":"$request_time",'
    '"http_referrer":"$http_referer",'
    '"http_user_agent":"$http_user_agent",'
    '"http_x_forwarded_for":"$http_x_forwarded_for",'
    '"upstream_addr":"$upstream_addr",'
    '"upstream_status":"$upstream_status",'
    '"upstream_response_time":"$upstream_response_time"'
'}';

# Access logs: every request is written twice, once as buffered JSON and once
# in the classic combined format.
access_log /var/log/nginx/access.json json_combined buffer=32k flush=5s;
access_log /var/log/nginx/access.log combined;

# Error log: warn level and above.
error_log /var/log/nginx/error.log warn;
# Status monitoring server.
# It listens on loopback only, so a scraper running in another container or
# host cannot reach it unless the listen/allow rules are widened.
server {
    listen 127.0.0.1:8081;

    # Built-in connection and request counters.
    location /nginx_status {
        stub_status on;
        access_log off;
        allow 127.0.0.1;
        deny all;
    }

    # Host-level metrics rendered as JSON (requires lua-nginx-module + cjson).
    location /metrics {
        # The body is JSON, so declare it as such.
        default_type application/json;

        content_by_lua_block {
            local metric_data = {}

            -- Socket count: data lines of /proc/net/tcp (IPv4 TCP sockets of
            -- the whole host, in any state), minus the header line.
            local f = io.open("/proc/net/tcp", "r")
            if f then
                local count = 0
                for _ in f:lines() do
                    count = count + 1
                end
                f:close()
                metric_data.connections = math.max(count - 1, 0)
            end

            -- Memory usage from /proc/meminfo (values are in kB).
            local meminfo = {}
            f = io.open("/proc/meminfo", "r")
            if f then
                for line in f:lines() do
                    local key, value = line:match("([^:]+):%s+(%d+)")
                    if key and value then
                        meminfo[key] = tonumber(value)
                    end
                end
                f:close()

                -- Prefer MemAvailable: MemFree ignores reclaimable page cache
                -- and therefore overstates usage. Fall back to MemFree on
                -- kernels that do not report MemAvailable.
                local total = meminfo.MemTotal
                local available = meminfo.MemAvailable or meminfo.MemFree
                if total and total > 0 and available then
                    metric_data.memory_used_percent =
                        (total - available) / total * 100
                end
            end

            ngx.say(require("cjson").encode(metric_data))
        }
    }
}
```
## 容器化部署配置
```yaml
# docker-compose.yml
# Nginx load balancer, keepalived for VIP failover and a Prometheus exporter.
version: '3.8'

services:
  nginx-lb:
    image: nginx:1.24-alpine
    container_name: nginx-loadbalancer
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      # NOTE(review): the gateway config reads certificates from
      # /etc/ssl/certs and /etc/ssl/private; make sure the paths used in the
      # nginx configuration match this mount point.
      - ./nginx/ssl:/etc/nginx/ssl:ro
      - nginx-logs:/var/log/nginx
      - nginx-cache:/var/cache/nginx
    networks:
      - app-network
      - monitor-network
    healthcheck:
      # "nginx -t" only validates the configuration files; it does not prove
      # that nginx is serving traffic.
      test: ["CMD", "nginx", "-t"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M

  keepalived:
    image: osixia/keepalived:2.2.4
    container_name: keepalived
    restart: unless-stopped
    # keepalived has to send VRRP adverts on, and assign the virtual IP to,
    # the host's real interface. On an isolated bridge network it would only
    # manage an address inside the container, so host networking is used.
    network_mode: host
    cap_add:
      - NET_ADMIN
      - NET_BROADCAST
      - NET_RAW
    volumes:
      - ./keepalived/keepalived.conf:/container/service/keepalived/assets/keepalived.conf
    depends_on:
      - nginx-lb

  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:0.11.0
    container_name: nginx-exporter
    restart: unless-stopped
    command:
      # The path must be the stub_status location exposed by nginx, which the
      # status server defines as /nginx_status (not /stub_status). That server
      # must also listen on an address reachable from this container and
      # allow it; a loopback-only listener cannot be scraped from here.
      - '-nginx.scrape-uri=http://nginx-lb:8081/nginx_status'
    networks:
      - monitor-network

networks:
  app-network:
    driver: bridge
  monitor-network:
    driver: bridge

volumes:
  nginx-logs:
  nginx-cache:
```
## 总结
Nginx作为负载均衡和反向代理的核心组件,其配置优化和高可用设计直接影响整个系统的稳定性和性能。通过合理的负载均衡策略、完善的安全防护、细致的缓存配置和全面的监控体系,可以构建出既高效又可靠的流量入口层。
在实际生产环境中,需要根据具体的业务特点和技术栈,持续调整和优化Nginx配置。结合容器化部署和服务发现机制,可以实现动态的服务治理。同时,通过实施分层防护和深度监控,能够有效应对各种流量挑战和安全威胁,为上层应用提供坚实的基础设施保障。