Database

MySQL MGR + ShardingProxy + Keepalived + HAProxy + mysqld_exporter 高可用企业级生产部署

1. 首先部署 MySQL Group Replication 集群

2. HAProxy 部署

2.1 从源码编译

$ git clone https://github.com/haproxy/haproxy.git
$ cd haproxy
$ git checkout v2.4.0
$ sudo make TARGET=linux-glibc ARCH=X86_64 PREFIX=$(pwd)
# 耐心等待编译完成..
$ ./haproxy -version
$ sudo cp $(pwd)/haproxy /bin/

HAProxy version 2.4.0 2021/05/14 - https://haproxy.org/
Status: long-term supported branch - will stop receiving fixes around Q2 2026.
Known bugs: http://www.haproxy.org/bugs/bugs-2.4.0.html
Running on: Linux 5.4.0-77-generic #86-Ubuntu SMP Thu Jun 17 02:35:03 UTC 2021 x86_64

2.2 配置

2.2.1 环境配置

sudo mkdir -p /etc/sysconfig/
sudo tee /etc/sysconfig/haproxy <<-'EOF'
# Add extra options to the haproxy daemon here. This can be useful for
# specifying multiple configuration files with multiple -f options.
# See haproxy(1) for a complete list of options.
OPTIONS=""
EOF

2.2.2 规则配置

[展开] Configure haproxy.cfg

sudo mkdir -p /etc/haproxy
sudo mkdir -p /var/lib/haproxy # Using for haproxy unix socks stats
sudo tee /etc/haproxy/haproxy.cfg <<-'EOF'
#---------------------------------------------------------------------
# Example configuration for a possible web application.  See the
# full configuration options online.
#
#   http://haproxy.1wt.eu/download/1.4/doc/configuration.txt
#---------------------------------------------------------------------

global
    # to have these messages end up in /var/log/haproxy.log you will
    # need to:
    #
    # 1) configure syslog to accept network log events.  This is done
    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
    #    /etc/sysconfig/syslog
    #
    # 2) configure local2 events to go to the /var/log/haproxy.log
    #   file. A line like the following can be added to
    #   /etc/sysconfig/syslog
    #
    #    local2.*                       /var/log/haproxy.log
    #
    log         127.0.0.1 local2

    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     50000
    user        root
    group       root
    daemon

    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats

defaults
    mode                    http # tcp|http|health
    log                     global
    option                  tcplog # tcplog|httplog
    option                  dontlognull
    option http-server-close
    #option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 50000

# Must setup mode=http to be effective, defaults{mode=http} is also possible.
# see: https://www.haproxy.com/blog/haproxy-exposes-a-prometheus-metrics-endpoint/
frontend stats
   bind *:8404
   #option http-use-htx
   #http-request use-service prometheus-exporter if { path /metrics }
   stats enable
   stats uri /stats
   stats refresh 10s

frontend mysql_cluster1
    bind *:3306
    mode        tcp
    default_backend     shardingproxy_cluster1

backend shardingproxy_cluster1
    balance     roundrobin
    mode        tcp
    server      shardingproxy1 10.0.0.162:3307 check
    server      shardingproxy2 10.0.0.163:3307 check
    server      shardingproxy3 10.0.0.163:3307 check
EOF


  • 检查规则配置合法(可选)
$ haproxy -c -f /etc/haproxy/haproxy.cfg
Configuration file is valid

2.2.3 服务配置

[展开] Installation haproxy.service

sudo tee /etc/systemd/system/haproxy.service <<-'EOF'
[Unit]
Description=HAProxy Load Balancer
After=network-online.target
Wants=network-online.target

[Service]
EnvironmentFile=-/etc/sysconfig/haproxy
Environment="CONFIG=/etc/haproxy/haproxy.cfg" "PIDFILE=/run/haproxy.pid" "EXTRAOPTS=-S /run/haproxy-master.sock"
ExecStartPre=/bin/haproxy -f $CONFIG -c -q $EXTRAOPTS
ExecStart=/bin/haproxy -DWs -f $CONFIG -p $PIDFILE $EXTRAOPTS
ExecReload=/bin/haproxy -f $CONFIG -c -q $EXTRAOPTS
ExecReload=/bin/kill -USR2 $MAINPID
StandardOutput=journal
StandardError=journal
KillMode=mixed
Restart=always
User=root
Group=root
SuccessExitStatus=143
Type=forking

# The following lines leverage SystemD's sandboxing options to provide
# defense in depth protection at the expense of restricting some flexibility
# in your setup (e.g. placement of your configuration files) or possibly
# reduced performance. See systemd.service(5) and systemd.exec(5) for further
# information.

# NoNewPrivileges=true
# ProtectHome=true
# If you want to use 'ProtectSystem=strict' you should whitelist the PIDFILE,
# any state files and any other files written using 'ReadWritePaths' or
# 'RuntimeDirectory'.
# ProtectSystem=true
# ProtectKernelTunables=true
# ProtectKernelModules=true
# ProtectControlGroups=true
# If your SystemD version supports them, you can add: @reboot, @swap, @sync
# SystemCallFilter=~@cpu-emulation @keyring @module @obsolete @raw-io

[Install]
WantedBy=multi-user.target
EOF


2.2.4 启动服务

sudo systemctl daemon-reload
sudo systemctl enable haproxy
sudo systemctl restart haproxy
sudo systemctl status haproxy
# 查看日志
sudo tail -f /var/log/syslog
# sudo tail -f /var/log/message

3. Keepalived 部署

3.1 从源码编译

[展开] Installation keepalived

# 前置依赖
sudo yum -y install openssl-devel autoconf automake
# 克隆源码
git clone https://github.com/acassen/keepalived.git
cd keepalived
# 从 configure.ac 生成 configure
./autogen.sh
# 生成 Makefile
chmod +x ./configure; ./configure --prefix=$(pwd)
# 编译安装
make && make install
# 查看编译产物
$ ./keepalived/keepalived --version

Keepalived v2.2.2 (03/05,2021)

Copyright(C) 2001-2021 Alexandre Cassen, 

Built with kernel headers for Linux 3.10.0
Running on Linux 3.10.0-957.21.3.el7.x86_64 #1 SMP Tue Jun 18 16:35:19 UTC 2019
Distro: CentOS Linux 7 (Core)

configure options: --prefix=/root/keepalived

Config options:  LVS VRRP VRRP_AUTH VRRP_VMAC OLD_CHKSUM_COMPAT INIT=systemd
System options:  VSYSLOG RTA_ENCAP RTA_EXPIRES RTA_PREF FRA_TUN_ID RTAX_CC_ALGO RTAX_QUICKACK IFA_FLAGS NET_LINUX_IF_H_COLLISION NET_LINUX_IF_ETHER_H_COLLISION LIBIPTC_LINUX_NET_IF_H_COLLISION IFLA_LINK_NETNSID GLOB_BRACE GLOB_ALTDIRFUNC INET6_ADDR_GEN_MODE SO_MARK


3.2 配置

3.2.1 环境配置

sudo mkdir -p /etc/keepalived
sudo cat<<EOF>/etc/keepalived/keepalived
# Options for keepalived. See 'keepalived --help' output and keepalived(8) and
# keepalived.conf(5) man pages for a list of all options. Here are the most
# common ones :
#
# --vrrp               -P    Only run with VRRP subsystem.
# --check              -C    Only run with Health-checker subsystem.
# --dont-release-vrrp  -V    Dont remove VRRP VIPs & VROUTEs on daemon stop.
# --dont-release-ipvs  -I    Dont remove IPVS topology on daemon stop.
# --dump-conf          -d    Dump the configuration data.
# --log-detail         -D    Detailed log messages.
# --log-facility       -S    0-7 Set local syslog facility (default=LOG_DAEMON)
#

KEEPALIVED_OPTIONS="-D"
EOF

3.2.3 规则配置

[展开] Configure keepalived.conf

sudo mkdir -p /etc/keepalived
sudo cat<<-'EOF' >/etc/keepalived/keepalived.conf
global_defs {
  notification_email {
    983708408@qq.com # 通知接受邮箱 (**可按实际配置**)
  }
  notification_email_from xxx@163.com # 通知发送邮箱 (**可按实际配置**)
  smtp_server smtp.163.com # 邮箱主机 (**可按实际配置**)
  smtp_connect_timeout 10
  router_id  iloqg8n3yb9mje1s
  script_user root
  enable_script_security
  vrrp_garp_interval 0
  vrrp_garp_master_delay 40
  vrrp_garp_master_refresh 60
  vrrp_skip_check_adv_addr
  vrrp_gna_interval 0
  #vrrp_strict
}

vrrp_script haproxy_checkpoint {
  script "/etc/keepalived/haproxy_checker.sh" # 用于检测 real-ip 上服务的自定义脚本 (**可按实际配置**)
  interval 2
  weight 2
}

vrrp_instance keepalived_1 {
  interface eth0 # 使用网卡 (**可按实际配置**)
  virtual_router_id 51 # 虚拟路由的标志,  一组lvs的虚拟路由标识必须相同, 这样才能切换
  priority 150 # 服务启动优先级, 值越大优先级越高, BACKUP 不能大于MASTER
  advert_int 3 # 服务器之间的存活检查时间
  # 通常如果master服务死掉后backup会变成master, 但是当master服务又好了的时候 master 此时会抢占VIP,
  # 这样就会发生两次切换对业务繁忙的网站来说是不好的。所以我们要在配置文件加入 nopreempt 非抢占,
  # 但是这个参数只能用于state 为backup, 故我们在用HA的时候最好 master 和 backup 的 state 都设置成 BACKUP
  # 让其通过 priority 来竞争。
  state BACKUP # MASTER|BACKUP, 可将所有节点都设置为BACKUP时, 第一个启动时会抢占为 MASTER
  nopreempt # 指定非抢占
  authentication {
    auth_type PASS # 认证类型
    auth_pass ufsoft # 认证密码, 一组lvs 服务器的认证密码必须一致
  }

  track_script {
    haproxy_checkpoint # 执行监控 haproxy 进程的脚本
  }

  # 极力推荐显示指定广播 peer, 生产踩过的坑: 当时未指定此配置,
  # 可能会导致VRRPv2包被发到 vrrp.mcast.net 去了, 最终导致闹裂 (可通过 tcpdump -i any vrrp 捕包排查).
  unicast_src_ip 10.111.0.112
  unicast_peer {
    10.111.0.111
    10.111.0.112
  }

  # 虚拟IP地址
  virtual_ipaddress {
    10.111.0.200 # 虚拟vip (**可按实际配置**)
  }
}
EOF


3.2.4 自定义探测脚本

[展开] Configure haproxy_checker.sh

sudo tee /etc/keepalived/haproxy_checker.sh <<-'EOF'
#!/bin/bash

BASE_DIR=$(cd dirname $0; pwd)
APP_NAME=$(echo basename $0)
LOG_DIR="/mnt/disk1/log/keepalived"; mkdir -p $LOG_DIR
LOG_FILE="${LOG_DIR}/$(echo $APP_NAME|sed 's/.sh//g').stdout"

if [ -z "$(ps -ef | grep '/bin/haproxy'| grep -v grep)" ]; then
  echo "$(date '+%Y-%m-%d %H:%M:%S') - HAProxy is dead." | tee -a "$LOG_FILE"
  systemctl stop keepalived
else
  echo "$(date '+%Y-%m-%d %H:%M:%S') - HAPorxy is running." | tee -a "$LOG_FILE"
fi
exit 0
EOF

sudo chmod +x /etc/keepalived/haproxy_checker.sh


3.2.5 服务配置

[展开] Configure keepalived.service(systemd)

sudo tee /etc/systemd/system/keepalived.service <<-'EOF'
[Unit]
Description=LVS and VRRP High Availability Monitor
After=network-online.target syslog.target
Wants=network-online.target

[Service]
Type=forking
PIDFile=/run/keepalived.pid
KillMode=process
EnvironmentFile=-/etc/keepalived/keepalived
ExecStart=/sbin/keepalived $KEEPALIVED_OPTIONS -f /etc/keepalived/keepalived.conf
ExecReload=/bin/kill -HUP $MAINPID
#StandardOutput=/mnt/disk1/log/keepalived.out
StandardError=journal
TimeoutStartSec=5
TimeoutSec=600
Restart=alway

[Install]
WantedBy=multi-user.target
EOF


3.2.5 启动服务

sudo systemctl daemon-reload
sudo systemctl enable keepalived
sudo systemctl restart keepalived
sudo systemctl status keepalived
# 查看日志
sudo tail -f /var/log/syslog
# sudo tail -f /var/log/message
# 查看 vip 漂移状态 (keepalived主节点上应有 192.168.1.200)
ip addr

4. Sharding Proxy 部署


5. 全链路验证

TODO

6. 部署监控 (可选推荐)

6.1 部署 haproxy_exporter

cd /tmp
git clone https://github.com/wl4g-collect/haproxy_exporter.git
cd haproxy_exporter
git checkout v0.12.0
make build
sudo chmod +x haproxy_exporter
cp haproxy_exporter /usr/sbin/
  • 运行配置
sudo cat <<-'EOF' >/etc/sysconfig/haproxy_exporter
OPTIONS='--no-haproxy.ssl-verify --haproxy.scrape-uri="http://localhost:8404/stats?stats;csv"'
EOF
  • 服务配置
sudo curl -o /etc/systemd/system/haproxy_exporter.service 'https://raw.githubusercontent.com/wl4g/prometheus-integration/master/prometheus/haproxy/systemd/haproxy_exporter.service'

sudo systemctl daemon-reload
sudo systemctl enable haproxy_exporter
sudo systemctl restart haproxy_exporter
sudo systemctl status haproxy_exporter
sudo journalctl -afu haproxy_exporter
  • 测试指标接口
# 先看看 haproxy /stats 接口正常与否
curl "http://localhost:8404/stats?;csv"

# 再看看 haproxy_exporter 接口正常与否
curl "http://localhost:9101/metrics"
  • Grafana 导入 haproxy 仪表盘
curl 'https://raw.githubusercontent.com/wl4g/prometheus-integration/master/prometheus/haproxy/dashboard-haproxy-2428-rev7.json'
  • Promethus 配置
# 其他配置省略 ...
# ...
scrape_configs:
  - job_name: haproxy
    static_configs:
    - targets: ['10.111.0.111:9101']
      labels:
        instance: collect-node1:9101
    - targets: ['10.111.0.112:9101']
      labels:
        instance: collect-node2:9101

6.2 部署 mysqld_exporter

  • 下载安装
# Downloading.
sudo cd /tmp
sudo curl -OL https://github.91chifun.workers.dev/https://github.com//prometheus/mysqld_exporter/releases/download/v0.13.0/mysqld_exporter-0.13.0.linux-amd64.tar.gz
sudo tar -xf mysqld_exporter*
sudo cp mysqld_exporter*/mysqld_exporter /usr/bin/

# Environment configuration.
sudo mkdir -p /etc/sysconfig
sudo curl -o /etc/sysconfig/mysqld_exporter.conf 'https://raw.githubusercontent.com/wl4g/prometheus-integration/master/prometheus/mysql/sysconfig/mysqld_exporter.conf'

# Connect mysqld configuration.
sudo curl -o /etc/sysconfig/mysqld_exporter.cnf 'https://raw.githubusercontent.com/wl4g/prometheus-integration/master/prometheus/mysql/sysconfig/mysqld_exporter.cnf'
  • 配置服务
sudo curl -o /etc/systemd/system/mysqld_exporter.service 'https://raw.githubusercontent.com/wl4g/prometheus-integration/master/prometheus/mysql/systemd/mysqld_exporter.service'

sudo systemctl daemon-reload
sudo systemctl enable mysqld_exporter
sudo systemctl restart mysqld_exporter
sudo systemctl status mysqld_exporter
sudo journalctl -u mysqld_exporter -f

# 测试访问
curl http://localhost:9104/metrics
  • 配置 mysql 账号
CREATE USER `mysqld_exporter`@`127.0.0.1` IDENTIFIED WITH mysql_native_password by 'mysqld_exporter';
GRANT Event, Process, Replication Client, Select, Show Databases, Show View ON *.* TO `mysqld_exporter`@`127.0.0.1`;
FLUSH PRIVILEGES;
  • Grafana 导入 mysqld_exporter 仪表盘

mysqld_exporter 默认有提供 grafana mysql-overview.json、alert galera.yamlgeneral.yamlrules.yaml,参见:https://github.com/wl4g-collect/mysqld_exporter

  • Grafana dashboard 配置(例如以下手动下载 dashboard 然后导入)
curl -OL 'https://raw.githubusercontent.com/wl4g-collect/mysqld_exporter/master/mysqld-mixin/dashboards/mysql-overview.json'
# 或
curl -OL 'https://gitee.com/wl4g-collect/mysqld_exporter/raw/master/mysqld-mixin/dashboards/mysql-overview.json'
  • Promethus scrape 配置
# 其他配置省略 ...
# ...
scrape_configs:
  - job_name: mysqld_exporter
    static_configs:
    - targets: ['mysql-node1:9104']
      labels:
        instance: mysql-node1:9104
    - targets: ['mysql-node2:9104']
      labels:
        instance: mysql-node2:9104
    - targets: ['mysql-node3:9104']
      labels:
        instance: mysql-node3:9104

7. FAQ

  • 7.1 当 keepalived master 宕机,vip成功转移到从机,但访问 vip 依然无法连接?
    • 由于 arp(ipv4)/ ndp(ipv6) 路由表缓存未更新,清理缓存执行: arping -I eth0 -c 3 -s 192.168.1.200 192.168.1.1

8. Troubleshooting

留言

您的电子邮箱地址不会被公开。