基於 Patroni-etcd - 流複製搭建構建 PostgreSQL 高可用

Whoami:5年+金融、政府、醫療領域工作經驗的DBA
Certificate:OCP、PCP
Skill:Oracle、Mysql、PostgreSQL
Platform:CSDN、墨天倫、公衆號(呆呆的私房菜)

閱讀本文可以瞭解基於 Patroni + etcd + 流複製的方式來實現 PostgreSQL 數據庫高可用的相關內容,包含 Patroni 高可用架構實現的基本原理、搭建部署及主備切換等。

01 Patroni 概述

02 Patroni 基本原理

03 Patroni 高可用架構搭建

WniZ5m

nS9qRD

# 如無特殊說明,下列命令所有節點都執行
1. 主機名配置
su - root
cat >> /etc/hosts <<EOF
192.168.56.11 pg01
192.168.56.22 pg02
192.168.56.33 pg03
EOF
2. 操作系統配置
su - root
useradd postgres
echo "postgres#2024" | passwd --stdin postgres
cat >> /etc/sysctl.conf <<EOF
kernel.shmmax = 4294967296
kernel.shmall = 2097152
kernel.shmmni = 4096
kernel.sem = 250 32000 100 128
fs.aio-max-nr = 1048576
net.ipv4.ip_local_port_range = 9000 65500
net.core.rmem_default = 262144    
net.core.rmem_max = 4194304    
net.core.wmem_default = 262144    
net.core.wmem_max = 4194304
fs.file-max = 6815744
EOF
cat >> /etc/security/limits.conf <<EOF
postgres soft nproc unlimited
postgres hard nproc unlimited
postgres soft nofile 100000
postgres hard nofile 100000
postgres soft stack unlimited
postgres hard stack unlimited
postgres soft core unlimited
postgres hard core unlimited
postgres soft memlock unlimited
postgres hard memlock unlimited
EOF
3. 主備互信創建
# 數據庫wal需要使用scp命令集中歸檔至同一存儲節點,因此需要對3個數據庫節點的postgres用戶之間配置互信;
# 本次wal歸檔節點使用PostgreSQL主節點,即wal歸檔至 192.168.56.11 的/data/pgwal/archive_wals目錄下。
su - postgres
ssh-keygen -t rsa
ssh-copy-id pg01
ssh-copy-id pg02
ssh-copy-id pg03
4. 創建數據目錄
su - root
mkdir -p /pg/{pghome,patroni}
mkdir -p /data/{pgwal,pgdata,pgbak}
mkdir -p /data/pgwal/archive_wals
mkdir -p /data/pgdata/pg13
chown -R postgres:postgres /pg
chown -R postgres:postgres /data/{pgwal,pgdata,pgbak}
chmod -R 700 /pg
chmod -R 700 /data/{pgwal,pgdata,pgbak}
5. 配置環境變量
su - postgres
cat >> /home/postgres/.bash_profile
export PGHOME=/pg/pghome
export PGDATA=/data/pgdata/pg13
export PGPORT=5432
export PGDATABASE=postgres
export LD_LIBRARY_PATH=\$PGHOME/lib:\$LD_LIBRARY_PATH
export PATH=\$PGHOME/bin:\$PATH
EOF
source .bashrc
6. 安裝依賴包
yum -y install wget gcc gcc-c++  epel-release llvm5.0 llvm5.0-devel clang libicu-devel perl-ExtUtils-Embed readline readline-devel zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel openldap-devel systemd-devel tcl-devel python-devel
7. 源碼安裝數據庫
# 上傳PostgreSQL13.4源碼安裝包至/data目錄,使用postgres用戶解壓並編譯安裝
su - postgres
cd /data
tar zxvf postgresql-13.4.tar.gz
cd postgresql-13.4
./configure --prefix=/pg/pghome --with-pgport=5432
./configure --prefix=/pg/pghome --with-pgport=5432
gmake world
gmake install
cd contrib 
make && make instal
8. 主庫初始化並修改配置文件(主庫執行)
su - postgres
initdb -D /data/pgdata/pg13 -E UTF8 --locale=zh_CN.UTF-8
cat >> $PGDATA/pg_hba.conf <<EOF
echo "host all all 0.0.0.0/0 md5"
echo "host replication replicator 192.168.56.11/24 md5"
echo "host replication replicator 192.168.56.22/24 md5"
echo "host replication replicator 192.168.56.33/24 md5"
EOF
cat >> $PGDATA/postgresql.conf <<EOF
archive_command = 'scp %p 192.168.56.11:/data/pgwal/archive_wals/%f'
archive_mode = 'on'
archive_timeout = '1800s'
cluster_name = 'PGCluster'
hot_standby = 'on'
listen_addresses = '0.0.0.0'
max_connections = '2000'
max_locks_per_transaction = '64'
max_prepared_transactions = '0'
max_replication_slots = '10'
max_wal_senders = '50'
max_worker_processes = '8'
port = '5432'
track_commit_timestamp = 'off'
wal_keep_size = 16GB
wal_level = 'replica'
wal_log_hints = 'on'
hba_file = '/data/pgdata/pg13/pg_hba.conf'
ident_file = '/data/pgdata/pg13/pg_ident.conf'
restore_command = 'scp 192.168.56.11:/data/pgwal/archive_wals/%f %p'
recovery_target_timeline = 'latest'
EOF
pg_ctl start
psql -U postgres -c "ALTER USER postgres WITH PASSWORD 'postgres#2024';"
psql -U postgres -c "create user replicator replication login encrypted password 'replicator#2024';"
9. 創建備庫(只需要在備庫執行)
su - postgres
pg_basebackup -D $PGDATA -Fp -Xs -v -P -h 192.168.56.11 -p 5432 -U replicator
cat >> $PGDATA/standby.signal <<EOF
standby_mode = 'on'
EOF
cat >> $PGDATA/postgresql.auto.conf <<EOF
primary_conninfo='application_name=pg01 host=192.168.56.11 port=5432 user=replicator password=replicator'            
max_standby_streaming_delay = 30s
wal_receiver_status_interval = 10s
hot_standby_feedback = on
recovery_target_timeline = 'latest'
EOF
pg_ctl start
10. 檢查主備狀態
主庫查看同步狀態
select * from pg_stat_replication;
備庫查看同步狀態
select * from pg_stat_wal_receiver;
# 如無特殊說明,下列命令所有節點都執行
1. 依賴安裝,注意python版本要求2.7以上
yum install -y haproxy postgresql-devel* watchdog libnfnetlink python-devel libevent-devel libdbi* openssl-devel bzip2-devel *bsddb* sqlite-devel gdbm-devel
2. pip安裝
wget https://mirrors.aliyun.com/pypi/packages/ce/ea/9b445176a65ae4ba22dce1d93e4b5fe182f953df71a145f557cffaffc1bf/pip-19.3.1.tar.gz
tar -xf pip-19.3.1.tar.gz
cd pip-19.3.1
python  setup.py install
3. setuptools安裝
wget https://mirrors.aliyun.com/pypi/packages/54/28/c45d8b54c1339f9644b87663945e54a8503cfef59cf0f65b3ff5dd17cf64/setuptools-44.0.2-py2.py3-none-any.whl
pip install setuptools-44.0.2-py2.py3-none-any.whl
4. 下載所有patroni用到的pip包
wget http://mirrors.163.com/pypi/packages/97/2a/b854019bcb9b925cd10ff245dbc9448a82fe7fdb40127e5cf1733ad0765c/psycopg2_binary-2.8.4-cp27-cp27mu-manylinux1_x86_64.whl
wget https://mirrors.163.com/pypi/packages/b4/40/a9837291310ee1ccc242ceb6ebfd9eb21539649f193a7c8c86ba15b98539/urllib3-1.25.7-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/ef/99/53bd1ac9349262f59c1c421d8fcc2559ae8a5eeffed9202684756b648d33/tzlocal-2.0.0-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/3f/d0/59bc5f1c6c4d4b498c41d8ce7052ee9e9d68be19e16038a55252018a6c4d/python_consul-1.1.0-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/e7/f9/f0b53f88060247251bf481fa6ea62cd0d25bf1b11a87888e53ce5b7c8ad2/pytz-2019.3-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/ec/d3/3aa0e7213ef72b8585747aa0e271a9523e713813b9a20177ebe1e939deb0/dnspython-1.16.0-py2.py3-none-any.whl
pip install psycopg2_binary-2.8.4-cp27-cp27mu-manylinux1_x86_64.whl
pip install urllib3-1.25.7-py2.py3-none-any.whl
pip install six-1.13.0-py2.py3-none-any.whl
pip install Click-7.0-py2.py3-none-any.whl
pip install pytz-2019.3-py2.py3-none-any.whl
pip install tzlocal-2.0.0-py2.py3-none-any.whl
pip install python_dateutil-2.8.1-py2.py3-none-any.whl
pip install dnspython-1.16.0-py2.py3-none-any.whl
pip install urllib3-1.26.6-py2.py3-none-any.whl 
pip install idna-2.10-py2.py3-none-any.whl 
pip install chardet-4.0.0-py2.py3-none-any.whl 
pip install certifi-2021.5.30-py2.py3-none-any.whl 
pip install requests-2.26.0-py2.py3-none-any.whl 
pip install python_consul-1.1.0-py2.py3-none-any.whl
pip install backports.functools_lru_cache-1.6.4-py2.py3-none-any.whl 
pip install wcwidth-0.2.5-py2.py3-none-any.whl 
pip install prettytable-1.0.1-py2.py3-none-any.whl
wget http://mirrors.163.com/pypi/packages/32/80/8c0fbf433eb9e09eb025bb4f942881cfcf2e343db0750fd50873b4722b43/patroni-1.6.1.tar.gz
wget http://mirrors.163.com/pypi/packages/73/93/4f8213fbe66fc20cb904f35e6e04e20b47b85bee39845cc66a0bcf5ccdcb/psutil-5.6.7.tar.gz
wget http://mirrors.163.com/pypi/packages/69/6c/301876940e760a8b46c1caacf08c298f511f517c70eec32e43f38e9cc6f5/cdiff-1.0.tar.gz
wget http://mirrors.163.com/pypi/packages/a1/da/616a4d073642da5dd432e5289b7c1cb0963cc5dde23d1ecb8d726821ab41/python-etcd-0.4.5.tar.gz
pip install psutil-5.6.7.tar.gz
pip install cdiff-1.0.tar.gz
pip install python-etcd-0.4.5.tar.gz
pip install patroni-1.6.1.tar.gz
5. 檢查python配置文件
如果使用python2安裝PG13以上版本檢查下面文件是否含有wal_keep_segments 參數,如果有需要註釋掉。
vi /lib/python2.7/site-packages/patroni/postgresql/config.py

# 編輯patroni配置文件pg.yml,寫入etcd、PostgreSQL相關信息。
# 3個數據庫節點的patroni配置文件略有不同,需要注意修改。
1. 主節點配置如下:
cat > /pg/patroni/pg.yml <<EOF
scope: PGCluster
namespace: /pgsql/
name: db1
restapi:
  listen: 192.168.56.11:8008
  connect_address: 192.168.56.11:8008
etcd:
  hosts: 192.168.56.11:2379,192.168.56.22:2379,192.168.56.33:2379
bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
  # and all other cluster members will use it as a global configuration
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    master_start_timeout: 300
    synchronous_mode: true
    synchronous_standby_names: "ANY 1 (pg01,pg02,pg03)"
    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        listen_addresses: "*"
        port: 5432
        wal_level: hot_standby
        hot_standby: "on"
        max_connections: 2000
        wal_keep_size: 16GB
        max_wal_senders: 50
        max_replication_slots: 10
        wal_log_hints: "on"
        archive_mode: "on"
        archive_timeout: 1800s
        archive_command: scp %p 192.168.56.11:/data/pgwal/archive_wals/%f
      recovery_conf:
        restore_command: scp 192.168.56.11:/data/pgwal/archive_wals/%f %p
postgresql:
  callbacks:
    on_start: /pg/patroni/patroni_callback.sh
    on_stop: /pg/patroni/patroni_callback.sh
    on_role_change: /pg/patroni/patroni_callback.sh
  listen: 0.0.0.0:5432
  connect_address: 192.168.56.11:5432
  data_dir: /data/pgdata/pg13
  bin_dir: /pg/pghome/bin
#  config_dir: /etc/postgresql/9.6/main
  authentication:
    replication:
      username: replicator
      password: Postgres%clover
    superuser:
      username: postgres
      password: Postgres%clover
watchdog:
  mode: automatic # Allowed values: off, automatic, required
  device: /dev/watchdog
  safety_margin: 5
tags:
    nofailover: false
    noloadbalance: false
    clonefrom: false
    nosync: false
2. 從節點1配置如下:
cat > /pg/patroni/pg.yml <<EOF
scope: PGCluster
namespace: /pgsql/
name: db2
restapi:
  listen: 192.168.56.22:8008
  connect_address: 192.168.56.22:8008
etcd:
  hosts: 192.168.56.11:2379,192.168.56.22:2379,192.168.56.33:2379
bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
  # and all other cluster members will use it as a global configuration
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    master_start_timeout: 300
    synchronous_mode: true
    synchronous_standby_names: "ANY 1 (pg01,pg02,pg03)"
    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        listen_addresses: "*"
        port: 5432
        wal_level: hot_standby
        hot_standby: "on"
        max_connections: 2000
        wal_keep_size: 16GB
        max_wal_senders: 50
        max_replication_slots: 10
        wal_log_hints: "on"
        archive_mode: "on"
        archive_timeout: 1800s
        archive_command: scp %p 192.168.56.22:/data/pgwal/archive_wals/%f
      recovery_conf:
        restore_command: scp 192.168.56.22:/data/pgwal/archive_wals/%f %p
postgresql:
  callbacks:
    on_start: /bin/sh /pg/patroni/patroni_callback.sh
    on_stop: /bin/sh /pg/patroni/patroni_callback.sh
    on_role_change: /bin/sh /pg/patroni/patroni_callback.sh
  listen: 0.0.0.0:5432
  connect_address: 192.168.56.22:5432
  data_dir: /data/pgdata/pg13
  bin_dir: /pg/pghome/bin
#  config_dir: /etc/postgresql/9.6/main
  authentication:
    replication:
      username: replicator
      password: Postgres%clover
    superuser:
      username: postgres
      password: Postgres%clover
watchdog:
  mode: automatic # Allowed values: off, automatic, required
  device: /dev/watchdog
  safety_margin: 5
tags:
    nofailover: false
    noloadbalance: false
    clonefrom: false
    nosync: false
EOF
3. 從節點2配置如下:
cat > /pg/patroni/pg.yml <<EOF
scope: PGCluster
namespace: /pgsql/
name: db3
restapi:
  listen: 192.168.56.33:8008
  connect_address: 192.168.56.33:8008
etcd:
  hosts: 192.168.56.11:2379,192.168.56.22:2379,192.168.56.33:2379
bootstrap:
  # this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
  # and all other cluster members will use it as a global configuration
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    master_start_timeout: 300
    synchronous_mode: true
    synchronous_standby_names: "ANY 1 (pg01,pg02,pg03)"
    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        listen_addresses: "*"
        port: 5432
        wal_level: hot_standby
        hot_standby: "on"
        max_connections: 2000
        wal_keep_size: 16GB
        max_wal_senders: 50
        max_replication_slots: 10
        wal_log_hints: "on"
        archive_mode: "on"
        archive_timeout: 1800s
        archive_command: scp %p 192.168.56.33:/data/pgwal/archive_wals/%f
      recovery_conf:
        restore_command: scp 192.168.56.33:/data/pgwal/archive_wals/%f %p
postgresql:
  callbacks:
    on_start: /pg/patroni/patroni_callback.sh
    on_stop: /pg/patroni/patroni_callback.sh
    on_role_change: /pg/patroni/patroni_callback.sh
  listen: 0.0.0.0:5432
  connect_address: 192.168.56.33:5432
  data_dir: /data/pgdata/pg13
  bin_dir: /pg/pghome/bin
#  config_dir: /etc/postgresql/9.6/main
  authentication:
    replication:
      username: replicator
      password: Postgres%clover
    superuser:
      username: postgres
      password: Postgres%clover
watchdog:
  mode: automatic # Allowed values: off, automatic, required
  device: /dev/watchdog
  safety_margin: 5
tags:
    nofailover: false
    noloadbalance: false
    clonefrom: false
    nosync: false
EOF
PostgreSQL高可用環境下,主節點可以是3個數據庫節點中的任意一個。
因此,如果應用通過物理IP訪問主庫,每當主備切換時,都需要人工調整應用訪問數據庫的連接串,這顯然很不友好。
因此,需要引入VIP,即主庫虛擬IP,以此訪問主庫。
1. 添加postgres用戶爲sudoer,以root用戶執行:
su - root
visudo 
postgres ALL=(ALL) NOPASSWD:ALL
2. 配置腳本
cat > /pg/patroni/patroni_callback.sh <<EOF
#!/bin/bash
readonly cb_name=$1
readonly role=$2
readonly scope=$3
VIP=192.168.56.66
VIPBRD=192.168.56.255
VIPNETMASK=255.255.255.0
VIPNETMASKBIT=22
#VIPifconfig 
VIPDEV=em1
VIPLABEL=1
function usage() {
    echo "Usage: $0 <on_start|on_stop|on_role_change> <role> <scope>";
    exit 1;
}
function addvip(){
    echo "`date +%Y-%m-%d\ %H:%M:%S,%3N` INFO: /sbin/ip addr add ${VIP}/${VIPNETMASKBIT} brd ${VIPBRD} dev ${VIPDEV} label ${VIPDEV}:${VIPLABEL}"
    sudo /sbin/ip addr add ${VIP}/${VIPNETMASKBIT} brd ${VIPBRD} dev ${VIPDEV} label ${VIPDEV}:${VIPLABEL}
    sudo /usr/sbin/arping -q -A -c 1 -I ${VIPDEV}:${VIPLABEL} ${VIP}
    #sudo /sbin/iptables -F
}
function delvip(){
    echo "`date +%Y-%m-%d\ %H:%M:%S,%3N` INFO: sudo /sbin/ip addr del ${VIP}/${VIPNETMASKBIT} dev ${VIPDEV} label ${VIPDEV}:${VIPLABEL}"
    sudo /sbin/ip addr del ${VIP}/${VIPNETMASKBIT} dev ${VIPDEV} label ${VIPDEV}:${VIPLABEL}
    sudo /usr/sbin/arping -q -A -c 1 -I ${VIPDEV} ${VIP}
    #sudo /sbin/iptables -F
}
echo "`date +%Y-%m-%d\ %H:%M:%S,%3N` WARNING: patroni callback $cb_name $role $scope"
case $cb_name in
    on_stop)
        delvip
        ;;
    on_start)
        ;;
    on_role_change)
        if [[ $role == 'master' ]]; then
            addvip
        elif [[ $role == 'slave' ]]||[[ $role == 'replica' ]]||[[ $role == 'logical' ]]; then
            delvip
        fi
        ;; 
    *)
        usage
        ;;
esac
EOF
chmod +x /pg/patroni/patroni_callback.sh
# 啓動前提:
# PostgreSQL流複製正常,且etcd也正常運行的情況下
1. 手工啓動3個數據庫節點的patroni程序
su – postgres
nohup patroni /pg/patroni/pg.yml & >> /pg/patroni/patroni.log 2>&&
2. 配置開機啓動
su – postgres 
crontab -e
@reboot patroni /pg/patroni/pg.yml & >> /pg/patroni/patroni.log 2>&1

04 Patroni 高可用測試

1. 查看集羣狀態
su - postgres
patronictl -c /pg/patroni/pg.yml list 
2. 主備切換
patronictl -c /pg/patroni/pg.yml switchover
3. 故障切換
模擬節點故障,關掉主節點,同步備庫將自動升級爲主庫;
原主節點恢復後,將自動降級爲備庫。重啓其他節點同理。
reboot
patronictl -c /pg/patroni/pg.yml list
本文由 Readfog 進行 AMP 轉碼,版權歸原作者所有。
來源https://mp.weixin.qq.com/s/xqIAODnWc7OYMDRSKSmPrQ