@saltyang 2018-06-19T03:38:53.000000Z 字数 9325 阅读 2614

NFS HA： NFS + Keepalived + Rsync + Inotify

NFS, Keepalived, Rsync, Inotify

存储服务器采用NFS的方式向接收服务器提供存储服务，存储服务器本身采用rsync + inotify实现文件的实时同步，其中Keepalived保证NFS服务的高可用，实现故障转移和自动切换。

本系统采用NFS + Keepalived + Rsync + Inotify来保证NFS服务的高可用

Linux: Centos 6.9
Rsync Version: 3.0.6
master_nfs ip: 192.168.1.176
salve_nfs ip: 192.168.1.177
VIP: 192.168.1.178

ENV Prepare

# NFS server packages
yum install rpcbind nfs-utils
# Failover and file-sync packages
yum install keepalived rsync inotify-tools
# Enable keepalived and nfs at boot
chkconfig keepalived on
chkconfig nfs on

Rsync Server Config

The rsync server runs on the backup machine; initially this is salve_nfs.

Config file on the salve_nfs machine: vim /etc/rsyncd.conf

##rsyncd.conf start##
# User and group used for file transfers (must be set explicitly)
uid = root
gid = root
use chroot = no
# Maximum number of clients transferring files at the same time
max connections = 36000
# Timeout
timeout = 300
# PID file
pid file = /var/run/rsyncd.pid
# Lock file
lock file = /var/run/rsync.lock
# Log file
log file = /var/log/rsyncd.log
# Module definition starts here
# This module corresponds to the directory that clients push to
# The module name is arbitrary
[backup]
# Directory to synchronize
path = /data/
# Ignore errors when they occur
ignore errors = yes
# Allow writes over the network (local filesystem permissions still decide whether a write succeeds)
read only = false
# Whether this module is shown when clients request the module list
list = false
# Network range that is allowed to connect
hosts allow = 192.168.1.0/24
# Addresses whose connections are rejected; the value below rejects no client
hosts deny = 0.0.0.0/32
# Settings below are normally left as they are
# Virtual (rsync-only) user
auth users = rsync_backup
# Password file for the virtual user
secrets file = /etc/rsync.password

Create the rsync password file and change its mode to 600

# Secrets file read by the rsync daemon: entry for the rsync_backup virtual user
printf '%s\n' "rsync_backup:ems" > /etc/rsync.password
# Restrict the file to its owner
chmod 600 /etc/rsync.password

Start rsync daemon

# Launch the rsync daemon
rsync --daemon
# Check whether the daemon started: its process should be listed
ps -ef | grep "rsync" | grep -v "grep"
# Start the daemon automatically when the machine boots
echo "/usr/bin/rsync --daemon" >> /etc/rc.local

Rsync Client Config

The rsync client is the machine that is running the NFS service; initially this is master_nfs.

Inotify Config

#!/bin/bash
# Watch SYC_DIR with inotifywait and push the whole tree to the rsync daemon
# module DST_DIR on RSYNC_SERVER every time a change event is reported.
# Exits with status 9 when a required file or directory is missing.

# rsync server address
RSYNC_SERVER=192.168.1.177
# local directory that is monitored and pushed
SYC_DIR=/data/
# rsync server service module name
DST_DIR=backup
# rsync server service auth user (not named USER, which would shadow the
# login variable of the same name)
RSYNC_USER=rsync_backup
# rsync client password file
RSYNC_PASSFILE=/etc/rsync_client.password
INOTIFYWAIT=/usr/bin/inotifywait
LOG_FILE=/var/log/rsyncd.log

if [ ! -e "$SYC_DIR" ] || [ ! -e "$RSYNC_PASSFILE" ] \
    || [ ! -e "$INOTIFYWAIT" ] || [ ! -e "/usr/bin/rsync" ]; then
    # Append instead of truncating so earlier log entries survive.
    echo "Check File and Folder" >> "$LOG_FILE"
    exit 9
fi

# "move" is watched in addition to the other events so that renames inside
# the tree also trigger a sync.
"$INOTIFYWAIT" -mrq --timefmt '%d/%m/%y %H:%M' --format '%T %w%f' \
    -e close_write,delete,create,attrib,move "$SYC_DIR" \
    | while IFS= read -r _event; do
        # Never run "rsync --delete ./" from an unexpected directory: skip
        # this event if the monitored directory cannot be entered.
        cd "$SYC_DIR" || continue
        rsync -auz -R --delete --timeout=100 \
            --password-file="$RSYNC_PASSFILE" \
            ./ "${RSYNC_USER}@${RSYNC_SERVER}::${DST_DIR}" > /dev/null 2>&1
    done
exit 0

file rsync_client.password

echo ems > /etc/rsync_client.password
# rsync refuses to use a password file that other users can access
chmod 600 /etc/rsync_client.password

Note: Configure these files on both salve_nfs and master_nfs so that either node can take over after an HA switch

Keepalived Config

Config file on master_nfs: vim /etc/keepalived/keepalived.conf

! Configuration File for keepalived
! Node: master_nfs (higher priority, preferred first owner of the VIP)
global_defs {
   notification_email {
     salt_yang@puyacn.com
   }
   notification_email_from service@webackup.cn
   smtp_server smtp.mxichina.cn
   smtp_connect_timeout 30
   router_id NFS_MASTER
}
! Health check: runs "checknfs.sh check" every 30 seconds
vrrp_script chk_nfs {
    script "/etc/keepalived/checknfs.sh check"
    interval 30
}
vrrp_instance NFS_MASTER {
    ! keepalived only honours nopreempt when the initial state is BACKUP,
    ! so this node also starts as BACKUP. Its higher priority (100 vs 80)
    ! still lets it win the first election.
    state BACKUP
    interface eth5
    virtual_router_id 54
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.1.178
    }
    track_script {
        chk_nfs
    }
    debug
    ! Do not take the VIP back automatically after this node recovers
    nopreempt
    ! State-change hooks: switch the node between the NFS and rsync-daemon roles
    notify_master "/etc/keepalived/checknfs.sh master"
    notify_backup "/etc/keepalived/checknfs.sh backup"
    notify_fault  "/etc/keepalived/checknfs.sh fault"
    notify_stop  "/etc/keepalived/checknfs.sh fault"
}

Config file on salve_nfs: vim /etc/keepalived/keepalived.conf

! Configuration File for keepalived
! Node: salve_nfs (lower priority, takes the VIP when the other node fails)
global_defs {
   notification_email {
     salt_yang@puyacn.com
   }
   notification_email_from service@webackup.cn
   smtp_server smtp.mxichina.cn
   smtp_connect_timeout 30
   router_id NFS_BACKUP
}
! Health check: runs the same checknfs.sh that the notify hooks below use,
! every 30 seconds
vrrp_script chk_nfs {
    script "/etc/keepalived/checknfs.sh check"
    interval 30
}
vrrp_instance NFS_BACKUP {
    state BACKUP
    interface eth5
    virtual_router_id 54
    priority 80
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.1.178
    }
    track_script {
        chk_nfs
    }
    debug
    ! Keep the VIP after a failover instead of handing it back automatically
    nopreempt
    ! State-change hooks: switch the node between the NFS and rsync-daemon roles
    notify_master "/etc/keepalived/checknfs.sh master"
    notify_backup "/etc/keepalived/checknfs.sh backup"
    notify_fault  "/etc/keepalived/checknfs.sh fault"
    notify_stop  "/etc/keepalived/checknfs.sh fault"
}

checknfs.sh

#!/bin/dash
# Script to handle NFS from keepalived.
#
# Usage: checknfs.sh action
#
# Note: you can use $MAINTENANCE (/etc/keepalived/maintenance) to disable NFS checks
# in case of short NFS maintenance
#
# Usage func :
# Print the usage comment above by extracting it from this file. "$0" is
# quoted so the script still works when its path contains whitespace.
if [ "$1" = "--help" ]
then
    sed -n -e '/^# Usage:/,/^$/ s/^# \?//p' < "$0"
    exit
fi
#
# CONFIG
#
# Keepalived
KEEPALIVEDPID=/var/run/keepalived.pid
KEEPALIVED=/etc/init.d/keepalived
# NFSD: pattern searched for in the process list to detect a running NFS server
NFS_FLAG="nfsd"
# rsync + inotify
RSYNC=/usr/bin/rsync
RSYNCDPID=/var/run/rsyncd.pid
INOTIFY_SCRIPT=/usr/local/inotify.sh
# Patterns searched for in the process list to detect the inotify script
# and the rsync daemon
INOTIFY_FLAG="inotify"
RSYNC_FLAG="rsync"
# local mount point
MOUNTPOINT="/data"
# warmup delay: maximum number of one-second polls while waiting for a service
MAXWAIT=240
# Virtual IP; the node that currently holds it is treated as the master
VIP=192.168.1.178
# how to handle potential split-brain
# 0: manual
# 1: invalidate local data
# 2: invalidate remote data
SPLIT_BRAIN_METHOD=1
# maintenance flag: used to do maintenance on NFS without switch between nodes
MAINTENANCE="/etc/keepalived/maintenance"
#
# CONFIG LOGGER
#
# tail -f /var/log/syslog | grep Keep
# The LOG* values are command prefixes: they are expanded unquoted, followed
# by the message to log.
LOG="logger -t KeepNFS[$$] -p syslog" # do not use -i
LOGINFO="$LOG.info"
LOGWARN="$LOG.warn"
LOGERR="$LOG.err"
# Periodic health check (the "check" action).
#
# On the node that currently holds $VIP (the master):
#   - if NFS is down, try to start it; if it is still down afterwards,
#     restart keepalived so the VIP can move to the other node;
#   - if the inotify sync script is down, start it again; a failure here is
#     only logged because NFS itself is still being served.
# On the other node (the slave): make sure the rsync daemon is running.
#
# Returns: always 0. Failover is triggered by restarting keepalived, not by
# the exit status of this function.
check() {
    # -F: match the VIP literally (the dots are not regex wildcards);
    # -q: the matched line itself is of no interest.
    if ip addr | grep -qF -- "${VIP}"
    then
        $LOGWARN "This is Master."
        if check_nfs
        then
            $LOGWARN "NFS is OK."
        else
            start_nfs
            $LOGWARN "Restart NFS service"
            # Escalate only when NFS is STILL down after the restart attempt.
            if ! check_nfs
            then
                $LOGWARN "After restart NFS service, but it's also down. Switch keepalive status"
                $KEEPALIVED restart
            fi
        fi
        if check_inotify
        then
            $LOGWARN "Inotify is OK."
        else
            $LOGWARN "Start Inotify script......"
            /bin/bash "$INOTIFY_SCRIPT" &
            if ! check_inotify
            then
                $LOGWARN "After restart Inotify script, but it's also down and need man repair"
            fi
        fi
    else
        $LOGWARN "This is Salve."
        if check_rsycn_daemon
        then
            $LOGWARN "Rsync daemon is OK."
        else
            $LOGWARN "Start Rsync daemon....."
            start_rsync_daemon
            if ! check_rsycn_daemon
            then
                $LOGWARN "After restart Rsync service, but it's also down and need man repair"
            fi
        fi
    fi
    return 0
}
# "fault" action: a faulted node is handled exactly like a backup node.
# Returns the status of set_backup.
set_fault() {
    set_backup
    return $?
}
# "backup" action: stop NFS if it is running, then turn this node into the
# rsync receiver. Returns the status of set_rsync_server.
set_backup() {
    check_nfs && {
        $LOGWARN "NFS Service is UP, now kill it"
        kill_nfs
    }
    set_rsync_server
}
# Put this node into the rsync-receiver role:
#   1. make sure the rsync daemon runs, polling once per second for up to
#      MAXWAIT seconds after starting it;
#   2. stop the inotify push script if it is running.
set_rsync_server() {
    # Step 1: rsync daemon
    if ! check_rsycn_daemon
    then
        $LOGWARN "[Begin to start daemon Service.]"
        start_rsync_daemon
        attempt=0
        while [ "$attempt" -lt "$MAXWAIT" ]
        do
            attempt=$((attempt + 1))
            sleep 1
            check_rsycn_daemon && break
        done
    else
        $LOGWARN "[Rsync daemon Service is Up.]"
    fi
    check_rsycn_daemon \
        || $LOGWARN "[After 240s, rsync daemon doesn't start, need adminstrator to repair]"

    # Step 2: inotify script. Nothing to do when it is not running.
    check_inotify || return 0
    if kill_inotify
    then
        $LOGWARN "[Kill inotify Service.]"
    else
        $LOGWARN "[Kill inotify Service failed, need adminstrator to repair]"
    fi
}
# "master" action: make this node the active NFS server.
#   1. start NFS unless nfsd is already running, polling once per second
#      for up to MAXWAIT seconds;
#   2. start the inotify push script unless it is already running;
#   3. stop the local rsync daemon, which only the backup node needs.
#
# WARNING set_master is called at keepalived start
# So if already in "good" state we must do nothing :)
set_master() {
    # Starting NFS
    if [ "$(pidof nfsd | wc -w)" -gt 0 ]
    then
        $LOGWARN "NFS already started ? What did I have to do ?"
    else
        $LOGWARN "Starting NFS ..."
        /sbin/service nfs restart
        for i in $(seq 1 "$MAXWAIT")
        do
            sleep 1
            if check_nfs
            then
                break
            fi
        done
    fi
    if check_nfs
    then
        $LOGWARN "NFS service has started"
    else
        $LOGWARN "NFS Service is broken and need a manual repair."
    fi
    # Start the inotify script if it is not active.
    if check_inotify
    then
        $LOGWARN "Inotify has started"
    else
        $LOGWARN "Started Inotify"
        start_inotify
    fi
    # Stop the rsync daemon if it is active. The helper is spelled
    # check_rsycn_daemon in this file; calling "check_rsync_daemon" fails
    # with "command not found", so the daemon was never stopped.
    if check_rsycn_daemon
    then
        if kill_rsync_daemon
        then
            $LOGWARN "[Kill Rsync Service successed!]"
        else
            $LOGWARN "[Kill Rsync Service failed, need adminstrator to repair]"
        fi
    fi
}
# Report whether the NFS server looks alive.
# Returns:
#   0 when the maintenance flag file exists, or when the process list
#     contains a line matching $NFS_FLAG
#   1 otherwise (a warning is logged)
check_nfs() {
    # Maintenance mode: skip the process check and report healthy.
    [ -e "$MAINTENANCE" ] && return 0
    if ps -ef | grep "${NFS_FLAG}" | grep -v grep > /dev/null
    then
        return 0
    fi
    $LOGWARN "[NFS service is unavailable.]"
    return 1
}
# Stop the NFS service; returns the status of the service command.
kill_nfs() {
    /sbin/service nfs stop
}
# Start NFS, starting rpcbind first when no rpcbind process is found.
# Returns the status of "service nfs start".
start_nfs() {
    [ -n "$(pidof rpcbind)" ] || /sbin/service rpcbind start
    /sbin/service nfs start
}
# Report whether the inotify sync script looks alive.
# Returns:
#   0 when the process list contains a line matching $INOTIFY_FLAG
#   1 otherwise (a warning is logged)
check_inotify() {
    if ps -ef | grep "${INOTIFY_FLAG}" | grep -v grep > /dev/null
    then
        return 0
    fi
    $LOGWARN "[Inotify daemon service is unavailable.]"
    return 1
}
# Launch the inotify sync script in the background.
# Always returns 0: the status of a background launch says nothing about
# whether the script itself keeps running.
start_inotify() {
    /bin/bash "$INOTIFY_SCRIPT" &
    return 0
}
# Force-kill every process whose "ps -ef" line matches $INOTIFY_FLAG
# (lines containing "grep" are skipped). Returns the status of xargs.
kill_inotify() {
    ps -ef \
        | grep "${INOTIFY_FLAG}" \
        | awk '!/grep/ { print $2 }' \
        | xargs kill -9
}
# Report whether an rsync process looks alive.
# Returns:
#   0 when the process list contains a line matching $RSYNC_FLAG
#   1 otherwise (a warning is logged)
check_rsycn_daemon() {
    if ps -ef | grep "${RSYNC_FLAG}" | grep -v grep > /dev/null
    then
        return 0
    fi
    $LOGWARN "[Rsync daemon service is unavailable.]"
    return 1
}
# Start the rsync daemon, removing any leftover PID file first.
# Returns the status of "$RSYNC --daemon".
start_rsync_daemon() {
    [ -e "$RSYNCDPID" ] && rm -rf "$RSYNCDPID"
    $RSYNC --daemon
}
# Force-kill every process whose "ps -ef" line matches $RSYNC_FLAG
# (lines containing "grep" are skipped). Returns the status of xargs.
kill_rsync_daemon() {
    ps -ef \
        | grep "${RSYNC_FLAG}" \
        | awk '!/grep/ { print $2 }' \
        | xargs kill -9
}
# Entry point: run the handler for the action given as the first argument
# and exit with that handler's status. Any other argument (or none) matches
# no branch and falls through.
case "$1" in
    check)
        check
        exit
        ;;
    backup)
        $LOGWARN "=> set to backup state <="
        set_backup
        exit
        ;;
    fault)
        $LOGWARN "=> set to fault state <="
        set_fault
        exit
        ;;
    master)
        $LOGWARN "=> set to master state <="
        set_master
        exit
        ;;
esac

Create share dir

Note: Use LVM to avoid NFS stale file handle errors when switching NFS servers

Create an LVM volume

# List the current disks and partitions
fdisk -l
# Open /dev/sda in fdisk to create a new partition
fdisk /dev/sda
# Inside fdisk, enter these choices: n -> p(partition type) -> +500M(size) -> t(change partition type) -> 4(partition num) -> 8e(LVM type) -> p(check partition) -> w (write partition)
# Make the new partition visible without a reboot
partprobe
# Create the PV and check it
pvcreate /dev/sda4
pvdisplay
# Create the VG and add the PV to it
vgcreate VolGroup /dev/sda4
# If VolGroup already exists, do not create it; add the PV with vgextend
# instead (the LVM command is vgextend; there is no "extendvg")
vgextend VolGroup /dev/sda4
# Create an LV named lvData and check it
lvcreate -L 100M -n lvData VolGroup
lvdisplay
# Format the LV and mount it (the mount point must exist)
mkfs -t ext4 /dev/VolGroup/lvData
mkdir -p /data
mount /dev/VolGroup/lvData /data
# Add the following line to /etc/fstab so the volume is mounted at boot
/dev/VolGroup/lvData    /data                   ext4    defaults        1 2

Extend share dir

If /dev/sda has no free sectors

# Add a disk
# Create a partition on the new disk
fdisk /dev/sdb
# Inside fdisk, enter these choices: n -> p(partition type) -> +20G(size) -> t(change partition type) -> 1(partition num) -> 8e(LVM type) -> p(check partition) -> w (write partition)
# Make the new partition visible without a reboot
partprobe
# Create the PV and check it.
# Do NOT run mkfs on /dev/sdb1: the filesystem lives on the logical volume,
# and formatting the partition would overwrite the PV that was just created.
pvcreate /dev/sdb1
pvdisplay
# Extend the VG with the new PV
vgextend VolGroup /dev/sdb1
# Extend the LV (-L 20G sets the total size of the LV to 20G)
lvextend -L 20G /dev/VolGroup/lvData
# Grow the filesystem to fill the enlarged LV
resize2fs /dev/VolGroup/lvData

If /dev/sda has free sectors

NFS HA： NFS + Keepalived + Rsync + Inotify

本系统采用NFS + Keepalived + Rsync + Inotify来保证NFS服务的高可用

内容目录