[关闭]
@saltyang 2018-06-19T11:38:53.000000Z 字数 9325 阅读 2503

NFS HA: NFS + Keepalived + Rsync + Inotify

NFS, Keepalived, Rsync, Inotify


存储服务器采用NFS的方式向接收服务器提供存储服务,存储服务器本身采用rsync + inotify实现文件的实时同步,其中Keepalived保证NFS服务的高可用,实现故障转移和自动切换。


本系统采用NFS + Keepalived + Rsync + Inotify来保证NFS服务的高可用

  1. Linux: Centos 6.9
  2. Rsync Version: 3.0.6
  3. master_nfs ip: 192.168.1.176
  4. slave_nfs ip: 192.168.1.177
  5. VIP: 192.168.1.178

ENV Prepare

  1. yum install rpcbind nfs-utils;
  2. yum install keepalived rsync inotify-tools;
  3. chkconfig keepalived on; chkconfig nfs on;

Rsync Server Config

The rsync server runs on the backup machine, which is initially slave_nfs (192.168.1.177).

  1. ##rsyncd.conf start##
  2. #工作中指定用户(需要指定用户)
  3. uid = root
  4. gid = root
  5. use chroot = no
  6. #有多少个客户端同时传文件
  7. max connections = 36000
  8. #超时时间
  9. timeout = 300
  10. #进程号文件
  11. pid file = /var/run/rsyncd.pid
  12. #锁文件
  13. lock file = /var/run/rsync.lock
  14. #日志文件
  15. log file = /var/log/rsyncd.log
  16. #模块开始
  17. #这个模块对应的是推送目录
  18. #模块名称随便起
  19. [backup]
  20. #需要同步的目录
  21. path = /data/
  22. #表示出现错误忽略错误
  23. ignore errors = yes
  24. #表示网络权限可写(本地控制真正可写)
  25. read only = false
  26. #是否允许客户端列出该模块(false表示不列出)
  27. list = false
  28. #指定允许的网段
  29. hosts allow = 192.168.1.0/24
  30. #拒绝连接的地址,以下表示没有拒绝的连接。
  31. hosts deny = 0.0.0.0/32
  32. #不要动的东西(默认情况)
  33. #虚拟用户
  34. auth users = rsync_backup
  35. #虚拟用户的密码文件
  36. secrets file = /etc/rsync.password
  1. echo "rsync_backup:ems" > /etc/rsync.password
  2. chmod 600 /etc/rsync.password
  1. rsync --daemon
  2. ps -ef | grep "rsync" | grep -v "grep" # check daemon success or not
  3. echo "/usr/bin/rsync --daemon" >>/etc/rc.local # add daemon to machine start

Rsync Client Config

The rsync client is the machine currently running the NFS service, which is initially master_nfs (192.168.1.176).

  1. #!/bin/bash
  2. # rsync server address
  3. RSYNC_SERVER=192.168.1.177
  4. # directory monitored on the rsync client
  5. SYC_DIR=/data/
  6. # rsync server service module name
  7. DST_DIR=backup
  8. # rsync server service auth user
  9. USER=rsync_backup
  10. # rsync service password file
  11. RSYNC_PASSFILE=/etc/rsync_client.password
  12. INOTIFYWAIT=/usr/bin/inotifywait
  13. LOG_FILE=/var/log/rsyncd.log
  14. if [ ! -e "$SYC_DIR" ] || [ ! -e "${RSYNC_PASSFILE}" ] || [ ! -e "${INOTIFYWAIT}" ] \
  15. || [ ! -e "/usr/bin/rsync" ];
  16. then
  17. echo "Check File and Folder" > $LOG_FILE
  18. exit 9
  19. fi
  20. ${INOTIFYWAIT} -mrq --timefmt '%d/%m/%y %H:%M' --format '%T %w%f' -e close_write,delete,create,attrib $SYC_DIR \
  21. | while read file
  22. do
  23. cd $SYC_DIR
  24. rsync -aruz -R --delete ./ --timeout=100 $USER@$RSYNC_SERVER::$DST_DIR --password-file=${RSYNC_PASSFILE} >/dev/null 2>&1
  25. done
  26. exit 0
  1. echo ems > /etc/rsync_client.password

Keepalived Config

  1. ! Configuration File for keepalived
  2. global_defs {
  3. notification_email {
  4. salt_yang@puyacn.com
  5. }
  6. notification_email_from service@webackup.cn
  7. smtp_server smtp.mxichina.cn
  8. smtp_connect_timeout 30
  9. router_id NFS_MASTER
  10. }
  11. vrrp_script chk_nfs {
  12. script "/etc/keepalived/checknfs.sh check"
  13. interval 30
  14. }
  15. vrrp_instance NFS_MASTER {
  16. state MASTER
  17. interface eth5
  18. virtual_router_id 54
  19. priority 100
  20. advert_int 1
  21. authentication {
  22. auth_type PASS
  23. auth_pass 1111
  24. }
  25. virtual_ipaddress {
  26. 192.168.1.178
  27. }
  28. track_script {
  29. chk_nfs
  30. }
  31. debug
  32. nopreempt
  33. notify_master "/etc/keepalived/checknfs.sh master"
  34. notify_backup "/etc/keepalived/checknfs.sh backup"
  35. notify_fault "/etc/keepalived/checknfs.sh fault"
  36. notify_stop "/etc/keepalived/checknfs.sh fault"
  37. }
  1. ! Configuration File for keepalived
  2. global_defs {
  3. notification_email {
  4. salt_yang@puyacn.com
  5. }
  6. notification_email_from service@webackup.cn
  7. smtp_server smtp.mxichina.cn
  8. smtp_connect_timeout 30
  9. router_id NFS_BACKUP
  10. }
  11. vrrp_script chk_nfs {
  12. script "/etc/keepalived/checknfs2.sh check"
  13. interval 30
  14. }
  15. vrrp_instance NFS_BACKUP {
  16. state BACKUP
  17. interface eth5
  18. virtual_router_id 54
  19. priority 80
  20. advert_int 1
  21. authentication {
  22. auth_type PASS
  23. auth_pass 1111
  24. }
  25. virtual_ipaddress {
  26. 192.168.1.178
  27. }
  28. track_script {
  29. chk_nfs
  30. }
  31. debug
  32. nopreempt
  33. notify_master "/etc/keepalived/checknfs.sh master"
  34. notify_backup "/etc/keepalived/checknfs.sh backup"
  35. notify_fault "/etc/keepalived/checknfs.sh fault"
  36. notify_stop "/etc/keepalived/checknfs.sh fault"
  37. }
  1. #!/bin/dash
  2. # Script to handle NFS from keepalived.
  3. #
  4. # Usage: checknfs.sh action
  5. #
  6. # Note: you can use $MAINTENANCE (/etc/keepalived/maintenance) to disable NFS checks
  7. # in case of short NFS maintenance
  8. #
  9. # Usage func :
  10. [ "$1" = "--help" ] && { sed -n -e '/^# Usage:/,/^$/ s/^# \?//p' < $0; exit; }
  11. #
  12. # CONFIG
  13. #
  14. # Keepalived
  15. KEEPALIVEDPID=/var/run/keepalived.pid
  16. KEEPALIVED=/etc/init.d/keepalived
  17. # NFSD
  18. NFS_FLAG="nfsd"
  19. # rsync + inotify
  20. RSYNC=/usr/bin/rsync
  21. RSYNCDPID=/var/run/rsyncd.pid
  22. INOTIFY_SCRIPT=/usr/local/inotify.sh
  23. INOTIFY_FLAG="inotify"
  24. RSYNC_FLAG="rsync"
  25. # local mount point
  26. MOUNTPOINT="/data"
  27. # warmup delay
  28. MAXWAIT=240
  29. VIP=192.168.1.178
  30. # how to handle potential split-brain
  31. # 0: manual
  32. # 1: invalidate local data
  33. # 2: invalidate remote data
  34. SPLIT_BRAIN_METHOD=1
  35. # maintenance flag: used to do maintenance on NFS without switch between nodes
  36. MAINTENANCE="/etc/keepalived/maintenance"
  37. #
  38. # CONFIG LOGGER
  39. #
  40. # tail -f /var/log/syslog | grep Keep
  41. LOG="logger -t KeepNFS[$$] -p syslog" # do not use -i
  42. LOGINFO="$LOG.info"
  43. LOGWARN="$LOG.warn"
  44. LOGERR="$LOG.err"
  45. check() {
  46. if ip addr | grep "${VIP}"
  47. then
  48. # If the NFS server is down, restart it.
  49. # If NFS is still down after the restart, keepalived must fail over.
  50. $LOGWARN "This is Master."
  51. if check_nfs
  52. then
  53. $LOGWARN "NFS is OK."
  54. else
  55. start_nfs
  56. $LOGWARN "Restart NFS service"
  57. if check_nfs
  58. then
  59. $LOGWARN "After restart NFS service, but it's also down. Switch keepalive status"
  60. $KEEPALIVED restart
  61. fi
  62. fi
  63. # If the inotify script is down, restart it.
  64. # NFS itself is still OK, so keepalived does not need to fail over if this restart fails.
  65. if check_inotify
  66. then
  67. $LOGWARN "Inotify is OK."
  68. else
  69. $LOGWARN "Start Inotify script......"
  70. /bin/bash $INOTIFY_SCRIPT &
  71. if check_inotify
  72. then
  73. $LOGWARN "After restart Inotify script, but it's also down and need man repair"
  74. fi
  75. fi
  76. else
  77. $LOGWARN "This is Salve."
  78. if check_rsycn_daemon
  79. then
  80. $LOGWARN "Rsync daemon is OK."
  81. else
  82. $LOGWARN "Start Rsync daemon....."
  83. start_rsync_daemon
  84. if check_rsycn_daemon
  85. then
  86. $LOGWARN "After restart Rsync service, but it's also down and need man repair"
  87. fi
  88. fi
  89. fi
  90. return $?
  91. }
  92. set_fault() {
  93. set_backup
  94. }
  95. set_backup() {
  96. if check_nfs
  97. then
  98. $LOGWARN "NFS Service is UP, now kill it"
  99. kill_nfs
  100. fi
  101. set_rsync_server
  102. }
  103. set_rsync_server() {
  104. # start rsync daemon
  105. if check_rsycn_daemon
  106. then
  107. $LOGWARN "[Rsync daemon Service is Up.]"
  108. else
  109. $LOGWARN "[Begin to start daemon Service.]"
  110. start_rsync_daemon
  111. for i in $( seq 1 $MAXWAIT )
  112. do
  113. sleep 1
  114. if check_rsycn_daemon
  115. then
  116. break
  117. fi
  118. done
  119. fi
  120. if ! check_rsycn_daemon
  121. then
  122. $LOGWARN "[After 240s, rsync daemon doesn't start, need adminstrator to repair]"
  123. fi
  124. # kill inotify service
  125. if check_inotify
  126. then
  127. if kill_inotify
  128. then
  129. $LOGWARN "[Kill inotify Service.]"
  130. else
  131. $LOGWARN "[Kill inotify Service failed, need adminstrator to repair]"
  132. fi
  133. fi
  134. }
  135. # WARNING set_master is called at keepalived start
  136. # So if already in "good" state we must do nothing :)
  137. set_master() {
  138. # Starting NFS
  139. if [ $( pidof nfsd | wc -w ) -gt 0 ]
  140. then
  141. $LOGWARN "NFS already started ? What did I have to do ?"
  142. else
  143. $LOGWARN "Starting NFS ..."
  144. /sbin/service nfs restart
  145. for i in $( seq 1 $MAXWAIT )
  146. do
  147. sleep 1
  148. if check_nfs
  149. then
  150. break
  151. fi
  152. done
  153. fi
  154. if check_nfs
  155. then
  156. $LOGWARN "NFS service has started"
  157. else
  158. $LOGWARN "NFS Service is broken and need a manual repair."
  159. fi
  160. # Check whether inotify is active; if not, start it.
  161. if check_inotify
  162. then
  163. $LOGWARN "Inotify has started"
  164. else
  165. $LOGWARN "Started Inotify"
  166. start_inotify
  167. fi
  168. # check rsync daemon service. if it's active, kill it.
  169. if check_rsycn_daemon
  170. then
  171. if kill_rsync_daemon
  172. then
  173. $LOGWARN "[Kill Rsync Service successed!]"
  174. else
  175. $LOGWARN "[Kill Rsync Service failed, need adminstrator to repair]"
  176. fi
  177. fi
  178. }
  179. # Check that NFS is responding
  180. # Return:
  181. # 0 if nfs is OK (or in maintenance mode)
  182. # 1 if nfs is down
  183. check_nfs() {
  184. if [ -e $MAINTENANCE ]
  185. then
  186. return 0
  187. fi
  188. flag_exists=$(ps -ef | grep "${NFS_FLAG}" | grep -v grep | wc -l)
  189. if [ "${flag_exists}" -eq 0 ]
  190. then
  191. $LOGWARN "[NFS service is unavailable.]"
  192. return 1
  193. else
  194. return 0
  195. fi
  196. }
  197. kill_nfs() {
  198. /sbin/service nfs stop
  199. return $?
  200. }
  201. start_nfs() {
  202. if [ $( pidof rpcbind | wc -w ) -eq 0 ]
  203. then
  204. /sbin/service rpcbind start
  205. fi
  206. /sbin/service nfs start
  207. return $?
  208. }
  209. # Check that inotify service is responding
  210. # Return:
  211. # 0 if inotify is OK
  212. # 1 if inotify is down
  213. check_inotify() {
  214. flag_exists=$(ps -ef | grep "${INOTIFY_FLAG}" | grep -v grep | wc -l)
  215. if [ "${flag_exists}" -eq 0 ]
  216. then
  217. $LOGWARN "[Inotify daemon service is unavailable.]"
  218. return 1
  219. else
  220. return 0
  221. fi
  222. }
  223. start_inotify() {
  224. /bin/bash $INOTIFY_SCRIPT &
  225. return $?
  226. }
  227. kill_inotify() {
  228. ps -ef | grep "${INOTIFY_FLAG}" | grep -v grep | awk '{print $2 }' | xargs kill -9
  229. return $?
  230. }
  231. # Check that rsync daemon is responding
  232. # Return:
  233. # 0 if rsync daemon is OK
  234. # 1 if rsync daemon is down
  235. check_rsycn_daemon() {
  236. flag_exists=$(ps -ef | grep "${RSYNC_FLAG}" | grep -v grep | wc -l)
  237. if [ "${flag_exists}" -eq 0 ]
  238. then
  239. $LOGWARN "[Rsync daemon service is unavailable.]"
  240. return 1
  241. else
  242. return 0
  243. fi
  244. }
  245. # Start Rsync daemon
  246. start_rsync_daemon() {
  247. if [ -e $RSYNCDPID ]
  248. then
  249. rm -rf $RSYNCDPID
  250. fi
  251. $RSYNC --daemon
  252. return $?
  253. }
  254. # Kill Rsync daemon
  255. kill_rsync_daemon() {
  256. ps -ef | grep "${RSYNC_FLAG}" | grep -v grep | awk '{print $2 }' | xargs kill -9
  257. return $?
  258. }
  259. case "$1" in
  260. check)
  261. check
  262. exit $?
  263. ;;
  264. backup)
  265. $LOGWARN "=> set to backup state <="
  266. set_backup
  267. exit $?
  268. ;;
  269. fault)
  270. $LOGWARN "=> set to fault state <="
  271. set_fault
  272. exit $?
  273. ;;
  274. master)
  275. $LOGWARN "=> set to master state <="
  276. set_master
  277. exit $?
  278. ;;
  279. esac

Create share dir

Note: Use LVM to avoid NFS "stale file handle" errors when switching between NFS servers

  1. # check disk partition
  2. fdisk -l
  3. # Create a partition (enters fdisk's interactive partition management)
  4. fdisk /dev/sda
  5. Enter these choices: n -> p(partition type) -> +500M(size) -> t(change partition type) -> 4(partition num) -> 8e(LVM type) -> p(check partition) -> w (write partition)
  6. # make the kernel re-read the partition table so no reboot is needed
  7. partprobe
  8. # Create PV and check
  9. pvcreate /dev/sda4
  10. pvdisplay
  11. # Create vg and add pv into
  12. vgcreate VolGroup /dev/sda4
  13. # If VolGroup already exists, extend it with the new PV instead of creating it
  14. vgextend VolGroup /dev/sda4
  15. # Create an LV named lvData and check
  16. lvcreate -L 100M -n lvData VolGroup
  17. lvdisplay
  18. # Format and mount
  19. mkfs -t ext4 /dev/VolGroup/lvData
  20. mount /dev/VolGroup/lvData /data
  21. # add the following entry to /etc/fstab
  22. /dev/VolGroup/lvData /data ext4 defaults 1 2

Extend share dir

  1. # Add a disk
  2. # Create a disk partition
  3. fdisk /dev/sdb
  4. Enter these choices: n -> p(partition type) -> +20G(size) -> t(change partition type) -> 1(partition num) -> p(check partition) -> w (write partition)
  5. # make the kernel re-read the partition table so no reboot is needed
  6. partprobe
  7. # Create PV and check
  8. pvcreate /dev/sdb1
  9. mkfs t ext4 /dev/sdb1
  10. # Extend vg
  11. vgextend VolGroup /dev/sdb1
  12. # Extend lv
  13. lvextend -L 20G /dev/VolGroup/lvData
  14. resize2fs /dev/VolGroup/lvData
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注