[Linux-ha-jp] About many-to-one failover behavior


Shinjiro Hamaguchi hamag****@agile*****
Fri Sep 9 12:58:32 JST 2016


Hello,
my name is Hamaguchi.


I am trying to build a 5-to-1 redundant configuration with pacemaker + cman,
but it is not working as intended, so I would like to ask for advice.


【Problem】
During failover, the same resource ends up running on two nodes.


【Environment】
OS: CentOS 6.8
pacemaker: 1.1.14-8.el6_8.1
cman: 3.0.12.1-78.el6


【Layout】
Nodes: pm1.local - pm6.local
Resource groups: groupA - groupE

* The mapping between each resource group and the node that activates it is as follows:

groupA, pm1.local
groupB, pm2.local
groupC, pm3.local
groupD, pm4.local
groupE, pm5.local
pm6.local (failover destination for groupA - groupE)


【Steps that reproduce the problem】
1. Join the nodes to the cluster one by one, from pm1.local through pm6.local
2. Break the resource on pm1.local (e.g. service asterisk stop)
3. Break the resource on pm2.local (e.g. service asterisk stop)

After step 1, the intended resources start on each node.
In step 2, groupA fails over from pm1.local to pm6.local.
In step 3, groupB fails over from pm2.local to pm6.local.
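
For reference, the placement at each step can also be inspected via the allocation scores with crm_simulate (assuming I am using the options correctly; -s shows the allocation scores and -L reads the live CIB):

  # show the allocation scores of the running cluster
  crm_simulate -sL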


【Expected behavior】
In step 3, groupB should not fail over to pm6.local (because groupA is already running there) and should remain on pm2.local in the failed state.
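
I also wondered whether an "at most one group per node" rule like this has to be expressed explicitly, for example with utilization attributes and placement-strategy along the following lines. This is only an untested sketch on my side; the attribute name "capacity" is arbitrary and none of it is in the configuration shown below:

  # untested sketch: give each node a capacity of 1 and let one member of
  # each group consume 1, so that no node ever hosts two groups
  node pm6.local \
      utilization capacity=1
  # (likewise for pm1.local - pm5.local)

  # added to the existing asteriskA definition (likewise asteriskB - asteriskE)
  primitive asteriskA lsb:asterisk \
      utilization capacity=1

  property placement-strategy=utilization

If there is a more standard way to get this behavior, I would be glad to hear it.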


【/etc/cluster/cluster.conf】

<cluster config_version="15" name="clusterpm666">

  <fence_daemon/>

  <clusternodes>

    <clusternode name="pm1.local" nodeid="1">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm1.local"/>

        </method>

      </fence>

    </clusternode>

    <clusternode name="pm2.local" nodeid="2">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm2.local"/>

        </method>

      </fence>

    </clusternode>

    <clusternode name="pm3.local" nodeid="3">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm3.local"/>

        </method>

      </fence>

    </clusternode>

    <clusternode name="pm4.local" nodeid="4">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm4.local"/>

        </method>

      </fence>

    </clusternode>

    <clusternode name="pm5.local" nodeid="5">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm5.local"/>

        </method>

      </fence>

    </clusternode>

    <clusternode name="pm6.local" nodeid="6">

      <fence>

        <method name="pcmk-redirect">

          <device name="pcmk" port="pm6.local"/>

        </method>

      </fence>

    </clusternode>

  </clusternodes>

  <cman/>

  <fencedevices>

    <fencedevice agent="fence_pcmk" name="pcmk"/>

  </fencedevices>

  <rm>

    <failoverdomains/>

    <resources/>

  </rm>

</cluster>


【Output of crm configure show】

node pm1.local \

attributes

node pm2.local \

attributes

node pm3.local \

attributes

node pm4.local \

attributes

node pm5.local \

attributes

node pm6.local \

attributes

primitive asteriskA lsb:asterisk \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive asteriskB lsb:asterisk \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive asteriskC lsb:asterisk \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive asteriskD lsb:asterisk \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive asteriskE lsb:asterisk \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive changeSrcIpA ocf:pacemaker:changeSrcIp \

params vip=192.168.12.215 mask=23 device=eth0 \

op start interval=0s timeout=0 \

op monitor interval=10s \

op stop interval=0s on-fail=ignore

primitive changeSrcIpB ocf:pacemaker:changeSrcIp \

params vip=192.168.12.216 mask=23 device=eth0 \

op start interval=0s timeout=0 \

op monitor interval=10s \

op stop interval=0s on-fail=ignore

primitive changeSrcIpC ocf:pacemaker:changeSrcIp \

params vip=192.168.12.217 mask=23 device=eth0 \

op start interval=0s timeout=0 \

op monitor interval=10s \

op stop interval=0s on-fail=ignore

primitive changeSrcIpD ocf:pacemaker:changeSrcIp \

params vip=192.168.12.218 mask=23 device=eth0 \

op start interval=0s timeout=0 \

op monitor interval=10s \

op stop interval=0s on-fail=ignore

primitive changeSrcIpE ocf:pacemaker:changeSrcIp \

params vip=192.168.12.219 mask=23 device=eth0 \

op start interval=0s timeout=0 \

op monitor interval=10s \

op stop interval=0s on-fail=ignore

primitive cronA lsb:crond \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive cronB lsb:crond \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive cronC lsb:crond \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive cronD lsb:crond \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive cronE lsb:crond \

params \

meta migration-threshold=2 \

op monitor interval=20s start-delay=5s timeout=15s \

op stop interval=0s on-fail=ignore

primitive sipsakA ocf:pacemaker:sipsak \

params sipuri="sip:201****@192*****" \

meta migration-threshold=2 \

op start interval=0s \

op monitor interval=20s start-delay=10s timeout=10s \

op stop interval=0s on-fail=fence

primitive sipsakB ocf:pacemaker:sipsak \

params sipuri="sip:201****@192*****" \

meta migration-threshold=2 \

op start interval=0s \

op monitor interval=20s start-delay=10s timeout=10s \

op stop interval=0s on-fail=fence

primitive sipsakC ocf:pacemaker:sipsak \

params sipuri="sip:201****@192*****" \

meta migration-threshold=2 \

op start interval=0s \

op monitor interval=20s start-delay=10s timeout=10s \

op stop interval=0s on-fail=fence

primitive sipsakD ocf:pacemaker:sipsak \

params sipuri="sip:201****@192*****" \

meta migration-threshold=2 \

op start interval=0s \

op monitor interval=20s start-delay=10s timeout=10s \

op stop interval=0s on-fail=fence

primitive sipsakE ocf:pacemaker:sipsak \

params sipuri="sip:201****@192*****" \

meta migration-threshold=2 \

op start interval=0s \

op monitor interval=20s start-delay=10s timeout=10s \

op stop interval=0s on-fail=fence

primitive vip-local-checkA VIPcheck \

params target_ip=192.168.12.215 count=1 wait=5 \

op start interval=0s on-fail=restart timeout=60s \

op monitor interval=10s timeout=60s \

op stop interval=0s on-fail=ignore timeout=60s

primitive vip-local-checkB VIPcheck \

params target_ip=192.168.12.216 count=1 wait=5 \

op start interval=0s on-fail=restart timeout=60s \

op monitor interval=10s timeout=60s \

op stop interval=0s on-fail=ignore timeout=60s

primitive vip-local-checkC VIPcheck \

params target_ip=192.168.12.217 count=1 wait=5 \

op start interval=0s on-fail=restart timeout=60s \

op monitor interval=10s timeout=60s \

op stop interval=0s on-fail=ignore timeout=60s

primitive vip-local-checkD VIPcheck \

params target_ip=192.168.12.218 count=1 wait=5 \

op start interval=0s on-fail=restart timeout=60s \

op monitor interval=10s timeout=60s \

op stop interval=0s on-fail=ignore timeout=60s

primitive vip-local-checkE VIPcheck \

params target_ip=192.168.12.219 count=1 wait=5 \

op start interval=0s on-fail=restart timeout=60s \

op monitor interval=10s timeout=60s \

op stop interval=0s on-fail=ignore timeout=60s

primitive vip-localA IPaddr2 \

params ip=192.168.12.215 cidr_netmask=23 nic=eth0 iflabel=0 broadcast=192.168.13.255 \

op start interval=0s timeout=20s \

op monitor interval=5s timeout=20s \

op stop interval=0s on-fail=ignore

primitive vip-localB IPaddr2 \

params ip=192.168.12.216 cidr_netmask=23 nic=eth0 iflabel=0 broadcast=192.168.13.255 \

op start interval=0s timeout=20s \

op monitor interval=5s timeout=20s \

op stop interval=0s on-fail=ignore

primitive vip-localC IPaddr2 \

params ip=192.168.12.217 cidr_netmask=23 nic=eth0 iflabel=0 broadcast=192.168.13.255 \

op start interval=0s timeout=20s \

op monitor interval=5s timeout=20s \

op stop interval=0s on-fail=ignore

primitive vip-localD IPaddr2 \

params ip=192.168.12.218 cidr_netmask=23 nic=eth0 iflabel=0 broadcast=192.168.13.255 \

op start interval=0s timeout=20s \

op monitor interval=5s timeout=20s \

op stop interval=0s on-fail=ignore

primitive vip-localE IPaddr2 \

params ip=192.168.12.219 cidr_netmask=23 nic=eth0 iflabel=0 broadcast=192.168.13.255 \

op start interval=0s timeout=20s \

op monitor interval=5s timeout=20s \

op stop interval=0s on-fail=ignore

group groupA vip-local-checkA vip-localA changeSrcIpA cronA asteriskA sipsakA

group groupB vip-local-checkB vip-localB changeSrcIpB cronB asteriskB sipsakB

group groupC vip-local-checkC vip-localC changeSrcIpC cronC asteriskC sipsakC

group groupD vip-local-checkD vip-localD changeSrcIpD cronD asteriskD sipsakD

group groupE vip-local-checkE vip-localE changeSrcIpE cronE asteriskE sipsakE

location location-groupA-pm1.local-INFINITY groupA inf: pm1.local

location location-groupA-pm2.local--INFINITY groupA resource-discovery=never -inf: pm2.local

location location-groupA-pm3.local--INFINITY groupA resource-discovery=never -inf: pm3.local

location location-groupA-pm4.local--INFINITY groupA resource-discovery=never -inf: pm4.local

location location-groupA-pm5.local--INFINITY groupA resource-discovery=never -inf: pm5.local

location location-groupA-pm6.local-100 groupA 100: pm6.local

location location-groupB-pm1.local--INFINITY groupB resource-discovery=never -inf: pm1.local

location location-groupB-pm2.local-INFINITY groupB inf: pm2.local

location location-groupB-pm3.local--INFINITY groupB resource-discovery=never -inf: pm3.local

location location-groupB-pm4.local--INFINITY groupB resource-discovery=never -inf: pm4.local

location location-groupB-pm5.local--INFINITY groupB resource-discovery=never -inf: pm5.local

location location-groupB-pm6.local-100 groupB 100: pm6.local

location location-groupC-pm1.local--INFINITY groupC resource-discovery=never -inf: pm1.local

location location-groupC-pm2.local--INFINITY groupC resource-discovery=never -inf: pm2.local

location location-groupC-pm3.local-INFINITY groupC inf: pm3.local

location location-groupC-pm4.local--INFINITY groupC resource-discovery=never -inf: pm4.local

location location-groupC-pm5.local--INFINITY groupC resource-discovery=never -inf: pm5.local

location location-groupC-pm6.local-100 groupC 100: pm6.local

location location-groupD-pm1.local--INFINITY groupD resource-discovery=never -inf: pm1.local

location location-groupD-pm2.local--INFINITY groupD resource-discovery=never -inf: pm2.local

location location-groupD-pm3.local--INFINITY groupD resource-discovery=never -inf: pm3.local

location location-groupD-pm4.local-INFINITY groupD inf: pm4.local

location location-groupD-pm5.local--INFINITY groupD resource-discovery=never -inf: pm5.local

location location-groupD-pm6.local-100 groupD 100: pm6.local

location location-groupE-pm1.local--INFINITY groupE resource-discovery=never -inf: pm1.local

location location-groupE-pm2.local--INFINITY groupE resource-discovery=never -inf: pm2.local

location location-groupE-pm3.local--INFINITY groupE resource-discovery=never -inf: pm3.local

location location-groupE-pm4.local--INFINITY groupE resource-discovery=never -inf: pm4.local

location location-groupE-pm5.local-INFINITY groupE inf: pm5.local

location location-groupE-pm6.local-100 groupE 100: pm6.local

property cib-bootstrap-options: \

have-watchdog=false \

dc-version=1.1.14-8.el6_8.1-70404b0 \

cluster-infrastructure=cman \

stonith-enabled=false \

no-quorum-policy=ignore \

maintenance-mode=false \

startup-fencing=false \

dc-deadtime=20s \

last-lrm-refresh=1473360195

rsc_defaults rsc_defaults-options: \

migration-threshold=1 \

resource-stickiness=INFINITY


I apologize for the long message, but I would be grateful for any information or advice you can share.
Thank you very much in advance.