From feeac83ee0436d3f885a9433f4c015c5938d5271 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 1 Aug 2023 20:23:15 +0800 Subject: [PATCH 01/23] =?UTF-8?q?gs=5Fddr=E5=B7=A5=E5=85=B7=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gs_ddr | 96 + .../impl/dorado_disaster_recovery/__init__.py | 0 .../impl/dorado_disaster_recovery/ddr_base.py | 2504 +++++++++++++++++ .../dorado_disaster_recovery/ddr_constants.py | 91 + .../ddr_modules/__init__.py | 0 .../dorado_diaster_recovery_start.py | 246 ++ .../dorado_disaster_recovery_failover.py | 70 + .../dorado_disaster_recovery_query.py | 168 ++ .../dorado_disaster_recovery_stop.py | 105 + .../dorado_disaster_recovery_switchover.py | 476 ++++ .../params_handler.py | 346 +++ 11 files changed, 4102 insertions(+) create mode 100644 script/gs_ddr create mode 100644 script/impl/dorado_disaster_recovery/__init__.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_base.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_constants.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/__init__.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py create mode 100644 script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py create mode 100644 script/impl/dorado_disaster_recovery/params_handler.py diff --git a/script/gs_ddr b/script/gs_ddr new file mode 100644 index 00000000..f7c3793f --- /dev/null +++ b/script/gs_ddr @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : gs_ddr is a utility for dorado +# disaster recovery fully options. 
+############################################################################# + +import os +import uuid + +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.GaussLog import GaussLog +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants +from base_utils.os.user_util import UserUtil +from domain_utils.cluster_file.cluster_log import ClusterLog +from impl.dorado_disaster_recovery.params_handler import ParamsHandler +from impl.dorado_disaster_recovery.ddr_modules.\ + dorado_diaster_recovery_start import DisasterRecoveryStartHandler +from impl.dorado_disaster_recovery.ddr_modules.\ + dorado_disaster_recovery_stop import DisasterRecoveryStopHandler +from impl.dorado_disaster_recovery.ddr_modules.\ + dorado_disaster_recovery_failover import DisasterRecoveryFailoverHandler +from impl.dorado_disaster_recovery.ddr_modules.\ + dorado_disaster_recovery_switchover import DisasterRecoverySwitchoverHandler +from impl.dorado_disaster_recovery.ddr_modules.\ + dorado_disaster_recovery_query import StreamingQueryHandler + +HANDLER_MAPPING = { + "start": DisasterRecoveryStartHandler, + "stop": DisasterRecoveryStopHandler, + "switchover": DisasterRecoverySwitchoverHandler, + "failover": DisasterRecoveryFailoverHandler, + #"query": StreamingQueryHandler +} + + +class DoradoStorageDisasterRecoveryBase(object): + def __init__(self): + self.params = None + self.user = None + self.log_file = None + self.logger = None + self.trace_id = uuid.uuid1().hex + self.dorado_info = None + DoradoStorageDisasterRecoveryBase.mock_process_user_sensitive_info() + self.__init_globals() + + @staticmethod + def mock_process_user_sensitive_info(): + """mock_process_user_sensitive_info""" + cmdline = DefaultValue.get_proc_title("-W") + DefaultValue.set_proc_title(cmdline) + + def __init_globals(self): + self.user = UserUtil.getUserInfo()['name'] + tmp_logger_file = ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.STREAMING_LOG_FILE, self.user) + tmp_logger = GaussLog(tmp_logger_file, 'parse_and_validate_params', trace_id=self.trace_id) + self.params = ParamsHandler(tmp_logger, self.trace_id).get_valid_params() + self.log_file = self.params.logFile if self.params.logFile else \ + ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.STREAMING_LOG_FILE, self.user) + self.logger = GaussLog(self.log_file, self.params.task, trace_id=self.trace_id) + + +if __name__ == '__main__': + if os.getuid() == 0: + GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"]) + + base = DoradoStorageDisasterRecoveryBase() + handler = HANDLER_MAPPING[base.params.task](base.params, base.user, + base.logger, base.trace_id, base.log_file) + handler.handle_lock_file(handler.trace_id, 'create') + try: + if base.params.task in DoradoDisasterRecoveryConstants.TASK_EXIST_CHECK: + handler.check_parallel_process_is_running() + handler.run() + except Exception as error: + handler.logger.error(error) + raise Exception(str(error)) + finally: + handler.handle_lock_file(handler.trace_id, 'remove') diff --git a/script/impl/dorado_disaster_recovery/__init__.py b/script/impl/dorado_disaster_recovery/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py new file mode 100644 index 00000000..0424c911 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -0,0 +1,2504 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- 
+############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : ddr_base.py is a base module for dorado disaster recovery. +############################################################################# +import json +import os +import re +import time +from datetime import datetime +from datetime import timedelta + +from domain_utils.cluster_file.version_info import VersionInfo +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants +from impl.dorado_disaster_recovery.params_handler import check_local_cluster_conf +from impl.dorado_disaster_recovery.params_handler import check_remote_cluster_conf +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.Common import DefaultValue, ClusterInstanceConfig +from gspylib.common.ErrorCode import ErrorCode +from gspylib.common.Common import ClusterCommand +from gspylib.common.OMCommand import OMCommand +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.threads.SshTool import SshTool +from gspylib.threads.parallelTool import parallelTool +from gspylib.os.gsfile import g_file +from base_utils.os.cmd_util import CmdUtil +from base_utils.os.env_util import EnvUtil +from base_utils.os.net_util import NetUtil +from base_utils.os.file_util import FileUtil +from base_utils.os.user_util import UserUtil +from base_utils.security.sensitive_mask import SensitiveMask +from base_utils.common.constantsbase import ConstantsBase + + +class DoradoDisasterRecoveryBase(object): + def __init__(self, params, user, logger, trace_id, log_file=None): + self.user = user + self.params = params + self.logger = logger + self.trace_id = trace_id + self.log_file = log_file + self.cluster_info = None + self.gp_home = None + self.pg_host = None + self.gauss_home = None + self.bin_path = None + self.local_host = None + self.local_ip = None + self.is_single_inst = None + self.streaming_file_dir = None + self.streaming_xml = None + self.cluster_node_names = None + self.normal_cm_ips = [] + self.normal_node_list = [] + self.ssh_tool = None + self.mpp_file = None + self.status_info = None + self.step_file_path = "" + self.cluster_status = '' + self.normal_dn_ids = [] + self.normal_cn_ids = [] + self.normal_etcd_ids = [] + self.normal_gtm_ids = [] + self.normal_cm_ids = [] + self.normal_instances = [] + self.primary_dn_ids = [] + self.main_standby_ids = [] + self.cascade_standby_ids = [] + self.connected_nodes = [] + self.__init_globals() + self.backup_open_key = DoradoDisasterRecoveryConstants.BACKUP_OPEN % user + + def __init_globals(self): + self.cluster_info = dbClusterInfo() + self.cluster_info.initFromStaticConfig(self.user) + self.gp_home = EnvUtil.getEnvironmentParameterValue("GPHOME", self.user) + self.pg_host = EnvUtil.getEnvironmentParameterValue("PGHOST", self.user) + self.gauss_home = EnvUtil.getEnvironmentParameterValue("GAUSSHOME", self.user) +
self.bin_path = os.path.join(os.path.realpath(self.gauss_home), 'bin') + self.local_host = NetUtil.GetHostIpOrName() + self.local_ip = DefaultValue.getIpByHostName() + self.is_single_inst = True if self.cluster_info.isSingleInstCluster() else None + self.cluster_node_names = self.cluster_info.getClusterNodeNames() + self.streaming_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) + self.streaming_xml = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) + self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) + self.mpp_file = EnvUtil.getMpprcFile() + self.dss_home_dir = "" + self._init_step_file_path() + + def init_cluster_conf(self): + """ + Init cluster conf from file + """ + if (not hasattr(self.params, "localClusterConf")) \ + or (not hasattr(self.params, "remoteClusterConf")): + self.logger.log("Parse cluster conf from file.") + local_conf, remote_conf = self.read_cluster_conf_record() + self.logger.debug("Start validte cluster conf info.") + check_local_cluster_conf(local_conf) + check_remote_cluster_conf(remote_conf) + setattr(self.params, "localClusterConf", local_conf) + setattr(self.params, "remoteClusterConf", remote_conf) + self.logger.log("Successfully parse cluster conf from file.") + + def _init_step_file_path(self): + """ + Init step file path + """ + if self.params.task == DoradoDisasterRecoveryConstants.ACTION_START: + if self.params.mode == "primary": + step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES["start_primary"] + elif self.params.mode == "disaster_standby": + step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES["start_standby"] + else: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "init step file path") + elif self.params.task == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + if self.params.mode == "primary": + step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES["switchover_primary"] + elif self.params.mode == "disaster_standby": + step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES["switchover_standby"] + else: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "init step file path") + else: + step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES[self.params.task] + self.step_file_path = os.path.join(self.streaming_file_dir, step_file_name) + self.logger.debug("Init step file:%s." % self.step_file_path) + + def read_cluster_conf_record(self, check_file_exist=True): + """ + Read cluster conf from file + """ + cluster_conf_record = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) + if not os.path.isfile(cluster_conf_record): + if check_file_exist: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "check cluster conf, cluster_conf_record is lost") + else: + self.logger.log("Not found file cluster_conf_record.") + return '', '' + content = DefaultValue.obtain_file_content(cluster_conf_record, is_list=False) + json_content = json.loads(content) + local_conf = json_content["localClusterConf"] + remote_conf = json_content["remoteClusterConf"] + return local_conf, remote_conf + + def handle_lock_file(self, trace_id, action): + """ + Create lock file for other streaming process. + """ + if self.params.task not in DoradoDisasterRecoveryConstants.TASK_EXIST_CHECK: + return + file_name = DoradoDisasterRecoveryConstants.PROCESS_LOCK_FILE + trace_id + file_path = os.path.join(self.pg_host, file_name) + self.logger.debug("Start %s lock file:%s." 
% (action, file_path)) + if action == 'create': + FileUtil.createFile(file_path, DefaultValue.KEY_FILE_MODE) + elif action == 'remove': + if os.path.isfile(file_path): + FileUtil.removeFile(file_path, DefaultValue.KEY_FILE_MODE) + else: + self.logger.warn("Not found:%s." % file_path) + self.logger.debug("Successfully %s lock file:%s." % (action, file_path)) + + def check_parallel_process_is_running(self): + """ + Check streaming process is running + """ + hostnames = ' -H '.join(self.cluster_node_names) + file_path = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.PROCESS_LOCK_FILE) + cmd = 'source %s && pssh -t 10 -H %s "ls %s*"' % (self.mpp_file, hostnames, file_path) + # waiting for check + time.sleep(DoradoDisasterRecoveryConstants.CHECK_PROCESS_WAIT_TIME) + _, output = CmdUtil.retryGetstatusoutput(cmd, retry_time=0) + host_file_str_list = re.findall(r'.* ?: *%s[^\*^\s]+' % file_path, output) + process_list = [] + for item in host_file_str_list: + hostname = item.split(':')[0].strip() + file_name = item.split(':')[1].strip() + uuid = os.path.basename(file_name).split('_')[-1] + if uuid != self.trace_id: + process_list.append([hostname, file_name]) + if process_list: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % 'check dorado disaster recovery process, please execute after other ' \ + 'process exited, if you ensure no other process is running, ' \ + 'remove the lock file [%s] on node [%s], and try again' \ + % (process_list[0][-1], process_list[0][0]) + self.logger.error(msg) + raise Exception(msg) + + def create_disaster_recovery_dir(self, dir_path): + """ + Create disaster recovery files dir + """ + cmd = g_file.SHELL_CMD_DICT["createDir"] % ( + dir_path, dir_path, DefaultValue.MAX_DIRECTORY_MODE) + self.ssh_tool.executeCommand(cmd) + self.logger.debug("Successfully create dir [%s] on all nodes." % dir_path) + + def check_hadr_pwd(self, only_mode=None): + """ + Check hadr pwd is correct or not + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Checking hadr user is not for mode:%s." % self.params.mode) + return + self.logger.debug("Start checking disaster user password.") + sql = "select 1;" + primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + if not primary_dns: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "obtain primary dn when check disaster user") + status, output = ClusterCommand.remoteSQLCommand( + sql, self.user, primary_dns[0].hostname, primary_dns[0].port, False, + user_name=self.params.hadrUserName, user_pwd=self.params.hadrUserPassword) + if status != 0: + if "Invalid username/password" in output: + self.logger.debug("Logging denied, please check your password.") + self.logger.logExit(ErrorCode.GAUSS_516['GAUSS_51632'] + % "check disaster user password") + self.logger.debug("Successfully check disaster user password.") + + def check_hadr_user(self, only_mode=None): + """ + Check hadr user is exist + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Checking hadr user is not for mode:%s." 
% self.params.mode) + return + self.logger.log("Start checking disaster recovery user.") + sql = "select usename, userepl from pg_user;" + primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + if not primary_dns: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "obtain primary dn when check disaster user") + status, output = ClusterCommand.remoteSQLCommand( + sql, self.user, primary_dns[0].hostname, primary_dns[0].port, True) + if status != 0: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "execute sql for checking disaster user.") + user_dict = {user_info.split('|')[0].strip(): user_info.split('|')[-1].strip() + for user_info in output.strip().split('\n')} + for user_name, repl in user_dict.items(): + if user_name == self.params.hadrUserName and repl == 't': + self.logger.log("Successfully check disaster recovery user.") + return + msg = ErrorCode.GAUSS_516['GAUSS_51632'] % 'checking disaster user, please confirm ' \ + 'disaster user is exist and with ' \ + 'replication role' + self.logger.logExit(msg + "Users:%s" % user_dict) + + def __copy_hadr_user_key(self, secure_dir_path, update=False): + """ + Copy hadr.key.cipher and hadr.key.rand + """ + self.logger.log("Start copy hadr user key files.") + hadr_cipher_path = os.path.join(self.bin_path, "hadr.key.cipher") + hadr_rand_path = os.path.join(self.bin_path, "hadr.key.rand") + secure_cipher_path = os.path.join(secure_dir_path, "hadr.key.cipher") + secure_rand_path = os.path.join(secure_dir_path, "hadr.key.rand") + if not update: + if (not os.path.isfile(hadr_cipher_path)) or (not os.path.isfile(hadr_rand_path)): + self.logger.debug("Not found hadr user key, no need to copy.") + return + FileUtil.cpFile(hadr_cipher_path, secure_cipher_path, cmd_type="shell") + FileUtil.cpFile(hadr_rand_path, secure_rand_path, cmd_type="shell") + self.logger.debug("Successfully copy hadr key files into temp secure dir.") + else: + if (not os.path.isfile(secure_cipher_path)) or (not os.path.isfile(secure_rand_path)): + self.logger.debug("Not found hadr user key, no need to update.") + return + host_names = self.get_all_connection_node_name("update_hadr_key") + self.ssh_tool.scpFiles(secure_cipher_path, self.bin_path, hostList=host_names) + self.ssh_tool.scpFiles(secure_rand_path, self.bin_path, hostList=host_names) + FileUtil.removeFile(secure_cipher_path) + FileUtil.removeFile(secure_rand_path) + self.logger.debug("Finished copy hadr key files to nodes:%s." 
% host_names) + + def remove_secure_dir(self, dir_path, host_name): + """ + Remove gs_secure_files dir in PGDATA + """ + secure_dir_path = os.path.join(dir_path, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) + cmd = "echo \"if [ -d '%s' ];then rm -rf '%s';fi\" | pssh -s -H %s" % \ + (secure_dir_path, secure_dir_path, host_name) + status, output = CmdUtil.retryGetstatusoutput(cmd) + self.logger.debug("Remove gs_secure_files cmd:%s" % cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s " % output) + + def __stream_copy_file_to_all_dn(self, temp_secure_dir_path): + """ + copy key file dir to all dn dir + """ + dn_infos = DefaultValue.get_dn_info(self.cluster_info) + self.logger.debug("Got dns:%s" % dn_infos) + copy_succeed = 0 + host_names = self.get_all_connection_node_name("copy gs_secure_files to dns") + for dn_info in dn_infos: + if dn_info["host_name"] not in host_names: + continue + self.logger.debug("Copy disaster recovery secure files to inst[%s][%s][%s]." % + (dn_info['id'], dn_info['data_dir'], dn_info['host_name'])) + try: + self.remove_secure_dir(dn_info['data_dir'], dn_info['host_name']) + self.ssh_tool.scpFiles( + temp_secure_dir_path, dn_info['data_dir'], [dn_info['host_name']]) + copy_succeed += 1 + except Exception as error: + self.logger.debug("Failed copy secure files to inst[%s][%s][%s],error:%s." % + (dn_info['id'], dn_info['data_dir'], dn_info['host_name'], + str(error))) + if copy_succeed == 0: + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51632"] % "copy secure dir to all dn data dir") + self.logger.log("Successfully copy secure files.") + + def __prepare_cluster_user_record(self, temp_secure_dir_path): + """ + Save cluster user record + """ + cluster_user_record = os.path.join(temp_secure_dir_path, + DoradoDisasterRecoveryConstants.CLUSTER_USER_RECORD) + DefaultValue.write_content_on_file(cluster_user_record, self.user) + self.logger.debug("Record current cluster user:%s." % self.user) + + def prepare_gs_secure_files(self, only_mode=None): + """ + Prepare gs_secure_files on primary cluster + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Prepare gs_secure_files is not for mode:%s." % self.params.mode) + return + self.logger.log("Start prepare secure files.") + secure_dir_name = DoradoDisasterRecoveryConstants.GS_SECURE_FILES + temp_secure_dir_path = os.path.realpath( + os.path.join(self.streaming_file_dir, secure_dir_name)) + if os.path.isdir(temp_secure_dir_path): + self.logger.debug("Secure file dir exist, cleaning...") + FileUtil.removeDirectory(temp_secure_dir_path) + FileUtil.createDirectory(temp_secure_dir_path, True, DefaultValue.KEY_DIRECTORY_MODE) + if os.path.isdir(temp_secure_dir_path): + self.logger.debug("Successfully create secure file dir.") + version_file_path = os.path.realpath(os.path.join(self.gp_home, "version.cfg")) + FileUtil.cpFile(version_file_path, temp_secure_dir_path) + self.__prepare_cluster_user_record(temp_secure_dir_path) + self.__copy_hadr_user_key(temp_secure_dir_path, update=False) + self.__stream_copy_file_to_all_dn(temp_secure_dir_path) + FileUtil.removeDirectory(temp_secure_dir_path) + + def stream_clean_gs_secure(self, params): + """ + clean gs secure dir + """ + inst, file_path = params + self.logger.debug("Starting clean instance %s gs secure dir." 
% inst.instanceId) + cmd = "source %s && pssh -s -H %s 'if [ -d %s ]; then rm -rf %s; fi'" \ + % (self.mpp_file, inst.hostname, file_path, file_path) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + self.logger.debug("Clean gs secure dir for instance [%s] result:%s." % + (inst.instanceId, output)) + self.logger.debug("Successfully clean instance %s gs secure dir." % inst.instanceId) + + def clean_gs_secure_dir(self, only_mode=None): + """ + Clean gs secure dir if exist + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Clean gs_secure_files is not for mode:%s." % self.params.mode) + return + self.logger.debug("Start clean gs secure dir.") + params = [] + for node in self.cluster_info.dbNodes: + for inst in node.datanodes: + if inst.hostname not in self.connected_nodes: + continue + file_path = os.path.realpath(os.path.join( + inst.datadir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES)) + params.append((inst, file_path)) + if params: + parallelTool.parallelExecute(self.stream_clean_gs_secure, params) + self.logger.debug("Finished clean gs secure dir.") + + def remove_streaming_dir(self, dir_path): + """ + Remove streaming files dir + """ + cmd = "if [ -d %s ]; then rm %s -rf;fi" % (dir_path, dir_path) + self.ssh_tool.executeCommand(cmd) + self.logger.debug("Successfully remove dir [%s] on all nodes." % dir_path) + + def query_streaming_step(self): + """ + Query the recorded streaming disaster recovery step + """ + step = -1 + if os.path.isfile(self.step_file_path): + step_list = FileUtil.readFile(self.step_file_path) + if step_list: + step = int(step_list[0].split("_")[0]) + if step == -1: + self.logger.log("Got the step for action:[%s]." % self.params.task) + else: + self.logger.log("Got the continue step:[%s] for action:[%s]."
% + (step, self.params.task)) + return step + + def write_streaming_step(self, step): + """ + write streaming step + :return: NA + """ + self.logger.debug("Streaming action:[%s] record current step:[%s]" + % (self.params.task, step)) + with os.fdopen(os.open(self.step_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + DefaultValue.KEY_FILE_MODE_IN_OS), "w") as fp_write: + fp_write.write(step) + + def init_cluster_status(self): + """ + Generate cluster status file + """ + tmp_file = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_STATUS_TMP_FILE) + cmd = ClusterCommand.getQueryStatusCmd("", tmp_file) + self.logger.debug("Command for checking cluster state: %s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + msg = ErrorCode.GAUSS_516["GAUSS_51632"] \ + % "check cluster state, status:%s, output:%s" % (status, output) + self.logger.debug(msg) + raise Exception(msg) + self.logger.debug("Successfully init cluster status.") + + def query_cluster_info(self, cm_check=False): + """ + Query cluster info + """ + cmd = ClusterCommand.getQueryStatusCmd() + if cm_check: + cmd = "source %s; cm_ctl query -Cv" % self.mpp_file + self.logger.debug("Command for checking cluster state: %s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0 or not output.strip(): + msg = ErrorCode.GAUSS_516["GAUSS_51632"] \ + % "check cluster state, status:%s, output:%s" % (status, output) + self.logger.debug(msg) + return "" + return output.strip() + + def __clean_cluster_status(self): + """ + Clean status + """ + self.normal_cm_ids = [] + self.normal_gtm_ids = [] + self.normal_cn_ids = [] + self.primary_dn_ids = [] + self.main_standby_ids = [] + self.cascade_standby_ids = [] + self.normal_dn_ids = [] + self.normal_etcd_ids = [] + self.normal_instances = [] + + def __parse_instance_status(self): + """ + Parse instance status + """ + abnormal_insts = [] + for db_node in self.status_info.dbNodes: + for cms_inst in db_node.cmservers: + if cms_inst.status in ["Primary", "Standby"]: + self.normal_cm_ids.append(cms_inst.instanceId) + self.normal_instances.append(cms_inst) + else: + abnormal_insts.append({cms_inst.instanceId: cms_inst.status}) + for gtm_inst in db_node.gtms: + if gtm_inst.status in ["Primary", "Standby"] and gtm_inst.isInstanceHealthy(): + self.normal_gtm_ids.append(gtm_inst.instanceId) + self.normal_instances.append(gtm_inst) + else: + abnormal_insts.append({gtm_inst.instanceId: gtm_inst.status}) + for coo_inst in db_node.coordinators: + if coo_inst.status == "Normal": + self.normal_cn_ids.append(coo_inst.instanceId) + self.normal_instances.append(coo_inst) + else: + abnormal_insts.append({coo_inst.instanceId: coo_inst.status}) + for data_inst in db_node.datanodes: + if data_inst.status in ["Primary"]: + self.primary_dn_ids.append(data_inst.instanceId) + if data_inst.status in ["Main Standby"]: + self.main_standby_ids.append(data_inst.instanceId) + if data_inst.status in ["Cascade Standby"]: + self.cascade_standby_ids.append(data_inst.instanceId) + if data_inst.status in ["Primary", "Standby", "Cascade Standby", "Main Standby" + ] and data_inst.isInstanceHealthy(): + self.normal_dn_ids.append(data_inst.instanceId) + self.normal_instances.append(data_inst) + else: + abnormal_insts.append({data_inst.instanceId: data_inst.status}) + for etcd_inst in db_node.etcds: + if etcd_inst.status in ["StateLeader", "StateFollower"] \ + and etcd_inst.isInstanceHealthy(): + self.normal_etcd_ids.append(etcd_inst.instanceId) + 
self.normal_instances.append(etcd_inst) + else: + abnormal_insts.append({etcd_inst.instanceId: etcd_inst.status}) + return abnormal_insts + + def parse_cluster_status(self, current_status=None): + """ + Parse cluster status + """ + tmp_file = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_STATUS_TMP_FILE) + if (not os.path.isfile(tmp_file)) and (not current_status): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % "cluster status file:%s" % tmp_file) + self.status_info = DbClusterStatus() + self.__clean_cluster_status() + if current_status: + self.status_info.init_from_content(current_status) + else: + self.status_info.initFromFile(tmp_file) + self.cluster_status = self.status_info.clusterStatus + self.logger.debug("Current cluster status is:%s." % self.cluster_status) + # Parse instance status + abnormal_insts = self.__parse_instance_status() + # Get node names of normal nodes with nodeId + for instance in self.normal_instances: + self.normal_node_list.append(self.cluster_info.getDbNodeByID(int(instance.nodeId)).name) + self.normal_node_list = list(set(self.normal_node_list)) + for node_id in list(set(self.normal_cm_ids)): + self.normal_cm_ips.append(self.cluster_info.getDbNodeByID(int(node_id)).name) + self.logger.debug("Parsed primary dns:%s" % self.primary_dn_ids) + self.logger.debug("Parsed Main standby dns:%s" % self.main_standby_ids) + if abnormal_insts: + self.logger.debug("Abnormal instances:%s" % abnormal_insts) + else: + self.logger.debug("Checked all instances is normal:%s" + % set([inst.instanceId for inst in self.normal_instances])) + + def check_cluster_status(self, status_allowed, only_check=False, + check_current=False, is_log=True): + """ + Stream disaster cluster switch to check cluster status + """ + cluster_status = self.cluster_status + if check_current: + self.logger.debug("Starting check CLuster status") + check_cmd = "source %s && cm_ctl query | grep cluster_state | awk '{print $NF}'"\ + % self.mpp_file + status, output = CmdUtil.retryGetstatusoutput(check_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"] + + "status(%d), output(%s)" % (status, output)) + cluster_status = output.strip() + self.logger.debug("Checked cluster status is:%s" % cluster_status) + if cluster_status not in status_allowed: + if only_check is True: + self.logger.debug("Current cluster status is %s" % cluster_status) + return False + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "check cluster status") + if is_log: + self.logger.log("Successfully check cluster status is: %s." % cluster_status) + else: + self.logger.debug("Checked cluster status is: %s." 
% cluster_status) + return True + + def check_is_under_upgrade(self): + """ + Check is cluster is not doing upgrade + """ + if DefaultValue.isUnderUpgrade(self.user): + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] + % "check upgrade binary file, please ensure upgrade " + "is finished and upgrade files has been cleaned") + self.logger.debug("Successfully check cluster is not under upgrade opts.") + + def check_cluster_is_common(self): + """ + Check no main standby and cascade standby + """ + if self.main_standby_ids or self.cascade_standby_ids: + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] + % "check instance status, there are already main standby " + "or cascade standby, main standby:%s, cascade stadnby:%s" + % (self.main_standby_ids, self.cascade_standby_ids)) + self.logger.log("Successfully check instance status.") + + def check_dn_instance_params(self): + """set_dn_instance_params""" + check_dick = {"enable_dcf": "off", "synchronous_commit": "on"} + dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + if len(dn_insts) <= 2: + self.logger.debug("Need set most available for current cluster.") + check_dick.update({"most_available_sync": "on"}) + primary_dn_insts = [inst for inst in dn_insts if inst.instanceId in self.primary_dn_ids] + if not primary_dn_insts: + self.logger.debug("The primary dn not exist, do not need check dn inst params.") + return + execute_dn = primary_dn_insts[0] + param_list = [] + guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + if not os.path.isfile(guc_backup_file): + FileUtil.createFileInSafeMode(guc_backup_file, DefaultValue.KEY_FILE_MODE_IN_OS) + for peer_check, idx in list(check_dick.items()): + param_list.append((execute_dn, {peer_check: idx})) + ret = parallelTool.parallelExecute(self._check_dn_inst_param, param_list) + self.ssh_tool.scpFiles(guc_backup_file, self.streaming_file_dir, self.cluster_node_names) + if any(ret): + self.logger.logExit('\n'.join(filter(bool, ret))) + self.logger.debug("Successfully check dn inst default value.") + + def _check_dn_inst_param(self, param): + """check_dn_inst_param""" + self.logger.debug("Check dn inst params: %s." % param[1]) + if len(param) != 2: + error_msg = ErrorCode.GAUSS_521["GAUSS_52102"] % param + return error_msg + guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + for sql_key, value in list(param[1].items()): + sql = "show %s;" % sql_key + (status, output) = ClusterCommand.remoteSQLCommand(sql, + self.user, param[0].hostname, + str(param[0].port)) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql, "\nError:%s" % output) + if output.strip() != value: + if sql_key in DoradoDisasterRecoveryConstants.GUC_CHANGE_MAP.keys(): + content = "%s,%s,%s" % (sql_key, output.strip(), self.trace_id) + FileUtil.write_add_file(guc_backup_file, content, + DefaultValue.KEY_FILE_MODE_IN_OS) + self.__set_guc_param(sql_key, DoradoDisasterRecoveryConstants.GUC_CHANGE_MAP[sql_key], + mode="reload", inst_type="dn", raise_error=True) + return + error_msg = ErrorCode.GAUSS_516["GAUSS_51632"] \ + % "check [%s], Actual value: [%s], expect value: [%s]" \ + % (sql, output, value) + return error_msg + self.logger.debug("Successfully check and rectify dn inst value:%s." 
% param[1]) + + def restore_guc_params(self): + """ + Restore guc params in .streaming_guc_backup + """ + self.logger.debug("Start restore guc params.") + guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + if not os.path.isfile(guc_backup_file): + self.logger.debug("Not found guc backup file, no need to restore guc params.") + return + params_record = DefaultValue.obtain_file_content(guc_backup_file) + params_record.reverse() + restored_keys = [] + for param in params_record: + guc_key, guc_value, trace_id = param.split(",") + self.logger.debug("Got guc param:%s, value:%s, trace id:%s in guc backup file." + % (guc_key, guc_value, trace_id)) + if guc_key not in DoradoDisasterRecoveryConstants.GUC_CHANGE_MAP.keys(): + continue + # When the number of dns <=2, ensure that the maximum available mode is always on. + dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + if guc_key in restored_keys or len(dn_insts) <= 2 \ + and guc_key in ["most_available_sync"]: + continue + guc_value = "off" if guc_value not in ["on", "off"] else guc_value + self.__set_guc_param(guc_key, guc_value, mode="reload", + inst_type="dn", raise_error=False) + restored_keys.append(guc_key) + + def set_most_available(self, mode='set', inst_type='dn', raise_error=True): + dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if int(dn_inst.mirrorId) == 1] + if len(dn_insts) > 2: + self.logger.debug("No need set most available for current cluster.") + return + self.__set_guc_param("most_available_sync", "on", mode=mode, + inst_type=inst_type, raise_error=raise_error) + + self.__set_guc_param("synchronous_commit", "on", mode=mode, + inst_type=inst_type, raise_error=raise_error) + + def __set_guc_param(self, key, value, mode='set', inst_type='dn', raise_error=True): + """ + Set guc param + """ + if inst_type == 'dn': + instance = '-Z datanode' + elif inst_type == 'cn': + instance = '-Z coordinator' + else: + instance = "-Z datanode -Z coordinator" + cmd = "source %s && gs_guc %s %s -N all -I all " \ + "-c \"%s=%s\"" \ + % (self.mpp_file, mode, instance, key, value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + if raise_error: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error:%s" % output) + else: + self.logger.debug(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error:%s" % output) + else: + self.logger.debug("Successfully change %s %s with mode %s." % (key, value, mode)) + + def distribute_cluster_conf(self): + """ + Record cluster conf in files + """ + data = {"remoteClusterConf": self.params.remoteClusterConf, + "localClusterConf": self.params.localClusterConf} + file_path = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) + FileUtil.write_update_file(file_path, data, DefaultValue.KEY_FILE_MODE_IN_OS) + self.ssh_tool.scpFiles(file_path, self.streaming_file_dir, self.cluster_node_names) + + def __record_wal_keep_segments(self, param_list): + """ + record wal_keep_segments value to .wal_keep_segments_record + """ + dn_inst, sql_check, wal_keep_segments = param_list + self.logger.debug("Starting record wal_keep_segments default " + "value for instance:%s." % dn_inst.instanceId) + (status, output) = ClusterCommand.remoteSQLCommand( + sql_check, self.user, dn_inst.hostname, dn_inst.port, True) + self.logger.debug("Got %s wal_keep_segments, status=%d, output: %s."
% + (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output))) + if status == 0 and output.strip(): + value = output.strip() + FileUtil.createFile(wal_keep_segments, True, DefaultValue.KEY_FILE_MODE) + FileUtil.writeFile(wal_keep_segments, [str(dn_inst.instanceId) + ":" + str(value)]) + self.logger.debug("Successfully record %s wal_keep_segments default value:%s" % + (dn_inst.hostname, value)) + else: + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] + % "wal_keep_segments default value of %s" % dn_inst.instanceId) + + def get_default_wal_keep_segments(self, only_mode=None): + """ + get wal_keep_segments default value in primary dn + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Wal keep segment opts not for mode:%s." % self.params.mode) + return + self.logger.debug("Starting get wal_keep_segments default value.") + wal_keep_segments = os.path.join( + self.streaming_file_dir, DoradoDisasterRecoveryConstants.WAL_KEEP_SEGMENTS) + sql_check = "show wal_keep_segments;" + param_list = [(dn_inst, sql_check, wal_keep_segments) for db_node in + self.cluster_info.dbNodes for dn_inst in db_node.datanodes + if dn_inst.instanceId in self.primary_dn_ids] + if not param_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for get wal_keep_segments") + parallelTool.parallelExecute(self.__record_wal_keep_segments, param_list) + self.logger.debug("Successfully get wal_keep_segments default value.") + + def __set_wal_keep_segments_each_inst(self, params_list): + """ + Set wal_keep_segments value in primary dn + """ + (inst, opt_type, value, mpprc_file) = params_list + self.logger.debug("Start [%s] shardNum [%s] node [%s] wal_keep_segments value [%s]." + % (opt_type, inst.mirrorId, inst.hostname, value)) + cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c \\\"wal_keep_segments = '%s'\\\"\"" % \ + (mpprc_file, inst.hostname, mpprc_file, opt_type, inst.datadir, value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Options:%s, Error: \n%s " + % ("set wal_keep_segments for inst:%s" % inst.instanceId, str(output))) + self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " + "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) + + def set_wal_keep_segments(self, opt_type, value, restore_flag=False, only_mode=None): + """ + guc set wal_keep_segments value in primary dn + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Set wal_keep_segments opts not for mode:%s." % self.params.mode) + return + self.logger.log("Starting %s wal_keep_segments value: %s." % (opt_type, value)) + if restore_flag and isinstance(value, dict): + params_list = [(inst, opt_type, value.get(inst.instanceId, 128), self.mpp_file) for + node in self.cluster_info.dbNodes for inst in node.datanodes + if inst.instanceId in self.primary_dn_ids] + else: + params_list = [(inst, opt_type, value, self.mpp_file) for node in + self.cluster_info.dbNodes for inst in node.datanodes + if inst.instanceId in self.primary_dn_ids] + if not params_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for set wal_keep_segments") + parallelTool.parallelExecute(self.__set_wal_keep_segments_each_inst, params_list) + self.logger.log("Successfully %s wal_keep_segments value: %s." 
% (opt_type, value)) + + def __stop_one_node(self, node_id): + """ + Stop one node by node id + """ + self.logger.debug("Start stop node:%s" % node_id) + cmd = ClusterCommand.getStopCmd(int(node_id), "i", 1800) + self.logger.debug("Streaming disaster calling cm_ctl to stop cluster, cmd=[%s]" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + self.logger.debug("Failed stop node:%s, error:%s" % (node_id, output)) + else: + self.logger.debug("Successfully stop node:%s" % node_id) + + def stop_cluster_by_node(self, only_mode=None): + """ + stop the cluster by node + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Stop cluster by node not for mode:%s." % self.params.mode) + return + self.logger.log("Stopping the cluster by node.") + static_config = "%s/cluster_static_config" % self.bin_path + cm_ctl_file = "%s/cm_ctl" % self.bin_path + if not os.path.isfile(static_config) or not os.path.isfile(cm_ctl_file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] + % (static_config + " or " + cm_ctl_file)) + node_id_list = list(set([instance.nodeId for instance in self.normal_instances])) + parallelTool.parallelExecute(self.__stop_one_node, node_id_list) + self.logger.log("Successfully stopped the cluster by node for streaming cluster.") + + def get_all_connection_node_name(self, action_flag="", no_update=True): + """ + get all connection node name + """ + if self.connected_nodes and no_update: + self.logger.debug("Got connected nodes:%s for action:%s" + % (self.connected_nodes, action_flag)) + return self.connected_nodes + rets = parallelTool.parallelExecute(DefaultValue.fast_ping, self.cluster_node_names) + self.logger.debug("Check connect for action:%s, result:%s" % (action_flag, str(rets))) + connected_hosts = [ret[0] for ret in rets if ret[-1]] + self.connected_nodes = connected_hosts + return self.connected_nodes + + def update_streaming_pg_hba(self): + """ + update pg_hba.conf, read config_param.json file and set other cluster ip + :return:NA + """ + self.logger.log("Start update pg_hba config.") + FileUtil.cpFile(self.params.xml_path, self.streaming_xml) + cmd = "source %s; %s -U %s -X '%s' --try-reload" % ( + self.mpp_file, OMCommand.getLocalScript( + "Local_Config_Hba"), self.user, self.streaming_xml) + self.logger.debug("Command for changing instance pg_hba.conf file: %s" % cmd) + self.get_all_connection_node_name("update_streaming_pg_hba") + try: + self.ssh_tool.scpFiles(self.streaming_xml, self.streaming_file_dir) + self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) + except Exception as error: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % "update streaming pg_hba with error:%s" % error + self.logger.debug(msg) + raise Exception(msg) + self.logger.log("Successfully update pg_hba config.") + + def __get_repl_info_cmd(self, node_name, ret, dn_inst, opt_mode, idx): + """ + get_repl_info_cmd + """ + if node_name != self.local_host: + set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c " \ + "\\\"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s " \ + "remoteservice=%s iscascade=%s iscrossregion=%s'\\\"\"" + set_cmd = set_cmd % (self.mpp_file, node_name, + self.mpp_file, opt_mode, + dn_inst.datadir, idx, ret.group(1), + ret.group(2), ret.group(3), ret.group(4), + ret.group(5), ret.group(6), ret.group(7), + ret.group(8), "true", "false") + else: + set_cmd = "source %s ; gs_guc %s -Z datanode 
-D %s -c " \ + "\"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s " \ + "remoteservice=%s iscascade=%s iscrossregion=%s'\"" + set_cmd = set_cmd % (self.mpp_file, opt_mode, + dn_inst.datadir, idx, ret.group(1), + ret.group(2), ret.group(3), ret.group(4), + ret.group(5), ret.group(6), ret.group(7), + ret.group(8), "true", "false") + return set_cmd + + def __set_original_repl_info(self, dn_inst, node_name, opt_mode="set"): + """ + Rectify original replconninfos + """ + orignal_ports = None + if not all([dn_inst, node_name]): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain dn infos") + for idx in range(1, DoradoDisasterRecoveryConstants.MAX_REPLICATION_NUMS + 1): + if node_name == self.local_host: + cmd = "source %s; gs_guc check -Z datanode -D %s " \ + "-c 'cross_cluster_replconninfo%s'" % (self.mpp_file, dn_inst.datadir, idx) + else: + cmd = "source %s; pssh -H %s 'source %s; gs_guc check " \ + "-Z datanode -D %s -c \"cross_cluster_replconninfo%s\"'" \ + % (self.mpp_file, node_name, self.mpp_file, dn_inst.datadir, idx) + self.logger.debug("Check original repl infos with cmd:%s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + if output.count("=NULL") > 2 or "iscrossregion=true" in output.lower(): + self.logger.debug("InstanceID:%s, Index:%s" % (dn_inst.instanceId, idx)) + return idx, orignal_ports + ret = re.search( + r"cross_cluster_replconninfo%s='localhost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" + r" localport=(\d{4,5}) " + r"remotehost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}) " + r"remoteport=(\d{4,5}) " % idx, output) + if not ret: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "search repl infos") + set_cmd = self.__get_repl_info_cmd(node_name, ret, dn_inst, opt_mode, idx) + self.logger.debug("Set original repl infos with cmd:%s" % set_cmd) + status, output = CmdUtil.retryGetstatusoutput(set_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + + " Error: \n%s " % output) + orignal_ports = (ret.group(2), ret.group(3), ret.group(4)) + self.logger.debug("Successfully rectify original repl infos for instance:%s." + % dn_inst.instanceId) + + def __get_local_data_ip(self, inst_host): + """ + Get local data ip + """ + local_cluster_info = self.params.localClusterConf + shards = local_cluster_info["shards"] + inst_ips = DefaultValue.get_remote_ips(inst_host, self.mpp_file) + for shard in shards: + for node in shard: + ip = node["ip"] + data_ip = node["dataIp"] + if ip in inst_ips: + self.logger.debug("Got ip[%s], dataIp[%s]." % (ip, data_ip)) + return data_ip + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "obtain shards from local cluster info") + + def __config_one_dn_instance(self, params): + """ + Config replconninfo for one dn instance + """ + inst, opt_mode, remote_cluster_info = params + local_data_ip = self.__get_local_data_ip(inst.hostname) + base_dn_port = self.params.remoteClusterConf['port'] + self.logger.debug("Start config instance:[%s], got dataIp:[%s], port:[%s]." 
+ % (inst.instanceId, local_data_ip, base_dn_port)) + if not all([local_data_ip, base_dn_port]): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] + % "dn port or dataIp for config instance") + inst_index, original_ports = self.__set_original_repl_info( + inst, inst.hostname, opt_mode=opt_mode) + repl_params = [] + shards = remote_cluster_info.get("shards") + for shard in shards: + for node_info in shard: + data_ip = node_info.get("dataIp") + shard_num = node_info.get("shardNum", '1') + if str(inst.mirrorId) == str(shard_num): + repl_params.append(( + shard_num, inst.hostname, local_data_ip, + inst.datadir, data_ip, inst_index, + original_ports, base_dn_port, opt_mode)) + inst_index += 1 + return repl_params + + def __do_config_dn_repl_info(self, params): + """ + function:config postgres conf + :return:NA + """ + shard_num, host, local_data_ip, data_dir, data_ip, index, \ + original_ports, base_port, opt_mode = params + local_port, local_heartbeat, local_service = original_ports + remote_base = int(base_port) + self.logger.debug("shard num %s base port is %s" % (shard_num, remote_base)) + remote_port = remote_base + 1 + remote_heartbeat = remote_base + 5 + remote_service = remote_base + 4 + is_cascade = "false" + if self.local_host == host: + guc_cmd = "source %s ; gs_guc %s -Z datanode -D %s " \ + "-c \"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s remoteservice=%s " \ + "iscascade=%s iscrossregion=true'\"" \ + % (self.mpp_file, opt_mode, data_dir, index, local_data_ip, local_port, + local_heartbeat, local_service, data_ip, remote_port, + remote_heartbeat, remote_service, is_cascade) + self.logger.debug("Set datanode postgres file for streaming " + "disaster cluster with cmd:%s" % guc_cmd) + else: + guc_cmd = "source %s; pssh -s -H %s \"source %s ; gs_guc %s -Z datanode -D %s " \ + "-c \\\"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s remoteservice=%s " \ + "iscascade=%s iscrossregion=true'\\\"\"" \ + % (self.mpp_file, host, + self.mpp_file, opt_mode, data_dir, index, + local_data_ip, local_port, local_heartbeat, + local_service, data_ip, remote_port, + remote_heartbeat, remote_service, is_cascade) + self.logger.debug("Set datanode postgres file for streaming " + "disaster cluster with cmd:%s" % guc_cmd) + status, output = CmdUtil.retryGetstatusoutput(guc_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % guc_cmd + + " Error: \n%s " % output) + + def config_cross_cluster_repl_info(self): + """ + update postgresql.conf for replconninfo + """ + self.logger.debug("set all datanode guc param in postgres conf for ddr cluster.") + repl_params = [] + opt_mode = "reload" if self.params.mode == "primary" else "set" + config_repl_params = [] + datanode_instance = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes] + + for inst in datanode_instance: + config_repl_params.append((inst, opt_mode, self.params.remoteClusterConf)) + rets = parallelTool.parallelExecute(self.__config_one_dn_instance, config_repl_params) + for param in rets: + repl_params += param + self.logger.debug("Got repl params:%s" % str(repl_params)) + parallelTool.parallelExecute(self.__do_config_dn_repl_info, repl_params) + self.logger.debug( + "Successfully set all datanode guc param in postgres conf for streaming cluster.") + + def set_cmserver_guc(self, guc_parameter, guc_value, 
guc_type, only_mode=None): + """ + set cmserver guc param + :return: NA + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Set cms guc [%s] to [%s] not for mode:%s." + % (guc_parameter, guc_value, self.params.mode)) + return + cmd = "gs_guc %s -Z cmserver -N all -I all -c \"%s=%s\" " % \ + (guc_type, guc_parameter, guc_value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % "set cm server guc [%s] to [%s], output:%s" \ + % (guc_parameter, guc_value, output) + self.logger.debug(msg) + + def set_cmagent_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): + """ + set cmagent guc param + :return: NA + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Set cma guc [%s] to [%s] not for mode:%s." + % (guc_parameter, guc_value, self.params.mode)) + return + cmd = "gs_guc %s -Z cmagent -N all -I all -c \"%s=%s\" " % \ + (guc_type, guc_parameter, guc_value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % "set cm agent guc [%s] to [%s], output:%s" \ + % (guc_parameter, guc_value, output) + self.logger.debug(msg) + + def __check_datanode_data_ip_connection(self, inst): + """ + Check remote data ip can connect or not + """ + any_connected = False + node_infos = [node_info for shard in self.params.remoteClusterConf.get("shards", []) + for node_info in shard] + local_data_ip = self.__get_local_data_ip(inst.hostname) + for node_info in node_infos: + data_ip = node_info.get("dataIp") + shard_num = node_info.get("shardNum", '1') + if str(shard_num) != str(inst.mirrorId): + continue + _, ret = DefaultValue.fast_ping_on_node(inst.hostname, local_data_ip, + data_ip, self.logger) + if ret: + any_connected = True + break + if not any_connected: + self.logger.error("Failed check data ip connection for inst:%s." % inst.instanceId) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "check data ip connection") + self.logger.debug("Successfully check main standby data ip connection.") + + def __pghba_backup_handler(self, node_name, dir_name, inst_id, mode="backup"): + """ + Backup or restore pg_hba file. + """ + file_path = os.path.join(dir_name, "pg_hba.conf") + old_file_path = os.path.join(dir_name, "pg_hba.conf.old") + dest_file = os.path.join(self.streaming_file_dir, "%s_pg_hba.conf" % inst_id) + if self.local_host == node_name: + if mode == "backup" and not os.path.isfile(dest_file): + if os.path.isfile(file_path): + self.logger.debug("Backup file from[%s] to[%s]." % ( + file_path, dest_file)) + FileUtil.cpFile(file_path, dest_file) + else: + self.logger.debug("Backup file from[%s] to[%s]." % ( + old_file_path, dest_file)) + FileUtil.cpFile(old_file_path, dest_file) + if mode == "restore": + self.logger.debug("Restore file from[%s] to[%s]." % ( + dest_file, file_path)) + FileUtil.cpFile(dest_file, file_path) + FileUtil.removeFile(dest_file) + else: + if mode == "backup": + cmd = "source %s; pssh -s -H %s \"if [ ! -f '%s' ];then if [ -f '%s' ];" \ + "then cp '%s' '%s';else cp '%s' '%s';fi;fi\"" \ + % (self.mpp_file, node_name, dest_file, file_path, file_path, + dest_file, old_file_path, dest_file) + self.logger.debug("Backup file on node[%s] with cmd [%s]." % ( + node_name, cmd)) + else: + cmd = "source %s; pssh -s -H %s \"cp %s %s && rm -f %s\"" % ( + self.mpp_file, node_name, dest_file, file_path, dest_file) + self.logger.debug("Restore file on node[%s] from[%s] to[%s]." 
% ( + node_name, file_path, dest_file)) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + def __pg_ident_backup_handler(self, node_name, dir_name, inst_id, mode="backup"): + """ + Backup or restore pg_ident file. + """ + file_path = os.path.join(dir_name, "pg_ident.conf") + dest_file = os.path.join(self.streaming_file_dir, "%s_pg_ident.conf" % inst_id) + if self.local_host == node_name: + if mode == "backup" and not os.path.isfile(dest_file): + if os.path.isfile(file_path): + self.logger.debug("Backup file from[%s] to[%s]." % ( + file_path, dest_file)) + FileUtil.cpFile(file_path, dest_file) + + if mode == "restore" and os.path.isfile(dest_file): + self.logger.debug("Restore file from[%s] to[%s]." % ( + dest_file, file_path)) + FileUtil.cpFile(dest_file, file_path) + FileUtil.removeFile(dest_file) + else: + if mode == "backup": + cmd = "source %s; pssh -s -H %s \"if [ ! -f '%s' ];then if [ -f '%s' ];" \ + "then cp '%s' '%s';fi;fi\"" \ + % (self.mpp_file, node_name, dest_file, file_path, file_path, dest_file) + self.logger.debug("Backup file on node[%s] with cmd [%s]." % ( + node_name, cmd)) + else: + cmd = "source %s; pssh -s -H %s \"if [ -f '%s' ];then cp '%s' '%s' && " \ + "rm -f '%s';fi\"" % (self.mpp_file, node_name, dest_file, dest_file, + file_path, dest_file) + self.logger.debug("Restore file on node[%s] from[%s] to[%s]." % ( + node_name, file_path, dest_file)) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + + def __start_main_standby_dn(self, start_params): + """ + Start single main standby dn + """ + local_ip, inst, bin_path, distribute_arg, build_timeout = start_params + self.logger.debug("Starting start dn:%s" % inst.instanceId) + if local_ip == inst.hostname: + cmd_start = "source %s; %s/gs_ctl start -D %s -M hadr_main_standby%s" % ( + self.mpp_file, bin_path, inst.datadir, distribute_arg) + else: + cmd_start = "source %s; pssh -s -t %s -H %s \"source %s; %s/gs_ctl start -D %s " \ + "-M hadr_main_standby%s\"" \ + % (self.mpp_file, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname, + self.mpp_file, bin_path, inst.datadir, distribute_arg) + self.logger.debug("Start dn with cmd:%s." 
% cmd_start) + status, output = CmdUtil.retry_util_timeout(cmd_start, build_timeout) + if status != 0: + raise Exception( + ErrorCode.GAUSS_514[ + "GAUSS_51400"] % cmd_start + " Error: \n%s " % output) + self.logger.debug("Successfully start dn:%s" % inst.instanceId) + + def __build_main_standby_dn(self, params): + """ + Build single main standby dn + """ + inst, build_timeout, local_ip, bin_path, distribute_arg, rds_backup, backup_pwd = params + self.logger.debug("Start build main standby dn:%s" % inst.instanceId) + self.__check_datanode_data_ip_connection(inst) + self.__pghba_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="backup") + self.__pg_ident_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="backup") + # -t 1209600 means default value 14 days + if local_ip == inst.hostname: + cmd = "source %s; %s/gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s" \ + % (self.mpp_file, bin_path, inst.datadir, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) + else: + cmd = "echo \"source %s; %s/gs_ctl build -D %s -b cross_cluster_full -g 0 -q " \ + " -t %s\" | pssh -s -t %s -H %s" \ + % (self.mpp_file, bin_path, inst.datadir, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname) + cmd_log = cmd.replace(backup_pwd, '***') + self.logger.debug("Building with cmd:%s." % cmd_log) + status, output = CmdUtil.retry_util_timeout(cmd, build_timeout) + if status != 0: + error_detail = "Error: Failed to do build because of pssh timeout." \ + if "was killed or timeout" in output else \ + "Error: Failed to do build because of retry timeout in %s s." \ + % build_timeout + self.logger.debug("Failed to do gs_ctl build. " + error_detail) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "full build from remote cluster" + error_detail) + self.logger.debug("Successfully build main standby dn:%s" % inst.instanceId) + self.__pghba_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="restore") + self.__pg_ident_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="restore") + start_params = (local_ip, inst, bin_path, distribute_arg, build_timeout) + self.__start_main_standby_dn(start_params) + + def __build_cascade_standby_dn(self, params): + """ + Build single main standby dn + """ + inst, build_timeout, local_ip, bin_path, distribute_arg = params + self.logger.debug("Start build cascade standby dn:%s" % inst.instanceId) + # -t 1209600 means default value 14 days + if local_ip == inst.hostname: + cmd = "source %s; %s/gs_ctl build -D %s -M cascade_standby " \ + "-b standby_full -r 7200%s -t %s" \ + % (self.mpp_file, bin_path, inst.datadir, distribute_arg, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) + else: + cmd = "echo \"source %s; %s/gs_ctl build -D %s -M cascade_standby -b standby_full " \ + "-r 7200%s -t %s\" | pssh -s -t %s -H %s" \ + % (self.mpp_file, bin_path, inst.datadir, distribute_arg, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT, + DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname) + self.logger.debug("Building with cmd:%s." % cmd) + status, output = CmdUtil.retry_util_timeout(cmd, build_timeout) + if status != 0: + error_detail = "Error: Failed to do build because of pssh timeout." \ + if "was killed or timeout" in output else \ + "Error: Failed to do build because of retry timeout in %s s." \ + % build_timeout + self.logger.debug("Failed to do gs_ctl build. 
" + error_detail) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "full build from remote cluster" + error_detail) + self.logger.debug("Successfully build cascade standby dn:%s" % inst.instanceId) + + def start_dss_instance(self, only_mode=None): + """ + Start dss server process + """ + cmd = "source %s; export DSS_MAINTAIN=TRUE; dssserver -D %s & " % self.dss_home_dir + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + self.logger.error(ErrorCode.GAUSS_516["GAUSS_51600"] + + "status(%d), output(%s)" % (status, output)) + return output + + def kill_dss_instance(self, only_mode=None): + """ + Start dss server process + """ + cmd = "source %s; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9" % self.mpp_file + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + self.logger.error(ErrorCode.GAUSS_516["GAUSS_51600"] + + "status(%d), output(%s)" % (status, output)) + return output + + def build_dn_instance(self, only_mode=None): + """ + Build dn instance + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Build dn step is not for mode:%s." % self.params.mode) + return + self.logger.debug("Start building process.") + distribute_arg = "" if self.cluster_info.isSingleInstCluster() else " -Z datanode" + main_params = [] + cascade_params = [] + datanode_instance = [inst for node in self.cluster_info.dbNodes + for inst in node.datanodes] + for inst in datanode_instance: + if inst.instanceId in self.main_standby_ids + self.primary_dn_ids: + main_params.append((inst, self.params.waitingTimeout, self.local_host, + self.bin_path, distribute_arg, self.params.hadrUserName, + self.params.hadrUserPassword)) + else: + cascade_params.append((inst, self.params.waitingTimeout, self.local_host, + self.bin_path, distribute_arg)) + if main_params: + parallelTool.parallelExecute(self.__build_main_standby_dn, main_params) + self.logger.debug("Finished build main standby dns.") + #if cascade_params: + # parallelTool.parallelExecute(self.__build_cascade_standby_dn, cascade_params) + # self.logger.debug("Finished build cascade standby dns.") + del self.params.hadrUserPassword + + def query_cluster(self): + """ + query cluster + :return: output + """ + cmd = "source %s; cm_ctl query -v -C -s -i -d" % self.mpp_file + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + self.logger.error(ErrorCode.GAUSS_516["GAUSS_51600"] + + "status(%d), output(%s)" % (status, output)) + return output + + def start_cluster(self, cm_timeout=None, only_mode=None): + """ + start the cluster + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Start cluster is not for mode:%s." % self.params.mode) + return + self.logger.log("Starting the cluster.") + cm_timeout = cm_timeout or 300 + user, group = UserUtil.getPathOwner(self.gp_home) + if user == "" or group == "": + raise Exception("Failed to obtain the owner of application.") + end_time = datetime.now() + timedelta(seconds=cm_timeout) + cmd = ClusterCommand.getStartCmd(0, cm_timeout) + self.logger.debug("Calling cm_ctl to start cluster, cmd=[%s]" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd, retry_time=0) + if status != 0: + error_str = ErrorCode.GAUSS_516["GAUSS_51607"] % "the cluster" + \ + " Error:\n%s." % output + self.logger.debug(error_str) + self.logger.log("Warning: the cluster is not normal, please check cluster status!") + else: + self.logger.log("Successfully started primary instance. 
" + "Please wait for standby instances.") + + cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL, + DefaultValue.CLUSTER_STATUS_DEGRADED] + while True: + time.sleep(5) + self.logger.log('Waiting cluster normal.') + check_ret = self.check_cluster_status(cluster_normal_status, only_check=True, + check_current=True, is_log=False) + if check_ret: + self.logger.log("Successfully started standby instances.") + break + if datetime.now() >= end_time: + query_result = self.query_cluster() + self.logger.log("Timeout. Failed to start the cluster in (%s)s." % cm_timeout) + self.logger.log("Current cluster status (%s)." % query_result) + self.logger.log("It will continue to start in the background.") + break + + def __check_one_main_standby_connection(self, param_list): + """ + concurrent check main standby is connected primary dn + """ + (dn_inst, sql_check) = param_list + self.logger.debug("Node %s primary dn instanceId [%s] Check main standby is connected " + "with cmd:%s." % (dn_inst.hostname, dn_inst.instanceId, sql_check)) + status, output = ClusterCommand.remoteSQLCommand( + sql_check, self.user, dn_inst.hostname, dn_inst.port) + if status == 0 and output.strip(): + self.logger.debug("Successfully check main standby connected " + "primary dn on inst:[%s]." % dn_inst.instanceId) + return True + self.logger.debug("Retry check main standby connected on inst:[%s]." % dn_inst.instanceId) + + def check_main_standby_connection_primary_dn(self, p_inst_list): + """ + check connection main_standby connected primary dn + """ + if not p_inst_list: + self.logger.debug("The primary dn does not exist on current cluster.") + return + self.primary_dn_ids = p_inst_list + sql_check = "select 1 from pg_catalog.gs_hadr_local_rto_and_rpo_stat();" + sql_check_2 = "select 1 from pg_catalog.pg_stat_get_wal_senders() where " \ + "sync_state='Async' and peer_role='Standby' and peer_state='Normal';" + param_list = [(dn_inst, sql_check) for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes + if dn_inst.instanceId in self.primary_dn_ids] + param_list_2 = [(dn_inst, sql_check_2) for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if dn_inst.instanceId + in self.primary_dn_ids] + if not param_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for check main standby connection on primary dn") + self.logger.debug("Start check main standby connection with sql:%s." % sql_check) + results = parallelTool.parallelExecute(self.__check_one_main_standby_connection, + param_list) + self.logger.debug("Start check main standby connection with sql:%s." % sql_check_2) + results_2 = parallelTool.parallelExecute(self.__check_one_main_standby_connection, + param_list_2) + + return all(results+results_2) + + def wait_main_standby_connection(self, only_mode=None): + if only_mode and self.params.mode != only_mode: + self.logger.debug("Start cluster is not for mode:%s." 
% self.params.mode) + return + self.logger.log("Waiting for the main standby connection.") + end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout) + while True: + p_inst_list = [int(i) for i in DefaultValue.get_primary_dn_instance_id("Primary", + ignore=True)] + if self.check_main_standby_connection_primary_dn(p_inst_list): + break + if datetime.now() >= end_time: + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51632"] % "check main standby connection" + + " Because Waiting timeout: %ss" % str(self.params.waitingTimeout)) + time.sleep(5) + self.logger.log("Main standby already connected.") + + def hadr_key_generator(self, key_name): + """ + Generate key_name.key.cipher & key_name.key.rand + """ + self.logger.log("Start generate hadr key files.") + if not os.path.exists(self.bin_path): + msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain bin path." + self.logger.debug(msg) + raise Exception(msg) + if not os.path.exists(self.gp_home): + msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain env GPHOME" + self.logger.debug(msg) + raise Exception(msg) + key_cipher = os.path.join(self.bin_path, "%s.key.cipher" % key_name) + key_rand = os.path.join(self.bin_path, "%s.key.rand" % key_name) + cmd = "export LD_LIBRARY_PATH=%s/script/gspylib/clib && source %s " \ + "&& gs_guc generate -S default -o %s -D '%s' && %s && %s" \ + % (self.gp_home, self.mpp_file, key_name, self.bin_path, + CmdUtil.getChmodCmd(str(ConstantsBase.KEY_FILE_MODE), key_cipher), + CmdUtil.getChmodCmd(str(ConstantsBase.KEY_FILE_MODE), key_rand)) + if (not os.path.isfile(key_cipher)) or (not os.path.isfile(key_rand)): + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0 or (not os.path.isfile(key_cipher)) \ + or (not os.path.isfile(key_rand)): + msg = ErrorCode.GAUSS_516["GAUSS_51632"] \ + % "generate hadr key files" + "Error:%s" % output + self.logger.error(msg) + raise Exception(msg) + else: + self.logger.log("Streaming key files already exist.") + + self.ssh_tool.scpFiles(key_cipher, self.bin_path) + self.ssh_tool.scpFiles(key_rand, self.bin_path) + self.logger.log("Finished generate and distribute hadr key files.") + + def encrypt_hadr_user_info(self, key_name, hadr_user, hadr_pwd): + """ + Encrypt hadr user info. 
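+ Runs gs_encrypt against the "<hadr_user>|<hadr_pwd>" string using the
+ key name generated by hadr_key_generator; the resulting cipher text is
+ later stored in the database by keep_hadr_user_info via
+ ALTER GLOBAL CONFIGURATION (hadr_user_info).
+ :param key_name: key name used for the hadr key files (<key_name>.key.cipher/.rand)
+ :param hadr_user: disaster recovery user name
+ :param hadr_pwd: disaster recovery user password
+ :return: encrypted hadr user info string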
+ """ + self.logger.log("Start encrypt hadr user info.") + cmd = "source %s && gs_encrypt -f %s \"%s|%s\"" \ + % (self.mpp_file, key_name, hadr_user, hadr_pwd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0 or not output: + msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "encrypt hadr user info" + self.logger.error(msg) + raise Exception(msg) + self.logger.log("Successfully encrypt hadr user info.") + return output + + def keep_hadr_user_info(self, info_str, retry=5): + """ + Keep hadr user info into GLOBAL CONFIGURATION + """ + self.logger.log("Start save hadr user info into database.") + sql = "ALTER GLOBAL CONFIGURATION with(hadr_user_info ='%s');" % info_str + primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + primary_dns = primary_dns * retry + output = "None" + for dn_inst in primary_dns: + status, output = ClusterCommand.remoteSQLCommand( + sql, self.user, dn_inst.hostname, dn_inst.port, True) + if status == 0: + self.logger.log("Successfully save hadr user info into database.") + return + msg = ErrorCode.GAUSS_516['GAUSS_51632'] % "save hadr user info into database" + self.logger.error(msg + "Error:%s" % SensitiveMask.mask_pwd(output)) + raise Exception(msg) + + def restore_wal_keep_segments(self, only_mode=None): + """ + restore wal_keep_segments default value + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Restore wal_keep_segments not for mode:%s." % self.params.mode) + return + self.logger.debug("Starting restore wal_keep_segments default value.") + default_value_dict = {} + wal_keep_segments = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.WAL_KEEP_SEGMENTS) + if not os.path.isfile(wal_keep_segments): + self.logger.debug("Not found wal keep segments record file, no need restore.") + return + wal_keep_segments_list = FileUtil.readFile(wal_keep_segments) + if not wal_keep_segments_list: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "obtain record wal_keep_segments") + for each_dn in wal_keep_segments_list: + DefaultValue.checkGuc(each_dn.split(":")[1].strip()) + default_value_dict[each_dn.split(":")[0].strip()] = each_dn.split(":")[1].strip() + self.set_wal_keep_segments("reload", default_value_dict, True) + self.logger.debug("Successfully restore wal_keep_segments default value.") + + def __clean_streaming_files_on_local_node(self, file_name_list): + file_name_list = [file_name_list] \ + if not isinstance(file_name_list, list) else file_name_list + for file_name in file_name_list: + file_path = os.path.join(self.streaming_file_dir, file_name) + if os.path.isfile(file_path): + FileUtil.removeFile(file_path) + self.logger.debug("Successfully removed file:[%s]" % file_path) + + def clean_step_file(self): + """ + Clean step file for each action + """ + step_file = os.path.basename(self.step_file_path) + self.__clean_streaming_files_on_local_node(step_file) + self.logger.log("Successfully removed step file.") + + def check_action_and_mode(self): + """ + Check action and mode if step file exist. + if any streaming options not finished(step file exist), + not allowed doing any other streaming options except query. 
+ """ + self.logger.debug("Checking action and mode.") + exist_step_file_names = [] + for file_name in DoradoDisasterRecoveryConstants.DDR_STEP_FILES.values(): + step_file_path = os.path.join(self.streaming_file_dir, file_name) + if os.path.isfile(step_file_path) and file_name != ".ddr_query.step": + exist_step_file_names.append(file_name) + if exist_step_file_names and set(exist_step_file_names) ^ {os.path.basename( + self.step_file_path)}: + exist_action = [key for key, value in DoradoDisasterRecoveryConstants.DDR_STEP_FILES.items() + if value in exist_step_file_names] + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] + % "check action and mode, the step files %s already exist, " + "please ensure the action %s is finished before " + "doing current options" % (exist_step_file_names, exist_action)) + self.logger.debug("Successfully checked action and mode.") + + def clean_streaming_dir(self): + """ + Clean streaming dir when stop or failover + """ + self.logger.debug("Start clean streaming dir:%s." % self.streaming_file_dir) + cmd = g_file.SHELL_CMD_DICT["deleteDir"] % (self.streaming_file_dir, + self.streaming_file_dir) + try: + self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterNodeNames()) + except Exception as error: + self.logger.debug( + "Failed to remove streaming dir with error:%s" % error) + self.logger.log("Finished remove streaming dir.") + + def clean_global_config(self): + """ + Clean global config + """ + self.logger.log("Clean hadr user info.") + sql = "DROP GLOBAL CONFIGURATION hadr_user_info;" + primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + output = "None" + for dn_inst in primary_dns: + status, output = ClusterCommand.remoteSQLCommand( + sql, self.user, dn_inst.hostname, dn_inst.port, True) + if status == 0: + self.logger.log("Successfully clean hadr user info from database.") + return + msg = ErrorCode.GAUSS_516['GAUSS_51632'] % "clean hadr user info from database" + self.logger.debug(msg + "Error:%s" % SensitiveMask.mask_pwd(output)) + + def get_build_info(self): + """ + Assemble build infos + """ + # 1. Get local primary dn inst dir, host + self.logger.debug("Start assemble build info") + dn_inst_info = [] + dn_instances = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if int(dn_inst.mirrorId) == 1] + for dn_inst in dn_instances: + dn_info = dict() + dn_info["port"] = dn_inst.port + 1 + dn_info["data_dir"] = dn_inst.datadir + dn_info["host_name"] = dn_inst.hostname + dn_info["listen_ip"] = self.__get_local_data_ip(dn_inst.hostname) + self.logger.debug("Got build listen ips:%s, ip:%s selected." + % (str(dn_inst.listenIps), dn_info["listen_ip"])) + dn_inst_info.append(dn_info) + + # 2. Get remote dn ip and port + remote_ip_port = [] + shards = self.params.remoteClusterConf["shards"] + remote_port = int(self.params.remoteClusterConf["port"]) + 1 + shard_info = [info for shard in shards for info in shard + if info.get("shardNum", "1") == "1"] + for node_info in shard_info: + remote_ip = node_info.get("dataIp") + remote_ip_port.append((remote_ip, remote_port)) + if (not dn_inst_info) or (not remote_ip_port): + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain dn info") + self.logger.debug("Successfully get remote dn info:%s." 
% remote_ip_port) + return dn_inst_info, remote_ip_port + + def build_file_from_remote(self): + """ + Build files from remote cluster + """ + local_dn_info, remote_ip_port = self.get_build_info() + cmd_local = 'source %s; %s/gs_ctl build -D %s -M standby -b copy_secure_files -Z datanode' \ + ' -U %s -P "%s" -C "localhost=%s localport=%s remotehost=%s remoteport=%s"' + cmd_remote = "echo \"source %s; %s/gs_ctl build -D %s -M standby -b copy_secure_files -Z " \ + "datanode -U %s -P '%s' -C 'localhost=%s localport=%s " \ + "remotehost=%s remoteport=%s'\"" \ + " | pssh -s -H %s" + + end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout) + self.logger.debug("Retry Building with timeout:%ss." % self.params.waitingTimeout) + succeed = False + while datetime.now() < end_time: + for local_primary in local_dn_info: + for remote_ip, remote_port in remote_ip_port: + if local_primary["host_name"] == NetUtil.GetHostIpOrName(): + cmd = cmd_local % (self.mpp_file, "%s/bin" % self.gauss_home, + local_primary["data_dir"], + self.params.hadrUserName, self.params.hadrUserPassword, + local_primary["listen_ip"], local_primary["port"], + remote_ip, remote_port) + else: + cmd = cmd_remote % (self.mpp_file, "%s/bin" % self.gauss_home, + local_primary["data_dir"], + self.params.hadrUserName, self.params.hadrUserPassword, + local_primary["listen_ip"], local_primary["port"], + remote_ip, remote_port, local_primary["host_name"]) + result = DefaultValue.fast_ping_on_node(local_primary["host_name"], + local_primary["listen_ip"], + remote_ip, self.logger) + if not result[-1]: + self.logger.debug("Ignore build from %s, ping result:%s" + % (remote_ip, result[-1])) + continue + if self.cluster_info.isSingleInstCluster(): + cmd = cmd.replace(" -Z datanode", "") + self.logger.debug("Building with cmd:%s." + % cmd.replace(self.params.hadrUserPassword, "***")) + status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd) + if status == 0: + succeed = True + self.logger.debug("Successfully Building with cmd:%s." + % cmd.replace(self.params.hadrUserPassword, "***")) + return succeed + else: + self.logger.debug("Building result:%s." 
% SensitiveMask.mask_pwd(output)) + time.sleep(1) + return succeed + + def __copy_secure_dir_from_dn_dir(self): + """ + Find and copy key file dir from all dn dir + """ + local_temp_secure_path = os.path.join( + self.streaming_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) + if os.path.isdir(local_temp_secure_path): + FileUtil.removeDirectory(local_temp_secure_path) + rand_path = os.path.join(local_temp_secure_path, DoradoDisasterRecoveryConstants.HADR_KEY_RAND) + cipher_path = os.path.join(local_temp_secure_path, DoradoDisasterRecoveryConstants.HADR_KEY_CIPHER) + cmd_tep = "echo \"if [ -d '%s' ];then source %s && pscp --trace-id %s -H %s '%s' '%s' " \ + "&& rm -rf '%s';fi\" | pssh -s -H %s" + succeed = False + for db_node in self.cluster_info.dbNodes: + for dn_inst in db_node.datanodes: + if int(dn_inst.mirrorId) == 1: + key_file_path = os.path.realpath(os.path.join( + dn_inst.datadir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES)) + cmd_copy_dir = cmd_tep % (key_file_path, self.mpp_file, self.trace_id, + self.local_host, key_file_path, + self.streaming_file_dir, + key_file_path, dn_inst.hostname) + status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd_copy_dir) + self.logger.debug("Copy cmd:%s" % cmd_copy_dir) + if status != 0: + self.logger.debug("Try copy secure dir from:[%s][%s], error:%s" % ( + dn_inst.hostname, key_file_path, output)) + if os.path.isdir(local_temp_secure_path) and os.path.isfile(rand_path) \ + and os.path.isfile(cipher_path): + succeed = True + if not succeed: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "copy secure file dir") + self.logger.debug("Successfully copy secure dir, file list:%s." % + os.listdir(local_temp_secure_path)) + + def build_and_distribute_key_files(self, only_mode=None): + """ + Distribute key files + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Wal keep segment opts not for mode:%s." % self.params.mode) + return + self.logger.log("Start build key files from remote cluster.") + # build file + if not self.build_file_from_remote(): + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "build files from cluster") + # copy file from data dir to streaming dir + self.__copy_secure_dir_from_dn_dir() + # check version consistency + self.__check_version_file() + # check cluster user consistency + self.__check_cluster_user() + # distribute key files to all node + secure_dir_path = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) + self.__copy_hadr_user_key(secure_dir_path, update=True) + FileUtil.removeDirectory(secure_dir_path) + self.logger.log("Successfully build and distribute key files to all nodes.") + + def __check_version_file(self): + """ + function: Check whether the version numbers of the host + cluster and the disaster recovery cluster are the same + """ + gs_secure_version = os.path.realpath(os.path.join(self.streaming_file_dir, + "gs_secure_files/version.cfg")) + master_commit_id = VersionInfo.get_version_info(gs_secure_version)[-1] + local_version_file = VersionInfo.get_version_file() + local_commit_id = VersionInfo.get_version_info(local_version_file)[-1] + self.logger.debug("The committed of the host cluster is %s, " + "and the committed of the disaster recovery cluster is %s" % + (master_commit_id, local_commit_id)) + if local_commit_id != master_commit_id: + raise ValueError(ErrorCode.GAUSS_516["GAUSS_51632"] % + "check version. 
Different version of cluster and disaster recovery") + + def __check_cluster_user(self): + """ + function: Check whether the version numbers of the host + cluster and the disaster recovery cluster are the same + """ + user_file = os.path.realpath(os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.GS_SECURE_FILES, + DoradoDisasterRecoveryConstants.CLUSTER_USER_RECORD)) + remote_user = DefaultValue.obtain_file_content(user_file, is_list=False) + if remote_user.strip() != self.user: + self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] + % "check cluster user consistency, remote:%s, local:%s" + % (remote_user, self.user)) + self.logger.debug("Successfully checked cluster user consistency.") + + def check_cluster_type(self, allowed_type): + """ + Check cluster type is allowed type or not + """ + if allowed_type == 'primary' and self.main_standby_ids: + self.logger.logExit(ErrorCode.GAUSS_516['GAUSS_51632'] + % "check cluster type, standby cluster is not supported for %s" + % self.params.task) + elif allowed_type == 'standby' and self.primary_dn_ids: + self.logger.logExit(ErrorCode.GAUSS_516['GAUSS_51632'] + % "check cluster type, primary cluster is not supported for %s" + % self.params.task) + else: + self.logger.log("Check cluster type succeed.") + + def __remove_streaming_repl_info(self, params): + """ + Remove streaming repl info from single dn instances. + """ + dn_inst, guc_mode, dn_num = params + self.logger.debug("Start remove replconninfo for instance:%s" % dn_inst.instanceId) + for idx in range(1, dn_num + 1): + if dn_inst.hostname == self.local_host: + cmd = "source %s; gs_guc check -Z datanode -D %s " \ + "-c 'replconninfo%s'" % (self.mpp_file, dn_inst.datadir, idx) + else: + cmd = "source %s; pssh -H %s 'source %s; gs_guc check " \ + "-Z datanode -D %s -c \"replconninfo%s\"'" \ + % (self.mpp_file, dn_inst.hostname, self.mpp_file, dn_inst.datadir, idx) + self.logger.debug("Check original repl infos with cmd:%s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + if output.count("=NULL") > 2: + continue + elif "iscrossregion=false" in output.lower(): + ret = re.search( + r"replconninfo%s='localhost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" + r" localport=(\d{4,5}) localheartbeatport=(\d{4,5}) " + r"localservice=(\d{4,5}) " + r"remotehost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}) " + r"remoteport=(\d{4,5}) remoteheartbeatport=(\d{4,5}) " + r"remoteservice=(\d{4,5})" % idx, output) + if not ret: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "search repl infos") + if dn_inst.hostname != NetUtil.GetHostIpOrName(): + set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c " \ + "\\\"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s " \ + "remoteservice=%s'\\\"\"" + set_cmd = set_cmd % (self.mpp_file, dn_inst.hostname, + self.mpp_file, guc_mode, + dn_inst.datadir, idx, ret.group(1), + ret.group(2), ret.group(3), ret.group(4), + ret.group(5), ret.group(6), ret.group(7), + ret.group(8)) + else: + set_cmd = "source %s ; gs_guc %s -Z datanode -D %s -c " \ + "\"replconninfo%s = 'localhost=%s localport=%s " \ + "localheartbeatport=%s localservice=%s remotehost=%s " \ + "remoteport=%s remoteheartbeatport=%s " \ + "remoteservice=%s'\"" + set_cmd = set_cmd % (self.mpp_file, guc_mode, + dn_inst.datadir, idx, ret.group(1), + ret.group(2), 
ret.group(3), ret.group(4), + ret.group(5), ret.group(6), ret.group(7), + ret.group(8)) + self.logger.debug("Set original repl infos with cmd:%s" % set_cmd) + status, output = CmdUtil.retryGetstatusoutput(set_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + + " Error: \n%s " % output) + self.logger.debug("Successfully remove original repl infos with cmd:%s." + % set_cmd) + elif "iscrossregion=true" in output.lower(): + if dn_inst.hostname != self.local_host: + set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c \\\"replconninfo%s\\\"\"" + set_cmd = set_cmd % (self.mpp_file, dn_inst.hostname, + self.mpp_file, guc_mode, + dn_inst.datadir, idx) + else: + set_cmd = "source %s ; gs_guc %s -Z datanode -D %s -c " \ + "\"replconninfo%s\"" + set_cmd = set_cmd % (self.mpp_file, guc_mode, + dn_inst.datadir, idx) + self.logger.debug("Remove stream repl infos with cmd:%s" % set_cmd) + status, output = CmdUtil.retryGetstatusoutput(set_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + + " Error: \n%s " % output) + self.logger.debug("Successfully remove stream repl infos with cmd:%s." + % set_cmd) + self.logger.debug("Successfully removed replconninfo for instance:%s" % dn_inst.instanceId) + + def remove_all_stream_repl_infos(self, guc_mode="set"): + """ + Remove retreaming disaster repl infos from all instances + """ + params = [] + dn_instances = [inst for node in self.cluster_info.dbNodes + for inst in node.datanodes] + cluster_conf = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) + dn_num = DefaultValue.get_all_dn_num_for_dr(cluster_conf, dn_instances[0], + self.cluster_info, self.logger) + for inst in dn_instances: + if inst.instanceId not in self.normal_dn_ids: + self.logger.error("Ignore rectify repl info of dn:%s" % inst.instanceId) + continue + params.append((inst, guc_mode, dn_num)) + if params: + self.logger.log("Starting remove all node dn instances repl infos.") + parallelTool.parallelExecute(self.__remove_streaming_repl_info, params) + self.logger.log("Successfully remove all node dn instances repl infos.") + + def remove_streaming_cluster_file(self): + """ + function: remove the parameter file for config pg_hba + :return: NA + """ + self.logger.log("Start remove cluster file.") + cluster_info_file = os.path.join(self.streaming_file_dir, + DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) + cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (cluster_info_file, cluster_info_file) + try: + self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterNodeNames()) + except Exception as error: + self.logger.debug( + "Failed to remove cluster file with error:%s" % error) + self.logger.log("Finished remove cluster file.") + + def remove_streaming_pg_hba(self, ignore_error=False): + """ + Remove remote ips from pg hba of streaming disaster + """ + self.logger.log("Start remove pg_hba config.") + remove_ips = [] + shards = self.params.remoteClusterConf["shards"] + for shard in shards: + for node_info in shard: + data_ip = node_info.get("dataIp") + remove_ips.append(data_ip) + remove_ips = list(set(remove_ips)) + host_names = self.get_all_connection_node_name("remove_streaming_pg_hba") + self.logger.debug("Remove ips:%s from pg_hba on nodes:%s" % ( + str(remove_ips), str(host_names))) + cmd = "%s -U '%s' -l '%s'" % (OMCommand.getLocalScript("Local_Config_Hba"), + self.user, self.log_file) + remove_ips_str = "" + for node_ip in 
remove_ips: + remove_ips_str += " --remove-ip %s" % node_ip + cmd += remove_ips_str + self.logger.debug("Command for updating pg_hba:%s." % cmd) + try: + self.ssh_tool.executeCommand(cmd, DefaultValue.SUCCESS, host_names) + except Exception as error: + self.logger.debug("Failed updating pg_hba with error:%s." % error) + if not ignore_error: + raise error + self.logger.log("Finished remove pg_hba config.") + + def streaming_drop_replication_slot(self, dn_inst, drop_slots): + """ + Delete dn_xxx_hadr on all dn nodes if dn_xxx_hadr exists when the disaster tolerance + relationship is lifted + """ + if not drop_slots: + self.logger.debug("WARNING:Not found dn_xxx_hadr on %s node, No need to " + "delete." % dn_inst.instanceId) + else: + for slot in drop_slots: + self.logger.debug("starting drop inst %s %s" % (dn_inst.instanceId, slot.strip())) + sql = "select * from pg_catalog.pg_drop_replication_slot('%s');" % slot.strip() + status_dr, output_dr = ClusterCommand.remoteSQLCommand( + sql, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True) + self.logger.debug("get %s need drop replication_slots, status=%d, " + "output: %s." % (dn_inst.hostname, status_dr, + SensitiveMask.mask_pwd(output_dr))) + if status_dr != 0: + self.logger.debug("Failed to remove inst %s %s with error: %s" % ( + dn_inst.instanceId, slot.strip(), output_dr)) + self.logger.debug( + "Successfully drop node %s %s" % (dn_inst.instanceId, slot.strip())) + + def concurrent_drop_slot(self, dn_inst): + """ + concurrent drop all dn replication slots + """ + sql_check = "select * from pg_catalog.pg_get_replication_slots();" + self.logger.debug("Starting concurrent drop node %s instance [%s] replication slots" % + (dn_inst.hostname, dn_inst.instanceId)) + status, output = ClusterCommand.remoteSQLCommand( + sql_check, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True) + self.logger.debug("get %s all replication slots, status=%d, output: %s." % + (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output))) + if status == 0 and output.strip(): + drop_slots = [] + if str(dn_inst.instanceId).startswith("6"): + drop_slots = re.findall(r"dn_\d+_hadr", output.strip()) + if str(dn_inst.instanceId).startswith("5"): + drop_slots = re.findall(r"cn_\d+_\d+\.\d+\.\d+\.\d+_\d+", output.strip()) + self.logger.debug("Waiting to delete instance [%s] replication slots is: %s" % + (dn_inst.instanceId, drop_slots)) + self.streaming_drop_replication_slot(dn_inst, drop_slots) + else: + self.logger.debug("Obtain all replication slot results:%s." % output) + + def streaming_clean_replication_slot(self): + """ + Delete dn_xxx_hadr on all dn nodes if dn_xxx_hadr exists when the disaster tolerance + relationship is lifted + """ + self.logger.log("Starting drop all node replication slots") + params = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if dn_inst.instanceId in self.normal_dn_ids] + self.logger.debug("need drop all node replication slots: %s" % + [inst.instanceId for inst in params]) + parallelTool.parallelExecute(self.concurrent_drop_slot, params) + self.logger.log("Finished drop all node replication slots") + + def update_streaming_info(self, key, value, only_mode=None): + """ + Update info for streaming status + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Update query status [%s] to [%s] " + "not for mode:%s." % (key, value, self.params.mode)) + return + self.logger.debug("Update query [%s] to [%s]." 
% (key, value)) + try: + if key == "cluster": + key_stat = DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT + elif key == DoradoDisasterRecoveryConstants.ACTION_FAILOVER: + key_stat = DoradoDisasterRecoveryConstants.HADR_FAILOVER_STAT + elif key == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + key_stat = DoradoDisasterRecoveryConstants.HADR_SWICHOVER_STAT + elif key == DoradoDisasterRecoveryConstants.ACTION_ESTABLISH: + key_stat = DoradoDisasterRecoveryConstants.HADR_ESTABLISH_STAT + else: + self.logger.debug("key error.") + return + file_path = os.path.realpath(os.path.join(self.streaming_file_dir, key_stat)) + with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + DefaultValue.KEY_FILE_MODE_IN_OS), "w") as fp_write: + fp_write.write(value) + host_names = self.get_all_connection_node_name( + action_flag="update_streaming_info", no_update=True) + self.ssh_tool.scpFiles(file_path, self.streaming_file_dir, host_names) + except Exception as error: + self.logger.debug("Failed write info, key:%s, value:%s, " + "error:%s." % (key, value, error)) + + def create_cluster_maintance_file(self, value): + """ + add cluster_maintance file for streaming failover and switchover disaster_standby + """ + self.logger.debug("Start create cluster_maintance file.") + try: + cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home, + "bin/cluster_maintance")) + with os.fdopen(os.open(cluster_maintance_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + DefaultValue.KEY_FILE_MODE_IN_OS), "w") as fp_write: + fp_write.write(value) + host_names = self.get_all_connection_node_name("create_cluster_maintance_file") + self.ssh_tool.scpFiles(cluster_maintance_file, + os.path.join(self.gauss_home, "bin"), host_names) + except Exception as error: + self.logger.debug("WARNING: Failed create cluster_maintance file, value:%s, " + "error:%s." % (value, str(error))) + self.logger.debug("Successfully create cluster_maintance file.") + + def streaming_failover_single_inst(self, stream_disaster_step, action_flag=None): + """ + streaming disaster recovery failover for single_inst cluster + """ + self.create_cluster_maintance_file("streaming failover") + if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.update_streaming_info("cluster", "promote") + # 0. 
check cluster status and get normal instance list + if stream_disaster_step < 0: + if action_flag == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") + else: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "10%") + self.init_cluster_status() + self.parse_cluster_status() + self.write_streaming_step("0_check_cluster_status_done_for_failover") + # 1.Specify max xid and max ter to start etcd + max_term_record = os.path.join(self.streaming_file_dir, ".max_term_record") + if stream_disaster_step < 1: + max_term = self.get_term_info() + term_key = "/%s/CMServer/status_key/term" % self.user + para_dict = {term_key: max_term, self.backup_open_key: "0"} + ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, + self.logger, self.user, para_dict) + DefaultValue.write_content_on_file(max_term_record, max_term) + self.write_streaming_step("1_start_etcd_done_for_failover") + self._failover_config_step(stream_disaster_step, action_flag) + self._failover_start_step(stream_disaster_step, action_flag, max_term_record) + + def _failover_start_step(self, stream_disaster_step, action_flag, max_term_record): + """ + Failover step 5 & 6 + """ + if stream_disaster_step < 5: + if action_flag == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "80%") + else: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "80%") + if not os.path.isfile(max_term_record): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % max_term_record) + _, dn_infos = self.get_specified_dn_infos() + max_term_list = DefaultValue.obtain_file_content(max_term_record) + if not max_term_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "read max term") + params = [(dn_info, max_term_list[0]) for dn_info in dn_infos] + if params: + parallelTool.parallelExecute(self.start_primary_dn, params) + self.write_streaming_step("5_start_primary_dn_done") + if stream_disaster_step < 6: + self.start_cluster() + cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL, + DefaultValue.CLUSTER_STATUS_DEGRADED] + self.check_cluster_status(cluster_normal_status, check_current=True) + cluster_info = self.query_cluster_info() + self.parse_cluster_status(current_status=cluster_info) + if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.clean_global_config() + self.restore_guc_params() + self.streaming_clean_archive_slot() + if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") + self.update_streaming_info("cluster", "normal") + else: + self.update_streaming_info("cluster", "archive") + + def streaming_clean_archive_slot(self): + """ + drop lot_type is physical and slot_name not contain (gs_roach_full,gs_roach_inc, + cn_xxx,dn_xxx, dn_xxx_hadr) on all cn node and all primary dn node if the + slot_name exists when the disaster cluster become primary cluster + """ + self.logger.debug("Starting drop archive slots") + params = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + self.logger.debug("need drop all node archive slots: %s" % + [inst.instanceId for inst in params]) + parallelTool.parallelExecute(self.parallel_drop_archive_slot, params) + self.logger.debug("Successfully drop all node archive slots") + + def 
parallel_drop_archive_slot(self, dn_inst): + """ + concurrent drop all primary dn and all cn archive slots + """ + sql_check = "select slot_name from pg_catalog.pg_get_replication_slots() " \ + "where slot_type='physical' and slot_name not in " \ + "('gs_roach_full', 'gs_roach_inc') and slot_name not like 'cn_%' and " \ + "slot_name not like 'dn_%';" + self.logger.debug("Starting concurrent drop node %s instance [%s] archive slots" % + (dn_inst.hostname, dn_inst.instanceId)) + (status, output) = ClusterCommand.remoteSQLCommand( + sql_check, self.user, dn_inst.hostname, dn_inst.port) + self.logger.debug("get %s all archive slots, status=%d, output: %s." % + (dn_inst.instanceId, status, output)) + if status == 0 and output.strip(): + archive_slots = output.strip().split('\n') + self.logger.debug("Waiting to delete instance [%s] archive slots is: %s" % + (dn_inst.instanceId, archive_slots)) + self.streaming_drop_replication_slot(dn_inst, archive_slots) + + def get_specified_dn_infos(self, update=False, dn_status="Primary"): + + """ + Get specified dn infos + """ + tmp_file = os.path.join(self.streaming_file_dir, "cluster_state_tmp") + if not os.path.isfile(tmp_file) or update: + cmd = ClusterCommand.getQueryStatusCmd(self.user, 0, tmp_file) + self.logger.debug("Update cluster state with cmd: %s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain primary dn infos" + "Error:%s" % output) + cluster_info = DbClusterStatus() + cluster_info.initFromFile(tmp_file) + dn_infos = [] + dn_ids = [] + dn_instances = [(inst, db_node.name) for db_node in cluster_info.dbNodes + for inst in db_node.datanodes] + for data_inst, db_node_name in dn_instances: + if data_inst.status == dn_status: + one_dn_info = dict() + one_dn_info["node_ip"] = db_node_name + one_dn_info["instance_id"] = data_inst.instanceId + one_dn_info["data_dir"] = data_inst.datadir + dn_ids.append(data_inst.instanceId) + dn_infos.append(one_dn_info) + self.logger.debug("Got primary dn infos: %s:%s" % (dn_ids, dn_infos)) + return dn_ids, dn_infos + + def start_primary_dn(self, params): + """ + Start main standby as primary dn in streaming failover. + """ + dn_info, max_term = params + opt_type = " -Z datanode" if not self.cluster_info.isSingleInstCluster() else "" + self.logger.debug("Starting primary dn %s, max term:%s." 
% + (dn_info["instance_id"], max_term)) + bin_path = "%s/bin" % self.cluster_info.appPath + instance_id = dn_info["instance_id"] + hostname = dn_info["node_ip"] + data_dir = dn_info["data_dir"] + if self.local_ip == hostname: + cmd_start = "source %s; %s/gs_ctl start%s -D %s -M pending -t 600" % \ + (self.mpp_file, bin_path, opt_type, data_dir) + else: + cmd_start = "source %s; pssh -s -t 900 -H %s \"source %s; " \ + "%s/gs_ctl start%s -D %s -M pending" \ + " -t 600\"" % (self.mpp_file, hostname, self.mpp_file, + bin_path, opt_type, data_dir) + self.logger.debug("Start primary dn with cmd:%s" % cmd_start) + status, output = CmdUtil.retryGetstatusoutput(cmd_start) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "start primary dn %s with error:%s" + % (instance_id, output)) + self.logger.debug("Successfully start primary dn %s" % instance_id) + if self.local_ip == hostname: + cmd_config = "source %s; %s/gs_ctl notify%s -D %s -M primary -T %s -t 600" \ + % (self.mpp_file, bin_path, opt_type, data_dir, max_term) + else: + cmd_config = "source %s; pssh -s -t 900 -H %s \"source %s; %s/gs_ctl notify%s -D %s " \ + "-M primary -T %s -t 600\"" % (self.mpp_file, self.mpp_file, hostname, + bin_path, opt_type, data_dir, max_term) + self.logger.debug("Config primary dn with cmd:%s" % cmd_config) + status, output = CmdUtil.retryGetstatusoutput(cmd_config) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "config primary dn %s with error:%s" + % (instance_id, output)) + self.logger.debug("Successfully start and config primary dn:%s" % instance_id) + + def stream_disaster_set_cmserver_guc(self, guc_parameter, guc_value, guc_type): + """ + set cmserver guc param + :param guc_parameter: guc param + :param guc_value: value + :param guc_type: init type + :return: NA + """ + self.logger.debug("Starting set cm server for streaming disaster.") + cmd = "source %s && gs_guc %s -Z cmserver -D 'cm_instance_data_path' -c \"%s=%s\" " \ + % (self.mpp_file, guc_type, guc_parameter, guc_value) + self.logger.debug("streaming disaster calling set cms, cmd=[%s]" % cmd) + self.ssh_tool.executeCommand(cmd, hostList=self.normal_cm_ips) + self.logger.debug("Successfully set cm server for streaming disaster.") + + def stream_disaster_set_cmagent_guc(self, guc_parameter, guc_value, guc_type): + """ + set cmagent guc param + :param guc_parameter: guc param + :param guc_value: value + :param guc_type: init type + :return: NA + """ + self.logger.debug("Starting set cm agent for streaming disaster.") + cmd = "source %s && gs_guc %s -Z cmagent -D 'cm_instance_data_path' -c \"%s=%s\" " \ + % (self.mpp_file, guc_type, guc_parameter, guc_value) + self.logger.debug("streaming disaster calling set cma, cmd=[%s]" % cmd) + self.ssh_tool.executeCommand(cmd, hostList=self.normal_node_list) + self.logger.debug("Successfully set cm agent for streaming disaster.") + + def _failover_config_step(self, stream_disaster_step, action_flag): + """ + Failover step 2 - 4 + """ + # 2.Stop the cluster by node + if stream_disaster_step < 2: + if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.streaming_clean_replication_slot() + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "30%") + self.stop_cluster_by_node() + self.write_streaming_step("2_stop_cluster_done_for_failover") + # 3.Start the cluster in the main cluster mode + if stream_disaster_step < 3: + self.set_cmserver_guc("backup_open", "0", "set") + 
self.stream_disaster_set_cmagent_guc("agent_backup_open", "0", "set") + self.write_streaming_step("3_set_backup_open_for_failover") + # 4.Delete the relevant guc parameters and remove the disaster tolerance relationship + # based on streaming disaster recovery cluster, No need to delete for switchover. + if not action_flag: + if stream_disaster_step < 4: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "50%") + self.remove_all_stream_repl_infos() + self.remove_streaming_pg_hba(True) + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "70%") + self.write_streaming_step("4_remove_hba_repl_done_for_failover") + + def get_term_info(self): + """get_term_info""" + # get max term from dns + return self.get_term() + + def get_term(self, normal_dn=True): + """ + get etcd term + """ + max_term = 0 + sql_cmd = "select term from pg_last_xlog_replay_location();" + params_list = [(inst, sql_cmd, max_term, normal_dn) for db_node in + self.cluster_info.dbNodes for inst in db_node.datanodes] + if params_list: + term_list = parallelTool.parallelExecute(self.get_max_term_by_compare, params_list) + self.logger.debug("Get term list: %s." % term_list) + if not term_list: + max_term = 0 + else: + max_term = int(max(term_list)) + if int(max_term) == 0: + raise Exception("Failed get term") + max_term = int(max_term) + 100 + self.logger.debug("Get max term %s in dns" % max_term) + return max_term + + def streaming_switchover_roll_back_condition(self): + """ + check need rollback or not by Main Standby dn status + output: return True means need rollback + """ + self.logger.debug("Starting check switchover rollback condition.") + cluster_status = self.query_cluster_info(cm_check=True) + if not cluster_status: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] + % "query cluster status when check rollback condition") + + rollback_check_list = ["Main Standby Need repair(Disconnected)", + "Main Standby Need repair(Connecting)"] + need_rollback = False + for check_status in rollback_check_list: + if check_status in cluster_status: + need_rollback = True + self.logger.debug("Successfully check rollback condition: %s." % need_rollback) + self.logger.debug("Cluster status: %s." 
% cluster_status) + return need_rollback + + def get_max_term_by_compare(self, params): + """ + get max term by compare + """ + instance, sql_cmd, max_term, normal_dn = params + if (normal_dn is True and instance.instanceId in self.normal_dn_ids) or \ + (normal_dn is False and instance.instanceType == DefaultValue.MASTER_INSTANCE): + (status, output) = ClusterCommand.remoteSQLCommand( + sql_cmd, self.user, instance.hostname, instance.port, maintenance_mode=True) + if status != 0 or self.find_error(output): + raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % + sql_cmd + "\nError: %s" % output) + self.logger.debug("TERM %s, Instance %s" % (output, instance.instanceId)) + term = output.strip() + if int(term) > int(max_term): + max_term = term + return int(max_term) + + def remove_cluster_maintance_file(self): + """ + function: remove the cluster_maintance file + :return: NA + """ + self.logger.debug("Start remove cluster_maintance file.") + cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home, + "bin/cluster_maintance")) + cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (cluster_maintance_file, cluster_maintance_file) + host_names = self.get_all_connection_node_name("remove_cluster_maintance_file") + try: + self.ssh_tool.executeCommand(cmd, hostList=host_names) + except Exception as error: + self.logger.debug( + "Failed to remove cluster_maintance file with error: %s" % str(error)) + self.logger.debug("Successfully remove %s cluster_maintance file." % host_names) + + def get_node_sship_from_nodeid(self, node_id): + """ + get node sship from nodeid + :param node_id: node id + :return: + """ + for nodename in self.cluster_info.dbNodes: + if int(node_id) == int(nodename.id): + return nodename.sshIps[0] + + def delivery_file_to_other_node(self, path_name, file_name, node_list=None): + """delivery_file_to_other_node""" + send_file = "%s/%s" % (path_name, file_name) + send_file_bak = "%s/%s_bak" % (path_name, file_name) + if not os.path.isfile(send_file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % send_file) + + if node_list: + p_node_list = " -H ".join(node_list) + elif self.cluster_info.getClusterNodeNames(): + p_node_list = " -H ".join(self.cluster_info.getClusterNodeNames()) + else: + raise Exception("Failed to delivery file: %s, node information does not exits" + % file_name) + pscp_cmd = "cp %s %s && source %s && pscp -t 60 -H %s %s %s && rm -f %s" % \ + (send_file, send_file_bak, self.mpp_file, p_node_list, + send_file_bak, send_file, send_file_bak) + status, output = CmdUtil.retryGetstatusoutput(pscp_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % pscp_cmd + " Error:\n%s" % output) + else: + self.logger.debug("Successfully send %s to all nodes" % send_file) + + @staticmethod + def find_error(output): + """ + error rule + :param output: error info + :return:bool + """ + error_msg_flag = "(ERROR|FATAL|PANIC)" + error_pattern = "^%s:.*" % error_msg_flag + pattern = re.compile(error_pattern) + for line in output.split("\n"): + line = line.strip() + result = pattern.match(line) + if result is not None: + return True + return False + + def set_stream_cluster_run_mode_guc(self, guc_mode, fail_over=False): + """ + function: set cluster run mode guc + :return: + """ + cluster_run_mode = "cluster_primary" if self.params.mode == "primary" \ + else "cluster_standby" + if fail_over: + cluster_run_mode = "cluster_primary" + guc_cmd = "source %s && gs_guc %s -Z datanode -N all -I all -c " \ + "\"stream_cluster_run_mode = '%s'\"" % \ + 
(self.mpp_file, guc_mode, cluster_run_mode) + host_names = self.cluster_info.getClusterNodeNames() + ignore_node = [node for node in host_names if node not in self.normal_node_list] + if ignore_node: + self.logger.debug( + "WARNING: cluster_run_mode for datanode ignore nodes:%s" % ignore_node) + nodes = ",".join(ignore_node) + guc_cmd = guc_cmd + " --ignore-node %s" % nodes + self.logger.debug("Set dn stream_cluster_run_mode with cmd:%s" % guc_cmd) + (status, output) = CmdUtil.retryGetstatusoutput(guc_cmd) + if status != 0: + self.logger.debug("Warning: Failed %s dn stream_cluster_run_mode=%s, output: %s" % + (guc_mode, cluster_run_mode, str(output))) + else: + self.logger.debug("Successfully %s streaming cluster run mode for " + "datanode param %s" % (guc_mode, cluster_run_mode)) + + guc_cmd_cn = "source %s && gs_guc %s -Z coordinator -N all -I all -c " \ + "\"stream_cluster_run_mode = '%s'\"" % \ + (self.mpp_file, guc_mode, cluster_run_mode) + if ignore_node: + self.logger.debug( + "WARNING: cluster_run_mode for coordinator ignore nodes:%s" % ignore_node) + nodes = ",".join(ignore_node) + guc_cmd_cn = guc_cmd_cn + " --ignore-node %s" % nodes + self.logger.debug("Set cn stream_cluster_run_mode with cmd:%s" % guc_cmd_cn) + (status, output) = CmdUtil.retryGetstatusoutput(guc_cmd_cn) + if status != 0: + self.logger.debug("Warning: Failed %s cn stream_cluster_run_mode=%s, output: %s" % + (guc_mode, cluster_run_mode, str(output))) + else: + self.logger.debug("Successfully %s streaming cluster run mode for " + "coordinator param %s" % (guc_mode, cluster_run_mode)) + + def set_data_in_dcc(self, key, value, only_mode=None): + """ + Set data in dcc + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("set [%s][%s] not for mode:%s." % (key, value, self.params.mode)) + return + self.logger.debug("Start set data: [%s][%s] in dcc." % (key, value)) + ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, + self.logger, self.user, + {key: value}) + self.logger.log("Successfully set [%s][%s]." % (key, value)) + + def stop_cluster(self, action=None): + """ + stop the cluster + """ + self.logger.log("Stopping the cluster.") + static_config = "%s/bin/cluster_static_config" % self.cluster_info.appPath + cm_ctl_file = "%s/bin/cm_ctl" % self.cluster_info.appPath + if not os.path.isfile(static_config) or not os.path.isfile(cm_ctl_file): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % + (static_config + " or " + cm_ctl_file)) + cmd = ClusterCommand.getStopCmd(0, "i", 1800) + if action: + cmd = ClusterCommand.getStopCmd(0, timeout=1800) + self.logger.debug("disaster cluster calling cm_ctl to stop cluster, cmd=[%s]" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd, retry_time=0) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % + ("the cluster" + " Error:\n%s." % output)) + self.logger.log("Successfully stopped the cluster.") diff --git a/script/impl/dorado_disaster_recovery/ddr_constants.py b/script/impl/dorado_disaster_recovery/ddr_constants.py new file mode 100644 index 00000000..6e185b35 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_constants.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : streaming_constants.py is utility for defining constants +# of streaming disaster recovery. +############################################################################# + + +class DoradoDisasterRecoveryConstants: + + # streaming files + DDR_LOG_FILE = "gs_ddr.log" + DDR_FILES_DIR = 'ddr_cabin' + DDR_CLUSTER_STATUS_TMP_FILE = "cluster_state_tmp" + WAL_KEEP_SEGMENTS = ".wal_keep_segments_record" + DDR_CLUSTER_CONF_RECORD = "cluster_conf_record" + GS_SECURE_FILES = "gs_secure_files" + HADR_KEY_CIPHER = "hadr.key.cipher" + HADR_KEY_RAND = "hadr.key.rand" + STREAM_SWITCHOVER_STATE = ".switchover_cluster_state" + MAX_TERM_RECORD = ".max_term_record" + PROCESS_LOCK_FILE = 'ddr_lock_' + STREAMING_CONFIG_XML = "ddr_config.xml" + GUC_BACKUP_FILE = ".ddr_guc_backup" + CLUSTER_USER_RECORD = ".cluster_user_record" + + ACTION_START = "start" + ACTION_SWITCHOVER = "switchover" + ACTION_FAILOVER = "failover" + + ACTION_ESTABLISH = "establish" + + # streaming query temp file + HADR_CLUSTER_STAT = ".hadr_cluster_stat" + HADR_FAILOVER_STAT = ".hadr_failover_stat" + HADR_SWICHOVER_STAT = ".hadr_switchover_stat" + HADR_ESTABLISH_STAT = ".hadr_establish_stat" + + STREAM_DISTRIBUTE_ACTION = "distribute_stream_failover" + + # GUC CHANGE MAP + GUC_CHANGE_MAP = {"most_available_sync": "on", "synchronous_commit": "on"} + + # params in json file for each module + STREAMING_JSON_PARAMS = { + "start": ["localClusterConf", "remoteClusterConf"], + "stop": ["localClusterConf", "remoteClusterConf"], + "switchover": [], + "failover": [] + } + + # step file of each module + DDR_STEP_FILES = { + "start_primary": ".ddr_start_primary.step", + "start_standby": ".ddr_start_standby.step", + "stop": ".ddr_stop.step", + "switchover_primary": ".ddr_switchover_primary.step", + "switchover_standby": ".ddr_switchover_standby.step", + "failover": ".ddr_failover.step", + "query": ".ddr_query.step", + } + # task need check process is exist + TASK_EXIST_CHECK = ["start", "stop", "switchover", "failover"] + + # default values + MAX_WAL_KEEP_SEGMENTS = 16384 + MAX_REPLICATION_NUMS = 8 + MAX_BUILD_TIMEOUT = 1209600 + STANDBY_START_TIMEOUT = 3600 * 24 * 7 + CHECK_PROCESS_WAIT_TIME = 3 + + # backup open key + BACKUP_OPEN = "/%s/CMServer/backup_open" + + # log remark + LOG_REMARK = "-" * 80 diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/__init__.py b/script/impl/dorado_disaster_recovery/ddr_modules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py new file mode 100644 index 00000000..ee341be5 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. 
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : dorado_diaster_recovery_start.py is a utility for creating the
+#               disaster recovery relationship between the primary cluster and
+#               the standby cluster.
+
+import os
+
+from base_utils.security.sensitive_mask import SensitiveMask
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.Common import DefaultValue, ClusterCommand
+from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase
+from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants
+
+
+class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def _first_step_for_ddr_start(self, step):
+        """
+        First step for ddr start
+        """
+        if step >= 2:
+            return
+        self.logger.debug("Start first step of DisasterRecovery start.")
+        # Create the temporary directory used during the disaster recovery process
+        self.create_disaster_recovery_dir(self.streaming_file_dir)
+        # Check the flag file of this execution
+        self.check_action_and_mode()
+        self.init_cluster_status()
+
+    def _second_step_for_ddr_start(self, step):
+        """
+        Second step for ddr start
+        """
+        if step >= 2:
+            return
+        self.logger.debug("Start second step of ddr start.")
+        self.check_cluster_status(status_allowed=['Normal'])
+        self.check_cluster_is_common()
+        cm_exist = DefaultValue.check_is_cm_cluster(self.logger)
+        if not cm_exist:
+            self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] %
+                                "check cm_ctl is available for current cluster")
+        self.check_is_under_upgrade()
+        # Check the GUC parameters of the datanodes
+        #self.check_dn_instance_params()
+        self.write_streaming_step("2_check_cluster_step")
+
+    def _third_step_for_ddr_start(self, step):
+        """
+        Third step for ddr start
+        """
+        if step >= 3:
+            return
+        self.logger.debug("Start third step of streaming start.")
+        #self.drop_replication_slot_on_dr_cluster(only_mode="disaster_standby")
+        #self.prepare_gs_secure_files(only_mode='primary')
+        #self.build_and_distribute_key_files(only_mode='disaster_standby')
+        #self.get_default_wal_keep_segments(only_mode='primary')
+        self.write_streaming_step("3_set_wal_segments_step")
+
+    def drop_replication_slot_on_dr_cluster(self, only_mode=None):
+        """
+        Drop replication slot on dr cluster
+        """
+        if only_mode and self.params.mode != only_mode:
+            self.logger.debug("Drop replication slot opts not for mode:%s." % self.params.mode)
+            return
+        sql_check = "select slot_name from pg_get_replication_slots() where slot_type='logical'"
+        primary_dns = DefaultValue.get_primary_dn_instance_id("Primary", ignore=True)
+        if not primary_dns:
+            return
+        primary_insts = [inst for node in self.cluster_info.dbNodes
+                         for inst in node.datanodes if str(inst.instanceId) in primary_dns]
+        dn_inst = primary_insts[0]
+        self.logger.debug("Start drop node %s [%s] slots" % (dn_inst.hostname, dn_inst.instanceId))
+        status, output = ClusterCommand.remoteSQLCommand(
+            sql_check, self.user, dn_inst.hostname, dn_inst.port)
+        self.logger.debug("Get %s all replication slots, status=%d, output: %s."
% + (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output))) + if status == 0 and output.strip(): + drop_slots = output.strip().split('\n') + for slot in drop_slots: + self.logger.debug("Starting drop node %s %s" % (dn_inst.instanceId, slot.strip())) + sql = "select * from pg_drop_replication_slot('%s');" % slot.strip() + status_dr, output_dr = ClusterCommand.remoteSQLCommand( + sql, self.user, dn_inst.hostname, dn_inst.port) + if status_dr != 0: + self.logger.debug("Failed to remove node %s %s with error: %s" % ( + dn_inst.hostname, slot.strip(), SensitiveMask.mask_pwd(output_dr))) + self.logger.debug( + "Successfully drop node %s %s" % (dn_inst.instanceId, slot.strip())) + + def _fourth_step_for_ddr_start(self, step): + """ + Fourth step for streaming start + """ + if step >= 4: + return + self.logger.debug("Start fourth step of streaming start.") + self.set_wal_keep_segments( + "reload", DoradoDisasterRecoveryConstants.MAX_WAL_KEEP_SEGMENTS, only_mode='primary') + self.write_streaming_step("4_set_wal_segments_step") + + def _fifth_step_for_ddr_start(self, step): + """ + Fifth step for streaming start + """ + if step >= 5: + return + self.logger.debug("Start fifth step of streaming start.") + self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary') + self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') + #self.set_most_available(mode="reload", raise_error=False) + self.stop_cluster_by_node(only_mode='disaster_standby') + self.write_streaming_step("5_set_wal_segments_step") + + def common_step_for_ddr_start(self): + """ + Common step for ddr start between step 1 and 2 + """ + self.logger.debug("Start common config step of ddr start.") + self.distribute_cluster_conf() + self.update_streaming_pg_hba() + self.config_cross_cluster_repl_info() + + def _sixth_step_for_ddr_start(self, step): + """ + Sixth step for streaming start + """ + if step >= 6: + return + self.logger.debug("Start sixth step of streaming start.") + self.set_cmserver_guc("backup_open", "1", "set", only_mode='disaster_standby') + self.set_cmagent_guc("agent_backup_open", "1", "set", only_mode='disaster_standby') + self.write_streaming_step("6_set_guc_step") + + def _seventh_step_for_ddr_start(self, step): + """ + Seventh step for streaming start + """ + if step >= 7: + return + self.logger.debug("Start seventh step of streaming start.") + self.update_streaming_info("cluster", "restore", only_mode='disaster_standby') + try: + self.start_dss_instance(only_mode='disaster_standby') + self.build_dn_instance(only_mode='disaster_standby') + self.kill_dss_instance(only_mode='disaster_standby') + except Exception as error: + self.update_streaming_info("cluster", "restore_fail", only_mode='disaster_standby') + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "build dns" + "Error:%s" % error) + self.write_streaming_step("7_build_dn_instance_step") + + def _eighth_step_for_ddr_start(self, step): + """ + Eighth step for streaming start + """ + if step >= 8: + return + self.logger.debug("Start eighth step of streaming start.") + self.start_cluster(cm_timeout=DoradoDisasterRecoveryConstants.STANDBY_START_TIMEOUT, + only_mode='disaster_standby') + self.update_streaming_info("cluster", "full_backup", only_mode='primary') + try: + self.wait_main_standby_connection(only_mode='primary') + except Exception as error: + self.update_streaming_info("cluster", "backup_fail", only_mode='primary') + raise Exception(str(error)) + ret = self.check_cluster_status(status_allowed=['Normal'], + only_check=True, 
                                        check_current=True)
+        query_status = "recovery" if ret else "recovery_fail"
+        self.update_streaming_info("cluster", query_status, only_mode='disaster_standby')
+        self.update_streaming_info("cluster", "archive", only_mode='primary')
+        self.write_streaming_step("8_start_cluster_step")
+
+    def _ninth_step_for_ddr_start(self, step):
+        """
+        Ninth step for ddr start
+        """
+        if step >= 9:
+            return
+        self.logger.debug("Start ninth step of streaming start.")
+        #self.restore_wal_keep_segments(only_mode='primary')
+        self.clean_gs_secure_dir()
+        self.clean_step_file()
+
+    def _check_and_refresh_disaster_user_permission(self):
+        """check and refresh disaster user permission"""
+        if self.params.mode != "primary":
+            return
+        self.check_hadr_user(only_mode='primary')
+        self.check_hadr_pwd(only_mode='primary')
+        self.logger.debug("Encrypt hadr user info to database not "
+                          "for mode:%s." % self.params.mode)
+        hadr_cipher_path = os.path.join(self.bin_path, "hadr.key.cipher")
+        hadr_rand_path = os.path.join(self.bin_path, "hadr.key.rand")
+        if not os.path.isfile(hadr_cipher_path) or not os.path.isfile(hadr_rand_path):
+            self.hadr_key_generator('hadr')
+        user_info = DefaultValue.obtain_hadr_user_encrypt_str(self.cluster_info, self.user,
+                                                              self.logger, False, True)
+        if user_info:
+            self.clean_global_config()
+        pass_str = self.encrypt_hadr_user_info(
+            'hadr', self.params.hadrUserName, self.params.hadrUserPassword)
+        self.keep_hadr_user_info(pass_str)
+
+    def run(self):
+        self.logger.log("Start creating the dorado storage disaster recovery relationship.")
+        step = self.query_streaming_step()
+        self._first_step_for_ddr_start(step)
+        # 1. Check that the cluster status is normal
+        self.parse_cluster_status()
+        # Dorado storage replication does not use a streaming replication user
+        #self._check_and_refresh_disaster_user_permission()
+        self._second_step_for_ddr_start(step)
+        # Update pg_hba and the cross-cluster replication info
+        self.common_step_for_ddr_start()
+        self._third_step_for_ddr_start(step)
+        self._fourth_step_for_ddr_start(step)
+        self._fifth_step_for_ddr_start(step)
+        # Set the CM backup_open parameter: backup_open=1 on the disaster standby cluster, backup_open=0 on the primary cluster
+        self._sixth_step_for_ddr_start(step)
+        # Start DSS and build the main standby
+        self._seventh_step_for_ddr_start(step)
+        self._eighth_step_for_ddr_start(step)
+        self._ninth_step_for_ddr_start(step)
+        self.logger.log("Successfully completed dorado disaster recovery start.")
+
\ No newline at end of file
diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py
new file mode 100644
index 00000000..77bdacc4
--- /dev/null
+++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : dorado_disaster_recovery_failover.py is a utility for failing
+#               the standby cluster over so that it becomes the primary cluster.
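+#               The handler checks that it is running on a disaster_standby
+#               cluster and that no upgrade is in progress, promotes the
+#               cluster through streaming_failover_single_inst(), records the
+#               result for later query, and finally removes the maintenance
+#               file and the ddr working directory.
+#############################################################################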
+ + +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode +from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase + + +class DisasterRecoveryFailoverHandler(DoradoDisasterRecoveryBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def run(self): + self.logger.log("Start streaming disaster failover.") + self.check_action_and_mode() + step = self.check_streaming_failover_workable(check_type_step=3, check_status_step=0) + self.check_is_under_upgrade() + self.init_cluster_conf() + try: + self.streaming_failover_single_inst(step) + self.update_streaming_info("cluster", "normal") + self.clean_step_file() + except Exception as error: + self.update_streaming_info("cluster", "promote_fail") + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51632"] % "centralize failover" + "Error:%s" % error) + finally: + self.remove_cluster_maintance_file() + self.clean_streaming_dir() + self.logger.log("Successfully do streaming disaster recovery failover.") + + def check_streaming_failover_workable(self, check_type_step=0, check_status_step=0): + """ + Check streaming failover is workable. + """ + self.logger.debug("Streaming disaster distribute cluster failover...") + stream_disaster_step = self.query_streaming_step() + if not DefaultValue.is_disaster_cluster(self.cluster_info) \ + and stream_disaster_step < check_type_step: + self.logger.debug("The primary dn exist, do nothing except record the result file.") + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "streaming disaster cluster failover, Because the primary cluster " + "does not support failover") + cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL, + DefaultValue.CLUSTER_STATUS_DEGRADED] + if stream_disaster_step < check_status_step: + self.init_cluster_status() + self.parse_cluster_status() + if stream_disaster_step < check_status_step: + self.check_cluster_status(cluster_normal_status) + return stream_disaster_step diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py new file mode 100644 index 00000000..dc7ffea3 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : streaming_disaster_recovery_query.py is utility for +# query streaming disaster recovery condition. 
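+#               The query handler is read-only: it derives the overall cluster
+#               state from the status files kept in the ddr working directory,
+#               re-checks archive/recovery progress against the primary
+#               datanodes, and reports the maximum RPO/RTO taken from
+#               dbe_perf.global_streaming_hadr_rto_and_rpo_stat.
+#############################################################################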
+ +import os + +from base_utils.security.sensitive_mask import SensitiveMask +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants +from gspylib.common.Common import ClusterCommand +from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase + + +class StreamingQueryHandler(DoradoDisasterRecoveryBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_streaming_cluster_query_value(self, file_name): + """ + Query infos from files. + """ + file_path = os.path.realpath(os.path.join(self.streaming_file_dir, file_name)) + if not os.path.isfile(file_path) and file_name in [DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT]: + return "normal" + if not os.path.isfile(file_path): + return "0%" + with open(file_path, 'r') as read_file: + value = read_file.read().strip() + return value + + def check_archive(self, former_status, cluster_status): + """ + Check for archive. + """ + self.logger.log("Start check archive.") + if former_status.strip() not in ["archive", "archive_fail"]: + self.logger.debug("Ignore for status:%s" % former_status) + return + archive_status = "archive_fail" + if cluster_status.lower() not in ["normal", "degraded"]: + self.logger.debug("Cluster status:%s,archive fail." % cluster_status) + return archive_status + if self.main_standby_ids or (not self.primary_dn_ids): + self.logger.debug("Ignore update archive for disaster_standby cluster.") + return archive_status + sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where sync_state" \ + "='Async' and peer_role='Standby' and peer_state='Normal';" + dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes + if inst.instanceId in self.primary_dn_ids] + self.logger.debug("Check archive with cmd:%s." % sql_check) + if dn_instances: + status, output = ClusterCommand.remoteSQLCommand( + sql_check, self.user, dn_instances[0].hostname, + dn_instances[0].port) + if status == 0 and output and output.strip(): + archive_status = "archive" + self.logger.debug("Successfully check archive, results:%s." % + SensitiveMask.mask_pwd(output)) + return archive_status + elif status == 0 and not output.strip(): + self.logger.debug("Check archive fail.") + return archive_status + else: + self.logger.debug("Check archive status:%s, output:%s." + % (status, output)) + self.logger.debug("Check archive result:%s." % archive_status) + return archive_status + + def check_recovery(self, former_status, cluster_status="normal"): + """ + Check for recovery. + """ + self.logger.log("Start check recovery.") + if former_status.strip() not in ["recovery", "recovery_fail"]: + self.logger.debug("Ignore for check recovery status:%s" % former_status) + return + recovery_status = "recovery_fail" + if cluster_status.lower() not in ["normal", "degraded"]: + self.logger.debug("Cluster status:%s,recovery fail." % cluster_status) + return recovery_status + if self.primary_dn_ids or (not self.main_standby_ids): + self.logger.debug("Ignore update recovery for primary cluster.") + return recovery_status + return "recovery" + + def get_max_rpo_rto(self): + """ + Get max rpo and rto. 
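+        Returns a (max_rpo, max_rto) tuple of strings; both values are empty
+        strings when no primary datanode is found or the query output cannot
+        be parsed.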
+ """ + self.logger.log("Start check RPO & RTO.") + rpo_sql = "SELECT current_rpo FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;" + rto_sql = "SELECT current_rto FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;" + rto_rpo_sql = rpo_sql + rto_sql + if not self.primary_dn_ids: + self.logger.debug("Not found primary dn in cluster, cluster status:%s, " + "main standby:%s." % (self.cluster_status, self.main_standby_ids)) + return "", "" + log_info = "Execute sql [%s] on node [%s: %s] with result:%s" + dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes + if inst.instanceId in self.primary_dn_ids] + if dn_instances: + status, output = ClusterCommand.remoteSQLCommand( + rto_rpo_sql, self.user, dn_instances[0].hostname, dn_instances[0].port) + if status == 0 and output: + try: + rets = output.strip().split('\n') + length = len(rets) // 2 + rpo_list = [int(i) for i in rets[:length]] + rto_list = [int(j) for j in rets[length:]] + max_rpo, max_rto = str(max(rpo_list)), str(max(rto_list)) + except ValueError: + return "", "" + self.logger.debug("Successfully get max rpo:%s, rto:%s, output:%s" + % (max_rpo, max_rto, ','.join(output.split('\n')))) + return max_rpo, max_rto + else: + self.logger.debug(log_info % (rto_rpo_sql, dn_instances[0].hostname, + dn_instances[0].port, ','.join(output.split('\n')))) + return "", "" + + def run(self): + self.logger.log("Start streaming disaster query.") + cluster_info = self.query_cluster_info() + if cluster_info: + self.parse_cluster_status(current_status=cluster_info) + self.check_is_under_upgrade() + check_cluster_stat = self.get_streaming_cluster_query_value( + DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT) + archive_status = self.check_archive(check_cluster_stat, self.cluster_status) + recovery_status = self.check_recovery(check_cluster_stat, self.cluster_status) + hadr_cluster_stat = archive_status or recovery_status or check_cluster_stat + + hadr_failover_stat = self.get_streaming_cluster_query_value( + DoradoDisasterRecoveryConstants.HADR_FAILOVER_STAT) + hadr_switchover_stat = self.get_streaming_cluster_query_value( + DoradoDisasterRecoveryConstants.HADR_SWICHOVER_STAT) + if hadr_cluster_stat != "promote": + hadr_failover_stat = "" + if hadr_cluster_stat != "switchover": + hadr_switchover_stat = "" + + self.logger.debug("Start check max rpo and rto.") + max_rpo, max_rto = self.get_max_rpo_rto() + self.logger.debug("Finished check max rpo and rto.") + values = dict() + values["hadr_cluster_stat"] = hadr_cluster_stat + values["hadr_failover_stat"] = hadr_failover_stat + values["hadr_switchover_stat"] = hadr_switchover_stat + values["RPO"] = max_rpo + values["RTO"] = max_rto + self.logger.log("Successfully executed streaming disaster " + "recovery query, result:\n%s" % values) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py new file mode 100644 index 00000000..abe08902 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : streaming_disaster_recovery_stop.py is a utility for stopping +# streaming disaster recovery on primary cluster. + +from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase + + +class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _first_step_for_streaming_stop(self, step): + """ + First step for streaming stop + """ + if step >= 2: + return + self.logger.debug("Start first step of streaming stop.") + self.init_cluster_status() + self.check_action_and_mode() + + def _second_step_for_streaming_stop(self, step): + """ + Second step for streaming stop + """ + if step >= 2: + return + self.logger.debug("Start second step of streaming start.") + self.check_cluster_status(status_allowed=['Normal']) + self.check_cluster_type(allowed_type='primary') + self.check_is_under_upgrade() + self.write_streaming_step("2_check_cluster_step") + + def _third_step_for_streaming_stop(self, step): + """ + Third step for streaming stop + """ + if step >= 3: + return + self.logger.debug("Start third step of streaming stop.") + self.remove_all_stream_repl_infos(guc_mode="reload") + self.remove_streaming_cluster_file() + self.write_streaming_step("3_remove_config_step") + + def _fourth_step_for_streaming_stop(self, step): + """ + Fourth step for streaming stop + """ + if step >= 4: + return + self.logger.debug("Start fourth step of streaming stop.") + self.remove_streaming_pg_hba() + self.restore_guc_params() + self.write_streaming_step("4_remove_pg_hba_step") + + def _fifth_step_for_streaming_stop(self, step): + """ + Fifth step for streaming stop + """ + if step >= 5: + return + self.logger.debug("Start fifth step of streaming start.") + self.streaming_clean_replication_slot() + self.write_streaming_step("5_update_config_step") + + def _sixth_step_for_streaming_stop(self, step): + """ + Sixth step for streaming stop + """ + if step >= 6: + return + self.logger.debug("Start sixth step of streaming stop.") + self.check_cluster_status(['Normal']) + self.clean_global_config() + self.update_streaming_info("cluster", "normal") + self.clean_streaming_dir() + + def run(self): + self.logger.log("Start remove streaming disaster relationship.") + step = self.query_streaming_step() + self._first_step_for_streaming_stop(step) + self.parse_cluster_status() + self._second_step_for_streaming_stop(step) + self._third_step_for_streaming_stop(step) + self._fourth_step_for_streaming_stop(step) + self._fifth_step_for_streaming_stop(step) + self._sixth_step_for_streaming_stop(step) + self.logger.log("Successfully do streaming disaster recovery stop.") diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py new file mode 100644 index 00000000..2763ae77 --- /dev/null +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- 
+############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : streaming_disaster_recovery_switchover.py is a utility for +# changing role between primary cluster and standby cluster. + +import os +import time +from datetime import datetime, timedelta + +from base_utils.os.cmd_util import CmdUtil +from base_utils.os.env_util import EnvUtil +from gspylib.common.Common import DefaultValue, ClusterCommand, ClusterInstanceConfig +from gspylib.common.DbClusterStatus import DbClusterStatus +from gspylib.common.ErrorCode import ErrorCode +from gspylib.threads.parallelTool import parallelTool +from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants + + +class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def run(self): + """ + streaming disaster recovery switchover + """ + self.logger.log("Start streaming disaster switchover.") + self.check_action_and_mode() + self.check_switchover_workable() + self.init_cluster_conf() + self.check_dn_instance_params() + self.check_is_under_upgrade() + try: + self.streaming_switchover_single_inst() + self.clean_step_file() + except Exception as error: + if self.params.mode == "primary": + self.update_streaming_info("cluster", "promote_fail") + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51632"] % "switchover" + "Error:%s" % str(error)) + finally: + self.remove_cluster_maintance_file_for_switchover() + self.remove_cluster_maintance_file() + self.logger.log("Successfully do streaming disaster recovery switchover.") + + def streaming_switchover_single_inst(self): + """ + streaming disaster recovery switchover for single_inst cluster + disaster_standby: expect primary cluster becomes standby + primary: expect standby cluster becomes primary + """ + self.create_cluster_maintance_file("streaming switchover") + self.update_streaming_info("cluster", DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER) + stream_disaster_step = self.query_streaming_step() + if self.params.mode == "primary": + end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout) + self.logger.log("Waiting for switchover barrier.") + while True: + switchover_barrier_list = self.check_streaming_disaster_switchover_barrier() + if len(switchover_barrier_list) == len(self.normal_dn_ids): + break + if datetime.now() >= end_time: + self.restart_cluster() + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "check switchover_barrier on all main standby dn" + + " Because check timeout: %ss" % + str(self.params.waitingTimeout)) + time.sleep(5) + self.streaming_failover_single_inst(stream_disaster_step, + DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER) + else: + self.add_cluster_maintance_file_for_switchover() + try: + if stream_disaster_step < 1: + 
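+                    # Step 1: restart the current primary cluster and tell it to
+                    # enter switchover; log truncation is confirmed inside
+                    # streaming_disaster_set_master_cluster_in_switchover().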
self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") + self.stop_cluster() + self.start_cluster() + self.streaming_disaster_set_master_cluster_in_switchover() + self.write_streaming_step("1_streaming_disaster_set_master_in_switchover") + if stream_disaster_step < 2: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") + ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, + self.logger, self.user, + {self.backup_open_key: "2"}) + self.stop_cluster() + self.write_streaming_step("2_stop_cluster_for_switchover") + if stream_disaster_step < 3: + self.set_cmserver_guc("backup_open", "2", "set") + self.set_cmagent_guc("agent_backup_open", "2", "set") + self.write_streaming_step("3_set_backup_open_2_done") + if stream_disaster_step < 4: + self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "50%") + self.remove_cluster_maintance_file_for_switchover() + self.remove_cluster_maintance_file() + self.start_cluster() + self.write_streaming_step("4_start_cluster_done") + if stream_disaster_step < 5: + self.wait_for_normal(timeout=self.params.waitingTimeout, + streaming_switchover="streaming_switchover") + self.streaming_clean_replication_slot() + self.update_streaming_info("cluster", "recovery") + except Exception as error: + self.logger.error("Failed to do streaming disaster cluster switchover, Error:" + " \n%s" % str(error)) + rollback_step = self.query_streaming_step() + self.logger.debug("Roll back switchover step:%s" % rollback_step) + self.remove_cluster_maintance_file_for_switchover() + self.remove_cluster_maintance_file() + if rollback_step < 4 or (rollback_step >= 4 and + self.streaming_switchover_roll_back_condition()): + self.streaming_switchover_roll_back(update_query=True) + self.clean_step_file() + raise Exception(error) + self.remove_hadr_switchover_process_file() + + def remove_hadr_switchover_process_file(self): + self.logger.debug("Remove hadr switchover process file for switchover.") + process_file = os.path.realpath(os.path.join(self.streaming_file_dir, + ".hadr_switchover_stat")) + cmd = "if [ -f {0} ]; then rm -rf {0}; fi".format(process_file) + self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) + self.logger.debug("Successfully remove switchover process on all connected nodes.") + + @staticmethod + def clean_file_on_node(params): + """ + clean file on dest node with path + """ + dest_ip, dest_path, timeout = params + cmd = "source %s && pssh -s -t %s -H %s 'if [ -f %s ]; then rm -f %s; fi'" % ( + EnvUtil.getMpprcFile(), timeout, dest_ip, dest_path, dest_path) + status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd) + return status, output, dest_ip + + def restart_cluster(self, restart_timeout=DefaultValue.TIMEOUT_CLUSTER_START): + """ + Restart cluster + """ + self.logger.log("Restart cluster.") + static_config = "%s/bin/cluster_static_config" % self.bin_path + cm_ctl_file = "%s/bin/cm_ctl" % self.bin_path + if not os.path.isfile(static_config): + self.logger.debug("Checked file %s lost." % static_config) + if not os.path.isfile(cm_ctl_file): + self.logger.debug("Checked file %s lost." % cm_ctl_file) + stop_cmd = ClusterCommand.getStopCmd(0, timeout=restart_timeout) + status, output = CmdUtil.retryGetstatusoutput(stop_cmd, retry_time=0) + self.logger.debug("Stop cluster result:[%s][%s]." 
% (status, output)) + start_cmd = ClusterCommand.getStartCmd(0, timeout=restart_timeout) + status, output = CmdUtil.retryGetstatusoutput(start_cmd, retry_time=0) + self.logger.debug("Start cluster result:[%s][%s]." % (status, output)) + + def remove_cluster_maintance_file_for_switchover(self): + """ + function: remove the cluster_maintance file + :return: NA + """ + self.logger.debug("Remove cluster_maintance file for switchover.") + cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home, + "bin/cluster_maintance")) + host_names = \ + self.get_all_connection_node_name("remove_cluster_maintance_file_for_switchover") + try: + pscp_params = [] + all_instances = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + if not self.cluster_info.isSingleInstCluster(): + all_instances.extend([dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.coordinators]) + for dn_inst in all_instances: + if dn_inst.hostname in host_names: + pscp_params.append([dn_inst.hostname, os.path.join( + dn_inst.datadir, os.path.basename(cluster_maintance_file)), 10]) + if len(pscp_params) > 0: + results = parallelTool.parallelExecute(self.clean_file_on_node, pscp_params) + for ret in results: + if ret[0] != 0: + self.logger.debug("clean maintance file to node[%s] with status[%s], " + "output[%s]" % (ret[-1], ret[0], ret[1])) + except Exception as error: + self.logger.debug( + "Failed to remove cluster_maintance file for switchover with error: %s" + % str(error)) + self.logger.debug("Successfully remove %s cluster_maintance file for switchover." + % host_names) + + def add_cluster_maintance_file_for_switchover(self): + """ + add cluster_maintance file for streaming disaster switchover to disaster_standby + """ + self.logger.debug("Start add cluster_maintance file for switchover.") + try: + cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home, + "bin/cluster_maintance")) + host_names = \ + self.get_all_connection_node_name("add_cluster_maintance_file_for_switchover", True) + pscp_params = [] + all_instances = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + for dn_inst in all_instances: + if dn_inst.hostname in host_names: + pscp_params.append([dn_inst.hostname, cluster_maintance_file, + os.path.join(dn_inst.datadir, "cluster_maintance"), 10]) + if len(pscp_params) > 0: + results = parallelTool.parallelExecute( + DefaultValue.distribute_file_to_node, pscp_params) + for ret in results: + if ret[0] != 0: + self.logger.debug("Distribute maintance file for switchover to node[%s] " + "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) + except Exception as error: + self.logger.debug("WARNING: Failed add cluster_maintance file for switchover, " + "error:%s." 
% (str(error))) + self.logger.debug("Successfully add cluster_maintance file for switchover.") + + def streaming_disaster_set_master_cluster_in_switchover(self): + """ + streaming disaster set master cluster in switchover + """ + self.logger.debug("Starting set streaming master cluster in switchover.") + primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes if + dn_inst.instanceId in self.primary_dn_ids] + if not primary_dns: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain primary dns for switchover") + if self.streaming_dr_in_switchover(primary_dns): + if self.streaming_dr_service_truncation_check(primary_dns): + self.logger.debug("Successfully set streaming master cluster in switchover.") + + def streaming_dr_service_truncation_check(self, primary_dns_list): + """ + streaming dr service truncation check + """ + self.logger.log("Waiting for truncation.") + results = parallelTool.parallelExecute(self.concurrent_check_dr_service_truncation, + primary_dns_list) + return all(results) + + def concurrent_check_dr_service_truncation(self, dn_inst): + """ + Wait for the log playback to complete. + """ + self.logger.debug("Starting check node %s shardNum %s instance %s streaming service " + "truncation." % (dn_inst.hostname, dn_inst.mirrorId, dn_inst.instanceId)) + sql_check = "select * from gs_streaming_dr_service_truncation_check();" + end_time = datetime.now() + timedelta(seconds=1200) + succeed = False + while datetime.now() < end_time: + status, output = ClusterCommand.remoteSQLCommand(sql_check, self.user, dn_inst.hostname, + dn_inst.port) + if status == 0 and output and output.strip() == "t": + succeed = True + break + time.sleep(5) + self.logger.debug("Retry truncation check shardNum %s in node %s instance %s." % + (dn_inst.mirrorId, dn_inst.hostname, dn_inst.instanceId)) + if not succeed: + self.logger.error("Failed to execute the command: %s, Error:\n%s" % (sql_check, output)) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "check truncate service before switchover") + self.logger.debug("Successfully check node %s shardNum %s instance %s streaming service " + "truncation." % (dn_inst.hostname, dn_inst.mirrorId, dn_inst.instanceId)) + return True + + def streaming_dr_in_switchover(self, primary_dns_list): + """ + set steaming dr in switchover + """ + results = parallelTool.parallelExecute(self.concurrent_set_dr_in_switchover, + primary_dns_list) + return all(results) + + def concurrent_set_dr_in_switchover(self, dn_inst): + """ + Switchover requires log truncation first + """ + self.logger.debug("Starting set shardNum %s node %s streaming dr in switchover." % + (dn_inst.mirrorId, dn_inst.hostname)) + sql_cmd = "select * from gs_streaming_dr_in_switchover();" + # We need to use the normal port to transmit service truncation, + # not the OM port. + port = int(dn_inst.port) - 1 + (status, output) = ClusterCommand.remoteSQLCommand(sql_cmd, + self.user, dn_inst.hostname, str(port)) + self.logger.debug("check streaming in switchover, status=%d, output: %s." + % (status, output)) + if status != 0 or self.find_error(output) or output.strip() != "t": + self.logger.error("Failed to execute the command: %s, Error:\n%s" % (sql_cmd, output)) + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "generate switchover barrier before switchover") + self.logger.debug("Successfully set shardNum %s node %s streaming dr in switchover." 
% + (dn_inst.mirrorId, dn_inst.hostname)) + return True + + def wait_for_normal(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, + streaming_switchover=None): + """ + function:Wait the cluster become Normal or Degraded + input:NA + output:NA + """ + self.logger.debug("Waiting for cluster status being satisfied.") + end_time = None if timeout <= 0 else datetime.now() + timedelta(seconds=timeout) + + check_status = 0 + while True: + time.sleep(10) + if end_time is not None and datetime.now() >= end_time: + check_status = 1 + self.logger.debug("Timeout. The cluster is not available.") + break + # View the cluster status + status_file = "/home/%s/gauss_check_status_%d.dat" % (self.user, os.getpid()) + cmd = ClusterCommand.getQueryStatusCmd(outFile=status_file) + (status, output) = CmdUtil.retryGetstatusoutput(cmd, retry_time=0) + if status != 0: + if os.path.exists(status_file): + os.remove(status_file) + self.logger.debug("Failed to obtain the cluster status. Error: \n%s" % output) + continue + # Determine whether the cluster status is normal or degraded + cluster_status = DbClusterStatus() + cluster_status.initFromFile(status_file) + if os.path.exists(status_file): + os.remove(status_file) + if cluster_status.clusterStatus == "Normal": + self.logger.log("The cluster status is Normal.") + break + else: + self.logger.debug("Cluster status is %s(%s)." % ( + cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) + + if check_status != 0: + if streaming_switchover == "streaming_switchover": + raise Exception( + ErrorCode.GAUSS_528["GAUSS_52800"] % (cluster_status.clusterStatus, + cluster_status.clusterStatusDetail)) + self.logger.logExit(ErrorCode.GAUSS_528["GAUSS_52800"] % ( + cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) + self.logger.debug("Successfully wait for cluster status become Normal.", "constant") + + def set_auto_csn_barrier_guc(self, guc_mode, action_flag=False, roll_back=False): + """ + auto_csn_barrier : 0 / 1 + """ + guc_value = 1 if self.params.mode == "primary" else 0 + if action_flag: + guc_value = 0 + if roll_back: + guc_value = 1 + self.logger.debug("Starting %s auto_csn_barrier is %s." % (guc_mode, guc_value)) + cmd = 'source %s && gs_guc %s -Z coordinator -N all -I all ' \ + '-c "auto_csn_barrier=%s"' % (self.mpp_file, guc_mode, guc_value) + host_names = self.cluster_info.getClusterNodeNames() + ignore_node = [node for node in host_names if node not in self.normal_node_list] + if ignore_node: + self.logger.debug( + "WARNING: auto_csn_barrier need ignore host name is %s" % ignore_node) + nodes = ",".join(ignore_node) + cmd = cmd + " --ignore-node %s" % nodes + self.logger.debug("Set auto_csn_barrier with cmd:%s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "set auto_csn_barrier" + "Error:%s" % output) + self.logger.debug("Successfully %s auto_csn_barrier is %s." 
% (guc_mode, guc_value)) + + def streaming_switchover_roll_back(self, update_query=False): + """ + streaming disaster cluster roll back in switchover + """ + self.logger.log("Roll back streaming disaster cluster switchover...") + ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, + self.logger, self.user, + {self.backup_open_key: "0"}) + self.stop_cluster() + self.set_cmserver_guc("backup_open", "0", "set") + self.set_cmagent_guc("agent_backup_open", "0", "set") + self.logger.log("Successfully modify cma and cms parameters to start according to primary " + "cluster mode") + if update_query: + self.update_streaming_info("cluster", "archive") + self.start_cluster() + self.logger.log("Successfully Roll back streaming disaster cluster switchover.") + + def check_streaming_disaster_switchover_barrier(self): + """ + check whether get switchover_barrier on all dn + """ + self.logger.debug("check streaming disaster switchover barrier...") + sql_cmd = "select * from gs_streaming_dr_get_switchover_barrier();" + switchover_barrier_list = [] + for db_node in self.cluster_info.dbNodes: + for dn_inst in db_node.datanodes: + if dn_inst.instanceId not in self.normal_dn_ids: + self.logger.debug("Warning: Not check for abnormal instance %s %s" % ( + dn_inst.instanceType, dn_inst.instanceId)) + continue + (status, output) = ClusterCommand.remoteSQLCommand( + sql_cmd, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True) + self.logger.debug("Check inst has switchover barrier, status=%d, " + "output: %s." % (status, output)) + if status == 0 and output.strip() == "t": + self.logger.debug("Successfully check instance %s %s has switchover " + "barrier." % (dn_inst.instanceType, dn_inst.instanceId)) + switchover_barrier_list.append(dn_inst.instanceId) + return switchover_barrier_list + + def check_switchover_workable(self): + """ + Check switchover is workable + """ + if not DefaultValue.is_disaster_cluster(self.cluster_info) \ + and self.params.mode == "primary": + self.logger.debug("The primary dn exist, do nothing except record the result file.") + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "streaming disaster cluster switchover, Because the primary cluster " + "[drClusterMode] parameter must be disaster_standby") + if DefaultValue.is_disaster_cluster(self.cluster_info) and \ + self.params.mode == "disaster_standby": + self.logger.debug("The primary dn not exist, do nothing except record the result file.") + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % + "streaming disaster cluster switchover, Because the disaster_standby " + "cluster [drClusterMode] parameter must be primary") + self.logger.log("Waiting for cluster and all instances normal.") + if self.params.mode == "primary": + end_time = datetime.now() + timedelta(seconds=600) + while True: + self.init_cluster_status() + self.parse_cluster_status() + if self.check_cluster_status(status_allowed=['Normal'], only_check=True, + is_log=False) and self.check_instances_ready_for_switchover(): + break + if datetime.now() >= end_time: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "check cluster and instances status" + " with timeout: %ss" % str(600)) + time.sleep(5) + self.logger.debug("Retry check stream disaster standby cluster status...") + else: + self.init_cluster_status() + self.parse_cluster_status() + if (not self.check_cluster_status(status_allowed=['Normal'], only_check=True, + is_log=False)) \ + or (not self.check_instances_ready_for_switchover()): + raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % 
"check cluster status") + + def check_instances_ready_for_switchover(self): + """ + Check cns and dns is ready for switchover + """ + dn_instances = [dn_inst.instanceId for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + if len(dn_instances) != len(self.normal_dn_ids): + self.logger.debug("Not all dn instances is normal.") + return False + self.logger.debug("Successfully check cn and dn instances are normal.") + return True diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py new file mode 100644 index 00000000..530d7d6a --- /dev/null +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : params_handler.py is a utility for parsing and verifying streaming +# disaster recovery params. +############################################################################# + +import os +import sys +import json +import optparse +import getpass + +from impl.streaming_disaster_recovery.streaming_constants import DoradoDisasterRecoveryConstants +from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.ErrorCode import ErrorCode +from base_utils.security.security_checker import SecurityChecker, ValidationError +from domain_utils.cluster_file.version_info import VersionInfo + + +def check_streaming_start_mode(mode): + """ + Check start mode + """ + if mode not in ["primary", "disaster_standby"]: + raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50011"] % ('-m', mode)) + + +def check_xml_file(file): + """ + Check xml file param + """ + if not file: + raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50001'] % 'X') + SecurityChecker.check_is_string('xml file path', file) + if not os.path.isfile(file): + raise ValidationError(ErrorCode.GAUSS_502["GAUSS_50201"] % file) + + +def check_hadr_user(value): + """ + Check disaster user + """ + description = "disaster username" + SecurityChecker.check_db_user(description, value) + + +def check_hadr_pwd(value): + """ + Check disaster user password + """ + description = "disaster user password" + # check_db_password will be used in cloud scene + SecurityChecker.check_db_user(description, value) + + +def check_wait_timeout(value): + """ + Check wait timeout + """ + description = "wait timeout" + SecurityChecker.check_is_digit(description, value) + + +def check_local_cluster_conf(value): + """ + Check local cluster conf + """ + SecurityChecker.check_is_dict("localClusterConf", value) + port = value.get('port') + SecurityChecker.check_port_valid('port of localClusterConf', port) + shards = value.get('shards') + SecurityChecker.check_is_list('shards of localClusterConf', shards) + for shard in shards: + for node in shard: + ip = node.get('ip') + data_ip = node.get('dataIp') + 
SecurityChecker.check_ip_valid('ip of localClusterConf', ip) + SecurityChecker.check_ip_valid('dataIp of localClusterConf', data_ip) + + +def check_remote_cluster_conf(value): + """ + Check local cluster conf + """ + SecurityChecker.check_is_dict("remoteClusterConf", value) + port = value.get('port') + SecurityChecker.check_port_valid('port of remoteClusterConf', port) + shards = value.get('shards') + SecurityChecker.check_is_list('shards of remoteClusterConf', shards) + for shard in shards: + for node in shard: + ip = node.get('ip') + data_ip = node.get('dataIp') + SecurityChecker.check_ip_valid('ip of remoteClusterConf', ip) + SecurityChecker.check_ip_valid('dataIp of remoteClusterConf', data_ip) + + +STREAMING_PARAMS_FOR_MODULE = { + "start": { + "mode": check_streaming_start_mode, + "xml_path": check_xml_file, + "hadrUserName": check_hadr_user, + "hadrUserPassword": check_hadr_pwd, + "waitingTimeout": check_wait_timeout, + "localClusterConf": check_local_cluster_conf, + "remoteClusterConf": check_remote_cluster_conf + }, + "stop": { + "xml_path": check_xml_file, + "waitingTimeout": check_wait_timeout, + "localClusterConf": check_local_cluster_conf, + "remoteClusterConf": check_remote_cluster_conf + }, + "switchover": { + "mode": check_streaming_start_mode, + "waitingTimeout": check_wait_timeout + }, + "failover": { + "waitingTimeout": check_wait_timeout, + }, + "query": {} +} + +HELP_MSG = """ +gs_sdr is a utility for streaming disaster recovery fully options. + +Usage: + gs_sdr -? | --help + gs_sdr -V | --version + gs_sdr -t start -m [primary|disaster_standby] -X XMLFILE [-U DR_USERNAME] [-W DR_PASSWORD] [--json JSONFILE] [--time-out=SECS] [-l LOGFILE] + gs_sdr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE] + gs_sdr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE] + gs_sdr -t failover [-l LOGFILE] + gs_sdr -t query [-l LOGFILE] +General options: + -?, --help Show help information for this utility, + and exit the command line mode. + -V, --version Show version information. + -t Task name, it could be: + "start", "stop", "switchover", "failover", "query". + -m Option mode, it could be: + "primary", "disaster_standby". + -U Disaster recovery user name. + -W Disaster recovery user password. + -X Path of the XML configuration file. + -l Path of log file. + --json Path of params file for streaming options. + --time-out=SECS Maximum waiting time when Main standby connect to the primary dn, + default value is 1200s. +""" + + +class ParamsHandler(object): + """ + Parse and check params. + """ + def __init__(self, logger, trace_id): + self.params = None + self.logger = logger + self.trace_id = trace_id + + @staticmethod + def option_parser(): + """ + parsing parameters + :return: param obj + """ + parser = optparse.OptionParser(conflict_handler='resolve') + parser.disable_interspersed_args() + parser.epilog = "Example: gs_sdr -t " \ + "start -m primary -X clusterConfig.xml " \ + "--time-out=1200." + parser.add_option('-V', "--version", dest='version_info', action='store_true', + help='-V|--version show version info.') + parser.add_option('-?', "--help", dest='help_info', action='store_true', + help='-?|--help show help message and exist.') + parser.add_option('-t', dest='task', type='string', + help='Task name. It could be "start", "stop", ' + '"switchover", "failover", "query"') + parser.add_option('-m', dest='mode', type='string', + help='Cluster run mode. 
It could be ["primary", "disaster_standby"].') + parser.add_option('-U', dest='hadrusername', type='string', + help='hadr user name.') + parser.add_option('-W', dest='hadruserpasswd', type='string', + help='hadr user password.') + parser.add_option('-X', dest='xml_path', type='string', + help='Cluster config xml path.') + parser.add_option('--json', dest='json_path', type='string', + help='Config json file of streaming options') + parser.add_option('--time-out=', dest='timeout', default="1200", type='string', + help='time out.') + parser.add_option("-l", dest='logFile', type='string', + help='Path of log file.') + parser.add_option("--dorado-info", dest='dorado_info', type='string', + help='Path of dorado xlog share disk.') + return parser + + def __print_usage(self): + """ + Print help message + """ + if self.params.help_info: + print(HELP_MSG) + sys.exit(0) + + def __print_version_info(self): + """ + Print version info + """ + if self.params.version_info: + print("%s %s" % (sys.argv[0].split("/")[-1], + VersionInfo.COMMON_VERSION)) + sys.exit(0) + + def __cluster_conf_parser(self, file_path): + """ + Parse params in json file + """ + if self.params.json_path: + if not os.path.isfile(file_path): + raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50010'] + % '--json' + " Json file is not exist.") + with open(file_path, 'r') as read_fp: + param_dict = json.load(read_fp) + for key, value in param_dict.items(): + if key not in DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS[self.params.task]: + continue + setattr(self.params, key, value) + return + cluster_info = dbClusterInfo() + if not self.params.xml_path or not os.path.isfile(self.params.xml_path): + raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50010'] + % '-X' + " XML file and json file are all not exist.") + cluster_info.initFromXml(self.params.xml_path) + remote_cluster_conf = dict() + remote_cluster_conf.setdefault("port", cluster_info.remote_dn_base_port) + remote_cluster_conf.setdefault("shards", cluster_info.remote_stream_ip_map) + setattr(self.params, "remoteClusterConf", remote_cluster_conf) + self.logger.debug("Remote stream cluster conf: %s." % str(remote_cluster_conf)) + + local_cluster_conf = dict() + local_cluster_conf.setdefault("port", cluster_info.local_dn_base_port) + local_cluster_conf.setdefault("shards", cluster_info.local_stream_ip_map) + setattr(self.params, "localClusterConf", local_cluster_conf) + self.logger.debug("Local stream cluster conf: %s." 
% str(local_cluster_conf)) + if not remote_cluster_conf["shards"] or len(remote_cluster_conf["shards"])\ + != len(local_cluster_conf["shards"]): + raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50026'] % "streaming DR") + + def __init_default_params(self): + """ + Init params if need default value + """ + if not self.params.timeout.isdigit(): + raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50004"] % "--time-out") + self.params.waitingTimeout = int(self.params.timeout) + + def __parse_args(self): + """ + Parse arguments + """ + parser = ParamsHandler.option_parser() + self.params, _ = parser.parse_args() + self.__print_usage() + self.__print_version_info() + if not hasattr(self.params, 'task') or not self.params.task: + raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") + if self.params.task not in DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS.keys(): + raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't') + # parse arguments in json/xml file + if DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS[self.params.task]: + self.__cluster_conf_parser(self.params.json_path) + + def __reload_hadr_user_info(self): + """ + Input hadr user info + """ + if self.params.task not in ["start"]: + return + if self.params.hadrusername and self.params.hadruserpasswd: + self.params.hadrUserName = self.params.hadrusername + self.params.hadrUserPassword = self.params.hadruserpasswd + del self.params.hadruserpasswd + return + user_name = "" + if not self.params.hadrusername: + user_name = input("Please enter disaster user name:") + self.params.hadrUserName = user_name if user_name else self.params.hadrusername + if self.params.hadruserpasswd: + self.params.hadrUserPassword = self.params.hadruserpasswd + del self.params.hadruserpasswd + return + for i in range(3): + user_passwd = getpass.getpass("Please enter password for [%s]:" % + self.params.hadrUserName) + user_passwd_check = getpass.getpass("Please repeat enter for password for [%s]:" + % self.params.hadrUserName) + if user_passwd == user_passwd_check: + break + if i == 2: + self.logger.logExit("The two passwords entered for too many " + "times are inconsistent. 
Authentication failed.") + self.logger.error( + ErrorCode.GAUSS_503["GAUSS_50306"] % user_name + + "The two passwords are different, please enter password again.") + self.params.hadrUserPassword = user_passwd + del user_passwd + del user_passwd_check + self.logger.debug("The hadr user information is successfully loaded.") + + def get_valid_params(self): + """ + Check params + """ + try: + self.__parse_args() + self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) + self.logger.log('Streaming disaster recovery ' + self.params.task + ' ' + self.trace_id) + self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) + self.__init_default_params() + self.__reload_hadr_user_info() + for param_name, validate in STREAMING_PARAMS_FOR_MODULE[self.params.task].items(): + check_value = getattr(self.params, param_name) + if self.params.task == "stop": + if param_name == "xml_path" and not check_value: + check_value = getattr(self.params, 'json_path') + validate(check_value) + except ValidationError as error: + self.logger.logExit(str(error)) + return self.params -- Gitee From 6dd7237a9e1e4b8f0157ae17642bc30243adde2c Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 4 Aug 2023 10:57:52 +0800 Subject: [PATCH 02/23] =?UTF-8?q?gs=5Fddr=E5=B7=A5=E5=85=B7=E7=9A=84switch?= =?UTF-8?q?over=E5=92=8Cfailover=E7=9A=84=E5=AD=90=E5=91=BD=E4=BB=A4?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/Common.py | 4 +- script/gspylib/common/DbClusterInfo.py | 12 + script/gspylib/common/ErrorCode.py | 7 +- .../impl/dorado_disaster_recovery/ddr_base.py | 385 ++++++++++++------ .../dorado_disaster_recovery/ddr_constants.py | 2 +- .../dorado_diaster_recovery_start.py | 30 +- .../dorado_disaster_recovery_failover.py | 34 +- .../dorado_disaster_recovery_query.py | 2 +- .../dorado_disaster_recovery_stop.py | 14 +- .../dorado_disaster_recovery_switchover.py | 167 ++++---- .../params_handler.py | 5 +- 11 files changed, 418 insertions(+), 244 deletions(-) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 2d1016fa..4f91eb9b 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -3027,7 +3027,7 @@ class DefaultValue(): if os.path.isfile(cm_agent_conf_temp_file): with open(cm_agent_conf_temp_file, "r") as cma_conf_file: content = cma_conf_file.read() - ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content) + ret = re.findall(r'agent_backup_open *= *1', content) g_file.removeFile(cm_agent_conf_temp_file) if ret: return True @@ -3037,7 +3037,7 @@ class DefaultValue(): raise Exception(ErrorCode.GAUSS_502['GAUSS_50201'] % cm_agent_conf_file) with open(cm_agent_conf_file, "r") as cma_conf_file: content = cma_conf_file.read() - ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content) + ret = re.findall(r'agent_backup_open *= *1', content) if ret: return True else: diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 91564fca..305a00c4 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1689,6 +1689,18 @@ class dbClusterInfo(): def get_staic_conf_path(self, user, ignore_err=False): return self.__getStaticConfigFilePath(user=user, ignore_err=ignore_err) + def get_mpprc_file(self, user): + """ + get mpprc file + """ + mpprcFile = EnvUtil.getEnvironmentParameterValue('MPPDB_ENV_SEPARATE_PATH', user) + if 
mpprcFile is not None and mpprcFile != "": + mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') + checkPathVaild(mpprcFile) + userProfile = mpprcFile + else: + userProfile = ClusterConstants.BASHRC + return userProfile def __getEnvironmentParameterValue(self, environmentParameterName, user): """ diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py index 7c0f0392..8d82f5f0 100644 --- a/script/gspylib/common/ErrorCode.py +++ b/script/gspylib/common/ErrorCode.py @@ -133,6 +133,7 @@ class ErrorCode(): 'GAUSS_50110': "[GAUSS-50110] : Cannot execute this script on %s.", 'GAUSS_50111': "[GAUSS-50111] : The %s directory has no permission.", 'GAUSS_50112': "[GAUSS-50112] : Failed to get the permission of %s.", + 'GAUSS_50113': "[GAUSS-50113] : The %s is not writable and readable for %s.", } ########################################################################### @@ -333,7 +334,8 @@ class ErrorCode(): 'GAUSS_50621': "[GAUSS-50621] : Failed to check network care speed.\n", 'GAUSS_50622': "[GAUSS-50622] : Failed to obtain network card " "interrupt count numbers. Commands for getting " - "interrupt count numbers: %s." + "interrupt count numbers: %s.", + 'GAUSS_50623': "[GAUSS-50623] : Ping cluster nodes failed. Successfully ping node: %s." } @@ -631,7 +633,8 @@ class ErrorCode(): "the %s parameter is not needed.", 'GAUSS_51656': "[GAUSS-51656] : Waiting for udev trigger to end timeout", 'GAUSS_51657': "[GAUSS-51657] : Waiting for start %s to end timeout", - 'GAUSS_51658': "[GAUSS-51658] : The azName is different, and the value of azPriority must be different. " + 'GAUSS_51658': "[GAUSS-51658] : The azName is different, and the value of azPriority must be different. ", + 'GAUSS_51659': "[GAUSS-51659] : The cluster status detected by the \"%s\" command is abnormal. 
" } ########################################################################### diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 0424c911..8fd4b74d 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -21,6 +21,7 @@ import json import os import re +import sys import time from datetime import datetime from datetime import timedelta @@ -62,8 +63,8 @@ class DoradoDisasterRecoveryBase(object): self.local_host = None self.local_ip = None self.is_single_inst = None - self.streaming_file_dir = None - self.streaming_xml = None + self.dorado_file_dir = None + self.dorado_xml = None self.cluster_node_names = None self.normal_cm_ips = [] self.normal_node_list = [] @@ -96,8 +97,8 @@ class DoradoDisasterRecoveryBase(object): self.local_ip = DefaultValue.getIpByHostName() self.is_single_inst = True if self.cluster_info.isSingleInstCluster() else None self.cluster_node_names = self.cluster_info.getClusterNodeNames() - self.streaming_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) - self.streaming_xml = os.path.join(self.streaming_file_dir, + self.dorado_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) + self.streaming_xml = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) self.mpp_file = EnvUtil.getMpprcFile() @@ -139,14 +140,14 @@ class DoradoDisasterRecoveryBase(object): raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "init step file path") else: step_file_name = DoradoDisasterRecoveryConstants.DDR_STEP_FILES[self.params.task] - self.step_file_path = os.path.join(self.streaming_file_dir, step_file_name) + self.step_file_path = os.path.join(self.dorado_file_dir, step_file_name) self.logger.debug("Init step file:%s." % self.step_file_path) def read_cluster_conf_record(self, check_file_exist=True): """ Read cluster conf from file """ - cluster_conf_record = os.path.join(self.streaming_file_dir, + cluster_conf_record = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) if not os.path.isfile(cluster_conf_record): if check_file_exist: @@ -354,7 +355,7 @@ class DoradoDisasterRecoveryBase(object): self.logger.log("Start prepare secure files.") secure_dir_name = DoradoDisasterRecoveryConstants.GS_SECURE_FILES temp_secure_dir_path = os.path.realpath( - os.path.join(self.streaming_file_dir, secure_dir_name)) + os.path.join(self.dorado_file_dir, secure_dir_name)) if os.path.isdir(temp_secure_dir_path): self.logger.debug("Secure file dir exist, cleaning...") FileUtil.removeDirectory(temp_secure_dir_path) @@ -406,13 +407,13 @@ class DoradoDisasterRecoveryBase(object): """ Remove streaming files dir """ - cmd = "if [ -d %s ]; then rm %s -rf;fi" % (dir_path, self.streaming_file_dir) + cmd = "if [ -d %s ]; then rm %s -rf;fi" % (dir_path, self.dorado_file_dir) self.ssh_tool.executeCommand(cmd) self.logger.debug("Successfully remove dir [%s] on all nodes." 
% dir_path) - def query_streaming_step(self): + def query_dorado_step(self): """ - Streaming step + dorado step """ step = -1 if os.path.isfile(self.step_file_path): @@ -426,7 +427,7 @@ class DoradoDisasterRecoveryBase(object): (step, self.params.task)) return step - def write_streaming_step(self, step): + def write_dorado_step(self, step): """ write streaming step :return: NA @@ -441,7 +442,7 @@ class DoradoDisasterRecoveryBase(object): """ Generate cluster status file """ - tmp_file = os.path.join(self.streaming_file_dir, + tmp_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_STATUS_TMP_FILE) cmd = ClusterCommand.getQueryStatusCmd("", tmp_file) self.logger.debug("Command for checking cluster state: %s" % cmd) @@ -533,7 +534,7 @@ class DoradoDisasterRecoveryBase(object): """ Parse cluster status """ - tmp_file = os.path.join(self.streaming_file_dir, + tmp_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_STATUS_TMP_FILE) if (not os.path.isfile(tmp_file)) and (not current_status): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] @@ -612,25 +613,22 @@ class DoradoDisasterRecoveryBase(object): def check_dn_instance_params(self): """set_dn_instance_params""" - check_dick = {"enable_dcf": "off", "synchronous_commit": "on"} + check_dick = {"enable_dcf": "off"} dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes] - if len(dn_insts) <= 2: - self.logger.debug("Need set most available for current cluster.") - check_dick.update({"most_available_sync": "on"}) primary_dn_insts = [inst for inst in dn_insts if inst.instanceId in self.primary_dn_ids] if not primary_dn_insts: self.logger.debug("The primary dn not exist, do not need check dn inst params.") return execute_dn = primary_dn_insts[0] param_list = [] - guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + guc_backup_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) if not os.path.isfile(guc_backup_file): FileUtil.createFileInSafeMode(guc_backup_file, DefaultValue.KEY_FILE_MODE_IN_OS) for peer_check, idx in list(check_dick.items()): param_list.append((execute_dn, {peer_check: idx})) ret = parallelTool.parallelExecute(self._check_dn_inst_param, param_list) - self.ssh_tool.scpFiles(guc_backup_file, self.streaming_file_dir, self.cluster_node_names) + self.ssh_tool.scpFiles(guc_backup_file, self.dorado_file_dir, self.cluster_node_names) if any(ret): self.logger.logExit('\n'.join(filter(bool, ret))) self.logger.debug("Successfully check dn inst default value.") @@ -641,7 +639,7 @@ class DoradoDisasterRecoveryBase(object): if len(param) != 2: error_msg = ErrorCode.GAUSS_521["GAUSS_52102"] % param return error_msg - guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + guc_backup_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) for sql_key, value in list(param[1].items()): sql = "show %s;" % sql_key (status, output) = ClusterCommand.remoteSQLCommand(sql, @@ -668,7 +666,7 @@ class DoradoDisasterRecoveryBase(object): Restore guc params in .streaming_guc_backup """ self.logger.debug("Start restore guc params.") - guc_backup_file = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) + guc_backup_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) if not 
os.path.isfile(guc_backup_file): self.logger.debug("Not found guc backup file, no need restore guc params.") params_record = DefaultValue.obtain_file_content(guc_backup_file) @@ -731,10 +729,10 @@ class DoradoDisasterRecoveryBase(object): """ data = {"remoteClusterConf": self.params.remoteClusterConf, "localClusterConf": self.params.localClusterConf} - file_path = os.path.join(self.streaming_file_dir, + file_path = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) FileUtil.write_update_file(file_path, data, DefaultValue.KEY_FILE_MODE_IN_OS) - self.ssh_tool.scpFiles(file_path, self.streaming_file_dir, self.cluster_node_names) + self.ssh_tool.scpFiles(file_path, self.dorado_file_dir, self.cluster_node_names) def __record_wal_keep_segments(self, param_list): """ @@ -766,7 +764,7 @@ class DoradoDisasterRecoveryBase(object): return self.logger.debug("Starting get wal_keep_segments default value.") wal_keep_segments = os.path.join( - self.streaming_file_dir, DoradoDisasterRecoveryConstants.WAL_KEEP_SEGMENTS) + self.dorado_file_dir, DoradoDisasterRecoveryConstants.WAL_KEEP_SEGMENTS) sql_check = "show wal_keep_segments;" param_list = [(dn_inst, sql_check, wal_keep_segments) for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes @@ -874,7 +872,7 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Command for changing instance pg_hba.conf file: %s" % cmd) self.get_all_connection_node_name("update_streaming_pg_hba") try: - self.ssh_tool.scpFiles(self.streaming_xml, self.streaming_file_dir) + self.ssh_tool.scpFiles(self.streaming_xml, self.dorado_file_dir) self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) except Exception as error: msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ @@ -1061,6 +1059,24 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug( "Successfully set all datanode guc param in postgres conf for streaming cluster.") + def set_datanode_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): + """ + set datanode guc param + :return: NA + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Set datanode guc [%s] to [%s] not for mode:%s." + % (guc_parameter, guc_value, self.params.mode)) + return + cmd = "gs_guc %s -Z datanode -N all -I all -c \"%s=%s\" " % \ + (guc_type, guc_parameter, guc_value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % "set datanode guc [%s] to [%s], output:%s" \ + % (guc_parameter, guc_value, output) + self.logger.debug(msg) + def set_cmserver_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): """ set cmserver guc param @@ -1126,7 +1142,7 @@ class DoradoDisasterRecoveryBase(object): """ file_path = os.path.join(dir_name, "pg_hba.conf") old_file_path = os.path.join(dir_name, "pg_hba.conf.old") - dest_file = os.path.join(self.streaming_file_dir, "%s_pg_hba.conf" % inst_id) + dest_file = os.path.join(self.dorado_file_dir, "%s_pg_hba.conf" % inst_id) if self.local_host == node_name: if mode == "backup" and not os.path.isfile(dest_file): if os.path.isfile(file_path): @@ -1165,7 +1181,7 @@ class DoradoDisasterRecoveryBase(object): Backup or restore pg_ident file. 
""" file_path = os.path.join(dir_name, "pg_ident.conf") - dest_file = os.path.join(self.streaming_file_dir, "%s_pg_ident.conf" % inst_id) + dest_file = os.path.join(self.dorado_file_dir, "%s_pg_ident.conf" % inst_id) if self.local_host == node_name: if mode == "backup" and not os.path.isfile(dest_file): if os.path.isfile(file_path): @@ -1531,7 +1547,7 @@ class DoradoDisasterRecoveryBase(object): return self.logger.debug("Starting restore wal_keep_segments default value.") default_value_dict = {} - wal_keep_segments = os.path.join(self.streaming_file_dir, + wal_keep_segments = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.WAL_KEEP_SEGMENTS) if not os.path.isfile(wal_keep_segments): self.logger.debug("Not found wal keep segments record file, no need restore.") @@ -1545,33 +1561,42 @@ class DoradoDisasterRecoveryBase(object): self.set_wal_keep_segments("reload", default_value_dict, True) self.logger.debug("Successfully restore wal_keep_segments default value.") - def __clean_streaming_files_on_local_node(self, file_name_list): + def __clean_dorado_files_on_local_node(self, file_name_list): file_name_list = [file_name_list] \ if not isinstance(file_name_list, list) else file_name_list for file_name in file_name_list: - file_path = os.path.join(self.streaming_file_dir, file_name) + file_path = os.path.join(self.dorado_file_dir, file_name) if os.path.isfile(file_path): FileUtil.removeFile(file_path) self.logger.debug("Successfully removed file:[%s]" % file_path) + def clean_flag_file(self): + """ + Clean flag file + """ + flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + if os.path.exists(flag_file): + self.logger.debug("Successfully removed flag file %s." % flag_file) + os.remove(flag_file) + def clean_step_file(self): """ Clean step file for each action """ step_file = os.path.basename(self.step_file_path) - self.__clean_streaming_files_on_local_node(step_file) + self.__clean_dorado_files_on_local_node(step_file) self.logger.log("Successfully removed step file.") def check_action_and_mode(self): """ Check action and mode if step file exist. - if any streaming options not finished(step file exist), + if any dorado options not finished(step file exist), not allowed doing any other streaming options except query. """ self.logger.debug("Checking action and mode.") exist_step_file_names = [] for file_name in DoradoDisasterRecoveryConstants.DDR_STEP_FILES.values(): - step_file_path = os.path.join(self.streaming_file_dir, file_name) + step_file_path = os.path.join(self.dorado_file_dir, file_name) if os.path.isfile(step_file_path) and file_name != ".ddr_query.step": exist_step_file_names.append(file_name) if exist_step_file_names and set(exist_step_file_names) ^ {os.path.basename( @@ -1584,19 +1609,19 @@ class DoradoDisasterRecoveryBase(object): "doing current options" % (exist_step_file_names, exist_action)) self.logger.debug("Successfully checked action and mode.") - def clean_streaming_dir(self): + def clean_dorado_dir(self): """ - Clean streaming dir when stop or failover + Clean dorado dir when stop or failover """ - self.logger.debug("Start clean streaming dir:%s." % self.streaming_file_dir) - cmd = g_file.SHELL_CMD_DICT["deleteDir"] % (self.streaming_file_dir, - self.streaming_file_dir) + self.logger.debug("Start clean dorado dir:%s." 
% self.dorado_file_dir) + cmd = g_file.SHELL_CMD_DICT["deleteDir"] % (self.dorado_file_dir, + self.dorado_file_dir) try: self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterNodeNames()) except Exception as error: self.logger.debug( - "Failed to remove streaming dir with error:%s" % error) - self.logger.log("Finished remove streaming dir.") + "Failed to remove dorado dir with error:%s" % error) + self.logger.log("Finished remove dorado dir.") def clean_global_config(self): """ @@ -1706,7 +1731,7 @@ class DoradoDisasterRecoveryBase(object): Find and copy key file dir from all dn dir """ local_temp_secure_path = os.path.join( - self.streaming_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) + self.dorado_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) if os.path.isdir(local_temp_secure_path): FileUtil.removeDirectory(local_temp_secure_path) rand_path = os.path.join(local_temp_secure_path, DoradoDisasterRecoveryConstants.HADR_KEY_RAND) @@ -1721,7 +1746,7 @@ class DoradoDisasterRecoveryBase(object): dn_inst.datadir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES)) cmd_copy_dir = cmd_tep % (key_file_path, self.mpp_file, self.trace_id, self.local_host, key_file_path, - self.streaming_file_dir, + self.dorado_file_dir, key_file_path, dn_inst.hostname) status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd_copy_dir) self.logger.debug("Copy cmd:%s" % cmd_copy_dir) @@ -1754,7 +1779,7 @@ class DoradoDisasterRecoveryBase(object): # check cluster user consistency self.__check_cluster_user() # distribute key files to all node - secure_dir_path = os.path.join(self.streaming_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) + secure_dir_path = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) self.__copy_hadr_user_key(secure_dir_path, update=True) FileUtil.removeDirectory(secure_dir_path) self.logger.log("Successfully build and distribute key files to all nodes.") @@ -1764,7 +1789,7 @@ class DoradoDisasterRecoveryBase(object): function: Check whether the version numbers of the host cluster and the disaster recovery cluster are the same """ - gs_secure_version = os.path.realpath(os.path.join(self.streaming_file_dir, + gs_secure_version = os.path.realpath(os.path.join(self.dorado_file_dir, "gs_secure_files/version.cfg")) master_commit_id = VersionInfo.get_version_info(gs_secure_version)[-1] local_version_file = VersionInfo.get_version_file() @@ -1781,7 +1806,7 @@ class DoradoDisasterRecoveryBase(object): function: Check whether the version numbers of the host cluster and the disaster recovery cluster are the same """ - user_file = os.path.realpath(os.path.join(self.streaming_file_dir, + user_file = os.path.realpath(os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES, DoradoDisasterRecoveryConstants.CLUSTER_USER_RECORD)) remote_user = DefaultValue.obtain_file_content(user_file, is_list=False) @@ -1896,7 +1921,7 @@ class DoradoDisasterRecoveryBase(object): params = [] dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes] - cluster_conf = os.path.join(self.streaming_file_dir, + cluster_conf = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) dn_num = DefaultValue.get_all_dn_num_for_dr(cluster_conf, dn_instances[0], self.cluster_info, self.logger) @@ -1916,7 +1941,7 @@ class DoradoDisasterRecoveryBase(object): :return: NA """ self.logger.log("Start remove cluster file.") - cluster_info_file = 
os.path.join(self.streaming_file_dir, + cluster_info_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.DDR_CLUSTER_CONF_RECORD) cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (cluster_info_file, cluster_info_file) try: @@ -2015,9 +2040,9 @@ class DoradoDisasterRecoveryBase(object): parallelTool.parallelExecute(self.concurrent_drop_slot, params) self.logger.log("Finished drop all node replication slots") - def update_streaming_info(self, key, value, only_mode=None): + def update_dorado_info(self, key, value, only_mode=None): """ - Update info for streaming status + Update info for dorado status """ if only_mode and self.params.mode != only_mode: self.logger.debug("Update query status [%s] to [%s] " @@ -2036,20 +2061,20 @@ class DoradoDisasterRecoveryBase(object): else: self.logger.debug("key error.") return - file_path = os.path.realpath(os.path.join(self.streaming_file_dir, key_stat)) + file_path = os.path.realpath(os.path.join(self.dorado_file_dir, key_stat)) with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, DefaultValue.KEY_FILE_MODE_IN_OS), "w") as fp_write: fp_write.write(value) host_names = self.get_all_connection_node_name( - action_flag="update_streaming_info", no_update=True) - self.ssh_tool.scpFiles(file_path, self.streaming_file_dir, host_names) + action_flag="update_dorado_info", no_update=True) + self.ssh_tool.scpFiles(file_path, self.dorado_file_dir, host_names) except Exception as error: self.logger.debug("Failed write info, key:%s, value:%s, " "error:%s." % (key, value, error)) def create_cluster_maintance_file(self, value): """ - add cluster_maintance file for streaming failover and switchover disaster_standby + add cluster_maintance file for dorado failover and switchover disaster_standby """ self.logger.debug("Start create cluster_maintance file.") try: @@ -2066,70 +2091,199 @@ class DoradoDisasterRecoveryBase(object): "error:%s." % (value, str(error))) self.logger.debug("Successfully create cluster_maintance file.") - def streaming_failover_single_inst(self, stream_disaster_step, action_flag=None): + + def check_datanode_query_info(self, params): """ - streaming disaster recovery failover for single_inst cluster + check datanode info by "gs_ctl query" command. 
""" - self.create_cluster_maintance_file("streaming failover") + state, dest_ip, datadir = params + # get mpprc file + mpprcFile = self.cluster_info.get_mpprc_file(self.user) + if dest_ip == self.local_host: + cmd = "source %s && gs_ctl query -D %s" % (mpprcFile, datadir) + else: + cmd = "pssh -H %s \"source %s && gs_ctl query -D %s \"" % (dest_ip, + mpprcFile, + datadir) + (status, output) = subprocess.getstatusoutput(cmd) + dbState = re.findall(r"db_state.*: (.*?)\n", output) + localRole = re.findall(r"local_role.*: (.*?)\n", output) + peerRole = re.findall(r"peer_role.*: (.*?)\n", output) + preeState = re.findall(r"pree_state.*: (.*?)\n", output) + channel = re.findall(r"channel.*: (.*?)\n", output) + if status == 0: + check_ok = 0 + if state == "Primary": + if (len(dbState) != 1 or dbState[0] != "Normal") or \ + (len(localRole) != 2 or localRole[0] != "Primary" or localRole[1] != "Primary") or \ + (len(peerRole) != 1 or peerRole[0] != "StandbyCluster_Standby") or \ + (len(preeState) != 1 or preeState[0] != "Normal") or \ + (len(channel) != 1 or "-->" not in channel[0]): + check_ok = -1 + elif state == "Main Standby": + if (len(dbState) != 1 or dbState[0] != "Normal") or \ + (len(localRole) != 2 or localRole[0] != "Main Standby" or localRole[1] != "Standby") or \ + (len(peerRole) != 1 or peerRole[0] != "Primary") or \ + (len(preeState) != 1 or preeState[0] != "Normal") or \ + (len(channel) != 1 or "<--" not in channel[0]): + check_ok = -1 + elif state == "Standby": + # 不管是主集群,还是灾难备集群仅仅检查 local_role 只有一个元素 Standby 和 db_state 为 Normal + if (len(dbState) != 1 or dbState[0] != "Normal") or \ + (len(localRole) != 1 or localRole[0] != "Standby"): + check_ok = -1 + else: + raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % state) + else: + check_ok = status + + return check_ok, output, dest_ip + + def check_dorado_datanode_query_info(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, + dorado_switchover=None): + """ + check gs_ctl query info + """ + self.logger.debug("Waiting for gs_ctl query status being satisfied.") + end_time = None if timeout <= 0 else datetime.now() + timedelta(seconds=timeout) + + self.init_cluster_status() + self.parse_cluster_status() + host_names = self.get_all_connection_node_name() + if len(host_names) != len(self.cluster_node_names): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50623"] % host_names) + check_params = [] + all_instances = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + for dn_inst in all_instances: + check_params.append([dn_inst.state, dn_inst.hostname, dn_inst.datadir]) + if len(check_params) <= 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "cluster") + while True: + check_status = 0 + time.sleep(10) + if end_time is not None and datetime.now() >= end_time: + check_status = 1 + self.logger.debug("Timeout. 
The gs_ctl query command cannot obtain the expected status.") + break + results = parallelTool.parallelExecute( + self.check_datanode_query_info, check_params) + for ret in results: + if ret[0] != 0: + self.logger.debug("Failed to check node[%s] info using \"gs_ctl query\" command " + "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) + check_status = 1 + if check_status == 0: + break + if check_status != 0: + if dorado_switchover == "dorado_switchover": + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") + self.logger.debug("Successfully wait for gs_ctl query status become Normal.", "constant") + + def dorado_failover_single_inst(self, dorado_disaster_step, action_flag=None): + """ + dorado disaster recovery failover for single_inst cluster + """ + self.create_cluster_maintance_file("dorado failover") if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.update_streaming_info("cluster", "promote") + self.update_dorado_info("cluster", "promote") # 0. check cluster status and get normal instance list - if stream_disaster_step < 0: + if dorado_disaster_step < 0: if action_flag == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") else: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "10%") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "10%") self.init_cluster_status() self.parse_cluster_status() - self.write_streaming_step("0_check_cluster_status_done_for_failover") - # 1.Specify max xid and max ter to start etcd - max_term_record = os.path.join(self.streaming_file_dir, ".max_term_record") - if stream_disaster_step < 1: - max_term = self.get_term_info() - term_key = "/%s/CMServer/status_key/term" % self.user - para_dict = {term_key: max_term, self.backup_open_key: "0"} - ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, - self.logger, self.user, para_dict) - DefaultValue.write_content_on_file(max_term_record, max_term) - self.write_streaming_step("1_start_etcd_done_for_failover") - self._failover_config_step(stream_disaster_step, action_flag) - self._failover_start_step(stream_disaster_step, action_flag, max_term_record) - - def _failover_start_step(self, stream_disaster_step, action_flag, max_term_record): + self.stop_cluster() + self.write_dorado_step("0_dorado_disaster_stop_cluster_for_failover") + flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + if os.path.exists(flag_file): + self.logger.debug("Delete file %s." 
% flag_file) + os.remove(flag_file) + self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + sys.exit(0) + if dorado_disaster_step < 1: + # 标志文件存在,检查远程复制的lun设备权限,更新进度,代表 "远程复制Pair"任务完成 + flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + if not os.path.exists(flag_file) or not self.check_xlog_file_path(): + self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + sys.exit(0) + self.write_dorado_step("1_set_remote_replication_pairs_for_failover") + self._failover_config_step(dorado_disaster_step, action_flag) + self._failover_start_step(dorado_disaster_step, action_flag) + + def check_dorado_datanode_query_info(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, + dorado_switchover=None): + """ + check gs_ctl query info + """ + self.logger.debug("Waiting for gs_ctl query status being satisfied.") + end_time = None if timeout <= 0 else datetime.now() + timedelta(seconds=timeout) + + host_names = self.get_all_connection_node_name() + if len(host_names) != len(self.cluster_node_names): + raise Exception(ErrorCode.GAUSS_506["GAUSS_50623"] % host_names) + check_params = [] + all_instances = [dn_inst for db_node in self.cluster_info.dbNodes + for dn_inst in db_node.datanodes] + for dn_inst in all_instances: + check_params.append([dn_inst.state, dn_inst.hostname, dn_inst.datadir]) + if len(check_params) <= 0: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "cluster") + while True: + check_status = 0 + time.sleep(10) + if end_time is not None and datetime.now() >= end_time: + check_status = 1 + self.logger.debug("Timeout. The gs_ctl query command cannot obtain the expected status.") + break + results = parallelTool.parallelExecute( + self.check_datanode_query_info, check_params) + for ret in results: + if ret[0] != 0: + self.logger.debug("Failed to check node[%s] info using \"gs_ctl query\" command " + "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) + check_status = 1 + if check_status == 0: + break + if check_status != 0: + if dorado_switchover == "dorado_switchover": + raise Exception( + ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") + self.logger.logExit( + ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") + self.logger.debug("Successfully wait for gs_ctl query status become Normal.", "constant") + + def _failover_start_step(self, dorado_disaster_step, action_flag): """ Failover step 5 & 6 """ - if stream_disaster_step < 5: + if dorado_disaster_step < 3: if action_flag == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "80%") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "80%") else: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "80%") - if not os.path.isfile(max_term_record): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % max_term_record) - _, dn_infos = self.get_specified_dn_infos() - max_term_list = DefaultValue.obtain_file_content(max_term_record) - if not max_term_list: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "read max term") - params = [(dn_info, max_term_list[0]) for dn_info in dn_infos] - if params: - parallelTool.parallelExecute(self.start_primary_dn, params) - self.write_streaming_step("5_start_primary_dn_done") - if stream_disaster_step < 6: + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "80%") + self.remove_cluster_maintance_file_for_switchover() + self.remove_cluster_maintance_file() 
self.start_cluster() - cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL, - DefaultValue.CLUSTER_STATUS_DEGRADED] + self.write_dorado_step("3_start_cluster_done") + if dorado_disaster_step < 4: + cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL] self.check_cluster_status(cluster_normal_status, check_current=True) - cluster_info = self.query_cluster_info() - self.parse_cluster_status(current_status=cluster_info) if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.clean_global_config() - self.restore_guc_params() - self.streaming_clean_archive_slot() - if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") - self.update_streaming_info("cluster", "normal") + self.check_dorado_datanode_query_info(timeout=30, + dorado_switchover="dorado_failover") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") + self.update_dorado_info("cluster", "normal") else: - self.update_streaming_info("cluster", "archive") + self.check_dorado_datanode_query_info(timeout=30, + dorado_switchover="dorado_switchover") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "100%") + self.update_dorado_info("cluster", "archive") def streaming_clean_archive_slot(self): """ @@ -2170,7 +2324,7 @@ class DoradoDisasterRecoveryBase(object): """ Get specified dn infos """ - tmp_file = os.path.join(self.streaming_file_dir, "cluster_state_tmp") + tmp_file = os.path.join(self.dorado_file_dir, "cluster_state_tmp") if not os.path.isfile(tmp_file) or update: cmd = ClusterCommand.getQueryStatusCmd(self.user, 0, tmp_file) self.logger.debug("Update cluster state with cmd: %s" % cmd) @@ -2267,31 +2421,20 @@ class DoradoDisasterRecoveryBase(object): self.ssh_tool.executeCommand(cmd, hostList=self.normal_node_list) self.logger.debug("Successfully set cm agent for streaming disaster.") - def _failover_config_step(self, stream_disaster_step, action_flag): + def _failover_config_step(self, dorado_disaster_step, action_flag): """ Failover step 2 - 4 """ # 2.Stop the cluster by node - if stream_disaster_step < 2: - if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.streaming_clean_replication_slot() - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "30%") - self.stop_cluster_by_node() - self.write_streaming_step("2_stop_cluster_done_for_failover") - # 3.Start the cluster in the main cluster mode - if stream_disaster_step < 3: + if dorado_disaster_step < 2: + if action_flag == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") + else: + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "30%") + self.set_datanode_guc("cluster_run_mode", "cluster_primary", "set") self.set_cmserver_guc("backup_open", "0", "set") - self.stream_disaster_set_cmagent_guc("agent_backup_open", "0", "set") - self.write_streaming_step("3_set_backup_open_for_failover") - # 4.Delete the relevant guc parameters and remove the disaster tolerance relationship - # based on streaming disaster recovery cluster, No need to delete for switchover. 
- if not action_flag: - if stream_disaster_step < 4: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "50%") - self.remove_all_stream_repl_infos() - self.remove_streaming_pg_hba(True) - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "70%") - self.write_streaming_step("4_remove_hba_repl_done_for_failover") + self.set_cmagent_guc("agent_backup_open", "0", "set") + self.write_dorado_step("2_set_cluster_guc_for_failover_done") def get_term_info(self): """get_term_info""" diff --git a/script/impl/dorado_disaster_recovery/ddr_constants.py b/script/impl/dorado_disaster_recovery/ddr_constants.py index 6e185b35..60d8b3da 100644 --- a/script/impl/dorado_disaster_recovery/ddr_constants.py +++ b/script/impl/dorado_disaster_recovery/ddr_constants.py @@ -54,7 +54,7 @@ class DoradoDisasterRecoveryConstants: STREAM_DISTRIBUTE_ACTION = "distribute_stream_failover" # GUC CHANGE MAP - GUC_CHANGE_MAP = {"most_available_sync": "on", "synchronous_commit": "on"} + GUC_CHANGE_MAP = {} # params in json file for each module STREAMING_JSON_PARAMS = { diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index ee341be5..7d57043b 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -40,7 +40,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): return self.logger.debug("Start first step of DisasterRecovery start.") #创建容灾过程使用的临时目录 - self.create_disaster_recovery_dir(self.streaming_file_dir) + self.create_disaster_recovery_dir(self.dorado_file_dir) #检查执行的标志文件 self.check_action_and_mode() self.init_cluster_status() @@ -61,7 +61,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.check_is_under_upgrade() #检查dn的GUC参数 #self.check_dn_instance_params() - self.write_streaming_step("2_check_cluster_step") + self.write_dorado_step("2_check_cluster_step") def _third_step_for_ddr_start(self, step): """ @@ -74,7 +74,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): #self.prepare_gs_secure_files(only_mode='primary') #self.build_and_distribute_key_files(only_mode='disaster_standby') #self.get_default_wal_keep_segments(only_mode='primary') - self.write_streaming_step("3_set_wal_segments_step") + self.write_dorado_step("3_set_wal_segments_step") def drop_replication_slot_on_dr_cluster(self, only_mode=None): """ @@ -117,7 +117,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start fourth step of streaming start.") self.set_wal_keep_segments( "reload", DoradoDisasterRecoveryConstants.MAX_WAL_KEEP_SEGMENTS, only_mode='primary') - self.write_streaming_step("4_set_wal_segments_step") + self.write_dorado_step("4_set_wal_segments_step") def _fifth_step_for_ddr_start(self, step): """ @@ -130,7 +130,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') #self.set_most_available(mode="reload", raise_error=False) self.stop_cluster_by_node(only_mode='disaster_standby') - self.write_streaming_step("5_set_wal_segments_step") + self.write_dorado_step("5_set_wal_segments_step") def common_step_for_ddr_start(self): """ @@ -150,7 +150,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start sixth step of streaming start.") 
self.set_cmserver_guc("backup_open", "1", "set", only_mode='disaster_standby') self.set_cmagent_guc("agent_backup_open", "1", "set", only_mode='disaster_standby') - self.write_streaming_step("6_set_guc_step") + self.write_dorado_step("6_set_guc_step") def _seventh_step_for_ddr_start(self, step): """ @@ -159,15 +159,15 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): if step >= 7: return self.logger.debug("Start seventh step of streaming start.") - self.update_streaming_info("cluster", "restore", only_mode='disaster_standby') + self.update_dorado_info("cluster", "restore", only_mode='disaster_standby') try: self.start_dss_instance(only_mode='disaster_standby') self.build_dn_instance(only_mode='disaster_standby') self.kill_dss_instance(only_mode='disaster_standby') except Exception as error: - self.update_streaming_info("cluster", "restore_fail", only_mode='disaster_standby') + self.update_dorado_info("cluster", "restore_fail", only_mode='disaster_standby') raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "build dns" + "Error:%s" % error) - self.write_streaming_step("7_build_dn_instance_step") + self.write_dorado_step("7_build_dn_instance_step") def _eighth_step_for_ddr_start(self, step): """ @@ -178,18 +178,18 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start eighth step of streaming start.") self.start_cluster(cm_timeout=DoradoDisasterRecoveryConstants.STANDBY_START_TIMEOUT, only_mode='disaster_standby') - self.update_streaming_info("cluster", "full_backup", only_mode='primary') + self.update_dorado_info("cluster", "full_backup", only_mode='primary') try: self.wait_main_standby_connection(only_mode='primary') except Exception as error: - self.update_streaming_info("cluster", "backup_fail", only_mode='primary') + self.update_dorado_info("cluster", "backup_fail", only_mode='primary') raise Exception(str(error)) ret = self.check_cluster_status(status_allowed=['Normal'], only_check=True, check_current=True) query_status = "recovery" if ret else "recovery_fail" - self.update_streaming_info("cluster", query_status, only_mode='disaster_standby') - self.update_streaming_info("cluster", "archive", only_mode='primary') - self.write_streaming_step("8_start_cluster_step") + self.update_dorado_info("cluster", query_status, only_mode='disaster_standby') + self.update_dorado_info("cluster", "archive", only_mode='primary') + self.write_dorado_step("8_start_cluster_step") def _ninth_step_for_ddr_start(self, step): """ @@ -224,7 +224,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def run(self): self.logger.log("Start create dorado storage disaster relationship.") - step = self.query_streaming_step() + step = self.query_dorado_step() self._first_step_for_ddr_start(step) #1.检查集群状态正常 self.parse_cluster_status() diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py index 77bdacc4..4f12b804 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py @@ -30,41 +30,41 @@ class DisasterRecoveryFailoverHandler(DoradoDisasterRecoveryBase): super().__init__(*args, **kwargs) def run(self): - self.logger.log("Start streaming disaster failover.") + self.logger.log("Start dorado disaster failover.") self.check_action_and_mode() - step = 
self.check_streaming_failover_workable(check_type_step=3, check_status_step=0) + step = self.check_dorado_failover_workable(check_type_step=3, check_status_step=0) self.check_is_under_upgrade() - self.init_cluster_conf() try: - self.streaming_failover_single_inst(step) - self.update_streaming_info("cluster", "normal") + self.dorado_failover_single_inst(step) + self.update_dorado_info("cluster", "normal") self.clean_step_file() + self.clean_flag_file() except Exception as error: - self.update_streaming_info("cluster", "promote_fail") + self.update_dorado_info("cluster", "promote_fail") raise Exception( ErrorCode.GAUSS_516["GAUSS_51632"] % "centralize failover" + "Error:%s" % error) finally: self.remove_cluster_maintance_file() - self.clean_streaming_dir() - self.logger.log("Successfully do streaming disaster recovery failover.") + self.clean_dorado_dir() + self.logger.log("Successfully do dorado disaster recovery failover.") - def check_streaming_failover_workable(self, check_type_step=0, check_status_step=0): + def check_dorado_failover_workable(self, check_type_step=0, check_status_step=0): """ - Check streaming failover is workable. + Check dorado failover is workable. """ - self.logger.debug("Streaming disaster distribute cluster failover...") - stream_disaster_step = self.query_streaming_step() + self.logger.debug("dorado disaster distribute cluster failover...") + dorado_disaster_step = self.query_dorado_step() if not DefaultValue.is_disaster_cluster(self.cluster_info) \ - and stream_disaster_step < check_type_step: + and dorado_disaster_step < check_type_step: self.logger.debug("The primary dn exist, do nothing except record the result file.") raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % - "streaming disaster cluster failover, Because the primary cluster " + "dorado disaster cluster failover, Because the primary cluster " "does not support failover") cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL, DefaultValue.CLUSTER_STATUS_DEGRADED] - if stream_disaster_step < check_status_step: + if dorado_disaster_step < check_status_step: self.init_cluster_status() self.parse_cluster_status() - if stream_disaster_step < check_status_step: + if dorado_disaster_step < check_status_step: self.check_cluster_status(cluster_normal_status) - return stream_disaster_step + return dorado_disaster_step diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py index dc7ffea3..a2825fe9 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py @@ -35,7 +35,7 @@ class StreamingQueryHandler(DoradoDisasterRecoveryBase): """ Query infos from files. 
""" - file_path = os.path.realpath(os.path.join(self.streaming_file_dir, file_name)) + file_path = os.path.realpath(os.path.join(self.dorado_file_dir, file_name)) if not os.path.isfile(file_path) and file_name in [DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT]: return "normal" if not os.path.isfile(file_path): diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py index abe08902..be1c289e 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py @@ -46,7 +46,7 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): self.check_cluster_status(status_allowed=['Normal']) self.check_cluster_type(allowed_type='primary') self.check_is_under_upgrade() - self.write_streaming_step("2_check_cluster_step") + self.write_dorado_step("2_check_cluster_step") def _third_step_for_streaming_stop(self, step): """ @@ -57,7 +57,7 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start third step of streaming stop.") self.remove_all_stream_repl_infos(guc_mode="reload") self.remove_streaming_cluster_file() - self.write_streaming_step("3_remove_config_step") + self.write_dorado_step("3_remove_config_step") def _fourth_step_for_streaming_stop(self, step): """ @@ -68,7 +68,7 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start fourth step of streaming stop.") self.remove_streaming_pg_hba() self.restore_guc_params() - self.write_streaming_step("4_remove_pg_hba_step") + self.write_dorado_step("4_remove_pg_hba_step") def _fifth_step_for_streaming_stop(self, step): """ @@ -78,7 +78,7 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): return self.logger.debug("Start fifth step of streaming start.") self.streaming_clean_replication_slot() - self.write_streaming_step("5_update_config_step") + self.write_dorado_step("5_update_config_step") def _sixth_step_for_streaming_stop(self, step): """ @@ -89,12 +89,12 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): self.logger.debug("Start sixth step of streaming stop.") self.check_cluster_status(['Normal']) self.clean_global_config() - self.update_streaming_info("cluster", "normal") - self.clean_streaming_dir() + self.update_dorado_info("cluster", "normal") + self.clean_dorado_dir() def run(self): self.logger.log("Start remove streaming disaster relationship.") - step = self.query_streaming_step() + step = self.query_dorado_step() self._first_step_for_streaming_stop(step) self.parse_cluster_status() self._second_step_for_streaming_stop(step) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py index 2763ae77..3a2c077f 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -18,8 +18,11 @@ # ---------------------------------------------------------------------------- # Description : streaming_disaster_recovery_switchover.py is a utility for # changing role between primary cluster and standby cluster. 
-
+import json
 import os
+import re
+import subprocess
+import sys
 import time
 from datetime import datetime, timedelta
 
@@ -36,103 +39,113 @@ from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryCo
 class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.remote_replication_pairs_log_message = \
+            "Please configure \"Remote Replication Pairs\" correctly on the storage management interface.\n"\
+            "And check and grant appropriate permissions to the corresponding device files.\n"\
+            "After completing these steps, create the flag file %s to inform the tool and execute the tool again."
 
     def run(self):
         """
-        streaming disaster recovery switchover
+        dorado disaster recovery switchover
        """
-        self.logger.log("Start streaming disaster switchover.")
+        self.logger.log("Start dorado disaster switchover.")
         self.check_action_and_mode()
         self.check_switchover_workable()
-        self.init_cluster_conf()
         self.check_dn_instance_params()
         self.check_is_under_upgrade()
         try:
-            self.streaming_switchover_single_inst()
+            self.dorado_switchover_single_inst()
             self.clean_step_file()
+            self.clean_flag_file()
         except Exception as error:
             if self.params.mode == "primary":
-                self.update_streaming_info("cluster", "promote_fail")
+                self.update_dorado_info("cluster", "promote_fail")
             raise Exception(
                 ErrorCode.GAUSS_516["GAUSS_51632"] % "switchover" + "Error:%s" % str(error))
         finally:
             self.remove_cluster_maintance_file_for_switchover()
             self.remove_cluster_maintance_file()
-        self.logger.log("Successfully do streaming disaster recovery switchover.")
+        self.logger.log("Successfully do dorado disaster recovery switchover.")
+
+    def check_xlog_file_path(self):
+        """
+        Get and check xlog_file_path.
+        """
+        linkDev = self.dorado_info
+        if os.path.islink(linkDev):
+            linkDev = os.readlink(self.dorado_info)
+        if not os.access(linkDev, os.R_OK | os.W_OK):
+            self.logger.debug(ErrorCode.GAUSS_501["GAUSS_50113"] % (linkDev, self.user))
+            return False
+        return True
 
-    def streaming_switchover_single_inst(self):
+    def dorado_switchover_single_inst(self):
         """
-        streaming disaster recovery switchover for single_inst cluster
+        dorado disaster recovery switchover for single_inst cluster
         disaster_standby: expect primary cluster becomes standby
         primary: expect standby cluster becomes primary
         """
-        self.create_cluster_maintance_file("streaming switchover")
-        self.update_streaming_info("cluster", DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER)
-        stream_disaster_step = self.query_streaming_step()
+        self.create_cluster_maintance_file("dorado switchover")
+        self.update_dorado_info("cluster", DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER)
+        dorado_disaster_step = self.query_dorado_step()
         if self.params.mode == "primary":
-            end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout)
-            self.logger.log("Waiting for switchover barrier.")
-            while True:
-                switchover_barrier_list = self.check_streaming_disaster_switchover_barrier()
-                if len(switchover_barrier_list) == len(self.normal_dn_ids):
-                    break
-                if datetime.now() >= end_time:
-                    self.restart_cluster()
-                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
-                                    "check switchover_barrier on all main standby dn" +
-                                    " Because check timeout: %ss" %
-                                    str(self.params.waitingTimeout))
-                time.sleep(5)
-            self.streaming_failover_single_inst(stream_disaster_step,
-                                                DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER)
+            # Here we could wait for the "Remote Copy Pairs" synchronization
+            # status to complete before promoting.
+            self.dorado_failover_single_inst(dorado_disaster_step,
+                                             DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER)
         else:
self.add_cluster_maintance_file_for_switchover() try: - if stream_disaster_step < 1: + if dorado_disaster_step < 1: self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") self.stop_cluster() - self.start_cluster() - self.streaming_disaster_set_master_cluster_in_switchover() - self.write_streaming_step("1_streaming_disaster_set_master_in_switchover") - if stream_disaster_step < 2: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") - ClusterInstanceConfig.set_data_on_dcc(self.cluster_info, - self.logger, self.user, - {self.backup_open_key: "2"}) - self.stop_cluster() - self.write_streaming_step("2_stop_cluster_for_switchover") - if stream_disaster_step < 3: - self.set_cmserver_guc("backup_open", "2", "set") - self.set_cmagent_guc("agent_backup_open", "2", "set") - self.write_streaming_step("3_set_backup_open_2_done") - if stream_disaster_step < 4: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "50%") + self.write_dorado_step("1_dorado_disaster_stop_cluster_for_switchover") + flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + if os.path.exists(flag_file): + self.logger.debug("Delete file %s." % flag_file) + os.remove(flag_file) + self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + sys.exit(0) + if dorado_disaster_step < 2: + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") + flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + if not os.path.exists(flag_file) or not self.check_xlog_file_path(): + self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + sys.exit(0) + self.write_dorado_step("2_set_remote_replication_pairs_for_switchover") + if dorado_disaster_step < 3: + self.set_datanode_guc("cluster_run_mode", "cluster_standby", "set") + self.set_cmserver_guc("backup_open", "1", "set") + self.set_cmagent_guc("agent_backup_open", "1", "set") + self.write_dorado_step("3_set_cluster_guc_done") + if dorado_disaster_step < 4: + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "50%") self.remove_cluster_maintance_file_for_switchover() self.remove_cluster_maintance_file() self.start_cluster() - self.write_streaming_step("4_start_cluster_done") - if stream_disaster_step < 5: + self.write_dorado_step("4_start_cluster_done") + if dorado_disaster_step < 5: self.wait_for_normal(timeout=self.params.waitingTimeout, - streaming_switchover="streaming_switchover") - self.streaming_clean_replication_slot() - self.update_streaming_info("cluster", "recovery") + dorado_switchover="dorado_switchover") + self.check_dorado_datanode_query_info(timeout=self.params.waitingTimeout, + dorado_switchover="dorado_switchover") + self.update_dorado_info("cluster", "recovery") except Exception as error: - self.logger.error("Failed to do streaming disaster cluster switchover, Error:" + self.logger.error("Failed to do dorado disaster cluster switchover, Error:" " \n%s" % str(error)) - rollback_step = self.query_streaming_step() + rollback_step = self.query_dorado_step() self.logger.debug("Roll back switchover step:%s" % rollback_step) self.remove_cluster_maintance_file_for_switchover() self.remove_cluster_maintance_file() - if rollback_step < 4 or (rollback_step >= 4 and - self.streaming_switchover_roll_back_condition()): - self.streaming_switchover_roll_back(update_query=True) + self.dorado_switchover_roll_back(update_query=True) self.clean_step_file() + 
self.clean_flag_file() raise Exception(error) self.remove_hadr_switchover_process_file() def remove_hadr_switchover_process_file(self): self.logger.debug("Remove hadr switchover process file for switchover.") - process_file = os.path.realpath(os.path.join(self.streaming_file_dir, + process_file = os.path.realpath(os.path.join(self.dorado_file_dir, ".hadr_switchover_stat")) cmd = "if [ -f {0} ]; then rm -rf {0}; fi".format(process_file) self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) @@ -203,7 +216,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): def add_cluster_maintance_file_for_switchover(self): """ - add cluster_maintance file for streaming disaster switchover to disaster_standby + add cluster_maintance file for dorado disaster switchover to disaster_standby """ self.logger.debug("Start add cluster_maintance file for switchover.") try: @@ -311,7 +324,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): return True def wait_for_normal(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, - streaming_switchover=None): + dorado_switchover=None): """ function:Wait the cluster become Normal or Degraded input:NA @@ -346,10 +359,10 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): break else: self.logger.debug("Cluster status is %s(%s)." % ( - cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) + cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) if check_status != 0: - if streaming_switchover == "streaming_switchover": + if dorado_switchover == "dorado_switchover": raise Exception( ErrorCode.GAUSS_528["GAUSS_52800"] % (cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) @@ -383,23 +396,29 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): % "set auto_csn_barrier" + "Error:%s" % output) self.logger.debug("Successfully %s auto_csn_barrier is %s." 
% (guc_mode, guc_value))
 
-    def streaming_switchover_roll_back(self, update_query=False):
+    def dorado_switchover_roll_back(self, update_query=False):
         """
-        streaming disaster cluster roll back in switchover
+        dorado disaster cluster roll back in switchover
         """
-        self.logger.log("Roll back streaming disaster cluster switchover...")
-        ClusterInstanceConfig.set_data_on_dcc(self.cluster_info,
-                                              self.logger, self.user,
-                                              {self.backup_open_key: "0"})
+        self.logger.log("Roll back dorado disaster cluster switchover...")
         self.stop_cluster()
-        self.set_cmserver_guc("backup_open", "0", "set")
-        self.set_cmagent_guc("agent_backup_open", "0", "set")
-        self.logger.log("Successfully modify cma and cms parameters to start according to primary "
+        if self.params.mode == "primary":
+            self.set_datanode_guc("cluster_run_mode", "cluster_standby", "set")
+            self.set_cmserver_guc("backup_open", "1", "set")
+            self.set_cmagent_guc("agent_backup_open", "1", "set")
+        else:
+            self.set_datanode_guc("cluster_run_mode", "cluster_primary", "set")
+            self.set_cmserver_guc("backup_open", "0", "set")
+            self.set_cmagent_guc("agent_backup_open", "0", "set")
+        self.logger.log("Successfully modified cma and cms parameters to start according to the original "
                        "cluster mode")
         if update_query:
-            self.update_streaming_info("cluster", "archive")
-        self.start_cluster()
-        self.logger.log("Successfully Roll back streaming disaster cluster switchover.")
+            self.update_dorado_info("cluster", "archive")
+        self.logger.debug("Please restore the original \"Remote Replication Pairs\" correctly on "
+                          "the storage management interface.\n"
+                          "And check and grant appropriate permissions to the corresponding device files.\n"
+                          "After completing these steps, start the cluster manually!")
+        self.logger.log("Successfully rolled back dorado disaster cluster switchover.")
 
     def check_streaming_disaster_switchover_barrier(self):
         """
@@ -432,13 +451,13 @@
                 and self.params.mode == "primary":
             self.logger.debug("The primary dn exist, do nothing except record the result file.")
             raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
-                            "streaming disaster cluster switchover, Because the primary cluster "
+                            "dorado disaster cluster switchover, because the primary cluster "
                             "[drClusterMode] parameter must be disaster_standby")
         if DefaultValue.is_disaster_cluster(self.cluster_info) and \
                 self.params.mode == "disaster_standby":
             self.logger.debug("The primary dn not exist, do nothing except record the result file.")
             raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
-                            "streaming disaster cluster switchover, Because the disaster_standby "
+                            "dorado disaster cluster switchover, because the disaster_standby "
                             "cluster [drClusterMode] parameter must be primary")
         self.logger.log("Waiting for cluster and all instances normal.")
         if self.params.mode == "primary":
@@ -447,7 +466,7 @@
             self.init_cluster_status()
             self.parse_cluster_status()
             if self.check_cluster_status(status_allowed=['Normal'], only_check=True,
-                                         is_log=False) and self.check_instances_ready_for_switchover():
+                    is_log=False) and self.check_instances_ready_for_switchover():
                 break
             if datetime.now() >= end_time:
                 raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"]
diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py
index 530d7d6a..eb316755 100644
--- a/script/impl/dorado_disaster_recovery/params_handler.py
+++ 
b/script/impl/dorado_disaster_recovery/params_handler.py @@ -143,7 +143,7 @@ gs_sdr is a utility for streaming disaster recovery fully options. Usage: gs_sdr -? | --help gs_sdr -V | --version - gs_sdr -t start -m [primary|disaster_standby] -X XMLFILE [-U DR_USERNAME] [-W DR_PASSWORD] [--json JSONFILE] [--time-out=SECS] [-l LOGFILE] + gs_sdr -t start -m [primary|disaster_standby] -X XMLFILE [-U DR_USERNAME] [-W DR_PASSWORD] [--time-out=SECS] [-l LOGFILE] gs_sdr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE] gs_sdr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE] gs_sdr -t failover [-l LOGFILE] @@ -160,7 +160,6 @@ General options: -W Disaster recovery user password. -X Path of the XML configuration file. -l Path of log file. - --json Path of params file for streaming options. --time-out=SECS Maximum waiting time when Main standby connect to the primary dn, default value is 1200s. """ @@ -201,8 +200,6 @@ class ParamsHandler(object): help='hadr user password.') parser.add_option('-X', dest='xml_path', type='string', help='Cluster config xml path.') - parser.add_option('--json', dest='json_path', type='string', - help='Config json file of streaming options') parser.add_option('--time-out=', dest='timeout', default="1200", type='string', help='time out.') parser.add_option("-l", dest='logFile', type='string', -- Gitee From 5c392b92a6dfde1837e2f4ff4ca992c89cbf5977 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 14 Aug 2023 16:01:02 +0800 Subject: [PATCH 03/23] dorado diaster recovery start comit --- script/gs_ddr | 5 +- .../impl/dorado_disaster_recovery/ddr_base.py | 400 +++++++++--------- .../dorado_disaster_recovery/ddr_constants.py | 16 +- .../dorado_diaster_recovery_start.py | 89 ++-- .../dorado_disaster_recovery_query.py | 8 +- .../params_handler.py | 30 +- 6 files changed, 270 insertions(+), 278 deletions(-) diff --git a/script/gs_ddr b/script/gs_ddr index f7c3793f..120726bd 100644 --- a/script/gs_ddr +++ b/script/gs_ddr @@ -58,7 +58,6 @@ class DoradoStorageDisasterRecoveryBase(object): self.logger = None self.trace_id = uuid.uuid1().hex self.dorado_info = None - DoradoStorageDisasterRecoveryBase.mock_process_user_sensitive_info() self.__init_globals() @staticmethod @@ -69,11 +68,11 @@ class DoradoStorageDisasterRecoveryBase(object): def __init_globals(self): self.user = UserUtil.getUserInfo()['name'] - tmp_logger_file = ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.STREAMING_LOG_FILE, self.user) + tmp_logger_file = ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.DDR_LOG_FILE, self.user) tmp_logger = GaussLog(tmp_logger_file, 'parse_and_validate_params', trace_id=self.trace_id) self.params = ParamsHandler(tmp_logger, self.trace_id).get_valid_params() self.log_file = self.params.logFile if self.params.logFile else \ - ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.STREAMING_LOG_FILE, self.user) + ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.DDR_LOG_FILE, self.user) self.logger = GaussLog(self.log_file, self.params.task, trace_id=self.trace_id) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 8fd4b74d..70792908 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -432,7 +432,7 @@ class DoradoDisasterRecoveryBase(object): write streaming step :return: NA """ - self.logger.debug("Streaming action:[%s] record current step:[%s]" + self.logger.debug("Dorado disaster recovery action:[%s] record current 
step:[%s]" % (self.params.task, step)) with os.fdopen(os.open(self.step_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, DefaultValue.KEY_FILE_MODE_IN_OS), "w") as fp_write: @@ -792,6 +792,25 @@ class DoradoDisasterRecoveryBase(object): % ("set wal_keep_segments for inst:%s" % inst.instanceId, str(output))) self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) + + + def __set_dn_xlog_file_path(self, params_list): + """ + Set xlog_file_path value in primary dn + """ + (inst, opt_type, value, mpprc_file) = params_list + self.logger.debug("Start [%s] shardNum [%s] node [%s] wal_keep_segments value [%s]." + % (opt_type, inst.mirrorId, inst.hostname, value)) + cmd = "source %s; gs_guc %s " \ + "-N %s -D %s -c \"xlog_file_path = '%s'\" " % \ + (mpprc_file, opt_type, inst.node, inst.datadir, value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Options:%s, Error: \n%s " + % ("set xlog_file_path for inst:%s" % inst.instanceId, str(output))) + self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " + "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) def set_wal_keep_segments(self, opt_type, value, restore_flag=False, only_mode=None): """ @@ -815,6 +834,78 @@ class DoradoDisasterRecoveryBase(object): parallelTool.parallelExecute(self.__set_wal_keep_segments_each_inst, params_list) self.logger.log("Successfully %s wal_keep_segments value: %s." % (opt_type, value)) + def set_xlog_file_path(self, xlog_file_path): + """ + guc set xlog_file_path value in primary dn + """ + self.__set_guc_param("xlog_file_path", xlog_file_path) + self.set_xlog_lock_file_path() + + def __set_xlog_lock_file_each_inst(self, params_list): + """ + Set xlog_lock_file_path value in each dn + """ + (inst, opt_type, value, mpprc_file) = params_list + self.logger.debug("Start [%s] shardNum [%s] node [%s] xlog_lock_file value [%s]." + % (opt_type, inst.mirrorId, inst.hostname, value)) + cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c \\\"xlog_lock_file_path = '%s'\\\"\"" % \ + (mpprc_file, inst.hostname, mpprc_file, opt_type, inst.datadir, value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Options:%s, Error: \n%s " + % ("set xlog_lock_file_path for inst:%s" % inst.instanceId, str(output))) + self.logger.debug("Successfully [%s] shardNum [%s] node [%s] xlog_lock_file_path " + "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) + + def set_xlog_lock_file_path(self, opt_type="set"): + """ + guc set xlog_lock_file_path value in primary dn + """ + self.logger.log("Starting %s xlog_lock_file_path param" % (opt_type)) + params_list=[] + for dbnode in self.cluster_info.dbNodes: + for inst in dbnode.datanodes: + lock_file = os.path.join(inst.datadir, "xlog_lock_file") + params_list.append((inst, opt_type, lock_file, self.mpp_file)) + + if not params_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for set xlog_lock_file_path") + + parallelTool.parallelExecute(self.__set_xlog_lock_file_each_inst, params_list) + self.logger.log("Successfully %s xlog_lock_file_path param." 
% (opt_type)) + + def set_application_name(self): + """ + guc set application_name value + """ + self.logger.log("Starting set application_name param" ) + app_name_prefix = "dn_master" if self.params.mode == "primary" \ + else "dn_standby" + params_list=[] + for dbnode in self.cluster_info.dbNodes: + for inst in dbnode.datanodes: + app_name = "%s_%s" % (app_name_prefix, inst.instanceId) + params_list.append((inst, "set", app_name, self.mpp_file)) + + if not params_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for set application_name") + + parallelTool.parallelExecute(self.__set_xlog_lock_file_each_inst, params_list) + self.logger.log("Successfully %s application_name param." % (opt_type)) + + def set_cluster_run_mode(self): + """ + guc set xlog_file_path value in primary dn + """ + cluster_run_mode = "cluster_primary" if self.params.mode == "primary" \ + else "cluster_standby" + self.__set_guc_param("cluster_run_mode", cluster_run_mode) + self.__set_guc_param("ha_module_debug", "off") + def __stop_one_node(self, node_id): """ Stop one node by node id @@ -859,7 +950,7 @@ class DoradoDisasterRecoveryBase(object): self.connected_nodes = connected_hosts return self.connected_nodes - def update_streaming_pg_hba(self): + def update_pg_hba(self): """ update pg_hba.conf, read config_param.json file and set other cluster ip :return:NA @@ -870,7 +961,7 @@ class DoradoDisasterRecoveryBase(object): self.mpp_file, OMCommand.getLocalScript( "Local_Config_Hba"), self.user, self.streaming_xml) self.logger.debug("Command for changing instance pg_hba.conf file: %s" % cmd) - self.get_all_connection_node_name("update_streaming_pg_hba") + self.get_all_connection_node_name("update_pg_hba") try: self.ssh_tool.scpFiles(self.streaming_xml, self.dorado_file_dir) self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) @@ -881,75 +972,6 @@ class DoradoDisasterRecoveryBase(object): raise Exception(msg) self.logger.log("Successfully update pg_hba config.") - def __get_repl_info_cmd(self, node_name, ret, dn_inst, opt_mode, idx): - """ - get_repl_info_cmd - """ - if node_name != self.local_host: - set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ - "-Z datanode -D %s -c " \ - "\\\"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s " \ - "remoteservice=%s iscascade=%s iscrossregion=%s'\\\"\"" - set_cmd = set_cmd % (self.mpp_file, node_name, - self.mpp_file, opt_mode, - dn_inst.datadir, idx, ret.group(1), - ret.group(2), ret.group(3), ret.group(4), - ret.group(5), ret.group(6), ret.group(7), - ret.group(8), "true", "false") - else: - set_cmd = "source %s ; gs_guc %s -Z datanode -D %s -c " \ - "\"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s " \ - "remoteservice=%s iscascade=%s iscrossregion=%s'\"" - set_cmd = set_cmd % (self.mpp_file, opt_mode, - dn_inst.datadir, idx, ret.group(1), - ret.group(2), ret.group(3), ret.group(4), - ret.group(5), ret.group(6), ret.group(7), - ret.group(8), "true", "false") - return set_cmd - - def __set_original_repl_info(self, dn_inst, node_name, opt_mode="set"): - """ - Rectify original replconninfos - """ - orignal_ports = None - if not all([dn_inst, node_name]): - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain dn infos") - for idx in range(1, DoradoDisasterRecoveryConstants.MAX_REPLICATION_NUMS + 1): - if node_name == 
self.local_host: - cmd = "source %s; gs_guc check -Z datanode -D %s " \ - "-c 'cross_cluster_replconninfo%s'" % (self.mpp_file, dn_inst.datadir, idx) - else: - cmd = "source %s; pssh -H %s 'source %s; gs_guc check " \ - "-Z datanode -D %s -c \"cross_cluster_replconninfo%s\"'" \ - % (self.mpp_file, node_name, self.mpp_file, dn_inst.datadir, idx) - self.logger.debug("Check original repl infos with cmd:%s" % cmd) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + - " Error: \n%s " % output) - if output.count("=NULL") > 2 or "iscrossregion=true" in output.lower(): - self.logger.debug("InstanceID:%s, Index:%s" % (dn_inst.instanceId, idx)) - return idx, orignal_ports - ret = re.search( - r"cross_cluster_replconninfo%s='localhost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" - r" localport=(\d{4,5}) " - r"remotehost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}) " - r"remoteport=(\d{4,5}) " % idx, output) - if not ret: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "search repl infos") - set_cmd = self.__get_repl_info_cmd(node_name, ret, dn_inst, opt_mode, idx) - self.logger.debug("Set original repl infos with cmd:%s" % set_cmd) - status, output = CmdUtil.retryGetstatusoutput(set_cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + - " Error: \n%s " % output) - orignal_ports = (ret.group(2), ret.group(3), ret.group(4)) - self.logger.debug("Successfully rectify original repl infos for instance:%s." - % dn_inst.instanceId) def __get_local_data_ip(self, inst_host): """ @@ -967,115 +989,66 @@ class DoradoDisasterRecoveryBase(object): return data_ip raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "obtain shards from local cluster info") - - def __config_one_dn_instance(self, params): + + def __get_remote_ips(self): """ - Config replconninfo for one dn instance + Get remote dn data ip """ - inst, opt_mode, remote_cluster_info = params - local_data_ip = self.__get_local_data_ip(inst.hostname) - base_dn_port = self.params.remoteClusterConf['port'] - self.logger.debug("Start config instance:[%s], got dataIp:[%s], port:[%s]." 
- % (inst.instanceId, local_data_ip, base_dn_port)) - if not all([local_data_ip, base_dn_port]): - raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] - % "dn port or dataIp for config instance") - inst_index, original_ports = self.__set_original_repl_info( - inst, inst.hostname, opt_mode=opt_mode) - repl_params = [] - shards = remote_cluster_info.get("shards") + remoteClusterConf = self.params.remoteClusterConf + shards = remoteClusterConf["shards"] + indx = 1 + remote_ips = [] for shard in shards: - for node_info in shard: - data_ip = node_info.get("dataIp") - shard_num = node_info.get("shardNum", '1') - if str(inst.mirrorId) == str(shard_num): - repl_params.append(( - shard_num, inst.hostname, local_data_ip, - inst.datadir, data_ip, inst_index, - original_ports, base_dn_port, opt_mode)) - inst_index += 1 - return repl_params - - def __do_config_dn_repl_info(self, params): - """ - function:config postgres conf - :return:NA + for node in shard: + ip = node["ip"] + data_ip = node["dataIp"] + remote_ips.append(data_ip) + + return remote_ips + + def __config_one_dn_instance(self, params): """ - shard_num, host, local_data_ip, data_dir, data_ip, index, \ - original_ports, base_port, opt_mode = params - local_port, local_heartbeat, local_service = original_ports - remote_base = int(base_port) - self.logger.debug("shard num %s base port is %s" % (shard_num, remote_base)) - remote_port = remote_base + 1 - remote_heartbeat = remote_base + 5 - remote_service = remote_base + 4 - is_cascade = "false" - if self.local_host == host: - guc_cmd = "source %s ; gs_guc %s -Z datanode -D %s " \ - "-c \"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s remoteservice=%s " \ - "iscascade=%s iscrossregion=true'\"" \ - % (self.mpp_file, opt_mode, data_dir, index, local_data_ip, local_port, - local_heartbeat, local_service, data_ip, remote_port, - remote_heartbeat, remote_service, is_cascade) - self.logger.debug("Set datanode postgres file for streaming " - "disaster cluster with cmd:%s" % guc_cmd) - else: - guc_cmd = "source %s; pssh -s -H %s \"source %s ; gs_guc %s -Z datanode -D %s " \ - "-c \\\"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s remoteservice=%s " \ - "iscascade=%s iscrossregion=true'\\\"\"" \ - % (self.mpp_file, host, - self.mpp_file, opt_mode, data_dir, index, - local_data_ip, local_port, local_heartbeat, - local_service, data_ip, remote_port, - remote_heartbeat, remote_service, is_cascade) - self.logger.debug("Set datanode postgres file for streaming " - "disaster cluster with cmd:%s" % guc_cmd) - status, output = CmdUtil.retryGetstatusoutput(guc_cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % guc_cmd + - " Error: \n%s " % output) + Config cross_cluster_replconninfo for one dn instance + """ + inst, opt_mode = params + local_dn_ip = inst.listenIps[0] + local_port = inst.port + remote_port = self.params.remoteClusterConf['port'] + remote_data_ips = self.__get_remote_ips() + + idx = 1 + for remote_ip in remote_data_ips: + set_cmd = "source %s ; gs_guc set -N %s -D %s -c " \ + "\"cross_cluster_replconninfo%s = 'localhost=%s localport=%s " \ + "remotehost=%s remoteport=%s '\"" \ + % (self.mpp_file, inst.hostname, inst.datadir, idx, + local_dn_ip, local_port, remote_ip, remote_port) + self.logger.debug("Set dn cross cluster replinfos with cmd:%s" % set_cmd) + idx += 1 + status, 
output = CmdUtil.retryGetstatusoutput(set_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + + " Error: \n%s " % output) + self.logger.debug("Successfully rectify original repl infos for instance:%s." + % inst.instanceId) + def config_cross_cluster_repl_info(self): """ - update postgresql.conf for replconninfo + update postgresql.conf for cross_cluster_replconninfo """ - self.logger.debug("set all datanode guc param in postgres conf for ddr cluster.") - repl_params = [] - opt_mode = "reload" if self.params.mode == "primary" else "set" + self.logger.debug("set all datanode guc param in postgres conf for cross_cluster_replconninfo.") + + opt_mode = "set" config_repl_params = [] datanode_instance = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes] for inst in datanode_instance: - config_repl_params.append((inst, opt_mode, self.params.remoteClusterConf)) + config_repl_params.append((inst, opt_mode)) rets = parallelTool.parallelExecute(self.__config_one_dn_instance, config_repl_params) - for param in rets: - repl_params += param - self.logger.debug("Got repl params:%s" % str(repl_params)) - parallelTool.parallelExecute(self.__do_config_dn_repl_info, repl_params) + self.logger.debug( - "Successfully set all datanode guc param in postgres conf for streaming cluster.") - - def set_datanode_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): - """ - set datanode guc param - :return: NA - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Set datanode guc [%s] to [%s] not for mode:%s." - % (guc_parameter, guc_value, self.params.mode)) - return - cmd = "gs_guc %s -Z datanode -N all -I all -c \"%s=%s\" " % \ - (guc_type, guc_parameter, guc_value) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ - % "set datanode guc [%s] to [%s], output:%s" \ - % (guc_parameter, guc_value, output) - self.logger.debug(msg) + "Successfully set all datanode guc param in postgres conf for cross_cluster_replconninfo.") def set_cmserver_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): """ @@ -1086,8 +1059,8 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Set cms guc [%s] to [%s] not for mode:%s." % (guc_parameter, guc_value, self.params.mode)) return - cmd = "gs_guc %s -Z cmserver -N all -I all -c \"%s=%s\" " % \ - (guc_type, guc_parameter, guc_value) + cmd = "source %s; cm_ctl %s --param --server -k \"%s=%s\" " % \ + (self.mpp_file, guc_type, guc_parameter, guc_value) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ @@ -1104,8 +1077,8 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Set cma guc [%s] to [%s] not for mode:%s." 
% (guc_parameter, guc_value, self.params.mode)) return - cmd = "gs_guc %s -Z cmagent -N all -I all -c \"%s=%s\" " % \ - (guc_type, guc_parameter, guc_value) + cmd = "source %s; cm_ctl %s --param --agent -k \"%s=%s\" " % \ + (self.mpp_file, guc_type, guc_parameter, guc_value) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ @@ -1300,18 +1273,65 @@ class DoradoDisasterRecoveryBase(object): raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "full build from remote cluster" + error_detail) self.logger.debug("Successfully build cascade standby dn:%s" % inst.instanceId) - - def start_dss_instance(self, only_mode=None): + + def __start_dss_and_build(self, params): """ Start dss server process """ - cmd = "source %s; export DSS_MAINTAIN=TRUE; dssserver -D %s & " % self.dss_home_dir + inst, mpprc_file = params + self.logger.debug("Start dssserver on node [%s] ." % inst.hostname) + + cmd = "source %s; pssh -H %s \"source %s ; export DSS_MAINTAIN=TRUE; " \ + " dssserver -D %s & \"" % (mpprc_file,inst.hostname, mpprc_file, self.dss_home_dir) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: - self.logger.error(ErrorCode.GAUSS_516["GAUSS_51600"] + - "status(%d), output(%s)" % (status, output)) + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Options:%s, Error: \n%s " + % ("start dssserver on node :%s" % inst.hostname, str(output))) + self.logger.debug("Successfully Start dssserver on node [%s] " % inst.hostname) + + build_cmd = "source %s; pssh -H %s \"source %s ; gs_ctl build -D %s -b cross_cluster_full -g 0 -q " \ + % (mpprc_file,inst.hostname, mpprc_file, inst.datadir) + status, output = CmdUtil.retryGetstatusoutput(build_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % build_cmd + + "Options:%s, Error: \n%s " + % ("build main_standby on node :%s" % inst.hostname, str(output))) + self.logger.debug("Successfully build main_standby in disaster standby cluster on node [%s] " % inst.hostname) + + kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 " \ + % (mpprc_file,inst.hostname, mpprc_file) + status, output = CmdUtil.retryGetstatusoutput(kill_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % kill_cmd + + "Options:%s, Error: \n%s " + % ("stop dssserver before start cluster on node :%s" % inst.hostname, str(output))) + self.logger.debug("Successfully stop dssserver before start cluster on node [%s] " % inst.hostname) return output + def start_dss_instance(self, only_mode=None): + """ + Start dss server process + """ + if self.params.mode == "primary" or self.params.mode != only_mode: + self.logger.debug("start dssserver step is not for mode:%s." % self.params.mode) + return + primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + + params_list = [] + for inst in primary_dn: + params_list.append((inst, self.mpp_file)) + + if not params_list: + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "obtain param list for start dssserver in disaster_standby") + parallelTool.parallelExecute(self.__start_dss_and_build, params_list) + self.logger.log("Successfully start dssserver and build main_standby inst : %s." 
% primary_dn) + return + + + def kill_dss_instance(self, only_mode=None): """ Start dss server process @@ -1429,26 +1449,19 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("The primary dn does not exist on current cluster.") return self.primary_dn_ids = p_inst_list - sql_check = "select 1 from pg_catalog.gs_hadr_local_rto_and_rpo_stat();" - sql_check_2 = "select 1 from pg_catalog.pg_stat_get_wal_senders() where " \ + sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where " \ "sync_state='Async' and peer_role='Standby' and peer_state='Normal';" param_list = [(dn_inst, sql_check) for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes - if dn_inst.instanceId in self.primary_dn_ids] - param_list_2 = [(dn_inst, sql_check_2) for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes if dn_inst.instanceId - in self.primary_dn_ids] + for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + if not param_list: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain param list for check main standby connection on primary dn") self.logger.debug("Start check main standby connection with sql:%s." % sql_check) results = parallelTool.parallelExecute(self.__check_one_main_standby_connection, param_list) - self.logger.debug("Start check main standby connection with sql:%s." % sql_check_2) - results_2 = parallelTool.parallelExecute(self.__check_one_main_standby_connection, - param_list_2) - return all(results+results_2) + return all(results) def wait_main_standby_connection(self, only_mode=None): if only_mode and self.params.mode != only_mode: @@ -2051,13 +2064,13 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Update query [%s] to [%s]." % (key, value)) try: if key == "cluster": - key_stat = DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT + key_stat = DoradoDisasterRecoveryConstants.DDR_CLUSTER_STAT elif key == DoradoDisasterRecoveryConstants.ACTION_FAILOVER: - key_stat = DoradoDisasterRecoveryConstants.HADR_FAILOVER_STAT + key_stat = DoradoDisasterRecoveryConstants.DDR_FAILOVER_STAT elif key == DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - key_stat = DoradoDisasterRecoveryConstants.HADR_SWICHOVER_STAT + key_stat = DoradoDisasterRecoveryConstants.DDR_SWICHOVER_STAT elif key == DoradoDisasterRecoveryConstants.ACTION_ESTABLISH: - key_stat = DoradoDisasterRecoveryConstants.HADR_ESTABLISH_STAT + key_stat = DoradoDisasterRecoveryConstants.DDR_ESTABLISH_STAT else: self.logger.debug("key error.") return @@ -2128,7 +2141,6 @@ class DoradoDisasterRecoveryBase(object): (len(channel) != 1 or "<--" not in channel[0]): check_ok = -1 elif state == "Standby": - # 不管是主集群,还是灾难备集群仅仅检查 local_role 只有一个元素 Standby 和 db_state 为 Normal if (len(dbState) != 1 or dbState[0] != "Normal") or \ (len(localRole) != 1 or localRole[0] != "Standby"): check_ok = -1 @@ -2287,8 +2299,8 @@ class DoradoDisasterRecoveryBase(object): def streaming_clean_archive_slot(self): """ - drop lot_type is physical and slot_name not contain (gs_roach_full,gs_roach_inc, - cn_xxx,dn_xxx, dn_xxx_hadr) on all cn node and all primary dn node if the + drop lot_type is physical and slot_name not contain (gs_roach_full,gs_roach_inc, + cn_xxx,dn_xxx, dn_xxx_hadr) on all cn node and all primary dn node if the slot_name exists when the disaster cluster become primary cluster """ self.logger.debug("Starting drop archive slots") diff --git a/script/impl/dorado_disaster_recovery/ddr_constants.py b/script/impl/dorado_disaster_recovery/ddr_constants.py 
index 60d8b3da..8469e324 100644 --- a/script/impl/dorado_disaster_recovery/ddr_constants.py +++ b/script/impl/dorado_disaster_recovery/ddr_constants.py @@ -16,8 +16,8 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_constants.py is utility for defining constants -# of streaming disaster recovery. +# Description : ddr_constants.py is utility for defining constants +# of dorado disaster recovery. ############################################################################# @@ -46,18 +46,18 @@ class DoradoDisasterRecoveryConstants: ACTION_ESTABLISH = "establish" # streaming query temp file - HADR_CLUSTER_STAT = ".hadr_cluster_stat" - HADR_FAILOVER_STAT = ".hadr_failover_stat" - HADR_SWICHOVER_STAT = ".hadr_switchover_stat" - HADR_ESTABLISH_STAT = ".hadr_establish_stat" + DDR_CLUSTER_STAT = ".ddr_cluster_stat" + DDR_FAILOVER_STAT = ".ddr_failover_stat" + DDR_SWICHOVER_STAT = ".ddr_switchover_stat" + DDR_ESTABLISH_STAT = ".ddr_establish_stat" - STREAM_DISTRIBUTE_ACTION = "distribute_stream_failover" + DDR_DISTRIBUTE_ACTION = "distribute_stream_failover" # GUC CHANGE MAP GUC_CHANGE_MAP = {} # params in json file for each module - STREAMING_JSON_PARAMS = { + DDR_JSON_PARAMS = { "start": ["localClusterConf", "remoteClusterConf"], "stop": ["localClusterConf", "remoteClusterConf"], "switchover": [], diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index 7d57043b..98f70007 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -63,50 +63,32 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): #self.check_dn_instance_params() self.write_dorado_step("2_check_cluster_step") + def common_step_for_ddr_start(self): + """ + Common step for ddr start between step 1 and 2 + """ + self.logger.debug("Start common config step of ddr start.") + self.distribute_cluster_conf() + #调用local/ConfigHba.py和streaming_xml进行设置,考虑使用gs_guc set适配 + self.update_pg_hba() + self.config_cross_cluster_repl_info() + self.set_xlog_file_path(self.dorado_info) + self.set_application_name() + self.set_cluster_run_mode() + def _third_step_for_ddr_start(self, step): """ Third step for streaming start """ if step >= 3: return - self.logger.debug("Start third step of streaming start.") - #self.drop_replication_slot_on_dr_cluster(only_mode="disaster_standby") + self.logger.debug("Start third step of ddr start.") + #self.prepare_gs_secure_files(only_mode='primary') #self.build_and_distribute_key_files(only_mode='disaster_standby') #self.get_default_wal_keep_segments(only_mode='primary') self.write_dorado_step("3_set_wal_segments_step") - def drop_replication_slot_on_dr_cluster(self, only_mode=None): - """ - Drop replication slot on dr cluster - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Drop replication slot opts not for mode:%s." 
% self.params.mode) - return - sql_check = "select slot_name from pg_get_replication_slots() where slot_type='logical'" - primary_dns = DefaultValue.get_primary_dn_instance_id("Primary", ignore=True) - if not primary_dns: - return - primary_insts = [inst for node in self.cluster_info.dbNodes - for inst in node.datanodes if str(inst.instanceId) in primary_dns] - dn_inst = primary_insts[0] - self.logger.debug("Start drop node %s [%s] slots" % (dn_inst.hostname, dn_inst.instanceId)) - status, output = ClusterCommand.remoteSQLCommand( - sql_check, self.user, dn_inst.hostname, dn_inst.port) - self.logger.debug("Get %s all replication slots, status=%d, output: %s." % - (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output))) - if status == 0 and output.strip(): - drop_slots = output.strip().split('\n') - for slot in drop_slots: - self.logger.debug("Starting drop node %s %s" % (dn_inst.instanceId, slot.strip())) - sql = "select * from pg_drop_replication_slot('%s');" % slot.strip() - status_dr, output_dr = ClusterCommand.remoteSQLCommand( - sql, self.user, dn_inst.hostname, dn_inst.port) - if status_dr != 0: - self.logger.debug("Failed to remove node %s %s with error: %s" % ( - dn_inst.hostname, slot.strip(), SensitiveMask.mask_pwd(output_dr))) - self.logger.debug( - "Successfully drop node %s %s" % (dn_inst.instanceId, slot.strip())) def _fourth_step_for_ddr_start(self, step): """ @@ -114,7 +96,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): """ if step >= 4: return - self.logger.debug("Start fourth step of streaming start.") + self.logger.debug("Start fourth step of ddr start.") self.set_wal_keep_segments( "reload", DoradoDisasterRecoveryConstants.MAX_WAL_KEEP_SEGMENTS, only_mode='primary') self.write_dorado_step("4_set_wal_segments_step") @@ -125,21 +107,14 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): """ if step >= 5: return - self.logger.debug("Start fifth step of streaming start.") - self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary') - self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') - #self.set_most_available(mode="reload", raise_error=False) - self.stop_cluster_by_node(only_mode='disaster_standby') - self.write_dorado_step("5_set_wal_segments_step") - - def common_step_for_ddr_start(self): - """ - Common step for ddr start between step 1 and 2 - """ - self.logger.debug("Start common config step of ddr start.") - self.distribute_cluster_conf() - self.update_streaming_pg_hba() - self.config_cross_cluster_repl_info() + self.logger.debug("Start fifth step of ddr start.") + # self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary') + # self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') + # self.set_most_available(mode="reload", raise_error=False) + #self.stop_cluster_by_node(only_mode='disaster_standby') + self.stop_cluster() + self.start_cluster(only_mode="primary") + self.write_ddr_step("5_set_wal_segments_step") def _sixth_step_for_ddr_start(self, step): """ @@ -158,12 +133,12 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): """ if step >= 7: return - self.logger.debug("Start seventh step of streaming start.") + self.logger.debug("Start seventh step of ddr start.") self.update_dorado_info("cluster", "restore", only_mode='disaster_standby') try: self.start_dss_instance(only_mode='disaster_standby') - self.build_dn_instance(only_mode='disaster_standby') - self.kill_dss_instance(only_mode='disaster_standby') + # 
self.build_dn_instance(only_mode='disaster_standby') + # self.kill_dss_instance(only_mode='disaster_standby') except Exception as error: self.update_dorado_info("cluster", "restore_fail", only_mode='disaster_standby') raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "build dns" + "Error:%s" % error) @@ -175,7 +150,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): """ if step >= 8: return - self.logger.debug("Start eighth step of streaming start.") + self.logger.debug("Start eighth step of ddr start.") self.start_cluster(cm_timeout=DoradoDisasterRecoveryConstants.STANDBY_START_TIMEOUT, only_mode='disaster_standby') self.update_dorado_info("cluster", "full_backup", only_mode='primary') @@ -199,7 +174,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): return self.logger.debug("Start ninth step of streaming start.") #self.restore_wal_keep_segments(only_mode='primary') - self.clean_gs_secure_dir() + #self.clean_gs_secure_dir() self.clean_step_file() def _check_and_refresh_disaster_user_permission(self): @@ -228,13 +203,11 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self._first_step_for_ddr_start(step) #1.检查集群状态正常 self.parse_cluster_status() - #dorado存储复制没有流复制user - #self._check_and_refresh_disaster_user_permission() + #检查集群内dn状态和cm服务 self._second_step_for_ddr_start(step) #更新pg_hba和replinfo self.common_step_for_ddr_start() - self._third_step_for_ddr_start(step) - self._fourth_step_for_ddr_start(step) + self._fifth_step_for_ddr_start(step) #设置CM backup_open参数,灾备backup_open=1, 主集群backup_open=0 self._sixth_step_for_ddr_start(step) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py index a2825fe9..371582ae 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py @@ -36,7 +36,7 @@ class StreamingQueryHandler(DoradoDisasterRecoveryBase): Query infos from files. 
""" file_path = os.path.realpath(os.path.join(self.dorado_file_dir, file_name)) - if not os.path.isfile(file_path) and file_name in [DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT]: + if not os.path.isfile(file_path) and file_name in [DoradoDisasterRecoveryConstants.DDR_CLUSTER_STAT]: return "normal" if not os.path.isfile(file_path): return "0%" @@ -141,15 +141,15 @@ class StreamingQueryHandler(DoradoDisasterRecoveryBase): self.parse_cluster_status(current_status=cluster_info) self.check_is_under_upgrade() check_cluster_stat = self.get_streaming_cluster_query_value( - DoradoDisasterRecoveryConstants.HADR_CLUSTER_STAT) + DoradoDisasterRecoveryConstants.DDR_CLUSTER_STAT) archive_status = self.check_archive(check_cluster_stat, self.cluster_status) recovery_status = self.check_recovery(check_cluster_stat, self.cluster_status) hadr_cluster_stat = archive_status or recovery_status or check_cluster_stat hadr_failover_stat = self.get_streaming_cluster_query_value( - DoradoDisasterRecoveryConstants.HADR_FAILOVER_STAT) + DoradoDisasterRecoveryConstants.DDR_FAILOVER_STAT) hadr_switchover_stat = self.get_streaming_cluster_query_value( - DoradoDisasterRecoveryConstants.HADR_SWICHOVER_STAT) + DoradoDisasterRecoveryConstants.DDR_SWICHOVER_STAT) if hadr_cluster_stat != "promote": hadr_failover_stat = "" if hadr_cluster_stat != "switchover": diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index eb316755..761ab39c 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -26,7 +26,7 @@ import json import optparse import getpass -from impl.streaming_disaster_recovery.streaming_constants import DoradoDisasterRecoveryConstants +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants from gspylib.common.DbClusterInfo import dbClusterInfo from gspylib.common.ErrorCode import ErrorCode from base_utils.security.security_checker import SecurityChecker, ValidationError @@ -76,6 +76,13 @@ def check_wait_timeout(value): description = "wait timeout" SecurityChecker.check_is_digit(description, value) +def check_dorado_config(value): + """ + Check dorado config + """ + description = "dorado config" + SecurityChecker.check_is_string(description, value) + def check_local_cluster_conf(value): """ @@ -115,8 +122,9 @@ STREAMING_PARAMS_FOR_MODULE = { "start": { "mode": check_streaming_start_mode, "xml_path": check_xml_file, - "hadrUserName": check_hadr_user, - "hadrUserPassword": check_hadr_pwd, + # "hadrUserName": check_hadr_user, + # "hadrUserPassword": check_hadr_pwd, + "doradoConfig": check_wait_timeout, "waitingTimeout": check_wait_timeout, "localClusterConf": check_local_cluster_conf, "remoteClusterConf": check_remote_cluster_conf @@ -194,10 +202,10 @@ class ParamsHandler(object): '"switchover", "failover", "query"') parser.add_option('-m', dest='mode', type='string', help='Cluster run mode. 
It could be ["primary", "disaster_standby"].') - parser.add_option('-U', dest='hadrusername', type='string', - help='hadr user name.') - parser.add_option('-W', dest='hadruserpasswd', type='string', - help='hadr user password.') + # parser.add_option('-U', dest='hadrusername', type='string', + # help='hadr user name.') + # parser.add_option('-W', dest='hadruserpasswd', type='string', + # help='hadr user password.') parser.add_option('-X', dest='xml_path', type='string', help='Cluster config xml path.') parser.add_option('--time-out=', dest='timeout', default="1200", type='string', @@ -236,7 +244,7 @@ class ParamsHandler(object): with open(file_path, 'r') as read_fp: param_dict = json.load(read_fp) for key, value in param_dict.items(): - if key not in DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS[self.params.task]: + if key not in DoradoDisasterRecoveryConstants.DDR_JSON_PARAMS[self.params.task]: continue setattr(self.params, key, value) return @@ -278,10 +286,10 @@ class ParamsHandler(object): self.__print_version_info() if not hasattr(self.params, 'task') or not self.params.task: raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") - if self.params.task not in DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS.keys(): + if self.params.task not in DoradoDisasterRecoveryConstants.DDR_JSON_PARAMS.keys(): raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't') # parse arguments in json/xml file - if DoradoDisasterRecoveryConstants.STREAMING_JSON_PARAMS[self.params.task]: + if DoradoDisasterRecoveryConstants.DDR_JSON_PARAMS[self.params.task]: self.__cluster_conf_parser(self.params.json_path) def __reload_hadr_user_info(self): @@ -331,7 +339,7 @@ class ParamsHandler(object): self.logger.log('Streaming disaster recovery ' + self.params.task + ' ' + self.trace_id) self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) self.__init_default_params() - self.__reload_hadr_user_info() + #self.__reload_hadr_user_info() for param_name, validate in STREAMING_PARAMS_FOR_MODULE[self.params.task].items(): check_value = getattr(self.params, param_name) if self.params.task == "stop": -- Gitee From 2ca0bb5f98cc7e0eec86053b3d053538eaa6c77b Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 14 Aug 2023 21:57:25 +0800 Subject: [PATCH 04/23] dorado config params --- build.sh | 6 ++++-- build/get_PlatForm_str.sh | 10 +++++----- script/gs_ddr | 1 + .../ddr_modules/dorado_diaster_recovery_start.py | 2 +- script/impl/dorado_disaster_recovery/params_handler.py | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/build.sh b/build.sh index ccf89dda..e4f76dba 100644 --- a/build.sh +++ b/build.sh @@ -54,7 +54,7 @@ done PLAT_FORM_STR=$(sh "${ROOT_DIR}/build/get_PlatForm_str.sh") if [ "${PLAT_FORM_STR}"x == "Failed"x ]; then - echo "We only support openEuler(aarch64), EulerOS(aarch64), FusionOS, CentOS, UnionTech(X86) platform." + echo "We only support kylin(aarch64), EulerOS(aarch64), FusionOS, CentOS, UnionTech(X86) platform." 
exit 1; fi @@ -68,6 +68,8 @@ if [ X$(echo $PLAT_FORM_STR | grep "centos") != X"" ]; then dist_version="CentOS" elif [ X$(echo $PLAT_FORM_STR | grep "openeuler") != X"" ]; then dist_version="openEuler" +elif [ X$(echo $PLAT_FORM_STR | grep "kylin") != X"" ]; then + dist_version="kylin" elif [ X$(echo $PLAT_FORM_STR | grep "fusionos") != X"" ]; then dist_version="FusionOS" elif [ X$(echo $PLAT_FORM_STR | grep "euleros") != X"" ]; then @@ -79,7 +81,7 @@ elif [ X$(echo $PLAT_FORM_STR | grep "asianux") != X"" ]; then elif [ X$(echo $PLAT_FORM_STR | grep "UnionTech") != X"" ]; then dist_version="UnionTech" else - echo "We only support openEuler(aarch64), EulerOS(aarch64), FusionOS, CentOS, Ubuntu(x86), UnionTech(x86) platform." + echo "We only support kylin(aarch64), EulerOS(aarch64), FusionOS, CentOS, Ubuntu(x86), UnionTech(x86) platform." echo "Kernel is $kernel" exit 1 fi diff --git a/build/get_PlatForm_str.sh b/build/get_PlatForm_str.sh index 2bd8af9b..98e3233e 100644 --- a/build/get_PlatForm_str.sh +++ b/build/get_PlatForm_str.sh @@ -19,14 +19,14 @@ function get_os_str() { cpu_arc=$(uname -p) - if [ "$os_name"x = "centos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + if [ "$os_name"x = "centos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then os_str=centos7.6_x86_64 elif [ "$os_name"x = "euleros"x ] && [ "$cpu_arc"x = "aarch64"x ]; then os_str=euleros2.0_sp8_aarch64 - elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "aarch64"x ]; then - os_str=openeuler_aarch64 - elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "x86_64"x ]; then - os_str=openeuler_x86_64 + elif [ "$os_name"x = "kylin"x ] && [ "$cpu_arc"x = "aarch64"x ]; then + os_str=kylin_aarch64 + elif [ "$os_name"x = "kylin"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + os_str=kylin_x86_64 elif [ "$os_name"x = "fusionos"x ] && [ "$cpu_arc"x = "aarch64"x ]; then os_str=fusionos_aarch64 elif [ "$os_name"x = "fusionos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then diff --git a/script/gs_ddr b/script/gs_ddr index 120726bd..3e699349 100644 --- a/script/gs_ddr +++ b/script/gs_ddr @@ -71,6 +71,7 @@ class DoradoStorageDisasterRecoveryBase(object): tmp_logger_file = ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.DDR_LOG_FILE, self.user) tmp_logger = GaussLog(tmp_logger_file, 'parse_and_validate_params', trace_id=self.trace_id) self.params = ParamsHandler(tmp_logger, self.trace_id).get_valid_params() + self.dorado_info = self.params.doradoConfig self.log_file = self.params.logFile if self.params.logFile else \ ClusterLog.getOMLogPath(DoradoDisasterRecoveryConstants.DDR_LOG_FILE, self.user) self.logger = GaussLog(self.log_file, self.params.task, trace_id=self.trace_id) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index 98f70007..a6311d89 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -72,7 +72,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): #调用local/ConfigHba.py和streaming_xml进行设置,考虑使用gs_guc set适配 self.update_pg_hba() self.config_cross_cluster_repl_info() - self.set_xlog_file_path(self.dorado_info) + self.set_xlog_file_path(self.params.doradoConfig) self.set_application_name() self.set_cluster_run_mode() diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 761ab39c..4857acba 100644 --- 
a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -124,7 +124,7 @@ STREAMING_PARAMS_FOR_MODULE = { "xml_path": check_xml_file, # "hadrUserName": check_hadr_user, # "hadrUserPassword": check_hadr_pwd, - "doradoConfig": check_wait_timeout, + "doradoConfig": check_dorado_config, "waitingTimeout": check_wait_timeout, "localClusterConf": check_local_cluster_conf, "remoteClusterConf": check_remote_cluster_conf @@ -212,7 +212,7 @@ class ParamsHandler(object): help='time out.') parser.add_option("-l", dest='logFile', type='string', help='Path of log file.') - parser.add_option("--dorado-info", dest='dorado_info', type='string', + parser.add_option("--dorado-config", dest='doradoConfig', type='string', help='Path of dorado xlog share disk.') return parser -- Gitee From 121afb4409fb783a033dace6fe447cfca652bb6f Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 15 Aug 2023 11:47:49 +0800 Subject: [PATCH 05/23] update ddr_base.py --- .../impl/dorado_disaster_recovery/ddr_base.py | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 70792908..cda0fca1 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -838,8 +838,24 @@ class DoradoDisasterRecoveryBase(object): """ guc set xlog_file_path value in primary dn """ - self.__set_guc_param("xlog_file_path", xlog_file_path) - self.set_xlog_lock_file_path() + self.set_xlog_path(xlog_file_path) + self.set_xlog_lock_file() + + def set_xlog_path(self, xlog_file_path): + """ + guc set xlog_file_path value + """ + self.logger.log("Starting set xlog_lock_file_path param") + cmd = "source %s && gs_guc set -Z datanode -N all -I all " \ + "-c \"xlog_file_path='%s'\"" \ + % (self.mpp_file, xlog_file_path) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error:%s" % output) + else: + self.logger.debug("Successfully set xlog_file_path %s." % xlog_file_path) + + self.logger.log("Successfully %s xlog_lock_file_path param." % (opt_type)) def __set_xlog_lock_file_each_inst(self, params_list): """ @@ -859,7 +875,7 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Successfully [%s] shardNum [%s] node [%s] xlog_lock_file_path " "value [%s]." 
% (opt_type, inst.mirrorId, inst.hostname, value)) - def set_xlog_lock_file_path(self, opt_type="set"): + def set_xlog_lock_file(self, opt_type="set"): """ guc set xlog_lock_file_path value in primary dn """ @@ -956,22 +972,19 @@ class DoradoDisasterRecoveryBase(object): :return:NA """ self.logger.log("Start update pg_hba config.") - FileUtil.cpFile(self.params.xml_path, self.streaming_xml) - cmd = "source %s; %s -U %s -X '%s' --try-reload" % ( - self.mpp_file, OMCommand.getLocalScript( - "Local_Config_Hba"), self.user, self.streaming_xml) - self.logger.debug("Command for changing instance pg_hba.conf file: %s" % cmd) - self.get_all_connection_node_name("update_pg_hba") - try: - self.ssh_tool.scpFiles(self.streaming_xml, self.dorado_file_dir) - self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes) - except Exception as error: - msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ - % "update streaming pg_hba with error:%s" % error - self.logger.debug(msg) - raise Exception(msg) - self.logger.log("Successfully update pg_hba config.") + remote_ips = self.__get_remote_ips() + for remote_ip in remote_ips: + cmd = "source %s ; gs_guc set -Z datanode -N all -I all -h " \ + "\"host all all %s/32 trust" \ + % (self.mpp_file, remote_ip) + self.logger.debug("Update pg_hba.conf with cmd: %s" % cmd) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + " Error: \n%s " % output) + self.logger.debug("Successfully update pg_hba config with remote datanode ip:%s." + % remote_ips) def __get_local_data_ip(self, inst_host): """ -- Gitee From c98140857f17cec40f55121271d2aa38ce12445c Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 15 Aug 2023 15:37:57 +0800 Subject: [PATCH 06/23] gs_ddr -t start bugfix --- .../impl/dorado_disaster_recovery/ddr_base.py | 30 +++++++++++++++---- .../dorado_diaster_recovery_start.py | 2 +- .../params_handler.py | 2 ++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index cda0fca1..582ab945 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -855,7 +855,7 @@ class DoradoDisasterRecoveryBase(object): else: self.logger.debug("Successfully set xlog_file_path %s." % xlog_file_path) - self.logger.log("Successfully %s xlog_lock_file_path param." % (opt_type)) + self.logger.log("Successfully set xlog_lock_file_path param: %s." % (xlog_file_path)) def __set_xlog_lock_file_each_inst(self, params_list): """ @@ -875,6 +875,24 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Successfully [%s] shardNum [%s] node [%s] xlog_lock_file_path " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) + def __set_app_name_each_inst(self, params_list): + """ + Set xlog_lock_file_path value in each dn + """ + (inst, opt_type, value, mpprc_file) = params_list + self.logger.debug("Start [%s] shardNum [%s] node [%s] application_name value [%s]." 
+ % (opt_type, inst.mirrorId, inst.hostname, value)) + cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ + "-Z datanode -D %s -c \\\"application_name = '%s'\\\"\"" % \ + (mpprc_file, inst.hostname, mpprc_file, opt_type, inst.datadir, value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + + "Options:%s, Error: \n%s " + % ("set application_name for inst:%s" % inst.instanceId, str(output))) + self.logger.debug("Successfully [%s] shardNum [%s] node [%s] application_name " + "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) + def set_xlog_lock_file(self, opt_type="set"): """ guc set xlog_lock_file_path value in primary dn @@ -910,15 +928,15 @@ class DoradoDisasterRecoveryBase(object): raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain param list for set application_name") - parallelTool.parallelExecute(self.__set_xlog_lock_file_each_inst, params_list) - self.logger.log("Successfully %s application_name param." % (opt_type)) + parallelTool.parallelExecute(self.__set_app_name_each_inst, params_list) + self.logger.log("Successfully set application_name param." ) def set_cluster_run_mode(self): """ guc set xlog_file_path value in primary dn """ - cluster_run_mode = "cluster_primary" if self.params.mode == "primary" \ - else "cluster_standby" + cluster_run_mode = "'cluster_primary'" if self.params.mode == "primary" \ + else "'cluster_standby'" self.__set_guc_param("cluster_run_mode", cluster_run_mode) self.__set_guc_param("ha_module_debug", "off") @@ -976,7 +994,7 @@ class DoradoDisasterRecoveryBase(object): for remote_ip in remote_ips: cmd = "source %s ; gs_guc set -Z datanode -N all -I all -h " \ - "\"host all all %s/32 trust" \ + "\"host all all %s/32 trust\"" \ % (self.mpp_file, remote_ip) self.logger.debug("Update pg_hba.conf with cmd: %s" % cmd) status, output = CmdUtil.retryGetstatusoutput(cmd) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index a6311d89..090575b1 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -114,7 +114,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): #self.stop_cluster_by_node(only_mode='disaster_standby') self.stop_cluster() self.start_cluster(only_mode="primary") - self.write_ddr_step("5_set_wal_segments_step") + self.write_dorado_step("5_set_wal_segments_step") def _sixth_step_for_ddr_start(self, step): """ diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 4857acba..a2ca5835 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -208,6 +208,8 @@ class ParamsHandler(object): # help='hadr user password.') parser.add_option('-X', dest='xml_path', type='string', help='Cluster config xml path.') + parser.add_option('--json', dest='json_path', type='string', + help='Config json file of streaming options') parser.add_option('--time-out=', dest='timeout', default="1200", type='string', help='time out.') parser.add_option("-l", dest='logFile', type='string', -- Gitee From 5713ba1e44988fa58739d67bee106391c01eb73b Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 15 Aug 2023 21:08:04 +0800 Subject: [PATCH 07/23] update 
ddr_base.py --- script/impl/dorado_disaster_recovery/ddr_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 582ab945..0ac37b1f 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -845,7 +845,7 @@ class DoradoDisasterRecoveryBase(object): """ guc set xlog_file_path value """ - self.logger.log("Starting set xlog_lock_file_path param") + self.logger.log("Starting set xlog_file_path param") cmd = "source %s && gs_guc set -Z datanode -N all -I all " \ "-c \"xlog_file_path='%s'\"" \ % (self.mpp_file, xlog_file_path) @@ -855,14 +855,14 @@ class DoradoDisasterRecoveryBase(object): else: self.logger.debug("Successfully set xlog_file_path %s." % xlog_file_path) - self.logger.log("Successfully set xlog_lock_file_path param: %s." % (xlog_file_path)) + self.logger.log("Successfully set xlog_file_path param: %s." % (xlog_file_path)) def __set_xlog_lock_file_each_inst(self, params_list): """ Set xlog_lock_file_path value in each dn """ (inst, opt_type, value, mpprc_file) = params_list - self.logger.debug("Start [%s] shardNum [%s] node [%s] xlog_lock_file value [%s]." + self.logger.debug("Start [%s] shardNum [%s] node [%s] xlog_lock_file_path value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ "-Z datanode -D %s -c \\\"xlog_lock_file_path = '%s'\\\"\"" % \ -- Gitee From a7be8e09f864054d3d6eedde94721b15a22d51c4 Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 16 Aug 2023 14:44:42 +0800 Subject: [PATCH 08/23] bugfix --- .../impl/dorado_disaster_recovery/ddr_base.py | 10 ++++++---- .../dorado_disaster_recovery/params_handler.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 0ac37b1f..470f0129 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1310,19 +1310,20 @@ class DoradoDisasterRecoveryBase(object): Start dss server process """ inst, mpprc_file = params - self.logger.debug("Start dssserver on node [%s] ." % inst.hostname) - + cmd = "source %s; pssh -H %s \"source %s ; export DSS_MAINTAIN=TRUE; " \ " dssserver -D %s & \"" % (mpprc_file,inst.hostname, mpprc_file, self.dss_home_dir) status, output = CmdUtil.retryGetstatusoutput(cmd) + self.logger.debug("Start dssserver on node [%s],cmd: %s." % inst.hostname, cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Options:%s, Error: \n%s " % ("start dssserver on node :%s" % inst.hostname, str(output))) self.logger.debug("Successfully Start dssserver on node [%s] " % inst.hostname) - build_cmd = "source %s; pssh -H %s \"source %s ; gs_ctl build -D %s -b cross_cluster_full -g 0 -q " \ + build_cmd = "source %s; pssh -H %s \"source %s ; gs_ctl build -D %s -b cross_cluster_full -g 0 -q\"" \ % (mpprc_file,inst.hostname, mpprc_file, inst.datadir) + self.logger.debug("Build main standby datanode on node [%s],cmd: %s." 
% inst.hostname, build_cmd) status, output = CmdUtil.retryGetstatusoutput(build_cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % build_cmd + @@ -1330,8 +1331,9 @@ class DoradoDisasterRecoveryBase(object): % ("build main_standby on node :%s" % inst.hostname, str(output))) self.logger.debug("Successfully build main_standby in disaster standby cluster on node [%s] " % inst.hostname) - kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 " \ + kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 \"" \ % (mpprc_file,inst.hostname, mpprc_file) + self.logger.debug("Stop dssserver proc on node [%s],cmd: %s." % inst.hostname, kill_cmd) status, output = CmdUtil.retryGetstatusoutput(kill_cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % kill_cmd + diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index a2ca5835..8eabed13 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -146,16 +146,16 @@ STREAMING_PARAMS_FOR_MODULE = { } HELP_MSG = """ -gs_sdr is a utility for streaming disaster recovery fully options. +gs_ddr is a utility for streaming disaster recovery fully options. Usage: - gs_sdr -? | --help - gs_sdr -V | --version - gs_sdr -t start -m [primary|disaster_standby] -X XMLFILE [-U DR_USERNAME] [-W DR_PASSWORD] [--time-out=SECS] [-l LOGFILE] - gs_sdr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE] - gs_sdr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE] - gs_sdr -t failover [-l LOGFILE] - gs_sdr -t query [-l LOGFILE] + gs_ddr -? | --help + gs_ddr -V | --version + gs_ddr -t start -m [primary|disaster_standby] -X XMLFILE [--time-out=SECS] [-l LOGFILE] + gs_ddr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE] + gs_ddr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE] + gs_ddr -t failover [-l LOGFILE] + gs_ddr -t query [-l LOGFILE] General options: -?, --help Show help information for this utility, and exit the command line mode. @@ -190,7 +190,7 @@ class ParamsHandler(object): """ parser = optparse.OptionParser(conflict_handler='resolve') parser.disable_interspersed_args() - parser.epilog = "Example: gs_sdr -t " \ + parser.epilog = "Example: gs_ddr -t " \ "start -m primary -X clusterConfig.xml " \ "--time-out=1200." 
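        # A few more illustrative invocations, sketched from the Usage text in
        # HELP_MSG above (the --dorado-config value below is only a placeholder
        # path for the dorado xlog share disk, not a value from this patch):
        #   gs_ddr -t start -m disaster_standby -X clusterConfig.xml --dorado-config=<xlog_share_disk_path>
        #   gs_ddr -t switchover -m disaster_standby --time-out=1200
        #   gs_ddr -t failover
        #   gs_ddr -t query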
parser.add_option('-V', "--version", dest='version_info', action='store_true', -- Gitee From 7da36d14738327c6542f6d6ceae58fa4d520d2a8 Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 16 Aug 2023 15:10:19 +0800 Subject: [PATCH 09/23] updata sql --- script/impl/dorado_disaster_recovery/ddr_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 470f0129..91a0be74 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1483,7 +1483,7 @@ class DoradoDisasterRecoveryBase(object): return self.primary_dn_ids = p_inst_list sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where " \ - "sync_state='Async' and peer_role='Standby' and peer_state='Normal';" + "sync_state='Async' and peer_role='StandbyCluster_Standby' and peer_state='Normal';" param_list = [(dn_inst, sql_check) for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] -- Gitee From 3e302b8c48530f11abc0da5fb14bbd3dd2dc6bea Mon Sep 17 00:00:00 2001 From: Hao Date: Wed, 16 Aug 2023 21:43:46 +0800 Subject: [PATCH 10/23] update and bugfix --- .../impl/dorado_disaster_recovery/ddr_base.py | 92 +++++++++---------- .../dorado_diaster_recovery_start.py | 41 ++++----- 2 files changed, 59 insertions(+), 74 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 91a0be74..5f46a22b 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -102,7 +102,7 @@ class DoradoDisasterRecoveryBase(object): DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) self.mpp_file = EnvUtil.getMpprcFile() - self.dss_home_dir = "" + self.dss_home_dir = self.cluster_info.dss_home self._init_step_file_path() def init_cluster_conf(self): @@ -1305,76 +1305,68 @@ class DoradoDisasterRecoveryBase(object): % "full build from remote cluster" + error_detail) self.logger.debug("Successfully build cascade standby dn:%s" % inst.instanceId) - def __start_dss_and_build(self, params): + def start_dss_instance(self, only_mode=None): """ - Start dss server process + Start dssserver process """ - inst, mpprc_file = params - + self.logger.log("Start start dssserver in main standby node.") + if only_mode and self.params.mode != only_mode: + self.logger.debug("Start dssserver step is not for mode:%s." % self.params.mode) + return + primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + main_standby_inst = primary_dn[0] + cmd = "source %s; pssh -H %s \"source %s ; export DSS_MAINTAIN=TRUE; " \ - " dssserver -D %s & \"" % (mpprc_file,inst.hostname, mpprc_file, self.dss_home_dir) + " dssserver -D $DSS_HOME & \"" % (self.mpp_file, main_standby_inst.hostname, self.mpp_file) + self.logger.debug("Start dssserver on node [%s],cmd: %s." % (main_standby_inst.hostname, cmd)) status, output = CmdUtil.retryGetstatusoutput(cmd) - self.logger.debug("Start dssserver on node [%s],cmd: %s." 
% inst.hostname, cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Options:%s, Error: \n%s " - % ("start dssserver on node :%s" % inst.hostname, str(output))) - self.logger.debug("Successfully Start dssserver on node [%s] " % inst.hostname) + % ("Start dssserver on node :%s" % main_standby_inst.hostname, str(output))) + self.logger.log("Successfully Start dssserver on node [%s] " % main_standby_inst.hostname) + def build_main_standby_datanode(self, only_mode=None): + """ + Build Main standby datanode + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Build Main standby step is not for mode:%s." % self.params.mode) + return + primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + main_standby_inst = primary_dn[0] + build_cmd = "source %s; pssh -H %s \"source %s ; gs_ctl build -D %s -b cross_cluster_full -g 0 -q\"" \ - % (mpprc_file,inst.hostname, mpprc_file, inst.datadir) - self.logger.debug("Build main standby datanode on node [%s],cmd: %s." % inst.hostname, build_cmd) + % (self.mpp_file,main_standby_inst.hostname, self.mpp_file, main_standby_inst.datadir) + self.logger.debug("Build Main standby datanode on node [%s],cmd: %s." % (main_standby_inst.hostname, build_cmd)) status, output = CmdUtil.retryGetstatusoutput(build_cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % build_cmd + "Options:%s, Error: \n%s " - % ("build main_standby on node :%s" % inst.hostname, str(output))) - self.logger.debug("Successfully build main_standby in disaster standby cluster on node [%s] " % inst.hostname) - - kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 \"" \ - % (mpprc_file,inst.hostname, mpprc_file) - self.logger.debug("Stop dssserver proc on node [%s],cmd: %s." % inst.hostname, kill_cmd) - status, output = CmdUtil.retryGetstatusoutput(kill_cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % kill_cmd + - "Options:%s, Error: \n%s " - % ("stop dssserver before start cluster on node :%s" % inst.hostname, str(output))) - self.logger.debug("Successfully stop dssserver before start cluster on node [%s] " % inst.hostname) - return output + % ("build main_standby on node :%s" % main_standby_inst.hostname, str(output))) + self.logger.debug("Successfully build main_standby in disaster standby cluster on node [%s] " % main_standby_inst.hostname) - def start_dss_instance(self, only_mode=None): + def kill_dss_instance(self, only_mode=None): """ - Start dss server process + Kill dssserver process """ - if self.params.mode == "primary" or self.params.mode != only_mode: - self.logger.debug("start dssserver step is not for mode:%s." % self.params.mode) + if only_mode and self.params.mode != only_mode: + self.logger.debug("Kill dssserver process step is not for mode:%s." % self.params.mode) return primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - - params_list = [] - for inst in primary_dn: - params_list.append((inst, self.mpp_file)) - - if not params_list: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] - % "obtain param list for start dssserver in disaster_standby") - parallelTool.parallelExecute(self.__start_dss_and_build, params_list) - self.logger.log("Successfully start dssserver and build main_standby inst : %s." 
% primary_dn) - return - - + main_standby_inst = primary_dn[0] - def kill_dss_instance(self, only_mode=None): - """ - Start dss server process - """ - cmd = "source %s; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9" % self.mpp_file - status, output = CmdUtil.retryGetstatusoutput(cmd) + kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 \"" \ + % (self.mpp_file,main_standby_inst.hostname, self.mpp_file) + self.logger.debug("Kill dssserver on node [%s],cmd: %s." % (main_standby_inst.hostname, kill_cmd)) + status = CmdUtil.retryGetstatusoutput(kill_cmd) if status != 0: - self.logger.error(ErrorCode.GAUSS_516["GAUSS_51600"] + - "status(%d), output(%s)" % (status, output)) - return output + raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] + % "kill dssserver before start cluster on node:" + main_standby_inst.hostname) + self.logger.debug("Successfully stop dssserver before start cluster on node [%s] " % main_standby_inst.hostname) def build_dn_instance(self, only_mode=None): """ diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index 090575b1..a80c0aa6 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -69,12 +69,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): """ self.logger.debug("Start common config step of ddr start.") self.distribute_cluster_conf() - #调用local/ConfigHba.py和streaming_xml进行设置,考虑使用gs_guc set适配 - self.update_pg_hba() - self.config_cross_cluster_repl_info() - self.set_xlog_file_path(self.params.doradoConfig) - self.set_application_name() - self.set_cluster_run_mode() + def _third_step_for_ddr_start(self, step): """ @@ -84,10 +79,12 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): return self.logger.debug("Start third step of ddr start.") - #self.prepare_gs_secure_files(only_mode='primary') - #self.build_and_distribute_key_files(only_mode='disaster_standby') - #self.get_default_wal_keep_segments(only_mode='primary') - self.write_dorado_step("3_set_wal_segments_step") + self.update_pg_hba() + self.config_cross_cluster_repl_info() + self.set_xlog_file_path(self.params.doradoConfig) + self.set_application_name() + self.set_cluster_run_mode() + self.write_dorado_step("3_set_datanode_guc_step") def _fourth_step_for_ddr_start(self, step): @@ -97,48 +94,44 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): if step >= 4: return self.logger.debug("Start fourth step of ddr start.") - self.set_wal_keep_segments( - "reload", DoradoDisasterRecoveryConstants.MAX_WAL_KEEP_SEGMENTS, only_mode='primary') - self.write_dorado_step("4_set_wal_segments_step") + self.stop_cluster() + self.write_dorado_step("4_stop_cluster_step") def _fifth_step_for_ddr_start(self, step): """ - Fifth step for streaming start + Fifth step for ddr start """ if step >= 5: return self.logger.debug("Start fifth step of ddr start.") # self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary') # self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') - # self.set_most_available(mode="reload", raise_error=False) - #self.stop_cluster_by_node(only_mode='disaster_standby') - self.stop_cluster() self.start_cluster(only_mode="primary") - self.write_dorado_step("5_set_wal_segments_step") + 
self.write_dorado_step("5_start_primary_cluster_step") def _sixth_step_for_ddr_start(self, step): """ - Sixth step for streaming start + Sixth step for ddr start """ if step >= 6: return - self.logger.debug("Start sixth step of streaming start.") + self.logger.debug("Start sixth step of ddr start.") self.set_cmserver_guc("backup_open", "1", "set", only_mode='disaster_standby') self.set_cmagent_guc("agent_backup_open", "1", "set", only_mode='disaster_standby') - self.write_dorado_step("6_set_guc_step") + self.write_dorado_step("6_set_cm_guc_step") def _seventh_step_for_ddr_start(self, step): """ Seventh step for streaming start """ - if step >= 7: + if step >= 7 or self.params.mode == "primary": return self.logger.debug("Start seventh step of ddr start.") self.update_dorado_info("cluster", "restore", only_mode='disaster_standby') try: self.start_dss_instance(only_mode='disaster_standby') - # self.build_dn_instance(only_mode='disaster_standby') - # self.kill_dss_instance(only_mode='disaster_standby') + self.build_main_standby_datanode(only_mode='disaster_standby') + self.kill_dss_instance(only_mode='disaster_standby') except Exception as error: self.update_dorado_info("cluster", "restore_fail", only_mode='disaster_standby') raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "build dns" + "Error:%s" % error) -- Gitee From 5ec3c50ce03c7480a41d0e10aacaaeecd8119cf9 Mon Sep 17 00:00:00 2001 From: Hao Date: Thu, 17 Aug 2023 17:26:01 +0800 Subject: [PATCH 11/23] upadte --- .../impl/dorado_disaster_recovery/ddr_base.py | 271 +++--------------- 1 file changed, 37 insertions(+), 234 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 5f46a22b..a43dfdc4 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -39,7 +39,7 @@ from gspylib.common.DbClusterStatus import DbClusterStatus from gspylib.threads.SshTool import SshTool from gspylib.threads.parallelTool import parallelTool from gspylib.os.gsfile import g_file -from base_utils.os.cmd_util import CmdUtil +from base_utils.os.cmd_util import CmdUtil, FastPopen from base_utils.os.env_util import EnvUtil from base_utils.os.net_util import NetUtil from base_utils.os.file_util import FileUtil @@ -1117,194 +1117,6 @@ class DoradoDisasterRecoveryBase(object): % (guc_parameter, guc_value, output) self.logger.debug(msg) - def __check_datanode_data_ip_connection(self, inst): - """ - Check remote data ip can connect or not - """ - any_connected = False - node_infos = [node_info for shard in self.params.remoteClusterConf.get("shards", []) - for node_info in shard] - local_data_ip = self.__get_local_data_ip(inst.hostname) - for node_info in node_infos: - data_ip = node_info.get("dataIp") - shard_num = node_info.get("shardNum", '1') - if str(shard_num) != str(inst.mirrorId): - continue - _, ret = DefaultValue.fast_ping_on_node(inst.hostname, local_data_ip, - data_ip, self.logger) - if ret: - any_connected = True - break - if not any_connected: - self.logger.error("Failed check data ip connection for inst:%s." % inst.instanceId) - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "check data ip connection") - self.logger.debug("Successfully check main standby data ip connection.") - - def __pghba_backup_handler(self, node_name, dir_name, inst_id, mode="backup"): - """ - Backup or restore pg_hba file. 
- """ - file_path = os.path.join(dir_name, "pg_hba.conf") - old_file_path = os.path.join(dir_name, "pg_hba.conf.old") - dest_file = os.path.join(self.dorado_file_dir, "%s_pg_hba.conf" % inst_id) - if self.local_host == node_name: - if mode == "backup" and not os.path.isfile(dest_file): - if os.path.isfile(file_path): - self.logger.debug("Backup file from[%s] to[%s]." % ( - file_path, dest_file)) - FileUtil.cpFile(file_path, dest_file) - else: - self.logger.debug("Backup file from[%s] to[%s]." % ( - old_file_path, dest_file)) - FileUtil.cpFile(old_file_path, dest_file) - if mode == "restore": - self.logger.debug("Restore file from[%s] to[%s]." % ( - dest_file, file_path)) - FileUtil.cpFile(dest_file, file_path) - FileUtil.removeFile(dest_file) - else: - if mode == "backup": - cmd = "source %s; pssh -s -H %s \"if [ ! -f '%s' ];then if [ -f '%s' ];" \ - "then cp '%s' '%s';else cp '%s' '%s';fi;fi\"" \ - % (self.mpp_file, node_name, dest_file, file_path, file_path, - dest_file, old_file_path, dest_file) - self.logger.debug("Backup file on node[%s] with cmd [%s]." % ( - node_name, cmd)) - else: - cmd = "source %s; pssh -s -H %s \"cp %s %s && rm -f %s\"" % ( - self.mpp_file, node_name, dest_file, file_path, dest_file) - self.logger.debug("Restore file on node[%s] from[%s] to[%s]." % ( - node_name, file_path, dest_file)) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + - " Error: \n%s " % output) - - def __pg_ident_backup_handler(self, node_name, dir_name, inst_id, mode="backup"): - """ - Backup or restore pg_ident file. - """ - file_path = os.path.join(dir_name, "pg_ident.conf") - dest_file = os.path.join(self.dorado_file_dir, "%s_pg_ident.conf" % inst_id) - if self.local_host == node_name: - if mode == "backup" and not os.path.isfile(dest_file): - if os.path.isfile(file_path): - self.logger.debug("Backup file from[%s] to[%s]." % ( - file_path, dest_file)) - FileUtil.cpFile(file_path, dest_file) - - if mode == "restore" and os.path.isfile(dest_file): - self.logger.debug("Restore file from[%s] to[%s]." % ( - dest_file, file_path)) - FileUtil.cpFile(dest_file, file_path) - FileUtil.removeFile(dest_file) - else: - if mode == "backup": - cmd = "source %s; pssh -s -H %s \"if [ ! -f '%s' ];then if [ -f '%s' ];" \ - "then cp '%s' '%s';fi;fi\"" \ - % (self.mpp_file, node_name, dest_file, file_path, file_path, dest_file) - self.logger.debug("Backup file on node[%s] with cmd [%s]." % ( - node_name, cmd)) - else: - cmd = "source %s; pssh -s -H %s \"if [ -f '%s' ];then cp '%s' '%s' && " \ - "rm -f '%s';fi\"" % (self.mpp_file, node_name, dest_file, dest_file, - file_path, dest_file) - self.logger.debug("Restore file on node[%s] from[%s] to[%s]." 
% ( - node_name, file_path, dest_file)) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + - " Error: \n%s " % output) - - def __start_main_standby_dn(self, start_params): - """ - Start single main standby dn - """ - local_ip, inst, bin_path, distribute_arg, build_timeout = start_params - self.logger.debug("Starting start dn:%s" % inst.instanceId) - if local_ip == inst.hostname: - cmd_start = "source %s; %s/gs_ctl start -D %s -M hadr_main_standby%s" % ( - self.mpp_file, bin_path, inst.datadir, distribute_arg) - else: - cmd_start = "source %s; pssh -s -t %s -H %s \"source %s; %s/gs_ctl start -D %s " \ - "-M hadr_main_standby%s\"" \ - % (self.mpp_file, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname, - self.mpp_file, bin_path, inst.datadir, distribute_arg) - self.logger.debug("Start dn with cmd:%s." % cmd_start) - status, output = CmdUtil.retry_util_timeout(cmd_start, build_timeout) - if status != 0: - raise Exception( - ErrorCode.GAUSS_514[ - "GAUSS_51400"] % cmd_start + " Error: \n%s " % output) - self.logger.debug("Successfully start dn:%s" % inst.instanceId) - - def __build_main_standby_dn(self, params): - """ - Build single main standby dn - """ - inst, build_timeout, local_ip, bin_path, distribute_arg, rds_backup, backup_pwd = params - self.logger.debug("Start build main standby dn:%s" % inst.instanceId) - self.__check_datanode_data_ip_connection(inst) - self.__pghba_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="backup") - self.__pg_ident_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="backup") - # -t 1209600 means default value 14 days - if local_ip == inst.hostname: - cmd = "source %s; %s/gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s" \ - % (self.mpp_file, bin_path, inst.datadir, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) - else: - cmd = "echo \"source %s; %s/gs_ctl build -D %s -b cross_cluster_full -g 0 -q " \ - " -t %s\" | pssh -s -t %s -H %s" \ - % (self.mpp_file, bin_path, inst.datadir, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname) - cmd_log = cmd.replace(backup_pwd, '***') - self.logger.debug("Building with cmd:%s." % cmd_log) - status, output = CmdUtil.retry_util_timeout(cmd, build_timeout) - if status != 0: - error_detail = "Error: Failed to do build because of pssh timeout." \ - if "was killed or timeout" in output else \ - "Error: Failed to do build because of retry timeout in %s s." \ - % build_timeout - self.logger.debug("Failed to do gs_ctl build. 
" + error_detail) - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] - % "full build from remote cluster" + error_detail) - self.logger.debug("Successfully build main standby dn:%s" % inst.instanceId) - self.__pghba_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="restore") - self.__pg_ident_backup_handler(inst.hostname, inst.datadir, inst.instanceId, mode="restore") - start_params = (local_ip, inst, bin_path, distribute_arg, build_timeout) - self.__start_main_standby_dn(start_params) - - def __build_cascade_standby_dn(self, params): - """ - Build single main standby dn - """ - inst, build_timeout, local_ip, bin_path, distribute_arg = params - self.logger.debug("Start build cascade standby dn:%s" % inst.instanceId) - # -t 1209600 means default value 14 days - if local_ip == inst.hostname: - cmd = "source %s; %s/gs_ctl build -D %s -M cascade_standby " \ - "-b standby_full -r 7200%s -t %s" \ - % (self.mpp_file, bin_path, inst.datadir, distribute_arg, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) - else: - cmd = "echo \"source %s; %s/gs_ctl build -D %s -M cascade_standby -b standby_full " \ - "-r 7200%s -t %s\" | pssh -s -t %s -H %s" \ - % (self.mpp_file, bin_path, inst.datadir, distribute_arg, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT, - DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, inst.hostname) - self.logger.debug("Building with cmd:%s." % cmd) - status, output = CmdUtil.retry_util_timeout(cmd, build_timeout) - if status != 0: - error_detail = "Error: Failed to do build because of pssh timeout." \ - if "was killed or timeout" in output else \ - "Error: Failed to do build because of retry timeout in %s s." \ - % build_timeout - self.logger.debug("Failed to do gs_ctl build. " + error_detail) - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] - % "full build from remote cluster" + error_detail) - self.logger.debug("Successfully build cascade standby dn:%s" % inst.instanceId) - def start_dss_instance(self, only_mode=None): """ Start dssserver process @@ -1317,14 +1129,23 @@ class DoradoDisasterRecoveryBase(object): db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] - cmd = "source %s; pssh -H %s \"source %s ; export DSS_MAINTAIN=TRUE; " \ - " dssserver -D $DSS_HOME & \"" % (self.mpp_file, main_standby_inst.hostname, self.mpp_file) + if self.local_host == main_standby_inst.hostname: + cmd = 'sh -c "source {}; export DSS_MAINTAIN=TRUE && nohup dssserver -D $DSS_HOME >/dev/null 2>&1 & "'.format( + self.mpp_file) + # cmd = 'sh -c %s; export DSS_MAINTAIN=TRUE && ' \ + # "nohup dssserver -D $DSS_HOME >/dev/null 2>&1 &" % (self.mpp_file) + else: + cmd = "source %s; pssh -s -t 5 -H %s \"source %s; export DSS_MAINTAIN=TRUE && " \ + "nohup dssserver -D $DSS_HOME >/dev/null 2>&1 & \"" \ + % (self.mpp_file, main_standby_inst.hostname) + self.logger.debug("Start dssserver on node [%s],cmd: %s." 
% (main_standby_inst.hostname, cmd)) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + - "Options:%s, Error: \n%s " - % ("Start dssserver on node :%s" % main_standby_inst.hostname, str(output))) + proc = FastPopen(cmd) + out, err = proc.communicate() + if proc.returncode != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + + 'Start dssserver on node [{}] Error: {}'.format(main_standby_inst.hostname, str(err + out).strip())) + self.logger.log("Successfully Start dssserver on node [%s] " % main_standby_inst.hostname) def build_main_standby_datanode(self, only_mode=None): @@ -1337,11 +1158,17 @@ class DoradoDisasterRecoveryBase(object): primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] - - build_cmd = "source %s; pssh -H %s \"source %s ; gs_ctl build -D %s -b cross_cluster_full -g 0 -q\"" \ - % (self.mpp_file,main_standby_inst.hostname, self.mpp_file, main_standby_inst.datadir) + + if self.local_host == main_standby_inst.hostname: + build_cmd = "source %s; gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s" \ + % (self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) + else: + build_cmd = "source %s; pssh -s -t %s -H %s \"source %s;" \ + " gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s \"" \ + % (self.mpp_file, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, main_standby_inst.hostname, + self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) self.logger.debug("Build Main standby datanode on node [%s],cmd: %s." % (main_standby_inst.hostname, build_cmd)) - status, output = CmdUtil.retryGetstatusoutput(build_cmd) + status, output = CmdUtil.retry_util_timeout(build_cmd, self.params.waitingTimeout) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % build_cmd + "Options:%s, Error: \n%s " @@ -1359,43 +1186,19 @@ class DoradoDisasterRecoveryBase(object): db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] - kill_cmd = "source %s; pssh -H %s \"source %s ; ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 \"" \ - % (self.mpp_file,main_standby_inst.hostname, self.mpp_file) + if self.local_host == main_standby_inst.hostname: + kill_cmd = "source %s; pkill -9 -f dssserver" % (self.mpp_file) + else: + kill_cmd = "source %s; pssh -s -t 3 -H %s \"pkill -9 -f dssserver\"" \ + % (self.mpp_file, main_standby_inst.hostname) self.logger.debug("Kill dssserver on node [%s],cmd: %s." % (main_standby_inst.hostname, kill_cmd)) - status = CmdUtil.retryGetstatusoutput(kill_cmd) - if status != 0: + sts, out = CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) + if sts not in [0, 1]: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] - % "kill dssserver before start cluster on node:" + main_standby_inst.hostname) - self.logger.debug("Successfully stop dssserver before start cluster on node [%s] " % main_standby_inst.hostname) + % "kill dssserver before start cluster on node:" + main_standby_inst.hostname + + ", output:"+str(out).strip()) + self.logger.log("Successfully kill dssserver before start cluster on node [%s] " % main_standby_inst.hostname) - def build_dn_instance(self, only_mode=None): - """ - Build dn instance - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Build dn step is not for mode:%s." 
% self.params.mode) - return - self.logger.debug("Start building process.") - distribute_arg = "" if self.cluster_info.isSingleInstCluster() else " -Z datanode" - main_params = [] - cascade_params = [] - datanode_instance = [inst for node in self.cluster_info.dbNodes - for inst in node.datanodes] - for inst in datanode_instance: - if inst.instanceId in self.main_standby_ids + self.primary_dn_ids: - main_params.append((inst, self.params.waitingTimeout, self.local_host, - self.bin_path, distribute_arg, self.params.hadrUserName, - self.params.hadrUserPassword)) - else: - cascade_params.append((inst, self.params.waitingTimeout, self.local_host, - self.bin_path, distribute_arg)) - if main_params: - parallelTool.parallelExecute(self.__build_main_standby_dn, main_params) - self.logger.debug("Finished build main standby dns.") - #if cascade_params: - # parallelTool.parallelExecute(self.__build_cascade_standby_dn, cascade_params) - # self.logger.debug("Finished build cascade standby dns.") - del self.params.hadrUserPassword def query_cluster(self): """ -- Gitee From 99e2a10147dd2cda2e5cca51be8bb196d3e9d32c Mon Sep 17 00:00:00 2001 From: Hao Date: Thu, 17 Aug 2023 21:58:23 +0800 Subject: [PATCH 12/23] bugfix ddr stop --- .../impl/dorado_disaster_recovery/ddr_base.py | 94 ++++--------------- .../dorado_diaster_recovery_start.py | 2 +- .../dorado_disaster_recovery_stop.py | 65 +++++++------ 3 files changed, 49 insertions(+), 112 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index a43dfdc4..863bb2ed 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1672,92 +1672,30 @@ class DoradoDisasterRecoveryBase(object): else: self.logger.log("Check cluster type succeed.") - def __remove_streaming_repl_info(self, params): + def __remove_cross_cluster_replinfo(self, params): """ - Remove streaming repl info from single dn instances. + Remove cross_cluster_replinfo from single dn instances. 
""" dn_inst, guc_mode, dn_num = params - self.logger.debug("Start remove replconninfo for instance:%s" % dn_inst.instanceId) + self.logger.debug("Start remove cross_cluster_replinfo for instance:%s" % dn_inst.instanceId) + for idx in range(1, dn_num + 1): - if dn_inst.hostname == self.local_host: - cmd = "source %s; gs_guc check -Z datanode -D %s " \ - "-c 'replconninfo%s'" % (self.mpp_file, dn_inst.datadir, idx) - else: - cmd = "source %s; pssh -H %s 'source %s; gs_guc check " \ - "-Z datanode -D %s -c \"replconninfo%s\"'" \ - % (self.mpp_file, dn_inst.hostname, self.mpp_file, dn_inst.datadir, idx) - self.logger.debug("Check original repl infos with cmd:%s" % cmd) + cmd = "source %s ; gs_guc %s -N %s -D %s -c " \ + "\"cross_cluster_replconninfo%s\"" \ + % (self.mpp_file, guc_mode, dn_inst.hostname, dn_inst.datadir, idx) + self.logger.debug("Remove dn cross_cluster_replconninfo with cmd:%s" % cmd) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s " % output) - if output.count("=NULL") > 2: - continue - elif "iscrossregion=false" in output.lower(): - ret = re.search( - r"replconninfo%s='localhost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" - r" localport=(\d{4,5}) localheartbeatport=(\d{4,5}) " - r"localservice=(\d{4,5}) " - r"remotehost=(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}) " - r"remoteport=(\d{4,5}) remoteheartbeatport=(\d{4,5}) " - r"remoteservice=(\d{4,5})" % idx, output) - if not ret: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "search repl infos") - if dn_inst.hostname != NetUtil.GetHostIpOrName(): - set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ - "-Z datanode -D %s -c " \ - "\\\"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s " \ - "remoteservice=%s'\\\"\"" - set_cmd = set_cmd % (self.mpp_file, dn_inst.hostname, - self.mpp_file, guc_mode, - dn_inst.datadir, idx, ret.group(1), - ret.group(2), ret.group(3), ret.group(4), - ret.group(5), ret.group(6), ret.group(7), - ret.group(8)) - else: - set_cmd = "source %s ; gs_guc %s -Z datanode -D %s -c " \ - "\"replconninfo%s = 'localhost=%s localport=%s " \ - "localheartbeatport=%s localservice=%s remotehost=%s " \ - "remoteport=%s remoteheartbeatport=%s " \ - "remoteservice=%s'\"" - set_cmd = set_cmd % (self.mpp_file, guc_mode, - dn_inst.datadir, idx, ret.group(1), - ret.group(2), ret.group(3), ret.group(4), - ret.group(5), ret.group(6), ret.group(7), - ret.group(8)) - self.logger.debug("Set original repl infos with cmd:%s" % set_cmd) - status, output = CmdUtil.retryGetstatusoutput(set_cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + - " Error: \n%s " % output) - self.logger.debug("Successfully remove original repl infos with cmd:%s." 
- % set_cmd) - elif "iscrossregion=true" in output.lower(): - if dn_inst.hostname != self.local_host: - set_cmd = "source %s; pssh -H %s \"source %s ; gs_guc %s " \ - "-Z datanode -D %s -c \\\"replconninfo%s\\\"\"" - set_cmd = set_cmd % (self.mpp_file, dn_inst.hostname, - self.mpp_file, guc_mode, - dn_inst.datadir, idx) - else: - set_cmd = "source %s ; gs_guc %s -Z datanode -D %s -c " \ - "\"replconninfo%s\"" - set_cmd = set_cmd % (self.mpp_file, guc_mode, - dn_inst.datadir, idx) - self.logger.debug("Remove stream repl infos with cmd:%s" % set_cmd) - status, output = CmdUtil.retryGetstatusoutput(set_cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_cmd + - " Error: \n%s " % output) - self.logger.debug("Successfully remove stream repl infos with cmd:%s." + self.logger.debug("Successfully remove cross_cluster_replconninfo with cmd:%s." % set_cmd) + self.logger.debug("Successfully removed replconninfo for instance:%s" % dn_inst.instanceId) - def remove_all_stream_repl_infos(self, guc_mode="set"): + def remove_cross_cluster_replinfos(self, guc_mode="set"): """ - Remove retreaming disaster repl infos from all instances + Remove cross_cluster_replinfos from all instances """ params = [] dn_instances = [inst for node in self.cluster_info.dbNodes @@ -1773,10 +1711,10 @@ class DoradoDisasterRecoveryBase(object): params.append((inst, guc_mode, dn_num)) if params: self.logger.log("Starting remove all node dn instances repl infos.") - parallelTool.parallelExecute(self.__remove_streaming_repl_info, params) + parallelTool.parallelExecute(self.__remove_cross_cluster_replinfo, params) self.logger.log("Successfully remove all node dn instances repl infos.") - def remove_streaming_cluster_file(self): + def remove_ddr_cluster_file(self): """ function: remove the parameter file for config pg_hba :return: NA @@ -1792,7 +1730,7 @@ class DoradoDisasterRecoveryBase(object): "Failed to remove cluster file with error:%s" % error) self.logger.log("Finished remove cluster file.") - def remove_streaming_pg_hba(self, ignore_error=False): + def remove_pg_hba(self, ignore_error=False): """ Remove remote ips from pg hba of streaming disaster """ @@ -1804,7 +1742,7 @@ class DoradoDisasterRecoveryBase(object): data_ip = node_info.get("dataIp") remove_ips.append(data_ip) remove_ips = list(set(remove_ips)) - host_names = self.get_all_connection_node_name("remove_streaming_pg_hba") + host_names = self.get_all_connection_node_name("remove_pg_hba") self.logger.debug("Remove ips:%s from pg_hba on nodes:%s" % ( str(remove_ips), str(host_names))) cmd = "%s -U '%s' -l '%s'" % (OMCommand.getLocalScript("Local_Config_Hba"), diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py index a80c0aa6..ea50f564 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py @@ -208,5 +208,5 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self._seventh_step_for_ddr_start(step) self._eighth_step_for_ddr_start(step) self._ninth_step_for_ddr_start(step) - self.logger.log("Successfully do streaming disaster recovery start.") + self.logger.log("Successfully do dorado disaster recovery start.") \ No newline at end of file diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py 
b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py index be1c289e..881aba57 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py @@ -16,8 +16,8 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_disaster_recovery_stop.py is a utility for stopping -# streaming disaster recovery on primary cluster. +# Description : dorado_disaster_recovery_stop.py is a utility for stopping +# dorado disaster recovery on primary cluster. from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase @@ -26,80 +26,79 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def _first_step_for_streaming_stop(self, step): + def _first_step_for_ddr_stop(self, step): """ - First step for streaming stop + First step for ddr stop """ if step >= 2: return - self.logger.debug("Start first step of streaming stop.") + self.logger.debug("Start first step of dorado disaster recovery stop.") self.init_cluster_status() self.check_action_and_mode() - def _second_step_for_streaming_stop(self, step): + def _second_step_for_ddr_stop(self, step): """ - Second step for streaming stop + Second step for ddr stop """ if step >= 2: return - self.logger.debug("Start second step of streaming start.") + self.logger.debug("Start second step of dorado disaster recovery stop.") self.check_cluster_status(status_allowed=['Normal']) self.check_cluster_type(allowed_type='primary') self.check_is_under_upgrade() self.write_dorado_step("2_check_cluster_step") - def _third_step_for_streaming_stop(self, step): + def _third_step_for_ddr_stop(self, step): """ - Third step for streaming stop + Third step for ddr stop """ if step >= 3: return - self.logger.debug("Start third step of streaming stop.") - self.remove_all_stream_repl_infos(guc_mode="reload") - self.remove_streaming_cluster_file() + self.logger.debug("Start third step of dorado disaster recovery stop.") + self.remove_cross_cluster_replinfos(guc_mode="reload") + self.remove_ddr_cluster_file() self.write_dorado_step("3_remove_config_step") - def _fourth_step_for_streaming_stop(self, step): + def _fourth_step_for_ddr_stop(self, step): """ - Fourth step for streaming stop + Fourth step for ddr stop """ if step >= 4: return - self.logger.debug("Start fourth step of streaming stop.") - self.remove_streaming_pg_hba() + self.logger.debug("Start fourth step of dorado disaster recovery stop.") + self.remove_pg_hba() self.restore_guc_params() self.write_dorado_step("4_remove_pg_hba_step") - def _fifth_step_for_streaming_stop(self, step): + def _fifth_step_for_ddr_stop(self, step): """ - Fifth step for streaming stop + Fifth step for ddr stop """ if step >= 5: return - self.logger.debug("Start fifth step of streaming start.") - self.streaming_clean_replication_slot() + self.logger.debug("Start fifth step of dorado disaster recovery start.") + #self.streaming_clean_replication_slot() self.write_dorado_step("5_update_config_step") - def _sixth_step_for_streaming_stop(self, step): + def _sixth_step_for_ddr_stop(self, step): """ - Sixth step for streaming stop + Sixth step for ddr stop """ if step >= 6: return - self.logger.debug("Start sixth step of streaming stop.") + self.logger.debug("Start sixth 
step of dorado disaster recovery stop.") self.check_cluster_status(['Normal']) - self.clean_global_config() self.update_dorado_info("cluster", "normal") self.clean_dorado_dir() def run(self): - self.logger.log("Start remove streaming disaster relationship.") + self.logger.log("Start remove dorado disaster relationship.") step = self.query_dorado_step() - self._first_step_for_streaming_stop(step) + self._first_step_for_ddr_stop(step) self.parse_cluster_status() - self._second_step_for_streaming_stop(step) - self._third_step_for_streaming_stop(step) - self._fourth_step_for_streaming_stop(step) - self._fifth_step_for_streaming_stop(step) - self._sixth_step_for_streaming_stop(step) - self.logger.log("Successfully do streaming disaster recovery stop.") + self._second_step_for_ddr_stop(step) + self._third_step_for_ddr_stop(step) + self._fourth_step_for_ddr_stop(step) + self._fifth_step_for_ddr_stop(step) + self._sixth_step_for_ddr_stop(step) + self.logger.log("Successfully do dorado disaster recovery stop.") -- Gitee From fe32d53eedbac71e0d50296995c19241ffba8c02 Mon Sep 17 00:00:00 2001 From: Hao Date: Fri, 18 Aug 2023 11:57:39 +0800 Subject: [PATCH 13/23] bugfix start --- script/gs_ddr | 2 +- script/impl/dorado_disaster_recovery/ddr_base.py | 4 ++-- ...recovery_start.py => dorado_disaster_recovery_start.py} | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) rename script/impl/dorado_disaster_recovery/ddr_modules/{dorado_diaster_recovery_start.py => dorado_disaster_recovery_start.py} (97%) diff --git a/script/gs_ddr b/script/gs_ddr index 3e699349..3fb5047f 100644 --- a/script/gs_ddr +++ b/script/gs_ddr @@ -31,7 +31,7 @@ from base_utils.os.user_util import UserUtil from domain_utils.cluster_file.cluster_log import ClusterLog from impl.dorado_disaster_recovery.params_handler import ParamsHandler from impl.dorado_disaster_recovery.ddr_modules.\ - dorado_diaster_recovery_start import DisasterRecoveryStartHandler + dorado_disaster_recovery_start import DisasterRecoveryStartHandler from impl.dorado_disaster_recovery.ddr_modules.\ dorado_disaster_recovery_stop import DisasterRecoveryStopHandler from impl.dorado_disaster_recovery.ddr_modules.\ diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 863bb2ed..48619e68 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -613,7 +613,7 @@ class DoradoDisasterRecoveryBase(object): def check_dn_instance_params(self): """set_dn_instance_params""" - check_dick = {"enable_dcf": "off"} + check_dick = {"ha_module_debug ": "off"} dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes] primary_dn_insts = [inst for inst in dn_insts if inst.instanceId in self.primary_dn_ids] @@ -1689,7 +1689,7 @@ class DoradoDisasterRecoveryBase(object): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s " % output) self.logger.debug("Successfully remove cross_cluster_replconninfo with cmd:%s." 
- % set_cmd) + % cmd) self.logger.debug("Successfully removed replconninfo for instance:%s" % dn_inst.instanceId) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py similarity index 97% rename from script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py rename to script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py index ea50f564..0bfb7ecd 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_diaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_disaster_recovery_start.py is utility for creating +# Description : dorado_disaster_recovery_start.py is utility for creating # relationship between primary cluster and standby cluster. import os @@ -60,7 +60,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): "check cm_ctl is available for current cluster") self.check_is_under_upgrade() #检查dn的GUC参数 - #self.check_dn_instance_params() + self.check_dn_instance_params() self.write_dorado_step("2_check_cluster_step") def common_step_for_ddr_start(self): @@ -200,7 +200,8 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self._second_step_for_ddr_start(step) #更新pg_hba和replinfo self.common_step_for_ddr_start() - + self._third_step_for_ddr_start(step) + self._fourth_step_for_ddr_start(step) self._fifth_step_for_ddr_start(step) #设置CM backup_open参数,灾备backup_open=1, 主集群backup_open=0 self._sixth_step_for_ddr_start(step) -- Gitee From 873afa75ee4a70241b83ec00832051b7768d8772 Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 18 Aug 2023 14:48:12 +0800 Subject: [PATCH 14/23] =?UTF-8?q?switchover=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/dorado_disaster_recovery/ddr_base.py | 82 +++++++------------ .../dorado_disaster_recovery_switchover.py | 42 ++++------ .../params_handler.py | 2 +- 3 files changed, 46 insertions(+), 80 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index a43dfdc4..b554baa2 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -21,6 +21,7 @@ import json import os import re +import subprocess import sys import time from datetime import datetime @@ -98,7 +99,7 @@ class DoradoDisasterRecoveryBase(object): self.is_single_inst = True if self.cluster_info.isSingleInstCluster() else None self.cluster_node_names = self.cluster_info.getClusterNodeNames() self.dorado_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) - self.streaming_xml = os.path.join(self.dorado_file_dir, + self.dorado_xml = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) self.mpp_file = EnvUtil.getMpprcFile() @@ -403,9 +404,9 @@ class DoradoDisasterRecoveryBase(object): parallelTool.parallelExecute(self.stream_clean_gs_secure, params) self.logger.debug("Finished clean gs secure dir.") - def remove_streaming_dir(self, dir_path): + def remove_dorado_dir(self, 
dir_path): """ - Remove streaming files dir + Remove dorado files dir """ cmd = "if [ -d %s ]; then rm %s -rf;fi" % (dir_path, self.dorado_file_dir) self.ssh_tool.executeCommand(cmd) @@ -1080,6 +1081,23 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug( "Successfully set all datanode guc param in postgres conf for cross_cluster_replconninfo.") + def set_datanode_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): + """ + set datanode guc param + :return: NA + """ + if only_mode and self.params.mode != only_mode: + self.logger.debug("Set datanode guc [%s] to [%s] not for mode:%s." + % (guc_parameter, guc_value, self.params.mode)) + return + cmd = "gs_guc %s -Z datanode -N all -I all -c \"%s=%s\" " % \ + (guc_type, guc_parameter, guc_value) + status, output = CmdUtil.retryGetstatusoutput(cmd) + if status != 0: + msg = ErrorCode.GAUSS_516['GAUSS_51632'] \ + % "set datanode guc [%s] to [%s], output:%s" \ + % (guc_parameter, guc_value, output) + self.logger.debug(msg) def set_cmserver_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): """ @@ -1415,7 +1433,7 @@ class DoradoDisasterRecoveryBase(object): """ Clean flag file """ - flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") if os.path.exists(flag_file): self.logger.debug("Successfully removed flag file %s." % flag_file) os.remove(flag_file) @@ -1973,7 +1991,7 @@ class DoradoDisasterRecoveryBase(object): (len(localRole) != 1 or localRole[0] != "Standby"): check_ok = -1 else: - raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % state) + raise Exception(ErrorCode.GAUSS_521["F"] % state) else: check_ok = status @@ -1993,10 +2011,10 @@ class DoradoDisasterRecoveryBase(object): if len(host_names) != len(self.cluster_node_names): raise Exception(ErrorCode.GAUSS_506["GAUSS_50623"] % host_names) check_params = [] - all_instances = [dn_inst for db_node in self.cluster_info.dbNodes + all_instances = [(db_node.name, dn_inst) for db_node in self.status_info.dbNodes for dn_inst in db_node.datanodes] - for dn_inst in all_instances: - check_params.append([dn_inst.state, dn_inst.hostname, dn_inst.datadir]) + for host_name, dn_inst in all_instances: + check_params.append([dn_inst.status, host_name, dn_inst.datadir]) if len(check_params) <= 0: raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "cluster") while True: @@ -2016,7 +2034,7 @@ class DoradoDisasterRecoveryBase(object): if check_status == 0: break if check_status != 0: - if dorado_switchover == "dorado_switchover": + if dorado_switchover == "disaster_switchover": raise Exception( ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") self.logger.logExit( @@ -2056,48 +2074,6 @@ class DoradoDisasterRecoveryBase(object): self._failover_config_step(dorado_disaster_step, action_flag) self._failover_start_step(dorado_disaster_step, action_flag) - def check_dorado_datanode_query_info(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, - dorado_switchover=None): - """ - check gs_ctl query info - """ - self.logger.debug("Waiting for gs_ctl query status being satisfied.") - end_time = None if timeout <= 0 else datetime.now() + timedelta(seconds=timeout) - - host_names = self.get_all_connection_node_name() - if len(host_names) != len(self.cluster_node_names): - raise Exception(ErrorCode.GAUSS_506["GAUSS_50623"] % host_names) - check_params = [] - all_instances = [dn_inst for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes] - for dn_inst in 
all_instances: - check_params.append([dn_inst.state, dn_inst.hostname, dn_inst.datadir]) - if len(check_params) <= 0: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "cluster") - while True: - check_status = 0 - time.sleep(10) - if end_time is not None and datetime.now() >= end_time: - check_status = 1 - self.logger.debug("Timeout. The gs_ctl query command cannot obtain the expected status.") - break - results = parallelTool.parallelExecute( - self.check_datanode_query_info, check_params) - for ret in results: - if ret[0] != 0: - self.logger.debug("Failed to check node[%s] info using \"gs_ctl query\" command " - "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) - check_status = 1 - if check_status == 0: - break - if check_status != 0: - if dorado_switchover == "dorado_switchover": - raise Exception( - ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") - self.logger.logExit( - ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") - self.logger.debug("Successfully wait for gs_ctl query status become Normal.", "constant") - def _failover_start_step(self, dorado_disaster_step, action_flag): """ Failover step 5 & 6 @@ -2116,12 +2092,12 @@ class DoradoDisasterRecoveryBase(object): self.check_cluster_status(cluster_normal_status, check_current=True) if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: self.check_dorado_datanode_query_info(timeout=30, - dorado_switchover="dorado_failover") + dorado_switchover="disaster_failover") self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") self.update_dorado_info("cluster", "normal") else: self.check_dorado_datanode_query_info(timeout=30, - dorado_switchover="dorado_switchover") + dorado_switchover="disaster_failover") self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "100%") self.update_dorado_info("cluster", "archive") diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py index 3a2c077f..2f878de5 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -42,7 +42,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.remote_replication_pairs_log_message = \ "Please configure \"Remote Replication Pairs\" correctly on "\ "And check and grant appropriate permissions to the corresponding device files.\n"\ - "to inform the tool and execute the tool again." + "Create file \"%s\" to mark the completion of the above operations and execute the tool again." 
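The switchover flow below pauses after stopping the cluster and waits for the operator to configure the storage-side Remote Replication Pairs, using a flag file named remote_replication_pairs_done under the tool's working directory as the handshake. A simplified, self-contained sketch of that handshake follows; the flag-file name matches the patch, while the working directory, message text and log callable are placeholders.

    # Sketch of the remote_replication_pairs_done handshake used by the switchover
    # handler. Only the flag-file name comes from the patch; the rest is illustrative.
    import os
    import sys

    def wait_for_replication_pairs(work_dir, prompt_message, log=print):
        """Exit and prompt the operator until the completion flag file exists."""
        flag_file = os.path.join(work_dir, "remote_replication_pairs_done")
        if not os.path.exists(flag_file):
            # The operator configures Remote Replication Pairs on the storage array,
            # creates the flag file, then re-runs gs_ddr to resume the switchover.
            log(prompt_message % flag_file)
            sys.exit(0)
        return flag_file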
def run(self): """ @@ -50,7 +50,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): """ self.logger.log("Start dorado disaster switchover.") self.check_action_and_mode() - self.check_switchover_workable() + # self.check_switchover_workable() self.check_dn_instance_params() self.check_is_under_upgrade() try: @@ -67,18 +67,6 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.remove_cluster_maintance_file() self.logger.log("Successfully do dorado disaster recovery switchover.") - def check_xlog_file_path(self): - """ - get and check xlog_file_path - """ - linkDev = self.dorado_info - if os.path.islink(linkDev): - linkDev = os.readlink(self.dorado_info) - if not os.access(linkDev, os.R_OK | os.W_OK): - self.logger.debug(ErrorCode.GAUSS_501("GAUSS_50113") % self.user) - return False - return True - def dorado_switchover_single_inst(self): """ dorado disaster recovery switchover for single_inst cluster @@ -97,20 +85,21 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.add_cluster_maintance_file_for_switchover() try: if dorado_disaster_step < 1: - self.update_streaming_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") + self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") + #self.check_switchover_workable() self.stop_cluster() self.write_dorado_step("1_dorado_disaster_stop_cluster_for_switchover") - flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") if os.path.exists(flag_file): - self.logger.debug("Delete file %s." % flag_file) + self.logger.log("Delete file %s." % flag_file) os.remove(flag_file) - self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + self.logger.log(self.remote_replication_pairs_log_message % flag_file) sys.exit(0) if dorado_disaster_step < 2: self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") - flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") - if not os.path.exists(flag_file) or not self.check_xlog_file_path(): - self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") + if not os.path.exists(flag_file): + self.logger.log(self.remote_replication_pairs_log_message % flag_file) sys.exit(0) self.write_dorado_step("2_set_remote_replication_pairs_for_switchover") if dorado_disaster_step < 3: @@ -126,9 +115,9 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.write_dorado_step("4_start_cluster_done") if dorado_disaster_step < 5: self.wait_for_normal(timeout=self.params.waitingTimeout, - dorado_switchover="dorado_switchover") + dorado_switchover="disaster_switchover") self.check_dorado_datanode_query_info(timeout=self.params.waitingTimeout, - dorado_switchover="dorado_switchover") + dorado_switchover="disaster_switchover") self.update_dorado_info("cluster", "recovery") except Exception as error: self.logger.error("Failed to do dorado disaster cluster switchover, Error:" @@ -137,7 +126,8 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.logger.debug("Roll back switchover step:%s" % rollback_step) self.remove_cluster_maintance_file_for_switchover() self.remove_cluster_maintance_file() - self.dorado_switchover_roll_back(update_query=True) + if rollback_step >= 2: + self.dorado_switchover_roll_back(update_query=True) 
self.clean_step_file() self.clean_flag_file() raise Exception(error) @@ -362,7 +352,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) if check_status != 0: - if dorado_switchover == "dorado_switchover": + if dorado_switchover == "disaster_switchover": raise Exception( ErrorCode.GAUSS_528["GAUSS_52800"] % (cluster_status.clusterStatus, cluster_status.clusterStatusDetail)) @@ -414,7 +404,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): "cluster mode") if update_query: self.update_dorado_info("cluster", "archive") - self.logger.debug("Please restore the original \"Remote Replication Pairs\" correctly on " + self.logger.log("Please restore the original \"Remote Replication Pairs\" correctly on " "the storage management interface.\n" "And check and grant appropriate permissions to the corresponding device files.\n" "After completing these steps, start the cluster manually !") diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 8eabed13..6feadd46 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -338,7 +338,7 @@ class ParamsHandler(object): try: self.__parse_args() self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) - self.logger.log('Streaming disaster recovery ' + self.params.task + ' ' + self.trace_id) + self.logger.log('Dorado disaster recovery ' + self.params.task + ' ' + self.trace_id) self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) self.__init_default_params() #self.__reload_hadr_user_info() -- Gitee From cbc11076125af1d06d47a24a7728e36c4c4e842a Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 18 Aug 2023 16:05:06 +0800 Subject: [PATCH 15/23] =?UTF-8?q?switchover=20=E7=81=BE=E5=A4=87=E5=8D=87?= =?UTF-8?q?=E4=B8=BB=E6=B5=8B=E8=AF=95=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/dorado_disaster_recovery/ddr_base.py | 127 +++++++++--------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 66cd94b8..e68d49b0 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -100,7 +100,7 @@ class DoradoDisasterRecoveryBase(object): self.cluster_node_names = self.cluster_info.getClusterNodeNames() self.dorado_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) self.dorado_xml = os.path.join(self.dorado_file_dir, - DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) + DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) self.mpp_file = EnvUtil.getMpprcFile() self.dss_home_dir = self.cluster_info.dss_home @@ -572,7 +572,7 @@ class DoradoDisasterRecoveryBase(object): cluster_status = self.cluster_status if check_current: self.logger.debug("Starting check CLuster status") - check_cmd = "source %s && cm_ctl query | grep cluster_state | awk '{print $NF}'"\ + check_cmd = "source %s && cm_ctl query | grep cluster_state | awk '{print $NF}'" \ % self.mpp_file status, output = CmdUtil.retryGetstatusoutput(check_cmd) if status != 0: @@ -793,7 +793,6 @@ class DoradoDisasterRecoveryBase(object): % ("set wal_keep_segments for inst:%s" % inst.instanceId, 
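The hunks that follow only adjust whitespace around the GUC helpers. For context, a sketch of how set_xlog_file_path assembles its gs_guc call, matching the command shape shown earlier in this series; the environment file and xlog path values below are placeholders.

    # Sketch of the gs_guc invocation used by set_xlog_file_path; command shape
    # follows the patch, the argument values are placeholders.
    def build_xlog_file_path_cmd(mpp_file, xlog_file_path):
        """Build the gs_guc command that sets xlog_file_path on every datanode."""
        return "source %s && gs_guc set -Z datanode -N all -I all " \
               "-c \"xlog_file_path='%s'\"" % (mpp_file, xlog_file_path)

    print(build_xlog_file_path_cmd("~/.bashrc", "/dev/example_xlog_disk"))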
str(output))) self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) - def __set_dn_xlog_file_path(self, params_list): """ @@ -849,7 +848,7 @@ class DoradoDisasterRecoveryBase(object): self.logger.log("Starting set xlog_file_path param") cmd = "source %s && gs_guc set -Z datanode -N all -I all " \ "-c \"xlog_file_path='%s'\"" \ - % (self.mpp_file, xlog_file_path) + % (self.mpp_file, xlog_file_path) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error:%s" % output) @@ -875,7 +874,7 @@ class DoradoDisasterRecoveryBase(object): % ("set xlog_lock_file_path for inst:%s" % inst.instanceId, str(output))) self.logger.debug("Successfully [%s] shardNum [%s] node [%s] xlog_lock_file_path " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) - + def __set_app_name_each_inst(self, params_list): """ Set xlog_lock_file_path value in each dn @@ -893,22 +892,22 @@ class DoradoDisasterRecoveryBase(object): % ("set application_name for inst:%s" % inst.instanceId, str(output))) self.logger.debug("Successfully [%s] shardNum [%s] node [%s] application_name " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) - + def set_xlog_lock_file(self, opt_type="set"): """ guc set xlog_lock_file_path value in primary dn """ self.logger.log("Starting %s xlog_lock_file_path param" % (opt_type)) - params_list=[] + params_list = [] for dbnode in self.cluster_info.dbNodes: for inst in dbnode.datanodes: lock_file = os.path.join(inst.datadir, "xlog_lock_file") params_list.append((inst, opt_type, lock_file, self.mpp_file)) - + if not params_list: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain param list for set xlog_lock_file_path") - + parallelTool.parallelExecute(self.__set_xlog_lock_file_each_inst, params_list) self.logger.log("Successfully %s xlog_lock_file_path param." % (opt_type)) @@ -916,22 +915,22 @@ class DoradoDisasterRecoveryBase(object): """ guc set application_name value """ - self.logger.log("Starting set application_name param" ) + self.logger.log("Starting set application_name param") app_name_prefix = "dn_master" if self.params.mode == "primary" \ else "dn_standby" - params_list=[] + params_list = [] for dbnode in self.cluster_info.dbNodes: for inst in dbnode.datanodes: app_name = "%s_%s" % (app_name_prefix, inst.instanceId) params_list.append((inst, "set", app_name, self.mpp_file)) - + if not params_list: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain param list for set application_name") - + parallelTool.parallelExecute(self.__set_app_name_each_inst, params_list) - self.logger.log("Successfully set application_name param." 
) - + self.logger.log("Successfully set application_name param.") + def set_cluster_run_mode(self): """ guc set xlog_file_path value in primary dn @@ -995,8 +994,8 @@ class DoradoDisasterRecoveryBase(object): for remote_ip in remote_ips: cmd = "source %s ; gs_guc set -Z datanode -N all -I all -h " \ - "\"host all all %s/32 trust\"" \ - % (self.mpp_file, remote_ip) + "\"host all all %s/32 trust\"" \ + % (self.mpp_file, remote_ip) self.logger.debug("Update pg_hba.conf with cmd: %s" % cmd) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: @@ -1021,7 +1020,7 @@ class DoradoDisasterRecoveryBase(object): return data_ip raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "obtain shards from local cluster info") - + def __get_remote_ips(self): """ Get remote dn data ip @@ -1035,9 +1034,9 @@ class DoradoDisasterRecoveryBase(object): ip = node["ip"] data_ip = node["dataIp"] remote_ips.append(data_ip) - + return remote_ips - + def __config_one_dn_instance(self, params): """ Config cross_cluster_replconninfo for one dn instance @@ -1053,8 +1052,8 @@ class DoradoDisasterRecoveryBase(object): set_cmd = "source %s ; gs_guc set -N %s -D %s -c " \ "\"cross_cluster_replconninfo%s = 'localhost=%s localport=%s " \ "remotehost=%s remoteport=%s '\"" \ - % (self.mpp_file, inst.hostname, inst.datadir, idx, - local_dn_ip, local_port, remote_ip, remote_port) + % (self.mpp_file, inst.hostname, inst.datadir, idx, + local_dn_ip, local_port, remote_ip, remote_port) self.logger.debug("Set dn cross cluster replinfos with cmd:%s" % set_cmd) idx += 1 status, output = CmdUtil.retryGetstatusoutput(set_cmd) @@ -1063,14 +1062,13 @@ class DoradoDisasterRecoveryBase(object): " Error: \n%s " % output) self.logger.debug("Successfully rectify original repl infos for instance:%s." % inst.instanceId) - def config_cross_cluster_repl_info(self): """ update postgresql.conf for cross_cluster_replconninfo """ self.logger.debug("set all datanode guc param in postgres conf for cross_cluster_replconninfo.") - + opt_mode = "set" config_repl_params = [] datanode_instance = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes] @@ -1078,9 +1076,10 @@ class DoradoDisasterRecoveryBase(object): for inst in datanode_instance: config_repl_params.append((inst, opt_mode)) rets = parallelTool.parallelExecute(self.__config_one_dn_instance, config_repl_params) - + self.logger.debug( "Successfully set all datanode guc param in postgres conf for cross_cluster_replconninfo.") + def set_datanode_guc(self, guc_parameter, guc_value, guc_type, only_mode=None): """ set datanode guc param @@ -1144,7 +1143,7 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Start dssserver step is not for mode:%s." % self.params.mode) return primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] if self.local_host == main_standby_inst.hostname: @@ -1156,14 +1155,15 @@ class DoradoDisasterRecoveryBase(object): cmd = "source %s; pssh -s -t 5 -H %s \"source %s; export DSS_MAINTAIN=TRUE && " \ "nohup dssserver -D $DSS_HOME >/dev/null 2>&1 & \"" \ % (self.mpp_file, main_standby_inst.hostname) - + self.logger.debug("Start dssserver on node [%s],cmd: %s." 
% (main_standby_inst.hostname, cmd)) proc = FastPopen(cmd) out, err = proc.communicate() if proc.returncode != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + - 'Start dssserver on node [{}] Error: {}'.format(main_standby_inst.hostname, str(err + out).strip())) - + 'Start dssserver on node [{}] Error: {}'.format(main_standby_inst.hostname, + str(err + out).strip())) + self.logger.log("Successfully Start dssserver on node [%s] " % main_standby_inst.hostname) def build_main_standby_datanode(self, only_mode=None): @@ -1174,24 +1174,26 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Build Main standby step is not for mode:%s." % self.params.mode) return primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] if self.local_host == main_standby_inst.hostname: build_cmd = "source %s; gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s" \ - % (self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) + % (self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) else: build_cmd = "source %s; pssh -s -t %s -H %s \"source %s;" \ - " gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s \"" \ - % (self.mpp_file, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, main_standby_inst.hostname, - self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) - self.logger.debug("Build Main standby datanode on node [%s],cmd: %s." % (main_standby_inst.hostname, build_cmd)) + " gs_ctl build -D %s -b cross_cluster_full -g 0 -q -t %s \"" \ + % (self.mpp_file, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT + 10, + main_standby_inst.hostname, + self.mpp_file, main_standby_inst.datadir, DoradoDisasterRecoveryConstants.MAX_BUILD_TIMEOUT) + self.logger.debug("Build Main standby datanode on node [%s],cmd: %s." % (main_standby_inst.hostname, build_cmd)) status, output = CmdUtil.retry_util_timeout(build_cmd, self.params.waitingTimeout) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % build_cmd + "Options:%s, Error: \n%s " % ("build main_standby on node :%s" % main_standby_inst.hostname, str(output))) - self.logger.debug("Successfully build main_standby in disaster standby cluster on node [%s] " % main_standby_inst.hostname) + self.logger.debug( + "Successfully build main_standby in disaster standby cluster on node [%s] " % main_standby_inst.hostname) def kill_dss_instance(self, only_mode=None): """ @@ -1201,23 +1203,22 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Kill dssserver process step is not for mode:%s." % self.params.mode) return primary_dn = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] + db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] main_standby_inst = primary_dn[0] if self.local_host == main_standby_inst.hostname: kill_cmd = "source %s; pkill -9 -f dssserver" % (self.mpp_file) else: kill_cmd = "source %s; pssh -s -t 3 -H %s \"pkill -9 -f dssserver\"" \ - % (self.mpp_file, main_standby_inst.hostname) - self.logger.debug("Kill dssserver on node [%s],cmd: %s." % (main_standby_inst.hostname, kill_cmd)) + % (self.mpp_file, main_standby_inst.hostname) + self.logger.debug("Kill dssserver on node [%s],cmd: %s." 
% (main_standby_inst.hostname, kill_cmd)) sts, out = CmdUtil.getstatusoutput_by_fast_popen(kill_cmd) if sts not in [0, 1]: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "kill dssserver before start cluster on node:" + main_standby_inst.hostname + - ", output:"+str(out).strip()) + ", output:" + str(out).strip()) self.logger.log("Successfully kill dssserver before start cluster on node [%s] " % main_standby_inst.hostname) - def query_cluster(self): """ query cluster @@ -1248,7 +1249,7 @@ class DoradoDisasterRecoveryBase(object): status, output = CmdUtil.retryGetstatusoutput(cmd, retry_time=0) if status != 0: error_str = ErrorCode.GAUSS_516["GAUSS_51607"] % "the cluster" + \ - " Error:\n%s." % output + " Error:\n%s." % output self.logger.debug(error_str) self.logger.log("Warning: the cluster is not normal, please check cluster status!") else: @@ -1296,10 +1297,10 @@ class DoradoDisasterRecoveryBase(object): return self.primary_dn_ids = p_inst_list sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where " \ - "sync_state='Async' and peer_role='StandbyCluster_Standby' and peer_state='Normal';" + "sync_state='Async' and peer_role='StandbyCluster_Standby' and peer_state='Normal';" param_list = [(dn_inst, sql_check) for db_node in self.cluster_info.dbNodes for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - + if not param_list: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain param list for check main standby connection on primary dn") @@ -1699,16 +1700,16 @@ class DoradoDisasterRecoveryBase(object): for idx in range(1, dn_num + 1): cmd = "source %s ; gs_guc %s -N %s -D %s -c " \ - "\"cross_cluster_replconninfo%s\"" \ - % (self.mpp_file, guc_mode, dn_inst.hostname, dn_inst.datadir, idx) + "\"cross_cluster_replconninfo%s\"" \ + % (self.mpp_file, guc_mode, dn_inst.hostname, dn_inst.datadir, idx) self.logger.debug("Remove dn cross_cluster_replconninfo with cmd:%s" % cmd) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s " % output) self.logger.debug("Successfully remove cross_cluster_replconninfo with cmd:%s." - % cmd) - + % cmd) + self.logger.debug("Successfully removed replconninfo for instance:%s" % dn_inst.instanceId) def remove_cross_cluster_replinfos(self, guc_mode="set"): @@ -1888,7 +1889,6 @@ class DoradoDisasterRecoveryBase(object): "error:%s." % (value, str(error))) self.logger.debug("Successfully create cluster_maintance file.") - def check_datanode_query_info(self, params): """ check datanode info by "gs_ctl query" command. 
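# Illustrative sketch (editor's addition, not taken from the patch):
# check_datanode_query_info() works by parsing the text output of "gs_ctl query"
# with regular expressions (db_state / local_role / peer_role / peer_state /
# channel) and then validating the expected role combination. The standalone
# sketch below shows that parsing against a hypothetical sample output; the
# sample text and the health rule are simplified stand-ins, not values from
# the patch.
import re

SAMPLE_OUTPUT = """\
 local_role                     : Primary
 db_state                       : Normal
 peer_role                      : StandbyCluster_Standby
 peer_state                     : Normal
 channel                        : 10.10.10.1:5432-->10.10.10.2:5432
 local_role                     : Primary
"""

def parse_query_output(output):
    """Collect the interesting fields from a gs_ctl query style text dump."""
    return {
        "db_state": re.findall(r"db_state.*: (.*?)\n", output),
        "local_role": re.findall(r"local_role.*: (.*?)\n", output),
        "peer_role": re.findall(r"peer_role.*: (.*?)\n", output),
        "peer_state": re.findall(r"peer_state.*: (.*?)\n", output),
        "channel": re.findall(r"channel.*: (.*?)\n", output),
    }

def primary_side_is_healthy(fields):
    """Loose version of the Primary branch: one Normal db_state, two Primary
    local_role entries, a Normal standby-cluster peer and an outgoing channel."""
    return (fields["db_state"] == ["Normal"]
            and fields["local_role"] == ["Primary", "Primary"]
            and fields["peer_role"] == ["StandbyCluster_Standby"]
            and fields["peer_state"] == ["Normal"]
            and len(fields["channel"]) == 1 and "-->" in fields["channel"][0])

print(primary_side_is_healthy(parse_query_output(SAMPLE_OUTPUT)))   # True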
@@ -1936,7 +1936,7 @@ class DoradoDisasterRecoveryBase(object): return check_ok, output, dest_ip def check_dorado_datanode_query_info(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START, - dorado_switchover=None): + dorado_switchover=None): """ check gs_ctl query info """ @@ -1966,7 +1966,7 @@ class DoradoDisasterRecoveryBase(object): self.check_datanode_query_info, check_params) for ret in results: if ret[0] != 0: - self.logger.debug("Failed to check node[%s] info using \"gs_ctl query\" command " + self.logger.log("Failed to check node[%s] info using \"gs_ctl query\" command " "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) check_status = 1 if check_status == 0: @@ -1996,17 +1996,17 @@ class DoradoDisasterRecoveryBase(object): self.parse_cluster_status() self.stop_cluster() self.write_dorado_step("0_dorado_disaster_stop_cluster_for_failover") - flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") + flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") if os.path.exists(flag_file): - self.logger.debug("Delete file %s." % flag_file) + self.logger.log("Delete file %s." % flag_file) os.remove(flag_file) - self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + self.logger.log(self.remote_replication_pairs_log_message % flag_file) sys.exit(0) if dorado_disaster_step < 1: # 标志文件存在,检查远程复制的lun设备权限,更新进度,代表 "远程复制Pair"任务完成 - flag_file = os.path.join(self.step_file_path, "remote_replication_pairs_done") - if not os.path.exists(flag_file) or not self.check_xlog_file_path(): - self.logger.debug(self.remote_replication_pairs_log_message % flag_file) + flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") + if not os.path.exists(flag_file): + self.logger.log(self.remote_replication_pairs_log_message % flag_file) sys.exit(0) self.write_dorado_step("1_set_remote_replication_pairs_for_failover") self._failover_config_step(dorado_disaster_step, action_flag) @@ -2029,13 +2029,14 @@ class DoradoDisasterRecoveryBase(object): cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL] self.check_cluster_status(cluster_normal_status, check_current=True) if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - self.check_dorado_datanode_query_info(timeout=30, - dorado_switchover="disaster_failover") + # 没有流复制信息检查 + # self.check_dorado_datanode_query_info(timeout=30, + # dorado_switchover="disaster_failover") self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") self.update_dorado_info("cluster", "normal") else: - self.check_dorado_datanode_query_info(timeout=30, - dorado_switchover="disaster_failover") + self.check_dorado_datanode_query_info(timeout=self.params.waitingTimeout, + dorado_switchover="disaster_switchover") self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "100%") self.update_dorado_info("cluster", "archive") @@ -2135,8 +2136,8 @@ class DoradoDisasterRecoveryBase(object): % (self.mpp_file, bin_path, opt_type, data_dir, max_term) else: cmd_config = "source %s; pssh -s -t 900 -H %s \"source %s; %s/gs_ctl notify%s -D %s " \ - "-M primary -T %s -t 600\"" % (self.mpp_file, self.mpp_file, hostname, - bin_path, opt_type, data_dir, max_term) + "-M primary -T %s -t 600\"" % (self.mpp_file, self.mpp_file, hostname, + bin_path, opt_type, data_dir, max_term) self.logger.debug("Config primary dn with cmd:%s" % cmd_config) status, output = CmdUtil.retryGetstatusoutput(cmd_config) if status != 0: -- Gitee From 
9a3683c443b2f37e3effe901a2b8cfc06d8df356 Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 18 Aug 2023 18:52:18 +0800 Subject: [PATCH 16/23] =?UTF-8?q?switchover=20=E6=B5=8B=E8=AF=95=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/dorado_disaster_recovery/ddr_base.py | 10 +++++----- .../dorado_disaster_recovery_switchover.py | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index e68d49b0..21811a23 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1906,7 +1906,7 @@ class DoradoDisasterRecoveryBase(object): dbState = re.findall(r"db_state.*: (.*?)\n", output) localRole = re.findall(r"local_role.*: (.*?)\n", output) peerRole = re.findall(r"peer_role.*: (.*?)\n", output) - preeState = re.findall(r"pree_state.*: (.*?)\n", output) + peerState = re.findall(r"peer_state.*: (.*?)\n", output) channel = re.findall(r"channel.*: (.*?)\n", output) if status == 0: check_ok = 0 @@ -1914,14 +1914,14 @@ class DoradoDisasterRecoveryBase(object): if (len(dbState) != 1 or dbState[0] != "Normal") or \ (len(localRole) != 2 or localRole[0] != "Primary" or localRole[1] != "Primary") or \ (len(peerRole) != 1 or peerRole[0] != "StandbyCluster_Standby") or \ - (len(preeState) != 1 or preeState[0] != "Normal") or \ + (len(peerState) != 1 or peerState[0] != "Normal") or \ (len(channel) != 1 or "-->" not in channel[0]): check_ok = -1 elif state == "Main Standby": if (len(dbState) != 1 or dbState[0] != "Normal") or \ (len(localRole) != 2 or localRole[0] != "Main Standby" or localRole[1] != "Standby") or \ (len(peerRole) != 1 or peerRole[0] != "Primary") or \ - (len(preeState) != 1 or preeState[0] != "Normal") or \ + (len(peerState) != 1 or peerState[0] != "Normal") or \ (len(channel) != 1 or "<--" not in channel[0]): check_ok = -1 elif state == "Standby": @@ -1969,8 +1969,8 @@ class DoradoDisasterRecoveryBase(object): self.logger.log("Failed to check node[%s] info using \"gs_ctl query\" command " "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1])) check_status = 1 - if check_status == 0: - break + if check_status == 0: + break if check_status != 0: if dorado_switchover == "disaster_switchover": raise Exception( diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py index 2f878de5..56f7e41b 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -124,12 +124,12 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): " \n%s" % str(error)) rollback_step = self.query_dorado_step() self.logger.debug("Roll back switchover step:%s" % rollback_step) - self.remove_cluster_maintance_file_for_switchover() - self.remove_cluster_maintance_file() - if rollback_step >= 2: - self.dorado_switchover_roll_back(update_query=True) - self.clean_step_file() - self.clean_flag_file() + #self.remove_cluster_maintance_file_for_switchover() + #self.remove_cluster_maintance_file() + #if rollback_step >= 2: + # self.dorado_switchover_roll_back(update_query=True) + #self.clean_step_file() + #self.clean_flag_file() raise Exception(error) 
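# Illustrative sketch (editor's addition, not taken from the patch): the hunks
# above touch the loop that keeps re-checking "gs_ctl query" results on every
# datanode until all of them look healthy or the waiting timeout expires. A
# minimal retry-until-timeout helper in the same spirit is sketched below; the
# command in the usage comment is a placeholder, not a value from the patch.
import subprocess
import time
from datetime import datetime, timedelta

def poll_until_ok(cmd, timeout_seconds, interval=5):
    """Run cmd until it exits 0 or the timeout expires; return the last result."""
    end_time = datetime.now() + timedelta(seconds=timeout_seconds)
    status, output = 1, ""
    while datetime.now() < end_time:
        proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        status, output = proc.returncode, proc.stdout + proc.stderr
        if status == 0:
            break                       # healthy: stop polling
        time.sleep(interval)            # not healthy yet: wait and retry
    return status, output

# usage (placeholder command):
# poll_until_ok("cm_ctl query | grep -q 'cluster_state.*Normal'", 300)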
self.remove_hadr_switchover_process_file() -- Gitee From 962e4f15402e2b7f88bde827033b0c9d6e2fe0c4 Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 18 Aug 2023 21:33:18 +0800 Subject: [PATCH 17/23] =?UTF-8?q?failover=20=E6=B5=8B=E8=AF=95=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/dorado_disaster_recovery/ddr_base.py | 37 +++++++++++-------- .../dorado_disaster_recovery_switchover.py | 35 ++++++------------ 2 files changed, 32 insertions(+), 40 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 21811a23..b4c288fc 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -86,6 +86,10 @@ class DoradoDisasterRecoveryBase(object): self.connected_nodes = [] self.__init_globals() self.backup_open_key = DoradoDisasterRecoveryConstants.BACKUP_OPEN % user + self.remote_replication_pairs_input_message = \ + "Correctly configure \"Remote Replication Pairs\", " \ + "and ensure that the corresponding device files have appropriate permissions.\n" \ + "Ready to move on (yes/no)? " def __init_globals(self): self.cluster_info = dbClusterInfo() @@ -1979,6 +1983,22 @@ class DoradoDisasterRecoveryBase(object): ErrorCode.GAUSS_516["GAUSS_51659"] % "gs_ctl query") self.logger.debug("Successfully wait for gs_ctl query status become Normal.", "constant") + def check_input(self, msg_print): + flag = input(msg_print) + count_f = 2 + while count_f: + if ( + flag.upper() != "YES" + and flag.upper() != "NO" + and flag.upper() != "Y" and flag.upper() != "N"): + count_f -= 1 + flag = input("Please type 'yes' or 'no': ") + continue + break + if flag.upper() != "YES" and flag.upper() != "Y": + self.logger.exitWithError( + ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper()) + def dorado_failover_single_inst(self, dorado_disaster_step, action_flag=None): """ dorado disaster recovery failover for single_inst cluster @@ -1996,18 +2016,8 @@ class DoradoDisasterRecoveryBase(object): self.parse_cluster_status() self.stop_cluster() self.write_dorado_step("0_dorado_disaster_stop_cluster_for_failover") - flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") - if os.path.exists(flag_file): - self.logger.log("Delete file %s." 
% flag_file) - os.remove(flag_file) - self.logger.log(self.remote_replication_pairs_log_message % flag_file) - sys.exit(0) if dorado_disaster_step < 1: - # 标志文件存在,检查远程复制的lun设备权限,更新进度,代表 "远程复制Pair"任务完成 - flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") - if not os.path.exists(flag_file): - self.logger.log(self.remote_replication_pairs_log_message % flag_file) - sys.exit(0) + self.check_input(self.remote_replication_pairs_input_message) self.write_dorado_step("1_set_remote_replication_pairs_for_failover") self._failover_config_step(dorado_disaster_step, action_flag) self._failover_start_step(dorado_disaster_step, action_flag) @@ -2021,17 +2031,12 @@ class DoradoDisasterRecoveryBase(object): self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "80%") else: self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "80%") - self.remove_cluster_maintance_file_for_switchover() - self.remove_cluster_maintance_file() self.start_cluster() self.write_dorado_step("3_start_cluster_done") if dorado_disaster_step < 4: cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL] self.check_cluster_status(cluster_normal_status, check_current=True) if action_flag != DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER: - # 没有流复制信息检查 - # self.check_dorado_datanode_query_info(timeout=30, - # dorado_switchover="disaster_failover") self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_FAILOVER, "100%") self.update_dorado_info("cluster", "normal") else: diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py index 56f7e41b..ee08e2b0 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -39,10 +39,6 @@ from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryCo class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.remote_replication_pairs_log_message = \ - "Please configure \"Remote Replication Pairs\" correctly on "\ - "And check and grant appropriate permissions to the corresponding device files.\n"\ - "Create file \"%s\" to mark the completion of the above operations and execute the tool again." def run(self): """ @@ -76,9 +72,9 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): self.create_cluster_maintance_file("dorado switchover") self.update_dorado_info("cluster", DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER) dorado_disaster_step = self.query_dorado_step() + if dorado_disaster_step < 1: + self.check_switchover_workable() if self.params.mode == "primary": - # 这里可以等待 “Remote Copy Pairs” 同步状态完成 - # self.dorado_failover_single_inst(dorado_disaster_step, DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER) else: @@ -86,21 +82,12 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): try: if dorado_disaster_step < 1: self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "10%") - #self.check_switchover_workable() self.stop_cluster() self.write_dorado_step("1_dorado_disaster_stop_cluster_for_switchover") - flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") - if os.path.exists(flag_file): - self.logger.log("Delete file %s." 
% flag_file) - os.remove(flag_file) - self.logger.log(self.remote_replication_pairs_log_message % flag_file) - sys.exit(0) if dorado_disaster_step < 2: self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "30%") flag_file = os.path.join(self.dorado_file_dir, "remote_replication_pairs_done") - if not os.path.exists(flag_file): - self.logger.log(self.remote_replication_pairs_log_message % flag_file) - sys.exit(0) + self.check_input(self.remote_replication_pairs_input_message) self.write_dorado_step("2_set_remote_replication_pairs_for_switchover") if dorado_disaster_step < 3: self.set_datanode_guc("cluster_run_mode", "cluster_standby", "set") @@ -124,12 +111,12 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): " \n%s" % str(error)) rollback_step = self.query_dorado_step() self.logger.debug("Roll back switchover step:%s" % rollback_step) - #self.remove_cluster_maintance_file_for_switchover() - #self.remove_cluster_maintance_file() - #if rollback_step >= 2: + # self.remove_cluster_maintance_file_for_switchover() + # self.remove_cluster_maintance_file() + # if rollback_step >= 2: # self.dorado_switchover_roll_back(update_query=True) - #self.clean_step_file() - #self.clean_flag_file() + # self.clean_step_file() + # self.clean_flag_file() raise Exception(error) self.remove_hadr_switchover_process_file() @@ -405,9 +392,9 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): if update_query: self.update_dorado_info("cluster", "archive") self.logger.log("Please restore the original \"Remote Replication Pairs\" correctly on " - "the storage management interface.\n" - "And check and grant appropriate permissions to the corresponding device files.\n" - "After completing these steps, start the cluster manually !") + "the storage management interface.\n" + "And check and grant appropriate permissions to the corresponding device files.\n" + "After completing these steps, start the cluster manually !") self.logger.log("Successfully Roll back dorado disaster cluster switchover.") def check_streaming_disaster_switchover_barrier(self): -- Gitee From ff8886de531aeb00ceb44c8939ade6127804b284 Mon Sep 17 00:00:00 2001 From: chuanglichuangwai Date: Fri, 18 Aug 2023 21:41:04 +0800 Subject: [PATCH 18/23] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9=E6=B5=81?= =?UTF-8?q?=E5=A4=8D=E5=88=B6=E4=BF=A1=E6=81=AF=E7=9A=84=E5=A2=9E=E5=BC=BA?= =?UTF-8?q?=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/impl/dorado_disaster_recovery/ddr_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index b4c288fc..f841a42d 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1919,14 +1919,14 @@ class DoradoDisasterRecoveryBase(object): (len(localRole) != 2 or localRole[0] != "Primary" or localRole[1] != "Primary") or \ (len(peerRole) != 1 or peerRole[0] != "StandbyCluster_Standby") or \ (len(peerState) != 1 or peerState[0] != "Normal") or \ - (len(channel) != 1 or "-->" not in channel[0]): + (len(channel) != 1 or "-->" not in channel[0] or len(channel[0]) <= 30): check_ok = -1 elif state == "Main Standby": if (len(dbState) != 1 or dbState[0] != "Normal") or \ (len(localRole) != 2 or localRole[0] != "Main Standby" or localRole[1] != "Standby") or \ (len(peerRole) != 1 or peerRole[0] != "Primary") or \ (len(peerState) != 1 or 
peerState[0] != "Normal") or \ - (len(channel) != 1 or "<--" not in channel[0]): + (len(channel) != 1 or "<--" not in channel[0] or len(channel[0]) <= 30): check_ok = -1 elif state == "Standby": if (len(dbState) != 1 or dbState[0] != "Normal") or \ -- Gitee From 094bc580d80994fe568b8450b319e7bfb8880a03 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 21 Aug 2023 01:39:44 +0800 Subject: [PATCH 19/23] update gs_ddr --- script/gs_ddr | 4 +- .../impl/dorado_disaster_recovery/ddr_base.py | 579 +----------------- .../dorado_disaster_recovery/ddr_constants.py | 10 +- .../dorado_disaster_recovery_failover.py | 6 +- .../dorado_disaster_recovery_query.py | 72 +-- .../dorado_disaster_recovery_start.py | 20 - .../dorado_disaster_recovery_switchover.py | 8 +- .../params_handler.py | 22 +- 8 files changed, 61 insertions(+), 660 deletions(-) diff --git a/script/gs_ddr b/script/gs_ddr index 3fb5047f..6b6689db 100644 --- a/script/gs_ddr +++ b/script/gs_ddr @@ -39,14 +39,14 @@ from impl.dorado_disaster_recovery.ddr_modules.\ from impl.dorado_disaster_recovery.ddr_modules.\ dorado_disaster_recovery_switchover import DisasterRecoverySwitchoverHandler from impl.dorado_disaster_recovery.ddr_modules.\ - dorado_disaster_recovery_query import StreamingQueryHandler + dorado_disaster_recovery_query import DoradoQueryHandler HANDLER_MAPPING = { "start": DisasterRecoveryStartHandler, "stop": DisasterRecoveryStopHandler, "switchover": DisasterRecoverySwitchoverHandler, "failover": DisasterRecoveryFailoverHandler, - #"query": StreamingQueryHandler + "query": DoradoQueryHandler } diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index f841a42d..5e0064a2 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_base.py is a base module for streaming disaster recovery. +# Description : ddr_base.py is a base module for dorado disaster recovery. ############################################################################# import json import os @@ -104,7 +104,7 @@ class DoradoDisasterRecoveryBase(object): self.cluster_node_names = self.cluster_info.getClusterNodeNames() self.dorado_file_dir = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.DDR_FILES_DIR) self.dorado_xml = os.path.join(self.dorado_file_dir, - DoradoDisasterRecoveryConstants.STREAMING_CONFIG_XML) + DoradoDisasterRecoveryConstants.DDR_CONFIG_XML) self.ssh_tool = SshTool(self.cluster_node_names, self.log_file) self.mpp_file = EnvUtil.getMpprcFile() self.dss_home_dir = self.cluster_info.dss_home @@ -169,7 +169,7 @@ class DoradoDisasterRecoveryBase(object): def handle_lock_file(self, trace_id, action): """ - Create lock file for other streaming process. + Create lock file for other dorado process. 
""" if self.params.task not in DoradoDisasterRecoveryConstants.TASK_EXIST_CHECK: return @@ -187,7 +187,7 @@ class DoradoDisasterRecoveryBase(object): def check_parallel_process_is_running(self): """ - Check streaming process is running + Check dorado process is running """ hostnames = ' -H '.join(self.cluster_node_names) file_path = os.path.join(self.pg_host, DoradoDisasterRecoveryConstants.PROCESS_LOCK_FILE) @@ -221,87 +221,6 @@ class DoradoDisasterRecoveryBase(object): self.ssh_tool.executeCommand(cmd) self.logger.debug("Successfully create dir [%s] on all nodes." % dir_path) - def check_hadr_pwd(self, only_mode=None): - """ - Check hadr pwd is correct or not - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Checking hadr user is not for mode:%s." % self.params.mode) - return - self.logger.debug("Start checking disaster user password.") - sql = "select 1;" - primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - if not primary_dns: - raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] - % "obtain primary dn when check disaster user") - status, output = ClusterCommand.remoteSQLCommand( - sql, self.user, primary_dns[0].hostname, primary_dns[0].port, False, - user_name=self.params.hadrUserName, user_pwd=self.params.hadrUserPassword) - if status != 0: - if "Invalid username/password" in output: - self.logger.debug("Logging denied, please check your password.") - self.logger.logExit(ErrorCode.GAUSS_516['GAUSS_51632'] - % "check disaster user password") - self.logger.debug("Successfully check disaster user password.") - - def check_hadr_user(self, only_mode=None): - """ - Check hadr user is exist - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Checking hadr user is not for mode:%s." 
% self.params.mode) - return - self.logger.log("Start checking disaster recovery user.") - sql = "select usename, userepl from pg_user;" - primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - if not primary_dns: - raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] - % "obtain primary dn when check disaster user") - status, output = ClusterCommand.remoteSQLCommand( - sql, self.user, primary_dns[0].hostname, primary_dns[0].port, True) - if status != 0: - raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] - % "execute sql for checking disaster user.") - user_dict = {user_info.split('|')[0].strip(): user_info.split('|')[-1].strip() - for user_info in output.strip().split('\n')} - for user_name, repl in user_dict.items(): - if user_name == self.params.hadrUserName and repl == 't': - self.logger.log("Successfully check disaster recovery user.") - return - msg = ErrorCode.GAUSS_516['GAUSS_51632'] % 'checking disaster user, please confirm ' \ - 'disaster user is exist and with ' \ - 'replication role' - self.logger.logExit(msg + "Users:%s" % user_dict) - - def __copy_hadr_user_key(self, secure_dir_path, update=False): - """ - Copy hadr.key.cipher and hadr.key.rand - """ - self.logger.log("Start copy hadr user key files.") - hadr_cipher_path = os.path.join(self.bin_path, "hadr.key.cipher") - hadr_rand_path = os.path.join(self.bin_path, "hadr.key.rand") - secure_cipher_path = os.path.join(secure_dir_path, "hadr.key.cipher") - secure_rand_path = os.path.join(secure_dir_path, "hadr.key.rand") - if not update: - if (not os.path.isfile(hadr_cipher_path)) or (not os.path.isfile(hadr_rand_path)): - self.logger.debug("Not found hadr user key, no need to copy.") - return - FileUtil.cpFile(hadr_cipher_path, secure_cipher_path, cmd_type="shell") - FileUtil.cpFile(hadr_rand_path, secure_rand_path, cmd_type="shell") - self.logger.debug("Successfully copy hadr key files into temp secure dir.") - else: - if (not os.path.isfile(secure_cipher_path)) or (not os.path.isfile(secure_rand_path)): - self.logger.debug("Not found hadr user key, no need to update.") - return - host_names = self.get_all_connection_node_name("update_hadr_key") - self.ssh_tool.scpFiles(secure_cipher_path, self.bin_path, hostList=host_names) - self.ssh_tool.scpFiles(secure_rand_path, self.bin_path, hostList=host_names) - FileUtil.removeFile(secure_cipher_path) - FileUtil.removeFile(secure_rand_path) - self.logger.debug("Finished copy hadr key files to nodes:%s." % host_names) - def remove_secure_dir(self, dir_path, host_name): """ Remove gs_secure_files dir in PGDATA @@ -314,100 +233,6 @@ class DoradoDisasterRecoveryBase(object): if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s " % output) - def __stream_copy_file_to_all_dn(self, temp_secure_dir_path): - """ - copy key file dir to all dn dir - """ - dn_infos = DefaultValue.get_dn_info(self.cluster_info) - self.logger.debug("Got dns:%s" % dn_infos) - copy_succeed = 0 - host_names = self.get_all_connection_node_name("copy gs_secure_files to dns") - for dn_info in dn_infos: - if dn_info["host_name"] not in host_names: - continue - self.logger.debug("Copy disaster recovery secure files to inst[%s][%s][%s]." 
% - (dn_info['id'], dn_info['data_dir'], dn_info['host_name'])) - try: - self.remove_secure_dir(dn_info['data_dir'], dn_info['host_name']) - self.ssh_tool.scpFiles( - temp_secure_dir_path, dn_info['data_dir'], [dn_info['host_name']]) - copy_succeed += 1 - except Exception as error: - self.logger.debug("Failed copy secure files to inst[%s][%s][%s],error:%s." % - (dn_info['id'], dn_info['data_dir'], dn_info['host_name'], - str(error))) - if copy_succeed == 0: - raise Exception( - ErrorCode.GAUSS_516["GAUSS_51632"] % "copy secure dir to all dn data dir") - self.logger.log("Successfully copy secure files.") - - def __prepare_cluster_user_record(self, temp_secure_dir_path): - """ - Save cluster user record - """ - cluster_user_record = os.path.join(temp_secure_dir_path, - DoradoDisasterRecoveryConstants.CLUSTER_USER_RECORD) - DefaultValue.write_content_on_file(cluster_user_record, self.user) - self.logger.debug("Record current cluster user:%s." % self.user) - - def prepare_gs_secure_files(self, only_mode=None): - """ - Prepare gs_secure_files on primary cluster - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Prepare gs_secure_files is not for mode:%s." % self.params.mode) - return - self.logger.log("Start prepare secure files.") - secure_dir_name = DoradoDisasterRecoveryConstants.GS_SECURE_FILES - temp_secure_dir_path = os.path.realpath( - os.path.join(self.dorado_file_dir, secure_dir_name)) - if os.path.isdir(temp_secure_dir_path): - self.logger.debug("Secure file dir exist, cleaning...") - FileUtil.removeDirectory(temp_secure_dir_path) - FileUtil.createDirectory(temp_secure_dir_path, True, DefaultValue.KEY_DIRECTORY_MODE) - if os.path.isdir(temp_secure_dir_path): - self.logger.debug("Successfully create secure file dir.") - version_file_path = os.path.realpath(os.path.join(self.gp_home, "version.cfg")) - FileUtil.cpFile(version_file_path, temp_secure_dir_path) - self.__prepare_cluster_user_record(temp_secure_dir_path) - self.__copy_hadr_user_key(temp_secure_dir_path, update=False) - self.__stream_copy_file_to_all_dn(temp_secure_dir_path) - FileUtil.removeDirectory(temp_secure_dir_path) - - def stream_clean_gs_secure(self, params): - """ - clean gs secure dir - """ - inst, file_path = params - self.logger.debug("Starting clean instance %s gs secure dir." % inst.instanceId) - cmd = "source %s && pssh -s -H %s 'if [ -d %s ]; then rm -rf %s; fi'" \ - % (self.mpp_file, inst.hostname, file_path, file_path) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - self.logger.debug("Clean gs secure dir for instance [%s] result:%s." % - (inst.instanceId, output)) - self.logger.debug("Successfully clean instance %s gs secure dir." % inst.instanceId) - - def clean_gs_secure_dir(self, only_mode=None): - """ - Clean gs secure dir if exist - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Clean gs_secure_files is not for mode:%s." 
% self.params.mode) - return - self.logger.debug("Start clean gs secure dir.") - params = [] - for node in self.cluster_info.dbNodes: - for inst in node.datanodes: - if inst.hostname not in self.connected_nodes: - continue - file_path = os.path.realpath(os.path.join( - inst.datadir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES)) - params.append((inst, file_path)) - if params: - parallelTool.parallelExecute(self.stream_clean_gs_secure, params) - self.logger.debug("Finished clean gs secure dir.") - def remove_dorado_dir(self, dir_path): """ Remove dorado files dir @@ -434,7 +259,7 @@ class DoradoDisasterRecoveryBase(object): def write_dorado_step(self, step): """ - write streaming step + write dorado step :return: NA """ self.logger.debug("Dorado disaster recovery action:[%s] record current step:[%s]" @@ -571,7 +396,7 @@ class DoradoDisasterRecoveryBase(object): def check_cluster_status(self, status_allowed, only_check=False, check_current=False, is_log=True): """ - Stream disaster cluster switch to check cluster status + Dorado disaster cluster switch to check cluster status """ cluster_status = self.cluster_status if check_current: @@ -668,7 +493,7 @@ class DoradoDisasterRecoveryBase(object): def restore_guc_params(self): """ - Restore guc params in .streaming_guc_backup + Restore guc params in .dorado_guc_backup """ self.logger.debug("Start restore guc params.") guc_backup_file = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GUC_BACKUP_FILE) @@ -694,18 +519,6 @@ class DoradoDisasterRecoveryBase(object): inst_type="dn", raise_error=False) restored_keys.append(guc_key) - def set_most_available(self, mode='set', inst_type='dn', raise_error=True): - dn_insts = [dn_inst for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes if int(dn_inst.mirrorId) == 1] - if len(dn_insts) > 2: - self.logger.debug("No need set most available for current cluster.") - return - self.__set_guc_param("most_available_sync", "on", mode=mode, - inst_type=inst_type, raise_error=raise_error) - - self.__set_guc_param("synchronous_commit", "on", mode=mode, - inst_type=inst_type, raise_error=raise_error) - def __set_guc_param(self, key, value, mode='set', inst_type='dn', raise_error=True): """ Set guc param @@ -798,24 +611,6 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " "value [%s]." % (opt_type, inst.mirrorId, inst.hostname, value)) - def __set_dn_xlog_file_path(self, params_list): - """ - Set xlog_file_path value in primary dn - """ - (inst, opt_type, value, mpprc_file) = params_list - self.logger.debug("Start [%s] shardNum [%s] node [%s] wal_keep_segments value [%s]." - % (opt_type, inst.mirrorId, inst.hostname, value)) - cmd = "source %s; gs_guc %s " \ - "-N %s -D %s -c \"xlog_file_path = '%s'\" " % \ - (mpprc_file, opt_type, inst.node, inst.datadir, value) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0: - raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + - "Options:%s, Error: \n%s " - % ("set xlog_file_path for inst:%s" % inst.instanceId, str(output))) - self.logger.debug("Successfully [%s] shardNum [%s] node [%s] wal_keep_segments " - "value [%s]." 
% (opt_type, inst.mirrorId, inst.hostname, value)) - def set_wal_keep_segments(self, opt_type, value, restore_flag=False, only_mode=None): """ guc set wal_keep_segments value in primary dn @@ -950,7 +745,7 @@ class DoradoDisasterRecoveryBase(object): """ self.logger.debug("Start stop node:%s" % node_id) cmd = ClusterCommand.getStopCmd(int(node_id), "i", 1800) - self.logger.debug("Streaming disaster calling cm_ctl to stop cluster, cmd=[%s]" % cmd) + self.logger.debug("dorado disaster calling cm_ctl to stop cluster, cmd=[%s]" % cmd) status, output = CmdUtil.retryGetstatusoutput(cmd) if status != 0: self.logger.debug("Failed stop node:%s, error:%s" % (node_id, output)) @@ -972,7 +767,7 @@ class DoradoDisasterRecoveryBase(object): % (static_config + " or " + cm_ctl_file)) node_id_list = list(set([instance.nodeId for instance in self.normal_instances])) parallelTool.parallelExecute(self.__stop_one_node, node_id_list) - self.logger.log("Successfully stopped the cluster by node for streaming cluster.") + self.logger.log("Successfully stopped the cluster by node for dorado cluster.") def get_all_connection_node_name(self, action_flag="", no_update=True): """ @@ -1332,76 +1127,6 @@ class DoradoDisasterRecoveryBase(object): time.sleep(5) self.logger.log("Main standby already connected.") - def hadr_key_generator(self, key_name): - """ - Generate key_name.key.cipher & key_name.key.rand - """ - self.logger.log("Start generate hadr key files.") - if not os.path.exists(self.bin_path): - msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain bin path." - self.logger.debug(msg) - raise Exception(msg) - if not os.path.exists(self.gp_home): - msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain env GPHOME" - self.logger.debug(msg) - raise Exception(msg) - key_cipher = os.path.join(self.bin_path, "%s.key.cipher" % key_name) - key_rand = os.path.join(self.bin_path, "%s.key.rand" % key_name) - cmd = "export LD_LIBRARY_PATH=%s/script/gspylib/clib && source %s " \ - "&& gs_guc generate -S default -o %s -D '%s' && %s && %s" \ - % (self.gp_home, self.mpp_file, key_name, self.bin_path, - CmdUtil.getChmodCmd(str(ConstantsBase.KEY_FILE_MODE), key_cipher), - CmdUtil.getChmodCmd(str(ConstantsBase.KEY_FILE_MODE), key_rand)) - if (not os.path.isfile(key_cipher)) or (not os.path.isfile(key_rand)): - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0 or (not os.path.isfile(key_cipher)) \ - or (not os.path.isfile(key_rand)): - msg = ErrorCode.GAUSS_516["GAUSS_51632"] \ - % "generate hadr key files" + "Error:%s" % output - self.logger.error(msg) - raise Exception(msg) - else: - self.logger.log("Streaming key files already exist.") - - self.ssh_tool.scpFiles(key_cipher, self.bin_path) - self.ssh_tool.scpFiles(key_rand, self.bin_path) - self.logger.log("Finished generate and distribute hadr key files.") - - def encrypt_hadr_user_info(self, key_name, hadr_user, hadr_pwd): - """ - Encrypt hadr user info. 
- """ - self.logger.log("Start encrypt hadr user info.") - cmd = "source %s && gs_encrypt -f %s \"%s|%s\"" \ - % (self.mpp_file, key_name, hadr_user, hadr_pwd) - status, output = CmdUtil.retryGetstatusoutput(cmd) - if status != 0 or not output: - msg = ErrorCode.GAUSS_516["GAUSS_51632"] % "encrypt hadr user info" - self.logger.error(msg) - raise Exception(msg) - self.logger.log("Successfully encrypt hadr user info.") - return output - - def keep_hadr_user_info(self, info_str, retry=5): - """ - Keep hadr user info into GLOBAL CONFIGURATION - """ - self.logger.log("Start save hadr user info into database.") - sql = "ALTER GLOBAL CONFIGURATION with(hadr_user_info ='%s');" % info_str - primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - primary_dns = primary_dns * retry - output = "None" - for dn_inst in primary_dns: - status, output = ClusterCommand.remoteSQLCommand( - sql, self.user, dn_inst.hostname, dn_inst.port, True) - if status == 0: - self.logger.log("Successfully save hadr user info into database.") - return - msg = ErrorCode.GAUSS_516['GAUSS_51632'] % "save hadr user info into database" - self.logger.error(msg + "Error:%s" % SensitiveMask.mask_pwd(output)) - raise Exception(msg) - def restore_wal_keep_segments(self, only_mode=None): """ restore wal_keep_segments default value @@ -1455,7 +1180,7 @@ class DoradoDisasterRecoveryBase(object): """ Check action and mode if step file exist. if any dorado options not finished(step file exist), - not allowed doing any other streaming options except query. + not allowed doing any other dorado options except query. """ self.logger.debug("Checking action and mode.") exist_step_file_names = [] @@ -1471,7 +1196,7 @@ class DoradoDisasterRecoveryBase(object): % "check action and mode, the step files %s already exist, " "please ensure the action %s is finished before " "doing current options" % (exist_step_file_names, exist_action)) - self.logger.debug("Successfully checked action and mode.") + self.logger.debug("clean_global_configSuccessfully checked action and mode.") def clean_dorado_dir(self): """ @@ -1487,24 +1212,6 @@ class DoradoDisasterRecoveryBase(object): "Failed to remove dorado dir with error:%s" % error) self.logger.log("Finished remove dorado dir.") - def clean_global_config(self): - """ - Clean global config - """ - self.logger.log("Clean hadr user info.") - sql = "DROP GLOBAL CONFIGURATION hadr_user_info;" - primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes for dn_inst in - db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - output = "None" - for dn_inst in primary_dns: - status, output = ClusterCommand.remoteSQLCommand( - sql, self.user, dn_inst.hostname, dn_inst.port, True) - if status == 0: - self.logger.log("Successfully clean hadr user info from database.") - return - msg = ErrorCode.GAUSS_516['GAUSS_51632'] % "clean hadr user info from database" - self.logger.debug(msg + "Error:%s" % SensitiveMask.mask_pwd(output)) - def get_build_info(self): """ Assemble build infos @@ -1538,116 +1245,6 @@ class DoradoDisasterRecoveryBase(object): self.logger.debug("Successfully get remote dn info:%s." 
% remote_ip_port) return dn_inst_info, remote_ip_port - def build_file_from_remote(self): - """ - Build files from remote cluster - """ - local_dn_info, remote_ip_port = self.get_build_info() - cmd_local = 'source %s; %s/gs_ctl build -D %s -M standby -b copy_secure_files -Z datanode' \ - ' -U %s -P "%s" -C "localhost=%s localport=%s remotehost=%s remoteport=%s"' - cmd_remote = "echo \"source %s; %s/gs_ctl build -D %s -M standby -b copy_secure_files -Z " \ - "datanode -U %s -P '%s' -C 'localhost=%s localport=%s " \ - "remotehost=%s remoteport=%s'\"" \ - " | pssh -s -H %s" - - end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout) - self.logger.debug("Retry Building with timeout:%ss." % self.params.waitingTimeout) - succeed = False - while datetime.now() < end_time: - for local_primary in local_dn_info: - for remote_ip, remote_port in remote_ip_port: - if local_primary["host_name"] == NetUtil.GetHostIpOrName(): - cmd = cmd_local % (self.mpp_file, "%s/bin" % self.gauss_home, - local_primary["data_dir"], - self.params.hadrUserName, self.params.hadrUserPassword, - local_primary["listen_ip"], local_primary["port"], - remote_ip, remote_port) - else: - cmd = cmd_remote % (self.mpp_file, "%s/bin" % self.gauss_home, - local_primary["data_dir"], - self.params.hadrUserName, self.params.hadrUserPassword, - local_primary["listen_ip"], local_primary["port"], - remote_ip, remote_port, local_primary["host_name"]) - result = DefaultValue.fast_ping_on_node(local_primary["host_name"], - local_primary["listen_ip"], - remote_ip, self.logger) - if not result[-1]: - self.logger.debug("Ignore build from %s, ping result:%s" - % (remote_ip, result[-1])) - continue - if self.cluster_info.isSingleInstCluster(): - cmd = cmd.replace(" -Z datanode", "") - self.logger.debug("Building with cmd:%s." - % cmd.replace(self.params.hadrUserPassword, "***")) - status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd) - if status == 0: - succeed = True - self.logger.debug("Successfully Building with cmd:%s." - % cmd.replace(self.params.hadrUserPassword, "***")) - return succeed - else: - self.logger.debug("Building result:%s." 
% SensitiveMask.mask_pwd(output)) - time.sleep(1) - return succeed - - def __copy_secure_dir_from_dn_dir(self): - """ - Find and copy key file dir from all dn dir - """ - local_temp_secure_path = os.path.join( - self.dorado_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) - if os.path.isdir(local_temp_secure_path): - FileUtil.removeDirectory(local_temp_secure_path) - rand_path = os.path.join(local_temp_secure_path, DoradoDisasterRecoveryConstants.HADR_KEY_RAND) - cipher_path = os.path.join(local_temp_secure_path, DoradoDisasterRecoveryConstants.HADR_KEY_CIPHER) - cmd_tep = "echo \"if [ -d '%s' ];then source %s && pscp --trace-id %s -H %s '%s' '%s' " \ - "&& rm -rf '%s';fi\" | pssh -s -H %s" - succeed = False - for db_node in self.cluster_info.dbNodes: - for dn_inst in db_node.datanodes: - if int(dn_inst.mirrorId) == 1: - key_file_path = os.path.realpath(os.path.join( - dn_inst.datadir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES)) - cmd_copy_dir = cmd_tep % (key_file_path, self.mpp_file, self.trace_id, - self.local_host, key_file_path, - self.dorado_file_dir, - key_file_path, dn_inst.hostname) - status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd_copy_dir) - self.logger.debug("Copy cmd:%s" % cmd_copy_dir) - if status != 0: - self.logger.debug("Try copy secure dir from:[%s][%s], error:%s" % ( - dn_inst.hostname, key_file_path, output)) - if os.path.isdir(local_temp_secure_path) and os.path.isfile(rand_path) \ - and os.path.isfile(cipher_path): - succeed = True - if not succeed: - raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "copy secure file dir") - self.logger.debug("Successfully copy secure dir, file list:%s." % - os.listdir(local_temp_secure_path)) - - def build_and_distribute_key_files(self, only_mode=None): - """ - Distribute key files - """ - if only_mode and self.params.mode != only_mode: - self.logger.debug("Wal keep segment opts not for mode:%s." % self.params.mode) - return - self.logger.log("Start build key files from remote cluster.") - # build file - if not self.build_file_from_remote(): - raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "build files from cluster") - # copy file from data dir to streaming dir - self.__copy_secure_dir_from_dn_dir() - # check version consistency - self.__check_version_file() - # check cluster user consistency - self.__check_cluster_user() - # distribute key files to all node - secure_dir_path = os.path.join(self.dorado_file_dir, DoradoDisasterRecoveryConstants.GS_SECURE_FILES) - self.__copy_hadr_user_key(secure_dir_path, update=True) - FileUtil.removeDirectory(secure_dir_path) - self.logger.log("Successfully build and distribute key files to all nodes.") - def __check_version_file(self): """ function: Check whether the version numbers of the host @@ -1755,7 +1352,7 @@ class DoradoDisasterRecoveryBase(object): def remove_pg_hba(self, ignore_error=False): """ - Remove remote ips from pg hba of streaming disaster + Remove remote ips from pg hba of dorado disaster """ self.logger.log("Start remove pg_hba config.") remove_ips = [] @@ -1783,65 +1380,6 @@ class DoradoDisasterRecoveryBase(object): raise error self.logger.log("Finished remove pg_hba config.") - def streaming_drop_replication_slot(self, dn_inst, drop_slots): - """ - Delete dn_xxx_hadr on all dn nodes if dn_xxx_hadr exists when the disaster tolerance - relationship is lifted - """ - if not drop_slots: - self.logger.debug("WARNING:Not found dn_xxx_hadr on %s node, No need to " - "delete." 
% dn_inst.instanceId) - else: - for slot in drop_slots: - self.logger.debug("starting drop inst %s %s" % (dn_inst.instanceId, slot.strip())) - sql = "select * from pg_catalog.pg_drop_replication_slot('%s');" % slot.strip() - status_dr, output_dr = ClusterCommand.remoteSQLCommand( - sql, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True) - self.logger.debug("get %s need drop replication_slots, status=%d, " - "output: %s." % (dn_inst.hostname, status_dr, - SensitiveMask.mask_pwd(output_dr))) - if status_dr != 0: - self.logger.debug("Failed to remove inst %s %s with error: %s" % ( - dn_inst.instanceId, slot.strip(), output_dr)) - self.logger.debug( - "Successfully drop node %s %s" % (dn_inst.instanceId, slot.strip())) - - def concurrent_drop_slot(self, dn_inst): - """ - concurrent drop all dn replication slots - """ - sql_check = "select * from pg_catalog.pg_get_replication_slots();" - self.logger.debug("Starting concurrent drop node %s instance [%s] replication slots" % - (dn_inst.hostname, dn_inst.instanceId)) - status, output = ClusterCommand.remoteSQLCommand( - sql_check, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True) - self.logger.debug("get %s all replication slots, status=%d, output: %s." % - (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output))) - if status == 0 and output.strip(): - drop_slots = [] - if str(dn_inst.instanceId).startswith("6"): - drop_slots = re.findall(r"dn_\d+_hadr", output.strip()) - if str(dn_inst.instanceId).startswith("5"): - drop_slots = re.findall(r"cn_\d+_\d+\.\d+\.\d+\.\d+_\d+", output.strip()) - self.logger.debug("Waiting to delete instance [%s] replication slots is: %s" % - (dn_inst.instanceId, drop_slots)) - self.streaming_drop_replication_slot(dn_inst, drop_slots) - else: - self.logger.debug("Obtain all replication slot results:%s." 
% output) - - def streaming_clean_replication_slot(self): - """ - Delete dn_xxx_hadr on all dn nodes if dn_xxx_hadr exists when the disaster tolerance - relationship is lifted - """ - self.logger.log("Starting drop all node replication slots") - params = [dn_inst for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes if dn_inst.instanceId in self.normal_dn_ids] - self.logger.debug("need drop all node replication slots: %s" % - [inst.instanceId for inst in params]) - parallelTool.parallelExecute(self.concurrent_drop_slot, params) - self.logger.log("Finished drop all node replication slots") - def update_dorado_info(self, key, value, only_mode=None): """ Update info for dorado status @@ -2045,40 +1583,6 @@ class DoradoDisasterRecoveryBase(object): self.update_dorado_info(DoradoDisasterRecoveryConstants.ACTION_SWITCHOVER, "100%") self.update_dorado_info("cluster", "archive") - def streaming_clean_archive_slot(self): - """ - drop lot_type is physical and slot_name not contain (gs_roach_full,gs_roach_inc, - cn_xxx,dn_xxx, dn_xxx_hadr) on all cn node and all primary dn node if the - slot_name exists when the disaster cluster become primary cluster - """ - self.logger.debug("Starting drop archive slots") - params = [dn_inst for db_node in self.cluster_info.dbNodes - for dn_inst in db_node.datanodes if dn_inst.instanceId in self.primary_dn_ids] - self.logger.debug("need drop all node archive slots: %s" % - [inst.instanceId for inst in params]) - parallelTool.parallelExecute(self.parallel_drop_archive_slot, params) - self.logger.debug("Successfully drop all node archive slots") - - def parallel_drop_archive_slot(self, dn_inst): - """ - concurrent drop all primary dn and all cn archive slots - """ - sql_check = "select slot_name from pg_catalog.pg_get_replication_slots() " \ - "where slot_type='physical' and slot_name not in " \ - "('gs_roach_full', 'gs_roach_inc') and slot_name not like 'cn_%' and " \ - "slot_name not like 'dn_%';" - self.logger.debug("Starting concurrent drop node %s instance [%s] archive slots" % - (dn_inst.hostname, dn_inst.instanceId)) - (status, output) = ClusterCommand.remoteSQLCommand( - sql_check, self.user, dn_inst.hostname, dn_inst.port) - self.logger.debug("get %s all archive slots, status=%d, output: %s." % - (dn_inst.instanceId, status, output)) - if status == 0 and output.strip(): - archive_slots = output.strip().split('\n') - self.logger.debug("Waiting to delete instance [%s] archive slots is: %s" % - (dn_inst.instanceId, archive_slots)) - self.streaming_drop_replication_slot(dn_inst, archive_slots) - def get_specified_dn_infos(self, update=False, dn_status="Primary"): """ @@ -2111,7 +1615,7 @@ class DoradoDisasterRecoveryBase(object): def start_primary_dn(self, params): """ - Start main standby as primary dn in streaming failover. + Start main standby as primary dn in dorado failover. 
""" dn_info, max_term = params opt_type = " -Z datanode" if not self.cluster_info.isSingleInstCluster() else "" @@ -2159,12 +1663,12 @@ class DoradoDisasterRecoveryBase(object): :param guc_type: init type :return: NA """ - self.logger.debug("Starting set cm server for streaming disaster.") + self.logger.debug("Starting set cm server for dorado disaster.") cmd = "source %s && gs_guc %s -Z cmserver -D 'cm_instance_data_path' -c \"%s=%s\" " \ % (self.mpp_file, guc_type, guc_parameter, guc_value) - self.logger.debug("streaming disaster calling set cms, cmd=[%s]" % cmd) + self.logger.debug("dorado disaster calling set cms, cmd=[%s]" % cmd) self.ssh_tool.executeCommand(cmd, hostList=self.normal_cm_ips) - self.logger.debug("Successfully set cm server for streaming disaster.") + self.logger.debug("Successfully set cm server for dorado disaster.") def stream_disaster_set_cmagent_guc(self, guc_parameter, guc_value, guc_type): """ @@ -2174,12 +1678,12 @@ class DoradoDisasterRecoveryBase(object): :param guc_type: init type :return: NA """ - self.logger.debug("Starting set cm agent for streaming disaster.") + self.logger.debug("Starting set cm agent for dorado disaster.") cmd = "source %s && gs_guc %s -Z cmagent -D 'cm_instance_data_path' -c \"%s=%s\" " \ % (self.mpp_file, guc_type, guc_parameter, guc_value) - self.logger.debug("streaming disaster calling set cma, cmd=[%s]" % cmd) + self.logger.debug("dorado disaster calling set cma, cmd=[%s]" % cmd) self.ssh_tool.executeCommand(cmd, hostList=self.normal_node_list) - self.logger.debug("Successfully set cm agent for streaming disaster.") + self.logger.debug("Successfully set cm agent for dorado disaster.") def _failover_config_step(self, dorado_disaster_step, action_flag): """ @@ -2328,51 +1832,6 @@ class DoradoDisasterRecoveryBase(object): return True return False - def set_stream_cluster_run_mode_guc(self, guc_mode, fail_over=False): - """ - function: set cluster run mode guc - :return: - """ - cluster_run_mode = "cluster_primary" if self.params.mode == "primary" \ - else "cluster_standby" - if fail_over: - cluster_run_mode = "cluster_primary" - guc_cmd = "source %s && gs_guc %s -Z datanode -N all -I all -c " \ - "\"stream_cluster_run_mode = '%s'\"" % \ - (self.mpp_file, guc_mode, cluster_run_mode) - host_names = self.cluster_info.getClusterNodeNames() - ignore_node = [node for node in host_names if node not in self.normal_node_list] - if ignore_node: - self.logger.debug( - "WARNING: cluster_run_mode for datanode ignore nodes:%s" % ignore_node) - nodes = ",".join(ignore_node) - guc_cmd = guc_cmd + " --ignore-node %s" % nodes - self.logger.debug("Set dn stream_cluster_run_mode with cmd:%s" % guc_cmd) - (status, output) = CmdUtil.retryGetstatusoutput(guc_cmd) - if status != 0: - self.logger.debug("Warning: Failed %s dn stream_cluster_run_mode=%s, output: %s" % - (guc_mode, cluster_run_mode, str(output))) - else: - self.logger.debug("Successfully %s streaming cluster run mode for " - "datanode param %s" % (guc_mode, cluster_run_mode)) - - guc_cmd_cn = "source %s && gs_guc %s -Z coordinator -N all -I all -c " \ - "\"stream_cluster_run_mode = '%s'\"" % \ - (self.mpp_file, guc_mode, cluster_run_mode) - if ignore_node: - self.logger.debug( - "WARNING: cluster_run_mode for coordinator ignore nodes:%s" % ignore_node) - nodes = ",".join(ignore_node) - guc_cmd_cn = guc_cmd_cn + " --ignore-node %s" % nodes - self.logger.debug("Set cn stream_cluster_run_mode with cmd:%s" % guc_cmd_cn) - (status, output) = CmdUtil.retryGetstatusoutput(guc_cmd_cn) - if status != 0: 
- self.logger.debug("Warning: Failed %s cn stream_cluster_run_mode=%s, output: %s" % - (guc_mode, cluster_run_mode, str(output))) - else: - self.logger.debug("Successfully %s streaming cluster run mode for " - "coordinator param %s" % (guc_mode, cluster_run_mode)) - def set_data_in_dcc(self, key, value, only_mode=None): """ Set data in dcc diff --git a/script/impl/dorado_disaster_recovery/ddr_constants.py b/script/impl/dorado_disaster_recovery/ddr_constants.py index 8469e324..8279df7d 100644 --- a/script/impl/dorado_disaster_recovery/ddr_constants.py +++ b/script/impl/dorado_disaster_recovery/ddr_constants.py @@ -23,7 +23,7 @@ class DoradoDisasterRecoveryConstants: - # streaming files + # dorado files DDR_LOG_FILE = "gs_ddr.log" DDR_FILES_DIR = 'ddr_cabin' DDR_CLUSTER_STATUS_TMP_FILE = "cluster_state_tmp" @@ -32,10 +32,10 @@ class DoradoDisasterRecoveryConstants: GS_SECURE_FILES = "gs_secure_files" HADR_KEY_CIPHER = "hadr.key.cipher" HADR_KEY_RAND = "hadr.key.rand" - STREAM_SWITCHOVER_STATE = ".switchover_cluster_state" + DDR_SWITCHOVER_STATE = ".switchover_cluster_state" MAX_TERM_RECORD = ".max_term_record" PROCESS_LOCK_FILE = 'ddr_lock_' - STREAMING_CONFIG_XML = "ddr_config.xml" + DDR_CONFIG_XML = "ddr_config.xml" GUC_BACKUP_FILE = ".ddr_guc_backup" CLUSTER_USER_RECORD = ".cluster_user_record" @@ -45,13 +45,13 @@ class DoradoDisasterRecoveryConstants: ACTION_ESTABLISH = "establish" - # streaming query temp file + # dorado query temp file DDR_CLUSTER_STAT = ".ddr_cluster_stat" DDR_FAILOVER_STAT = ".ddr_failover_stat" DDR_SWICHOVER_STAT = ".ddr_switchover_stat" DDR_ESTABLISH_STAT = ".ddr_establish_stat" - DDR_DISTRIBUTE_ACTION = "distribute_stream_failover" + DDR_DISTRIBUTE_ACTION = "distribute_dorado_failover" # GUC CHANGE MAP GUC_CHANGE_MAP = {} diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py index 4f12b804..0249c85a 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_failover.py @@ -16,14 +16,14 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_disaster_recovery_failover.py is utility for +# Description : dorado_disaster_recovery_failover.py is utility for # standby cluster failover to primary cluster. 
from gspylib.common.Common import DefaultValue from gspylib.common.ErrorCode import ErrorCode from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase - +from impl.dorado_disaster_recovery.ddr_constants import DoradoDisasterRecoveryConstants class DisasterRecoveryFailoverHandler(DoradoDisasterRecoveryBase): def __init__(self, *args, **kwargs): @@ -35,7 +35,7 @@ class DisasterRecoveryFailoverHandler(DoradoDisasterRecoveryBase): step = self.check_dorado_failover_workable(check_type_step=3, check_status_step=0) self.check_is_under_upgrade() try: - self.dorado_failover_single_inst(step) + self.dorado_failover_single_inst(step, DoradoDisasterRecoveryConstants.ACTION_FAILOVER) self.update_dorado_info("cluster", "normal") self.clean_step_file() self.clean_flag_file() diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py index 371582ae..c9ad12d1 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py @@ -16,8 +16,8 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_disaster_recovery_query.py is utility for -# query streaming disaster recovery condition. +# Description : dorado_disaster_recovery_query.py is utility for +# query dorado disaster recovery condition. import os @@ -27,11 +27,11 @@ from gspylib.common.Common import ClusterCommand from impl.dorado_disaster_recovery.ddr_base import DoradoDisasterRecoveryBase -class StreamingQueryHandler(DoradoDisasterRecoveryBase): +class DoradoQueryHandler(DoradoDisasterRecoveryBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def get_streaming_cluster_query_value(self, file_name): + def get_ddr_cluster_query_value(self, file_name): """ Query infos from files. """ @@ -99,70 +99,32 @@ class StreamingQueryHandler(DoradoDisasterRecoveryBase): return recovery_status return "recovery" - def get_max_rpo_rto(self): - """ - Get max rpo and rto. - """ - self.logger.log("Start check RPO & RTO.") - rpo_sql = "SELECT current_rpo FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;" - rto_sql = "SELECT current_rto FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;" - rto_rpo_sql = rpo_sql + rto_sql - if not self.primary_dn_ids: - self.logger.debug("Not found primary dn in cluster, cluster status:%s, " - "main standby:%s." 
% (self.cluster_status, self.main_standby_ids)) - return "", "" - log_info = "Execute sql [%s] on node [%s: %s] with result:%s" - dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes - if inst.instanceId in self.primary_dn_ids] - if dn_instances: - status, output = ClusterCommand.remoteSQLCommand( - rto_rpo_sql, self.user, dn_instances[0].hostname, dn_instances[0].port) - if status == 0 and output: - try: - rets = output.strip().split('\n') - length = len(rets) // 2 - rpo_list = [int(i) for i in rets[:length]] - rto_list = [int(j) for j in rets[length:]] - max_rpo, max_rto = str(max(rpo_list)), str(max(rto_list)) - except ValueError: - return "", "" - self.logger.debug("Successfully get max rpo:%s, rto:%s, output:%s" - % (max_rpo, max_rto, ','.join(output.split('\n')))) - return max_rpo, max_rto - else: - self.logger.debug(log_info % (rto_rpo_sql, dn_instances[0].hostname, - dn_instances[0].port, ','.join(output.split('\n')))) - return "", "" - def run(self): - self.logger.log("Start streaming disaster query.") + self.logger.log("Start dorado disaster query.") cluster_info = self.query_cluster_info() if cluster_info: self.parse_cluster_status(current_status=cluster_info) self.check_is_under_upgrade() - check_cluster_stat = self.get_streaming_cluster_query_value( + check_cluster_stat = self.get_ddr_cluster_query_value( DoradoDisasterRecoveryConstants.DDR_CLUSTER_STAT) archive_status = self.check_archive(check_cluster_stat, self.cluster_status) recovery_status = self.check_recovery(check_cluster_stat, self.cluster_status) - hadr_cluster_stat = archive_status or recovery_status or check_cluster_stat + ddr_cluster_stat = archive_status or recovery_status or check_cluster_stat - hadr_failover_stat = self.get_streaming_cluster_query_value( + ddr_failover_stat = self.get_ddr_cluster_query_value( DoradoDisasterRecoveryConstants.DDR_FAILOVER_STAT) - hadr_switchover_stat = self.get_streaming_cluster_query_value( + ddr_switchover_stat = self.get_ddr_cluster_query_value( DoradoDisasterRecoveryConstants.DDR_SWICHOVER_STAT) - if hadr_cluster_stat != "promote": - hadr_failover_stat = "" - if hadr_cluster_stat != "switchover": - hadr_switchover_stat = "" + if ddr_cluster_stat != "promote": + ddr_failover_stat = "" + if ddr_cluster_stat != "switchover": + ddr_switchover_stat = "" self.logger.debug("Start check max rpo and rto.") - max_rpo, max_rto = self.get_max_rpo_rto() self.logger.debug("Finished check max rpo and rto.") values = dict() - values["hadr_cluster_stat"] = hadr_cluster_stat - values["hadr_failover_stat"] = hadr_failover_stat - values["hadr_switchover_stat"] = hadr_switchover_stat - values["RPO"] = max_rpo - values["RTO"] = max_rto - self.logger.log("Successfully executed streaming disaster " + values["ddr_cluster_stat"] = ddr_cluster_stat + values["ddr_failover_stat"] = ddr_failover_stat + values["ddr_switchover_stat"] = ddr_switchover_stat + self.logger.log("Successfully executed dorado disaster " "recovery query, result:\n%s" % values) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py index 0bfb7ecd..cab635a7 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py @@ -170,26 +170,6 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): #self.clean_gs_secure_dir() self.clean_step_file() - 
def _check_and_refresh_disaster_user_permission(self): - """check and refresh disaster user permission""" - if self.params.mode != "primary": - return - self.check_hadr_user(only_mode='primary') - self.check_hadr_pwd(only_mode='primary') - self.logger.debug("Encrypt hadr user info to database not " - "for mode:%s." % self.params.mode) - hadr_cipher_path = os.path.join(self.bin_path, "hadr.key.cipher") - hadr_rand_path = os.path.join(self.bin_path, "hadr.key.rand") - if not os.path.isfile(hadr_cipher_path) or not os.path.isfile(hadr_rand_path): - self.hadr_key_generator('hadr') - user_info = DefaultValue.obtain_hadr_user_encrypt_str(self.cluster_info, self.user, - self.logger, False, True) - if user_info: - self.clean_global_config() - pass_str = self.encrypt_hadr_user_info( - 'hadr', self.params.hadrUserName, self.params.hadrUserPassword) - self.keep_hadr_user_info(pass_str) - def run(self): self.logger.log("Start create dorado storage disaster relationship.") step = self.query_dorado_step() diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py index ee08e2b0..fe981dc4 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_switchover.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : streaming_disaster_recovery_switchover.py is a utility for +# Description : dorado_disaster_recovery_switchover.py is a utility for # changing role between primary cluster and standby cluster. import json import os @@ -231,7 +231,7 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): if not primary_dns: raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain primary dns for switchover") - if self.streaming_dr_in_switchover(primary_dns): + if self.dorado_dr_in_switchover(primary_dns): if self.streaming_dr_service_truncation_check(primary_dns): self.logger.debug("Successfully set streaming master cluster in switchover.") @@ -270,9 +270,9 @@ class DisasterRecoverySwitchoverHandler(DoradoDisasterRecoveryBase): "truncation." % (dn_inst.hostname, dn_inst.mirrorId, dn_inst.instanceId)) return True - def streaming_dr_in_switchover(self, primary_dns_list): + def dorado_dr_in_switchover(self, primary_dns_list): """ - set steaming dr in switchover + set dorado dr in switchover """ results = parallelTool.parallelExecute(self.concurrent_set_dr_in_switchover, primary_dns_list) diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 6feadd46..7a1b1940 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -16,7 +16,7 @@ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- -# Description : params_handler.py is a utility for parsing and verifying streaming +# Description : params_handler.py is a utility for parsing and verifying dorado # disaster recovery params. 
############################################################################# @@ -33,7 +33,7 @@ from base_utils.security.security_checker import SecurityChecker, ValidationErro from domain_utils.cluster_file.version_info import VersionInfo -def check_streaming_start_mode(mode): +def check_ddr_start_mode(mode): """ Check start mode """ @@ -118,9 +118,9 @@ def check_remote_cluster_conf(value): SecurityChecker.check_ip_valid('dataIp of remoteClusterConf', data_ip) -STREAMING_PARAMS_FOR_MODULE = { +DORADO_PARAMS_FOR_MODULE = { "start": { - "mode": check_streaming_start_mode, + "mode": check_ddr_start_mode, "xml_path": check_xml_file, # "hadrUserName": check_hadr_user, # "hadrUserPassword": check_hadr_pwd, @@ -136,7 +136,7 @@ STREAMING_PARAMS_FOR_MODULE = { "remoteClusterConf": check_remote_cluster_conf }, "switchover": { - "mode": check_streaming_start_mode, + "mode": check_ddr_start_mode, "waitingTimeout": check_wait_timeout }, "failover": { @@ -146,7 +146,7 @@ STREAMING_PARAMS_FOR_MODULE = { } HELP_MSG = """ -gs_ddr is a utility for streaming disaster recovery fully options. +gs_ddr is a utility for dorado disaster recovery fully options. Usage: gs_ddr -? | --help @@ -209,7 +209,7 @@ class ParamsHandler(object): parser.add_option('-X', dest='xml_path', type='string', help='Cluster config xml path.') parser.add_option('--json', dest='json_path', type='string', - help='Config json file of streaming options') + help='Config json file of dorado options') parser.add_option('--time-out=', dest='timeout', default="1200", type='string', help='time out.') parser.add_option("-l", dest='logFile', type='string', @@ -259,16 +259,16 @@ class ParamsHandler(object): remote_cluster_conf.setdefault("port", cluster_info.remote_dn_base_port) remote_cluster_conf.setdefault("shards", cluster_info.remote_stream_ip_map) setattr(self.params, "remoteClusterConf", remote_cluster_conf) - self.logger.debug("Remote stream cluster conf: %s." % str(remote_cluster_conf)) + self.logger.debug("Remote cluster conf: %s." % str(remote_cluster_conf)) local_cluster_conf = dict() local_cluster_conf.setdefault("port", cluster_info.local_dn_base_port) local_cluster_conf.setdefault("shards", cluster_info.local_stream_ip_map) setattr(self.params, "localClusterConf", local_cluster_conf) - self.logger.debug("Local stream cluster conf: %s." % str(local_cluster_conf)) + self.logger.debug("Local cluster conf: %s." 
% str(local_cluster_conf)) if not remote_cluster_conf["shards"] or len(remote_cluster_conf["shards"])\ != len(local_cluster_conf["shards"]): - raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50026'] % "streaming DR") + raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50026'] % "dorado DR") def __init_default_params(self): """ @@ -342,7 +342,7 @@ class ParamsHandler(object): self.logger.log(DoradoDisasterRecoveryConstants.LOG_REMARK) self.__init_default_params() #self.__reload_hadr_user_info() - for param_name, validate in STREAMING_PARAMS_FOR_MODULE[self.params.task].items(): + for param_name, validate in DORADO_PARAMS_FOR_MODULE[self.params.task].items(): check_value = getattr(self.params, param_name) if self.params.task == "stop": if param_name == "xml_path" and not check_value: -- Gitee From 4683c32902e4bf880aaa5ef9afb5bef31b302096 Mon Sep 17 00:00:00 2001 From: Hao Date: Mon, 21 Aug 2023 09:51:43 +0800 Subject: [PATCH 20/23] gs_ddr query update --- script/impl/dorado_disaster_recovery/ddr_constants.py | 3 ++- .../ddr_modules/dorado_disaster_recovery_query.py | 2 +- script/impl/dorado_disaster_recovery/params_handler.py | 4 ---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_constants.py b/script/impl/dorado_disaster_recovery/ddr_constants.py index 8279df7d..b8f5e961 100644 --- a/script/impl/dorado_disaster_recovery/ddr_constants.py +++ b/script/impl/dorado_disaster_recovery/ddr_constants.py @@ -61,7 +61,8 @@ class DoradoDisasterRecoveryConstants: "start": ["localClusterConf", "remoteClusterConf"], "stop": ["localClusterConf", "remoteClusterConf"], "switchover": [], - "failover": [] + "failover": [], + "query": [] } # step file of each module diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py index c9ad12d1..1049790e 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_query.py @@ -60,7 +60,7 @@ class DoradoQueryHandler(DoradoDisasterRecoveryBase): self.logger.debug("Ignore update archive for disaster_standby cluster.") return archive_status sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where sync_state" \ - "='Async' and peer_role='Standby' and peer_state='Normal';" + "='Async' and peer_role='StandbyCluster_Standby' and peer_state='Normal';" dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes if inst.instanceId in self.primary_dn_ids] self.logger.debug("Check archive with cmd:%s." % sql_check) diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 7a1b1940..81c07821 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -202,10 +202,6 @@ class ParamsHandler(object): '"switchover", "failover", "query"') parser.add_option('-m', dest='mode', type='string', help='Cluster run mode. 
It could be ["primary", "disaster_standby"].') - # parser.add_option('-U', dest='hadrusername', type='string', - # help='hadr user name.') - # parser.add_option('-W', dest='hadruserpasswd', type='string', - # help='hadr user password.') parser.add_option('-X', dest='xml_path', type='string', help='Cluster config xml path.') parser.add_option('--json', dest='json_path', type='string', -- Gitee From 55ff5d4bd6c482a14cba4f7a735f3f81b1b77232 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 22 Aug 2023 10:39:54 +0800 Subject: [PATCH 21/23] update comment --- .../dorado_disaster_recovery_start.py | 26 +++++-------------- .../dorado_disaster_recovery_stop.py | 2 +- .../params_handler.py | 5 ++-- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py index cab635a7..17bfffe1 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_start.py @@ -34,14 +34,12 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _first_step_for_ddr_start(self, step): """ - First step for streaming start + First step for ddr start """ if step >= 2: return self.logger.debug("Start first step of DisasterRecovery start.") - #创建容灾过程使用的临时目录 self.create_disaster_recovery_dir(self.dorado_file_dir) - #检查执行的标志文件 self.check_action_and_mode() self.init_cluster_status() @@ -59,7 +57,6 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] % "check cm_ctl is available for current cluster") self.check_is_under_upgrade() - #检查dn的GUC参数 self.check_dn_instance_params() self.write_dorado_step("2_check_cluster_step") @@ -73,7 +70,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _third_step_for_ddr_start(self, step): """ - Third step for streaming start + Third step for ddr start """ if step >= 3: return @@ -89,7 +86,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _fourth_step_for_ddr_start(self, step): """ - Fourth step for streaming start + Fourth step for ddr start """ if step >= 4: return @@ -104,8 +101,6 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): if step >= 5: return self.logger.debug("Start fifth step of ddr start.") - # self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary') - # self.set_data_in_dcc(self.backup_open_key, "1", only_mode='disaster_standby') self.start_cluster(only_mode="primary") self.write_dorado_step("5_start_primary_cluster_step") @@ -122,7 +117,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _seventh_step_for_ddr_start(self, step): """ - Seventh step for streaming start + Seventh step for ddr start """ if step >= 7 or self.params.mode == "primary": return @@ -139,7 +134,7 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _eighth_step_for_ddr_start(self, step): """ - Eighth step for streaming start + Eighth step for ddr start """ if step >= 8: return @@ -161,31 +156,24 @@ class DisasterRecoveryStartHandler(DoradoDisasterRecoveryBase): def _ninth_step_for_ddr_start(self, step): """ - ninth step for streaming start + ninth step for ddr start """ if step >= 9: return - self.logger.debug("Start ninth step of streaming start.") - #self.restore_wal_keep_segments(only_mode='primary') - #self.clean_gs_secure_dir() + 
self.logger.debug("Start ninth step of ddr start.") self.clean_step_file() def run(self): self.logger.log("Start create dorado storage disaster relationship.") step = self.query_dorado_step() self._first_step_for_ddr_start(step) - #1.检查集群状态正常 self.parse_cluster_status() - #检查集群内dn状态和cm服务 self._second_step_for_ddr_start(step) - #更新pg_hba和replinfo self.common_step_for_ddr_start() self._third_step_for_ddr_start(step) self._fourth_step_for_ddr_start(step) self._fifth_step_for_ddr_start(step) - #设置CM backup_open参数,灾备backup_open=1, 主集群backup_open=0 self._sixth_step_for_ddr_start(step) - #start dss,build main standby self._seventh_step_for_ddr_start(step) self._eighth_step_for_ddr_start(step) self._ninth_step_for_ddr_start(step) diff --git a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py index 881aba57..c2d8fe99 100644 --- a/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py +++ b/script/impl/dorado_disaster_recovery/ddr_modules/dorado_disaster_recovery_stop.py @@ -92,7 +92,7 @@ class DisasterRecoveryStopHandler(DoradoDisasterRecoveryBase): self.clean_dorado_dir() def run(self): - self.logger.log("Start remove dorado disaster relationship.") + self.logger.log("Start remove dorado disaster recovery relationship.") step = self.query_dorado_step() self._first_step_for_ddr_stop(step) self.parse_cluster_status() diff --git a/script/impl/dorado_disaster_recovery/params_handler.py b/script/impl/dorado_disaster_recovery/params_handler.py index 81c07821..134a92d6 100644 --- a/script/impl/dorado_disaster_recovery/params_handler.py +++ b/script/impl/dorado_disaster_recovery/params_handler.py @@ -122,8 +122,6 @@ DORADO_PARAMS_FOR_MODULE = { "start": { "mode": check_ddr_start_mode, "xml_path": check_xml_file, - # "hadrUserName": check_hadr_user, - # "hadrUserPassword": check_hadr_pwd, "doradoConfig": check_dorado_config, "waitingTimeout": check_wait_timeout, "localClusterConf": check_local_cluster_conf, @@ -151,7 +149,7 @@ gs_ddr is a utility for dorado disaster recovery fully options. Usage: gs_ddr -? | --help gs_ddr -V | --version - gs_ddr -t start -m [primary|disaster_standby] -X XMLFILE [--time-out=SECS] [-l LOGFILE] + gs_ddr -t start -m [primary|disaster_standby] -X XMLFILE --dorado-config=XLOG_SHARED_DISK [--time-out=SECS] [-l LOGFILE] gs_ddr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE] gs_ddr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE] gs_ddr -t failover [-l LOGFILE] @@ -168,6 +166,7 @@ General options: -W Disaster recovery user password. -X Path of the XML configuration file. -l Path of log file. + --dorado-config Path of dorado xlog share disk. --time-out=SECS Maximum waiting time when Main standby connect to the primary dn, default value is 1200s. 
""" -- Gitee From 767968066aefb580b3b1e7b2c00e12d298140f99 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 22 Aug 2023 16:48:28 +0800 Subject: [PATCH 22/23] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20ddr=5Fbase.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/gspylib/common/Common.py | 4 ++-- script/gspylib/common/DbClusterInfo.py | 12 ------------ script/impl/dorado_disaster_recovery/ddr_base.py | 3 +-- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 4f91eb9b..2d1016fa 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -3027,7 +3027,7 @@ class DefaultValue(): if os.path.isfile(cm_agent_conf_temp_file): with open(cm_agent_conf_temp_file, "r") as cma_conf_file: content = cma_conf_file.read() - ret = re.findall(r'agent_backup_open *= *1', content) + ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content) g_file.removeFile(cm_agent_conf_temp_file) if ret: return True @@ -3037,7 +3037,7 @@ class DefaultValue(): raise Exception(ErrorCode.GAUSS_502['GAUSS_50201'] % cm_agent_conf_file) with open(cm_agent_conf_file, "r") as cma_conf_file: content = cma_conf_file.read() - ret = re.findall(r'agent_backup_open *= *1', content) + ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content) if ret: return True else: diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 305a00c4..91564fca 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -1689,18 +1689,6 @@ class dbClusterInfo(): def get_staic_conf_path(self, user, ignore_err=False): return self.__getStaticConfigFilePath(user=user, ignore_err=ignore_err) - def get_mpprc_file(self, user): - """ - get mpprc file - """ - mpprcFile = EnvUtil.getEnvironmentParameterValue('MPPDB_ENV_SEPARATE_PATH', user) - if mpprcFile is not None and mpprcFile != "": - mpprcFile = mpprcFile.replace("\\", "\\\\").replace('"', '\\"\\"') - checkPathVaild(mpprcFile) - userProfile = mpprcFile - else: - userProfile = ClusterConstants.BASHRC - return userProfile def __getEnvironmentParameterValue(self, environmentParameterName, user): """ diff --git a/script/impl/dorado_disaster_recovery/ddr_base.py b/script/impl/dorado_disaster_recovery/ddr_base.py index 5e0064a2..9b9a5a57 100644 --- a/script/impl/dorado_disaster_recovery/ddr_base.py +++ b/script/impl/dorado_disaster_recovery/ddr_base.py @@ -1436,8 +1436,7 @@ class DoradoDisasterRecoveryBase(object): check datanode info by "gs_ctl query" command. """ state, dest_ip, datadir = params - # get mpprc file - mpprcFile = self.cluster_info.get_mpprc_file(self.user) + mpprcFile = self.mpp_file if dest_ip == self.local_host: cmd = "source %s && gs_ctl query -D %s" % (mpprcFile, datadir) else: -- Gitee From 9c0355699d89bbc95bd588ac5f7c1ae6628664e4 Mon Sep 17 00:00:00 2001 From: Hao Date: Tue, 22 Aug 2023 16:57:50 +0800 Subject: [PATCH 23/23] revert build.sh and get_PlatForm_str.sh --- build.sh | 6 ++---- build/get_PlatForm_str.sh | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/build.sh b/build.sh index e4f76dba..ccf89dda 100644 --- a/build.sh +++ b/build.sh @@ -54,7 +54,7 @@ done PLAT_FORM_STR=$(sh "${ROOT_DIR}/build/get_PlatForm_str.sh") if [ "${PLAT_FORM_STR}"x == "Failed"x ]; then - echo "We only support kylin(aarch64), EulerOS(aarch64), FusionOS, CentOS, UnionTech(X86) platform." 
+ echo "We only support openEuler(aarch64), EulerOS(aarch64), FusionOS, CentOS, UnionTech(X86) platform." exit 1; fi @@ -68,8 +68,6 @@ if [ X$(echo $PLAT_FORM_STR | grep "centos") != X"" ]; then dist_version="CentOS" elif [ X$(echo $PLAT_FORM_STR | grep "openeuler") != X"" ]; then dist_version="openEuler" -elif [ X$(echo $PLAT_FORM_STR | grep "kylin") != X"" ]; then - dist_version="kylin" elif [ X$(echo $PLAT_FORM_STR | grep "fusionos") != X"" ]; then dist_version="FusionOS" elif [ X$(echo $PLAT_FORM_STR | grep "euleros") != X"" ]; then @@ -81,7 +79,7 @@ elif [ X$(echo $PLAT_FORM_STR | grep "asianux") != X"" ]; then elif [ X$(echo $PLAT_FORM_STR | grep "UnionTech") != X"" ]; then dist_version="UnionTech" else - echo "We only support kylin(aarch64), EulerOS(aarch64), FusionOS, CentOS, Ubuntu(x86), UnionTech(x86) platform." + echo "We only support openEuler(aarch64), EulerOS(aarch64), FusionOS, CentOS, Ubuntu(x86), UnionTech(x86) platform." echo "Kernel is $kernel" exit 1 fi diff --git a/build/get_PlatForm_str.sh b/build/get_PlatForm_str.sh index 98e3233e..2bd8af9b 100644 --- a/build/get_PlatForm_str.sh +++ b/build/get_PlatForm_str.sh @@ -19,14 +19,14 @@ function get_os_str() { cpu_arc=$(uname -p) - if [ "$os_name"x = "centos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + if [ "$os_name"x = "centos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then os_str=centos7.6_x86_64 elif [ "$os_name"x = "euleros"x ] && [ "$cpu_arc"x = "aarch64"x ]; then os_str=euleros2.0_sp8_aarch64 - elif [ "$os_name"x = "kylin"x ] && [ "$cpu_arc"x = "aarch64"x ]; then - os_str=kylin_aarch64 - elif [ "$os_name"x = "kylin"x ] && [ "$cpu_arc"x = "x86_64"x ]; then - os_str=kylin_x86_64 + elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "aarch64"x ]; then + os_str=openeuler_aarch64 + elif [ "$os_name"x = "openEuler"x ] && [ "$cpu_arc"x = "x86_64"x ]; then + os_str=openeuler_x86_64 elif [ "$os_name"x = "fusionos"x ] && [ "$cpu_arc"x = "aarch64"x ]; then os_str=fusionos_aarch64 elif [ "$os_name"x = "fusionos"x ] && [ "$cpu_arc"x = "x86_64"x ]; then -- Gitee