From 3889f6b26e1f733276d8b95cf1386b4ac257990d Mon Sep 17 00:00:00 2001
From: xue_meng_en <1836611252@qq.com>
Date: Fri, 5 Feb 2021 09:46:18 +0800
Subject: [PATCH 01/16] Support online expansion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gs_expansion | 27 +-
script/impl/expansion/ExpansionImpl.py | 401 +++++++++++++++++--------
2 files changed, 293 insertions(+), 135 deletions(-)
diff --git a/script/gs_expansion b/script/gs_expansion
index aa05816b..50943de3 100644
--- a/script/gs_expansion
+++ b/script/gs_expansion
@@ -53,6 +53,8 @@ class Expansion(ParallelBaseOM):
self.newHostList = []
self.clusterInfoDict = {}
self.backIpNameMap = {}
+ self.newHostCasRoleMap = {}
+ self.hostAzNameMap = {}
self.packagepath = os.path.realpath(
os.path.join(os.path.realpath(__file__), "../../"))
@@ -139,12 +141,6 @@ General options:
clusterDict = clusterInfo.getClusterDirectorys()
backIpList = clusterInfo.getClusterBackIps()
nodeNameList = clusterInfo.getClusterNodeNames()
-
- # only support single az now.
- azNames = clusterInfo.getazNames()
- self.azName = "AZ1"
- if len(azNames) > 0:
- self.azName = azNames[0]
self.localIp = backIpList[0]
self.nodeNameList = nodeNameList
@@ -198,8 +194,25 @@ General options:
hostName = clusterInfo.getHostNameByNodeId(id)
clusterInfoDict[hostName]["instanceType"] = insType
self.clusterInfoDict = clusterInfoDict
+
+ for dbnode in clusterInfo.dbNodes:
+ # get azName of all hosts
+ self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName
+ # get cascadeRole of newHosts
+ if dbnode.backIps[0] in self.newHostList:
+ self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole
+
+ # check trust between the primary and other hosts
+ sshTool = SshTool(nodeNameList, timeout = 0)
+ retmap, output = sshTool.getSshStatusOutput("pwd")
+ for nodeName in nodeNameList:
+ if retmap[nodeName] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError("SSH could not connect to %s." % nodeName)
+ try:
+ sshTool.clenSshResultFiles()
+ except Exception as e:
+ self.logger.debug(str(e))
-
def initLogs(self):
"""
init log file
diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py
index 0455e81f..13112405 100644
--- a/script/impl/expansion/ExpansionImpl.py
+++ b/script/impl/expansion/ExpansionImpl.py
@@ -46,10 +46,17 @@ DefaultValue.doConfigForParamiko()
import paramiko
-#mode
+#boot/build mode
MODE_PRIMARY = "primary"
MODE_STANDBY = "standby"
MODE_NORMAL = "normal"
+MODE_CASCADE = "cascade_standby"
+
+# instance local_role
+ROLE_NORMAL = "normal"
+ROLE_PRIMARY = "primary"
+ROLE_STANDBY = "standby"
+ROLE_CASCADE = "cascade standby"
#db state
STAT_NORMAL = "normal"
@@ -78,7 +85,8 @@ class ExpansionImpl():
self.user = self.context.user
self.group = self.context.group
-
+ self.existingHosts = []
+ self.expansionSuccess = {}
self.logger = self.context.logger
envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
@@ -111,8 +119,8 @@ class ExpansionImpl():
os.path.join(srcFile, "../"))
## mkdir package dir and send package to remote nodes.
- sshTool.executeCommand("mkdir -p %s" % srcFile , "", DefaultValue.SUCCESS,
- hostList)
+ sshTool.executeCommand("mkdir -p %s" % srcFile , "",
+ DefaultValue.SUCCESS, hostList)
sshTool.scpFiles(srcFile, targetDir, hostList)
## change mode of package dir to set privileges for users
@@ -179,6 +187,7 @@ class ExpansionImpl():
tmpMppdbPath = DefaultValue.getEnv("PGHOST")
if tmpMppdbPath:
mppdbconfig = '<PARAM name="tmpMppdbPath" value="%s" />' % tmpMppdbPath
+ azName = self.context.hostAzNameMap[backIp]
xmlConfig = """\
@@ -210,7 +219,7 @@ class ExpansionImpl():
""".format(nodeName=nodeName,backIp=backIp,appPath=appPath,
logPath=logPath,toolPath=toolPath,corePath=corePath,
- sshIp=sshIp,port=port,dataNode=dataNode,azName=self.context.azName,
+ sshIp=sshIp,port=port,dataNode=dataNode,azName=azName,
mappdbConfig=mppdbconfig)
return xmlConfig
@@ -274,7 +283,7 @@ class ExpansionImpl():
while not channel.exit_status_ready():
try:
recvOut = channel.recv(1024)
- outDecode = recvOut.decode("utf-8");
+ outDecode = recvOut.decode("utf-8")
outStr = outDecode.strip()
if(len(outStr) == 0):
continue
@@ -364,35 +373,36 @@ class ExpansionImpl():
func: after install single database on standby nodes.
build the relation with primary and standby nodes.
step:
- 1. restart primary node with Primary Mode
- (only used to Single-Node instance)
+ 1. get existing hosts
2. set guc config to primary node
3. restart standby node with Standby Mode
4. set guc config to standby node
- 5. generate cluster static file and send to each node.
+ 5. rollback guc config of existing hosts if build failed
+ 6. generate cluster static file and send to each node.
"""
- self.queryPrimaryClusterDetail()
+ self.getExistingHosts()
self.setPrimaryGUCConfig()
self.setStandbyGUCConfig()
self.addTrustOnExistNodes()
- self.restartSingleDbWithPrimaryMode()
+ self.generateGRPCCert()
self.buildStandbyHosts()
+ self.rollback()
self.generateClusterStaticFile()
- def queryPrimaryClusterDetail(self):
+ def getExistingHosts(self):
"""
- get current cluster type.
- single-node or primary-standby
+ get the existing hosts
"""
- self.logger.debug("Query primary database instance mode.\n")
- self.isSingleNodeInstance = True
+ self.logger.debug("Get the existing hosts.\n")
primaryHost = self.getPrimaryHostName()
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
- instance = re.findall(r"node\s+node_ip\s+instance\s+state", result)
- if len(instance) > 1:
- self.isSingleNodeInstance = False
- self.logger.debug("Original instance mode is %s" %
- self.isSingleNodeInstance)
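+ # split the cluster status output on '|' and newlines, then collect the IPv4 addresses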
+ instances = re.split('(?:\|)|(?:\n)', result)
+ self.existingHosts = []
+ for inst in instances:
+ pattern = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
+ result = pattern.findall(inst)
+ if len(result) != 0:
+ self.existingHosts.append(result[0])
def setPrimaryGUCConfig(self):
"""
@@ -408,11 +418,15 @@ class ExpansionImpl():
"""
set the expansion standby node db guc config
"""
- self.logger.debug("Stat to set standby node GUC config.\n")
- nodeList = self.context.nodeNameList
+ self.logger.debug("Start to set standby node GUC config.\n")
primaryHost = self.getPrimaryHostName()
- standbyHostNames = list(set(nodeList).difference(set([primaryHost])))
- self.setGUCOnClusterHosts(standbyHostNames)
+ existingStandbyHosts = list(
+ set(self.existingHosts).difference(set([primaryHost])))
+ standbyHosts = existingStandbyHosts + self.context.newHostList
+ standbyNames = []
+ for standbyHost in standbyHosts:
+ standbyNames.append(self.context.backIpNameMap[standbyHost])
+ self.setGUCOnClusterHosts(standbyNames)
def addTrustOnExistNodes(self):
"""
@@ -441,31 +455,22 @@ class ExpansionImpl():
self.cleanSshToolFile(sshTool)
self.logger.debug("End to set host trust on existing node.")
- def restartSingleDbWithPrimaryMode(self):
+ def generateGRPCCert(self):
"""
+ generate GRPC cert
"""
primaryHost = self.getPrimaryHostName()
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
-
insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode,self.envFile)
+ dataNode,self.envFile)
+ needGRPCHosts = self.context.newHostList
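+ # when the primary instance is not yet in primary mode, it needs a GRPC cert as well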
if insType != MODE_PRIMARY:
- self.commonGsCtl.stopInstance(primaryHost, dataNode, self.envFile)
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY,self.envFile)
-
- # start db to primary state for three times max
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode, self.envFile)
- if insType == MODE_PRIMARY:
- break
- self.logger.debug("Start database as Primary mode failed, \
-retry for %s times" % start_retry_num)
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY, self.envFile)
- start_retry_num = start_retry_num + 1
+ primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"]
+ needGRPCHosts.append(primaryHostIp)
+ self.logger.debug("\nStart to generate GRPC cert.")
+ self.context.initSshTool(needGRPCHosts)
+ self.context.createGrpcCa(needGRPCHosts)
+ self.logger.debug("\nEnd to generate GRPC cert.")
def addStandbyIpInPrimaryConf(self):
"""
@@ -496,7 +501,7 @@ retry for %s times" % start_retry_num)
sshTool = SshTool([primaryHost])
self.logger.debug(command)
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [primaryHost], self.envFile)
+ [primaryHost], self.envFile)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
@@ -506,7 +511,7 @@ retry for %s times" % start_retry_num)
primaryHost = ""
for nodeName in self.context.nodeNameList:
if self.context.clusterInfoDict[nodeName]["instanceType"] \
- == MASTER_INSTANCE:
+ == MASTER_INSTANCE:
primaryHost = nodeName
break
return primaryHost
@@ -519,8 +524,18 @@ retry for %s times" % start_retry_num)
self.logger.debug("start to build standby node...\n")
standbyHosts = self.context.newHostList
+ primaryHost = self.getPrimaryHostName()
+ existingStandbys = list(set(self.existingHosts).difference(set([primaryHost])))
for host in standbyHosts:
+ self.expansionSuccess[host] = False
+
+ # build the standbys first
+ for host in standbyHosts:
+ if self.context.newHostCasRoleMap[host] == "on":
+ continue
+ self.logger.log("Start to build standby %s." % host)
+ startSuccess = False
hostName = self.context.backIpNameMap[host]
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
@@ -528,44 +543,147 @@ retry for %s times" % start_retry_num)
self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
+ MODE_STANDBY, self.envFile)
# start standby as standby mode for three times max.
start_retry_num = 1
while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if insType != MODE_STANDBY:
- self.logger.debug("Start database as Standby mode failed, \
-retry for %s times" % start_retry_num)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName,dataNode, self.envFile)
+ if insType != ROLE_STANDBY:
+ self.logger.debug("Start database as Standby mode failed, "\
+ "retry for %s times" % start_retry_num)
self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
MODE_STANDBY, self.envFile)
start_retry_num = start_retry_num + 1
else:
+ startSuccess = True
break
-
+ if startSuccess == False:
+ self.logger.debug("Start database %s as Standby mode failed!" % host)
+ continue
+
+ buildSuccess = False
# build standby node
self.addStandbyIpInPrimaryConf()
self.reloadPrimaryConf()
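+ # wait for the reloaded replconninfo to take effect before checking the primary state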
+ time.sleep(10)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ primaryHost, dataNode, self.envFile)
+ if insType != ROLE_PRIMARY:
+ GaussLog.exitWithError("The server mode of primary host" \
+ "is not primary!")
+ if dbStat != STAT_NORMAL:
+ GaussLog.exitWithError("The primary is not Normal!")
+
self.commonGsCtl.buildInstance(hostName, dataNode, MODE_STANDBY,
- self.envFile)
-
+ self.envFile)
+
# if build failed first time. retry for three times.
start_retry_num = 1
+ while start_retry_num <= 3:
+ time.sleep(10)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName, dataNode, self.envFile)
+ if dbStat != STAT_NORMAL:
+ self.logger.debug("Build standby instance failed, " \
+ "retry for %s times" % start_retry_num)
+ self.commonGsCtl.buildInstance(hostName, dataNode,
+ MODE_STANDBY, self.envFile)
+ start_retry_num = start_retry_num + 1
+ else:
+ buildSuccess = True
+ self.expansionSuccess[host] = True
+ existingStandbys.append(host)
+ break
+ if buildSuccess == False:
+ self.logger.log("Build standby %s failed." % host)
+ else:
+ self.logger.log("Build standby %s success." % host)
+
+
+ # build cascade standby
+ hostAzNameMap = self.context.hostAzNameMap
+ for host in standbyHosts:
+ if self.context.newHostCasRoleMap[host] == "off":
+ continue
+ self.logger.log("Start to build cascade standby %s." % host)
+ startSuccess = False
+ hostName = self.context.backIpNameMap[host]
+ dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
+ # skip if there is no Normal standby in the same AZ as this cascade standby
+ hasStandbyWithSameAZ = False
+ for existingStandby in existingStandbys:
+ existingStandbyName = self.context.backIpNameMap[existingStandby]
+ existingStandbyDataNode = self.context.clusterInfoDict[existingStandbyName]["dataNode"]
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ existingStandbyName, existingStandbyDataNode, self.envFile)
+ if dbStat != STAT_NORMAL:
+ continue
+ if hostAzNameMap[existingStandby] != hostAzNameMap[host]:
+ continue
+ hasStandbyWithSameAZ = True
+ break
+ if not hasStandbyWithSameAZ:
+ self.logger.log("There is no Normal standby in %s" % \
+ hostAzNameMap[host])
+ continue
+
+ self.checkTmpDir(hostName)
+
+ self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
+ self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
+ MODE_STANDBY, self.envFile)
+
+ # start cascadeStandby as standby mode for three times max.
+ start_retry_num = 1
while start_retry_num <= 3:
insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
+ dataNode, self.envFile)
+ if insType != ROLE_STANDBY:
+ self.logger.debug("Start database as Standby mode failed, "\
+ "retry for %s times" % start_retry_num)
+ self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
+ MODE_STANDBY, self.envFile)
+ start_retry_num = start_retry_num + 1
+ else:
+ startSuccess = True
+ break
+ if startSuccess == False:
+ self.logger.log("Start database %s as Standby mode failed!" % host)
+ continue
+
+ # build cascade standby node
+ self.addStandbyIpInPrimaryConf()
+ self.reloadPrimaryConf()
+ self.commonGsCtl.buildInstance(hostName, dataNode, MODE_CASCADE, \
+ self.envFile)
+
+ buildSuccess = False
+ # if the build failed the first time, retry up to three times.
+ start_retry_num = 1
+ while start_retry_num <= 3:
+ time.sleep(10)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName, dataNode, self.envFile)
if dbStat != STAT_NORMAL:
- self.logger.debug("Build standby instance failed, \
-retry for %s times" % start_retry_num)
+ self.logger.debug("Build standby instance failed, "\
+ "retry for %s times" % start_retry_num)
self.addStandbyIpInPrimaryConf()
self.reloadPrimaryConf()
- self.commonGsCtl.buildInstance(hostName, dataNode,
- MODE_STANDBY, self.envFile)
+ self.commonGsCtl.buildInstance(hostName, dataNode, \
+ MODE_CASCADE, self.envFile)
start_retry_num = start_retry_num + 1
else:
+ buildSuccess = True
+ self.expansionSuccess[host] = True
break
-
+ if buildSuccess == False:
+ self.logger.log("Build cascade standby %s failed." % host)
+ else:
+ self.logger.log("Build cascade standby %s success." % host)
+
+
def checkTmpDir(self, hostName):
"""
if the tmp dir does not exist, create it.
@@ -586,7 +704,7 @@ retry for %s times" % start_retry_num)
"""
generate static_config_files and send to all hosts
"""
- self.logger.debug("Start to generate and send cluster static file.\n")
+ self.logger.log("Start to generate and send cluster static file.\n")
primaryHost = self.getPrimaryHostName()
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
@@ -595,9 +713,11 @@ retry for %s times" % start_retry_num)
nodeIp = nodeInfo["backIp"]
dataNode = nodeInfo["dataNode"]
exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
+ dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
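+ # nodes absent from the cluster output and failed new hosts are dropped from the static file below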
if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList:
self.logger.debug("The node ip [%s] will not be added to cluster." % nodeIp)
- dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
+ self.context.clusterInfo.dbNodes.remove(dbNode)
+ if nodeIp in self.context.newHostList and not self.expansionSuccess[nodeIp]:
self.context.clusterInfo.dbNodes.remove(dbNode)
toolPath = self.context.clusterInfoDict["toolPath"]
@@ -607,6 +727,12 @@ retry for %s times" % start_retry_num)
if not os.path.exists(static_config_dir):
os.makedirs(static_config_dir)
+ # check whether the dynamic config file exists.
+ dynamic_file = "%s/bin/cluster_dynamic_config" % appPath
+ dynamic_file_exist = False
+ if os.path.exists(dynamic_file):
+ dynamic_file_exist = True
+
for dbNode in self.context.clusterInfo.dbNodes:
hostName = dbNode.name
staticConfigPath = "%s/script/static_config_files/cluster_static_config_%s" % \
@@ -618,36 +744,21 @@ retry for %s times" % start_retry_num)
hostSsh = SshTool([hostName])
targetFile = "%s/bin/cluster_static_config" % appPath
hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
+ # if the dynamic config file exists, refresh it on each host.
+ if dynamic_file_exist:
+ refresh_cmd = "gs_om -t refreshconf"
+ hostSsh.getSshStatusOutput(refresh_cmd, [hostName], self.envFile)
+
self.cleanSshToolFile(hostSsh)
self.logger.debug("End to generate and send cluster static file.\n")
- time.sleep(10)
-
- # Single-node database need start cluster after expansion
- if self.isSingleNodeInstance:
- primaryHost = self.getPrimaryHostName()
- self.logger.debug("Single-Node instance need restart.\n")
- self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
-
- # if primary database not normal, restart it
- dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
- dataNode, self.envFile)
- if insType != MODE_PRIMARY:
- self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
- MODE_PRIMARY, self.envFile)
- # if stat if not normal,rebuild standby database
- standbyHosts = self.context.newHostList
- for host in standbyHosts:
- hostName = self.context.backIpNameMap[host]
- dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
-
- self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
+
+ self.logger.log("Expansion results:")
+ for newHost in self.context.newHostList:
+ if self.expansionSuccess[newHost]:
+ self.logger.log("%s:\tSuccess" % nodeIp)
+ else:
+ self.logger.log("%s:\tFailed" % nodeIp)
def setGUCOnClusterHosts(self, hostNames=[]):
"""
@@ -661,8 +772,17 @@ retry for %s times" % start_retry_num)
if len(hostNames) == 0:
hostNames = self.context.nodeNameList
+ nodeDict = self.context.clusterInfoDict
+ newHostList = self.context.newHostList
+ hostAzNameMap = self.context.hostAzNameMap
for host in hostNames:
-
+ # set Available_zone for the new standby
+ backIp = nodeDict[host]["backIp"]
+ if backIp in newHostList:
+ dataNode = nodeDict[host]["dataNode"]
+ gucDict[host] += """\
+gs_guc set -D {dn} -c "available_zone='{azName}'"
+ """.format(dn = dataNode, azName = hostAzNameMap[backIp])
command = "source %s ; " % self.envFile + gucDict[host]
self.logger.debug(command)
@@ -672,13 +792,13 @@ retry for %s times" % start_retry_num)
# create temporary dir to save guc command bashfile.
mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
(self.tempFileDir,self.user,self.group,self.tempFileDir)
- retmap, output = sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
+ retmap, output = sshTool.getSshStatusOutput(mkdirCmd, [host], \
+ self.envFile)
- subprocess.getstatusoutput("mkdir -m a+x -p %s; touch %s; \
- cat /dev/null > %s" % \
- (self.tempFileDir, tempShFile, tempShFile))
+ subprocess.getstatusoutput("touch %s; cat /dev/null > %s" % \
+ (tempShFile, tempShFile))
with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
- stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
+ stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
fo.write("#bash\n")
fo.write( command )
fo.close()
@@ -697,8 +817,6 @@ retry for %s times" % start_retry_num)
"""
get guc config of each node:
replconninfo[index]
- remote_read_mode
- replication_type
"""
nodeDict = self.context.clusterInfoDict
hostNames = self.context.nodeNameList
@@ -739,12 +857,6 @@ remoteservice={remoteservice}'"
index += 1
- guc_mode_type = """
- gs_guc set -D {dn} -c 'remote_read_mode=off';
- gs_guc set -D {dn} -c 'replication_type=1';
- """.format(dn=localeHostInfo["dataNode"])
- guc_tempate_str += guc_mode_type
-
gucDict[hostName] = guc_tempate_str
return gucDict
@@ -763,7 +875,7 @@ remoteservice={remoteservice}'"
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
dataNode, self.envFile)
- if insType not in (MODE_PRIMARY, MODE_STANDBY, MODE_NORMAL):
+ if insType not in (ROLE_PRIMARY, ROLE_STANDBY, ROLE_NORMAL, ROLE_CASCADE):
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35703"] %
(hostName, self.user, dataNode, dataNode))
@@ -780,15 +892,12 @@ remoteservice={remoteservice}'"
[], envfile)
self.cleanSshToolFile(sshTool)
versionLines = outputCollect.splitlines()
- for verline in versionLines:
- if verline[0:9] == '[SUCCESS]':
- ipKey = verline[10:-1]
- continue
- else:
- versionStr = "".join(verline)
- preVersion = versionStr.split(' ')
- versionInfo = preVersion[4]
- versionDic[ipKey] = versionInfo[:-2]
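+ # the output alternates a "[SUCCESS] <ip>:" line with a "gaussdb (<version>) ..." line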
+ for i in range(int(len(versionLines)/2)):
+ ipPattern = re.compile("\[.*\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):")
+ ipKey = ipPattern.findall(versionLines[2 * i])[0]
+ versionPattern = re.compile("gaussdb \((.*)\) .*")
+ version = versionPattern.findall(versionLines[2 * i + 1])[0]
+ versionDic[ipKey] = version
for hostip in versionDic:
if hostip == self.context.localIp:
versionCompare = ""
@@ -823,16 +932,15 @@ standby nodes.")
clearCmd = "if [ -d '%s' ];then rm -rf %s;fi" % \
(self.tempFileDir, self.tempFileDir)
hostNames = self.context.nodeNameList
- for host in hostNames:
- try:
- sshTool = SshTool(hostNames)
- result, output = sshTool.getSshStatusOutput(clearCmd,
- hostNames, self.envFile)
- self.logger.debug(output)
- self.cleanSshToolFile(sshTool)
- except Exception as e:
- self.logger.debug(str(e))
- self.cleanSshToolFile(sshTool)
+ try:
+ sshTool = SshTool(hostNames)
+ result, output = sshTool.getSshStatusOutput(clearCmd,
+ hostNames, self.envFile)
+ self.logger.debug(output)
+ self.cleanSshToolFile(sshTool)
+ except Exception as e:
+ self.logger.debug(str(e))
+ self.cleanSshToolFile(sshTool)
def cleanSshToolFile(self, sshTool):
@@ -1012,12 +1120,50 @@ standby nodes.")
self.logger.log("\nStandby nodes is installed with locale mode.")
self.checkLocalModeOnStandbyHosts()
- self.logger.log("\nDatabase on standby nodes installed finished. \
-Start to establish the primary-standby relationship.")
+ self.logger.log("\nDatabase on standby nodes installed finished.")
+ self.logger.log("\nStart to establish the primary-standby relationship.")
self.buildStandbyRelation()
# process success
pvalue.value = 1
+ def rollback(self):
+ """
+ roll back every host's replconninfo entries for the failed hosts
+ """
+ existingHosts = self.existingHosts
+ failedHosts = []
+ for host in self.expansionSuccess.keys():
+ if self.expansionSuccess[host]:
+ existingHosts.append(host)
+ else:
+ failedHosts.append(host)
+ clusterInfoDict = self.context.clusterInfoDict
+ primaryHostName = self.getPrimaryHostName()
+ for failedHost in failedHosts:
+ self.logger.debug("start to rollback replconninfo about %s" % failedHost)
+ for host in existingHosts:
+ hostName = self.context.backIpNameMap[host]
+ dataNode = clusterInfoDict[hostName]["dataNode"]
+ confFile = os.path.join(dataNode, "postgresql.conf")
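+ # comment out the replconninfo entries that reference the failed host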
+ rollbackReplconninfoCmd = "sed -i '/remotehost=%s/s/^/#&/' %s" \
+ % (failedHost, confFile)
+ self.logger.debug(rollbackReplconninfoCmd)
+ sshTool = SshTool([host])
+ (statusMap, output) = sshTool.getSshStatusOutput(rollbackReplconninfoCmd, [host])
+ if hostName == primaryHostName:
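+ # on the primary, also comment out the failed host in pg_hba.conf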
+ pg_hbaFile = os.path.join(dataNode, "pg_hba.conf")
+ rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \
+ % (failedHost, pg_hbaFile)
+ self.logger.debug(rollbackPg_hbaCmd)
+ (statusMap, output) = sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host])
+ reloadGUCCommand = "source %s ; gs_ctl reload -D %s " % \
+ (self.envFile, dataNode)
+ self.logger.debug(reloadGUCCommand)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ reloadGUCCommand, [host], self.envFile)
+ self.logger.debug(outputCollect)
+ self.cleanSshToolFile(sshTool)
+
def run(self):
"""
start expansion
@@ -1028,8 +1174,7 @@ Start to establish the primary-standby relationship.")
self.preInstall()
self.installAndExpansion()
-
- self.logger.log("\nSuccess to expansion standby nodes.")
+ self.logger.log("Expansion Finish.")
class GsCtlCommon:
--
Gitee
From f04d34f33bb2cfda3f0d0f23622ee9d3dc9bc3a3 Mon Sep 17 00:00:00 2001
From: Xue Mengen <1836611252@qq.com>
Date: Sun, 7 Feb 2021 17:25:31 +0800
Subject: [PATCH 02/16] Add individual user trust verification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gs_expansion | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/script/gs_expansion b/script/gs_expansion
index 50943de3..1e5b6b56 100644
--- a/script/gs_expansion
+++ b/script/gs_expansion
@@ -21,7 +21,7 @@
import os
import sys
-
+import subprocess
sys.path.append(sys.path[0])
from gspylib.common.DbClusterInfo import dbClusterInfo, \
@@ -206,12 +206,18 @@ General options:
sshTool = SshTool(nodeNameList, timeout = 0)
retmap, output = sshTool.getSshStatusOutput("pwd")
for nodeName in nodeNameList:
+ # check root's trust
if retmap[nodeName] != DefaultValue.SUCCESS:
- GaussLog.exitWithError("SSH could not connect to %s." % nodeName)
+ GaussLog.exitWithError("SSH could not connect to %s by root." % nodeName)
try:
sshTool.clenSshResultFiles()
except Exception as e:
self.logger.debug(str(e))
+ # check individual user's trust
+ checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, nodeName)
+ (status, output) = subprocess.getstatusoutput(checkUserTrustCmd)
+ if status != 0:
+ GaussLog.exitWithError("SSH could not connect to %s by individual user." % nodeName)
def initLogs(self):
"""
--
Gitee
From bf616fa443f28b743c899eb78b3b29b238022f8b Mon Sep 17 00:00:00 2001
From: Xue Mengen <1836611252@qq.com>
Date: Fri, 19 Feb 2021 18:23:26 +0800
Subject: [PATCH 03/16] Fix code bug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/impl/expansion/ExpansionImpl.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py
index 6c52cbd5..484bdfd8 100644
--- a/script/impl/expansion/ExpansionImpl.py
+++ b/script/impl/expansion/ExpansionImpl.py
@@ -31,6 +31,7 @@ import time
import grp
import socket
import stat
+import copy
from multiprocessing import Process, Value
sys.path.append(sys.path[0] + "/../../../../")
@@ -467,7 +468,7 @@ class ExpansionImpl():
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
dataNode,self.envFile)
- needGRPCHosts = self.context.newHostList
+ needGRPCHosts = copy.copy(self.context.newHostList)
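+ # shallow copy so appending the primary ip below does not modify context.newHostList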
if insType != MODE_PRIMARY:
primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"]
needGRPCHosts.append(primaryHostIp)
--
Gitee
From 659dd43a448d0ac92c330847ef1fa4421034328e Mon Sep 17 00:00:00 2001
From: "Ricardo.Cui"
Date: Thu, 18 Feb 2021 09:50:15 +0800
Subject: [PATCH 04/16] Fix -L does not work in gs_om -t status
---
script/gspylib/common/DbClusterInfo.py | 4 +---
script/gspylib/component/Kernel/Kernel.py | 9 ++++++++-
script/impl/om/OmImpl.py | 2 ++
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py
index 7aaac20d..7c95d936 100644
--- a/script/gspylib/common/DbClusterInfo.py
+++ b/script/gspylib/common/DbClusterInfo.py
@@ -1962,9 +1962,7 @@ class dbClusterInfo():
with open(fileName, "a") as fp:
fp.write(content)
fp.flush()
-
- else:
- sys.stdout.write(content)
+ sys.stdout.write(content)
def __checkOsUser(self, user):
"""
diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py
index d2a85d94..0fe15f1b 100644
--- a/script/gspylib/component/Kernel/Kernel.py
+++ b/script/gspylib/component/Kernel/Kernel.py
@@ -129,7 +129,14 @@ class Kernel(BaseComponent):
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
"instance" + " Error: \n%s." % output)
if output.find("No such process") > 0:
- GaussLog.exitWithError(output)
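+ # "No such process" may be spurious: only exit with the error if a gaussdb
+ # process for this data directory is actually found (or the check itself fails)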
+ cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \
+ "awk '{if($2 == curuid && $1!=\"-n\") " \
+ "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \
+ " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \
+ "awk -F/ '{print $3 }'" % (self.instInfo.datadir)
+ (status, rightpid) = subprocess.getstatusoutput(cmd)
+ if rightpid or status != 0:
+ GaussLog.exitWithError(output)
def isPidFileExist(self):
pidFile = "%s/postmaster.pid" % self.instInfo.datadir
diff --git a/script/impl/om/OmImpl.py b/script/impl/om/OmImpl.py
index ddc6e4a2..4dcaa744 100644
--- a/script/impl/om/OmImpl.py
+++ b/script/impl/om/OmImpl.py
@@ -199,6 +199,8 @@ class OmImpl:
cmd = queryCmd()
if (self.context.g_opts.outFile != ""):
cmd.outputFile = self.context.g_opts.outFile
+ else:
+ cmd.outputFile = self.logger.logFile
if (self.context.g_opts.show_detail):
if (
self.context.clusterInfo.clusterType
--
Gitee
From e8365d883c84dc9953106308c151a83b856b73eb Mon Sep 17 00:00:00 2001
From: "Ricardo.Cui"
Date: Thu, 18 Feb 2021 09:50:15 +0800
Subject: [PATCH 05/16] Fix -L does not work in gs_om -t status
---
script/gspylib/common/DbClusterInfo.py | 7 ++-----
script/gspylib/component/Kernel/Kernel.py | 9 ++++++++-
script/impl/om/OmImpl.py | 2 ++
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py
index 7aaac20d..0e95400b 100644
--- a/script/gspylib/common/DbClusterInfo.py
+++ b/script/gspylib/common/DbClusterInfo.py
@@ -1861,7 +1861,6 @@ class dbClusterInfo():
"receiver_received_location, receiver_write_location," \
"receiver_flush_location, receiver_replay_location," \
"sync_percent, channel from pg_stat_get_wal_receiver();"
- cascadeOutput = ""
if dbNode.name != localHostName:
cmd = "[need_replace_quotes] gsql -m -d postgres -p " \
"%s -A -t -c \"%s\"" % \
@@ -1872,7 +1871,7 @@ class dbClusterInfo():
"failed to connect") >= 0:
continue
else:
- output = cascadeOutput.split('\n')[1:-1]
+ cascadeOutput = cascadeOutput.split('\n')[1:-1]
else:
cmd = "gsql -m -d postgres -p %s -A -t -c \"%s\"" % (
dnInst.port, subsql)
@@ -1962,9 +1961,7 @@ class dbClusterInfo():
with open(fileName, "a") as fp:
fp.write(content)
fp.flush()
-
- else:
- sys.stdout.write(content)
+ sys.stdout.write(content)
def __checkOsUser(self, user):
"""
diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py
index d2a85d94..0fe15f1b 100644
--- a/script/gspylib/component/Kernel/Kernel.py
+++ b/script/gspylib/component/Kernel/Kernel.py
@@ -129,7 +129,14 @@ class Kernel(BaseComponent):
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
"instance" + " Error: \n%s." % output)
if output.find("No such process") > 0:
- GaussLog.exitWithError(output)
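+ # "No such process" may be spurious: only exit with the error if a gaussdb
+ # process for this data directory is actually found (or the check itself fails)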
+ cmd = "ps c -eo pid,euid,cmd | grep gaussdb | grep -v grep | " \
+ "awk '{if($2 == curuid && $1!=\"-n\") " \
+ "print \"/proc/\"$1\"/cwd\"}' curuid=`id -u`|" \
+ " xargs ls -l |awk '{if ($NF==\"%s\") print $(NF-2)}' | " \
+ "awk -F/ '{print $3 }'" % (self.instInfo.datadir)
+ (status, rightpid) = subprocess.getstatusoutput(cmd)
+ if rightpid or status != 0:
+ GaussLog.exitWithError(output)
def isPidFileExist(self):
pidFile = "%s/postmaster.pid" % self.instInfo.datadir
diff --git a/script/impl/om/OmImpl.py b/script/impl/om/OmImpl.py
index ddc6e4a2..4dcaa744 100644
--- a/script/impl/om/OmImpl.py
+++ b/script/impl/om/OmImpl.py
@@ -199,6 +199,8 @@ class OmImpl:
cmd = queryCmd()
if (self.context.g_opts.outFile != ""):
cmd.outputFile = self.context.g_opts.outFile
+ else:
+ cmd.outputFile = self.logger.logFile
if (self.context.g_opts.show_detail):
if (
self.context.clusterInfo.clusterType
--
Gitee
From 5e23b9de645176476553bd0787de0f0e413775ed Mon Sep 17 00:00:00 2001
From: cchen676
Date: Wed, 24 Feb 2021 12:20:38 +0800
Subject: [PATCH 06/16] fix issue: gs_om -t status --all fails when the standby
 and cascade standby are stopped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gspylib/common/DbClusterInfo.py | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py
index 0e95400b..b5fadbfd 100644
--- a/script/gspylib/common/DbClusterInfo.py
+++ b/script/gspylib/common/DbClusterInfo.py
@@ -1453,7 +1453,7 @@ class dbClusterInfo():
dnInst.azName)
if dnInst.localRole == "Primary":
outText = outText + (
- "static_connections : %s\n\n" %
+ "static_connections : %s\n" %
dnInst.staticConnections)
outText = outText + (
"HA_state : %s\n" %
@@ -1462,14 +1462,14 @@ class dbClusterInfo():
"instance_role : %s\n" %
dnInst.localRole)
if dnInst.localRole == "Primary":
- outText = outText + "------------------------" \
+ outText = outText + "\n------------------------" \
"---------------" \
"--------------------------------\n\n"
continue
for i_loop in syncInfo:
- if i_loop[11] == '':
- i_loop[11] = 'Unknown'
if i_loop[0] == dnInst.listenIps[0]:
+ if i_loop[11] == '':
+ i_loop[11] = 'Unknown'
outText = outText + (
"HA_state : %s\n" %
i_loop[1])
@@ -1507,11 +1507,10 @@ class dbClusterInfo():
outText = outText + (
"upstream_nodeIp : %s\n" %
i_loop[12])
- outText = outText + ("\n")
- outText = outText + "------------------------" \
- "---------------" \
- "--------------------------------\n\n"
break
+ outText = outText + "\n------------------------" \
+ "---------------" \
+ "--------------------------------\n\n"
if nodeId != 0:
break
else:
--
Gitee
From 9f3f4db3cb9eb920299a563a8b2b5957d65e4f44 Mon Sep 17 00:00:00 2001
From: gyt0221 <846772234@qq.com>
Date: Mon, 1 Mar 2021 17:07:25 +0800
Subject: [PATCH 07/16] Change the grey upgrade interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gs_upgradectl | 49 +-------------------
script/gspylib/common/ParameterParsecheck.py | 2 +-
script/impl/upgrade/UpgradeImpl.py | 40 +---------------
3 files changed, 5 insertions(+), 86 deletions(-)
diff --git a/script/gs_upgradectl b/script/gs_upgradectl
index f4cddf5a..7c4edd8d 100644
--- a/script/gs_upgradectl
+++ b/script/gs_upgradectl
@@ -96,9 +96,7 @@ Usage:
gs_upgradectl -t chose-strategy [-l LOGFILE]
gs_upgradectl -t commit-upgrade -X XMLFILE [-l LOGFILE]
- gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE]
- gs_upgradectl -t auto-upgrade -X XMLFILE --grey [-l LOGFILE]
- {-h HOSTNAME | -g NODENUMBER | --continue}
+ gs_upgradectl -t auto-upgrade -X XMLFILE [-l LOGFILE] [--grey]
gs_upgradectl -t auto-rollback -X XMLFILE [-l LOGFILE] [--force]
General options:
@@ -112,13 +110,8 @@ General options:
later version cluster.
--force Force to rollback when cluster status is
not normal
+ --grey Use grey binary upgrade
-Options for grey upgrade
- -h Under grey upgrade, specified nodes name in
- ssh mode.
- -g Under grey upgrade, upgrade node numbers.
- --continue Under grey upgrade, continue to upgrade
- remain nodes.
"""
print(self.usage.__doc__)
@@ -145,12 +138,6 @@ Options for grey upgrade
if "grey" in ParaDict.keys():
self.is_grey_upgrade = True
self.is_inplace_upgrade = False
- if "nodename" in ParaDict.keys():
- self.nodeNames = ParaDict.get("nodename")
- if "nodesNum" in ParaDict.keys():
- self.nodesNum = ParaDict.get("nodesNum")
- if "continue" in ParaDict.keys():
- self.upgrade_remain = True
if "force" in ParaDict.keys():
self.forceRollback = True
@@ -203,14 +190,6 @@ Options for grey upgrade
if not os.path.exists(self.xmlFile):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
self.xmlFile)
-
- # check parameter base on different action
- # check the param which input
- if self.action == Const.ACTION_AUTO_UPGRADE:
- if self.is_grey_upgrade:
- self.checkCommandConflicts(inplace=False)
- else:
- self.checkCommandConflicts()
# check mpprc file path
# get mpprcFile by MPPDB_ENV_SEPARATE_PATH. Even if the return value
# is "" or None, no need to pay attention
@@ -285,30 +264,6 @@ Options for grey upgrade
self.logger.debug("Retry distributing xml command, "
"the {0} time.".format(count))
- def checkCommandConflicts(self, inplace=True):
- """
- function: check the command line for conflict input
- :return:
- """
- conflictPara = 0
- if self.upgrade_remain:
- conflictPara += 1
- if len(self.nodeNames) != 0:
- conflictPara += 1
- if self.nodesNum != -1:
- conflictPara += 1
- if inplace:
- if conflictPara > 0:
- raise Exception("The parameter %s should be used in grey "
- "upgrade" % "'-continue, -h, -g'")
- else:
- if conflictPara > 1:
- GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50005"] % (
- "-continue", "-h, -g"))
- if conflictPara == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] %
- "-continue, -h, -g")
-
if __name__ == '__main__':
"""
diff --git a/script/gspylib/common/ParameterParsecheck.py b/script/gspylib/common/ParameterParsecheck.py
index 1f2137e6..89660924 100644
--- a/script/gspylib/common/ParameterParsecheck.py
+++ b/script/gspylib/common/ParameterParsecheck.py
@@ -130,7 +130,7 @@ gs_upgradectl_chose_strategy = ["-t:", "-?", "--help", "-V", "--version",
"-l:"]
# auto-upgrade parameter lists
gs_upgradectl_auto_upgrade = ["-t:", "-?", "--help", "-V", "--version", "-l:",
- "-X:", "--grey", "-h:", "-g:", "--continue"]
+ "-X:", "--grey"]
# auto-rollback parameter lists
gs_upgradectl_auto_rollback = ["-t:", "-?", "--help", "-V", "--version",
"-l:", "-X:", "--force"]
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index 60d0eac9..27f11a3b 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -1407,44 +1407,8 @@ class UpgradeImpl:
# when number and node names is empty
self.context.logger.debug("Choose the nodes to be upgraded.")
self.setClusterDetailInfo()
-
- if len(self.context.nodeNames) != 0 :
- self.context.logger.log(
- "Upgrade nodes %s." % self.context.nodeNames)
- greyNodeNames = self.getUpgradedNodeNames()
- checkH_nodes = \
- [val for val in greyNodeNames if val in self.context.nodeNames]
- if len(checkH_nodes) > 0:
- raise Exception("The nodes %s have been upgraded" %
- checkH_nodes)
- # confirm the nodesNum in checkParameter, 1 or specified by -g
- elif self.context.upgrade_remain:
- greyNodeNames = self.getUpgradedNodeNames()
- otherNodeNames = [
- i for i in self.context.clusterNodes if i not in greyNodeNames]
- self.context.nodeNames = otherNodeNames
- self.context.logger.debug(
- "Upgrade remain nodes %s." % self.context.nodeNames)
- # specify the node num, try to find matched combination
- else:
- nodeTotalNum = len(self.context.clusterNodes)
- if len(self.context.clusterNodes) == 1:
- self.context.nodeNames.append(
- self.context.clusterInfo.dbNodes[0].name)
- self.context.logger.log(
- "Upgrade one node '%s'." % self.context.nodeNames[0])
- # SinglePrimaryMultiStandbyCluster / MasterStandbyCluster with
- # more than 1 node
- elif self.context.nodesNum == nodeTotalNum:
- self.context.nodeNames = self.context.clusterNodes
- self.context.logger.log("Upgrade all nodes.")
- elif self.context.nodesNum > nodeTotalNum:
- raise Exception(ErrorCode.GAUSS_529["GAUSS_52906"])
- else:
- self.context.nodeNames = self.findOneMatchedCombin(
- self.context.clusterNodes)
- self.context.logger.log(
- "Upgrade nodes %s." % self.context.nodeNames)
+ self.context.nodeNames = self.context.clusterNodes
+ self.context.logger.log("Upgrade all nodes.")
def getUpgradedNodeNames(self, step=GreyUpgradeStep.STEP_INIT_STATUS):
"""
--
Gitee
From 1640da9ac74f60f60d4b04789b7b04291cd8238b Mon Sep 17 00:00:00 2001
From: gyt0221 <846772234@qq.com>
Date: Tue, 2 Mar 2021 15:42:35 +0800
Subject: [PATCH 08/16] Enabling Kerberos is not supported during upgrade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/impl/upgrade/UpgradeImpl.py | 75 ++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index 60d0eac9..c39f9090 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -1695,6 +1695,21 @@ class UpgradeImpl:
# Normal and the database could be connected
# if not, exit.
self.startCluster()
+
+ # uninstall kerberos if it has already been installed
+ pghost_path = DefaultValue.getEnvironmentParameterValue(
+ 'PGHOST', self.context.user)
+ kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
+ if os.path.exists(kerberosflagfile):
+ self.stopCluster()
+ self.context.logger.log("Starting uninstall Kerberos.",
+ "addStep")
+ cmd = "source %s && " % self.context.userProfile
+ cmd += "%s -m uninstall -U %s" % (OMCommand.getLocalScript(
+ "Local_Kerberos"), self.context.user)
+ self.context.sshTool.executeCommand(cmd, "")
+ self.context.logger.log("Successfully uninstall Kerberos.")
+ self.startCluster()
if self.unSetClusterReadOnlyMode() != 0:
raise Exception("NOTICE: "
+ ErrorCode.GAUSS_529["GAUSS_52907"])
@@ -1902,9 +1917,43 @@ class UpgradeImpl:
self.stopCluster()
self.startCluster()
+ # install Kerberos
+ self.install_kerberos()
self.context.logger.log("Commit binary upgrade succeeded.")
self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True)
+ def install_kerberos(self):
+ """
+ install kerberos after upgrade
+ :return:NA
+ """
+ pghost_path = DefaultValue.getEnvironmentParameterValue(
+ 'PGHOST', self.context.user)
+ kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
+ if os.path.exists(kerberosflagfile):
+ # install kerberos
+ cmd = "source %s &&" % self.context.userProfile
+ cmd += "gs_om -t stop && "
+ cmd += "%s -m install -U %s --krb-server" % (
+ OMCommand.getLocalScript("Local_Kerberos"),
+ self.context.user)
+ (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5)
+ if status != 0:
+ raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
+ "Command:%s. Error:\n%s" % (cmd, output))
+ cmd = "source %s && " % self.context.userProfile
+ cmd += "%s -m install -U %s --krb-client " % (
+ OMCommand.getLocalScript("Local_Kerberos"), self.context.user)
+ self.context.sshTool.executeCommand(
+ cmd, "", hostList=self.context.clusterNodes)
+ self.context.logger.log("Successfully install Kerberos.")
+ cmd = "source %s && gs_om -t start" % self.context.userProfile
+ (status, output) = subprocess.getstatusoutput(cmd)
+ if status != 0 and not self.context.ignoreInstance:
+ raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
+ "Command:%s. Error:\n%s" % (cmd, output))
+ os.remove(kerberosflagfile)
+
def refresh_dynamic_config_file(self):
"""
refresh dynamic config file
@@ -3613,6 +3662,8 @@ class UpgradeImpl:
ErrorCode.GAUSS_529["GAUSS_52907"])
self.cleanBinaryUpgradeBakFiles(True)
self.cleanInstallPath(Const.NEW)
+ # install kerberos
+ self.install_kerberos()
except Exception as e:
self.context.logger.error(str(e))
self.context.logger.log("Rollback failed.")
@@ -4138,6 +4189,30 @@ class UpgradeImpl:
else:
raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' +
" Value: %s" % self.context.action)
+
+ # check whether kerberos was installed before action_inplace_upgrade
+ self.context.logger.debug(
+ "Check whether kerberos was installed before action_inplace_upgrade")
+ xmlfile = os.path.join(os.path.dirname(self.context.userProfile),
+ DefaultValue.FI_KRB_XML)
+ if os.path.exists(xmlfile) and \
+ self.context.action == Const.ACTION_AUTO_UPGRADE \
+ and self.context.is_grey_upgrade:
+ raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % "kerberos")
+ if os.path.exists(xmlfile) and self.context.is_inplace_upgrade:
+ pghost_path = DefaultValue.getEnvironmentParameterValue(
+ 'PGHOST', self.context.user)
+ destfile = "%s/krb5.conf" % os.path.dirname(
+ self.context.userProfile)
+ kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
+ cmd = "cp -rf %s %s " % (destfile, kerberosflagfile)
+ (status, output) = DefaultValue.retryGetstatusoutput(cmd, 3, 5)
+ if status != 0:
+ raise Exception(
+ ErrorCode.GAUSS_502["GAUSS_50206"] % kerberosflagfile
+ + " Error: \n%s" % output)
+ self.context.logger.debug(
+ "Successful back up kerberos config file.")
except Exception as e:
self.context.logger.debug(traceback.format_exc())
self.exitWithRetCode(self.context.action, False, str(e))
--
Gitee
From e24a2dbdd615ec2808a54aec33667090a855ad5a Mon Sep 17 00:00:00 2001
From: gyt0221 <846772234@qq.com>
Date: Wed, 3 Mar 2021 14:22:57 +0800
Subject: [PATCH 09/16] 1.1.0-330: check enable_stream_replication and raise an
 exception during upgrade when it is off
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/impl/upgrade/UpgradeConst.py | 6 +++++-
script/impl/upgrade/UpgradeImpl.py | 18 ++++++++++++++++++
script/local/UpgradeUtility.py | 13 ++++++-------
3 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/script/impl/upgrade/UpgradeConst.py b/script/impl/upgrade/UpgradeConst.py
index 680063e3..0280a199 100644
--- a/script/impl/upgrade/UpgradeConst.py
+++ b/script/impl/upgrade/UpgradeConst.py
@@ -124,7 +124,9 @@ BACKUP_DIR_LIST = ['global', 'pg_clog', 'pg_xlog', 'pg_multixact',
BACKUP_DIR_LIST_BASE = ['global', 'pg_clog', 'pg_csnlog']
BACKUP_DIR_LIST_64BIT_XID = ['pg_multixact', 'pg_replslot', 'pg_notify',
'pg_subtrans', 'pg_twophase']
-
+VALUE_OFF = ["off", "false", "0", "no"]
+VALUE_ON = ["on", "true", "1", "yes"]
+DN_GUC = ["upgrade_mode", "enable_stream_replication"]
FIRST_GREY_UPGRADE_NUM = 92
UPGRADE_PRECOMMIT_NUM = 0.001
@@ -156,3 +158,5 @@ COMBIN_NUM = 30
ON_INPLACE_UPGRADE = "IsInplaceUpgrade"
MAX_APP_SIZE = 2000
UPGRADE_VERSION_64bit_xid = 91.208
+ENABLE_STREAM_REPLICATION_VERSION = "92.149"
+ENABLE_STREAM_REPLICATION_NAME = "enable_stream_replication"
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index c39f9090..9900849f 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -4525,6 +4525,11 @@ class UpgradeImpl:
self.context.logger.log("Failed to check upgrade environment.",
"constant")
raise Exception(str(e))
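+ # from cluster number 92.149 on, enable_stream_replication must be on before upgrading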
+ if not self.context.forceRollback:
+ if self.context.oldClusterNumber >= \
+ Const.ENABLE_STREAM_REPLICATION_VERSION:
+ self.check_gucval_is_inval_given(
+ Const.ENABLE_STREAM_REPLICATION_NAME, Const.VALUE_ON)
try:
if self.context.action == Const.ACTION_INPLACE_UPGRADE:
self.context.logger.log(
@@ -4541,6 +4546,19 @@ class UpgradeImpl:
self.context.logger.log(
"Successfully checked upgrade environment.", "constant")
+ def check_gucval_is_inval_given(self, guc_name, val_list):
+ """
+ Check whether a given parameter is set to one of the values in a
+ given value list on the given instances.
+ """
+ self.context.logger.debug("checks whether the parameter:{0} is "
+ "the value:{1}.".format(guc_name, val_list))
+ guc_str = "{0}:{1}".format(guc_name, ",".join(val_list))
+ self.checkParam(guc_str)
+ self.context.logger.debug("Success to check the parameter:{0} value "
+ "is in the value:{1}.".format(guc_name,
+ val_list))
+
def checkDifferentVersion(self):
"""
if the cluster has only one version. no need to check
diff --git a/script/local/UpgradeUtility.py b/script/local/UpgradeUtility.py
index f8f242ce..5d610254 100644
--- a/script/local/UpgradeUtility.py
+++ b/script/local/UpgradeUtility.py
@@ -1813,12 +1813,11 @@ def checkGucValue():
"""
key = g_opts.gucStr.split(':')[0].strip()
value = g_opts.gucStr.split(':')[1].strip()
- if key == "upgrade_from":
- instances = g_dbNode.cmagents
- fileName = "cm_agent.conf"
- elif key == "upgrade_mode":
- #instances = g_dbNode.coordinators
- #instances.extend(g_dbNode.datanodes)
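+ # map an on/off style value to its full synonym list so any equivalent spelling matches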
+ if value in const.VALUE_OFF:
+ value = const.VALUE_OFF
+ if value in const.VALUE_ON:
+ value = const.VALUE_ON
+ if key in const.DN_GUC:
instances = g_dbNode.datanodes
fileName = "postgresql.conf"
else:
@@ -1849,7 +1848,7 @@ def checkGucValue():
realValue = realValue.split('#')[0].strip()
g_logger.debug("[key:%s]: Realvalue %s, ExpectValue %s" % (
key, str(realValue), str(value)))
- if str(value) != str(realValue):
+ if str(realValue) not in str(value):
raise Exception(
ErrorCode.GAUSS_521["GAUSS_52102"] % key
+ " Real value %s, expect value %s"
--
Gitee
From 20098110e5e0bd3ba63f7b96c8b3d4e2b981e8dd Mon Sep 17 00:00:00 2001
From: gyt0221 <846772234@qq.com>
Date: Fri, 5 Mar 2021 15:24:25 +0800
Subject: [PATCH 10/16] Change the grey upgrade interface and update the
 related messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/impl/upgrade/UpgradeImpl.py | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index 9900849f..386879de 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -1067,7 +1067,7 @@ class UpgradeImpl:
self.upgradeAgain()
except Exception as e:
errmsg = ErrorCode.GAUSS_529["GAUSS_52934"] + \
- "You can use -h to upgrade or manually rollback."
+ "You can use --grey to upgrade or manually rollback."
self.context.logger.log(errmsg + str(e))
self.exitWithRetCode(self.context.action, False)
else:
@@ -1076,7 +1076,7 @@ class UpgradeImpl:
def upgradeAgain(self):
try:
- self.context.logger.log(
+ self.context.logger.debug(
"From this step, you can use -h to upgrade again if failed.")
# we have guarantee specified nodes have same step,
# so we only need to get one node step
@@ -1108,8 +1108,9 @@ class UpgradeImpl:
self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
except Exception as e:
- self.context.logger.log("Failed to upgrade, can use -h to "
- "upgrade again. Error: %s" % str(e))
+ self.context.logger.log("Failed to upgrade, can use --grey to "
+ "upgrade again after rollback. Error: "
+ "%s" % str(e))
self.context.logger.debug(traceback.format_exc())
self.exitWithRetCode(self.context.action, False, str(e))
self.context.logger.log(
@@ -4602,6 +4603,8 @@ class UpgradeImpl:
we can upgrade again
:return:
"""
+ if self.context.is_grey_upgrade:
+ self.check_option_grey()
if len(self.context.nodeNames) != 0:
self.checkOptionH()
elif self.context.upgrade_remain:
@@ -4609,6 +4612,24 @@ class UpgradeImpl:
else:
self.checkOptionG()
+ def check_option_grey(self):
+ """
+ if the user has already used --grey, --grey cannot be used again
+ :return:
+ """
+ stepFile = os.path.join(
+ self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE)
+ if not os.path.isfile(stepFile):
+ self.context.logger.debug(
+ "File %s does not exists. No need to check." %
+ Const.GREY_UPGRADE_STEP_FILE)
+ return
+ grey_node_names = self.getUpgradedNodeNames()
+ if grey_node_names:
+ self.context.logger.log(
+ "All nodes have been upgrade, no need to upgrade again.")
+ self.exitWithRetCode(self.action, True)
+
def checkOptionH(self):
self.checkNodeNames()
stepFile = os.path.join(
--
Gitee
From e176dbac70c9fbf388c2dbf37ef4c17cd1b37626 Mon Sep 17 00:00:00 2001
From: xue_weijing <1007396880@qq.com>
Date: Mon, 8 Mar 2021 11:10:02 +0800
Subject: [PATCH 11/16] Add an automated installation script for one primary
 and one standby
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
simpleInstall/one_master_one_slave.sh | 400 ++++++++++++++++++
.../one_master_one_slave_template.xml | 33 ++
2 files changed, 433 insertions(+)
create mode 100644 simpleInstall/one_master_one_slave.sh
create mode 100644 simpleInstall/one_master_one_slave_template.xml
diff --git a/simpleInstall/one_master_one_slave.sh b/simpleInstall/one_master_one_slave.sh
new file mode 100644
index 00000000..8c2b4161
--- /dev/null
+++ b/simpleInstall/one_master_one_slave.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+
+if [ `id -u` -ne 0 ];then
+ echo "only a user with the root permission can run this script."
+ exit 1
+fi
+
+declare -r SCRIPT_PATH=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
+declare -r SCRIPT_NAME=$0
+echo "SCRIPT_PATH : ${SCRIPT_PATH}"
+declare PACKAGE_PATH=`dirname ${SCRIPT_PATH}`
+declare USER_NAME=""
+declare HOST_IPS=""
+declare HOST_IPS_ARR=""
+declare HOST_IPS_ARRAY=""
+declare HOST_NAMES=""
+declare HOST_NAMES_ARRAY=""
+declare USER_GROUP="dbgrp"
+declare PORT="20050"
+declare XML_DIR=${SCRIPT_PATH}/one_master_one_slave_template.xml
+declare INSTALL_PATH=""
+declare SYSTEM_ARCH=""
+declare SYSTEM_NAME=""
+declare PASSWORD=""
+
+function print_help()
+{
+ echo "Usage: $0 [OPTION]
+ -h|--help show help information
+ -U|--user_name cluster user
+ -H|--host_ip intranet ip address of the host in the backend storage network(host1,host2)
+ -G|--user_grp group of the cluster user(default value dbgrp)
+ -p|--port database server port(default value 20050)
+ -D|--install_location installation directory of the openGauss program(default value /home/<user_name>)
+ -X|--xml_location cluster xml configuration file path
+ -P|--password password of the root user and the cluster user(the two passwords must be the same)
+ "
+}
+
+function die()
+{
+ echo -e "\033[31merror:\033[0m $1"
+ exit 1
+}
+
+function warn()
+{
+ echo -e "\033[33mwarnning:\033[0m $1"
+ sleep 2s
+}
+
+function info()
+{
+ echo -e "\033[32minfo:\033[0m $1"
+}
+
+function expect_ssh()
+{
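+ # drive an interactive command with expect: accept the host key, answer
+ # password prompts, and return success once "$3" shows up in the output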
+ /usr/bin/expect <<-EOF
+ set timeout -1
+ spawn $1
+ expect {
+ "*yes/no" { send "yes\r"; exp_continue }
+ "*assword:" { send "$2\r"; exp_continue }
+ "*$3*" { exit }
+ }
+ expect eof
+EOF
+ if [ $? == 0 ]
+ then
+ return 0
+ else
+ return 1
+ fi
+}
+
+function expect_hostname()
+{
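+ # like expect_ssh, but capture the spawned command's output in ./expectFile
+ # so the caller can read the remote hostname back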
+ expect <<EOF > expectFile
+ set timeout -1
+ spawn $1
+ expect {
+ "*yes/no" { send "yes\r"; exp_continue }
+ "*assword:" {send "$2\r"; exp_continue}
+ }
+EOF
+ if [ $? == 0 ]
+ then
+ return 0
+ else
+ return 1
+ fi
+}
+
+
+function main()
+{
+ while [ $# -gt 0 ]
+ do
+ case "$1" in
+ -h|--help)
+ print_help
+ exit 1
+ ;;
+ -U|--user_name)
+ if [ "$2"X = X ]
+ then
+ die "no cluster user values"
+ fi
+ USER_NAME=$2
+ shift 2
+ ;;
+ -G|--user_grp)
+ if [ "$2"X = X ]
+ then
+ die "no group values"
+ fi
+ USER_GROUP=$2
+ shift 2
+ ;;
+ -H|--host_ip)
+ if [ "$2"X = X ]
+ then
+ die "no intranet ip address of the host values"
+ fi
+ HOST_IPS=$2
+ shift 2
+ HOST_IPS_ARR=${HOST_IPS//,/ }
+ HOST_IPS_ARRAY=(${HOST_IPS_ARR})
+ if [ ${#HOST_IPS_ARRAY[*]} != 2 ]
+ then
+ die "the current script can be installed only on two nodes, one active node and one standby node"
+ fi
+ ;;
+ -X|--xml_location)
+ if [ "$2"X = X ]
+ then
+ die "no cluster xml configuration file values"
+ fi
+ XML_DIR=$2
+ shift 2
+ ;;
+ -D|--install_location)
+ if [ "$2"X = X ]
+ then
+ die "no installation directory of the openGauss program values"
+ fi
+ INSTALL_PATH=$2
+ shift 2
+ ;;
+ -p|--port)
+ if [ "$2"X = X ]
+ then
+ die "the port number cannot be empty."
+ fi
+ PORT=$2
+ shift 2
+ ;;
+ -P|--password)
+ if [ "$2"X = X ]
+ then
+ die "the password cannot be empty."
+ fi
+ PASSWORD=$2
+ shift 2
+ ;;
+ *)
+ echo "Internal Error: option processing error" 1>&2
+ echo "please input right paramtenter, the following command may help you"
+ echo "sh active_standby_nodes_install.sh --help or sh active_standby_nodes_install.sh -h"
+ exit 1
+ esac
+ done
+
+ if [ "${USER_NAME}"X == X ]
+ then
+ die "no cluster user values"
+ fi
+
+ if [ -z ${INSTALL_PATH} ]
+ then
+ INSTALL_PATH="/home/${USER_NAME}"
+ fi
+
+ if [ "${PASSWORD}"X == X ]
+ then
+ echo "please enter the password of the root user&the password of a common user(the two passwords must be the same)"
+ echo -n "password:"
+ read PASSWORD
+ while [ -z ${PASSWORD} ]
+ do
+ echo "the value cannot be null, please enter the password again"
+ echo -n "password:"
+ read PASSWORD
+ done
+ fi
+
+ if [ "${HOST_IPS}"X == X ]
+ then
+ die "no intranet ip address values"
+ else
+ len=${#HOST_IPS_ARRAY[*]}
+ index=0
+ while [ ${index} -lt ${len} ]
+ do
+ expect_hostname "ssh ${HOST_IPS_ARRAY[${index}]} hostname" ${PASSWORD}
+ if [ $? == 0 ]
+ then
+ expectResult=`tail -1 expectFile|head -1| tr -d "\r"| tr -d "\n"`
+ if [ -z ${expectResult} ]
+ then
+ die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
+ fi
+ HOST_NAMES_ARRAY[${index}]=${expectResult}
+ else
+ die "failed to obtain the hostname based on the ip address of ${HOST_IPS_ARRAY[${index}]}."
+ fi
+            index=$(( index + 1 ))
+ done
+ fi
+ rm -rf expectFile
+ HOST_NAMES="${HOST_NAMES_ARRAY[0]},${HOST_NAMES_ARRAY[1]}"
+ SYSTEM_ARCH=`uname -p`
+ SYSTEM_NAME=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
+ if [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+ then
+ info "the current system environment is openEuler + arm"
+ elif [ "${SYSTEM_NAME}" == "openEuler" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+ then
+ info "the current system environment is openEuler + x86"
+ elif [ "${SYSTEM_NAME}" == "centos" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+ then
+ info "the current system environment is CentOS + x86"
+ elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+ then
+ info "the current system environment is redhat + x86"
+ elif [ "${SYSTEM_NAME}" == "redhat" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+ then
+ info "the current system environment is redhat + arm"
+ elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "x86_64" ]
+ then
+ info "the current system environment is kylin + x86"
+ elif [ "${SYSTEM_NAME}" == "kylin" ] && [ "${SYSTEM_ARCH}" == "aarch64" ]
+ then
+ info "the current system environment is kylin + arm"
+ else
+ warn "the current system environment is ${SYSTEM_NAME} + ${SYSTEM_ARCH}, \
+ you are advised to use the centos, openEuler, redhat, or kylin system. because OpenGauss may not adapt to the current system."
+ fi
+ info "installation parameter verification completed."
+}
+
+function checks()
+{
+ system_arch=`uname -p`
+ system_name=`cat /etc/*-release | grep '^ID=".*'|awk -F "[=\"]" '{print $3}'`
+ if [ ${system_arch} != "$8" -o ${system_name} != "$9" ]
+ then
+ warn "inconsistency between the system and the execution machine"
+ fi
+
+ egrep "^$3" /etc/group >& /dev/null
+ if [ $? != 0 ];then
+ groupadd $3
+ fi
+ egrep "^$4" /etc/passwd >& /dev/null
+ if [ $? != 0 ];then
+ useradd -g $3 -d /home/$4 -m -s /bin/bash $4 2>/dev/null
+ if [ $? != 0 ]
+ then
+ die "failed to create the user on the node $2."
+ fi
+ expect_ssh "passwd $4" "$5" "passwd:"
+ if [ $? != 0 ]
+ then
+ die "an error occurred when setting the user password on the node $2"
+ fi
+ fi
+
+ sed -i "s/SELINUX=.*/SELINUX=disabled/g" /etc/selinux/config && firewall-cmd --permanent --add-port="$6/tcp" && firewall-cmd --reload
+ if [ $? != 0 ]
+ then
+ warn "some errors occur during system environment setting on host $2"
+ fi
+
+ INSTALL_PATH=$7
+ if [ ! -e ${INSTALL_PATH} ]
+ then
+ mkdir -p ${INSTALL_PATH}
+ else
+ rm -rf ${INSTALL_PATH}/*
+ fi
+ chmod -R 755 ${INSTALL_PATH}/
+ chown -R $4:$3 ${INSTALL_PATH}/
+ if [ -f /${10} ]
+ then
+ mv /${10} $(eval echo ~$4)/
+ fi
+ echo "check end"
+}
+
+function pre_checks()
+{
+ if [ ${#HOST_IPS_ARRAY[*]} == 0 ]
+ then
+ die "the number of internal IP addresses of the host is incorrect."
+ fi
+ localips=`/sbin/ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:"`
+ for ip in ${HOST_IPS_ARRAY[@]}
+ do
+ info "start to check the installation environment of host ${ip}."
+ sleep 2s
+ # standby node
+ if [[ $localips != *${ip}* ]]
+ then
+ sshcmd="scp ${SCRIPT_PATH}/${SCRIPT_NAME} root@${ip}:/"
+ expect_ssh "${sshcmd}" "${PASSWORD}" "100%"
+ if [ $? != 0 ]
+ then
+ die "an error occurred when copying the script to the target host ${ip}."
+ fi
+ sshcmd="ssh ${ip} \"sh /${SCRIPT_NAME} inner ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}\""
+ expect_ssh "${sshcmd}" "${PASSWORD}" "check end"
+ if [ $? != 0 ]
+ then
+ die "an error occurred during the pre-installation check on the target host ${ip}."
+ fi
+ else
+ # local
+ checks "" ${ip} ${USER_GROUP} ${USER_NAME} ${PASSWORD} ${PORT} ${INSTALL_PATH} ${SYSTEM_ARCH} ${SYSTEM_NAME} ${SCRIPT_NAME}
+ if [ $? != 0 ]
+ then
+ die "an error occurred during the pre-installation check on the target host ${ip}."
+ fi
+ fi
+ info "succeeded in checking the installation environment of host ${ip}."
+ done
+ return 0
+}
+
+function xmlconfig()
+{
+ info "start to automatically configure the installation file."
+    install_location=${INSTALL_PATH//\//\\\/}
+ if [ -e ${XML_DIR} ]
+ then
+ sed 's/@{nodeNames}/'${HOST_NAMES}'/g' ${XML_DIR} |
+ sed 's/@{backIpls}/'${HOST_IPS}'/g' |
+ sed 's/@{clusterName}/'${USER_NAME}'/g' |
+ sed 's/@{port}/'${PORT}'/g' |
+        sed 's/@{installPath}/'${install_location}'/g' |
+ sed 's/@{nodeName1}/'${HOST_NAMES_ARRAY[0]}'/g' |
+ sed 's/@{backIp1}/'${HOST_IPS_ARRAY[0]}'/g' |
+ sed 's/@{nodeName2}/'${HOST_NAMES_ARRAY[1]}'/g' |
+ sed 's/@{backIp2}/'${HOST_IPS_ARRAY[1]}'/g' > $(eval echo ~${USER_NAME})/one_master_one_slave.xml
+ else
+ die "cannot find one_master_one_slave_template.xml in ${XML_DIR}"
+ fi
+ cat $(eval echo ~${USER_NAME})/one_master_one_slave.xml
+ info "the installation file is automatically configured"
+ return 0
+}
+
+function install()
+{
+ info "preparing for preinstallation"
+ home_path=$(eval echo ~${USER_NAME})
+ export LD_LIBRARY_PATH="${PACKAGE_PATH}/script/gspylib/clib:"$LD_LIBRARY_PATH
+ sshcmd="python3 "${PACKAGE_PATH}"/script/gs_preinstall -U "${USER_NAME}" \
+ -G "${USER_GROUP}" -X "${home_path}"/one_master_one_slave.xml --sep-env-file="${home_path}"/env_master_slave"
+ info "cmd \"${sshcmd}\""
+ expect_ssh "${sshcmd}" "${PASSWORD}" "Preinstallation succeeded"
+ if [ $? != 0 ]
+ then
+ die "preinstall failed."
+ fi
+ info "preinstallation succeeded."
+ chmod 755 ${home_path}'/one_master_one_slave.xml'
+ chown ${USER_NAME}:${USER_GROUP} ${home_path}'/one_master_one_slave.xml'
+ info "start the installation."
+    su - ${USER_NAME} -c "source ${home_path}/env_master_slave;gs_install -X ${home_path}/one_master_one_slave.xml;gs_om -t status --detail"
+ if [ $? -ne 0 ]
+ then
+ die "install failed."
+ else
+ info "install success."
+ fi
+ exit 0
+}
+
+if [ "$1" == "inner" ]
+then
+    checks "$@"
+else
+    main "$@"
+ pre_checks
+ xmlconfig
+ install
+fi
+exit 0
+
diff --git a/simpleInstall/one_master_one_slave_template.xml b/simpleInstall/one_master_one_slave_template.xml
new file mode 100644
index 00000000..9735758d
--- /dev/null
+++ b/simpleInstall/one_master_one_slave_template.xml
@@ -0,0 +1,33 @@
+    [template body stripped in extraction: a cluster XML configuration using the placeholders @{clusterName}, @{nodeNames}, @{backIpls}, @{port}, @{installPath}, @{nodeName1}/@{backIp1}, and @{nodeName2}/@{backIp2}]
--
Gitee
From 674946a6b9d7ed13276f32e8e37a5951ec53a642 Mon Sep 17 00:00:00 2001
From: xue_weijing <1007396880@qq.com>
Date: Mon, 8 Mar 2021 11:13:42 +0800
Subject: [PATCH 12/16] =?UTF-8?q?=E4=BF=AE=E6=94=B9gs=5Fcheck=20-e=20insta?=
=?UTF-8?q?ll=E5=91=BD=E4=BB=A4=E7=89=B9=E5=AE=9A=E7=8E=AF=E5=A2=83connect?=
=?UTF-8?q?=E5=BC=82=E5=B8=B8=E7=9A=84=E5=9C=BA=E6=99=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.idea/workspace.xml | 39 ++++++++++
.../inspection/lib/checknetspeed/speed_test | 76 ++++++++++---------
2 files changed, 80 insertions(+), 35 deletions(-)
create mode 100644 .idea/workspace.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 00000000..93d77f26
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,39 @@
+    [.idea/workspace.xml body stripped in extraction: IDE project state (task timestamps 1615170173453), committed by mistake and removed again in PATCH 14/16]
\ No newline at end of file
diff --git a/script/gspylib/inspection/lib/checknetspeed/speed_test b/script/gspylib/inspection/lib/checknetspeed/speed_test
index 1607f8f1..6505de56 100644
--- a/script/gspylib/inspection/lib/checknetspeed/speed_test
+++ b/script/gspylib/inspection/lib/checknetspeed/speed_test
@@ -25,45 +25,51 @@ listen_port = 31111
run_mode = 0 # 0:connect, 1:send, 2:recv
def send_main():
- global listen_ip
- global listen_port
- buf = "this is a test !" * 512 # buf 8192 block
- sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
- print(listen_ip+":"+listen_port)
- while(sockets.connect_ex((listen_ip, int(listen_port))) != 0):
- print("connect failed:%m\n")
- time.sleep(1)
- print("connect succeed, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
- print("send satrt, dest[%s:%d], mode[%s]\n", listen_ip, listen_port, "tcp")
- i = 0
- while True:
- i = i + 1
- n = sockets.send(buf.encode())
- if n == 0:
- print("send failed:%m\n")
- break
- print("%d send:%s, len=%d\n", i, buf, n)
+ try:
+ global listen_ip
+ global listen_port
+ buf = "this is a test !" * 512 # buf 8192 block
+ sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+        print("%s:%s" % (listen_ip, listen_port))
+        while(sockets.connect_ex((listen_ip, int(listen_port))) != 0):
+            print("connect failed, retrying")
+            time.sleep(1)
+        print("connect succeed, dest[%s:%s], mode[%s]" % (listen_ip, listen_port, "tcp"))
+        print("send start, dest[%s:%s], mode[%s]" % (listen_ip, listen_port, "tcp"))
+ i = 0
+ while True:
+ i = i + 1
+ n = sockets.send(buf.encode())
+ if n == 0:
+ print("send failed:%m\n")
+ break
+ print("%d send:%s, len=%d\n", i, buf, n)
+ except Exception as e:
+ print(str(e))
def recv_main():
- global listen_ip
- global listen_port
- sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
- sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
- sockets.bind((listen_ip, int(listen_port)))
- sockets.listen(128)
- while True:
- client, addr = sockets.accept()
- print('client:', client)
- print('addr:', addr)
+ try:
+ global listen_ip
+ global listen_port
+ sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
+ sockets.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+ sockets.bind((listen_ip, int(listen_port)))
+ sockets.listen(128)
while True:
- data = client.recv(8192)
- print(data.decode())
- if not data:
- client.close()
- break
+ client, addr = sockets.accept()
+ print('client:', client)
+ print('addr:', addr)
+ while True:
+ data = client.recv(8192)
+ print(data.decode())
+ if not data:
+ client.close()
+ break
+ except Exception as e:
+ print(str(e))
def connect_main():
sockets = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
--
Gitee
From fb91e1f2dc94519653e7b20479756e606c5874ca Mon Sep 17 00:00:00 2001
From: gyt0221 <846772234@qq.com>
Date: Mon, 8 Mar 2021 14:28:48 +0800
Subject: [PATCH 13/16] =?UTF-8?q?=E5=8D=87=E7=BA=A7=E8=BF=87=E7=A8=8B?=
=?UTF-8?q?=E4=B8=AD=EF=BC=8C=E5=AF=B9=E6=89=A9=E5=AE=B9=EF=BC=8C=E7=BC=A9?=
=?UTF-8?q?=E5=AE=B9=E8=BF=9B=E8=A1=8C=E9=99=90=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gs_dropnode | 3 +++
script/gs_expansion | 3 +++
script/impl/upgrade/UpgradeImpl.py | 2 +-
3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/script/gs_dropnode b/script/gs_dropnode
index 291bcd73..9f4635fe 100644
--- a/script/gs_dropnode
+++ b/script/gs_dropnode
@@ -137,6 +137,9 @@ General options:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
if len(self.hostIpListForDel) == 0:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
+        # check whether an upgrade is in progress
+ if DefaultValue.isUnderUpgrade(self.user):
+ GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
try:
pw_user = pwd.getpwnam(self.user)
diff --git a/script/gs_expansion b/script/gs_expansion
index 34bc040a..98fc125e 100644
--- a/script/gs_expansion
+++ b/script/gs_expansion
@@ -140,6 +140,9 @@ General options:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X")
if len(self.newHostList) == 0:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h")
+        # check whether an upgrade is in progress
+ if DefaultValue.isUnderUpgrade(self.user):
+ GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
clusterInfo = ExpansipnClusterInfo()
self.clusterInfo = clusterInfo
diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py
index 386879de..0d6246f7 100644
--- a/script/impl/upgrade/UpgradeImpl.py
+++ b/script/impl/upgrade/UpgradeImpl.py
@@ -4614,7 +4614,7 @@ class UpgradeImpl:
def check_option_grey(self):
"""
- if user use --grey first, and then can not use --grey
+ if nodes have been upgraded, no need to use --grey to upgrade again
:return:
"""
stepFile = os.path.join(
--
Gitee
From e0505c7e733dc31e897a47fd118a7bb611efb626 Mon Sep 17 00:00:00 2001
From: xue_weijing <1007396880@qq.com>
Date: Tue, 9 Mar 2021 11:23:14 +0800
Subject: [PATCH 14/16] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99?=
=?UTF-8?q?=E7=9A=84idea=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.idea/workspace.xml | 39 ---------------------------------------
1 file changed, 39 deletions(-)
delete mode 100644 .idea/workspace.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
deleted file mode 100644
index 93d77f26..00000000
--- a/.idea/workspace.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-    [.idea/workspace.xml body stripped in extraction: the IDE project state added in PATCH 12/16]
\ No newline at end of file
--
Gitee
From 9763570ca5ae4a60cbb509b39099ac9b3aa4c08e Mon Sep 17 00:00:00 2001
From: cchen676
Date: Fri, 5 Mar 2021 16:33:53 +0800
Subject: [PATCH 15/16] =?UTF-8?q?fix=20issue=20gs=5Fom=20start=E5=B1=95?=
=?UTF-8?q?=E7=A4=BA=E7=9A=84warning=E4=BF=A1=E6=81=AF=E4=B8=8E=E5=AF=B9?=
=?UTF-8?q?=E5=BA=94=E7=9A=84=E8=8A=82=E7=82=B9=E4=B8=8D=E4=B8=80=E8=87=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
fix issue: gs_preinstall reports an error when --sep-env-file is specified under a /home/user path
---
script/gs_preinstall | 13 -------------
script/impl/om/OLAP/OmImplOLAP.py | 20 ++++++++++++--------
2 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/script/gs_preinstall b/script/gs_preinstall
index 2bcf81ed..5cfdd2bf 100644
--- a/script/gs_preinstall
+++ b/script/gs_preinstall
@@ -258,19 +258,6 @@ General options:
"be a directory."
% self.mpprcFile)
- try:
- # check the user if exist
- DefaultValue.getUserId(self.user)
- except Exception as e:
- mpprcFileTopPath = os.path.dirname(self.mpprcFile)
- # the mpprc file can not be specified in the /home/user directory
- userpath = "/home/%s/" % self.user
- if (mpprcFilePath.startswith(userpath)):
- GaussLog.exitWithError(
- ErrorCode.GAUSS_500["GAUSS_50004"] % '-sep-env-file' + \
- " Environment variable separation file can not be "
- "created under %s." % mpprcFileTopPath)
-
DefaultValue.checkMpprcFileChange(self.mpprcFile, "", self.mpprcFile)
(checkstatus, checkoutput) = DefaultValue.checkEnvFile(self.mpprcFile)
if (not checkstatus):
diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py
index f4a81e87..44ae7dbc 100644
--- a/script/impl/om/OLAP/OmImplOLAP.py
+++ b/script/impl/om/OLAP/OmImplOLAP.py
@@ -223,16 +223,20 @@ class OmImplOLAP(OmImpl):
self.context.g_opts.security_mode)
if self.dataDir != "":
cmd += " -D %s" % self.dataDir
- (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
+ failedOutput = ''
for nodeName in hostList:
+ (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, [nodeName])
if statusMap[nodeName] != 'Success':
- raise Exception(
- ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, output))
- if re.search("another server might be running", output):
- self.logger.log(output)
- if re.search("] WARNING:", output):
- tmp = '\n'.join(re.findall(".*] WARNING:.*", output))
- self.logger.log(output[0:output.find(":")] + '\n' + tmp)
+ failedOutput += output
+ elif re.search("another server might be running", output):
+ self.logger.log(output)
+ elif re.search("] WARNING:", output):
+ tmp = '\n'.join(re.findall(".*] WARNING:.*", output))
+ self.logger.log(output[0:output.find(":")] + '\n' + tmp)
+ if len(failedOutput):
+ self.logger.log("=========================================")
+ raise Exception(
+ ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, failedOutput))
if startType == "cluster":
starttime = time.time()
cluster_state = ""
--
Gitee
From b8e1b45f6a1df004c600d0673802fed2e46da371 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=96=9B=E8=92=99=E6=81=A9?= <1836611252@qq.com>
Date: Tue, 9 Mar 2021 20:20:00 +0800
Subject: [PATCH 16/16] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=9C=A8=E7=BA=BF?=
=?UTF-8?q?=E6=89=A9=E5=AE=B9bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
script/gs_expansion | 190 +++--
script/gspylib/common/DbClusterInfo.py | 20 +-
script/gspylib/common/ErrorCode.py | 4 +-
script/impl/expansion/ExpansionImpl.py | 924 ++++++++++++-------------
4 files changed, 568 insertions(+), 570 deletions(-)
diff --git a/script/gs_expansion b/script/gs_expansion
index 591a9b85..f695a4e6 100644
--- a/script/gs_expansion
+++ b/script/gs_expansion
@@ -22,6 +22,8 @@
import os
import sys
import subprocess
+
+import socket
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if 'LD_LIBRARY_PATH' not in os.environ:
@@ -92,7 +94,7 @@ General options:
-V, --version Show version information.
"""
print(self.usage.__doc__)
-
+
def parseCommandLine(self):
"""
parse parameter from command line
@@ -126,44 +128,34 @@ General options:
if (ParaDict.__contains__("nodename")):
self.newHostList = ParaDict.get("nodename")
-
def checkParameters(self):
"""
function: Check parameter from command line
input: NA
output: NA
"""
-
+
# check user | group | xmlfile | node
if len(self.user) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-U")
if len(self.group) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-G")
if len(self.xmlFile) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X")
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-X")
if len(self.newHostList) == 0:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h")
-
- clusterInfo = ExpansipnClusterInfo()
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35701"] % "-h")
+        # check whether an upgrade is in progress
+ if DefaultValue.isUnderUpgrade(self.user):
+ GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
+
+ def _getClusterInfoDict(self):
+ clusterInfo = ExpansionClusterInfo()
self.clusterInfo = clusterInfo
hostNameIpDict = clusterInfo.initFromXml(self.xmlFile)
clusterDict = clusterInfo.getClusterDirectorys()
- backIpList = clusterInfo.getClusterBackIps()
- nodeNameList = clusterInfo.getClusterNodeNames()
-
- self.localIp = backIpList[0]
- self.nodeNameList = nodeNameList
- self.backIpNameMap = {}
- for backip in backIpList:
- self.backIpNameMap[backip] = clusterInfo.getNodeNameByBackIp(backip)
+ self.nodeNameList = clusterInfo.getClusterNodeNames()
- # check parameter node must in xml config file
- for nodeid in self.newHostList:
- if nodeid not in backIpList:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \
- nodeid)
-
- # get corepath and toolpath from xml file
+ # get corepath and toolpath from xml file
corePath = clusterInfo.readClustercorePath(self.xmlFile)
toolPath = clusterInfo.getToolPath(self.xmlFile)
# parse xml file and cache node info
@@ -172,20 +164,16 @@ General options:
clusterInfoDict["logPath"] = clusterDict["logPath"][0]
clusterInfoDict["corePath"] = corePath
clusterInfoDict["toolPath"] = toolPath
- for nodeName in nodeNameList:
+ for nodeName in self.nodeNameList:
hostInfo = hostNameIpDict[nodeName]
ipList = hostInfo[0]
portList = hostInfo[1]
- backIp = ""
- sshIp = ""
- if len(ipList) == 1:
- backIp = sshIp = ipList[0]
- elif len(ipList) == 2:
- backIp = ipList[0]
- sshIp = ipList[1]
+ backIp = ipList[0]
+ sshIp = ipList[1]
port = portList[0]
cluster = clusterDict[nodeName]
dataNode = cluster[2]
+ dbNode = clusterInfo.getDbNodeByName(nodeName)
clusterInfoDict[nodeName] = {
"backIp": backIp,
"sshIp": sshIp,
@@ -194,9 +182,10 @@ General options:
"localservice": int(port) + 4,
"heartBeatPort": int(port) + 3,
"dataNode": dataNode,
- "instanceType": -1
+ "instanceType": -1,
+ "azPriority": dbNode.azPriority
}
-
+
nodeIdList = clusterInfo.getClusterNodeIds()
for id in nodeIdList:
insType = clusterInfo.getdataNodeInstanceType(id)
@@ -204,30 +193,6 @@ General options:
clusterInfoDict[hostName]["instanceType"] = insType
self.clusterInfoDict = clusterInfoDict
- for dbnode in clusterInfo.dbNodes:
- # get azName of all hosts
- self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName
- # get cascadeRole of newHosts
- if dbnode.backIps[0] in self.newHostList:
- self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole
-
- # check trust between the primary and other hosts
- sshTool = SshTool(nodeNameList, timeout = 0)
- retmap, output = sshTool.getSshStatusOutput("pwd")
- for nodeName in nodeNameList:
- # check root's trust
- if retmap[nodeName] != DefaultValue.SUCCESS:
- GaussLog.exitWithError("SSH could not connect to %s by root." % nodeName)
- try:
- sshTool.clenSshResultFiles()
- except Exception as e:
- self.logger.debug(str(e))
- # check individual user's trust
- checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, nodeName)
- (status, output) = subprocess.getstatusoutput(checkUserTrustCmd)
- if status != 0:
- GaussLog.exitWithError("SSH could not connect to %s by individual user." % nodeName)
-
def initLogs(self):
"""
init log file
@@ -243,12 +208,108 @@ General options:
self.initLogger("gs_expansion")
self.logger.ignoreErr = True
+
+ def getExpansionInfo(self):
+ self._getClusterInfoDict()
+ self._getBackIpNameMap()
+ self._getHostAzNameMap()
+ self._getNewHostCasRoleMap()
-class ExpansipnClusterInfo(dbClusterInfo):
+ def checkXmlIncludeNewHost(self):
+ """
+ check parameter node must in xml config file
+ """
+ backIpList = self.clusterInfo.getClusterBackIps()
+ for nodeIp in self.newHostList:
+ if nodeIp not in backIpList:
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \
+ nodeIp)
+
+ def _getBackIpNameMap(self):
+ backIpList = self.clusterInfo.getClusterBackIps()
+ for backip in backIpList:
+ self.backIpNameMap[backip] = \
+ self.clusterInfo.getNodeNameByBackIp(backip)
+
+ def checkExecutingUser(self):
+ """
+ check whether current user executing this command is root
+ """
+ if os.getuid() != 0:
+ GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"])
+
+ def checkExecutingHost(self):
+ """
+ check whether current host is primary host
+ """
+ currentHost = socket.gethostname()
+ primaryHost = ""
+ for nodeName in self.nodeNameList:
+ if self.clusterInfoDict[nodeName]["instanceType"] \
+ == 0:
+ primaryHost = nodeName
+ break
+ if currentHost != primaryHost:
+ GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] % \
+ (currentHost + ", which is not primary."))
+
+ def checkTrust(self, hostList = None):
+ """
+ check trust between primary/current host and every host in hostList
+ """
+        if hostList is None:
+ hostList = self.nodeNameList
+ rootSSHExceptionHosts = []
+ individualSSHExceptionHosts = []
+ sshTool = SshTool(hostList, timeout = 0)
+ retmap, output = sshTool.getSshStatusOutput("pwd")
+ for host in hostList:
+ # check root's trust
+ if retmap[host] != DefaultValue.SUCCESS:
+ rootSSHExceptionHosts.append(host)
+ try:
+ sshTool.clenSshResultFiles()
+ except Exception as e:
+ self.logger.debug(str(e))
+ # check individual user's trust
+ checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, host)
+ (status, output) = subprocess.getstatusoutput(checkUserTrustCmd)
+ if status != 0:
+ individualSSHExceptionHosts.append(host)
+ # output ssh exception info if ssh connect failed
+ if rootSSHExceptionHosts or individualSSHExceptionHosts:
+ sshExceptionInfo = ""
+ if rootSSHExceptionHosts:
+ sshExceptionInfo += "\n"
+ sshExceptionInfo += ", ".join(rootSSHExceptionHosts)
+ sshExceptionInfo += " by root"
+ if individualSSHExceptionHosts:
+ sshExceptionInfo += "\n"
+ sshExceptionInfo += ", ".join(individualSSHExceptionHosts)
+ sshExceptionInfo += " by individual user"
+ GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51100"] %
+ sshExceptionInfo)
+
+ def _getHostAzNameMap(self):
+ """
+ get azName of all hosts
+ """
+ for dbnode in self.clusterInfo.dbNodes:
+ self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName
+
+ def _getNewHostCasRoleMap(self):
+ """
+ get cascadeRole of newHosts
+ """
+ for dbnode in self.clusterInfo.dbNodes:
+ if dbnode.backIps[0] in self.newHostList:
+ self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole
+
+class ExpansionClusterInfo(dbClusterInfo):
def __init__(self):
dbClusterInfo.__init__(self)
-
+
def getToolPath(self, xmlFile):
"""
function : Read tool path from default xml file
@@ -266,13 +327,18 @@ class ExpansipnClusterInfo(dbClusterInfo):
checkPathVaild(toolPath)
return toolPath
+
if __name__ == "__main__":
"""
"""
expansion = Expansion()
+ expansion.checkExecutingUser()
expansion.parseCommandLine()
expansion.checkParameters()
expansion.initLogs()
+ expansion.getExpansionInfo()
+ expansion.checkXmlIncludeNewHost()
+ expansion.checkExecutingHost()
+ expansion.checkTrust()
expImpl = ExpansionImpl(expansion)
- expImpl.run()
-
+ expImpl.run()
\ No newline at end of file
diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py
index 7aaac20d..0d30c796 100644
--- a/script/gspylib/common/DbClusterInfo.py
+++ b/script/gspylib/common/DbClusterInfo.py
@@ -3188,36 +3188,24 @@ class dbClusterInfo():
"with cm and etcd") + errMsg)
# create a dictionary
nodeipport[dbNode.name] = [nodeips, nodeports]
- # delete redundant records
- self.__Deduplication(nodeports)
- self.__Deduplication(nodeips)
# check port and ip
self.__checkPortandIP(nodeips, nodeports, dbNode.name)
return nodeipport
- def __Deduplication(self, currentlist):
- """
- function : Delete the deduplication.
- input : []
- output : NA
- """
- currentlist.sort()
- for i in range(len(currentlist) - 2, -1, -1):
- if currentlist.count(currentlist[i]) > 1:
- del currentlist[i]
-
def __checkPortandIP(self, ips, ports, name):
"""
function : Check port and IP.
input : String,int,string
output : NA
"""
- for port in ports:
+ ipsCopy = list(set(ips))
+ portsCopy = list(set(ports))
+ for port in portsCopy:
if (not self.__isPortValid(port)):
raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"]
% (port, name) + " Please check it.")
- for ip in ips:
+ for ip in ipsCopy:
if (not self.__isIpValid(ip)):
raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \
"The IP address is: %s." % ip + " Please "
diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py
index 4f5ca1c0..ad38a224 100644
--- a/script/gspylib/common/ErrorCode.py
+++ b/script/gspylib/common/ErrorCode.py
@@ -1112,8 +1112,8 @@ class ErrorCode():
"detail.",
"GAUSS_35704": "[GAUSS-35704] %s [%s] does not exist on node [%s].",
"GAUSS_35705": "[GAUSS-35705] Error, the database version is "
- "inconsistent in %s: %s"
-
+ "inconsistent in %s: %s",
+ "GAUSS_35706": "[GAUSS-35706] All new hosts %s failed."
}
##########################################################################
diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py
index 484bdfd8..495ec08b 100644
--- a/script/impl/expansion/ExpansionImpl.py
+++ b/script/impl/expansion/ExpansionImpl.py
@@ -31,7 +31,6 @@ import time
import grp
import socket
import stat
-import copy
from multiprocessing import Process, Value
sys.path.append(sys.path[0] + "/../../../../")
@@ -88,6 +87,8 @@ class ExpansionImpl():
self.group = self.context.group
self.existingHosts = []
self.expansionSuccess = {}
+ for newHost in self.context.newHostList:
+ self.expansionSuccess[newHost] = False
self.logger = self.context.logger
envFile = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
@@ -111,32 +112,26 @@ class ExpansionImpl():
"""
create software dir and send it on each nodes
"""
- self.logger.debug("Start to send soft to each standby nodes.\n")
- hostNames = self.context.newHostList
- hostList = hostNames
-
- sshTool = SshTool(hostNames)
-
+ self.logger.log("Start to send soft to each standby nodes.")
srcFile = self.context.packagepath
- targetDir = os.path.realpath(
- os.path.join(srcFile, "../"))
-
- ## mkdir package dir and send package to remote nodes.
- sshTool.executeCommand("mkdir -p %s" % srcFile , "",
- DefaultValue.SUCCESS, hostList)
- sshTool.scpFiles(srcFile, targetDir, hostList)
-
- ## change mode of package dir to set privileges for users
+ targetDir = os.path.realpath(os.path.join(srcFile, "../"))
+ # change mode of package dir to set privileges for users
tPathList = os.path.split(targetDir)
path2ChangeMode = targetDir
if len(tPathList) > 2:
path2ChangeMode = os.path.join(tPathList[0],tPathList[1])
- changeModCmd = "chmod -R a+x {srcFile}".format(user=self.user,
- group=self.group,srcFile=path2ChangeMode)
- sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS,
- hostList)
- self.logger.debug("End to send soft to each standby nodes.\n")
- self.cleanSshToolFile(sshTool)
+        changeModCmd = "chmod -R a+x {srcFile}".format(
+            srcFile = path2ChangeMode)
+ for host in self.context.newHostList:
+ sshTool = SshTool([host], timeout = 300)
+ # mkdir package dir and send package to remote nodes.
+ sshTool.executeCommand("mkdir -p %s" % srcFile , "",
+ DefaultValue.SUCCESS, [host])
+ sshTool.scpFiles(srcFile, targetDir, [host])
+ sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS,
+ [host])
+ self.cleanSshToolFile(sshTool)
+ self.logger.log("End to send soft to each standby nodes.")
def generateAndSendXmlFile(self):
"""
@@ -160,7 +155,7 @@ class ExpansionImpl():
fo.write( xmlContent )
fo.close()
# send single deploy xml file to each standby node
- sshTool = SshTool(host)
+ sshTool = SshTool([host])
retmap, output = sshTool.getSshStatusOutput("mkdir -p %s" %
self.tempFileDir , [host], self.envFile)
retmap, output = sshTool.getSshStatusOutput("chown %s:%s %s" %
@@ -176,6 +171,7 @@ class ExpansionImpl():
"""
nodeName = self.context.backIpNameMap[backIp]
nodeInfo = self.context.clusterInfoDict[nodeName]
+ clusterName = self.context.clusterInfo.name
backIp = nodeInfo["backIp"]
sshIp = nodeInfo["sshIp"]
@@ -191,12 +187,13 @@ class ExpansionImpl():
if tmpMppdbPath:
mppdbconfig = '' % tmpMppdbPath
azName = self.context.hostAzNameMap[backIp]
+ azPriority = nodeInfo["azPriority"]
xmlConfig = """\
-
+
@@ -207,10 +204,10 @@ class ExpansionImpl():
-
+
-
+
@@ -220,10 +217,10 @@ class ExpansionImpl():
- """.format(nodeName=nodeName,backIp=backIp,appPath=appPath,
- logPath=logPath,toolPath=toolPath,corePath=corePath,
- sshIp=sshIp,port=port,dataNode=dataNode,azName=azName,
- mappdbConfig=mppdbconfig)
+ """.format(clusterName = clusterName, nodeName = nodeName, backIp = backIp,
+ appPath = appPath, logPath = logPath, toolPath = toolPath, corePath = corePath,
+ sshIp = sshIp, port = port, dataNode = dataNode, azName = azName,
+ azPriority = azPriority, mappdbConfig = mppdbconfig)
return xmlConfig
def changeUser(self):
@@ -257,29 +254,55 @@ class ExpansionImpl():
self.logger.log("Authentication failed.")
self.initSshConnect(host, user)
+ def hasNormalStandbyInAZOfCascade(self, cascadeIp, existingStandbys):
+ # check whether there are normal standbies in hostAzNameMap[cascadeIp] azZone
+ hasStandbyWithSameAZ = False
+ hostAzNameMap = self.context.hostAzNameMap
+ for existingStandby in existingStandbys:
+ existingStandbyName = self.context.backIpNameMap[existingStandby]
+ existingStandbyDataNode = \
+ self.context.clusterInfoDict[existingStandbyName]["dataNode"]
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ existingStandby, existingStandbyDataNode, self.envFile)
+ if dbStat != STAT_NORMAL:
+ continue
+ if hostAzNameMap[cascadeIp] != hostAzNameMap[existingStandby]:
+ continue
+ hasStandbyWithSameAZ = True
+ break
+ return hasStandbyWithSameAZ
+
def installDatabaseOnHosts(self):
"""
install database on each standby node
"""
- hostList = self.context.newHostList
- envfile = self.envFile
+ standbyHosts = self.context.newHostList
tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
- installCmd = "source {envfile} ; gs_install -X {xmlfile} \
- 2>&1".format(envfile=envfile,xmlfile=tempXmlFile)
-
- statusArr = []
-
- for newHost in hostList:
-
- self.logger.log("\ninstalling database on node %s:" % newHost)
- self.logger.debug(installCmd)
-
+ installCmd = "source {envFile} ; gs_install -X {xmlFile} "\
+ "2>&1".format(envFile = self.envFile, xmlFile = tempXmlFile)
+ self.logger.debug(installCmd)
+ primaryHostName = self.getPrimaryHostName()
+ primaryHostIp = self.context.clusterInfoDict[primaryHostName]["backIp"]
+ existingStandbys = list(set(self.existingHosts) - (set([primaryHostIp])))
+ failedInstallHosts = []
+ notInstalledCascadeHosts = []
+ for newHost in standbyHosts:
+ if not self.expansionSuccess[newHost]:
+ continue
+ self.logger.log("Installing database on node %s:" % newHost)
hostName = self.context.backIpNameMap[newHost]
sshIp = self.context.clusterInfoDict[hostName]["sshIp"]
+ if self.context.newHostCasRoleMap[newHost] == "on":
+ # check whether there are normal standbies in hostAzNameMap[host] azZone
+ hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(newHost,
+ existingStandbys)
+ if not hasStandbyWithSameAZ:
+ notInstalledCascadeHosts.append(newHost)
+ self.expansionSuccess[newHost] = False
+ continue
self.initSshConnect(sshIp, self.user)
-
stdin, stdout, stderr = self.sshClient.exec_command(installCmd,
- get_pty=True)
+ get_pty=True)
channel = stdout.channel
echannel = stderr.channel
@@ -319,76 +342,74 @@ class ExpansionImpl():
not channel.recv_ready():
channel.close()
break
-
stdout.close()
stderr.close()
- status = channel.recv_exit_status()
- statusArr.append(status)
-
- isBothSuccess = True
- for status in statusArr:
- if status != 0:
- isBothSuccess = False
- break
- if isBothSuccess:
- self.logger.log("\nSuccessfully install database on node %s" %
- hostList)
- else:
- sys.exit(1)
+ if channel.recv_exit_status() != 0:
+ self.expansionSuccess[newHost] = False
+ failedInstallHosts.append(newHost)
+ else:
+ if self.context.newHostCasRoleMap[newHost] == "off":
+ existingStandbys.append(newHost)
+ self.logger.log("%s install success." % newHost)
+ if notInstalledCascadeHosts:
+ self.logger.log("OpenGauss won't be installed on cascade_standby"
+ " %s, because there is no Normal standby in the same azZone." %
+ ", ".join(notInstalledCascadeHosts))
+ if failedInstallHosts:
+ self.logger.log(ErrorCode.GAUSS_527["GAUSS_52707"] %
+ ", ".join(failedInstallHosts))
+ self.logger.log("Finish to install database on all nodes.")
+ if self._isAllFailed():
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "install")
def preInstallOnHosts(self):
"""
execute preinstall step
"""
- self.logger.debug("Start to preinstall database step.\n")
- newBackIps = self.context.newHostList
- newHostNames = []
- for host in newBackIps:
- newHostNames.append(self.context.backIpNameMap[host])
- envfile = self.envFile
+ self.logger.log("Start to preinstall database step.")
tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
- userpath = pwd.getpwnam(self.user).pw_dir
- mpprcFile = os.path.join(userpath, ".bashrc")
- if envfile == mpprcFile:
- preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \
- -X {xmlfile} --non-interactive 2>&1\
- ".format(softpath=self.context.packagepath,user=self.user,
- group=self.group,xmlfile=tempXmlFile)
+ if not DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\
+ "-X {xmlFile} --non-interactive 2>&1".format(
+ softPath = self.context.packagepath, user = self.user,
+ group = self.group, xmlFile = tempXmlFile)
else:
- preinstallCmd = "{softpath}/script/gs_preinstall -U {user} -G {group} \
- -X {xmlfile} --sep-env-file={envfile} \
- --non-interactive 2>&1\
- ".format(softpath=self.context.packagepath,user=self.user,
- group=self.group,xmlfile=tempXmlFile,envfile=envfile)
-
- sshTool = SshTool(newHostNames)
-
- status, output = sshTool.getSshStatusOutput(preinstallCmd , [], envfile)
- statusValues = status.values()
- if STATUS_FAIL in statusValues:
- GaussLog.exitWithError(output)
-
- self.logger.debug("End to preinstall database step.\n")
- self.cleanSshToolFile(sshTool)
-
+ preinstallCmd = "{softPath}/script/gs_preinstall -U {user} -G {group} "\
+ "-X {xmlFile} --sep-env-file={envFile} --non-interactive 2>&1".format(
+ softPath = self.context.packagepath, user = self.user,
+ group = self.group, xmlFile = tempXmlFile, envFile = self.envFile)
+
+ failedPreinstallHosts = []
+ for host in self.context.newHostList:
+ sshTool = SshTool([host], timeout = 300)
+ resultMap, output = sshTool.getSshStatusOutput(preinstallCmd, [], self.envFile)
+ if resultMap[host] == DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = True
+ self.logger.log("Preinstall %s success" % host)
+ else:
+ failedPreinstallHosts.append(host)
+ self.cleanSshToolFile(sshTool)
+ if failedPreinstallHosts:
+ self.logger.log("Failed to preinstall on: \n%s" % ", ".join(failedPreinstallHosts))
+ self.logger.log("End to preinstall database step.")
def buildStandbyRelation(self):
"""
func: after install single database on standby nodes.
build the relation with primary and standby nodes.
step:
- 1. get existing hosts
- 2. set guc config to primary node
- 3. restart standby node with Standby Mode
- 4. set guc config to standby node
+ 1. set all nodes' guc config parameter: replconninfo, available_zone(only for new)
+ 2. add trust on all hosts
+ 3. generate GRPC cert on new hosts, and primary if current cluster is single instance
+ 4. build new hosts :
+ (1) restart new instance with standby mode
+ (2) build new instances
5. rollback guc config of existing hosts if build failed
6. generate cluster static file and send to each node.
"""
- self.getExistingHosts()
- self.setPrimaryGUCConfig()
- self.setStandbyGUCConfig()
- self.addTrustOnExistNodes()
+ self.setGucConfig()
+ self.addTrust()
self.generateGRPCCert()
self.buildStandbyHosts()
self.rollback()
@@ -398,104 +419,123 @@ class ExpansionImpl():
"""
get the exiting hosts
"""
- self.logger.debug("Get the existing hosts.\n")
+ self.logger.debug("Get the existing hosts.")
primaryHost = self.getPrimaryHostName()
- result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
- instances = re.split('(?:\|)|(?:\n)', result)
+ command = ""
+ if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
+ (self.user, self.envFile)
+ else:
+ command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\
+ "gs_om -t status --detail'" % (self.user, self.user)
+ sshTool = SshTool([primaryHost])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [primaryHost], self.envFile)
+ self.logger.debug(outputCollect)
+ if resultMap[primaryHost] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError("Unable to query current cluster state.")
+ instances = re.split('(?:\|)|(?:\n)', outputCollect)
self.existingHosts = []
+        pattern = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
for inst in instances:
- pattern = re.compile('(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
- result = pattern.findall(inst)
- if len(result) != 0:
- self.existingHosts.append(result[0])
+ existingHosts = pattern.findall(inst)
+ if len(existingHosts) != 0:
+ self.existingHosts.append(existingHosts[0])
- def setPrimaryGUCConfig(self):
+ def setGucConfig(self):
"""
+ set replconninfo on all hosts
"""
- self.logger.debug("Start to set primary node GUC config.\n")
- primaryHost = self.getPrimaryHostName()
+ self.logger.debug("Start to set GUC config on all hosts.\n")
+ gucDict = self.getGUCConfig()
+ tempShFile = "%s/guc.sh" % self.tempFileDir
+ hostIpList = list(self.existingHosts)
+ for host in self.expansionSuccess:
+ hostIpList.append(host)
- self.setGUCOnClusterHosts([primaryHost])
- self.addStandbyIpInPrimaryConf()
-
-
- def setStandbyGUCConfig(self):
- """
- set the expansion standby node db guc config
- """
- self.logger.debug("Start to set standby node GUC config.\n")
- primaryHost = self.getPrimaryHostName()
- existingStandbyHosts = list(
- set(self.existingHosts).difference(set([primaryHost])))
- standbyHosts = existingStandbyHosts + self.context.newHostList
- standbyNames = []
- for standbyHost in standbyHosts:
- standbyNames.append(self.context.backIpNameMap[standbyHost])
- self.setGUCOnClusterHosts(standbyNames)
-
- def addTrustOnExistNodes(self):
- """
- add host trust in pg_hba.conf on existing standby node.
- """
- self.logger.debug("Start to set host trust on existing node.")
- allNodeNames = self.context.nodeNameList
- newNodeIps = self.context.newHostList
- newNodeNames = []
- trustCmd = []
- for node in newNodeIps:
- nodeName = self.context.backIpNameMap[node]
- newNodeNames.append(nodeName)
- cmd = 'host all all %s/32 trust' % node
- trustCmd.append(cmd)
- existNodes = list(set(allNodeNames).difference(set(newNodeNames)))
- for node in existNodes:
- dataNode = self.context.clusterInfoDict[node]["dataNode"]
- cmd = ""
- for trust in trustCmd:
- cmd += "source %s; gs_guc set -D %s -h '%s';" % \
- (self.envFile, dataNode, trust)
- sshTool = SshTool([node])
- resultMap, outputCollect = sshTool.getSshStatusOutput(cmd,
- [node], self.envFile)
+ nodeDict = self.context.clusterInfoDict
+ backIpNameMap = self.context.backIpNameMap
+ hostAzNameMap = self.context.hostAzNameMap
+ for host in hostIpList:
+ hostName = backIpNameMap[host]
+ # set Available_zone for the new standby
+ if host in self.context.newHostList:
+ dataNode = nodeDict[hostName]["dataNode"]
+ gucDict[hostName] += """\
+gs_guc set -D {dn} -c "available_zone='{azName}'"
+ """.format(dn=dataNode, azName=hostAzNameMap[host])
+ command = "source %s ; " % self.envFile + gucDict[hostName]
+ self.logger.debug("[%s] gucCommand:%s" % (host, command))
+
+ sshTool = SshTool([host])
+ # create temporary dir to save guc command bashfile.
+ mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
+ (self.tempFileDir, self.user, self.group, self.tempFileDir)
+ sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
+ subprocess.getstatusoutput("touch %s; cat /dev/null > %s" %
+ (tempShFile, tempShFile))
+ with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
+ stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
+ fo.write("#bash\n")
+ fo.write(command)
+ fo.close()
+
+ # send guc command bashfile to each host and execute it.
+ sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host],
+ self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ "sh %s" % tempShFile, [host], self.envFile)
+
+ self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
- self.logger.debug("End to set host trust on existing node.")
-
+
+ def addTrust(self):
+ """
+ add authentication rules about new host ip in existing hosts and
+ add authentication rules about other all hosts ip in new hosts
+ """
+ self.logger.debug("Start to set host trust on all node.")
+ allHosts = self.existingHosts + self.context.newHostList
+ for hostExec in allHosts:
+ hostExecName = self.context.backIpNameMap[hostExec]
+ dataNode = self.context.clusterInfoDict[hostExecName]["dataNode"]
+ cmd = "source %s;gs_guc set -D %s" % (self.envFile, dataNode)
+ if hostExec in self.existingHosts:
+ for hostParam in self.context.newHostList:
+ cmd += " -h 'host all all %s/32 trust'" % \
+ hostParam
+ else:
+ for hostParam in allHosts:
+ if hostExec != hostParam:
+ cmd += " -h 'host all all %s/32 trust'" % \
+ hostParam
+ self.logger.debug("[%s] trustCmd:%s" % (hostExec, cmd))
+ sshTool = SshTool([hostExec])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(cmd,
+ [hostExec], self.envFile)
+ self.cleanSshToolFile(sshTool)
+ self.logger.debug("End to set host trust on all node.")
+
def generateGRPCCert(self):
"""
- generate GRPC cert
+ generate GRPC cert for single node
"""
primaryHost = self.getPrimaryHostName()
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
+ needGRPCHosts = []
+ for host in self.expansionSuccess:
+ if self.expansionSuccess[host]:
+ needGRPCHosts.append(host)
insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
dataNode,self.envFile)
- needGRPCHosts = copy.copy(self.context.newHostList)
if insType != MODE_PRIMARY:
primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"]
needGRPCHosts.append(primaryHostIp)
- self.logger.debug("\nStart to generate GRPC cert.")
- self.context.initSshTool(needGRPCHosts)
- self.context.createGrpcCa(needGRPCHosts)
- self.logger.debug("\nEnd to generate GRPC cert.")
-
- def addStandbyIpInPrimaryConf(self):
- """
- add standby hosts ip in primary node pg_hba.conf
- """
-
- standbyHosts = self.context.newHostList
- primaryHost = self.getPrimaryHostName()
- command = ''
- for host in standbyHosts:
- hostName = self.context.backIpNameMap[host]
- dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- command += ("source %s; gs_guc set -D %s -h 'host all all %s/32 " + \
- "trust';") % (self.envFile, dataNode, host)
- self.logger.debug(command)
- sshTool = SshTool([primaryHost])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [primaryHost], self.envFile)
- self.logger.debug(outputCollect)
- self.cleanSshToolFile(sshTool)
+ self.logger.debug("Start to generate GRPC cert.")
+ if needGRPCHosts:
+ self.context.initSshTool(needGRPCHosts)
+ self.context.createGrpcCa(needGRPCHosts)
+ self.logger.debug("End to generate GRPC cert.")
def reloadPrimaryConf(self):
"""
@@ -526,168 +566,120 @@ class ExpansionImpl():
"""
stop the new standby host`s database and build it as standby mode
"""
- self.logger.debug("start to build standby node...\n")
+ self.logger.debug("Start to build new nodes.")
standbyHosts = self.context.newHostList
- primaryHost = self.getPrimaryHostName()
+ hostAzNameMap = self.context.hostAzNameMap
+ primaryHostName = self.getPrimaryHostName()
+ primaryHost = self.context.clusterInfoDict[primaryHostName]["backIp"]
existingStandbys = list(set(self.existingHosts).difference(set([primaryHost])))
+ primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"]
+ self.reloadPrimaryConf()
+ time.sleep(10)
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ primaryHost, primaryDataNode, self.envFile)
+ primaryExceptionInfo = ""
+ if insType != ROLE_PRIMARY:
+ primaryExceptionInfo = "The server mode of primary host" \
+ "is not primary."
+ if dbStat != STAT_NORMAL:
+ primaryExceptionInfo = "The primary is not in Normal state."
+ if primaryExceptionInfo != "":
+ self.rollback()
+ GaussLog.exitWithError(primaryExceptionInfo)
for host in standbyHosts:
- self.expansionSuccess[host] = False
-
- # build standby firstly
- for host in standbyHosts:
- if self.context.newHostCasRoleMap[host] == "on":
+ if not self.expansionSuccess[host]:
continue
- self.logger.log("Start to build standby %s." % host)
- startSuccess = False
hostName = self.context.backIpNameMap[host]
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
-
+ buildMode = ""
+ hostRole = ""
+ if self.context.newHostCasRoleMap[host] == "on":
+ buildMode = MODE_CASCADE
+ hostRole = ROLE_CASCADE
+ # check whether there are normal standbies in hostAzNameMap[host] azZone
+ hasStandbyWithSameAZ = self.hasNormalStandbyInAZOfCascade(host,
+ existingStandbys)
+ if not hasStandbyWithSameAZ:
+ self.logger.log("There is no Normal standby in %s" %
+ hostAzNameMap[host])
+ self.expansionSuccess[host] = False
+ continue
+ else:
+ buildMode = MODE_STANDBY
+ hostRole = ROLE_STANDBY
+ self.logger.log("Start to build %s %s." % (hostRole, host))
self.checkTmpDir(hostName)
-
+ # start new host as standby mode
self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
-
- # start standby as standby mode for three times max.
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(
- hostName,dataNode, self.envFile)
- if insType != ROLE_STANDBY:
- self.logger.debug("Start database as Standby mode failed, "\
- "retry for %s times" % start_retry_num)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- start_retry_num = start_retry_num + 1
- else:
- startSuccess = True
- break
- if startSuccess == False:
- self.logger.debug("Start database %s as Standby mode failed!" % host)
- continue
-
- buildSuccess = False
- # build standby node
- self.addStandbyIpInPrimaryConf()
- self.reloadPrimaryConf()
- time.sleep(10)
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(
- primaryHost, dataNode, self.envFile)
- if insType != ROLE_PRIMARY:
- GaussLog.exitWithError("The server mode of primary host" \
- "is not primary!")
- if dbStat != STAT_NORMAL:
- GaussLog.exitWithError("The primary is not Normal!")
-
- self.commonGsCtl.buildInstance(hostName, dataNode, MODE_STANDBY,
- self.envFile)
-
- # if build failed first time. retry for three times.
- start_retry_num = 1
- while start_retry_num <= 3:
- time.sleep(10)
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(
- hostName, dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- self.logger.debug("Build standby instance failed, " \
- "retry for %s times" % start_retry_num)
- self.commonGsCtl.buildInstance(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- start_retry_num = start_retry_num + 1
+ result, output = self.commonGsCtl.startInstanceWithMode(host,
+ dataNode, MODE_STANDBY, self.envFile)
+ if result[host] != DefaultValue.SUCCESS:
+ if "uncompleted build is detected" not in output:
+ self.expansionSuccess[host] = False
+ self.logger.log("Failed to start %s as standby "
+ "before building." % host)
else:
- buildSuccess = True
- self.expansionSuccess[host] = True
- existingStandbys.append(host)
- break
- if buildSuccess == False:
- self.logger.log("Build standby %s failed." % host)
+ self.logger.debug("Uncompleted build is detected on %s." %
+ host)
else:
- self.logger.log("Build standby %s success." % host)
-
-
- # build cascade standby
- hostAzNameMap = self.context.hostAzNameMap
- for host in standbyHosts:
- if self.context.newHostCasRoleMap[host] == "off":
- continue
- self.logger.log("Start to build cascade standby %s." % host)
- startSuccess = False
- hostName = self.context.backIpNameMap[host]
- dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- # if no Normal standby same with the current cascade_standby, skip
- hasStandbyWithSameAZ = False
- for existingStandby in existingStandbys:
- existingStandbyName = self.context.backIpNameMap[existingStandby]
- existingStandbyDataNode = self.context.clusterInfoDict[existingStandbyName]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
hostName, dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- continue
- if hostAzNameMap[existingStandby] != hostAzNameMap[host]:
+ if insType != ROLE_STANDBY:
+ self.logger.log("Build %s failed." % host)
+ self.expansionSuccess[host] = False
continue
- hasStandbyWithSameAZ = True
- break
- if not hasStandbyWithSameAZ:
- self.logger.log("There is no Normal standby in %s" % \
- hostAzNameMap[host])
- continue
-
- self.checkTmpDir(hostName)
-
- self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- # start cascadeStandby as standby mode for three times max.
- start_retry_num = 1
- while start_retry_num <= 3:
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if insType != ROLE_STANDBY:
- self.logger.debug("Start database as Standby mode failed, "\
- "retry for %s times" % start_retry_num)
- self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
- MODE_STANDBY, self.envFile)
- start_retry_num = start_retry_num + 1
- else:
- startSuccess = True
- break
- if startSuccess == False:
- self.logger.log("Start database %s as Standby mode failed!" % host)
+ # build new host
+ sshTool = SshTool([host])
+ tempShFile = "%s/buildStandby.sh" % self.tempFileDir
+ # create temporary dir to save gs_ctl build command bashfile.
+ mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
+ (self.tempFileDir, self.user, self.group, self.tempFileDir)
+ sshTool.getSshStatusOutput(mkdirCmd, [host], self.envFile)
+ subprocess.getstatusoutput("touch %s; cat /dev/null > %s" %
+ (tempShFile, tempShFile))
+ buildCmd = "gs_ctl build -D %s -M %s" % (dataNode, buildMode)
+ gs_ctlBuildCmd = "source %s ;nohup " % self.envFile + buildCmd + " 1>/dev/null 2>/dev/null &"
+ self.logger.debug("[%s] gs_ctlBuildCmd: %s" % (host, gs_ctlBuildCmd))
+ with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
+ stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
+ fo.write("#bash\n")
+ fo.write(gs_ctlBuildCmd)
+ fo.close()
+ # send gs_ctlBuildCmd bashfile to the standby host and execute it.
+ sshTool.scpFiles(tempShFile, tempShFile, [host], self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \
+ tempShFile, [host], self.envFile)
+ if resultMap[host] != DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = False
+ self.logger.debug("Failed to send gs_ctlBuildCmd bashfile "
+ "to %s." % host)
+ self.logger.log("Build %s %s failed." % (hostRole, host))
continue
-
- # build cascade standby node
- self.addStandbyIpInPrimaryConf()
- self.reloadPrimaryConf()
- self.commonGsCtl.buildInstance(hostName, dataNode, MODE_CASCADE, \
- self.envFile)
-
- buildSuccess = False
- # if build failed first time. retry for three times.
- start_retry_num = 1
- while start_retry_num <= 3:
- time.sleep(10)
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(
- hostName, dataNode, self.envFile)
- if dbStat != STAT_NORMAL:
- self.logger.debug("Build standby instance failed, "\
- "retry for %s times" % start_retry_num)
- self.addStandbyIpInPrimaryConf()
- self.reloadPrimaryConf()
- self.commonGsCtl.buildInstance(hostName, dataNode, \
- MODE_CASCADE, self.envFile)
- start_retry_num = start_retry_num + 1
- else:
- buildSuccess = True
- self.expansionSuccess[host] = True
+ # check whether build process has finished
+ checkProcessExistCmd = "ps x"
+ while True:
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ checkProcessExistCmd, [host])
+ if buildCmd not in outputCollect:
break
- if buildSuccess == False:
- self.logger.log("Build cascade standby %s failed." % host)
+ else:
+ time.sleep(10)
+ # check build result after build process finished
+ insType, dbStat = self.commonGsCtl.queryInstanceStatus(
+ hostName, dataNode, self.envFile)
+ if insType == hostRole and dbStat == STAT_NORMAL:
+ if self.context.newHostCasRoleMap[host] == "off":
+ existingStandbys.append(host)
+ self.logger.log("Build %s %s success." % (hostRole, host))
else:
- self.logger.log("Build cascade standby %s success." % host)
-
+ self.expansionSuccess[host] = False
+ self.logger.log("Build %s %s failed." % (hostRole, host))
+ if self._isAllFailed():
+ self.rollback()
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "build")
def checkTmpDir(self, hostName):
"""
@@ -709,7 +701,7 @@ class ExpansionImpl():
"""
generate static_config_files and send to all hosts
"""
- self.logger.log("Start to generate and send cluster static file.\n")
+ self.logger.log("Start to generate and send cluster static file.")
primaryHost = self.getPrimaryHostName()
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
@@ -731,7 +723,7 @@ class ExpansionImpl():
static_config_dir = "%s/script/static_config_files" % toolPath
if not os.path.exists(static_config_dir):
os.makedirs(static_config_dir)
-
+
# valid if dynamic config file exists.
dynamic_file = "%s/bin/cluster_dynamic_config" % appPath
dynamic_file_exist = False
@@ -753,91 +745,38 @@ class ExpansionImpl():
if dynamic_file_exist:
refresh_cmd = "gs_om -t refreshconf"
hostSsh.getSshStatusOutput(refresh_cmd, [hostName], self.envFile)
-
self.cleanSshToolFile(hostSsh)
-
- self.logger.debug("End to generate and send cluster static file.\n")
+ self.logger.log("End to generate and send cluster static file.\n")
self.logger.log("Expansion results:")
for newHost in self.context.newHostList:
if self.expansionSuccess[newHost]:
- self.logger.log("%s:\tSuccess" % nodeIp)
+ self.logger.log("%s:\tSuccess" % newHost)
else:
- self.logger.log("%s:\tFailed" % nodeIp)
-
- def setGUCOnClusterHosts(self, hostNames=[]):
- """
- guc config on all hosts
- """
-
- gucDict = self.getGUCConfig()
-
- tempShFile = "%s/guc.sh" % self.tempFileDir
-
- if len(hostNames) == 0:
- hostNames = self.context.nodeNameList
-
- nodeDict = self.context.clusterInfoDict
- newHostList = self.context.newHostList
- hostAzNameMap = self.context.hostAzNameMap
- for host in hostNames:
- # set Available_zone for the new standby
- backIp = nodeDict[host]["backIp"]
- if backIp in newHostList:
- dataNode = nodeDict[host]["dataNode"]
- gucDict[host] += """\
-gs_guc set -D {dn} -c "available_zone='{azName}'"
- """.format(dn = dataNode, azName = hostAzNameMap[backIp])
- command = "source %s ; " % self.envFile + gucDict[host]
-
- self.logger.debug(command)
-
- sshTool = SshTool([host])
-
- # create temporary dir to save guc command bashfile.
- mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
- (self.tempFileDir,self.user,self.group,self.tempFileDir)
- retmap, output = sshTool.getSshStatusOutput(mkdirCmd, [host], \
- self.envFile)
-
- subprocess.getstatusoutput("touch %s; cat /dev/null > %s" % \
- (tempShFile, tempShFile))
- with os.fdopen(os.open("%s" % tempShFile, os.O_WRONLY | os.O_CREAT,
- stat.S_IWUSR | stat.S_IRUSR),'w') as fo:
- fo.write("#bash\n")
- fo.write( command )
- fo.close()
-
- # send guc command bashfile to each host and execute it.
- sshTool.scpFiles("%s" % tempShFile, "%s" % tempShFile, [host],
- self.envFile)
-
- resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \
- tempShFile, [host], self.envFile)
-
- self.logger.debug(outputCollect)
- self.cleanSshToolFile(sshTool)
+ self.logger.log("%s:\tFailed" % newHost)
def getGUCConfig(self):
"""
get guc config of each node:
replconninfo[index]
"""
- nodeDict = self.context.clusterInfoDict
- hostNames = self.context.nodeNameList
+ clusterInfoDict = self.context.clusterInfoDict
+ hostIpList = list(self.existingHosts)
+ for host in self.expansionSuccess:
+ hostIpList.append(host)
+ hostNames = []
+ for host in hostIpList:
+ hostNames.append(self.context.backIpNameMap[host])
gucDict = {}
-
for hostName in hostNames:
-
- localeHostInfo = nodeDict[hostName]
+ localeHostInfo = clusterInfoDict[hostName]
index = 1
guc_tempate_str = "source %s; " % self.envFile
for remoteHost in hostNames:
- if(remoteHost == hostName):
+ if remoteHost == hostName:
continue
- remoteHostInfo = nodeDict[remoteHost]
-
+ remoteHostInfo = clusterInfoDict[remoteHost]
guc_repl_template = """\
gs_guc set -D {dn} -c "replconninfo{index}=\
'localhost={localhost} localport={localport} \
@@ -857,9 +796,7 @@ remoteservice={remoteservice}'"
remotePort=remoteHostInfo["localport"],
remoteHeartPort=remoteHostInfo["heartBeatPort"],
remoteservice=remoteHostInfo["localservice"])
-
guc_tempate_str += guc_repl_template
-
index += 1
gucDict[hostName] = guc_tempate_str
@@ -868,66 +805,59 @@ remoteservice={remoteservice}'"
def checkLocalModeOnStandbyHosts(self):
"""
expansion the installed standby node. check standby database.
- 1. if the database is normal
- 2. if the databases version are same before existing and new
+ 1. if the database is installed correctly
+ 2. if the database versions are the same on existing and new hosts
"""
standbyHosts = self.context.newHostList
- envfile = self.envFile
-
- self.logger.log("Checking the database with locale mode.")
+ envFile = self.envFile
+ for host in standbyHosts:
+ self.expansionSuccess[host] = True
+ self.logger.log("Checking if the database is installed correctly with local mode.")
+ getversioncmd = "source %s;gaussdb --version" % envFile
+ primaryHostName = self.getPrimaryHostName()
+ sshPrimary = SshTool([primaryHostName])
+ resultMap, outputCollect = sshPrimary.getSshStatusOutput(
+ getversioncmd, [], envFile)
+ if resultMap[primaryHostName] != DefaultValue.SUCCESS:
+ GaussLog.exitWithError("Fail to check the version of primary.")
+ versionPattern = re.compile("gaussdb \((.*)\) .*")
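+ # the version string is the parenthesized part of "gaussdb --version"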
+ primaryVersion = versionPattern.findall(outputCollect)[0]
+ notInstalledHosts = []
+ wrongVersionHosts = []
for host in standbyHosts:
hostName = self.context.backIpNameMap[host]
dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
- insType, dbStat = self.commonGsCtl.queryInstanceStatus(hostName,
- dataNode, self.envFile)
- if insType not in (ROLE_PRIMARY, ROLE_STANDBY, ROLE_NORMAL, ROLE_CASCADE):
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35703"] %
- (hostName, self.user, dataNode, dataNode))
-
- allHostIp = []
- allHostIp.append(self.context.localIp)
- versionDic = {}
-
- for hostip in standbyHosts:
- allHostIp.append(hostip)
- sshTool= SshTool(allHostIp)
- #get version in the nodes
- getversioncmd = "gaussdb --version"
- resultMap, outputCollect = sshTool.getSshStatusOutput(getversioncmd,
- [], envfile)
- self.cleanSshToolFile(sshTool)
- versionLines = outputCollect.splitlines()
- for i in range(int(len(versionLines)/2)):
- ipPattern = re.compile("\[.*\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):")
- ipKey = ipPattern.findall(versionLines[2 * i])[0]
- versionPattern = re.compile("gaussdb \((.*)\) .*")
- version = versionPattern.findall(versionLines[2 * i + 1])[0]
- versionDic[ipKey] = version
- for hostip in versionDic:
- if hostip == self.context.localIp:
- versionCompare = ""
- versionCompare = versionDic[hostip]
+ sshTool = SshTool([host])
+ resultMap, outputCollect = sshTool.getSshStatusOutput(
+ getversioncmd, [], envFile)
+ if resultMap[host] != DefaultValue.SUCCESS:
+ self.expansionSuccess[host] = False
+ notInstalledHosts.append(host)
else:
- if versionDic[hostip] == versionCompare:
- continue
- else:
- GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35705"] \
- %(hostip, versionDic[hostip]))
-
- self.logger.log("Successfully checked the database with locale mode.")
+ version = versionPattern.findall(outputCollect)[0]
+ if version != primaryVersion:
+ self.expansionSuccess[host] = False
+ wrongVersionHosts.append(host)
+ if notInstalledHosts:
+ self.logger.log("In local mode, database is not installed "
+ "correctly on these nodes:\n%s" % ", ".join(notInstalledHosts))
+ if wrongVersionHosts:
+ self.logger.log("In local mode, the database version is not same "
+ "with primary on these nodes:\n%s" % ", ".join(wrongVersionHosts))
+ self.logger.log("End to check the database with locale mode.")
def preInstall(self):
"""
preinstall on new hosts.
"""
- self.logger.log("Start to preinstall database on the new \
-standby nodes.")
+ self.logger.log("Start to preinstall database on new nodes.")
self.sendSoftToHosts()
self.generateAndSendXmlFile()
self.preInstallOnHosts()
- self.logger.log("Successfully preinstall database on the new \
-standby nodes.")
-
+ self.logger.log("End to preinstall database on new nodes.\n")
+ if self._isAllFailed():
+ GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35706"] % "preinstall")
def clearTmpFile(self):
"""
@@ -972,11 +902,16 @@ standby nodes.")
self.logger.debug("Start to check cluster status.\n")
curHostName = socket.gethostname()
- command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
- (self.user, self.envFile)
+ command = ""
+ if DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH"):
+ command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
+ (self.user, self.envFile)
+ else:
+ command = "su - %s -c 'source /etc/profile;source /home/%s/.bashrc;"\
+ "gs_om -t status --detail'" % (self.user, self.user)
sshTool = SshTool([curHostName])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [curHostName], self.envFile)
+ resultMap, outputCollect = sshTool.getSshStatusOutput(command,
+ [curHostName], self.envFile)
if outputCollect.find("Primary Normal") == -1:
GaussLog.exitWithError("Unable to query current cluster status. " + \
"Please import environment variables or " +\
@@ -984,42 +919,47 @@ standby nodes.")
self.logger.debug("The primary database is normal.\n")
+
+ def _adjustOrderOfNewHostList(self):
+ """
+ Adjust the order of the new host list so that
+ standbys come first and cascade standbys come last
+ """
+ newHostList = self.context.newHostList
+ newHostCasRoleMap = self.context.newHostCasRoleMap
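+ # two-pointer partition: swap any cascade standby ("on") found near
+ # the front with a standby ("off") found near the back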
+ i, j = 0, len(newHostList) - 1
+ while i < j:
+ while i < j and newHostCasRoleMap[newHostList[i]] == "off":
+ i += 1
+ while i < j and newHostCasRoleMap[newHostList[j]] == "on":
+ j -= 1
+ newHostList[i], newHostList[j] = newHostList[j], newHostList[i]
+ i += 1
+ j -= 1
+
def validNodeInStandbyList(self):
"""
check if the node has been installed in the cluster.
"""
- self.logger.debug("Start to check if the nodes in standby list\n")
-
- curHostName = socket.gethostname()
- command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
- (self.user, self.envFile)
- sshTool = SshTool([curHostName])
- resultMap, outputCollect = sshTool.getSshStatusOutput(command,
- [curHostName], self.envFile)
- self.logger.debug(outputCollect)
-
- newHosts = self.context.newHostList
- standbyHosts = []
- existHosts = []
- while len(newHosts) > 0:
- hostIp = newHosts.pop()
- nodeName = self.context.backIpNameMap[hostIp]
- nodeInfo = self.context.clusterInfoDict[nodeName]
- dataNode = nodeInfo["dataNode"]
- exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp)
- if not re.search(exist_reg, outputCollect):
- standbyHosts.append(hostIp)
- else:
- existHosts.append(hostIp)
- self.context.newHostList = standbyHosts
- if len(existHosts) > 0:
- self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." \
- % ",".join(existHosts))
- self.cleanSshToolFile(sshTool)
- if len(standbyHosts) == 0:
+ self.logger.debug("Start to check if the nodes in standby list.")
+ self.getExistingHosts()
+ newHostList = self.context.newHostList
+ existedNewHosts = \
+ [host for host in newHostList if host in self.existingHosts]
+ if existedNewHosts:
+ newHostList = \
+ [host for host in newHostList if host not in existedNewHosts]
+ self.context.newHostList = newHostList
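+ # track expansion results only for the hosts that remain to be expanded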
+ self.expansionSuccess = {}
+ for host in newHostList:
+ self.expansionSuccess[host] = False
+ self.logger.log("These nodes [%s] are already in the cluster. "
+ "Skip expand these nodes." % ",".join(existedNewHosts))
+ if len(newHostList) == 0:
self.logger.log("There is no node can be expanded.")
sys.exit(0)
-
+ self._adjustOrderOfNewHostList()
+
def checkXmlFileAccessToUser(self):
"""
Check if the xml config file has readable access to user.
@@ -1118,15 +1058,13 @@ standby nodes.")
self.changeUser()
if not self.context.standbyLocalMode:
- self.logger.log("\nStart to install database on the new \
-standby nodes.")
+ self.logger.log("Start to install database on new nodes.")
self.installDatabaseOnHosts()
else:
- self.logger.log("\nStandby nodes is installed with locale mode.")
self.checkLocalModeOnStandbyHosts()
- self.logger.log("\nDatabase on standby nodes installed finished.")
- self.logger.log("\nStart to establish the primary-standby relationship.")
+ self.logger.log("Database on standby nodes installed finished.\n")
+ self.logger.log("Start to establish the relationship.")
self.buildStandbyRelation()
# process success
pvalue.value = 1
@@ -1137,38 +1075,46 @@ standby nodes.")
"""
existingHosts = self.existingHosts
failedHosts = []
- for host in self.expansionSuccess.keys():
+ for host in self.expansionSuccess:
if self.expansionSuccess[host]:
existingHosts.append(host)
else:
failedHosts.append(host)
clusterInfoDict = self.context.clusterInfoDict
- primaryHostName = self.getPrimaryHostName()
for failedHost in failedHosts:
- self.logger.debug("start to rollback replconninfo about %s" % failedHost)
+ self.logger.debug("Start to rollback replconninfo about %s" % failedHost)
for host in existingHosts:
hostName = self.context.backIpNameMap[host]
dataNode = clusterInfoDict[hostName]["dataNode"]
confFile = os.path.join(dataNode, "postgresql.conf")
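+ # prepend '#' to comment out replconninfo entries pointing at the failed host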
rollbackReplconninfoCmd = "sed -i '/remotehost=%s/s/^/#&/' %s" \
% (failedHost, confFile)
- self.logger.debug(rollbackReplconninfoCmd)
- sshTool = SshTool(host)
+ self.logger.debug("[%s] rollbackReplconninfoCmd:%s" % (host,
+ rollbackReplconninfoCmd))
+ sshTool = SshTool([host])
(statusMap, output) = sshTool.getSshStatusOutput(rollbackReplconninfoCmd, [host])
- if hostName == primaryHostName:
- pg_hbaFile = os.path.join(dataNode, "pg_hba.conf")
- rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \
- % (failedHost, pg_hbaFile)
- self.logger.debug(rollbackPg_hbaCmd)
- (statusMap, output) = sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host])
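+ # pg_hba.conf is now rolled back on every host, not only the primary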
+ pg_hbaFile = os.path.join(dataNode, "pg_hba.conf")
+ rollbackPg_hbaCmd = "sed -i '/%s/s/^/#&/' %s" \
+ % (failedHost, pg_hbaFile)
+ self.logger.debug("[%s] rollbackPg_hbaCmd:%s" % (host,
+ rollbackPg_hbaCmd))
+ (statusMap, output) = sshTool.getSshStatusOutput(rollbackPg_hbaCmd, [host])
reloadGUCCommand = "source %s ; gs_ctl reload -D %s " % \
(self.envFile, dataNode)
- self.logger.debug(reloadGUCCommand)
resultMap, outputCollect = sshTool.getSshStatusOutput(
reloadGUCCommand, [host], self.envFile)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
+ def _isAllFailed(self):
+ """
+ check whether preinstall/install/build failed on all new hosts
+ """
+ for host in self.expansionSuccess:
+ if self.expansionSuccess[host]:
+ return False
+ return True
+
def run(self):
"""
start expansion
@@ -1238,6 +1184,7 @@ class GsCtlCommon:
self.logger.debug(host)
self.logger.debug(outputCollect)
self.cleanSshToolTmpFile(sshTool)
+ return resultMap, outputCollect
def buildInstance(self, host, datanode, mode, env):
command = "source %s ; gs_ctl build -D %s -M %s" % (env, datanode, mode)
@@ -1286,7 +1233,4 @@ class GsCtlCommon:
try:
sshTool.clenSshResultFiles()
except Exception as e:
- self.logger.debug(str(e))
-
-
-
+ self.logger.debug(str(e))
\ No newline at end of file
--
Gitee